;lib: regex: rename & export RegexError, export toRegex_, docs (#1312)

This commit is contained in:
Simon Michael 2020-08-06 11:43:22 -07:00
parent 4957008890
commit 3822c27bfe

View File

@ -19,11 +19,21 @@ Easy regular expression helpers, currently based on regex-tdfa. These should:
- work with simple strings - work with simple strings
Regex strings are automatically compiled into regular expressions the first
time they are seen, and these are cached. If you use a huge number of unique
regular expressions this might lead to increased memory usage. Several
functions have memoised variants (*Memo), which also trade space for time.
Currently two APIs are provided:
- The old partial one, which calls error on any problem (e.g. a malformed
regexp). This comes from hledger's origin as a command-line tool.
- The new total one (with _ suffixes) which will return an error message. This
is better for long-running apps like hledger-web.
We are gradually replacing usage of the old API in hledger. Probably at some
point the suffixless names will be reclaimed for the new API.
Current limitations: Current limitations:
@ -35,6 +45,7 @@ module Hledger.Utils.Regex (
-- * type aliases -- * type aliases
Regexp Regexp
,Replacement ,Replacement
,RegexError
-- * partial regex operations (may call error) -- * partial regex operations (may call error)
,regexMatches ,regexMatches
,regexMatchesCI ,regexMatchesCI
@ -53,6 +64,7 @@ module Hledger.Utils.Regex (
,regexReplaceCIMemo_ ,regexReplaceCIMemo_
,regexReplaceBy_ ,regexReplaceBy_
,regexReplaceByCI_ ,regexReplaceByCI_
,toRegex_
) )
where where
@ -77,7 +89,7 @@ type Regexp = String
type Replacement = String type Replacement = String
-- | A regular expression compilation/processing error message. -- | A regular expression compilation/processing error message.
type Error = String type RegexError = String
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- old partial functions -- PARTIAL: -- old partial functions -- PARTIAL:
@ -155,66 +167,66 @@ replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [M
-- | Does this regexp match the given string ? -- | Does this regexp match the given string ?
-- Or return an error if the regexp is malformed. -- Or return an error if the regexp is malformed.
regexMatches_ :: Regexp -> String -> Either Error Bool regexMatches_ :: Regexp -> String -> Either RegexError Bool
regexMatches_ r s = (`match` s) <$> toRegex_ r regexMatches_ r s = (`match` s) <$> toRegex_ r
-- | Like regexMatches_ but match case-insensitively. -- | Like regexMatches_ but match case-insensitively.
regexMatchesCI_ :: Regexp -> String -> Either Error Bool regexMatchesCI_ :: Regexp -> String -> Either RegexError Bool
regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r
-- | Replace all occurrences of the regexp with the replacement -- | Replace all occurrences of the regexp with the replacement
-- pattern, or return an error message. The replacement pattern -- pattern, or return an error message. The replacement pattern
-- supports numeric backreferences (\N) but no other RE syntax. -- supports numeric backreferences (\N) but no other RE syntax.
regexReplace_ :: Regexp -> Replacement -> String -> Either Error String regexReplace_ :: Regexp -> Replacement -> String -> Either RegexError String
regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s
-- | Like regexReplace_ but match occurrences case-insensitively. -- | Like regexReplace_ but match occurrences case-insensitively.
regexReplaceCI_ :: Regexp -> Replacement -> String -> Either Error String regexReplaceCI_ :: Regexp -> Replacement -> String -> Either RegexError String
regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s
-- | A memoising version of regexReplace_. Caches the result for each -- | A memoising version of regexReplace_. Caches the result for each
-- search pattern, replacement pattern, target string tuple. -- search pattern, replacement pattern, target string tuple.
regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either Error String regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either RegexError String
regexReplaceMemo_ re repl = memo (regexReplace_ re repl) regexReplaceMemo_ re repl = memo (regexReplace_ re repl)
-- | Like regexReplaceMemo_ but match occurrences case-insensitively. -- | Like regexReplaceMemo_ but match occurrences case-insensitively.
regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either Error String regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either RegexError String
regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl) regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl)
-- | Replace all occurrences of the regexp, transforming each match -- | Replace all occurrences of the regexp, transforming each match
-- with the given function, or return an error message. -- with the given function, or return an error message.
regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either Error String regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either RegexError String
regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s
-- | Like regexReplaceBy_ but match occurrences case-insensitively. -- | Like regexReplaceBy_ but match occurrences case-insensitively.
regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either Error String regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either RegexError String
regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s
-- helpers: -- helpers:
-- Convert a Regexp string to a compiled Regex, or return an error message. -- Convert a Regexp string to a compiled Regex, or return an error message.
toRegex_ :: Regexp -> Either Error Regex toRegex_ :: Regexp -> Either RegexError Regex
toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt) toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt)
-- Like toRegex_, but make a case-insensitive Regex. -- Like toRegex_, but make a case-insensitive Regex.
toRegexCI_ :: Regexp -> Either Error Regex toRegexCI_ :: Regexp -> Either RegexError Regex
toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt) toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt)
-- Compile a Regexp string to a Regex with the given options, or return an -- Compile a Regexp string to a Regex with the given options, or return an
-- error message if this fails. -- error message if this fails.
compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either Error Regex compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either RegexError Regex
compileRegex_ compopt execopt r = compileRegex_ compopt execopt r =
maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $ maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $
makeRegexOptsM compopt execopt r makeRegexOptsM compopt execopt r
-- Replace this regular expression with this replacement pattern in this -- Replace this regular expression with this replacement pattern in this
-- string, or return an error message. -- string, or return an error message.
replaceRegex_ :: Regex -> Replacement -> String -> Either Error String replaceRegex_ :: Regex -> Replacement -> String -> Either RegexError String
replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String]) replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String])
where where
-- Replace one match within the string with the replacement text -- Replace one match within the string with the replacement text
-- appropriate for this match. Or return an error message. -- appropriate for this match. Or return an error message.
replaceMatch_ :: Replacement -> String -> MatchText String -> Either Error String replaceMatch_ :: Replacement -> String -> MatchText String -> Either RegexError String
replaceMatch_ replpat s matchgroups = replaceMatch_ replpat s matchgroups =
erepl >>= \repl -> Right $ pre ++ repl ++ post erepl >>= \repl -> Right $ pre ++ repl ++ post
where where
@ -228,7 +240,7 @@ replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s ::
where where
-- Given some match groups and a numeric backreference, -- Given some match groups and a numeric backreference,
-- return the referenced group text, or an error message. -- return the referenced group text, or an error message.
lookupMatchGroup_ :: MatchText String -> String -> Either Error String lookupMatchGroup_ :: MatchText String -> String -> Either RegexError String
lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s = lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s =
case read s of n | n `elem` indices grps -> Right $ fst (grps ! n) case read s of n | n `elem` indices grps -> Right $ fst (grps ! n)
_ -> Left $ "no match group exists for backreference \"\\"++s++"\"" _ -> Left $ "no match group exists for backreference \"\\"++s++"\""