From 3822c27bfefd8a23544f07c6c71c527878dfadbc Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Thu, 6 Aug 2020 11:43:22 -0700 Subject: [PATCH] ;lib: regex: rename & export RegexError, export toRegex_, docs (#1312) --- hledger-lib/Hledger/Utils/Regex.hs | 52 ++++++++++++++++++------------ 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/hledger-lib/Hledger/Utils/Regex.hs b/hledger-lib/Hledger/Utils/Regex.hs index 9198a3707..d3ccb6d7c 100644 --- a/hledger-lib/Hledger/Utils/Regex.hs +++ b/hledger-lib/Hledger/Utils/Regex.hs @@ -19,11 +19,21 @@ Easy regular expression helpers, currently based on regex-tdfa. These should: - work with simple strings -Regex strings are automatically compiled into regular expressions the -first time they are seen, and these are cached. If you use a huge -number of unique regular expressions this might lead to increased -memory usage. Several functions have memoised variants (*Memo), which -also trade space for time. +Regex strings are automatically compiled into regular expressions the first +time they are seen, and these are cached. If you use a huge number of unique +regular expressions this might lead to increased memory usage. Several +functions have memoised variants (*Memo), which also trade space for time. + +Currently two APIs are provided: + +- The old partial one which will call error on any problem (eg with malformed + regexps). This comes from hledger's origin as a command-line tool. + +- The new total one (with _ suffixes) which will return an error message. This + is better for long-running apps like hledger-web. + +We are gradually replacing usage of the old API in hledger. Probably at some +point the suffixless names will be reclaimed for the new API. Current limitations: @@ -35,6 +45,7 @@ module Hledger.Utils.Regex ( -- * type aliases Regexp ,Replacement + ,RegexError -- * partial regex operations (may call error) ,regexMatches ,regexMatchesCI @@ -53,6 +64,7 @@ module Hledger.Utils.Regex ( ,regexReplaceCIMemo_ ,regexReplaceBy_ ,regexReplaceByCI_ + ,toRegex_ ) where @@ -77,7 +89,7 @@ type Regexp = String type Replacement = String -- | An regular expression compilation/processing error message. -type Error = String +type RegexError = String -------------------------------------------------------------------------------- -- old partial functions -- PARTIAL: @@ -155,66 +167,66 @@ replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [M -- | Does this regexp match the given string ? -- Or return an error if the regexp is malformed. -regexMatches_ :: Regexp -> String -> Either Error Bool +regexMatches_ :: Regexp -> String -> Either RegexError Bool regexMatches_ r s = (`match` s) <$> toRegex_ r -- | Like regexMatches_ but match case-insensitively. -regexMatchesCI_ :: Regexp -> String -> Either Error Bool +regexMatchesCI_ :: Regexp -> String -> Either RegexError Bool regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r -- | Replace all occurrences of the regexp with the replacement -- pattern, or return an error message. The replacement pattern -- supports numeric backreferences (\N) but no other RE syntax. -regexReplace_ :: Regexp -> Replacement -> String -> Either Error String +regexReplace_ :: Regexp -> Replacement -> String -> Either RegexError String regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s -- | Like regexReplace_ but match occurrences case-insensitively. -regexReplaceCI_ :: Regexp -> Replacement -> String -> Either Error String +regexReplaceCI_ :: Regexp -> Replacement -> String -> Either RegexError String regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s -- | A memoising version of regexReplace_. Caches the result for each -- search pattern, replacement pattern, target string tuple. -regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either Error String +regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either RegexError String regexReplaceMemo_ re repl = memo (regexReplace_ re repl) -- | Like regexReplaceMemo_ but match occurrences case-insensitively. -regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either Error String +regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either RegexError String regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl) -- | Replace all occurrences of the regexp, transforming each match -- with the given function, or return an error message. -regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either Error String +regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either RegexError String regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s -- | Like regexReplaceBy_ but match occurrences case-insensitively. -regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either Error String +regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either RegexError String regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s -- helpers: -- Convert a Regexp string to a compiled Regex, or return an error message. -toRegex_ :: Regexp -> Either Error Regex +toRegex_ :: Regexp -> Either RegexError Regex toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt) -- Like toRegex, but make a case-insensitive Regex. -toRegexCI_ :: Regexp -> Either Error Regex +toRegexCI_ :: Regexp -> Either RegexError Regex toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt) -- Compile a Regexp string to a Regex with the given options, or return an -- error message if this fails. -compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either Error Regex +compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either RegexError Regex compileRegex_ compopt execopt r = maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $ makeRegexOptsM compopt execopt r -- Replace this regular expression with this replacement pattern in this -- string, or return an error message. -replaceRegex_ :: Regex -> Replacement -> String -> Either Error String +replaceRegex_ :: Regex -> Replacement -> String -> Either RegexError String replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String]) where -- Replace one match within the string with the replacement text -- appropriate for this match. Or return an error message. - replaceMatch_ :: Replacement -> String -> MatchText String -> Either Error String + replaceMatch_ :: Replacement -> String -> MatchText String -> Either RegexError String replaceMatch_ replpat s matchgroups = erepl >>= \repl -> Right $ pre ++ repl ++ post where @@ -228,7 +240,7 @@ replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: where -- Given some match groups and a numeric backreference, -- return the referenced group text, or an error message. - lookupMatchGroup_ :: MatchText String -> String -> Either Error String + lookupMatchGroup_ :: MatchText String -> String -> Either RegexError String lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s = case read s of n | n `elem` indices grps -> Right $ fst (grps ! n) _ -> Left $ "no match group exists for backreference \"\\"++s++"\""