;lib: regex: rename & export RegexError, export toRegex_, docs (#1312)
This commit is contained in:
parent
4957008890
commit
3822c27bfe
@ -19,11 +19,21 @@ Easy regular expression helpers, currently based on regex-tdfa. These should:
|
|||||||
|
|
||||||
- work with simple strings
|
- work with simple strings
|
||||||
|
|
||||||
Regex strings are automatically compiled into regular expressions the
|
Regex strings are automatically compiled into regular expressions the first
|
||||||
first time they are seen, and these are cached. If you use a huge
|
time they are seen, and these are cached. If you use a huge number of unique
|
||||||
number of unique regular expressions this might lead to increased
|
regular expressions this might lead to increased memory usage. Several
|
||||||
memory usage. Several functions have memoised variants (*Memo), which
|
functions have memoised variants (*Memo), which also trade space for time.
|
||||||
also trade space for time.
|
|
||||||
|
Currently two APIs are provided:
|
||||||
|
|
||||||
|
- The old partial one which will call error on any problem (e.g. with malformed
|
||||||
|
regexps). This comes from hledger's origin as a command-line tool.
|
||||||
|
|
||||||
|
- The new total one (with _ suffixes) which will return an error message. This
|
||||||
|
is better for long-running apps like hledger-web.
|
||||||
|
|
||||||
|
We are gradually replacing usage of the old API in hledger. Probably at some
|
||||||
|
point the suffixless names will be reclaimed for the new API.
|
||||||
|
|
||||||
Current limitations:
|
Current limitations:
|
||||||
|
|
||||||
@ -35,6 +45,7 @@ module Hledger.Utils.Regex (
|
|||||||
-- * type aliases
|
-- * type aliases
|
||||||
Regexp
|
Regexp
|
||||||
,Replacement
|
,Replacement
|
||||||
|
,RegexError
|
||||||
-- * partial regex operations (may call error)
|
-- * partial regex operations (may call error)
|
||||||
,regexMatches
|
,regexMatches
|
||||||
,regexMatchesCI
|
,regexMatchesCI
|
||||||
@ -53,6 +64,7 @@ module Hledger.Utils.Regex (
|
|||||||
,regexReplaceCIMemo_
|
,regexReplaceCIMemo_
|
||||||
,regexReplaceBy_
|
,regexReplaceBy_
|
||||||
,regexReplaceByCI_
|
,regexReplaceByCI_
|
||||||
|
,toRegex_
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
@ -77,7 +89,7 @@ type Regexp = String
|
|||||||
type Replacement = String
|
type Replacement = String
|
||||||
|
|
||||||
-- | A regular expression compilation/processing error message.
|
-- | A regular expression compilation/processing error message.
|
||||||
type Error = String
|
type RegexError = String
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- old partial functions -- PARTIAL:
|
-- old partial functions -- PARTIAL:
|
||||||
@ -155,66 +167,66 @@ replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [M
|
|||||||
|
|
||||||
-- | Does this regexp match the given string ?
|
-- | Does this regexp match the given string ?
|
||||||
-- Or return an error if the regexp is malformed.
|
-- Or return an error if the regexp is malformed.
|
||||||
regexMatches_ :: Regexp -> String -> Either Error Bool
|
regexMatches_ :: Regexp -> String -> Either RegexError Bool
|
||||||
regexMatches_ r s = (`match` s) <$> toRegex_ r
|
regexMatches_ r s = (`match` s) <$> toRegex_ r
|
||||||
|
|
||||||
-- | Like regexMatches_ but match case-insensitively.
|
-- | Like regexMatches_ but match case-insensitively.
|
||||||
regexMatchesCI_ :: Regexp -> String -> Either Error Bool
|
regexMatchesCI_ :: Regexp -> String -> Either RegexError Bool
|
||||||
regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r
|
regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r
|
||||||
|
|
||||||
-- | Replace all occurrences of the regexp with the replacement
|
-- | Replace all occurrences of the regexp with the replacement
|
||||||
-- pattern, or return an error message. The replacement pattern
|
-- pattern, or return an error message. The replacement pattern
|
||||||
-- supports numeric backreferences (\N) but no other RE syntax.
|
-- supports numeric backreferences (\N) but no other RE syntax.
|
||||||
regexReplace_ :: Regexp -> Replacement -> String -> Either Error String
|
regexReplace_ :: Regexp -> Replacement -> String -> Either RegexError String
|
||||||
regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s
|
regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s
|
||||||
|
|
||||||
-- | Like regexReplace_ but match occurrences case-insensitively.
|
-- | Like regexReplace_ but match occurrences case-insensitively.
|
||||||
regexReplaceCI_ :: Regexp -> Replacement -> String -> Either Error String
|
regexReplaceCI_ :: Regexp -> Replacement -> String -> Either RegexError String
|
||||||
regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s
|
regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s
|
||||||
|
|
||||||
-- | A memoising version of regexReplace_. Caches the result for each
|
-- | A memoising version of regexReplace_. Caches the result for each
|
||||||
-- search pattern, replacement pattern, target string tuple.
|
-- search pattern, replacement pattern, target string tuple.
|
||||||
regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either Error String
|
regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either RegexError String
|
||||||
regexReplaceMemo_ re repl = memo (regexReplace_ re repl)
|
regexReplaceMemo_ re repl = memo (regexReplace_ re repl)
|
||||||
|
|
||||||
-- | Like regexReplaceMemo_ but match occurrences case-insensitively.
|
-- | Like regexReplaceMemo_ but match occurrences case-insensitively.
|
||||||
regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either Error String
|
regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either RegexError String
|
||||||
regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl)
|
regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl)
|
||||||
|
|
||||||
-- | Replace all occurrences of the regexp, transforming each match
|
-- | Replace all occurrences of the regexp, transforming each match
|
||||||
-- with the given function, or return an error message.
|
-- with the given function, or return an error message.
|
||||||
regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either Error String
|
regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either RegexError String
|
||||||
regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s
|
regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s
|
||||||
|
|
||||||
-- | Like regexReplaceBy_ but match occurrences case-insensitively.
|
-- | Like regexReplaceBy_ but match occurrences case-insensitively.
|
||||||
regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either Error String
|
regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either RegexError String
|
||||||
regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s
|
regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s
|
||||||
|
|
||||||
-- helpers:
|
-- helpers:
|
||||||
|
|
||||||
-- Convert a Regexp string to a compiled Regex, or return an error message.
|
-- Convert a Regexp string to a compiled Regex, or return an error message.
|
||||||
toRegex_ :: Regexp -> Either Error Regex
|
toRegex_ :: Regexp -> Either RegexError Regex
|
||||||
toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt)
|
toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt)
|
||||||
|
|
||||||
-- Like toRegex_, but make a case-insensitive Regex.
|
-- Like toRegex_, but make a case-insensitive Regex.
|
||||||
toRegexCI_ :: Regexp -> Either Error Regex
|
toRegexCI_ :: Regexp -> Either RegexError Regex
|
||||||
toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt)
|
toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt)
|
||||||
|
|
||||||
-- Compile a Regexp string to a Regex with the given options, or return an
|
-- Compile a Regexp string to a Regex with the given options, or return an
|
||||||
-- error message if this fails.
|
-- error message if this fails.
|
||||||
compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either Error Regex
|
compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either RegexError Regex
|
||||||
compileRegex_ compopt execopt r =
|
compileRegex_ compopt execopt r =
|
||||||
maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $
|
maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $
|
||||||
makeRegexOptsM compopt execopt r
|
makeRegexOptsM compopt execopt r
|
||||||
|
|
||||||
-- Replace this regular expression with this replacement pattern in this
|
-- Replace this regular expression with this replacement pattern in this
|
||||||
-- string, or return an error message.
|
-- string, or return an error message.
|
||||||
replaceRegex_ :: Regex -> Replacement -> String -> Either Error String
|
replaceRegex_ :: Regex -> Replacement -> String -> Either RegexError String
|
||||||
replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String])
|
replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String])
|
||||||
where
|
where
|
||||||
-- Replace one match within the string with the replacement text
|
-- Replace one match within the string with the replacement text
|
||||||
-- appropriate for this match. Or return an error message.
|
-- appropriate for this match. Or return an error message.
|
||||||
replaceMatch_ :: Replacement -> String -> MatchText String -> Either Error String
|
replaceMatch_ :: Replacement -> String -> MatchText String -> Either RegexError String
|
||||||
replaceMatch_ replpat s matchgroups =
|
replaceMatch_ replpat s matchgroups =
|
||||||
erepl >>= \repl -> Right $ pre ++ repl ++ post
|
erepl >>= \repl -> Right $ pre ++ repl ++ post
|
||||||
where
|
where
|
||||||
@ -228,7 +240,7 @@ replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s ::
|
|||||||
where
|
where
|
||||||
-- Given some match groups and a numeric backreference,
|
-- Given some match groups and a numeric backreference,
|
||||||
-- return the referenced group text, or an error message.
|
-- return the referenced group text, or an error message.
|
||||||
lookupMatchGroup_ :: MatchText String -> String -> Either Error String
|
lookupMatchGroup_ :: MatchText String -> String -> Either RegexError String
|
||||||
lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s =
|
lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s =
|
||||||
case read s of n | n `elem` indices grps -> Right $ fst (grps ! n)
|
case read s of n | n `elem` indices grps -> Right $ fst (grps ! n)
|
||||||
_ -> Left $ "no match group exists for backreference \"\\"++s++"\""
|
_ -> Left $ "no match group exists for backreference \"\\"++s++"\""
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user