;lib: regex: rename & export RegexError, export toRegex_, docs (#1312)
This commit is contained in:
		
							parent
							
								
									4957008890
								
							
						
					
					
						commit
						3822c27bfe
					
				| @ -19,11 +19,21 @@ Easy regular expression helpers, currently based on regex-tdfa. These should: | |||||||
| 
 | 
 | ||||||
| - work with simple strings | - work with simple strings | ||||||
| 
 | 
 | ||||||
| Regex strings are automatically compiled into regular expressions the | Regex strings are automatically compiled into regular expressions the first | ||||||
| first time they are seen, and these are cached. If you use a huge | time they are seen, and these are cached. If you use a huge number of unique | ||||||
| number of unique regular expressions this might lead to increased | regular expressions this might lead to increased memory usage. Several | ||||||
| memory usage. Several functions have memoised variants (*Memo), which | functions have memoised variants (*Memo), which also trade space for time. | ||||||
| also trade space for time. | 
 | ||||||
|  | Currently two APIs are provided: | ||||||
|  | 
 | ||||||
|  | - The old partial one which will call error on any problem (eg with malformed | ||||||
|  |   regexps). This comes from hledger's origin as a command-line tool. | ||||||
|  | 
 | ||||||
|  | - The new total one (with _ suffixes) which will return an error message. This | ||||||
|  |   is better for long-running apps like hledger-web. | ||||||
|  | 
 | ||||||
|  | We are gradually replacing usage of the old API in hledger. Probably at some | ||||||
|  | point the suffixless names will be reclaimed for the new API. | ||||||
| 
 | 
 | ||||||
| Current limitations: | Current limitations: | ||||||
| 
 | 
 | ||||||
| @ -35,6 +45,7 @@ module Hledger.Utils.Regex ( | |||||||
|    -- * type aliases |    -- * type aliases | ||||||
|    Regexp |    Regexp | ||||||
|   ,Replacement |   ,Replacement | ||||||
|  |   ,RegexError | ||||||
|    -- * partial regex operations (may call error) |    -- * partial regex operations (may call error) | ||||||
|   ,regexMatches |   ,regexMatches | ||||||
|   ,regexMatchesCI |   ,regexMatchesCI | ||||||
| @ -53,6 +64,7 @@ module Hledger.Utils.Regex ( | |||||||
|   ,regexReplaceCIMemo_ |   ,regexReplaceCIMemo_ | ||||||
|   ,regexReplaceBy_ |   ,regexReplaceBy_ | ||||||
|   ,regexReplaceByCI_ |   ,regexReplaceByCI_ | ||||||
|  |   ,toRegex_ | ||||||
|   ) |   ) | ||||||
| where | where | ||||||
| 
 | 
 | ||||||
| @ -77,7 +89,7 @@ type Regexp = String | |||||||
| type Replacement = String | type Replacement = String | ||||||
| 
 | 
 | ||||||
| -- | An regular expression compilation/processing error message. | -- | An regular expression compilation/processing error message. | ||||||
| type Error = String | type RegexError = String | ||||||
| 
 | 
 | ||||||
| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||||||
| -- old partial functions  -- PARTIAL: | -- old partial functions  -- PARTIAL: | ||||||
| @ -155,66 +167,66 @@ replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [M | |||||||
| 
 | 
 | ||||||
| -- | Does this regexp match the given string ? | -- | Does this regexp match the given string ? | ||||||
| -- Or return an error if the regexp is malformed. | -- Or return an error if the regexp is malformed. | ||||||
| regexMatches_ :: Regexp -> String -> Either Error Bool | regexMatches_ :: Regexp -> String -> Either RegexError Bool | ||||||
| regexMatches_ r s = (`match` s) <$> toRegex_ r | regexMatches_ r s = (`match` s) <$> toRegex_ r | ||||||
| 
 | 
 | ||||||
| -- | Like regexMatches_ but match case-insensitively. | -- | Like regexMatches_ but match case-insensitively. | ||||||
| regexMatchesCI_ :: Regexp -> String -> Either Error Bool | regexMatchesCI_ :: Regexp -> String -> Either RegexError Bool | ||||||
| regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r | regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r | ||||||
| 
 | 
 | ||||||
| -- | Replace all occurrences of the regexp with the replacement | -- | Replace all occurrences of the regexp with the replacement | ||||||
| -- pattern, or return an error message. The replacement pattern | -- pattern, or return an error message. The replacement pattern | ||||||
| -- supports numeric backreferences (\N) but no other RE syntax. | -- supports numeric backreferences (\N) but no other RE syntax. | ||||||
| regexReplace_ :: Regexp -> Replacement -> String -> Either Error String | regexReplace_ :: Regexp -> Replacement -> String -> Either RegexError String | ||||||
| regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s | regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s | ||||||
| 
 | 
 | ||||||
| -- | Like regexReplace_ but match occurrences case-insensitively. | -- | Like regexReplace_ but match occurrences case-insensitively. | ||||||
| regexReplaceCI_ :: Regexp -> Replacement -> String -> Either Error String | regexReplaceCI_ :: Regexp -> Replacement -> String -> Either RegexError String | ||||||
| regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s | regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s | ||||||
| 
 | 
 | ||||||
| -- | A memoising version of regexReplace_. Caches the result for each | -- | A memoising version of regexReplace_. Caches the result for each | ||||||
| -- search pattern, replacement pattern, target string tuple. | -- search pattern, replacement pattern, target string tuple. | ||||||
| regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either Error String | regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either RegexError String | ||||||
| regexReplaceMemo_ re repl = memo (regexReplace_ re repl) | regexReplaceMemo_ re repl = memo (regexReplace_ re repl) | ||||||
| 
 | 
 | ||||||
| -- | Like regexReplaceMemo_ but match occurrences case-insensitively. | -- | Like regexReplaceMemo_ but match occurrences case-insensitively. | ||||||
| regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either Error String | regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either RegexError String | ||||||
| regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl) | regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl) | ||||||
| 
 | 
 | ||||||
| -- | Replace all occurrences of the regexp, transforming each match | -- | Replace all occurrences of the regexp, transforming each match | ||||||
| -- with the given function, or return an error message. | -- with the given function, or return an error message. | ||||||
| regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either Error String | regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either RegexError String | ||||||
| regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s | regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s | ||||||
| 
 | 
 | ||||||
| -- | Like regexReplaceBy_ but match occurrences case-insensitively. | -- | Like regexReplaceBy_ but match occurrences case-insensitively. | ||||||
| regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either Error String | regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either RegexError String | ||||||
| regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s | regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s | ||||||
| 
 | 
 | ||||||
| -- helpers: | -- helpers: | ||||||
| 
 | 
 | ||||||
| -- Convert a Regexp string to a compiled Regex, or return an error message. | -- Convert a Regexp string to a compiled Regex, or return an error message. | ||||||
| toRegex_ :: Regexp -> Either Error Regex | toRegex_ :: Regexp -> Either RegexError Regex | ||||||
| toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt) | toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt) | ||||||
| 
 | 
 | ||||||
| -- Like toRegex, but make a case-insensitive Regex. | -- Like toRegex, but make a case-insensitive Regex. | ||||||
| toRegexCI_ :: Regexp -> Either Error Regex | toRegexCI_ :: Regexp -> Either RegexError Regex | ||||||
| toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt) | toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt) | ||||||
| 
 | 
 | ||||||
| -- Compile a Regexp string to a Regex with the given options, or return an | -- Compile a Regexp string to a Regex with the given options, or return an | ||||||
| -- error message if this fails. | -- error message if this fails. | ||||||
| compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either Error Regex | compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either RegexError Regex | ||||||
| compileRegex_ compopt execopt r = | compileRegex_ compopt execopt r = | ||||||
|   maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $ |   maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $ | ||||||
|   makeRegexOptsM compopt execopt r |   makeRegexOptsM compopt execopt r | ||||||
| 
 | 
 | ||||||
| -- Replace this regular expression with this replacement pattern in this | -- Replace this regular expression with this replacement pattern in this | ||||||
| -- string, or return an error message. | -- string, or return an error message. | ||||||
| replaceRegex_ :: Regex -> Replacement -> String -> Either Error String | replaceRegex_ :: Regex -> Replacement -> String -> Either RegexError String | ||||||
| replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String]) | replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String]) | ||||||
|   where |   where | ||||||
|     -- Replace one match within the string with the replacement text |     -- Replace one match within the string with the replacement text | ||||||
|     -- appropriate for this match. Or return an error message. |     -- appropriate for this match. Or return an error message. | ||||||
|     replaceMatch_ :: Replacement -> String -> MatchText String -> Either Error String |     replaceMatch_ :: Replacement -> String -> MatchText String -> Either RegexError String | ||||||
|     replaceMatch_ replpat s matchgroups = |     replaceMatch_ replpat s matchgroups = | ||||||
|       erepl >>= \repl -> Right $ pre ++ repl ++ post |       erepl >>= \repl -> Right $ pre ++ repl ++ post | ||||||
|       where |       where | ||||||
| @ -228,7 +240,7 @@ replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: | |||||||
|           where |           where | ||||||
|             -- Given some match groups and a numeric backreference, |             -- Given some match groups and a numeric backreference, | ||||||
|             -- return the referenced group text, or an error message. |             -- return the referenced group text, or an error message. | ||||||
|             lookupMatchGroup_ :: MatchText String -> String -> Either Error String |             lookupMatchGroup_ :: MatchText String -> String -> Either RegexError String | ||||||
|             lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s =  |             lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s =  | ||||||
|               case read s of n | n `elem` indices grps -> Right $ fst (grps ! n) |               case read s of n | n `elem` indices grps -> Right $ fst (grps ! n) | ||||||
|                              _                         -> Left $ "no match group exists for backreference \"\\"++s++"\"" |                              _                         -> Left $ "no match group exists for backreference \"\\"++s++"\"" | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user