lib: add alternate regex utilities that don't call error (#1312)
This commit is contained in:
		
							parent
							
								
									ede4bfd5b4
								
							
						
					
					
						commit
						a112085092
					
				| @ -35,7 +35,7 @@ module Hledger.Utils.Regex ( | |||||||
|    -- * type aliases |    -- * type aliases | ||||||
|    Regexp |    Regexp | ||||||
|   ,Replacement |   ,Replacement | ||||||
|    -- * standard regex operations |    -- * partial regex operations (may call error) | ||||||
|   ,regexMatches |   ,regexMatches | ||||||
|   ,regexMatchesCI |   ,regexMatchesCI | ||||||
|   ,regexReplace |   ,regexReplace | ||||||
| @ -44,9 +44,19 @@ module Hledger.Utils.Regex ( | |||||||
|   ,regexReplaceCIMemo |   ,regexReplaceCIMemo | ||||||
|   ,regexReplaceBy |   ,regexReplaceBy | ||||||
|   ,regexReplaceByCI |   ,regexReplaceByCI | ||||||
|  |    -- * total regex operations | ||||||
|  |   ,regexMatches_ | ||||||
|  |   ,regexMatchesCI_ | ||||||
|  |   ,regexReplace_ | ||||||
|  |   ,regexReplaceCI_ | ||||||
|  |   ,regexReplaceMemo_ | ||||||
|  |   ,regexReplaceCIMemo_ | ||||||
|  |   ,regexReplaceBy_ | ||||||
|  |   ,regexReplaceByCI_ | ||||||
|   ) |   ) | ||||||
| where | where | ||||||
| 
 | 
 | ||||||
|  | import Control.Monad (foldM) | ||||||
| import Data.Array | import Data.Array | ||||||
| import Data.Char | import Data.Char | ||||||
| import Data.List (foldl') | import Data.List (foldl') | ||||||
| @ -66,23 +76,14 @@ type Regexp = String | |||||||
| -- | A replacement pattern. May include numeric backreferences (\N). | -- | A replacement pattern. May include numeric backreferences (\N). | ||||||
| type Replacement = String | type Replacement = String | ||||||
| 
 | 
 | ||||||
| -- | Convert our string-based Regexp to a real Regex. | -- | An regular expression compilation/processing error message. | ||||||
| -- Or if it's not well formed, call error with a "malformed regexp" message. | type Error = String | ||||||
| toRegex :: Regexp -> Regex |  | ||||||
| toRegex = memo (compileRegexOrError defaultCompOpt defaultExecOpt)  -- PARTIAL: |  | ||||||
| 
 | 
 | ||||||
| -- | Like toRegex but make a case-insensitive Regex. | -------------------------------------------------------------------------------- | ||||||
| toRegexCI :: Regexp -> Regex | -- old partial functions  -- PARTIAL: | ||||||
| toRegexCI = memo (compileRegexOrError defaultCompOpt{caseSensitive=False} defaultExecOpt)  -- PARTIAL: |  | ||||||
| 
 |  | ||||||
| compileRegexOrError :: CompOption -> ExecOption -> Regexp -> Regex |  | ||||||
| compileRegexOrError compopt execopt r = |  | ||||||
|   fromMaybe |  | ||||||
|   (error' $ "this regular expression could not be compiled: " ++ show r) $  -- PARTIAL: |  | ||||||
|   makeRegexOptsM compopt execopt r |  | ||||||
| 
 | 
 | ||||||
| -- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a | -- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a | ||||||
| -- regexMatch' r s = s =~ (toRegex r) | -- regexMatch' r s = s =~ (toRegex' r) | ||||||
| 
 | 
 | ||||||
| regexMatches :: Regexp -> String -> Bool | regexMatches :: Regexp -> String -> Bool | ||||||
| regexMatches = flip (=~) | regexMatches = flip (=~) | ||||||
| @ -90,13 +91,6 @@ regexMatches = flip (=~) | |||||||
| regexMatchesCI :: Regexp -> String -> Bool | regexMatchesCI :: Regexp -> String -> Bool | ||||||
| regexMatchesCI r = match (toRegexCI r) | regexMatchesCI r = match (toRegexCI r) | ||||||
| 
 | 
 | ||||||
| -- | Replace all occurrences of the regexp, transforming each match with the given function. |  | ||||||
| regexReplaceBy :: Regexp -> (String -> String) -> String -> String |  | ||||||
| regexReplaceBy r = replaceAllBy (toRegex r) |  | ||||||
| 
 |  | ||||||
| regexReplaceByCI :: Regexp -> (String -> String) -> String -> String |  | ||||||
| regexReplaceByCI r = replaceAllBy (toRegexCI r) |  | ||||||
| 
 |  | ||||||
| -- | Replace all occurrences of the regexp with the replacement | -- | Replace all occurrences of the regexp with the replacement | ||||||
| -- pattern. The replacement pattern supports numeric backreferences | -- pattern. The replacement pattern supports numeric backreferences | ||||||
| -- (\N) but no other RE syntax. | -- (\N) but no other RE syntax. | ||||||
| @ -114,36 +108,154 @@ regexReplaceMemo re repl = memo (regexReplace re repl) | |||||||
| regexReplaceCIMemo :: Regexp -> Replacement -> String -> String | regexReplaceCIMemo :: Regexp -> Replacement -> String -> String | ||||||
| regexReplaceCIMemo re repl = memo (regexReplaceCI re repl) | regexReplaceCIMemo re repl = memo (regexReplaceCI re repl) | ||||||
| 
 | 
 | ||||||
| -- | -- | Replace all occurrences of the regexp, transforming each match with the given function. | ||||||
|  | regexReplaceBy :: Regexp -> (String -> String) -> String -> String | ||||||
|  | regexReplaceBy r = replaceAllBy (toRegex r) | ||||||
|  | 
 | ||||||
|  | regexReplaceByCI :: Regexp -> (String -> String) -> String -> String | ||||||
|  | regexReplaceByCI r = replaceAllBy (toRegexCI r) | ||||||
|  | 
 | ||||||
|  | -- helpers | ||||||
|  | 
 | ||||||
|  | -- | Convert our string-based Regexp to a real Regex. | ||||||
|  | -- Or if it's not well formed, call error with a "malformed regexp" message. | ||||||
|  | toRegex :: Regexp -> Regex | ||||||
|  | toRegex = memo (compileRegex defaultCompOpt defaultExecOpt)  -- PARTIAL: | ||||||
|  | 
 | ||||||
|  | -- | Like toRegex but make a case-insensitive Regex. | ||||||
|  | toRegexCI :: Regexp -> Regex | ||||||
|  | toRegexCI = memo (compileRegex defaultCompOpt{caseSensitive=False} defaultExecOpt)  -- PARTIAL: | ||||||
|  | 
 | ||||||
|  | compileRegex :: CompOption -> ExecOption -> Regexp -> Regex | ||||||
|  | compileRegex compopt execopt r = | ||||||
|  |   fromMaybe | ||||||
|  |   (error $ "this regular expression could not be compiled: " ++ show r) $  -- PARTIAL: | ||||||
|  |   makeRegexOptsM compopt execopt r | ||||||
| 
 | 
 | ||||||
| replaceRegex :: Regex -> Replacement -> String -> String | replaceRegex :: Regex -> Replacement -> String -> String | ||||||
| replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [MatchText String]) | replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [MatchText String]) | ||||||
| 
 |  | ||||||
| replaceMatch :: Replacement -> String -> MatchText String -> String |  | ||||||
| replaceMatch replpat s matchgroups = pre ++ repl ++ post |  | ||||||
|   where |   where | ||||||
|     ((_,(off,len)):_) = elems matchgroups  -- groups should have 0-based indexes, and there should always be at least one, since this is a match |     replaceMatch :: Replacement -> String -> MatchText String -> String | ||||||
|     (pre, post') = splitAt off s |     replaceMatch replpat s matchgroups = pre ++ repl ++ post | ||||||
|     post = drop len post' |       where | ||||||
|     repl = replaceAllBy (toRegex "\\\\[0-9]+") (replaceBackReference matchgroups) replpat |         ((_,(off,len)):_) = elems matchgroups  -- groups should have 0-based indexes, and there should always be at least one, since this is a match | ||||||
|  |         (pre, post') = splitAt off s | ||||||
|  |         post = drop len post' | ||||||
|  |         repl = replaceAllBy (toRegex "\\\\[0-9]+") (lookupMatchGroup matchgroups) replpat | ||||||
|  |           where | ||||||
|  |             lookupMatchGroup :: MatchText String -> String -> String | ||||||
|  |             lookupMatchGroup grps ('\\':s@(_:_)) | all isDigit s = | ||||||
|  |               case read s of n | n `elem` indices grps -> fst (grps ! n) | ||||||
|  |               -- PARTIAL: | ||||||
|  |                              _                         -> error' $ "no match group exists for backreference \"\\"++s++"\"" | ||||||
|  |             lookupMatchGroup _ s = error' $ "lookupMatchGroup called on non-numeric-backreference \""++s++"\", shouldn't happen" | ||||||
| 
 | 
 | ||||||
| replaceBackReference :: MatchText String -> String -> String | -------------------------------------------------------------------------------- | ||||||
| replaceBackReference grps ('\\':s@(_:_)) | all isDigit s = | -- new total functions | ||||||
|   case read s of n | n `elem` indices grps -> fst (grps ! n) |  | ||||||
|   -- PARTIAL:D |  | ||||||
|                  _                         -> error' $ "no match group exists for backreference \"\\"++s++"\"" |  | ||||||
| replaceBackReference _ s = error' $ "replaceBackReference called on non-numeric-backreference \""++s++"\", shouldn't happen" |  | ||||||
| 
 | 
 | ||||||
| -- | regexMatches_ :: Regexp -> String -> Either Error Bool | ||||||
|  | regexMatches_ r s = (`match` s) <$> toRegex_ r | ||||||
| 
 | 
 | ||||||
| -- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries : | regexMatchesCI_ :: Regexp -> String -> Either Error Bool | ||||||
| -- | Replace all occurrences of a regexp in a string, transforming each match with the given function. | regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r | ||||||
|  | 
 | ||||||
|  | -- | Replace all occurrences of the regexp with the replacement | ||||||
|  | -- pattern, or return an error message. The replacement pattern | ||||||
|  | -- supports numeric backreferences (\N) but no other RE syntax. | ||||||
|  | regexReplace_ :: Regexp -> Replacement -> String -> Either Error String | ||||||
|  | regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s | ||||||
|  | 
 | ||||||
|  | regexReplaceCI_ :: Regexp -> Replacement -> String -> Either Error String | ||||||
|  | regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s | ||||||
|  | 
 | ||||||
|  | -- | A memoising version of regexReplace_. Caches the result for each | ||||||
|  | -- search pattern, replacement pattern, target string tuple. | ||||||
|  | regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either Error String | ||||||
|  | regexReplaceMemo_ re repl = memo (regexReplace_ re repl) | ||||||
|  | 
 | ||||||
|  | regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either Error String | ||||||
|  | regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl) | ||||||
|  | 
 | ||||||
|  | -- | Replace all occurrences of the regexp, transforming each match | ||||||
|  | -- with the given function, or return an error message. | ||||||
|  | regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either Error String | ||||||
|  | regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s | ||||||
|  | 
 | ||||||
|  | regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either Error String | ||||||
|  | regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s | ||||||
|  | 
 | ||||||
|  | -- helpers: | ||||||
|  | 
 | ||||||
|  | -- | Convert our string-based Regexp to a real Regex, or return a parse error. | ||||||
|  | toRegex_ :: Regexp -> Either Error Regex | ||||||
|  | toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt) | ||||||
|  | 
 | ||||||
|  | -- | Convert our string-based Regexp to a case-insensitive real Regex, | ||||||
|  | -- or return a parse error. | ||||||
|  | toRegexCI_ :: Regexp -> Either Error Regex | ||||||
|  | toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt) | ||||||
|  | 
 | ||||||
|  | compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either Error Regex | ||||||
|  | compileRegex_ compopt execopt r = | ||||||
|  |   maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $ | ||||||
|  |   makeRegexOptsM compopt execopt r | ||||||
|  | 
 | ||||||
|  | -- Replace this regular expression with this replacement pattern in this string, or return an error message. | ||||||
|  | replaceRegex_ :: Regex -> Replacement -> String -> Either Error String | ||||||
|  | replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String]) | ||||||
|  |   where | ||||||
|  |     -- Replace one match within the string with the replacement text | ||||||
|  |     -- appropriate for this match. Or return an error message. | ||||||
|  |     replaceMatch_ :: Replacement -> String -> MatchText String -> Either Error String | ||||||
|  |     replaceMatch_ replpat s matchgroups = | ||||||
|  |       erepl >>= \repl -> Right $ pre ++ repl ++ post | ||||||
|  |       where | ||||||
|  |         ((_,(off,len)):_) = elems matchgroups  -- groups should have 0-based indexes, and there should always be at least one, since this is a match | ||||||
|  |         (pre, post') = splitAt off s | ||||||
|  |         post = drop len post' | ||||||
|  |         -- The replacement text: the replacement pattern with all | ||||||
|  |         -- numeric backreferences replaced by the appropriate groups | ||||||
|  |         -- from this match. Or an error message. | ||||||
|  |         erepl = toRegex_ "\\\\[0-9]+" >>= \rx -> replaceAllByM rx (lookupMatchGroup_ matchgroups) replpat | ||||||
|  |           where | ||||||
|  |             -- Given some match groups and a numeric backreference, | ||||||
|  |             -- return the referenced group text, or an error message. | ||||||
|  |             lookupMatchGroup_ :: MatchText String -> String -> Either Error String | ||||||
|  |             lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s =  | ||||||
|  |               case read s of n | n `elem` indices grps -> Right $ fst (grps ! n) | ||||||
|  |                              _                         -> Left $ "no match group exists for backreference \"\\"++s++"\"" | ||||||
|  |             lookupMatchGroup_ _ s = Left $ "lookupMatchGroup called on non-numeric-backreference \""++s++"\", shouldn't happen" | ||||||
|  | 
 | ||||||
|  | -- helpers | ||||||
|  | 
 | ||||||
|  | -- Adapted from http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries: | ||||||
|  | 
 | ||||||
|  | -- | Replace all occurrences of a regexp in a string, transforming each match | ||||||
|  | -- with the given function. | ||||||
| replaceAllBy :: Regex -> (String -> String) -> String -> String | replaceAllBy :: Regex -> (String -> String) -> String -> String | ||||||
| replaceAllBy re f s = start end | replaceAllBy re transform s = prependdone rest | ||||||
|   where |   where | ||||||
|     (_, end, start) = foldl' go (0, s, id) $ (getAllMatches $ match re s :: [(Int, Int)]) |     (_, rest, prependdone) = foldl' go (0, s, id) matches | ||||||
|     go (ind,read,write) (off,len) = |       where | ||||||
|       let (skip, start) = splitAt (off - ind) read |         matches = getAllMatches $ match re s :: [(Int, Int)]  -- offset and length | ||||||
|           (matched, remaining) = splitAt len start |         go :: (Int,String,String->String) -> (Int,Int) ->  (Int,String,String->String) | ||||||
|       in (off + len, remaining, write . (skip++) . (f matched ++)) |         go (pos,todo,prepend) (off,len) = | ||||||
|  |           let (prematch, matchandrest) = splitAt (off - pos) todo | ||||||
|  |               (matched, rest) = splitAt len matchandrest | ||||||
|  |           in (off + len, rest, prepend . (prematch++) . (transform matched ++)) | ||||||
|  | 
 | ||||||
|  | -- | Replace all occurrences of a regexp in a string, transforming each match | ||||||
|  | -- with the given monadic transform function. Eg if the monad is Either, a | ||||||
|  | -- Left result from the transform function short-circuits and is returned as | ||||||
|  | -- the overall result. | ||||||
|  | replaceAllByM :: forall m. Monad m => Regex -> (String -> m String) -> String -> m String | ||||||
|  | replaceAllByM re transform s = | ||||||
|  |   foldM go (0, s, id) matches >>= \(_, rest, prependdone) -> pure $ prependdone rest | ||||||
|  |   where | ||||||
|  |     matches = getAllMatches $ match re s :: [(Int, Int)]  -- offset and length | ||||||
|  |     go :: (Int,String,String->String) -> (Int,Int) -> m (Int,String,String->String) | ||||||
|  |     go (pos,todo,prepend) (off,len) = | ||||||
|  |       let (prematch, matchandrest) = splitAt (off - pos) todo | ||||||
|  |           (matched, rest) = splitAt len matchandrest | ||||||
|  |       in transform matched >>= \matched' -> pure (off + len, rest, prepend . (prematch++) . (matched' ++)) | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user