;lib: regex: rename & export RegexError, export toRegex_, docs (#1312)
This commit is contained in:
		
							parent
							
								
									4957008890
								
							
						
					
					
						commit
						3822c27bfe
					
				@ -19,11 +19,21 @@ Easy regular expression helpers, currently based on regex-tdfa. These should:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
- work with simple strings
 | 
					- work with simple strings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Regex strings are automatically compiled into regular expressions the
 | 
					Regex strings are automatically compiled into regular expressions the first
 | 
				
			||||||
first time they are seen, and these are cached. If you use a huge
 | 
					time they are seen, and these are cached. If you use a huge number of unique
 | 
				
			||||||
number of unique regular expressions this might lead to increased
 | 
					regular expressions this might lead to increased memory usage. Several
 | 
				
			||||||
memory usage. Several functions have memoised variants (*Memo), which
 | 
					functions have memoised variants (*Memo), which also trade space for time.
 | 
				
			||||||
also trade space for time.
 | 
					
 | 
				
			||||||
 | 
					Currently two APIs are provided:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- The old partial one which will call error on any problem (eg with malformed
 | 
				
			||||||
 | 
					  regexps). This comes from hledger's origin as a command-line tool.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- The new total one (with _ suffixes) which will return an error message. This
 | 
				
			||||||
 | 
					  is better for long-running apps like hledger-web.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					We are gradually replacing usage of the old API in hledger. Probably at some
 | 
				
			||||||
 | 
					point the suffixless names will be reclaimed for the new API.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Current limitations:
 | 
					Current limitations:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -35,6 +45,7 @@ module Hledger.Utils.Regex (
 | 
				
			|||||||
   -- * type aliases
 | 
					   -- * type aliases
 | 
				
			||||||
   Regexp
 | 
					   Regexp
 | 
				
			||||||
  ,Replacement
 | 
					  ,Replacement
 | 
				
			||||||
 | 
					  ,RegexError
 | 
				
			||||||
   -- * partial regex operations (may call error)
 | 
					   -- * partial regex operations (may call error)
 | 
				
			||||||
  ,regexMatches
 | 
					  ,regexMatches
 | 
				
			||||||
  ,regexMatchesCI
 | 
					  ,regexMatchesCI
 | 
				
			||||||
@ -53,6 +64,7 @@ module Hledger.Utils.Regex (
 | 
				
			|||||||
  ,regexReplaceCIMemo_
 | 
					  ,regexReplaceCIMemo_
 | 
				
			||||||
  ,regexReplaceBy_
 | 
					  ,regexReplaceBy_
 | 
				
			||||||
  ,regexReplaceByCI_
 | 
					  ,regexReplaceByCI_
 | 
				
			||||||
 | 
					  ,toRegex_
 | 
				
			||||||
  )
 | 
					  )
 | 
				
			||||||
where
 | 
					where
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -77,7 +89,7 @@ type Regexp = String
 | 
				
			|||||||
type Replacement = String
 | 
					type Replacement = String
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- | An regular expression compilation/processing error message.
 | 
					-- | An regular expression compilation/processing error message.
 | 
				
			||||||
type Error = String
 | 
					type RegexError = String
 | 
				
			||||||
 | 
					
 | 
				
			||||||
--------------------------------------------------------------------------------
 | 
					--------------------------------------------------------------------------------
 | 
				
			||||||
-- old partial functions  -- PARTIAL:
 | 
					-- old partial functions  -- PARTIAL:
 | 
				
			||||||
@ -155,66 +167,66 @@ replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [M
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
-- | Does this regexp match the given string ?
 | 
					-- | Does this regexp match the given string ?
 | 
				
			||||||
-- Or return an error if the regexp is malformed.
 | 
					-- Or return an error if the regexp is malformed.
 | 
				
			||||||
regexMatches_ :: Regexp -> String -> Either Error Bool
 | 
					regexMatches_ :: Regexp -> String -> Either RegexError Bool
 | 
				
			||||||
regexMatches_ r s = (`match` s) <$> toRegex_ r
 | 
					regexMatches_ r s = (`match` s) <$> toRegex_ r
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- | Like regexMatches_ but match case-insensitively.
 | 
					-- | Like regexMatches_ but match case-insensitively.
 | 
				
			||||||
regexMatchesCI_ :: Regexp -> String -> Either Error Bool
 | 
					regexMatchesCI_ :: Regexp -> String -> Either RegexError Bool
 | 
				
			||||||
regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r
 | 
					regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- | Replace all occurrences of the regexp with the replacement
 | 
					-- | Replace all occurrences of the regexp with the replacement
 | 
				
			||||||
-- pattern, or return an error message. The replacement pattern
 | 
					-- pattern, or return an error message. The replacement pattern
 | 
				
			||||||
-- supports numeric backreferences (\N) but no other RE syntax.
 | 
					-- supports numeric backreferences (\N) but no other RE syntax.
 | 
				
			||||||
regexReplace_ :: Regexp -> Replacement -> String -> Either Error String
 | 
					regexReplace_ :: Regexp -> Replacement -> String -> Either RegexError String
 | 
				
			||||||
regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s
 | 
					regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- | Like regexReplace_ but match occurrences case-insensitively.
 | 
					-- | Like regexReplace_ but match occurrences case-insensitively.
 | 
				
			||||||
regexReplaceCI_ :: Regexp -> Replacement -> String -> Either Error String
 | 
					regexReplaceCI_ :: Regexp -> Replacement -> String -> Either RegexError String
 | 
				
			||||||
regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s
 | 
					regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- | A memoising version of regexReplace_. Caches the result for each
 | 
					-- | A memoising version of regexReplace_. Caches the result for each
 | 
				
			||||||
-- search pattern, replacement pattern, target string tuple.
 | 
					-- search pattern, replacement pattern, target string tuple.
 | 
				
			||||||
regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either Error String
 | 
					regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either RegexError String
 | 
				
			||||||
regexReplaceMemo_ re repl = memo (regexReplace_ re repl)
 | 
					regexReplaceMemo_ re repl = memo (regexReplace_ re repl)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- | Like regexReplaceMemo_ but match occurrences case-insensitively.
 | 
					-- | Like regexReplaceMemo_ but match occurrences case-insensitively.
 | 
				
			||||||
regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either Error String
 | 
					regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either RegexError String
 | 
				
			||||||
regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl)
 | 
					regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- | Replace all occurrences of the regexp, transforming each match
 | 
					-- | Replace all occurrences of the regexp, transforming each match
 | 
				
			||||||
-- with the given function, or return an error message.
 | 
					-- with the given function, or return an error message.
 | 
				
			||||||
regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either Error String
 | 
					regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either RegexError String
 | 
				
			||||||
regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s
 | 
					regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- | Like regexReplaceBy_ but match occurrences case-insensitively.
 | 
					-- | Like regexReplaceBy_ but match occurrences case-insensitively.
 | 
				
			||||||
regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either Error String
 | 
					regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either RegexError String
 | 
				
			||||||
regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s
 | 
					regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- helpers:
 | 
					-- helpers:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- Convert a Regexp string to a compiled Regex, or return an error message.
 | 
					-- Convert a Regexp string to a compiled Regex, or return an error message.
 | 
				
			||||||
toRegex_ :: Regexp -> Either Error Regex
 | 
					toRegex_ :: Regexp -> Either RegexError Regex
 | 
				
			||||||
toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt)
 | 
					toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- Like toRegex, but make a case-insensitive Regex.
 | 
					-- Like toRegex, but make a case-insensitive Regex.
 | 
				
			||||||
toRegexCI_ :: Regexp -> Either Error Regex
 | 
					toRegexCI_ :: Regexp -> Either RegexError Regex
 | 
				
			||||||
toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt)
 | 
					toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- Compile a Regexp string to a Regex with the given options, or return an
 | 
					-- Compile a Regexp string to a Regex with the given options, or return an
 | 
				
			||||||
-- error message if this fails.
 | 
					-- error message if this fails.
 | 
				
			||||||
compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either Error Regex
 | 
					compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either RegexError Regex
 | 
				
			||||||
compileRegex_ compopt execopt r =
 | 
					compileRegex_ compopt execopt r =
 | 
				
			||||||
  maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $
 | 
					  maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $
 | 
				
			||||||
  makeRegexOptsM compopt execopt r
 | 
					  makeRegexOptsM compopt execopt r
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- Replace this regular expression with this replacement pattern in this
 | 
					-- Replace this regular expression with this replacement pattern in this
 | 
				
			||||||
-- string, or return an error message.
 | 
					-- string, or return an error message.
 | 
				
			||||||
replaceRegex_ :: Regex -> Replacement -> String -> Either Error String
 | 
					replaceRegex_ :: Regex -> Replacement -> String -> Either RegexError String
 | 
				
			||||||
replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String])
 | 
					replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String])
 | 
				
			||||||
  where
 | 
					  where
 | 
				
			||||||
    -- Replace one match within the string with the replacement text
 | 
					    -- Replace one match within the string with the replacement text
 | 
				
			||||||
    -- appropriate for this match. Or return an error message.
 | 
					    -- appropriate for this match. Or return an error message.
 | 
				
			||||||
    replaceMatch_ :: Replacement -> String -> MatchText String -> Either Error String
 | 
					    replaceMatch_ :: Replacement -> String -> MatchText String -> Either RegexError String
 | 
				
			||||||
    replaceMatch_ replpat s matchgroups =
 | 
					    replaceMatch_ replpat s matchgroups =
 | 
				
			||||||
      erepl >>= \repl -> Right $ pre ++ repl ++ post
 | 
					      erepl >>= \repl -> Right $ pre ++ repl ++ post
 | 
				
			||||||
      where
 | 
					      where
 | 
				
			||||||
@ -228,7 +240,7 @@ replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s ::
 | 
				
			|||||||
          where
 | 
					          where
 | 
				
			||||||
            -- Given some match groups and a numeric backreference,
 | 
					            -- Given some match groups and a numeric backreference,
 | 
				
			||||||
            -- return the referenced group text, or an error message.
 | 
					            -- return the referenced group text, or an error message.
 | 
				
			||||||
            lookupMatchGroup_ :: MatchText String -> String -> Either Error String
 | 
					            lookupMatchGroup_ :: MatchText String -> String -> Either RegexError String
 | 
				
			||||||
            lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s = 
 | 
					            lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s = 
 | 
				
			||||||
              case read s of n | n `elem` indices grps -> Right $ fst (grps ! n)
 | 
					              case read s of n | n `elem` indices grps -> Right $ fst (grps ! n)
 | 
				
			||||||
                             _                         -> Left $ "no match group exists for backreference \"\\"++s++"\""
 | 
					                             _                         -> Left $ "no match group exists for backreference \"\\"++s++"\""
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user