135 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			135 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
{-# LANGUAGE ScopedTypeVariables #-}
 | 
						|
{-|

Easy regular expression helpers, based on regex-tdfa and regexpr. These should

- have mnemonic names
- have simple monomorphic types
- work with strings
- support extended regular expressions
- support replacement
- support splitting
- support unicode
- be cross-platform, not requiring C libraries

-}
 | 
						|
 | 
						|
module Hledger.Utils.Regex (
 | 
						|
   Regexp
 | 
						|
  ,Replacement
 | 
						|
  -- ,regexMatch
 | 
						|
  -- ,regexMatchCI
 | 
						|
  ,regexMatches
 | 
						|
  ,regexMatchesCI
 | 
						|
  ,containsRegex
 | 
						|
  ,regexReplace
 | 
						|
  ,regexReplaceCI
 | 
						|
  ,regexReplaceBy
 | 
						|
  -- ,regexpToCI
 | 
						|
  ,regexSplit
 | 
						|
  ,toRegex
 | 
						|
  )
 | 
						|
where
 | 
						|
 | 
						|
import Data.List (foldl')
 | 
						|
import Text.RegexPR (splitRegexPR)
 | 
						|
import Text.Regex.TDFA (
 | 
						|
  Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt,
 | 
						|
  makeRegexOpts, AllMatches(getAllMatches), match, (=~)
 | 
						|
  )
 | 
						|
 | 
						|
 | 
						|
-- | A regular expression in its textual, not yet compiled, form.
type Regexp = String
 | 
						|
-- | The text substituted in place of a regular expression match.
type Replacement = String
 | 
						|
 | 
						|
-- | Does the string contain a match for this regexp?
-- Note that, despite the neutral name, matching is case-insensitive:
-- this is simply another name for 'regexMatchesCI'.
containsRegex :: Regexp -> String -> Bool
containsRegex re str = re `regexMatchesCI` str
 | 
						|
 | 
						|
-- regexpr - may be slow
 | 
						|
 | 
						|
-- | Split a string at each match of the regexp, returning the pieces.
-- Implemented with regexpr's 'splitRegexPR', which may be slow.
--
-- The result is ordinary text, not a list of patterns, so it is typed
-- as @[String]@ ('Regexp' is an alias for 'String', so this is the same
-- type as before, just stated accurately).
regexSplit :: Regexp -> String -> [String]
regexSplit = splitRegexPR
 | 
						|
 | 
						|
-- regexMatch :: Regexp -> String -> MatchFun Maybe
 | 
						|
-- regexMatch r s = matchRegexPR r s
 | 
						|
 | 
						|
-- regexMatchCI :: Regexp -> String -> MatchFun Maybe
 | 
						|
-- regexMatchCI r s = regexMatch (regexpToCI r) s
 | 
						|
 | 
						|
-- regexMatches :: Regexp -> String -> Bool
 | 
						|
-- regexMatches r s = isJust $ matchRegexPR r s
 | 
						|
 | 
						|
-- regexMatchesCI :: Regexp -> String -> Bool
 | 
						|
-- regexMatchesCI r s = regexMatches (regexpToCI r) s
 | 
						|
 | 
						|
-- regexReplace :: Regexp -> Replacement -> String -> String
 | 
						|
-- regexReplace r repl s = gsubRegexPR r repl s
 | 
						|
 | 
						|
-- regexReplaceCI :: Regexp -> Replacement -> String -> String
 | 
						|
-- regexReplaceCI r s = regexReplace (regexpToCI r) s
 | 
						|
 | 
						|
-- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
 | 
						|
-- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s
 | 
						|
 | 
						|
-- regex-tdfa
 | 
						|
 | 
						|
-- | Compile a textual regexp into a regex-tdfa 'Regex', using this
-- module's 'compOpt' and 'execOpt'. Can fail if the string regexp is
-- malformed.
toRegex :: Regexp -> Regex
toRegex re = makeRegexOpts compOpt execOpt re
 | 
						|
 | 
						|
-- | Compile-time options used for every regexp built by this module:
-- currently just regex-tdfa's defaults.
compOpt :: CompOption
compOpt = defaultCompOpt
 | 
						|
 | 
						|
-- | Execution-time options used for every regexp built by this module:
-- currently just regex-tdfa's defaults.
execOpt :: ExecOption
execOpt = defaultExecOpt
 | 
						|
 | 
						|
-- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a
 | 
						|
-- regexMatch' r s = s =~ (toRegex r)
 | 
						|
 | 
						|
-- | Does the string contain a match for this regexp?
-- Uses the module's standard compile options (see 'toRegex').
--
-- The pattern is compiled through 'toRegex', like the other helpers
-- here, rather than through '=~' directly, so that any change to
-- 'compOpt' or 'execOpt' applies uniformly. With the current default
-- options the result is the same as @flip (=~)@. Written as a
-- composition so that a partially applied @regexMatches r@ can reuse
-- one compiled regex across many strings.
regexMatches :: Regexp -> String -> Bool
regexMatches = match . toRegex
 | 
						|
 | 
						|
-- | Like 'regexMatches', but ignoring case: the pattern is compiled
-- with 'toRegexCI' before being matched against the string.
regexMatchesCI :: Regexp -> String -> Bool
regexMatchesCI = match . toRegexCI
 | 
						|
 | 
						|
-- | Replace every match of the regexp in the string with the given
-- replacement text. The replacement is inserted as-is, whatever was
-- matched.
regexReplace :: Regexp -> Replacement -> String -> String
regexReplace re replacement = regexReplaceBy re (\_matched -> replacement)
 | 
						|
 | 
						|
-- | Case-insensitive variant of 'regexReplace': replace every match of
-- the regexp, ignoring case, with the given replacement text.
regexReplaceCI :: Regexp -> Replacement -> String -> String
regexReplaceCI re replacement = regexReplaceByCI re (\_matched -> replacement)
 | 
						|
 | 
						|
-- | Replace every match of the regexp in the string with the result of
-- applying the given function to the matched text.
regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
regexReplaceBy = replaceAll . toRegex
 | 
						|
 | 
						|
-- | Case-insensitive variant of 'regexReplaceBy': the pattern is
-- compiled with 'toRegexCI' before replacing.
regexReplaceByCI :: Regexp -> (String -> Replacement) -> String -> String
regexReplaceByCI = replaceAll . toRegexCI
 | 
						|
 | 
						|
-- | Compile a textual regexp into a case-insensitive 'Regex': the same
-- options as 'toRegex', but with case sensitivity switched off.
toRegexCI :: Regexp -> Regex
toRegexCI = makeRegexOpts caseInsensitiveOpt execOpt
  where
    caseInsensitiveOpt = compOpt { caseSensitive = False }
 | 
						|
 | 
						|
-- regexpToCI :: Regexp -> Regexp
 | 
						|
-- regexpToCI r = "(?i)"++ r
 | 
						|
 | 
						|
-- from
-- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries
-- | Replace all occurrences of a regexp in a string using a replacer
-- function, which receives the matched string as its argument.
-- Does not support standard RE syntax such as \1.
--
-- The matches are requested as (offset, length) pairs and folded over
-- from left to right. The fold's state is: the offset in the original
-- string consumed so far, the not yet consumed remainder of the string,
-- and a function which prepends all output produced so far. Applying
-- that function to the final remainder gives the result.
replaceAll :: Regex -> (String -> Replacement) -> String -> String
replaceAll re f s = emitAll leftover
  where
    -- The explicit list type matters: 'AllMatches' has instances for
    -- more than one container and 'foldl'' accepts any 'Foldable', so
    -- without it the container type would be ambiguous.
    spans :: [(Int, Int)]
    spans = getAllMatches (match re s)

    (_, leftover, emitAll) = foldl' step (0, s, id) spans

    step (consumed, unread, emit) (off, len) =
      let -- text between the previous match and this one, kept verbatim
          (before, fromMatch) = splitAt (off - consumed) unread
          -- the matched text itself, and whatever follows it
          (matched, after) = splitAt len fromMatch
      in (off + len, after, emit . (before ++) . (f matched ++))
 | 
						|
 |