account aliases (and regexReplace) now support backreferences
This commit is contained in:
		
							parent
							
								
									bd6322669c
								
							
						
					
					
						commit
						9fa22e0398
					
				| @ -1,7 +1,18 @@ | |||||||
| {-# LANGUAGE ScopedTypeVariables #-} | {-# LANGUAGE ScopedTypeVariables #-} | ||||||
| {-| | {-| | ||||||
| 
 | 
 | ||||||
| Easy regular expression helpers, based on regex-tdfa and regexpr. These should | Easy regular expression helpers, based on regex-tdfa and (a little) on | ||||||
|  | regexpr. These should: | ||||||
|  | 
 | ||||||
|  | - be cross-platform, not requiring C libraries | ||||||
|  | 
 | ||||||
|  | - support unicode | ||||||
|  | 
 | ||||||
|  | - support extended regular expressions | ||||||
|  | 
 | ||||||
|  | - support replacement, with backreferences etc. | ||||||
|  | 
 | ||||||
|  | - support splitting | ||||||
| 
 | 
 | ||||||
| - have mnemonic names | - have mnemonic names | ||||||
| 
 | 
 | ||||||
| @ -9,82 +20,55 @@ Easy regular expression helpers, based on regex-tdfa and regexpr. These should | |||||||
| 
 | 
 | ||||||
| - work with strings | - work with strings | ||||||
| 
 | 
 | ||||||
| - support extended regular expressions | Current limitations: | ||||||
| 
 | 
 | ||||||
| - support replacement | - (?i) and similar are not supported | ||||||
| 
 |  | ||||||
| - support splitting |  | ||||||
| 
 |  | ||||||
| - support unicode |  | ||||||
| 
 |  | ||||||
| - be cross-platform, not requiring C libraries |  | ||||||
| 
 | 
 | ||||||
| -} | -} | ||||||
| 
 | 
 | ||||||
| module Hledger.Utils.Regex ( | module Hledger.Utils.Regex ( | ||||||
|  |    -- * type aliases | ||||||
|    Regexp |    Regexp | ||||||
|   ,Replacement |   ,Replacement | ||||||
|   -- ,regexMatch |    -- * based on regex-tdfa | ||||||
|   -- ,regexMatchCI |  | ||||||
|   ,regexMatches |   ,regexMatches | ||||||
|   ,regexMatchesCI |   ,regexMatchesCI | ||||||
|   ,containsRegex |  | ||||||
|   ,regexReplace |   ,regexReplace | ||||||
|   ,regexReplaceCI |   ,regexReplaceCI | ||||||
|   ,regexReplaceBy |   ,regexReplaceBy | ||||||
|   -- ,regexpToCI |   ,regexReplaceByCI | ||||||
|  |    -- * based on regexpr | ||||||
|   ,regexSplit |   ,regexSplit | ||||||
|   ,toRegex |  | ||||||
|   ) |   ) | ||||||
| where | where | ||||||
| 
 | 
 | ||||||
|  | import Data.Array | ||||||
|  | import Data.Char | ||||||
| import Data.List (foldl') | import Data.List (foldl') | ||||||
| import Text.RegexPR (splitRegexPR) | import Text.RegexPR (splitRegexPR) | ||||||
| import Text.Regex.TDFA ( | import Text.Regex.TDFA ( | ||||||
|   Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt, |   Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt, | ||||||
|   makeRegexOpts, AllMatches(getAllMatches), match, (=~) |   makeRegexOpts, AllMatches(getAllMatches), match, (=~), MatchText | ||||||
|   ) |   ) | ||||||
| 
 | 
 | ||||||
|  | -- import Hledger.Utils.Debug | ||||||
|  | import Hledger.Utils.UTF8IOCompat (error') | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | -- | Regular expression. Extended regular expression-ish syntax ? But does not support eg (?i) syntax. | ||||||
| type Regexp = String | type Regexp = String | ||||||
|  | 
 | ||||||
|  | -- | A replacement pattern. May include backreferences (\N). | ||||||
| type Replacement = String | type Replacement = String | ||||||
| 
 | 
 | ||||||
| containsRegex :: Regexp -> String -> Bool |  | ||||||
| containsRegex = regexMatchesCI |  | ||||||
| 
 |  | ||||||
| -- regexpr - may be slow |  | ||||||
| 
 |  | ||||||
| regexSplit :: Regexp -> String -> [Regexp] |  | ||||||
| regexSplit = splitRegexPR |  | ||||||
| 
 |  | ||||||
| -- regexMatch :: Regexp -> String -> MatchFun Maybe |  | ||||||
| -- regexMatch r s = matchRegexPR r s |  | ||||||
| 
 |  | ||||||
| -- regexMatchCI :: Regexp -> String -> MatchFun Maybe |  | ||||||
| -- regexMatchCI r s = regexMatch (regexpToCI r) s |  | ||||||
| 
 |  | ||||||
| -- regexMatches :: Regexp -> String -> Bool |  | ||||||
| -- regexMatches r s = isJust $ matchRegexPR r s |  | ||||||
| 
 |  | ||||||
| -- regexMatchesCI :: Regexp -> String -> Bool |  | ||||||
| -- regexMatchesCI r s = regexMatches (regexpToCI r) s |  | ||||||
| 
 |  | ||||||
| -- regexReplace :: Regexp -> Replacement -> String -> String |  | ||||||
| -- regexReplace r repl s = gsubRegexPR r repl s |  | ||||||
| 
 |  | ||||||
| -- regexReplaceCI :: Regexp -> Replacement -> String -> String |  | ||||||
| -- regexReplaceCI r s = regexReplace (regexpToCI r) s |  | ||||||
| 
 |  | ||||||
| -- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String |  | ||||||
| -- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s |  | ||||||
| 
 |  | ||||||
| -- regex-tdfa |  | ||||||
| 
 |  | ||||||
| -- | Convert our string-based regexps to real ones. Can fail if the | -- | Convert our string-based regexps to real ones. Can fail if the | ||||||
| -- string regexp is malformed. | -- string regexp is malformed. | ||||||
| toRegex :: Regexp -> Regex | toRegex :: Regexp -> Regex | ||||||
| toRegex = makeRegexOpts compOpt execOpt | toRegex = makeRegexOpts compOpt execOpt | ||||||
| 
 | 
 | ||||||
|  | toRegexCI :: Regexp -> Regex | ||||||
|  | toRegexCI = makeRegexOpts compOpt{caseSensitive=False} execOpt | ||||||
|  | 
 | ||||||
| compOpt :: CompOption | compOpt :: CompOption | ||||||
| compOpt = defaultCompOpt | compOpt = defaultCompOpt | ||||||
| 
 | 
 | ||||||
| @ -100,31 +84,48 @@ regexMatches = flip (=~) | |||||||
| regexMatchesCI :: Regexp -> String -> Bool | regexMatchesCI :: Regexp -> String -> Bool | ||||||
| regexMatchesCI r = match (toRegexCI r) | regexMatchesCI r = match (toRegexCI r) | ||||||
| 
 | 
 | ||||||
|  | -- | Replace all occurrences of the regexp, transforming each match with the given function. | ||||||
|  | regexReplaceBy :: Regexp -> (String -> String) -> String -> String | ||||||
|  | regexReplaceBy r = replaceAllBy (toRegex r) | ||||||
|  | 
 | ||||||
|  | regexReplaceByCI :: Regexp -> (String -> String) -> String -> String | ||||||
|  | regexReplaceByCI r = replaceAllBy (toRegexCI r) | ||||||
|  | 
 | ||||||
|  | -- | Replace all occurrences of the regexp with the replacement pattern. | ||||||
|  | -- The replacement pattern supports \N backreferences but no other RE syntax. | ||||||
| regexReplace :: Regexp -> Replacement -> String -> String | regexReplace :: Regexp -> Replacement -> String -> String | ||||||
| regexReplace r repl = regexReplaceBy r (const repl) | regexReplace re = replaceRegex (toRegex re) | ||||||
| 
 | 
 | ||||||
| regexReplaceCI :: Regexp -> Replacement -> String -> String | regexReplaceCI :: Regexp -> Replacement -> String -> String | ||||||
| regexReplaceCI r repl = regexReplaceByCI r (const repl) | regexReplaceCI re = replaceRegex (toRegexCI re) | ||||||
| 
 | 
 | ||||||
| regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String | -- | ||||||
| regexReplaceBy r = replaceAll (toRegex r) |  | ||||||
| 
 | 
 | ||||||
| regexReplaceByCI :: Regexp -> (String -> Replacement) -> String -> String | replaceRegex :: Regex -> Replacement -> String -> String | ||||||
| regexReplaceByCI r = replaceAll (toRegexCI r) | replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [MatchText String]) | ||||||
| 
 | 
 | ||||||
| toRegexCI :: Regexp -> Regex | replaceMatch :: Replacement -> String -> MatchText String -> String | ||||||
| toRegexCI = makeRegexOpts compOpt{caseSensitive=False} execOpt | replaceMatch replpat s matchgroups = pre ++ repl ++ post | ||||||
|  |   where | ||||||
|  |     ((_,(off,len)):_) = elems matchgroups  -- groups should have 0-based indexes, and there should always be at least one, since this is a match | ||||||
|  |     (pre, post') = splitAt off s | ||||||
|  |     post = drop len post' | ||||||
|  |     repl = replaceAllBy (toRegex "\\\\[0-9]+") (replaceBackReference matchgroups) replpat | ||||||
| 
 | 
 | ||||||
| -- regexpToCI :: Regexp -> Regexp | replaceBackReference :: MatchText String -> String -> String | ||||||
| -- regexpToCI r = "(?i)"++ r | replaceBackReference grps ('\\':s@(_:_)) | all isDigit s = | ||||||
|  |   case read s of n | n `elem` indices grps -> fst (grps ! n) | ||||||
|  |                  _                         -> error' $ "no match group exists for backreference \"\\"++s++"\"" | ||||||
|  | replaceBackReference _ s = error' $ "replaceBackReference called on non-backreference \""++s++"\", shouldn't happen" | ||||||
| 
 | 
 | ||||||
| -- from | -- | ||||||
| -- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries | 
 | ||||||
|  | -- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries : | ||||||
| -- | Replace all occurrences of a regexp in a string using a replacer | -- | Replace all occurrences of a regexp in a string using a replacer | ||||||
| -- function, which receives the matched string as its argument. | -- function, which receives the matched string as its argument. | ||||||
| -- Does not support standard RE syntax such as \1. | -- Does not support backreferences or other RE syntax. | ||||||
| replaceAll :: Regex -> (String -> Replacement) -> String -> String | replaceAllBy :: Regex -> (String -> String) -> String -> String | ||||||
| replaceAll re f s = start end | replaceAllBy re f s = start end | ||||||
|   where |   where | ||||||
|     (_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s |     (_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s | ||||||
|     go (ind,read,write) (off,len) = |     go (ind,read,write) (off,len) = | ||||||
| @ -132,3 +133,8 @@ replaceAll re f s = start end | |||||||
|           (matched, remaining) = splitAt len start |           (matched, remaining) = splitAt len start | ||||||
|       in (off + len, remaining, write . (skip++) . (f matched ++)) |       in (off + len, remaining, write . (skip++) . (f matched ++)) | ||||||
| 
 | 
 | ||||||
|  | -- uses regexpr, may be slow: | ||||||
|  | 
 | ||||||
|  | regexSplit :: Regexp -> String -> [Regexp] | ||||||
|  | regexSplit = splitRegexPR | ||||||
|  | 
 | ||||||
|  | |||||||
| @ -82,6 +82,7 @@ library | |||||||
|                   Hledger.Utils.UTF8IOCompat |                   Hledger.Utils.UTF8IOCompat | ||||||
|   build-depends: |   build-depends: | ||||||
|                   base >= 4.3 && < 5 |                   base >= 4.3 && < 5 | ||||||
|  |                  ,array | ||||||
|                  ,blaze-markup >= 0.5.1 |                  ,blaze-markup >= 0.5.1 | ||||||
|                  ,bytestring |                  ,bytestring | ||||||
|                  ,cmdargs >= 0.10 && < 0.11 |                  ,cmdargs >= 0.10 && < 0.11 | ||||||
| @ -116,6 +117,7 @@ test-suite tests | |||||||
|   default-language: Haskell2010 |   default-language: Haskell2010 | ||||||
|   build-depends: hledger-lib |   build-depends: hledger-lib | ||||||
|                , base >= 4.3 && < 5 |                , base >= 4.3 && < 5 | ||||||
|  |                , array | ||||||
|                , blaze-markup >= 0.5.1 |                , blaze-markup >= 0.5.1 | ||||||
|                , cmdargs |                , cmdargs | ||||||
|                , containers |                , containers | ||||||
|  | |||||||
| @ -35,7 +35,7 @@ import System.Process (readProcessWithExitCode) | |||||||
| import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff)) | import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff)) | ||||||
| import Test.HUnit | import Test.HUnit | ||||||
| import Text.Printf | import Text.Printf | ||||||
| import Text.Regex.TDFA (match) | import Text.Regex.TDFA ((=~)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| -- kludge - adapt to whichever directory version is installed, or when | -- kludge - adapt to whichever directory version is installed, or when | ||||||
| @ -190,6 +190,6 @@ safeGetDirectoryContents fp = getDirectoryContents fp | |||||||
| -- | Does the second file represent a backup of the first, and if so which version is it ? | -- | Does the second file represent a backup of the first, and if so which version is it ? | ||||||
| -- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex | -- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex | ||||||
| backupNumber :: FilePath -> FilePath -> Maybe Int | backupNumber :: FilePath -> FilePath -> Maybe Int | ||||||
| backupNumber f g = case match (toRegex ("^" ++ f ++ "\\.([0-9]+)$")) g of | backupNumber f g = case g =~ ("^" ++ f ++ "\\.([0-9]+)$") of | ||||||
|                         (_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext |                         (_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext | ||||||
|                         _ -> Nothing |                         _ -> Nothing | ||||||
|  | |||||||
| @ -29,9 +29,9 @@ alias A (.)=\1 | |||||||
|     c              -3 |     c              -3 | ||||||
| 
 | 
 | ||||||
| 2011/01/01 | 2011/01/01 | ||||||
|     \1             1 |     a             1 | ||||||
|     \1             2 |     a             2 | ||||||
|     c             -3 |     c            -3 | ||||||
| 
 | 
 | ||||||
| >>>=0 | >>>=0 | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user