account aliases (and regexReplace) now support backreferences

This commit is contained in:
Simon Michael 2014-10-29 07:46:49 -07:00
parent bd6322669c
commit 9fa22e0398
4 changed files with 74 additions and 66 deletions

View File

@ -1,7 +1,18 @@
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-| {-|
Easy regular expression helpers, based on regex-tdfa and regexpr. These should Easy regular expression helpers, based on regex-tdfa and (a little) on
regexpr. These should:
- be cross-platform, not requiring C libraries
- support unicode
- support extended regular expressions
- support replacement, with backreferences etc.
- support splitting
- have mnemonic names - have mnemonic names
@ -9,82 +20,55 @@ Easy regular expression helpers, based on regex-tdfa and regexpr. These should
- work with strings - work with strings
- support extended regular expressions Current limitations:
- support replacement - (?i) and similar are not supported
- support splitting
- support unicode
- be cross-platform, not requiring C libraries
-} -}
module Hledger.Utils.Regex ( module Hledger.Utils.Regex (
-- * type aliases
Regexp Regexp
,Replacement ,Replacement
-- ,regexMatch -- * based on regex-tdfa
-- ,regexMatchCI
,regexMatches ,regexMatches
,regexMatchesCI ,regexMatchesCI
,containsRegex
,regexReplace ,regexReplace
,regexReplaceCI ,regexReplaceCI
,regexReplaceBy ,regexReplaceBy
-- ,regexpToCI ,regexReplaceByCI
-- * based on regexpr
,regexSplit ,regexSplit
,toRegex
) )
where where
import Data.Array
import Data.Char
import Data.List (foldl') import Data.List (foldl')
import Text.RegexPR (splitRegexPR) import Text.RegexPR (splitRegexPR)
import Text.Regex.TDFA ( import Text.Regex.TDFA (
Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt, Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt,
makeRegexOpts, AllMatches(getAllMatches), match, (=~) makeRegexOpts, AllMatches(getAllMatches), match, (=~), MatchText
) )
-- import Hledger.Utils.Debug
import Hledger.Utils.UTF8IOCompat (error')
-- | Regular expression. Extended regular expression-ish syntax ? But does not support eg (?i) syntax.
type Regexp = String type Regexp = String
-- | A replacement pattern. May include backreferences (\N).
type Replacement = String type Replacement = String
containsRegex :: Regexp -> String -> Bool
containsRegex = regexMatchesCI
-- regexpr - may be slow
regexSplit :: Regexp -> String -> [Regexp]
regexSplit = splitRegexPR
-- regexMatch :: Regexp -> String -> MatchFun Maybe
-- regexMatch r s = matchRegexPR r s
-- regexMatchCI :: Regexp -> String -> MatchFun Maybe
-- regexMatchCI r s = regexMatch (regexpToCI r) s
-- regexMatches :: Regexp -> String -> Bool
-- regexMatches r s = isJust $ matchRegexPR r s
-- regexMatchesCI :: Regexp -> String -> Bool
-- regexMatchesCI r s = regexMatches (regexpToCI r) s
-- regexReplace :: Regexp -> Replacement -> String -> String
-- regexReplace r repl s = gsubRegexPR r repl s
-- regexReplaceCI :: Regexp -> Replacement -> String -> String
-- regexReplaceCI r s = regexReplace (regexpToCI r) s
-- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
-- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s
-- regex-tdfa
-- | Convert our string-based regexps to real ones. Can fail if the -- | Convert our string-based regexps to real ones. Can fail if the
-- string regexp is malformed. -- string regexp is malformed.
toRegex :: Regexp -> Regex toRegex :: Regexp -> Regex
toRegex = makeRegexOpts compOpt execOpt toRegex = makeRegexOpts compOpt execOpt
toRegexCI :: Regexp -> Regex
toRegexCI = makeRegexOpts compOpt{caseSensitive=False} execOpt
compOpt :: CompOption compOpt :: CompOption
compOpt = defaultCompOpt compOpt = defaultCompOpt
@ -100,31 +84,48 @@ regexMatches = flip (=~)
regexMatchesCI :: Regexp -> String -> Bool regexMatchesCI :: Regexp -> String -> Bool
regexMatchesCI r = match (toRegexCI r) regexMatchesCI r = match (toRegexCI r)
-- | Replace all occurrences of the regexp, transforming each match with the given function.
regexReplaceBy :: Regexp -> (String -> String) -> String -> String
regexReplaceBy r = replaceAllBy (toRegex r)
regexReplaceByCI :: Regexp -> (String -> String) -> String -> String
regexReplaceByCI r = replaceAllBy (toRegexCI r)
-- | Replace all occurrences of the regexp with the replacement pattern.
-- The replacement pattern supports \N backreferences but no other RE syntax.
regexReplace :: Regexp -> Replacement -> String -> String regexReplace :: Regexp -> Replacement -> String -> String
regexReplace r repl = regexReplaceBy r (const repl) regexReplace re = replaceRegex (toRegex re)
regexReplaceCI :: Regexp -> Replacement -> String -> String regexReplaceCI :: Regexp -> Replacement -> String -> String
regexReplaceCI r repl = regexReplaceByCI r (const repl) regexReplaceCI re = replaceRegex (toRegexCI re)
regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String --
regexReplaceBy r = replaceAll (toRegex r)
regexReplaceByCI :: Regexp -> (String -> Replacement) -> String -> String replaceRegex :: Regex -> Replacement -> String -> String
regexReplaceByCI r = replaceAll (toRegexCI r) replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [MatchText String])
toRegexCI :: Regexp -> Regex replaceMatch :: Replacement -> String -> MatchText String -> String
toRegexCI = makeRegexOpts compOpt{caseSensitive=False} execOpt replaceMatch replpat s matchgroups = pre ++ repl ++ post
where
((_,(off,len)):_) = elems matchgroups -- groups should have 0-based indexes, and there should always be at least one, since this is a match
(pre, post') = splitAt off s
post = drop len post'
repl = replaceAllBy (toRegex "\\\\[0-9]+") (replaceBackReference matchgroups) replpat
-- regexpToCI :: Regexp -> Regexp replaceBackReference :: MatchText String -> String -> String
-- regexpToCI r = "(?i)"++ r replaceBackReference grps ('\\':s@(_:_)) | all isDigit s =
case read s of n | n `elem` indices grps -> fst (grps ! n)
_ -> error' $ "no match group exists for backreference \"\\"++s++"\""
replaceBackReference _ s = error' $ "replaceBackReference called on non-backreference \""++s++"\", shouldn't happen"
-- from --
-- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries
-- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries :
-- | Replace all occurrences of a regexp in a string using a replacer -- | Replace all occurrences of a regexp in a string using a replacer
-- function, which receives the matched string as its argument. -- function, which receives the matched string as its argument.
-- Does not support standard RE syntax such as \1. -- Does not support backreferences or other RE syntax.
replaceAll :: Regex -> (String -> Replacement) -> String -> String replaceAllBy :: Regex -> (String -> String) -> String -> String
replaceAll re f s = start end replaceAllBy re f s = start end
where where
(_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s (_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s
go (ind,read,write) (off,len) = go (ind,read,write) (off,len) =
@ -132,3 +133,8 @@ replaceAll re f s = start end
(matched, remaining) = splitAt len start (matched, remaining) = splitAt len start
in (off + len, remaining, write . (skip++) . (f matched ++)) in (off + len, remaining, write . (skip++) . (f matched ++))
-- uses regexpr, may be slow:
regexSplit :: Regexp -> String -> [Regexp]
regexSplit = splitRegexPR

View File

@ -82,6 +82,7 @@ library
Hledger.Utils.UTF8IOCompat Hledger.Utils.UTF8IOCompat
build-depends: build-depends:
base >= 4.3 && < 5 base >= 4.3 && < 5
,array
,blaze-markup >= 0.5.1 ,blaze-markup >= 0.5.1
,bytestring ,bytestring
,cmdargs >= 0.10 && < 0.11 ,cmdargs >= 0.10 && < 0.11
@ -116,6 +117,7 @@ test-suite tests
default-language: Haskell2010 default-language: Haskell2010
build-depends: hledger-lib build-depends: hledger-lib
, base >= 4.3 && < 5 , base >= 4.3 && < 5
, array
, blaze-markup >= 0.5.1 , blaze-markup >= 0.5.1
, cmdargs , cmdargs
, containers , containers

View File

@ -35,7 +35,7 @@ import System.Process (readProcessWithExitCode)
import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff)) import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff))
import Test.HUnit import Test.HUnit
import Text.Printf import Text.Printf
import Text.Regex.TDFA (match) import Text.Regex.TDFA ((=~))
-- kludge - adapt to whichever directory version is installed, or when -- kludge - adapt to whichever directory version is installed, or when
@ -190,6 +190,6 @@ safeGetDirectoryContents fp = getDirectoryContents fp
-- | Does the second file represent a backup of the first, and if so which version is it ? -- | Does the second file represent a backup of the first, and if so which version is it ?
-- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex -- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex
backupNumber :: FilePath -> FilePath -> Maybe Int backupNumber :: FilePath -> FilePath -> Maybe Int
backupNumber f g = case match (toRegex ("^" ++ f ++ "\\.([0-9]+)$")) g of backupNumber f g = case g =~ ("^" ++ f ++ "\\.([0-9]+)$") of
(_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext (_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext
_ -> Nothing _ -> Nothing

View File

@ -29,9 +29,9 @@ alias A (.)=\1
c -3 c -3
2011/01/01 2011/01/01
\1 1 a 1
\1 2 a 2
c -3 c -3
>>>=0 >>>=0