account aliases (and regexReplace) now support backreferences
This commit is contained in:
parent
bd6322669c
commit
9fa22e0398
@ -1,7 +1,18 @@
|
|||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
{-|
|
{-|
|
||||||
|
|
||||||
Easy regular expression helpers, based on regex-tdfa and regexpr. These should
|
Easy regular expression helpers, based on regex-tdfa and (a little) on
|
||||||
|
regexpr. These should:
|
||||||
|
|
||||||
|
- be cross-platform, not requiring C libraries
|
||||||
|
|
||||||
|
- support unicode
|
||||||
|
|
||||||
|
- support extended regular expressions
|
||||||
|
|
||||||
|
- support replacement, with backreferences etc.
|
||||||
|
|
||||||
|
- support splitting
|
||||||
|
|
||||||
- have mnemonic names
|
- have mnemonic names
|
||||||
|
|
||||||
@ -9,82 +20,55 @@ Easy regular expression helpers, based on regex-tdfa and regexpr. These should
|
|||||||
|
|
||||||
- work with strings
|
- work with strings
|
||||||
|
|
||||||
- support extended regular expressions
|
Current limitations:
|
||||||
|
|
||||||
- support replacement
|
- (?i) and similar are not supported
|
||||||
|
|
||||||
- support splitting
|
|
||||||
|
|
||||||
- support unicode
|
|
||||||
|
|
||||||
- be cross-platform, not requiring C libraries
|
|
||||||
|
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Hledger.Utils.Regex (
|
module Hledger.Utils.Regex (
|
||||||
|
-- * type aliases
|
||||||
Regexp
|
Regexp
|
||||||
,Replacement
|
,Replacement
|
||||||
-- ,regexMatch
|
-- * based on regex-tdfa
|
||||||
-- ,regexMatchCI
|
|
||||||
,regexMatches
|
,regexMatches
|
||||||
,regexMatchesCI
|
,regexMatchesCI
|
||||||
,containsRegex
|
|
||||||
,regexReplace
|
,regexReplace
|
||||||
,regexReplaceCI
|
,regexReplaceCI
|
||||||
,regexReplaceBy
|
,regexReplaceBy
|
||||||
-- ,regexpToCI
|
,regexReplaceByCI
|
||||||
|
-- * based on regexpr
|
||||||
,regexSplit
|
,regexSplit
|
||||||
,toRegex
|
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
|
import Data.Array
|
||||||
|
import Data.Char
|
||||||
import Data.List (foldl')
|
import Data.List (foldl')
|
||||||
import Text.RegexPR (splitRegexPR)
|
import Text.RegexPR (splitRegexPR)
|
||||||
import Text.Regex.TDFA (
|
import Text.Regex.TDFA (
|
||||||
Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt,
|
Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt,
|
||||||
makeRegexOpts, AllMatches(getAllMatches), match, (=~)
|
makeRegexOpts, AllMatches(getAllMatches), match, (=~), MatchText
|
||||||
)
|
)
|
||||||
|
|
||||||
|
-- import Hledger.Utils.Debug
|
||||||
|
import Hledger.Utils.UTF8IOCompat (error')
|
||||||
|
|
||||||
|
|
||||||
|
-- | Regular expression. Extended regular expression-ish syntax ? But does not support eg (?i) syntax.
|
||||||
type Regexp = String
|
type Regexp = String
|
||||||
|
|
||||||
|
-- | A replacement pattern. May include backreferences (\N).
|
||||||
type Replacement = String
|
type Replacement = String
|
||||||
|
|
||||||
containsRegex :: Regexp -> String -> Bool
|
|
||||||
containsRegex = regexMatchesCI
|
|
||||||
|
|
||||||
-- regexpr - may be slow
|
|
||||||
|
|
||||||
regexSplit :: Regexp -> String -> [Regexp]
|
|
||||||
regexSplit = splitRegexPR
|
|
||||||
|
|
||||||
-- regexMatch :: Regexp -> String -> MatchFun Maybe
|
|
||||||
-- regexMatch r s = matchRegexPR r s
|
|
||||||
|
|
||||||
-- regexMatchCI :: Regexp -> String -> MatchFun Maybe
|
|
||||||
-- regexMatchCI r s = regexMatch (regexpToCI r) s
|
|
||||||
|
|
||||||
-- regexMatches :: Regexp -> String -> Bool
|
|
||||||
-- regexMatches r s = isJust $ matchRegexPR r s
|
|
||||||
|
|
||||||
-- regexMatchesCI :: Regexp -> String -> Bool
|
|
||||||
-- regexMatchesCI r s = regexMatches (regexpToCI r) s
|
|
||||||
|
|
||||||
-- regexReplace :: Regexp -> Replacement -> String -> String
|
|
||||||
-- regexReplace r repl s = gsubRegexPR r repl s
|
|
||||||
|
|
||||||
-- regexReplaceCI :: Regexp -> Replacement -> String -> String
|
|
||||||
-- regexReplaceCI r s = regexReplace (regexpToCI r) s
|
|
||||||
|
|
||||||
-- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
|
|
||||||
-- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s
|
|
||||||
|
|
||||||
-- regex-tdfa
|
|
||||||
|
|
||||||
-- | Convert our string-based regexps to real ones. Can fail if the
|
-- | Convert our string-based regexps to real ones. Can fail if the
|
||||||
-- string regexp is malformed.
|
-- string regexp is malformed.
|
||||||
toRegex :: Regexp -> Regex
|
toRegex :: Regexp -> Regex
|
||||||
toRegex = makeRegexOpts compOpt execOpt
|
toRegex = makeRegexOpts compOpt execOpt
|
||||||
|
|
||||||
|
toRegexCI :: Regexp -> Regex
|
||||||
|
toRegexCI = makeRegexOpts compOpt{caseSensitive=False} execOpt
|
||||||
|
|
||||||
compOpt :: CompOption
|
compOpt :: CompOption
|
||||||
compOpt = defaultCompOpt
|
compOpt = defaultCompOpt
|
||||||
|
|
||||||
@ -100,31 +84,48 @@ regexMatches = flip (=~)
|
|||||||
regexMatchesCI :: Regexp -> String -> Bool
|
regexMatchesCI :: Regexp -> String -> Bool
|
||||||
regexMatchesCI r = match (toRegexCI r)
|
regexMatchesCI r = match (toRegexCI r)
|
||||||
|
|
||||||
|
-- | Replace all occurrences of the regexp, transforming each match with the given function.
|
||||||
|
regexReplaceBy :: Regexp -> (String -> String) -> String -> String
|
||||||
|
regexReplaceBy r = replaceAllBy (toRegex r)
|
||||||
|
|
||||||
|
regexReplaceByCI :: Regexp -> (String -> String) -> String -> String
|
||||||
|
regexReplaceByCI r = replaceAllBy (toRegexCI r)
|
||||||
|
|
||||||
|
-- | Replace all occurrences of the regexp with the replacement pattern.
|
||||||
|
-- The replacement pattern supports \N backreferences but no other RE syntax.
|
||||||
regexReplace :: Regexp -> Replacement -> String -> String
|
regexReplace :: Regexp -> Replacement -> String -> String
|
||||||
regexReplace r repl = regexReplaceBy r (const repl)
|
regexReplace re = replaceRegex (toRegex re)
|
||||||
|
|
||||||
regexReplaceCI :: Regexp -> Replacement -> String -> String
|
regexReplaceCI :: Regexp -> Replacement -> String -> String
|
||||||
regexReplaceCI r repl = regexReplaceByCI r (const repl)
|
regexReplaceCI re = replaceRegex (toRegexCI re)
|
||||||
|
|
||||||
regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
|
--
|
||||||
regexReplaceBy r = replaceAll (toRegex r)
|
|
||||||
|
|
||||||
regexReplaceByCI :: Regexp -> (String -> Replacement) -> String -> String
|
replaceRegex :: Regex -> Replacement -> String -> String
|
||||||
regexReplaceByCI r = replaceAll (toRegexCI r)
|
replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [MatchText String])
|
||||||
|
|
||||||
toRegexCI :: Regexp -> Regex
|
replaceMatch :: Replacement -> String -> MatchText String -> String
|
||||||
toRegexCI = makeRegexOpts compOpt{caseSensitive=False} execOpt
|
replaceMatch replpat s matchgroups = pre ++ repl ++ post
|
||||||
|
where
|
||||||
|
((_,(off,len)):_) = elems matchgroups -- groups should have 0-based indexes, and there should always be at least one, since this is a match
|
||||||
|
(pre, post') = splitAt off s
|
||||||
|
post = drop len post'
|
||||||
|
repl = replaceAllBy (toRegex "\\\\[0-9]+") (replaceBackReference matchgroups) replpat
|
||||||
|
|
||||||
-- regexpToCI :: Regexp -> Regexp
|
replaceBackReference :: MatchText String -> String -> String
|
||||||
-- regexpToCI r = "(?i)"++ r
|
replaceBackReference grps ('\\':s@(_:_)) | all isDigit s =
|
||||||
|
case read s of n | n `elem` indices grps -> fst (grps ! n)
|
||||||
|
_ -> error' $ "no match group exists for backreference \"\\"++s++"\""
|
||||||
|
replaceBackReference _ s = error' $ "replaceBackReference called on non-backreference \""++s++"\", shouldn't happen"
|
||||||
|
|
||||||
-- from
|
--
|
||||||
-- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries
|
|
||||||
|
-- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries :
|
||||||
-- | Replace all occurrences of a regexp in a string using a replacer
|
-- | Replace all occurrences of a regexp in a string using a replacer
|
||||||
-- function, which receives the matched string as its argument.
|
-- function, which receives the matched string as its argument.
|
||||||
-- Does not support standard RE syntax such as \1.
|
-- Does not support backreferences or other RE syntax.
|
||||||
replaceAll :: Regex -> (String -> Replacement) -> String -> String
|
replaceAllBy :: Regex -> (String -> String) -> String -> String
|
||||||
replaceAll re f s = start end
|
replaceAllBy re f s = start end
|
||||||
where
|
where
|
||||||
(_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s
|
(_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s
|
||||||
go (ind,read,write) (off,len) =
|
go (ind,read,write) (off,len) =
|
||||||
@ -132,3 +133,8 @@ replaceAll re f s = start end
|
|||||||
(matched, remaining) = splitAt len start
|
(matched, remaining) = splitAt len start
|
||||||
in (off + len, remaining, write . (skip++) . (f matched ++))
|
in (off + len, remaining, write . (skip++) . (f matched ++))
|
||||||
|
|
||||||
|
-- uses regexpr, may be slow:
|
||||||
|
|
||||||
|
regexSplit :: Regexp -> String -> [Regexp]
|
||||||
|
regexSplit = splitRegexPR
|
||||||
|
|
||||||
|
|||||||
@ -82,6 +82,7 @@ library
|
|||||||
Hledger.Utils.UTF8IOCompat
|
Hledger.Utils.UTF8IOCompat
|
||||||
build-depends:
|
build-depends:
|
||||||
base >= 4.3 && < 5
|
base >= 4.3 && < 5
|
||||||
|
,array
|
||||||
,blaze-markup >= 0.5.1
|
,blaze-markup >= 0.5.1
|
||||||
,bytestring
|
,bytestring
|
||||||
,cmdargs >= 0.10 && < 0.11
|
,cmdargs >= 0.10 && < 0.11
|
||||||
@ -116,6 +117,7 @@ test-suite tests
|
|||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
build-depends: hledger-lib
|
build-depends: hledger-lib
|
||||||
, base >= 4.3 && < 5
|
, base >= 4.3 && < 5
|
||||||
|
, array
|
||||||
, blaze-markup >= 0.5.1
|
, blaze-markup >= 0.5.1
|
||||||
, cmdargs
|
, cmdargs
|
||||||
, containers
|
, containers
|
||||||
|
|||||||
@ -35,7 +35,7 @@ import System.Process (readProcessWithExitCode)
|
|||||||
import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff))
|
import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff))
|
||||||
import Test.HUnit
|
import Test.HUnit
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
import Text.Regex.TDFA (match)
|
import Text.Regex.TDFA ((=~))
|
||||||
|
|
||||||
|
|
||||||
-- kludge - adapt to whichever directory version is installed, or when
|
-- kludge - adapt to whichever directory version is installed, or when
|
||||||
@ -190,6 +190,6 @@ safeGetDirectoryContents fp = getDirectoryContents fp
|
|||||||
-- | Does the second file represent a backup of the first, and if so which version is it ?
|
-- | Does the second file represent a backup of the first, and if so which version is it ?
|
||||||
-- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex
|
-- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex
|
||||||
backupNumber :: FilePath -> FilePath -> Maybe Int
|
backupNumber :: FilePath -> FilePath -> Maybe Int
|
||||||
backupNumber f g = case match (toRegex ("^" ++ f ++ "\\.([0-9]+)$")) g of
|
backupNumber f g = case g =~ ("^" ++ f ++ "\\.([0-9]+)$") of
|
||||||
(_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext
|
(_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
|||||||
@ -29,8 +29,8 @@ alias A (.)=\1
|
|||||||
c -3
|
c -3
|
||||||
|
|
||||||
2011/01/01
|
2011/01/01
|
||||||
\1 1
|
a 1
|
||||||
\1 2
|
a 2
|
||||||
c -3
|
c -3
|
||||||
|
|
||||||
>>>=0
|
>>>=0
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user