diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index 1e4f7d163..5186b0622 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -655,7 +655,7 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f matcherMatches pats = patternMatches $ "(" ++ intercalate "|" pats ++ ")" where patternMatches :: Regexp -> Bool - patternMatches pat = regexMatchesCIRegexCompat pat csvline + patternMatches pat = regexMatchesCI pat csvline where csvline = intercalate "," record diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index 1e0cc2d91..2e919f961 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -81,7 +81,7 @@ detect :: FilePath -> String -> Bool detect f s | f /= "-" = takeExtension f `elem` ['.':format, ".j"] -- from a file: yes if the extension is .journal or .j -- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented) - | otherwise = isJust $ regexMatch "^[0-9]+.*\n[ \t]+" s + | otherwise = regexMatches "^[0-9]+.*\n[ \t]+" s -- | Parse and post-process a "Journal" from hledger's journal file -- format, or give an error. diff --git a/hledger-lib/Hledger/Utils/Regex.hs b/hledger-lib/Hledger/Utils/Regex.hs index bbb2fd39a..53cf993c2 100644 --- a/hledger-lib/Hledger/Utils/Regex.hs +++ b/hledger-lib/Hledger/Utils/Regex.hs @@ -1,11 +1,28 @@ --- Regular expression helpers. --- Currently using mostly regexpr and some regex-tdfa. --- Note many of these will die on malformed regexps. +{-# LANGUAGE ScopedTypeVariables #-} +{- + +Easy regular expression helpers. + +These should +- have mnemonic names +- have simple monomorphic types +- work with strings +- support extended regular expressions +- support replacement +- support splitting +- support unicode +- be cross-platform, not requiring C libraries + +They currently can +- die on malformed regexps +- be slow (regexpr) + +-} module Hledger.Utils.Regex ( - regexMatch - ,regexMatchCI - ,regexMatches + -- regexMatch + -- ,regexMatchCI + regexMatches ,regexMatchesCI ,containsRegex ,regexReplace @@ -13,47 +30,89 @@ module Hledger.Utils.Regex ( ,regexReplaceBy ,regexToCaseInsensitive ,regexSplit - ,regexMatchesRegexCompat - ,regexMatchesCIRegexCompat + ,toRegex ) where -import Data.Maybe -import Text.Regex.TDFA -import Text.RegexPR --- regexMatch :: String -> String -> MatchFun Maybe -regexMatch r s = matchRegexPR r s +import Data.List (foldl') +import Text.RegexPR (splitRegexPR) +import Text.Regex.TDFA ( + Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt, + makeRegexOpts, AllMatches(getAllMatches), match, (=~) + ) --- regexMatchCI :: String -> String -> MatchFun Maybe -regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s -regexMatches :: String -> String -> Bool -regexMatches r s = isJust $ matchRegexPR r s - -regexMatchesCI :: String -> String -> Bool -regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s +type Regexp = String +type Replacement = String +containsRegex :: Regexp -> String -> Bool containsRegex = regexMatchesCI -regexReplace :: String -> String -> String -> String -regexReplace r repl s = gsubRegexPR r repl s - -regexReplaceCI :: String -> String -> String -> String -regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s - -regexReplaceBy :: String -> (String -> String) -> String -> String -regexReplaceBy r replfn s = gsubRegexPRBy r replfn s - -regexToCaseInsensitive :: String -> String +regexToCaseInsensitive :: Regexp -> Regexp regexToCaseInsensitive r = "(?i)"++ r -regexSplit :: String -> String -> [String] +-- regexpr - may be slow + +regexSplit :: Regexp -> String -> [Regexp] regexSplit = splitRegexPR --- regex-compat (regex-posix) functions that perform better than regexpr. -regexMatchesRegexCompat :: String -> String -> Bool -regexMatchesRegexCompat = flip (=~) +-- regexMatch :: Regexp -> String -> MatchFun Maybe +-- regexMatch r s = matchRegexPR r s -regexMatchesCIRegexCompat :: String -> String -> Bool -regexMatchesCIRegexCompat r = match (makeRegexOpts defaultCompOpt { multiline = True, caseSensitive = False, newSyntax = True } defaultExecOpt r) +-- regexMatchCI :: Regexp -> String -> MatchFun Maybe +-- regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s + +-- regexMatches :: Regexp -> String -> Bool +-- regexMatches r s = isJust $ matchRegexPR r s + +-- regexMatchesCI :: Regexp -> String -> Bool +-- regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s + +-- regexReplace :: Regexp -> Replacement -> String -> String +-- regexReplace r repl s = gsubRegexPR r repl s + +-- regexReplaceCI :: Regexp -> Replacement -> String -> String +-- regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s + +-- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String +-- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s + +-- regex-tdfa + +compOpt :: CompOption +compOpt = defaultCompOpt + +execOpt :: ExecOption +execOpt = defaultExecOpt + +toRegex :: Regexp -> Regex +toRegex = makeRegexOpts compOpt execOpt + +-- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a +-- regexMatch' r s = s =~ (toRegex r) + +regexMatches :: Regexp -> String -> Bool +regexMatches = flip (=~) + +regexMatchesCI :: Regexp -> String -> Bool +regexMatchesCI r = match (makeRegexOpts compOpt{caseSensitive=False} execOpt r) + +regexReplace :: Regexp -> Replacement -> String -> String +regexReplace r repl = regexReplaceBy r (const repl) + +regexReplaceCI :: Regexp -> Replacement -> String -> String +regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s + +regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String +regexReplaceBy r = replaceAll (toRegex r) + +-- from http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries +replaceAll :: Regex -> (String -> Replacement) -> String -> String +replaceAll re f s = start end + where + (_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s + go (ind,read,write) (off,len) = + let (skip, start) = splitAt (off - ind) read + (matched, remaining) = splitAt len start + in (off + len, remaining, write . (skip++) . (f matched ++)) diff --git a/hledger/Hledger/Cli/Utils.hs b/hledger/Hledger/Cli/Utils.hs index 534ec580c..d939e743b 100644 --- a/hledger/Hledger/Cli/Utils.hs +++ b/hledger/Hledger/Cli/Utils.hs @@ -34,6 +34,8 @@ import System.Process (readProcessWithExitCode) import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff)) import Test.HUnit import Text.Printf +import Text.Regex.TDFA (match) + -- kludge - adapt to whichever directory version is installed, or when -- cabal macros aren't available, assume the new directory @@ -179,7 +181,8 @@ safeGetDirectoryContents "" = getDirectoryContents "." safeGetDirectoryContents fp = getDirectoryContents fp -- | Does the second file represent a backup of the first, and if so which version is it ? +-- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex backupNumber :: FilePath -> FilePath -> Maybe Int -backupNumber f g = case regexMatch ("^" ++ f ++ "\\.([0-9]+)$") g of - Just (_, ((_,suffix):_)) -> readMay suffix +backupNumber f g = case match (toRegex ("^" ++ f ++ "\\.([0-9]+)$")) g of + (_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext _ -> Nothing diff --git a/hledger/hledger.cabal b/hledger/hledger.cabal index 46c3ffd99..6096ba217 100644 --- a/hledger/hledger.cabal +++ b/hledger/hledger.cabal @@ -80,6 +80,7 @@ library ,old-time ,parsec ,process + ,regex-tdfa ,regexpr >= 0.5.1 ,safe >= 0.2 -- http://www.yesodweb.com/blog/2014/04/consolidation-progress @@ -149,6 +150,7 @@ executable hledger ,old-time ,parsec ,process + ,regex-tdfa ,regexpr >= 0.5.1 ,safe >= 0.2 ,shakespeare-text >= 1.0 && < 1.2 @@ -183,6 +185,7 @@ test-suite tests , old-time , parsec , process + , regex-tdfa , regexpr , safe , shakespeare-text >= 1.0 && < 1.2