mostly replace slow regexpr with regex-tdfa (fixes #189)
This commit is contained in:
parent
4c509ff933
commit
3a16e6cfc7
@ -655,7 +655,7 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f
|
||||
matcherMatches pats = patternMatches $ "(" ++ intercalate "|" pats ++ ")"
|
||||
where
|
||||
patternMatches :: Regexp -> Bool
|
||||
patternMatches pat = regexMatchesCIRegexCompat pat csvline
|
||||
patternMatches pat = regexMatchesCI pat csvline
|
||||
where
|
||||
csvline = intercalate "," record
|
||||
|
||||
|
||||
@ -81,7 +81,7 @@ detect :: FilePath -> String -> Bool
|
||||
detect f s
|
||||
| f /= "-" = takeExtension f `elem` ['.':format, ".j"] -- from a file: yes if the extension is .journal or .j
|
||||
-- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented)
|
||||
| otherwise = isJust $ regexMatch "^[0-9]+.*\n[ \t]+" s
|
||||
| otherwise = regexMatches "^[0-9]+.*\n[ \t]+" s
|
||||
|
||||
-- | Parse and post-process a "Journal" from hledger's journal file
|
||||
-- format, or give an error.
|
||||
|
||||
@ -1,11 +1,28 @@
|
||||
-- Regular expression helpers.
|
||||
-- Currently using mostly regexpr and some regex-tdfa.
|
||||
-- Note many of these will die on malformed regexps.
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-
|
||||
|
||||
Easy regular expression helpers.
|
||||
|
||||
These should
|
||||
- have mnemonic names
|
||||
- have simple monomorphic types
|
||||
- work with strings
|
||||
- support extended regular expressions
|
||||
- support replacement
|
||||
- support splitting
|
||||
- support unicode
|
||||
- be cross-platform, not requiring C libraries
|
||||
|
||||
They currently can
|
||||
- die on malformed regexps
|
||||
- be slow (regexpr)
|
||||
|
||||
-}
|
||||
|
||||
module Hledger.Utils.Regex (
|
||||
regexMatch
|
||||
,regexMatchCI
|
||||
,regexMatches
|
||||
-- regexMatch
|
||||
-- ,regexMatchCI
|
||||
regexMatches
|
||||
,regexMatchesCI
|
||||
,containsRegex
|
||||
,regexReplace
|
||||
@ -13,47 +30,89 @@ module Hledger.Utils.Regex (
|
||||
,regexReplaceBy
|
||||
,regexToCaseInsensitive
|
||||
,regexSplit
|
||||
,regexMatchesRegexCompat
|
||||
,regexMatchesCIRegexCompat
|
||||
,toRegex
|
||||
)
|
||||
where
|
||||
import Data.Maybe
|
||||
import Text.Regex.TDFA
|
||||
import Text.RegexPR
|
||||
|
||||
-- regexMatch :: String -> String -> MatchFun Maybe
|
||||
regexMatch r s = matchRegexPR r s
|
||||
import Data.List (foldl')
|
||||
import Text.RegexPR (splitRegexPR)
|
||||
import Text.Regex.TDFA (
|
||||
Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt,
|
||||
makeRegexOpts, AllMatches(getAllMatches), match, (=~)
|
||||
)
|
||||
|
||||
-- regexMatchCI :: String -> String -> MatchFun Maybe
|
||||
regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s
|
||||
|
||||
regexMatches :: String -> String -> Bool
|
||||
regexMatches r s = isJust $ matchRegexPR r s
|
||||
|
||||
regexMatchesCI :: String -> String -> Bool
|
||||
regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s
|
||||
type Regexp = String
|
||||
type Replacement = String
|
||||
|
||||
containsRegex :: Regexp -> String -> Bool
|
||||
containsRegex = regexMatchesCI
|
||||
|
||||
regexReplace :: String -> String -> String -> String
|
||||
regexReplace r repl s = gsubRegexPR r repl s
|
||||
|
||||
regexReplaceCI :: String -> String -> String -> String
|
||||
regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s
|
||||
|
||||
regexReplaceBy :: String -> (String -> String) -> String -> String
|
||||
regexReplaceBy r replfn s = gsubRegexPRBy r replfn s
|
||||
|
||||
regexToCaseInsensitive :: String -> String
|
||||
regexToCaseInsensitive :: Regexp -> Regexp
|
||||
regexToCaseInsensitive r = "(?i)"++ r
|
||||
|
||||
regexSplit :: String -> String -> [String]
|
||||
-- regexpr - may be slow
|
||||
|
||||
regexSplit :: Regexp -> String -> [Regexp]
|
||||
regexSplit = splitRegexPR
|
||||
|
||||
-- regex-compat (regex-posix) functions that perform better than regexpr.
|
||||
regexMatchesRegexCompat :: String -> String -> Bool
|
||||
regexMatchesRegexCompat = flip (=~)
|
||||
-- regexMatch :: Regexp -> String -> MatchFun Maybe
|
||||
-- regexMatch r s = matchRegexPR r s
|
||||
|
||||
regexMatchesCIRegexCompat :: String -> String -> Bool
|
||||
regexMatchesCIRegexCompat r = match (makeRegexOpts defaultCompOpt { multiline = True, caseSensitive = False, newSyntax = True } defaultExecOpt r)
|
||||
-- regexMatchCI :: Regexp -> String -> MatchFun Maybe
|
||||
-- regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s
|
||||
|
||||
-- regexMatches :: Regexp -> String -> Bool
|
||||
-- regexMatches r s = isJust $ matchRegexPR r s
|
||||
|
||||
-- regexMatchesCI :: Regexp -> String -> Bool
|
||||
-- regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s
|
||||
|
||||
-- regexReplace :: Regexp -> Replacement -> String -> String
|
||||
-- regexReplace r repl s = gsubRegexPR r repl s
|
||||
|
||||
-- regexReplaceCI :: Regexp -> Replacement -> String -> String
|
||||
-- regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s
|
||||
|
||||
-- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
|
||||
-- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s
|
||||
|
||||
-- regex-tdfa
|
||||
|
||||
compOpt :: CompOption
|
||||
compOpt = defaultCompOpt
|
||||
|
||||
execOpt :: ExecOption
|
||||
execOpt = defaultExecOpt
|
||||
|
||||
toRegex :: Regexp -> Regex
|
||||
toRegex = makeRegexOpts compOpt execOpt
|
||||
|
||||
-- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a
|
||||
-- regexMatch' r s = s =~ (toRegex r)
|
||||
|
||||
regexMatches :: Regexp -> String -> Bool
|
||||
regexMatches = flip (=~)
|
||||
|
||||
regexMatchesCI :: Regexp -> String -> Bool
|
||||
regexMatchesCI r = match (makeRegexOpts compOpt{caseSensitive=False} execOpt r)
|
||||
|
||||
regexReplace :: Regexp -> Replacement -> String -> String
|
||||
regexReplace r repl = regexReplaceBy r (const repl)
|
||||
|
||||
regexReplaceCI :: Regexp -> Replacement -> String -> String
|
||||
regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s
|
||||
|
||||
regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
|
||||
regexReplaceBy r = replaceAll (toRegex r)
|
||||
|
||||
-- from http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries
|
||||
replaceAll :: Regex -> (String -> Replacement) -> String -> String
|
||||
replaceAll re f s = start end
|
||||
where
|
||||
(_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s
|
||||
go (ind,read,write) (off,len) =
|
||||
let (skip, start) = splitAt (off - ind) read
|
||||
(matched, remaining) = splitAt len start
|
||||
in (off + len, remaining, write . (skip++) . (f matched ++))
|
||||
|
||||
|
||||
@ -34,6 +34,8 @@ import System.Process (readProcessWithExitCode)
|
||||
import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff))
|
||||
import Test.HUnit
|
||||
import Text.Printf
|
||||
import Text.Regex.TDFA (match)
|
||||
|
||||
|
||||
-- kludge - adapt to whichever directory version is installed, or when
|
||||
-- cabal macros aren't available, assume the new directory
|
||||
@ -179,7 +181,8 @@ safeGetDirectoryContents "" = getDirectoryContents "."
|
||||
safeGetDirectoryContents fp = getDirectoryContents fp
|
||||
|
||||
-- | Does the second file represent a backup of the first, and if so which version is it ?
|
||||
-- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex
|
||||
backupNumber :: FilePath -> FilePath -> Maybe Int
|
||||
backupNumber f g = case regexMatch ("^" ++ f ++ "\\.([0-9]+)$") g of
|
||||
Just (_, ((_,suffix):_)) -> readMay suffix
|
||||
backupNumber f g = case match (toRegex ("^" ++ f ++ "\\.([0-9]+)$")) g of
|
||||
(_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext
|
||||
_ -> Nothing
|
||||
|
||||
@ -80,6 +80,7 @@ library
|
||||
,old-time
|
||||
,parsec
|
||||
,process
|
||||
,regex-tdfa
|
||||
,regexpr >= 0.5.1
|
||||
,safe >= 0.2
|
||||
-- http://www.yesodweb.com/blog/2014/04/consolidation-progress
|
||||
@ -149,6 +150,7 @@ executable hledger
|
||||
,old-time
|
||||
,parsec
|
||||
,process
|
||||
,regex-tdfa
|
||||
,regexpr >= 0.5.1
|
||||
,safe >= 0.2
|
||||
,shakespeare-text >= 1.0 && < 1.2
|
||||
@ -183,6 +185,7 @@ test-suite tests
|
||||
, old-time
|
||||
, parsec
|
||||
, process
|
||||
, regex-tdfa
|
||||
, regexpr
|
||||
, safe
|
||||
, shakespeare-text >= 1.0 && < 1.2
|
||||
|
||||
Loading…
Reference in New Issue
Block a user