mostly replace slow regexpr with regex-tdfa (fixes #189)
This commit is contained in:
parent
4c509ff933
commit
3a16e6cfc7
@ -655,7 +655,7 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f
|
|||||||
matcherMatches pats = patternMatches $ "(" ++ intercalate "|" pats ++ ")"
|
matcherMatches pats = patternMatches $ "(" ++ intercalate "|" pats ++ ")"
|
||||||
where
|
where
|
||||||
patternMatches :: Regexp -> Bool
|
patternMatches :: Regexp -> Bool
|
||||||
patternMatches pat = regexMatchesCIRegexCompat pat csvline
|
patternMatches pat = regexMatchesCI pat csvline
|
||||||
where
|
where
|
||||||
csvline = intercalate "," record
|
csvline = intercalate "," record
|
||||||
|
|
||||||
|
|||||||
@ -81,7 +81,7 @@ detect :: FilePath -> String -> Bool
|
|||||||
detect f s
|
detect f s
|
||||||
| f /= "-" = takeExtension f `elem` ['.':format, ".j"] -- from a file: yes if the extension is .journal or .j
|
| f /= "-" = takeExtension f `elem` ['.':format, ".j"] -- from a file: yes if the extension is .journal or .j
|
||||||
-- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented)
|
-- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented)
|
||||||
| otherwise = isJust $ regexMatch "^[0-9]+.*\n[ \t]+" s
|
| otherwise = regexMatches "^[0-9]+.*\n[ \t]+" s
|
||||||
|
|
||||||
-- | Parse and post-process a "Journal" from hledger's journal file
|
-- | Parse and post-process a "Journal" from hledger's journal file
|
||||||
-- format, or give an error.
|
-- format, or give an error.
|
||||||
|
|||||||
@ -1,11 +1,28 @@
|
|||||||
-- Regular expression helpers.
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
-- Currently using mostly regexpr and some regex-tdfa.
|
{-
|
||||||
-- Note many of these will die on malformed regexps.
|
|
||||||
|
Easy regular expression helpers.
|
||||||
|
|
||||||
|
These should
|
||||||
|
- have mnemonic names
|
||||||
|
- have simple monomorphic types
|
||||||
|
- work with strings
|
||||||
|
- support extended regular expressions
|
||||||
|
- support replacement
|
||||||
|
- support splitting
|
||||||
|
- support unicode
|
||||||
|
- be cross-platform, not requiring C libraries
|
||||||
|
|
||||||
|
They currently can
|
||||||
|
- die on malformed regexps
|
||||||
|
- be slow (regexpr)
|
||||||
|
|
||||||
|
-}
|
||||||
|
|
||||||
module Hledger.Utils.Regex (
|
module Hledger.Utils.Regex (
|
||||||
regexMatch
|
-- regexMatch
|
||||||
,regexMatchCI
|
-- ,regexMatchCI
|
||||||
,regexMatches
|
regexMatches
|
||||||
,regexMatchesCI
|
,regexMatchesCI
|
||||||
,containsRegex
|
,containsRegex
|
||||||
,regexReplace
|
,regexReplace
|
||||||
@ -13,47 +30,89 @@ module Hledger.Utils.Regex (
|
|||||||
,regexReplaceBy
|
,regexReplaceBy
|
||||||
,regexToCaseInsensitive
|
,regexToCaseInsensitive
|
||||||
,regexSplit
|
,regexSplit
|
||||||
,regexMatchesRegexCompat
|
,toRegex
|
||||||
,regexMatchesCIRegexCompat
|
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
import Data.Maybe
|
|
||||||
import Text.Regex.TDFA
|
|
||||||
import Text.RegexPR
|
|
||||||
|
|
||||||
-- regexMatch :: String -> String -> MatchFun Maybe
|
import Data.List (foldl')
|
||||||
regexMatch r s = matchRegexPR r s
|
import Text.RegexPR (splitRegexPR)
|
||||||
|
import Text.Regex.TDFA (
|
||||||
|
Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt,
|
||||||
|
makeRegexOpts, AllMatches(getAllMatches), match, (=~)
|
||||||
|
)
|
||||||
|
|
||||||
-- regexMatchCI :: String -> String -> MatchFun Maybe
|
|
||||||
regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s
|
|
||||||
|
|
||||||
regexMatches :: String -> String -> Bool
|
type Regexp = String
|
||||||
regexMatches r s = isJust $ matchRegexPR r s
|
type Replacement = String
|
||||||
|
|
||||||
regexMatchesCI :: String -> String -> Bool
|
|
||||||
regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s
|
|
||||||
|
|
||||||
|
containsRegex :: Regexp -> String -> Bool
|
||||||
containsRegex = regexMatchesCI
|
containsRegex = regexMatchesCI
|
||||||
|
|
||||||
regexReplace :: String -> String -> String -> String
|
regexToCaseInsensitive :: Regexp -> Regexp
|
||||||
regexReplace r repl s = gsubRegexPR r repl s
|
|
||||||
|
|
||||||
regexReplaceCI :: String -> String -> String -> String
|
|
||||||
regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s
|
|
||||||
|
|
||||||
regexReplaceBy :: String -> (String -> String) -> String -> String
|
|
||||||
regexReplaceBy r replfn s = gsubRegexPRBy r replfn s
|
|
||||||
|
|
||||||
regexToCaseInsensitive :: String -> String
|
|
||||||
regexToCaseInsensitive r = "(?i)"++ r
|
regexToCaseInsensitive r = "(?i)"++ r
|
||||||
|
|
||||||
regexSplit :: String -> String -> [String]
|
-- regexpr - may be slow
|
||||||
|
|
||||||
|
regexSplit :: Regexp -> String -> [Regexp]
|
||||||
regexSplit = splitRegexPR
|
regexSplit = splitRegexPR
|
||||||
|
|
||||||
-- regex-compat (regex-posix) functions that perform better than regexpr.
|
-- regexMatch :: Regexp -> String -> MatchFun Maybe
|
||||||
regexMatchesRegexCompat :: String -> String -> Bool
|
-- regexMatch r s = matchRegexPR r s
|
||||||
regexMatchesRegexCompat = flip (=~)
|
|
||||||
|
|
||||||
regexMatchesCIRegexCompat :: String -> String -> Bool
|
-- regexMatchCI :: Regexp -> String -> MatchFun Maybe
|
||||||
regexMatchesCIRegexCompat r = match (makeRegexOpts defaultCompOpt { multiline = True, caseSensitive = False, newSyntax = True } defaultExecOpt r)
|
-- regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s
|
||||||
|
|
||||||
|
-- regexMatches :: Regexp -> String -> Bool
|
||||||
|
-- regexMatches r s = isJust $ matchRegexPR r s
|
||||||
|
|
||||||
|
-- regexMatchesCI :: Regexp -> String -> Bool
|
||||||
|
-- regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s
|
||||||
|
|
||||||
|
-- regexReplace :: Regexp -> Replacement -> String -> String
|
||||||
|
-- regexReplace r repl s = gsubRegexPR r repl s
|
||||||
|
|
||||||
|
-- regexReplaceCI :: Regexp -> Replacement -> String -> String
|
||||||
|
-- regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s
|
||||||
|
|
||||||
|
-- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
|
||||||
|
-- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s
|
||||||
|
|
||||||
|
-- regex-tdfa
|
||||||
|
|
||||||
|
compOpt :: CompOption
|
||||||
|
compOpt = defaultCompOpt
|
||||||
|
|
||||||
|
execOpt :: ExecOption
|
||||||
|
execOpt = defaultExecOpt
|
||||||
|
|
||||||
|
toRegex :: Regexp -> Regex
|
||||||
|
toRegex = makeRegexOpts compOpt execOpt
|
||||||
|
|
||||||
|
-- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a
|
||||||
|
-- regexMatch' r s = s =~ (toRegex r)
|
||||||
|
|
||||||
|
regexMatches :: Regexp -> String -> Bool
|
||||||
|
regexMatches = flip (=~)
|
||||||
|
|
||||||
|
regexMatchesCI :: Regexp -> String -> Bool
|
||||||
|
regexMatchesCI r = match (makeRegexOpts compOpt{caseSensitive=False} execOpt r)
|
||||||
|
|
||||||
|
regexReplace :: Regexp -> Replacement -> String -> String
|
||||||
|
regexReplace r repl = regexReplaceBy r (const repl)
|
||||||
|
|
||||||
|
regexReplaceCI :: Regexp -> Replacement -> String -> String
|
||||||
|
regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s
|
||||||
|
|
||||||
|
regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String
|
||||||
|
regexReplaceBy r = replaceAll (toRegex r)
|
||||||
|
|
||||||
|
-- from http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries
|
||||||
|
replaceAll :: Regex -> (String -> Replacement) -> String -> String
|
||||||
|
replaceAll re f s = start end
|
||||||
|
where
|
||||||
|
(_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s
|
||||||
|
go (ind,read,write) (off,len) =
|
||||||
|
let (skip, start) = splitAt (off - ind) read
|
||||||
|
(matched, remaining) = splitAt len start
|
||||||
|
in (off + len, remaining, write . (skip++) . (f matched ++))
|
||||||
|
|
||||||
|
|||||||
@ -34,6 +34,8 @@ import System.Process (readProcessWithExitCode)
|
|||||||
import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff))
|
import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff))
|
||||||
import Test.HUnit
|
import Test.HUnit
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
import Text.Regex.TDFA (match)
|
||||||
|
|
||||||
|
|
||||||
-- kludge - adapt to whichever directory version is installed, or when
|
-- kludge - adapt to whichever directory version is installed, or when
|
||||||
-- cabal macros aren't available, assume the new directory
|
-- cabal macros aren't available, assume the new directory
|
||||||
@ -179,7 +181,8 @@ safeGetDirectoryContents "" = getDirectoryContents "."
|
|||||||
safeGetDirectoryContents fp = getDirectoryContents fp
|
safeGetDirectoryContents fp = getDirectoryContents fp
|
||||||
|
|
||||||
-- | Does the second file represent a backup of the first, and if so which version is it ?
|
-- | Does the second file represent a backup of the first, and if so which version is it ?
|
||||||
|
-- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex
|
||||||
backupNumber :: FilePath -> FilePath -> Maybe Int
|
backupNumber :: FilePath -> FilePath -> Maybe Int
|
||||||
backupNumber f g = case regexMatch ("^" ++ f ++ "\\.([0-9]+)$") g of
|
backupNumber f g = case match (toRegex ("^" ++ f ++ "\\.([0-9]+)$")) g of
|
||||||
Just (_, ((_,suffix):_)) -> readMay suffix
|
(_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
|||||||
@ -80,6 +80,7 @@ library
|
|||||||
,old-time
|
,old-time
|
||||||
,parsec
|
,parsec
|
||||||
,process
|
,process
|
||||||
|
,regex-tdfa
|
||||||
,regexpr >= 0.5.1
|
,regexpr >= 0.5.1
|
||||||
,safe >= 0.2
|
,safe >= 0.2
|
||||||
-- http://www.yesodweb.com/blog/2014/04/consolidation-progress
|
-- http://www.yesodweb.com/blog/2014/04/consolidation-progress
|
||||||
@ -149,6 +150,7 @@ executable hledger
|
|||||||
,old-time
|
,old-time
|
||||||
,parsec
|
,parsec
|
||||||
,process
|
,process
|
||||||
|
,regex-tdfa
|
||||||
,regexpr >= 0.5.1
|
,regexpr >= 0.5.1
|
||||||
,safe >= 0.2
|
,safe >= 0.2
|
||||||
,shakespeare-text >= 1.0 && < 1.2
|
,shakespeare-text >= 1.0 && < 1.2
|
||||||
@ -183,6 +185,7 @@ test-suite tests
|
|||||||
, old-time
|
, old-time
|
||||||
, parsec
|
, parsec
|
||||||
, process
|
, process
|
||||||
|
, regex-tdfa
|
||||||
, regexpr
|
, regexpr
|
||||||
, safe
|
, safe
|
||||||
, shakespeare-text >= 1.0 && < 1.2
|
, shakespeare-text >= 1.0 && < 1.2
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user