mostly replace slow regexpr with regex-tdfa (fixes #189)
This commit is contained in:
		
							parent
							
								
									4c509ff933
								
							
						
					
					
						commit
						3a16e6cfc7
					
				| @ -655,7 +655,7 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f | |||||||
|                 matcherMatches pats = patternMatches $  "(" ++ intercalate "|" pats ++ ")" |                 matcherMatches pats = patternMatches $  "(" ++ intercalate "|" pats ++ ")" | ||||||
|                   where |                   where | ||||||
|                     patternMatches :: Regexp -> Bool |                     patternMatches :: Regexp -> Bool | ||||||
|                     patternMatches pat = regexMatchesCIRegexCompat pat csvline |                     patternMatches pat = regexMatchesCI pat csvline | ||||||
|                       where |                       where | ||||||
|                         csvline = intercalate "," record |                         csvline = intercalate "," record | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -81,7 +81,7 @@ detect :: FilePath -> String -> Bool | |||||||
| detect f s | detect f s | ||||||
|   | f /= "-"  = takeExtension f `elem` ['.':format, ".j"]  -- from a file: yes if the extension is .journal or .j |   | f /= "-"  = takeExtension f `elem` ['.':format, ".j"]  -- from a file: yes if the extension is .journal or .j | ||||||
|   -- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented) |   -- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented) | ||||||
|   | otherwise = isJust $ regexMatch "^[0-9]+.*\n[ \t]+" s |   | otherwise = regexMatches "^[0-9]+.*\n[ \t]+" s | ||||||
| 
 | 
 | ||||||
| -- | Parse and post-process a "Journal" from hledger's journal file | -- | Parse and post-process a "Journal" from hledger's journal file | ||||||
| -- format, or give an error. | -- format, or give an error. | ||||||
|  | |||||||
| @ -1,11 +1,28 @@ | |||||||
| -- Regular expression helpers. | {-# LANGUAGE ScopedTypeVariables #-} | ||||||
| -- Currently using mostly regexpr and some regex-tdfa. | {- | ||||||
| -- Note many of these will die on malformed regexps. | 
 | ||||||
|  | Easy regular expression helpers. | ||||||
|  | 
 | ||||||
|  | These should | ||||||
|  | - have mnemonic names | ||||||
|  | - have simple monomorphic types | ||||||
|  | - work with strings | ||||||
|  | - support extended regular expressions | ||||||
|  | - support replacement | ||||||
|  | - support splitting | ||||||
|  | - support unicode | ||||||
|  | - be cross-platform, not requiring C libraries | ||||||
|  | 
 | ||||||
|  | They currently can | ||||||
|  | - die on malformed regexps | ||||||
|  | - be slow (regexpr) | ||||||
|  | 
 | ||||||
|  | -} | ||||||
| 
 | 
 | ||||||
| module Hledger.Utils.Regex ( | module Hledger.Utils.Regex ( | ||||||
|    regexMatch |   --  regexMatch | ||||||
|   ,regexMatchCI |   -- ,regexMatchCI | ||||||
|   ,regexMatches |    regexMatches | ||||||
|   ,regexMatchesCI |   ,regexMatchesCI | ||||||
|   ,containsRegex |   ,containsRegex | ||||||
|   ,regexReplace |   ,regexReplace | ||||||
| @ -13,47 +30,89 @@ module Hledger.Utils.Regex ( | |||||||
|   ,regexReplaceBy |   ,regexReplaceBy | ||||||
|   ,regexToCaseInsensitive |   ,regexToCaseInsensitive | ||||||
|   ,regexSplit |   ,regexSplit | ||||||
|   ,regexMatchesRegexCompat |   ,toRegex | ||||||
|   ,regexMatchesCIRegexCompat |  | ||||||
|   ) |   ) | ||||||
| where | where | ||||||
| import Data.Maybe |  | ||||||
| import Text.Regex.TDFA |  | ||||||
| import Text.RegexPR |  | ||||||
| 
 | 
 | ||||||
| -- regexMatch :: String -> String -> MatchFun Maybe | import Data.List (foldl') | ||||||
| regexMatch r s = matchRegexPR r s | import Text.RegexPR (splitRegexPR) | ||||||
|  | import Text.Regex.TDFA ( | ||||||
|  |   Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt, | ||||||
|  |   makeRegexOpts, AllMatches(getAllMatches), match, (=~) | ||||||
|  |   ) | ||||||
| 
 | 
 | ||||||
| -- regexMatchCI :: String -> String -> MatchFun Maybe |  | ||||||
| regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s |  | ||||||
| 
 | 
 | ||||||
| regexMatches :: String -> String -> Bool | type Regexp = String | ||||||
| regexMatches r s = isJust $ matchRegexPR r s | type Replacement = String | ||||||
| 
 |  | ||||||
| regexMatchesCI :: String -> String -> Bool |  | ||||||
| regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s |  | ||||||
| 
 | 
 | ||||||
|  | containsRegex :: Regexp -> String -> Bool | ||||||
| containsRegex = regexMatchesCI | containsRegex = regexMatchesCI | ||||||
| 
 | 
 | ||||||
| regexReplace :: String -> String -> String -> String | regexToCaseInsensitive :: Regexp -> Regexp | ||||||
| regexReplace r repl s = gsubRegexPR r repl s |  | ||||||
| 
 |  | ||||||
| regexReplaceCI :: String -> String -> String -> String |  | ||||||
| regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s |  | ||||||
| 
 |  | ||||||
| regexReplaceBy :: String -> (String -> String) -> String -> String |  | ||||||
| regexReplaceBy r replfn s = gsubRegexPRBy r replfn s |  | ||||||
| 
 |  | ||||||
| regexToCaseInsensitive :: String -> String |  | ||||||
| regexToCaseInsensitive r = "(?i)"++ r | regexToCaseInsensitive r = "(?i)"++ r | ||||||
| 
 | 
 | ||||||
| regexSplit :: String -> String -> [String] | -- regexpr - may be slow | ||||||
|  | 
 | ||||||
|  | regexSplit :: Regexp -> String -> [Regexp] | ||||||
| regexSplit = splitRegexPR | regexSplit = splitRegexPR | ||||||
| 
 | 
 | ||||||
| -- regex-compat (regex-posix) functions that perform better than regexpr. | -- regexMatch :: Regexp -> String -> MatchFun Maybe | ||||||
| regexMatchesRegexCompat :: String -> String -> Bool | -- regexMatch r s = matchRegexPR r s | ||||||
| regexMatchesRegexCompat = flip (=~) |  | ||||||
| 
 | 
 | ||||||
| regexMatchesCIRegexCompat :: String -> String -> Bool | -- regexMatchCI :: Regexp -> String -> MatchFun Maybe | ||||||
| regexMatchesCIRegexCompat r = match (makeRegexOpts defaultCompOpt { multiline = True, caseSensitive = False, newSyntax = True } defaultExecOpt r) | -- regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s | ||||||
|  | 
 | ||||||
|  | -- regexMatches :: Regexp -> String -> Bool | ||||||
|  | -- regexMatches r s = isJust $ matchRegexPR r s | ||||||
|  | 
 | ||||||
|  | -- regexMatchesCI :: Regexp -> String -> Bool | ||||||
|  | -- regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s | ||||||
|  | 
 | ||||||
|  | -- regexReplace :: Regexp -> Replacement -> String -> String | ||||||
|  | -- regexReplace r repl s = gsubRegexPR r repl s | ||||||
|  | 
 | ||||||
|  | -- regexReplaceCI :: Regexp -> Replacement -> String -> String | ||||||
|  | -- regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s | ||||||
|  | 
 | ||||||
|  | -- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String | ||||||
|  | -- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s | ||||||
|  | 
 | ||||||
|  | -- regex-tdfa | ||||||
|  | 
 | ||||||
|  | compOpt :: CompOption | ||||||
|  | compOpt = defaultCompOpt | ||||||
|  | 
 | ||||||
|  | execOpt :: ExecOption | ||||||
|  | execOpt = defaultExecOpt | ||||||
|  | 
 | ||||||
|  | toRegex :: Regexp -> Regex | ||||||
|  | toRegex = makeRegexOpts compOpt execOpt | ||||||
|  | 
 | ||||||
|  | -- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a | ||||||
|  | -- regexMatch' r s = s =~ (toRegex r) | ||||||
|  | 
 | ||||||
|  | regexMatches :: Regexp -> String -> Bool | ||||||
|  | regexMatches = flip (=~) | ||||||
|  | 
 | ||||||
|  | regexMatchesCI :: Regexp -> String -> Bool | ||||||
|  | regexMatchesCI r = match (makeRegexOpts compOpt{caseSensitive=False} execOpt r) | ||||||
|  | 
 | ||||||
|  | regexReplace :: Regexp -> Replacement -> String -> String | ||||||
|  | regexReplace r repl = regexReplaceBy r (const repl) | ||||||
|  | 
 | ||||||
|  | regexReplaceCI :: Regexp -> Replacement -> String -> String | ||||||
|  | regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s | ||||||
|  | 
 | ||||||
|  | regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String | ||||||
|  | regexReplaceBy r = replaceAll (toRegex r) | ||||||
|  | 
 | ||||||
|  | -- from http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries | ||||||
|  | replaceAll :: Regex -> (String -> Replacement) -> String -> String | ||||||
|  | replaceAll re f s = start end | ||||||
|  |   where | ||||||
|  |     (_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s | ||||||
|  |     go (ind,read,write) (off,len) = | ||||||
|  |       let (skip, start) = splitAt (off - ind) read  | ||||||
|  |           (matched, remaining) = splitAt len start  | ||||||
|  |       in (off + len, remaining, write . (skip++) . (f matched ++)) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -34,6 +34,8 @@ import System.Process (readProcessWithExitCode) | |||||||
| import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff)) | import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff)) | ||||||
| import Test.HUnit | import Test.HUnit | ||||||
| import Text.Printf | import Text.Printf | ||||||
|  | import Text.Regex.TDFA (match) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| -- kludge - adapt to whichever directory version is installed, or when | -- kludge - adapt to whichever directory version is installed, or when | ||||||
| -- cabal macros aren't available, assume the new directory | -- cabal macros aren't available, assume the new directory | ||||||
| @ -179,7 +181,8 @@ safeGetDirectoryContents "" = getDirectoryContents "." | |||||||
| safeGetDirectoryContents fp = getDirectoryContents fp | safeGetDirectoryContents fp = getDirectoryContents fp | ||||||
| 
 | 
 | ||||||
| -- | Does the second file represent a backup of the first, and if so which version is it ? | -- | Does the second file represent a backup of the first, and if so which version is it ? | ||||||
|  | -- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex | ||||||
| backupNumber :: FilePath -> FilePath -> Maybe Int | backupNumber :: FilePath -> FilePath -> Maybe Int | ||||||
| backupNumber f g = case regexMatch ("^" ++ f ++ "\\.([0-9]+)$") g of | backupNumber f g = case match (toRegex ("^" ++ f ++ "\\.([0-9]+)$")) g of | ||||||
|                         Just (_, ((_,suffix):_)) -> readMay suffix |                         (_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext | ||||||
|                         _ -> Nothing |                         _ -> Nothing | ||||||
|  | |||||||
| @ -80,6 +80,7 @@ library | |||||||
|                  ,old-time |                  ,old-time | ||||||
|                  ,parsec |                  ,parsec | ||||||
|                  ,process |                  ,process | ||||||
|  |                  ,regex-tdfa | ||||||
|                  ,regexpr >= 0.5.1 |                  ,regexpr >= 0.5.1 | ||||||
|                  ,safe >= 0.2 |                  ,safe >= 0.2 | ||||||
| -- http://www.yesodweb.com/blog/2014/04/consolidation-progress | -- http://www.yesodweb.com/blog/2014/04/consolidation-progress | ||||||
| @ -149,6 +150,7 @@ executable hledger | |||||||
|                  ,old-time |                  ,old-time | ||||||
|                  ,parsec |                  ,parsec | ||||||
|                  ,process |                  ,process | ||||||
|  |                  ,regex-tdfa | ||||||
|                  ,regexpr >= 0.5.1 |                  ,regexpr >= 0.5.1 | ||||||
|                  ,safe >= 0.2 |                  ,safe >= 0.2 | ||||||
|                  ,shakespeare-text >= 1.0 && < 1.2 |                  ,shakespeare-text >= 1.0 && < 1.2 | ||||||
| @ -183,6 +185,7 @@ test-suite tests | |||||||
|                , old-time |                , old-time | ||||||
|                , parsec |                , parsec | ||||||
|                , process |                , process | ||||||
|  |                , regex-tdfa | ||||||
|                , regexpr |                , regexpr | ||||||
|                , safe |                , safe | ||||||
|                , shakespeare-text >= 1.0 && < 1.2 |                , shakespeare-text >= 1.0 && < 1.2 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user