mostly replace slow regexpr with regex-tdfa (fixes #189)
This commit is contained in:
		
							parent
							
								
									4c509ff933
								
							
						
					
					
						commit
						3a16e6cfc7
					
				| @ -655,7 +655,7 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f | ||||
|                 matcherMatches pats = patternMatches $  "(" ++ intercalate "|" pats ++ ")" | ||||
|                   where | ||||
|                     patternMatches :: Regexp -> Bool | ||||
|                     patternMatches pat = regexMatchesCIRegexCompat pat csvline | ||||
|                     patternMatches pat = regexMatchesCI pat csvline | ||||
|                       where | ||||
|                         csvline = intercalate "," record | ||||
| 
 | ||||
|  | ||||
| @ -81,7 +81,7 @@ detect :: FilePath -> String -> Bool | ||||
| detect f s | ||||
|   | f /= "-"  = takeExtension f `elem` ['.':format, ".j"]  -- from a file: yes if the extension is .journal or .j | ||||
|   -- from stdin: yes if we can see something that looks like a journal entry (digits in column 0 with the next line indented) | ||||
|   | otherwise = isJust $ regexMatch "^[0-9]+.*\n[ \t]+" s | ||||
|   | otherwise = regexMatches "^[0-9]+.*\n[ \t]+" s | ||||
| 
 | ||||
| -- | Parse and post-process a "Journal" from hledger's journal file | ||||
| -- format, or give an error. | ||||
|  | ||||
| @ -1,11 +1,28 @@ | ||||
| -- Regular expression helpers. | ||||
| -- Currently using mostly regexpr and some regex-tdfa. | ||||
| -- Note many of these will die on malformed regexps. | ||||
| {-# LANGUAGE ScopedTypeVariables #-} | ||||
| {- | ||||
| 
 | ||||
| Easy regular expression helpers. | ||||
| 
 | ||||
| These should | ||||
| - have mnemonic names | ||||
| - have simple monomorphic types | ||||
| - work with strings | ||||
| - support extended regular expressions | ||||
| - support replacement | ||||
| - support splitting | ||||
| - support unicode | ||||
| - be cross-platform, not requiring C libraries | ||||
| 
 | ||||
| They currently can | ||||
| - die on malformed regexps | ||||
| - be slow (regexpr) | ||||
| 
 | ||||
| -} | ||||
| 
 | ||||
| module Hledger.Utils.Regex ( | ||||
|    regexMatch | ||||
|   ,regexMatchCI | ||||
|   ,regexMatches | ||||
|   --  regexMatch | ||||
|   -- ,regexMatchCI | ||||
|    regexMatches | ||||
|   ,regexMatchesCI | ||||
|   ,containsRegex | ||||
|   ,regexReplace | ||||
| @ -13,47 +30,89 @@ module Hledger.Utils.Regex ( | ||||
|   ,regexReplaceBy | ||||
|   ,regexToCaseInsensitive | ||||
|   ,regexSplit | ||||
|   ,regexMatchesRegexCompat | ||||
|   ,regexMatchesCIRegexCompat | ||||
|   ,toRegex | ||||
|   ) | ||||
| where | ||||
| import Data.Maybe | ||||
| import Text.Regex.TDFA | ||||
| import Text.RegexPR | ||||
| 
 | ||||
| -- regexMatch :: String -> String -> MatchFun Maybe | ||||
| regexMatch r s = matchRegexPR r s | ||||
| import Data.List (foldl') | ||||
| import Text.RegexPR (splitRegexPR) | ||||
| import Text.Regex.TDFA ( | ||||
|   Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt, | ||||
|   makeRegexOpts, AllMatches(getAllMatches), match, (=~) | ||||
|   ) | ||||
| 
 | ||||
| -- regexMatchCI :: String -> String -> MatchFun Maybe | ||||
| regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s | ||||
| 
 | ||||
| regexMatches :: String -> String -> Bool | ||||
| regexMatches r s = isJust $ matchRegexPR r s | ||||
| 
 | ||||
| regexMatchesCI :: String -> String -> Bool | ||||
| regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s | ||||
| type Regexp = String | ||||
| type Replacement = String | ||||
| 
 | ||||
| containsRegex :: Regexp -> String -> Bool | ||||
| containsRegex = regexMatchesCI | ||||
| 
 | ||||
| regexReplace :: String -> String -> String -> String | ||||
| regexReplace r repl s = gsubRegexPR r repl s | ||||
| 
 | ||||
| regexReplaceCI :: String -> String -> String -> String | ||||
| regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s | ||||
| 
 | ||||
| regexReplaceBy :: String -> (String -> String) -> String -> String | ||||
| regexReplaceBy r replfn s = gsubRegexPRBy r replfn s | ||||
| 
 | ||||
| regexToCaseInsensitive :: String -> String | ||||
| regexToCaseInsensitive :: Regexp -> Regexp | ||||
| regexToCaseInsensitive r = "(?i)"++ r | ||||
| 
 | ||||
| regexSplit :: String -> String -> [String] | ||||
| -- regexpr - may be slow | ||||
| 
 | ||||
| regexSplit :: Regexp -> String -> [Regexp] | ||||
| regexSplit = splitRegexPR | ||||
| 
 | ||||
| -- regex-compat (regex-posix) functions that perform better than regexpr. | ||||
| regexMatchesRegexCompat :: String -> String -> Bool | ||||
| regexMatchesRegexCompat = flip (=~) | ||||
| -- regexMatch :: Regexp -> String -> MatchFun Maybe | ||||
| -- regexMatch r s = matchRegexPR r s | ||||
| 
 | ||||
| regexMatchesCIRegexCompat :: String -> String -> Bool | ||||
| regexMatchesCIRegexCompat r = match (makeRegexOpts defaultCompOpt { multiline = True, caseSensitive = False, newSyntax = True } defaultExecOpt r) | ||||
| -- regexMatchCI :: Regexp -> String -> MatchFun Maybe | ||||
| -- regexMatchCI r s = regexMatch (regexToCaseInsensitive r) s | ||||
| 
 | ||||
| -- regexMatches :: Regexp -> String -> Bool | ||||
| -- regexMatches r s = isJust $ matchRegexPR r s | ||||
| 
 | ||||
| -- regexMatchesCI :: Regexp -> String -> Bool | ||||
| -- regexMatchesCI r s = regexMatches (regexToCaseInsensitive r) s | ||||
| 
 | ||||
| -- regexReplace :: Regexp -> Replacement -> String -> String | ||||
| -- regexReplace r repl s = gsubRegexPR r repl s | ||||
| 
 | ||||
| -- regexReplaceCI :: Regexp -> Replacement -> String -> String | ||||
| -- regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s | ||||
| 
 | ||||
| -- regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String | ||||
| -- regexReplaceBy r replfn s = gsubRegexPRBy r replfn s | ||||
| 
 | ||||
| -- regex-tdfa | ||||
| 
 | ||||
| compOpt :: CompOption | ||||
| compOpt = defaultCompOpt | ||||
| 
 | ||||
| execOpt :: ExecOption | ||||
| execOpt = defaultExecOpt | ||||
| 
 | ||||
| toRegex :: Regexp -> Regex | ||||
| toRegex = makeRegexOpts compOpt execOpt | ||||
| 
 | ||||
| -- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a | ||||
| -- regexMatch' r s = s =~ (toRegex r) | ||||
| 
 | ||||
| regexMatches :: Regexp -> String -> Bool | ||||
| regexMatches = flip (=~) | ||||
| 
 | ||||
| regexMatchesCI :: Regexp -> String -> Bool | ||||
| regexMatchesCI r = match (makeRegexOpts compOpt{caseSensitive=False} execOpt r) | ||||
| 
 | ||||
| regexReplace :: Regexp -> Replacement -> String -> String | ||||
| regexReplace r repl = regexReplaceBy r (const repl) | ||||
| 
 | ||||
| regexReplaceCI :: Regexp -> Replacement -> String -> String | ||||
| regexReplaceCI r s = regexReplace (regexToCaseInsensitive r) s | ||||
| 
 | ||||
| regexReplaceBy :: Regexp -> (String -> Replacement) -> String -> String | ||||
| regexReplaceBy r = replaceAll (toRegex r) | ||||
| 
 | ||||
| -- from http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries | ||||
| replaceAll :: Regex -> (String -> Replacement) -> String -> String | ||||
| replaceAll re f s = start end | ||||
|   where | ||||
|     (_, end, start) = foldl' go (0, s, id) $ getAllMatches $ match re s | ||||
|     go (ind,read,write) (off,len) = | ||||
|       let (skip, start) = splitAt (off - ind) read  | ||||
|           (matched, remaining) = splitAt len start  | ||||
|       in (off + len, remaining, write . (skip++) . (f matched ++)) | ||||
| 
 | ||||
|  | ||||
| @ -34,6 +34,8 @@ import System.Process (readProcessWithExitCode) | ||||
| import System.Time (ClockTime, getClockTime, diffClockTimes, TimeDiff(TimeDiff)) | ||||
| import Test.HUnit | ||||
| import Text.Printf | ||||
| import Text.Regex.TDFA (match) | ||||
| 
 | ||||
| 
 | ||||
| -- kludge - adapt to whichever directory version is installed, or when | ||||
| -- cabal macros aren't available, assume the new directory | ||||
| @ -179,7 +181,8 @@ safeGetDirectoryContents "" = getDirectoryContents "." | ||||
| safeGetDirectoryContents fp = getDirectoryContents fp | ||||
| 
 | ||||
| -- | Does the second file represent a backup of the first, and if so which version is it ? | ||||
| -- XXX nasty regex types intruding, add a simpler api to Hledger.Utils.Regex | ||||
| backupNumber :: FilePath -> FilePath -> Maybe Int | ||||
| backupNumber f g = case regexMatch ("^" ++ f ++ "\\.([0-9]+)$") g of | ||||
|                         Just (_, ((_,suffix):_)) -> readMay suffix | ||||
| backupNumber f g = case match (toRegex ("^" ++ f ++ "\\.([0-9]+)$")) g of | ||||
|                         (_::FilePath, _::FilePath, _::FilePath, [ext::FilePath]) -> readMay ext | ||||
|                         _ -> Nothing | ||||
|  | ||||
| @ -80,6 +80,7 @@ library | ||||
|                  ,old-time | ||||
|                  ,parsec | ||||
|                  ,process | ||||
|                  ,regex-tdfa | ||||
|                  ,regexpr >= 0.5.1 | ||||
|                  ,safe >= 0.2 | ||||
| -- http://www.yesodweb.com/blog/2014/04/consolidation-progress | ||||
| @ -149,6 +150,7 @@ executable hledger | ||||
|                  ,old-time | ||||
|                  ,parsec | ||||
|                  ,process | ||||
|                  ,regex-tdfa | ||||
|                  ,regexpr >= 0.5.1 | ||||
|                  ,safe >= 0.2 | ||||
|                  ,shakespeare-text >= 1.0 && < 1.2 | ||||
| @ -183,6 +185,7 @@ test-suite tests | ||||
|                , old-time | ||||
|                , parsec | ||||
|                , process | ||||
|                , regex-tdfa | ||||
|                , regexpr | ||||
|                , safe | ||||
|                , shakespeare-text >= 1.0 && < 1.2 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user