lib: memoize "if blocks that assign filed f" for 50% CSV reader speedup
This commit is contained in:
parent
834e9ec104
commit
c397b90b5b
@ -53,6 +53,7 @@ import Data.Bifunctor (first)
|
|||||||
import "base-compat-batteries" Data.List.Compat
|
import "base-compat-batteries" Data.List.Compat
|
||||||
import qualified Data.List.Split as LS (splitOn)
|
import qualified Data.List.Split as LS (splitOn)
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
|
import Data.MemoUgly (memo)
|
||||||
import Data.Ord
|
import Data.Ord
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
@ -164,29 +165,29 @@ defaultRulesText csvfile = T.pack $ unlines
|
|||||||
," account2 assets:bank:savings\n"
|
," account2 assets:bank:savings\n"
|
||||||
]
|
]
|
||||||
|
|
||||||
addDirective :: (DirectiveName, String) -> CsvRules -> CsvRules
|
addDirective :: (DirectiveName, String) -> CsvRulesParsed -> CsvRulesParsed
|
||||||
addDirective d r = r{rdirectives=d:rdirectives r}
|
addDirective d r = r{rdirectives=d:rdirectives r}
|
||||||
|
|
||||||
addAssignment :: (HledgerFieldName, FieldTemplate) -> CsvRules -> CsvRules
|
addAssignment :: (HledgerFieldName, FieldTemplate) -> CsvRulesParsed -> CsvRulesParsed
|
||||||
addAssignment a r = r{rassignments=a:rassignments r}
|
addAssignment a r = r{rassignments=a:rassignments r}
|
||||||
|
|
||||||
setIndexesAndAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
setIndexesAndAssignmentsFromList :: [CsvFieldName] -> CsvRulesParsed -> CsvRulesParsed
|
||||||
setIndexesAndAssignmentsFromList fs r = addAssignmentsFromList fs . setCsvFieldIndexesFromList fs $ r
|
setIndexesAndAssignmentsFromList fs r = addAssignmentsFromList fs . setCsvFieldIndexesFromList fs $ r
|
||||||
|
|
||||||
setCsvFieldIndexesFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
setCsvFieldIndexesFromList :: [CsvFieldName] -> CsvRulesParsed -> CsvRulesParsed
|
||||||
setCsvFieldIndexesFromList fs r = r{rcsvfieldindexes=zip fs [1..]}
|
setCsvFieldIndexesFromList fs r = r{rcsvfieldindexes=zip fs [1..]}
|
||||||
|
|
||||||
addAssignmentsFromList :: [CsvFieldName] -> CsvRules -> CsvRules
|
addAssignmentsFromList :: [CsvFieldName] -> CsvRulesParsed -> CsvRulesParsed
|
||||||
addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
|
addAssignmentsFromList fs r = foldl' maybeAddAssignment r journalfieldnames
|
||||||
where
|
where
|
||||||
maybeAddAssignment rules f = (maybe id addAssignmentFromIndex $ elemIndex f fs) rules
|
maybeAddAssignment rules f = (maybe id addAssignmentFromIndex $ elemIndex f fs) rules
|
||||||
where
|
where
|
||||||
addAssignmentFromIndex i = addAssignment (f, "%"++show (i+1))
|
addAssignmentFromIndex i = addAssignment (f, "%"++show (i+1))
|
||||||
|
|
||||||
addConditionalBlock :: ConditionalBlock -> CsvRules -> CsvRules
|
addConditionalBlock :: ConditionalBlock -> CsvRulesParsed -> CsvRulesParsed
|
||||||
addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r}
|
addConditionalBlock b r = r{rconditionalblocks=b:rconditionalblocks r}
|
||||||
|
|
||||||
addConditionalBlocks :: [ConditionalBlock] -> CsvRules -> CsvRules
|
addConditionalBlocks :: [ConditionalBlock] -> CsvRulesParsed -> CsvRulesParsed
|
||||||
addConditionalBlocks bs r = r{rconditionalblocks=bs++rconditionalblocks r}
|
addConditionalBlocks bs r = r{rconditionalblocks=bs++rconditionalblocks r}
|
||||||
|
|
||||||
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
|
getDirective :: DirectiveName -> CsvRules -> Maybe FieldTemplate
|
||||||
@ -239,18 +240,42 @@ validateRules rules = do
|
|||||||
|
|
||||||
-- | A set of data definitions and account-matching patterns sufficient to
|
-- | A set of data definitions and account-matching patterns sufficient to
|
||||||
-- convert a particular CSV data file into meaningful journal transactions.
|
-- convert a particular CSV data file into meaningful journal transactions.
|
||||||
data CsvRules = CsvRules {
|
data CsvRules' a = CsvRules' {
|
||||||
rdirectives :: [(DirectiveName,String)],
|
rdirectives :: [(DirectiveName,String)],
|
||||||
-- ^ top-level rules, as (keyword, value) pairs
|
-- ^ top-level rules, as (keyword, value) pairs
|
||||||
rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
|
rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
|
||||||
-- ^ csv field names and their column number, if declared by a fields list
|
-- ^ csv field names and their column number, if declared by a fields list
|
||||||
rassignments :: [(HledgerFieldName, FieldTemplate)],
|
rassignments :: [(HledgerFieldName, FieldTemplate)],
|
||||||
-- ^ top-level assignments to hledger fields, as (field name, value template) pairs
|
-- ^ top-level assignments to hledger fields, as (field name, value template) pairs
|
||||||
rconditionalblocks :: [ConditionalBlock]
|
rconditionalblocks :: [ConditionalBlock],
|
||||||
-- ^ conditional blocks, which containing additional assignments/rules to apply to matched csv records
|
-- ^ conditional blocks, which containing additional assignments/rules to apply to matched csv records
|
||||||
} deriving (Show, Eq)
|
rblocksassigning :: a -- (String -> [ConditionalBlock])
|
||||||
|
-- ^ all conditional blocks which can potentially assign field with a given name (memoized)
|
||||||
|
}
|
||||||
|
|
||||||
type CsvRulesParser a = StateT CsvRules SimpleTextParser a
|
-- | Type used by parsers. Directives, assignments and conditional blocks
|
||||||
|
-- are in the reverse order compared to what is in the file and rblocksassigning is non-functional,
|
||||||
|
-- could not be used for processing CSV records yet
|
||||||
|
type CsvRulesParsed = CsvRules' ()
|
||||||
|
|
||||||
|
-- | Type used after parsing is done. Directives, assignments and conditional blocks
|
||||||
|
-- are in the same order as they were in the unput file and rblocksassigning is functional.
|
||||||
|
-- Ready to be used for CSV record processing
|
||||||
|
type CsvRules = CsvRules' (String -> [ConditionalBlock])
|
||||||
|
|
||||||
|
instance Eq CsvRules where
|
||||||
|
r1 == r2 = (rdirectives r1, rcsvfieldindexes r1, rassignments r1) ==
|
||||||
|
(rdirectives r2, rcsvfieldindexes r2, rassignments r2)
|
||||||
|
|
||||||
|
-- It is used for debug output only
|
||||||
|
instance Show CsvRules where
|
||||||
|
show r = "CsvRules { rdirectives=" ++ show (rdirectives r) ++
|
||||||
|
", rcsvfieldindexes=" ++ show (rcsvfieldindexes r) ++
|
||||||
|
", rassignments=" ++ show (rassignments r) ++
|
||||||
|
", rconditionalblocks="++ show (rconditionalblocks r) ++
|
||||||
|
" }"
|
||||||
|
|
||||||
|
type CsvRulesParser a = StateT CsvRulesParsed SimpleTextParser a
|
||||||
|
|
||||||
-- | The keyword of a CSV rule - "fields", "skip", "if", etc.
|
-- | The keyword of a CSV rule - "fields", "skip", "if", etc.
|
||||||
type DirectiveName = String
|
type DirectiveName = String
|
||||||
@ -296,11 +321,25 @@ data ConditionalBlock = CB {
|
|||||||
,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
|
,cbAssignments :: [(HledgerFieldName, FieldTemplate)]
|
||||||
} deriving (Show, Eq)
|
} deriving (Show, Eq)
|
||||||
|
|
||||||
defrules = CsvRules {
|
defrules :: CsvRulesParsed
|
||||||
|
defrules = CsvRules' {
|
||||||
rdirectives=[],
|
rdirectives=[],
|
||||||
rcsvfieldindexes=[],
|
rcsvfieldindexes=[],
|
||||||
rassignments=[],
|
rassignments=[],
|
||||||
rconditionalblocks=[]
|
rconditionalblocks=[],
|
||||||
|
rblocksassigning = ()
|
||||||
|
}
|
||||||
|
|
||||||
|
-- | Create CsvRules from the content parsed out of the rules file
|
||||||
|
mkrules :: CsvRulesParsed -> CsvRules
|
||||||
|
mkrules rules =
|
||||||
|
let conditionalblocks = reverse $ rconditionalblocks rules in
|
||||||
|
CsvRules' {
|
||||||
|
rdirectives=reverse $ rdirectives rules,
|
||||||
|
rcsvfieldindexes=rcsvfieldindexes rules,
|
||||||
|
rassignments=reverse $ rassignments rules,
|
||||||
|
rconditionalblocks=conditionalblocks,
|
||||||
|
rblocksassigning = memo (\f -> filter (any ((==f).fst) . cbAssignments) conditionalblocks)
|
||||||
}
|
}
|
||||||
|
|
||||||
--- *** rules parsers
|
--- *** rules parsers
|
||||||
@ -382,10 +421,7 @@ rulesp = do
|
|||||||
]
|
]
|
||||||
eof
|
eof
|
||||||
r <- get
|
r <- get
|
||||||
return r{rdirectives=reverse $ rdirectives r
|
return $ mkrules r
|
||||||
,rassignments=reverse $ rassignments r
|
|
||||||
,rconditionalblocks=reverse $ rconditionalblocks r
|
|
||||||
}
|
|
||||||
|
|
||||||
blankorcommentlinep :: CsvRulesParser ()
|
blankorcommentlinep :: CsvRulesParser ()
|
||||||
blankorcommentlinep = lift (dbgparse 8 "trying blankorcommentlinep") >> choiceInState [blanklinep, commentlinep]
|
blankorcommentlinep = lift (dbgparse 8 "trying blankorcommentlinep") >> choiceInState [blanklinep, commentlinep]
|
||||||
@ -1105,10 +1141,8 @@ getEffectiveAssignment rules record f = lastMay $ map snd $ assignments
|
|||||||
-- all top level field assignments
|
-- all top level field assignments
|
||||||
toplevelassignments = rassignments rules
|
toplevelassignments = rassignments rules
|
||||||
-- all field assignments in conditional blocks assigning to field f and active for the current csv record
|
-- all field assignments in conditional blocks assigning to field f and active for the current csv record
|
||||||
conditionalassignments = concatMap cbAssignments $ filter isBlockActive $ blocksAssigning f
|
conditionalassignments = concatMap cbAssignments $ filter isBlockActive $ rblocksassigning rules f
|
||||||
where
|
where
|
||||||
-- all conditional blocks which can potentially assign field f
|
|
||||||
blocksAssigning f = filter (any ((==f).fst) . cbAssignments) $ rconditionalblocks rules
|
|
||||||
-- does this conditional block match the current csv record ?
|
-- does this conditional block match the current csv record ?
|
||||||
isBlockActive :: ConditionalBlock -> Bool
|
isBlockActive :: ConditionalBlock -> Bool
|
||||||
isBlockActive CB{..} = any matcherMatches cbMatchers
|
isBlockActive CB{..} = any matcherMatches cbMatchers
|
||||||
@ -1182,21 +1216,21 @@ parseDateWithCustomOrDefaultFormats mformat s = asum $ map parsewith formats
|
|||||||
tests_CsvReader = tests "CsvReader" [
|
tests_CsvReader = tests "CsvReader" [
|
||||||
tests "parseCsvRules" [
|
tests "parseCsvRules" [
|
||||||
test "empty file" $
|
test "empty file" $
|
||||||
parseCsvRules "unknown" "" @?= Right defrules
|
parseCsvRules "unknown" "" @?= Right (mkrules defrules)
|
||||||
]
|
]
|
||||||
,tests "rulesp" [
|
,tests "rulesp" [
|
||||||
test "trailing comments" $
|
test "trailing comments" $
|
||||||
parseWithState' defrules rulesp "skip\n# \n#\n" @?= Right defrules{rdirectives = [("skip","")]}
|
parseWithState' defrules rulesp "skip\n# \n#\n" @?= Right (mkrules $ defrules{rdirectives = [("skip","")]})
|
||||||
|
|
||||||
,test "trailing blank lines" $
|
,test "trailing blank lines" $
|
||||||
parseWithState' defrules rulesp "skip\n\n \n" @?= (Right defrules{rdirectives = [("skip","")]})
|
parseWithState' defrules rulesp "skip\n\n \n" @?= (Right (mkrules $ defrules{rdirectives = [("skip","")]}))
|
||||||
|
|
||||||
,test "no final newline" $
|
,test "no final newline" $
|
||||||
parseWithState' defrules rulesp "skip" @?= (Right defrules{rdirectives=[("skip","")]})
|
parseWithState' defrules rulesp "skip" @?= (Right (mkrules $ defrules{rdirectives=[("skip","")]}))
|
||||||
|
|
||||||
,test "assignment with empty value" $
|
,test "assignment with empty value" $
|
||||||
parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
|
parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
|
||||||
(Right defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher "foo"],cbAssignments=[("account2","foo")]}]})
|
(Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher "foo"],cbAssignments=[("account2","foo")]}]}))
|
||||||
]
|
]
|
||||||
,tests "conditionalblockp" [
|
,tests "conditionalblockp" [
|
||||||
test "space after conditional" $ -- #1120
|
test "space after conditional" $ -- #1120
|
||||||
@ -1226,11 +1260,11 @@ tests_CsvReader = tests "CsvReader" [
|
|||||||
]
|
]
|
||||||
|
|
||||||
,tests "getEffectiveAssignment" [
|
,tests "getEffectiveAssignment" [
|
||||||
let rules = defrules{rcsvfieldindexes=[("csvdate",1)],rassignments=[("date","%csvdate")]}
|
let rules = mkrules $ defrules {rcsvfieldindexes=[("csvdate",1)],rassignments=[("date","%csvdate")]}
|
||||||
|
|
||||||
in test "toplevel" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")
|
in test "toplevel" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")
|
||||||
|
|
||||||
,let rules = defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher "%csvdate" "a"] [("date","%csvdate")]]}
|
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher "%csvdate" "a"] [("date","%csvdate")]]}
|
||||||
in test "conditional" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")
|
in test "conditional" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|||||||
@ -833,6 +833,31 @@ expecting end of input, field assignment, or newline
|
|||||||
)
|
)
|
||||||
>=1
|
>=1
|
||||||
|
|
||||||
|
# 42. Rules override each other in the order listed in the file
|
||||||
|
<
|
||||||
|
10/2009/09,Flubber Co,50
|
||||||
|
|
||||||
|
RULES
|
||||||
|
fields date, description, amount
|
||||||
|
date-format %d/%Y/%m
|
||||||
|
currency $
|
||||||
|
account1 assets:myacct
|
||||||
|
if Flubber
|
||||||
|
account2 foo
|
||||||
|
comment bar
|
||||||
|
|
||||||
|
if 10/2009/09.*Flubber
|
||||||
|
account2 acct
|
||||||
|
comment cmt
|
||||||
|
|
||||||
|
$ ./csvtest.sh
|
||||||
|
2009-09-10 Flubber Co ; cmt
|
||||||
|
assets:myacct $50
|
||||||
|
acct $-50
|
||||||
|
|
||||||
|
>=0
|
||||||
|
|
||||||
|
|
||||||
## .
|
## .
|
||||||
#<
|
#<
|
||||||
#$ ./csvtest.sh
|
#$ ./csvtest.sh
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user