;csv: Matcher type, simplify

This commit is contained in:
Simon Michael 2020-02-12 06:20:40 -08:00
parent 1cbce98a68
commit becd891dd1

View File

@ -391,22 +391,27 @@ type CsvFieldName = String
type CsvFieldIndex = Int type CsvFieldIndex = Int
type JournalFieldName = String type JournalFieldName = String
type FieldTemplate = String type FieldTemplate = String
type DateFormat = String
type RegexpPattern = String
-- | A conditional block: a CSV record matcher, and a sequence of zero -- | A single test for matching a CSV record, in one way or another.
-- or more rules which will be enabled only for matched records. Three data Matcher =
-- types of rule are allowed inside conditional blocks: field RecordMatcher RegexpPattern -- ^ match if this regexp matches the overall CSV record
-- | FieldMatcher CsvFieldName RegexpPattern -- ^ match if this regexp matches the named CSV field
deriving (Show, Eq)
-- | A conditional block: a set of CSV record matchers, and a sequence
-- of zero or more rules which will be enabled only when one or of the
-- matchers succeeds.
--
-- Three types of rule are allowed inside conditional blocks: field
-- assignments, skip, end. (A skip or end rule is stored as if it was -- assignments, skip, end. (A skip or end rule is stored as if it was
-- a field assignment, and executed in validateCsv. XXX) -- a field assignment, and executed in validateCsv. XXX)
data ConditionalBlock = CB { data ConditionalBlock = CB {
cbMatcher :: [RecordMatcher] cbMatchers :: [Matcher]
,cbAssignments :: [(JournalFieldName, FieldTemplate)] ,cbAssignments :: [(JournalFieldName, FieldTemplate)]
} deriving (Show, Eq) } deriving (Show, Eq)
type RecordMatcher = [RegexpPattern] -- match if any regexps match any of the csv fields
-- type FieldMatcher = (CsvFieldName, [RegexpPattern]) -- match if any regexps match this csv field
type DateFormat = String
type RegexpPattern = String
defrules = CsvRules { defrules = CsvRules {
rdirectives=[], rdirectives=[],
rcsvfieldindexes=[], rcsvfieldindexes=[],
@ -633,24 +638,28 @@ conditionalblockp :: CsvRulesParser ConditionalBlock
conditionalblockp = do conditionalblockp = do
lift $ dbgparse 3 "trying conditionalblockp" lift $ dbgparse 3 "trying conditionalblockp"
string "if" >> lift (skipMany spacenonewline) >> optional newline string "if" >> lift (skipMany spacenonewline) >> optional newline
ms <- some recordmatcherp ms <- some matcherp
as <- many (try $ lift (skipSome spacenonewline) >> fieldassignmentp) as <- many (try $ lift (skipSome spacenonewline) >> fieldassignmentp)
when (null as) $ when (null as) $
Fail.fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n" Fail.fail "start of conditional block found, but no assignment rules afterward\n(assignment rules in a conditional block should be indented)\n"
return $ CB{cbMatcher=ms, cbAssignments=as} return $ CB{cbMatchers=ms, cbAssignments=as}
<?> "conditional block" <?> "conditional block"
recordmatcherp :: CsvRulesParser [String] -- A single matcher, on one line
recordmatcherp = do -- XXX Currently only parses a RecordMatcher
lift $ dbgparse 2 "trying recordmatcherp" matcherp :: CsvRulesParser Matcher
matcherp = do
lift $ dbgparse 2 "trying matcherp"
-- pos <- currentPos -- pos <- currentPos
_ <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline) _ <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline)
ps <- patternsp r <- regexp
when (null ps) $ -- when (null ps) $
Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n" -- Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n"
return ps return $ RecordMatcher r
<?> "record matcher" <?> "record matcher"
-- An operator indicating the type of match
-- XXX currently only ~ (regex), unused
matchoperatorp :: CsvRulesParser String matchoperatorp :: CsvRulesParser String
matchoperatorp = fmap T.unpack $ choiceInState $ map string matchoperatorp = fmap T.unpack $ choiceInState $ map string
["~" ["~"
@ -659,13 +668,8 @@ matchoperatorp = fmap T.unpack $ choiceInState $ map string
-- ,"!=" -- ,"!="
] ]
patternsp :: CsvRulesParser [String] -- A single regular expression
patternsp = do regexp :: CsvRulesParser RegexpPattern
lift $ dbgparse 3 "trying patternsp"
ps <- many regexp
return ps
regexp :: CsvRulesParser String
regexp = do regexp = do
lift $ dbgparse 3 "trying regexp" lift $ dbgparse 3 "trying regexp"
notFollowedBy matchoperatorp notFollowedBy matchoperatorp
@ -972,16 +976,13 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f
where where
blocksAssigning f = filter (any ((==f).fst) . cbAssignments) $ rconditionalblocks rules blocksAssigning f = filter (any ((==f).fst) . cbAssignments) $ rconditionalblocks rules
blockMatches :: ConditionalBlock -> Bool blockMatches :: ConditionalBlock -> Bool
blockMatches CB{..} = all matcherMatches cbMatcher blockMatches CB{..} = any matcherMatches cbMatchers
where where
matcherMatches :: RecordMatcher -> Bool matcherMatches :: Matcher -> Bool
-- matcherMatches pats = any patternMatches pats matcherMatches (RecordMatcher pat) = regexMatchesCI pat csvline
matcherMatches pats = patternMatches $ "(" ++ intercalate "|" pats ++ ")"
where where
patternMatches :: RegexpPattern -> Bool csvline = intercalate "," record
patternMatches pat = regexMatchesCI pat csvline -- matcherMatches (FieldMatcher field pat) = undefined
where
csvline = intercalate "," record
-- | Render a field assigment's template, possibly interpolating referenced -- | Render a field assigment's template, possibly interpolating referenced
-- CSV field values. Outer whitespace is removed from interpolated values. -- CSV field values. Outer whitespace is removed from interpolated values.
@ -1039,11 +1040,11 @@ tests_CsvReader = tests "CsvReader" [
,test"assignment with empty value" $ ,test"assignment with empty value" $
parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?= parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
(Right defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatcher=[["foo"]],cbAssignments=[("account2","foo")]}]}) (Right defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher "foo"],cbAssignments=[("account2","foo")]}]})
] ]
,tests "conditionalblockp" [ ,tests "conditionalblockp" [
test"space after conditional" $ -- #1120 test"space after conditional" $ -- #1120
parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?= parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
(Right $ CB{cbMatcher=[["a"]],cbAssignments=[("account2","b")]}) (Right $ CB{cbMatchers=[RecordMatcher "a"],cbAssignments=[("account2","b")]})
] ]
] ]