From 5ec0a518da6e3bd1e096fee3b5d33eb583533846 Mon Sep 17 00:00:00 2001 From: Michael Sanders Date: Mon, 6 Jul 2020 12:17:35 -0700 Subject: [PATCH] csv: Add support for & operators in conditional blocks --- hledger-lib/Hledger/Read/CsvReader.hs | 73 ++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/hledger-lib/Hledger/Read/CsvReader.hs b/hledger-lib/Hledger/Read/CsvReader.hs index 6804d5a8c..c70d89946 100644 --- a/hledger-lib/Hledger/Read/CsvReader.hs +++ b/hledger-lib/Hledger/Read/CsvReader.hs @@ -303,10 +303,14 @@ type DateFormat = String -- | A regular expression. type RegexpPattern = String +-- | A prefix for a matcher test, either & or none (implicit or). +data MatcherPrefix = And | None + deriving (Show, Eq) + -- | A single test for matching a CSV record, in one way or another. data Matcher = - RecordMatcher RegexpPattern -- ^ match if this regexp matches the overall CSV record - | FieldMatcher CsvFieldReference RegexpPattern -- ^ match if this regexp matches the referenced CSV field's value + RecordMatcher MatcherPrefix RegexpPattern -- ^ match if this regexp matches the overall CSV record + | FieldMatcher MatcherPrefix CsvFieldReference RegexpPattern -- ^ match if this regexp matches the referenced CSV field's value deriving (Show, Eq) -- | A conditional block: a set of CSV record matchers, and a sequence @@ -344,6 +348,22 @@ mkrules rules = rblocksassigning = maybeMemo (\f -> filter (any ((==f).fst) . cbAssignments) conditionalblocks) } +matcherPrefix :: Matcher -> MatcherPrefix +matcherPrefix (RecordMatcher prefix _) = prefix +matcherPrefix (FieldMatcher prefix _ _) = prefix + +-- | Group matchers into associative pairs based on prefix, e.g.: +-- A +-- & B +-- C +-- D +-- & E +-- => [[A, B], [C], [D, E]] +groupedMatchers :: [Matcher] -> [[Matcher]] +groupedMatchers [] = [] +groupedMatchers (x:xs) = (x:ys) : groupedMatchers zs + where (ys, zs) = span (\y -> matcherPrefix y == And) xs + --- *** rules parsers {- @@ -601,10 +621,11 @@ recordmatcherp end = do lift $ dbgparse 8 "trying recordmatcherp" -- pos <- currentPos -- _ <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline) + p <- matcherprefixp r <- regexp end -- when (null ps) $ -- Fail.fail "start of record matcher found, but no patterns afterward\n(patterns should not be indented)\n" - return $ RecordMatcher r + return $ RecordMatcher p r "record matcher" -- | A single matcher for a specific field. A csv field reference @@ -619,14 +640,20 @@ fieldmatcherp end = do -- f' <- fieldnamep -- lift (skipMany spacenonewline) -- return f') + p <- matcherprefixp f <- csvfieldreferencep <* lift (skipMany spacenonewline) -- optional operator.. just ~ (case insensitive infix regex) for now -- _op <- fromMaybe "~" <$> optional matchoperatorp lift (skipMany spacenonewline) r <- regexp end - return $ FieldMatcher f r + return $ FieldMatcher p f r "field matcher" +matcherprefixp :: CsvRulesParser MatcherPrefix +matcherprefixp = do + lift $ dbgparse 8 "trying matcherprefixp" + (char '&' >> lift (skipMany spacenonewline) >> return And) <|> return None + csvfieldreferencep :: CsvRulesParser CsvFieldReference csvfieldreferencep = do lift $ dbgparse 8 "trying csvfieldreferencep" @@ -1147,11 +1174,11 @@ getEffectiveAssignment rules record f = lastMay $ map snd $ assignments where -- does this conditional block match the current csv record ? isBlockActive :: ConditionalBlock -> Bool - isBlockActive CB{..} = any matcherMatches cbMatchers + isBlockActive CB{..} = any (all matcherMatches) $ groupedMatchers cbMatchers where -- does this individual matcher match the current csv record ? matcherMatches :: Matcher -> Bool - matcherMatches (RecordMatcher pat) = regexMatchesCI pat' wholecsvline + matcherMatches (RecordMatcher _ pat) = regexMatchesCI pat' wholecsvline where pat' = dbg7 "regex" pat -- A synthetic whole CSV record to match against. Note, this can be @@ -1161,7 +1188,7 @@ getEffectiveAssignment rules record f = lastMay $ map snd $ assignments -- - and the field separator is always comma -- which means that a field containing a comma will look like two fields. wholecsvline = dbg7 "wholecsvline" $ intercalate "," record - matcherMatches (FieldMatcher csvfieldref pat) = regexMatchesCI pat csvfieldvalue + matcherMatches (FieldMatcher _ csvfieldref pat) = regexMatchesCI pat csvfieldvalue where -- the value of the referenced CSV field to match against. csvfieldvalue = dbg7 "csvfieldvalue" $ replaceCsvFieldReference rules record csvfieldref @@ -1232,12 +1259,12 @@ tests_CsvReader = tests "CsvReader" [ ,test "assignment with empty value" $ parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?= - (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher "foo"],cbAssignments=[("account2","foo")]}]})) + (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher None "foo"],cbAssignments=[("account2","foo")]}]})) ] ,tests "conditionalblockp" [ test "space after conditional" $ -- #1120 parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?= - (Right $ CB{cbMatchers=[RecordMatcher "a"],cbAssignments=[("account2","b")]}) + (Right $ CB{cbMatchers=[RecordMatcher None "a"],cbAssignments=[("account2","b")]}) ,tests "csvfieldreferencep" [ test "number" $ parseWithState' defrules csvfieldreferencep "%1" @?= (Right "%1") @@ -1248,13 +1275,19 @@ tests_CsvReader = tests "CsvReader" [ ,tests "matcherp" [ test "recordmatcherp" $ - parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher "A A") + parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher None "A A") + + ,test "recordmatcherp.starts-with-&" $ + parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And "A A") ,test "fieldmatcherp.starts-with-%" $ - parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher "description A A") + parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher None "description A A") ,test "fieldmatcherp" $ - parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher "%description" "A A") + parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher None "%description" "A A") + + ,test "fieldmatcherp.starts-with-&" $ + parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" "A A") -- ,test "fieldmatcherp with operator" $ -- parseWithState' defrules matcherp "%description ~ A A\n" @?= (Right $ FieldMatcher "%description" "A A") @@ -1266,9 +1299,21 @@ tests_CsvReader = tests "CsvReader" [ in test "toplevel" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate") - ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher "%csvdate" "a"] [("date","%csvdate")]]} + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a"] [("date","%csvdate")]]} in test "conditional" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate") - + + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a", FieldMatcher None "%description" "b"] [("date","%csvdate")]]} + in test "conditional-with-or-a" $ getEffectiveAssignment rules ["a"] "date" @?= (Just "%csvdate") + + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a", FieldMatcher None "%description" "b"] [("date","%csvdate")]]} + in test "conditional-with-or-b" $ getEffectiveAssignment rules ["_", "b"] "date" @?= (Just "%csvdate") + + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a", FieldMatcher And "%description" "b"] [("date","%csvdate")]]} + in test "conditional.with-and" $ getEffectiveAssignment rules ["a", "b"] "date" @?= (Just "%csvdate") + + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" "a", FieldMatcher And "%description" "b", FieldMatcher None "%description" "c"] [("date","%csvdate")]]} + in test "conditional.with-and-or" $ getEffectiveAssignment rules ["_", "c"] "date" @?= (Just "%csvdate") + ] ]