From 2faceb8e1b74b10014a843f9df1f3c64f7a0ca97 Mon Sep 17 00:00:00 2001 From: Thomas Miedema Date: Mon, 24 Feb 2025 11:29:54 +0100 Subject: [PATCH] feat: csv: allow multiple matchers on the same line `If blocks` and `If tables` now allow multiple matchers on the same line separated by `&&` (AND) or `&& !` (AND NOT). Example `if block` with two matchers on the same line: if %description amazon && %date 2025-02-22 account2 expenses:books Example `if table` with two matchers on the same line: if,account2 %description amazon && %date 2025-02-22, expenses:books --- hledger-lib/Hledger/Read/RulesReader.hs | 55 +++++++++++++++++++------ hledger/hledger.m4.md | 25 ++++++----- 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs index 96da0a2be..907e35a9d 100644 --- a/hledger-lib/Hledger/Read/RulesReader.hs +++ b/hledger-lib/Hledger/Read/RulesReader.hs @@ -637,19 +637,19 @@ conditionaltablep = do ]) when (null body) $ customFailure $ parseErrorAt start $ "start of conditional table found, but no assignment rules afterward" - return $ flip map body $ \(m,vs) -> - CB{cbMatchers=[m], cbAssignments=zip fields vs} + return $ flip map body $ \(ms,vs) -> + CB{cbMatchers=ms, cbAssignments=zip fields vs} "conditional table" where - bodylinep :: Char -> [Text] -> CsvRulesParser (Matcher,[FieldTemplate]) + bodylinep :: Char -> [Text] -> CsvRulesParser ([Matcher],[FieldTemplate]) bodylinep sep fields = do off <- getOffset - m <- matcherp' $ void $ char sep + ms <- matcherp' (lookAhead . void . char $ sep) `manyTill` char sep vs <- T.split (==sep) . T.pack <$> lift restofline if (length vs /= length fields) then customFailure $ parseErrorAt off $ ((printf "line of conditional table should have %d values, but this one has only %d" (length fields) (length vs)) :: String) - else return (m,vs) - + else return (ms,vs) + -- A single matcher, on one line. -- This tries to parse first as a field matcher, then if that fails, as a whole-record matcher; @@ -700,7 +700,7 @@ matcherprefixp :: CsvRulesParser MatcherPrefix matcherprefixp = do lift $ dbgparse 8 "trying matcherprefixp" (do - char '&' >> lift skipNonNewlineSpaces + char '&' >> optional (char '&') >> lift skipNonNewlineSpaces fromMaybe And <$> optional (char '!' >> lift skipNonNewlineSpaces >> return AndNot)) <|> (char '!' >> lift skipNonNewlineSpaces >> return Not) <|> return Or @@ -718,10 +718,12 @@ regexp end = do lift $ dbgparse 8 "trying regexp" -- notFollowedBy matchoperatorp c <- lift nonspace - cs <- anySingle `manyTill` end + cs <- anySingle `manyTill` (double_ampersand <|> end) case toRegexCI . T.strip . T.pack $ c:cs of Left x -> Fail.fail $ "CSV parser: " ++ x Right x -> return x + where + double_ampersand = lookAhead . void $ char '&' >> char '&' -- -- A match operator, indicating the type of match to perform. -- -- Currently just ~ meaning case insensitive infix regex match. @@ -1559,7 +1561,7 @@ tests_RulesReader = testGroup "RulesReader" [ ,testCase "quoted name" $ parseWithState' defrules csvfieldreferencep "%\"csv date\"" @?= (Right "%\"csv date\"") ] - ,testGroup "matcherp" [ + ,testGroup "recordmatcherp" [ testCase "recordmatcherp" $ parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "A A") @@ -1567,18 +1569,47 @@ tests_RulesReader = testGroup "RulesReader" [ ,testCase "recordmatcherp.starts-with-&" $ parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A") - ,testCase "fieldmatcherp.starts-with-%" $ - parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A") + ,testCase "recordmatcherp.starts-with-&&" $ + parseWithState' defrules matcherp "&& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A") - ,testCase "fieldmatcherp" $ + ,testCase "recordmatcherp.starts-with-&&-!" $ + parseWithState' defrules matcherp "&& ! A A\n" @?= (Right $ RecordMatcher AndNot $ toRegexCI' "A A") + + ,testCase "recordmatcherp.does-not-start-with-%" $ + parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A") + ] + + ,testGroup "fieldmatcherp" [ + testCase "fieldmatcherp" $ parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher Or "%description" $ toRegexCI' "A A") ,testCase "fieldmatcherp.starts-with-&" $ parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A") + ,testCase "fieldmatcherp.starts-with-&&" $ + parseWithState' defrules matcherp "&& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A") + + ,testCase "fieldmatcherp.starts-with-&&-!" $ + parseWithState' defrules matcherp "&& ! %description A A\n" @?= (Right $ FieldMatcher AndNot "%description" $ toRegexCI' "A A") + -- ,testCase "fieldmatcherp with operator" $ -- parseWithState' defrules matcherp "%description ~ A A\n" @?= (Right $ FieldMatcher "%description" "A A") + ] + ,testGroup "regexp" [ + testCase "regexp.ends-before-&&" $ + parseWithState' defrules (regexp empty) "A A && xxx" @?= (Right $ toRegexCI' "A A") + ] + + , let matchers = [RecordMatcher Or (toRegexCI' "A"), RecordMatcher And (toRegexCI' "B")] + assignments = [("account2", "foo"), ("comment2", "bar")] + block = CB matchers assignments + in + testGroup "Combine multiple matchers on the same line" [ + testCase "conditionalblockp" $ + parseWithState' defrules conditionalblockp "if A && B\n account2 foo\n comment2 bar" @?= (Right block) + ,testCase "conditionaltablep" $ + parseWithState' defrules conditionaltablep "if,account2,comment2\nA && B,foo,bar" @?= (Right [block]) ] ,testGroup "hledgerField" [ diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md index 8cd97483f..e39f6cc18 100644 --- a/hledger/hledger.m4.md +++ b/hledger/hledger.m4.md @@ -3741,10 +3741,13 @@ If you have trouble with it, see "Regular expressions" in the hledger manual ( ``` @@ -3796,8 +3799,8 @@ You can use the comment lines in the table body. The table must be terminated by an empty line (or end of file). An if table like the above is interpreted as follows: -try all of the matchers; -whenever a matcher succeeds, assign all of the values on that line to the corresponding hledger fields; +try all of the lines with matchers; +whenever a line with matchers succeeds, assign all of the values on that line to the corresponding hledger fields; If multiple lines match, later lines will override fields assigned by the earlier ones - just like the sequence of `if` blocks would behave. If table presented above is equivalent to this sequence of if blocks: @@ -3808,13 +3811,13 @@ if MATCHERA HLEDGERFIELD2 VALUE2 ... -if MATCHERB +if MATCHERB && MATCHERC HLEDGERFIELD1 VALUE1 HLEDGERFIELD2 VALUE2 ... -; Comment line which explains MATCHERC -if MATCHERC +; Comment line which explains MATCHERD +if MATCHERD HLEDGERFIELD1 VALUE1 HLEDGERFIELD2 VALUE2 ...