From 2faceb8e1b74b10014a843f9df1f3c64f7a0ca97 Mon Sep 17 00:00:00 2001
From: Thomas Miedema <thomasmiedema@gmail.com>
Date: Mon, 24 Feb 2025 11:29:54 +0100
Subject: [PATCH] feat: csv: allow multiple matchers on the same line

`If blocks` and `If tables` now allow multiple matchers on the same line
separated by `&&` (AND) or `&& !` (AND NOT).

Example `if block` with two matchers on the same line:

	if %description amazon && %date 2025-02-22
	    account2 expenses:books

Example `if table` with two matchers on the same line:

	if,account2
	%description amazon && %date 2025-02-22, expenses:books
---
 hledger-lib/Hledger/Read/RulesReader.hs | 55 +++++++++++++++++++------
 hledger/hledger.m4.md                   | 25 ++++++-----
 2 files changed, 57 insertions(+), 23 deletions(-)

diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs
index 96da0a2be..907e35a9d 100644
--- a/hledger-lib/Hledger/Read/RulesReader.hs
+++ b/hledger-lib/Hledger/Read/RulesReader.hs
@@ -637,19 +637,19 @@ conditionaltablep = do
                  ])
   when (null body) $
     customFailure $ parseErrorAt start $ "start of conditional table found, but no assignment rules afterward"
-  return $ flip map body $ \(m,vs) ->
-    CB{cbMatchers=[m], cbAssignments=zip fields vs}
+  return $ flip map body $ \(ms,vs) ->
+    CB{cbMatchers=ms, cbAssignments=zip fields vs}
   <?> "conditional table"
   where
-    bodylinep :: Char -> [Text] -> CsvRulesParser (Matcher,[FieldTemplate])
+    bodylinep :: Char -> [Text] -> CsvRulesParser ([Matcher],[FieldTemplate])
     bodylinep sep fields = do
       off <- getOffset
-      m <- matcherp' $ void $ char sep
+      ms <- matcherp' (lookAhead . void . char $ sep) `manyTill` char sep
       vs <- T.split (==sep) . T.pack <$> lift restofline
       if (length vs /= length fields)
         then customFailure $ parseErrorAt off $ ((printf "line of conditional table should have %d values, but this one has only %d" (length fields) (length vs)) :: String)
-        else return (m,vs)
-      
+        else return (ms,vs)
+
 
 -- A single matcher, on one line.
 -- This tries to parse first as a field matcher, then if that fails, as a whole-record matcher;
@@ -700,7 +700,7 @@ matcherprefixp :: CsvRulesParser MatcherPrefix
 matcherprefixp = do
   lift $ dbgparse 8 "trying matcherprefixp"
   (do
-    char '&' >> lift skipNonNewlineSpaces
+    char '&' >> optional (char '&') >> lift skipNonNewlineSpaces
     fromMaybe And <$> optional (char '!' >> lift skipNonNewlineSpaces >> return AndNot))
   <|> (char '!' >> lift skipNonNewlineSpaces >> return Not)
   <|> return Or
@@ -718,10 +718,12 @@ regexp end = do
   lift $ dbgparse 8 "trying regexp"
   -- notFollowedBy matchoperatorp
   c <- lift nonspace
-  cs <- anySingle `manyTill` end
+  cs <- anySingle `manyTill` (double_ampersand <|> end)
   case toRegexCI . T.strip . T.pack $ c:cs of
        Left x -> Fail.fail $ "CSV parser: " ++ x
        Right x -> return x
+  where
+    double_ampersand = lookAhead . void $ char '&' >> char '&'
 
 -- -- A match operator, indicating the type of match to perform.
 -- -- Currently just ~ meaning case insensitive infix regex match.
@@ -1559,7 +1561,7 @@ tests_RulesReader = testGroup "RulesReader" [
    ,testCase "quoted name" $ parseWithState' defrules csvfieldreferencep "%\"csv date\"" @?= (Right "%\"csv date\"")
    ]
 
-  ,testGroup "matcherp" [
+  ,testGroup "recordmatcherp" [
 
     testCase "recordmatcherp" $
       parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "A A")
@@ -1567,18 +1569,47 @@ tests_RulesReader = testGroup "RulesReader" [
    ,testCase "recordmatcherp.starts-with-&" $
       parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A")
 
-   ,testCase "fieldmatcherp.starts-with-%" $
-      parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A")
+   ,testCase "recordmatcherp.starts-with-&&" $
+      parseWithState' defrules matcherp "&& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A")
 
-   ,testCase "fieldmatcherp" $
+   ,testCase "recordmatcherp.starts-with-&&-!" $
+      parseWithState' defrules matcherp "&& ! A A\n" @?= (Right $ RecordMatcher AndNot $ toRegexCI' "A A")
+
+   ,testCase "recordmatcherp.does-not-start-with-%" $
+      parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A")
+   ]
+
+  ,testGroup "fieldmatcherp" [
+    testCase "fieldmatcherp" $
       parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher Or "%description" $ toRegexCI' "A A")
 
    ,testCase "fieldmatcherp.starts-with-&" $
       parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A")
 
+   ,testCase "fieldmatcherp.starts-with-&&" $
+      parseWithState' defrules matcherp "&& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A")
+
+   ,testCase "fieldmatcherp.starts-with-&&-!" $
+      parseWithState' defrules matcherp "&& ! %description A A\n" @?= (Right $ FieldMatcher AndNot "%description" $ toRegexCI' "A A")
+
    -- ,testCase "fieldmatcherp with operator" $
    --    parseWithState' defrules matcherp "%description ~ A A\n" @?= (Right $ FieldMatcher "%description" "A A")
+   ]
 
+  ,testGroup "regexp" [
+    testCase "regexp.ends-before-&&" $
+      parseWithState' defrules (regexp empty) "A A && xxx" @?= (Right $ toRegexCI' "A A")
+   ]
+
+  , let matchers = [RecordMatcher Or (toRegexCI' "A"), RecordMatcher And (toRegexCI' "B")]
+        assignments = [("account2", "foo"), ("comment2", "bar")]
+        block = CB matchers assignments
+    in
+   testGroup "Combine multiple matchers on the same line" [
+    testCase "conditionalblockp" $
+      parseWithState' defrules conditionalblockp "if A && B\n account2 foo\n comment2 bar" @?= (Right block)
+   ,testCase "conditionaltablep" $
+      parseWithState' defrules conditionaltablep "if,account2,comment2\nA && B,foo,bar" @?= (Right [block])
    ]
 
  ,testGroup "hledgerField" [
diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md
index 8cd97483f..e39f6cc18 100644
--- a/hledger/hledger.m4.md
+++ b/hledger/hledger.m4.md
@@ -3741,10 +3741,13 @@ If you have trouble with it, see "Regular expressions" in the hledger manual (<h
 When an if block has multiple matchers, each on its own line,
 
 - By default they are OR'd (any of them can match).
-- Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match).
+- Matcher lines beginning with `&` (or `&&`, *since 1.42*) are AND'ed with the matcher above (all in the AND'ed group must match).
+- Matcher lines beginning with `& !` (*since 1.41*) or `&& !` (*since 1.42*) are first negated and then AND'ed with the matcher above.
 
-*(Since 1.41:)*
-You can use a negated `!` matcher on a `&` line, meaning AND NOT.
+You can also combine multiple matchers on the same line separated by `&&` (AND) or `&& !` (AND NOT).
+Eg `%description amazon && %date 2025-01-01` will match only when the
+description field contains "amazon" and the date field equals 2025-01-01.
+*Added in 1.42.*
 
 ### Match groups
 
@@ -3779,9 +3782,9 @@ they can express many matchers and field assignments in a more compact tabular f
 ```rules
 if,HLEDGERFIELD1,HLEDGERFIELD2,...
 MATCHERA,VALUE1,VALUE2,...
-MATCHERB,VALUE1,VALUE2,...
-; Comment line that explains MATCHERC
-MATCHERC,VALUE1,VALUE2,...
+MATCHERB && MATCHERC,VALUE1,VALUE2,...  (*since 1.42*)
+; Comment line that explains MATCHERD
+MATCHERD,VALUE1,VALUE2,...
 <empty line>
 ```
 
@@ -3796,8 +3799,8 @@ You can use the comment lines in the table body.
 The table must be terminated by an empty line (or end of file).
 
 An if table like the above is interpreted as follows:
-try all of the matchers; 
-whenever a matcher succeeds, assign all of the values on that line to the corresponding hledger fields;
+try all of the lines with matchers; 
+whenever a line with matchers succeeds, assign all of the values on that line to the corresponding hledger fields;
 If multiple lines match, later lines will override fields assigned by the earlier ones - just like the sequence of `if` blocks would behave.
 
 If table presented above is equivalent to this sequence of if blocks:
@@ -3808,13 +3811,13 @@ if MATCHERA
   HLEDGERFIELD2 VALUE2
   ...
 
-if MATCHERB
+if MATCHERB && MATCHERC
   HLEDGERFIELD1 VALUE1
   HLEDGERFIELD2 VALUE2
   ...
 
-; Comment line which explains MATCHERC
-if MATCHERC
+; Comment line which explains MATCHERD
+if MATCHERD
   HLEDGERFIELD1 VALUE1
   HLEDGERFIELD2 VALUE2
   ...