imp:csv:if: support & ! (AND NOT)

2024-12-03 17:25:43 -10:00 · 2024-12-03 17:25:43 -10:00 · 054a204aa0
commit 054a204aa0
parent f73888d3b6
3 changed files with 81 additions and 41 deletions
--- a/hledger-lib/Hledger/Read/RulesReader.hs
+++ b/hledger-lib/Hledger/Read/RulesReader.hs
@ -308,8 +308,13 @@ type MatchGroupReference = Text
 -- | A strptime date parsing pattern, as supported by Data.Time.Format.
 type DateFormat       = Text
-- | A prefix for a matcher test, either & or none (implicit or).
+-- | A representation of a matcher's prefix, which indicates how it should be
-data MatcherPrefix = And | Not | None
+-- interpreted or combined with other matchers.
 data MatcherPrefix =
    Or      -- ^ no prefix
  | And     -- ^ &
  | Not     -- ^ !
  | AndNot  -- ^ & !
  deriving (Show, Eq)
 -- | A single test for matching a CSV record, in one way or another.
@ -318,6 +323,14 @@ data Matcher =
  | FieldMatcher MatcherPrefix CsvFieldReference Regexp         -- ^ match if this regexp matches the referenced CSV field's value
  deriving (Show, Eq)
 matcherPrefix :: Matcher -> MatcherPrefix
 matcherPrefix (RecordMatcher prefix _) = prefix
 matcherPrefix (FieldMatcher prefix _ _) = prefix
 matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher
 matcherSetPrefix p (RecordMatcher _ r)  = RecordMatcher p r
 matcherSetPrefix p (FieldMatcher _ f r) = FieldMatcher p f r
 -- | A conditional block: a set of CSV record matchers, and a sequence
 -- of rules which will be enabled only if one or more of the matchers
 -- succeeds.
@ -682,7 +695,11 @@ fieldmatcherp end = do
 matcherprefixp :: CsvRulesParser MatcherPrefix
 matcherprefixp = do
  lift $ dbgparse 8 "trying matcherprefixp"
-  (char '&' >> lift skipNonNewlineSpaces >> return And) <|> (char '!' >> lift skipNonNewlineSpaces >> return Not) <|> return None
+  (do
    char '&' >> lift skipNonNewlineSpaces
    fromMaybe And <$> optional (char '!' >> lift skipNonNewlineSpaces >> return AndNot))
  <|> (char '!' >> lift skipNonNewlineSpaces >> return Not)
  <|> return Or
 csvfieldreferencep :: CsvRulesParser CsvFieldReference
 csvfieldreferencep = do
@ -744,7 +761,7 @@ lastCBAssignmentTemplate f = snd . last . filter ((==f).fst) . cbAssignments
 maybeNegate :: MatcherPrefix -> Bool -> Bool
 maybeNegate Not origbool = not origbool
-maybeNegate _ origbool = origbool
+maybeNegate _   origbool = origbool
 -- | Given the conversion rules, a CSV record and a hledger field name, find
 -- either the last applicable `ConditionalBlock`, or the final value template
@ -789,7 +806,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
    --
    matcherMatches :: Matcher -> Bool
    matcherMatches = \case
-      RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
+      RecordMatcher prefix             pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
      FieldMatcher  prefix csvfieldref pat -> maybeNegate prefix $ match pat $
        fromMaybe (warn "'if %CSVFIELD' should use a name declared with 'fields', or a number" "") $
        replaceCsvFieldReference rules record csvfieldref
@ -803,14 +820,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
    --   D
    --   & E
    --   => [[A, B], [C], [D, E]]
    --  Within an AND group, & ! M (and not M) is converted to ! M (not M), which maybeNegate then negates.
    groupedMatchers :: [Matcher] -> [[Matcher]]
    groupedMatchers [] = []
-    groupedMatchers (x:xs) = (x:ys) : groupedMatchers zs
+    groupedMatchers (m:ms) = (m:ands) : groupedMatchers rest
      where
-        (ys, zs) = span (\y -> matcherPrefix y == And) xs
+        (andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms
-        matcherPrefix :: Matcher -> MatcherPrefix
+        ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And]
        matcherPrefix (RecordMatcher prefix _) = prefix
        matcherPrefix (FieldMatcher prefix _ _) = prefix
 -- | Render a field assignment's template, possibly interpolating referenced
 -- CSV field values or match groups. Outer whitespace is removed from interpolated values.
@ -1514,12 +1530,12 @@ tests_RulesReader = testGroup "RulesReader" [
    ,testCase "assignment with empty value" $
      parseWithState' defrules rulesp "account1 \nif foo\n  account2 foo\n" @?=
-        (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher None (toRegex' "foo")],cbAssignments=[("account2","foo")]}]}))
+        (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher Or (toRegex' "foo")],cbAssignments=[("account2","foo")]}]}))
   ]
  ,testGroup "conditionalblockp" [
    testCase "space after conditional" $
      parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
-        (Right $ CB{cbMatchers=[RecordMatcher None $ toRegexCI' "a"],cbAssignments=[("account2","b")]})
+        (Right $ CB{cbMatchers=[RecordMatcher Or $ toRegexCI' "a"],cbAssignments=[("account2","b")]})
  ],
  testGroup "csvfieldreferencep" [
@ -1531,16 +1547,16 @@ tests_RulesReader = testGroup "RulesReader" [
  ,testGroup "matcherp" [
    testCase "recordmatcherp" $
-      parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "A A")
+      parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "A A")
   ,testCase "recordmatcherp.starts-with-&" $
      parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A")
   ,testCase "fieldmatcherp.starts-with-%" $
-      parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "description A A")
+      parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A")
   ,testCase "fieldmatcherp" $
-      parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher None "%description" $ toRegexCI' "A A")
+      parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher Or "%description" $ toRegexCI' "A A")
   ,testCase "fieldmatcherp.starts-with-&" $
      parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A")
@ -1555,7 +1571,7 @@ tests_RulesReader = testGroup "RulesReader" [
    in testCase "toplevel" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
    in testCase "conditional" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
@ -1564,16 +1580,16 @@ tests_RulesReader = testGroup "RulesReader" [
   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "b"] [("date","%csvdate")]]}
    in testCase "negated-conditional-true" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]}
    in testCase "conditional-with-or-a" $ hledgerField rules ["a"] "date" @?= (Just "%csvdate")
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]}
    in testCase "conditional-with-or-b" $ hledgerField rules ["_", "b"] "date" @?= (Just "%csvdate")
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]}
    in testCase "conditional.with-and" $ hledgerField rules ["a", "b"] "date" @?= (Just "%csvdate")
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher None "%description" $ toRegex' "c"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher Or "%description" $ toRegex' "c"] [("date","%csvdate")]]}
    in testCase "conditional.with-and-or" $ hledgerField rules ["_", "c"] "date" @?= (Just "%csvdate")
   ]
@ -1584,9 +1600,9 @@ tests_RulesReader = testGroup "RulesReader" [
          { rcsvfieldindexes=[ ("date",1), ("description",2) ]
          , rassignments=[ ("account2","equity"), ("amount1","1") ]
          -- ConditionalBlocks here are in reverse order: mkrules reverses the list
-          , rconditionalblocks=[ CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*) - (.*)")] 
+          , rconditionalblocks=[ CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*) - (.*)")]
                                    , cbAssignments=[("account1","account:\\1:\\2")] }
-                               , CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*)")]
+                               , CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*)")]
                                    , cbAssignments=[("account1","account:\\1"), ("comment1","\\1")] }
                               ]
          }
--- a/hledger/hledger.m4.md
+++ b/hledger/hledger.m4.md
@ -3693,8 +3693,8 @@ When an if block has multiple matchers, each on its own line,
 - By default they are OR'd (any of them can match).
 - Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match).
-You can't use both `&` and `!` on the same line (you can't AND a negated matcher),
+You can use a negated `!` matcher on a `&` line, meaning AND NOT.
-[currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398).
+*Since 1.41.*
 ### Match groups
--- a/hledger/test/csv.test
+++ b/hledger/test/csv.test
@ -884,7 +884,7 @@ start of conditional block found, but no assignment rules afterward
 >=1
 # XXX
-# ** 44. handle conditions with & operator
+# ** 44. handle matchers with & prefix
 <
 10/2009/09,Flubber Co,50
 10/2009/09,Blubber Co,50
@ -908,7 +908,31 @@ $  ./csvtest.sh
 >=0
-# ** 45. decimal-mark helps parse ambiguous decimals correctly.
+# ** 45. handle matchers with both & and !
 <
 10/2009/09,Flubber Co,50
 10/2009/09,Blubber Co,50
 RULES
 fields date, description, amount
 date-format %d/%Y/%m
 currency $
 account1 assets:myacct
 if Flubber
 &!%amount 50
  account2 acct
 $  ./csvtest.sh
 2009-09-10 Flubber Co
    assets:myacct              $50
    income:unknown            $-50
 2009-09-10 Blubber Co
    assets:myacct              $50
    income:unknown            $-50
 >=0
 # ** 46. decimal-mark helps parse ambiguous decimals correctly.
 # Here it's one thousand, one.
 <
 2020-01-01,"1,000"
@ -929,7 +953,7 @@ $  ./csvtest.sh
 >=
-# ** 46. Again, this time with comma as decimal mark.
+# ** 47. Again, this time with comma as decimal mark.
 # Here it's one, one thousand.
 <
 2020-01-01,"1,000"
@ -950,7 +974,7 @@ $  ./csvtest.sh
 >=
-# ** 47. Account aliases work when reading from CSV.
+# ** 48. Account aliases work when reading from CSV.
 <
 2020-01-01,10
@ -964,7 +988,7 @@ $  ./csvtest.sh --alias expenses=FOO
 >=
-# ** 48. Allow for whitespace in csv amounts
+# ** 49. Allow for whitespace in csv amounts
 <
 2009-09-10,+ $20
 2009-09-10, $ +30
@ -1000,7 +1024,7 @@ $  ./csvtest.sh
 >=0
-# ** 49. Handle an entry with all zeros
+# ** 50. Handle an entry with all zeros
 <
 Date;Description;Category;Debit;Credit;Balance
 "2020-01-21","Client card point of sale fee",Fees,"0","0","1068.94"
@ -1018,7 +1042,7 @@ $  ./csvtest.sh
 >=0
-# ** 50. Allow unicode field references (#1809)
+# ** 51. Allow unicode field references (#1809)
 <
 Date, Description, Id, Amount
 12/11/2019, Foo, 123, 10.23
@ -1036,7 +1060,7 @@ $  ./csvtest.sh
 >=0
-# ** 51. Throw an error when unable to substitute csv templates
+# ** 52. Throw an error when unable to substitute csv templates
 <
 "2021-12-23","caffe_siciliaexpenses:cibo:dolce","-10.5"
@ -1048,7 +1072,7 @@ $  ./csvtest.sh
 >2 /transaction is unbalanced/
 >=1
-# ** 52. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966)
+# ** 53. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966)
 <
 2022-01-01,B"B",C
 RULES
@ -1057,7 +1081,7 @@ $  ./csvtest.sh
 >2 /unexpected '"'/
 >=1
-# ** 53. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967)
+# ** 54. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967)
 <
 2022-01-01,B"B",C
 RULES
@ -1066,7 +1090,7 @@ fields date, b, c
 $  ./csvtest.sh
 >=
-# ** 54. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines
+# ** 55. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines
 # are skipped automatically, including inner ones; skip's argument
 # counts only the non-empty/blank lines.
 <
@ -1087,7 +1111,7 @@ $  ./csvtest.sh
 >=
-# ** 55. Some validation is done on account name assignments; trying to
+# ** 56. Some validation is done on account name assignments; trying to
 # also set an amount there (with 2+ spaces) will be rejected. (#1978)
 <
 2022-01-01,1
@ -1098,7 +1122,7 @@ $  ./csvtest.sh
 >2 /unexpected space/
 >=1
-# ** 56. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241)
+# ** 57. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241)
 <
 2020-01-01, 1
 RULES
@ -1116,7 +1140,7 @@ $  ./csvtest.sh tag:ttag2
 >=
-# ** 57. and also posting tags, and (primary, yearful) posting dates (#2114, #2241)
+# ** 58. and also posting tags, and (primary, yearful) posting dates (#2114, #2241)
 $  ./csvtest.sh tag:date
 2020-01-01  ; ttag:tval
    ; ttag2:
@ -1125,7 +1149,7 @@ $  ./csvtest.sh tag:date
 >=
-# ** 58. handle newlines in an assignment value, without breaking interpolations (#2134)
+# ** 59. handle newlines in an assignment value, without breaking interpolations (#2134)
 <
 2023-01-01,1
 RULES
@ -1139,7 +1163,7 @@ $  ./csvtest.sh
 >=
-# ** 59. specify ssv prefix and no extension
+# ** 60. specify ssv prefix and no extension
 <
 12/11/2019;Foo;123;10.23
 RULES
@ -1152,7 +1176,7 @@ $  ./ssvtest.sh
 >=
-# ** 60. tabular rules with comments
+# ** 61. tabular rules with comments
 <
 10/2009/09,Flubber Co,50
 10/2009/09,Blubber Co,150