imp:csv:if: support & ! (AND NOT)

This commit is contained in:
Simon Michael 2024-12-03 17:25:43 -10:00
parent f73888d3b6
commit 054a204aa0
3 changed files with 81 additions and 41 deletions

View File

@ -308,8 +308,13 @@ type MatchGroupReference = Text
-- | A strptime date parsing pattern, as supported by Data.Time.Format. -- | A strptime date parsing pattern, as supported by Data.Time.Format.
type DateFormat = Text type DateFormat = Text
-- | A prefix for a matcher test, either & or none (implicit or). -- | A representation of a matcher's prefix, which indicates how it should be
data MatcherPrefix = And | Not | None -- interpreted or combined with other matchers.
data MatcherPrefix =
Or -- ^ no prefix
| And -- ^ &
| Not -- ^ !
| AndNot -- ^ & !
deriving (Show, Eq) deriving (Show, Eq)
-- | A single test for matching a CSV record, in one way or another. -- | A single test for matching a CSV record, in one way or another.
@ -318,6 +323,14 @@ data Matcher =
| FieldMatcher MatcherPrefix CsvFieldReference Regexp -- ^ match if this regexp matches the referenced CSV field's value | FieldMatcher MatcherPrefix CsvFieldReference Regexp -- ^ match if this regexp matches the referenced CSV field's value
deriving (Show, Eq) deriving (Show, Eq)
-- | Extract the prefix stored in a matcher, whichever kind of matcher it is.
matcherPrefix :: Matcher -> MatcherPrefix
matcherPrefix matcher = case matcher of
  RecordMatcher pfx _  -> pfx
  FieldMatcher pfx _ _ -> pfx
-- | Replace a matcher's prefix with the given one, leaving its regexp
-- (and, for a field matcher, its CSV field reference) unchanged.
matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher
matcherSetPrefix newPrefix matcher = case matcher of
  RecordMatcher _ regex          -> RecordMatcher newPrefix regex
  FieldMatcher _ fieldRef regex  -> FieldMatcher newPrefix fieldRef regex
-- | A conditional block: a set of CSV record matchers, and a sequence -- | A conditional block: a set of CSV record matchers, and a sequence
-- of rules which will be enabled only if one or more of the matchers -- of rules which will be enabled only if one or more of the matchers
-- succeeds. -- succeeds.
@ -682,7 +695,11 @@ fieldmatcherp end = do
matcherprefixp :: CsvRulesParser MatcherPrefix matcherprefixp :: CsvRulesParser MatcherPrefix
matcherprefixp = do matcherprefixp = do
lift $ dbgparse 8 "trying matcherprefixp" lift $ dbgparse 8 "trying matcherprefixp"
(char '&' >> lift skipNonNewlineSpaces >> return And) <|> (char '!' >> lift skipNonNewlineSpaces >> return Not) <|> return None (do
char '&' >> lift skipNonNewlineSpaces
fromMaybe And <$> optional (char '!' >> lift skipNonNewlineSpaces >> return AndNot))
<|> (char '!' >> lift skipNonNewlineSpaces >> return Not)
<|> return Or
csvfieldreferencep :: CsvRulesParser CsvFieldReference csvfieldreferencep :: CsvRulesParser CsvFieldReference
csvfieldreferencep = do csvfieldreferencep = do
@ -744,7 +761,7 @@ lastCBAssignmentTemplate f = snd . last . filter ((==f).fst) . cbAssignments
maybeNegate :: MatcherPrefix -> Bool -> Bool maybeNegate :: MatcherPrefix -> Bool -> Bool
maybeNegate Not origbool = not origbool maybeNegate Not origbool = not origbool
maybeNegate _ origbool = origbool maybeNegate _ origbool = origbool
-- | Given the conversion rules, a CSV record and a hledger field name, find -- | Given the conversion rules, a CSV record and a hledger field name, find
-- either the last applicable `ConditionalBlock`, or the final value template -- either the last applicable `ConditionalBlock`, or the final value template
@ -789,7 +806,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
-- --
matcherMatches :: Matcher -> Bool matcherMatches :: Matcher -> Bool
matcherMatches = \case matcherMatches = \case
RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $ FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $
fromMaybe (warn "'if %CSVFIELD' should use a name declared with 'fields', or a number" "") $ fromMaybe (warn "'if %CSVFIELD' should use a name declared with 'fields', or a number" "") $
replaceCsvFieldReference rules record csvfieldref replaceCsvFieldReference rules record csvfieldref
@ -803,14 +820,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
-- D -- D
-- & E -- & E
-- => [[A, B], [C], [D, E]] -- => [[A, B], [C], [D, E]]
-- Within each AND group, a following "& ! M" (AND NOT M) matcher is rewritten as "! M" (NOT M).
groupedMatchers :: [Matcher] -> [[Matcher]] groupedMatchers :: [Matcher] -> [[Matcher]]
groupedMatchers [] = [] groupedMatchers [] = []
groupedMatchers (x:xs) = (x:ys) : groupedMatchers zs groupedMatchers (m:ms) = (m:ands) : groupedMatchers rest
where where
(ys, zs) = span (\y -> matcherPrefix y == And) xs (andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms
matcherPrefix :: Matcher -> MatcherPrefix ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And]
matcherPrefix (RecordMatcher prefix _) = prefix
matcherPrefix (FieldMatcher prefix _ _) = prefix
-- | Render a field assignment's template, possibly interpolating referenced -- | Render a field assignment's template, possibly interpolating referenced
-- CSV field values or match groups. Outer whitespace is removed from interpolated values. -- CSV field values or match groups. Outer whitespace is removed from interpolated values.
@ -1514,12 +1530,12 @@ tests_RulesReader = testGroup "RulesReader" [
,testCase "assignment with empty value" $ ,testCase "assignment with empty value" $
parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?= parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
(Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher None (toRegex' "foo")],cbAssignments=[("account2","foo")]}]})) (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher Or (toRegex' "foo")],cbAssignments=[("account2","foo")]}]}))
] ]
,testGroup "conditionalblockp" [ ,testGroup "conditionalblockp" [
testCase "space after conditional" $ testCase "space after conditional" $
parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?= parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
(Right $ CB{cbMatchers=[RecordMatcher None $ toRegexCI' "a"],cbAssignments=[("account2","b")]}) (Right $ CB{cbMatchers=[RecordMatcher Or $ toRegexCI' "a"],cbAssignments=[("account2","b")]})
], ],
testGroup "csvfieldreferencep" [ testGroup "csvfieldreferencep" [
@ -1531,16 +1547,16 @@ tests_RulesReader = testGroup "RulesReader" [
,testGroup "matcherp" [ ,testGroup "matcherp" [
testCase "recordmatcherp" $ testCase "recordmatcherp" $
parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "A A") parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "A A")
,testCase "recordmatcherp.starts-with-&" $ ,testCase "recordmatcherp.starts-with-&" $
parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A") parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A")
,testCase "fieldmatcherp.starts-with-%" $ ,testCase "fieldmatcherp.starts-with-%" $
parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "description A A") parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A")
,testCase "fieldmatcherp" $ ,testCase "fieldmatcherp" $
parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher None "%description" $ toRegexCI' "A A") parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher Or "%description" $ toRegexCI' "A A")
,testCase "fieldmatcherp.starts-with-&" $ ,testCase "fieldmatcherp.starts-with-&" $
parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A") parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A")
@ -1555,7 +1571,7 @@ tests_RulesReader = testGroup "RulesReader" [
in testCase "toplevel" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate") in testCase "toplevel" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]} ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
in testCase "conditional" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate") in testCase "conditional" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]} ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
@ -1564,16 +1580,16 @@ tests_RulesReader = testGroup "RulesReader" [
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "b"] [("date","%csvdate")]]} ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "b"] [("date","%csvdate")]]}
in testCase "negated-conditional-true" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate") in testCase "negated-conditional-true" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]} ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]}
in testCase "conditional-with-or-a" $ hledgerField rules ["a"] "date" @?= (Just "%csvdate") in testCase "conditional-with-or-a" $ hledgerField rules ["a"] "date" @?= (Just "%csvdate")
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]} ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]}
in testCase "conditional-with-or-b" $ hledgerField rules ["_", "b"] "date" @?= (Just "%csvdate") in testCase "conditional-with-or-b" $ hledgerField rules ["_", "b"] "date" @?= (Just "%csvdate")
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]} ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]}
in testCase "conditional.with-and" $ hledgerField rules ["a", "b"] "date" @?= (Just "%csvdate") in testCase "conditional.with-and" $ hledgerField rules ["a", "b"] "date" @?= (Just "%csvdate")
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher None "%description" $ toRegex' "c"] [("date","%csvdate")]]} ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher Or "%description" $ toRegex' "c"] [("date","%csvdate")]]}
in testCase "conditional.with-and-or" $ hledgerField rules ["_", "c"] "date" @?= (Just "%csvdate") in testCase "conditional.with-and-or" $ hledgerField rules ["_", "c"] "date" @?= (Just "%csvdate")
] ]
@ -1584,9 +1600,9 @@ tests_RulesReader = testGroup "RulesReader" [
{ rcsvfieldindexes=[ ("date",1), ("description",2) ] { rcsvfieldindexes=[ ("date",1), ("description",2) ]
, rassignments=[ ("account2","equity"), ("amount1","1") ] , rassignments=[ ("account2","equity"), ("amount1","1") ]
-- ConditionalBlocks here are in reverse order: mkrules reverses the list -- ConditionalBlocks here are in reverse order: mkrules reverses the list
, rconditionalblocks=[ CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*) - (.*)")] , rconditionalblocks=[ CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*) - (.*)")]
, cbAssignments=[("account1","account:\\1:\\2")] } , cbAssignments=[("account1","account:\\1:\\2")] }
, CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*)")] , CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*)")]
, cbAssignments=[("account1","account:\\1"), ("comment1","\\1")] } , cbAssignments=[("account1","account:\\1"), ("comment1","\\1")] }
] ]
} }

View File

@ -3693,8 +3693,8 @@ When an if block has multiple matchers, each on its own line,
- By default they are OR'd (any of them can match). - By default they are OR'd (any of them can match).
- Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match). - Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match).
You can't use both `&` and `!` on the same line (you can't AND a negated matcher), You can use a negated `!` matcher on a `&` line, meaning AND NOT.
[currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398). *Since 1.41.*
### Match groups ### Match groups

View File

@ -884,7 +884,7 @@ start of conditional block found, but no assignment rules afterward
>=1 >=1
# XXX # XXX
# ** 44. handle conditions with & operator # ** 44. handle matchers with & prefix
< <
10/2009/09,Flubber Co,50 10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,50 10/2009/09,Blubber Co,50
@ -908,7 +908,31 @@ $ ./csvtest.sh
>=0 >=0
# ** 45. decimal-mark helps parse ambiguous decimals correctly. # ** 45. handle matchers with both & and !
<
10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,50
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if Flubber
&!%amount 50
account2 acct
$ ./csvtest.sh
2009-09-10 Flubber Co
assets:myacct $50
income:unknown $-50
2009-09-10 Blubber Co
assets:myacct $50
income:unknown $-50
>=0
# ** 46. decimal-mark helps parse ambiguous decimals correctly.
# Here it's one thousand, one. # Here it's one thousand, one.
< <
2020-01-01,"1,000" 2020-01-01,"1,000"
@ -929,7 +953,7 @@ $ ./csvtest.sh
>= >=
# ** 46. Again, this time with comma as decimal mark. # ** 47. Again, this time with comma as decimal mark.
# Here it's one, one thousand. # Here it's one, one thousand.
< <
2020-01-01,"1,000" 2020-01-01,"1,000"
@ -950,7 +974,7 @@ $ ./csvtest.sh
>= >=
# ** 47. Account aliases work when reading from CSV. # ** 48. Account aliases work when reading from CSV.
< <
2020-01-01,10 2020-01-01,10
@ -964,7 +988,7 @@ $ ./csvtest.sh --alias expenses=FOO
>= >=
# ** 48. Allow for whitespace in csv amounts # ** 49. Allow for whitespace in csv amounts
< <
2009-09-10,+ $20 2009-09-10,+ $20
2009-09-10, $ +30 2009-09-10, $ +30
@ -1000,7 +1024,7 @@ $ ./csvtest.sh
>=0 >=0
# ** 49. Handle an entry with all zeros # ** 50. Handle an entry with all zeros
< <
Date;Description;Category;Debit;Credit;Balance Date;Description;Category;Debit;Credit;Balance
"2020-01-21","Client card point of sale fee",Fees,"0","0","1068.94" "2020-01-21","Client card point of sale fee",Fees,"0","0","1068.94"
@ -1018,7 +1042,7 @@ $ ./csvtest.sh
>=0 >=0
# ** 50. Allow unicode field references (#1809) # ** 51. Allow unicode field references (#1809)
< <
Date, Description, Id, Amount Date, Description, Id, Amount
12/11/2019, Foo, 123, 10.23 12/11/2019, Foo, 123, 10.23
@ -1036,7 +1060,7 @@ $ ./csvtest.sh
>=0 >=0
# ** 51. Throw an error when unable to substitute csv templates # ** 52. Throw an error when unable to substitute csv templates
< <
"2021-12-23","caffe_siciliaexpenses:cibo:dolce","-10.5" "2021-12-23","caffe_siciliaexpenses:cibo:dolce","-10.5"
@ -1048,7 +1072,7 @@ $ ./csvtest.sh
>2 /transaction is unbalanced/ >2 /transaction is unbalanced/
>=1 >=1
# ** 52. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966) # ** 53. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966)
< <
2022-01-01,B"B",C 2022-01-01,B"B",C
RULES RULES
@ -1057,7 +1081,7 @@ $ ./csvtest.sh
>2 /unexpected '"'/ >2 /unexpected '"'/
>=1 >=1
# ** 53. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967) # ** 54. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967)
< <
2022-01-01,B"B",C 2022-01-01,B"B",C
RULES RULES
@ -1066,7 +1090,7 @@ fields date, b, c
$ ./csvtest.sh $ ./csvtest.sh
>= >=
# ** 54. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines # ** 55. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines
# are skipped automatically, including inner ones; skip's argument # are skipped automatically, including inner ones; skip's argument
# counts only the non-empty/blank lines. # counts only the non-empty/blank lines.
< <
@ -1087,7 +1111,7 @@ $ ./csvtest.sh
>= >=
# ** 55. Some validation is done on account name assignments; trying to # ** 56. Some validation is done on account name assignments; trying to
# also set an amount there (with 2+ spaces) will be rejected. (#1978) # also set an amount there (with 2+ spaces) will be rejected. (#1978)
< <
2022-01-01,1 2022-01-01,1
@ -1098,7 +1122,7 @@ $ ./csvtest.sh
>2 /unexpected space/ >2 /unexpected space/
>=1 >=1
# ** 56. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241) # ** 57. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241)
< <
2020-01-01, 1 2020-01-01, 1
RULES RULES
@ -1116,7 +1140,7 @@ $ ./csvtest.sh tag:ttag2
>= >=
# ** 57. and also posting tags, and (primary, yearful) posting dates (#2114, #2241) # ** 58. and also posting tags, and (primary, yearful) posting dates (#2114, #2241)
$ ./csvtest.sh tag:date $ ./csvtest.sh tag:date
2020-01-01 ; ttag:tval 2020-01-01 ; ttag:tval
; ttag2: ; ttag2:
@ -1125,7 +1149,7 @@ $ ./csvtest.sh tag:date
>= >=
# ** 58. handle newlines in an assignment value, without breaking interpolations (#2134) # ** 59. handle newlines in an assignment value, without breaking interpolations (#2134)
< <
2023-01-01,1 2023-01-01,1
RULES RULES
@ -1139,7 +1163,7 @@ $ ./csvtest.sh
>= >=
# ** 59. specify ssv prefix and no extension # ** 60. specify ssv prefix and no extension
< <
12/11/2019;Foo;123;10.23 12/11/2019;Foo;123;10.23
RULES RULES
@ -1152,7 +1176,7 @@ $ ./ssvtest.sh
>= >=
# ** 60. tabular rules with comments # ** 61. tabular rules with comments
< <
10/2009/09,Flubber Co,50 10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,150 10/2009/09,Blubber Co,150