feat: csv: allow multiple matchers on the same line

`If blocks` and `If tables` now allow multiple matchers on the same line
separated by `&&` (AND) or `&& !` (AND NOT).

Example `if block` with two matchers on the same line:

	if %description amazon && %date 2025-02-22
	    account2 expenses:books

Example `if table` with two matchers on the same line:

	if,account2
	%description amazon && %date 2025-02-22, expenses:books
This commit is contained in:
Thomas Miedema 2025-02-24 11:29:54 +01:00 committed by Simon Michael
parent 0d98dd1351
commit 2faceb8e1b
2 changed files with 57 additions and 23 deletions

View File

@ -637,19 +637,19 @@ conditionaltablep = do
])
when (null body) $
customFailure $ parseErrorAt start $ "start of conditional table found, but no assignment rules afterward"
return $ flip map body $ \(m,vs) ->
CB{cbMatchers=[m], cbAssignments=zip fields vs}
return $ flip map body $ \(ms,vs) ->
CB{cbMatchers=ms, cbAssignments=zip fields vs}
<?> "conditional table"
where
bodylinep :: Char -> [Text] -> CsvRulesParser (Matcher,[FieldTemplate])
bodylinep :: Char -> [Text] -> CsvRulesParser ([Matcher],[FieldTemplate])
bodylinep sep fields = do
off <- getOffset
m <- matcherp' $ void $ char sep
ms <- matcherp' (lookAhead . void . char $ sep) `manyTill` char sep
vs <- T.split (==sep) . T.pack <$> lift restofline
if (length vs /= length fields)
then customFailure $ parseErrorAt off $ ((printf "line of conditional table should have %d values, but this one has only %d" (length fields) (length vs)) :: String)
else return (m,vs)
else return (ms,vs)
-- A single matcher, on one line.
-- This tries to parse first as a field matcher, then if that fails, as a whole-record matcher;
@ -700,7 +700,7 @@ matcherprefixp :: CsvRulesParser MatcherPrefix
matcherprefixp = do
lift $ dbgparse 8 "trying matcherprefixp"
(do
char '&' >> lift skipNonNewlineSpaces
char '&' >> optional (char '&') >> lift skipNonNewlineSpaces
fromMaybe And <$> optional (char '!' >> lift skipNonNewlineSpaces >> return AndNot))
<|> (char '!' >> lift skipNonNewlineSpaces >> return Not)
<|> return Or
@ -718,10 +718,12 @@ regexp end = do
lift $ dbgparse 8 "trying regexp"
-- notFollowedBy matchoperatorp
c <- lift nonspace
cs <- anySingle `manyTill` end
cs <- anySingle `manyTill` (double_ampersand <|> end)
case toRegexCI . T.strip . T.pack $ c:cs of
Left x -> Fail.fail $ "CSV parser: " ++ x
Right x -> return x
where
-- Lookahead for "&&", which separates multiple matchers on one line.
-- The `try` matters: without it, a lone '&' inside a regex (eg "AT&T") makes
-- the second `char '&'` fail after the first has consumed input, and that
-- consuming failure aborts the enclosing `manyTill` instead of letting it
-- carry on collecting the regex's characters.
double_ampersand = lookAhead . void . try $ char '&' >> char '&'
-- -- A match operator, indicating the type of match to perform.
-- -- Currently just ~ meaning case insensitive infix regex match.
@ -1559,7 +1561,7 @@ tests_RulesReader = testGroup "RulesReader" [
,testCase "quoted name" $ parseWithState' defrules csvfieldreferencep "%\"csv date\"" @?= (Right "%\"csv date\"")
]
,testGroup "matcherp" [
,testGroup "recordmatcherp" [
testCase "recordmatcherp" $
parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "A A")
@ -1567,18 +1569,47 @@ tests_RulesReader = testGroup "RulesReader" [
,testCase "recordmatcherp.starts-with-&" $
parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A")
,testCase "fieldmatcherp.starts-with-%" $
parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A")
,testCase "recordmatcherp.starts-with-&&" $
parseWithState' defrules matcherp "&& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A")
,testCase "fieldmatcherp" $
,testCase "recordmatcherp.starts-with-&&-!" $
parseWithState' defrules matcherp "&& ! A A\n" @?= (Right $ RecordMatcher AndNot $ toRegexCI' "A A")
,testCase "recordmatcherp.does-not-start-with-%" $
parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A")
]
,testGroup "fieldmatcherp" [
testCase "fieldmatcherp" $
parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher Or "%description" $ toRegexCI' "A A")
,testCase "fieldmatcherp.starts-with-&" $
parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A")
,testCase "fieldmatcherp.starts-with-&&" $
parseWithState' defrules matcherp "&& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A")
,testCase "fieldmatcherp.starts-with-&&-!" $
parseWithState' defrules matcherp "&& ! %description A A\n" @?= (Right $ FieldMatcher AndNot "%description" $ toRegexCI' "A A")
-- ,testCase "fieldmatcherp with operator" $
-- parseWithState' defrules matcherp "%description ~ A A\n" @?= (Right $ FieldMatcher "%description" "A A")
]
,testGroup "regexp" [
testCase "regexp.ends-before-&&" $
parseWithState' defrules (regexp empty) "A A && xxx" @?= (Right $ toRegexCI' "A A")
]
, let matchers = [RecordMatcher Or (toRegexCI' "A"), RecordMatcher And (toRegexCI' "B")]
assignments = [("account2", "foo"), ("comment2", "bar")]
block = CB matchers assignments
in
testGroup "Combine multiple matchers on the same line" [
testCase "conditionalblockp" $
parseWithState' defrules conditionalblockp "if A && B\n account2 foo\n comment2 bar" @?= (Right block)
,testCase "conditionaltablep" $
parseWithState' defrules conditionaltablep "if,account2,comment2\nA && B,foo,bar" @?= (Right [block])
]
,testGroup "hledgerField" [

View File

@ -3741,10 +3741,13 @@ If you have trouble with it, see "Regular expressions" in the hledger manual (<h
When an if block has multiple matchers, each on its own line,
- By default they are OR'd (any of them can match).
- Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match).
- Matcher lines beginning with `&` (or `&&`, *since 1.42*) are AND'ed with the matcher above (all in the AND'ed group must match).
- Matcher lines beginning with `& !` (*since 1.41*, or `&& !`, *since 1.42*) are first negated and then AND'ed with the matcher above.
*(Since 1.41:)*
You can use a negated `!` matcher on a `&` line, meaning AND NOT.
You can also combine multiple matchers on the same line separated by `&&` (AND) or `&& !` (AND NOT).
Eg `%description amazon && %date 2025-01-01` will match only when the
description field contains "amazon" and the date field equals 2025-01-01.
*Added in 1.42.*
### Match groups
@ -3779,9 +3782,9 @@ they can express many matchers and field assignments in a more compact tabular f
```rules
if,HLEDGERFIELD1,HLEDGERFIELD2,...
MATCHERA,VALUE1,VALUE2,...
MATCHERB,VALUE1,VALUE2,...
; Comment line that explains MATCHERC
MATCHERC,VALUE1,VALUE2,...
MATCHERB && MATCHERC,VALUE1,VALUE2,... (*since 1.42*)
; Comment line that explains MATCHERD
MATCHERD,VALUE1,VALUE2,...
<empty line>
```
@ -3796,8 +3799,8 @@ You can use the comment lines in the table body.
The table must be terminated by an empty line (or end of file).
An if table like the above is interpreted as follows:
try all of the matchers;
whenever a matcher succeeds, assign all of the values on that line to the corresponding hledger fields;
try all of the lines with matchers;
whenever a line with matchers succeeds, assign all of the values on that line to the corresponding hledger fields;
If multiple lines match, later lines will override fields assigned by the earlier ones - just like the sequence of `if` blocks would behave.
The if table presented above is equivalent to this sequence of if blocks:
@ -3808,13 +3811,13 @@ if MATCHERA
HLEDGERFIELD2 VALUE2
...
if MATCHERB
if MATCHERB && MATCHERC
HLEDGERFIELD1 VALUE1
HLEDGERFIELD2 VALUE2
...
; Comment line which explains MATCHERC
if MATCHERC
; Comment line which explains MATCHERD
if MATCHERD
HLEDGERFIELD1 VALUE1
HLEDGERFIELD2 VALUE2
...