imp:csv:if: warn on invalid csv field names; improve doc [#2289]
This commit is contained in:
parent
05ae105b42
commit
3d55f260b3
@ -635,6 +635,9 @@ conditionaltablep = do
|
||||
|
||||
|
||||
-- A single matcher, on one line.
|
||||
-- This tries to parse first as a field matcher, then if that fails, as a whole-record matcher;
|
||||
-- the goal was to not break legacy whole-record patterns that happened to look a bit like a field matcher
|
||||
-- (eg, beginning with %, possibly preceded by & or !), or at least not to raise an error.
|
||||
matcherp' :: CsvRulesParser () -> CsvRulesParser Matcher
|
||||
matcherp' end = try (fieldmatcherp end) <|> recordmatcherp end
|
||||
|
||||
@ -686,6 +689,7 @@ csvfieldreferencep = do
|
||||
lift $ dbgparse 8 "trying csvfieldreferencep"
|
||||
char '%'
|
||||
T.cons '%' . textQuoteIfNeeded <$> fieldnamep
|
||||
-- XXX this parses any generic field name, which may not actually be a valid CSV field name [#2289]
|
||||
|
||||
-- A single regular expression
|
||||
regexp :: CsvRulesParser () -> CsvRulesParser Regexp
|
||||
@ -774,8 +778,8 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
|
||||
-- A matcher's target can be a specific CSV field, or the "whole record".
|
||||
--
|
||||
-- In the former case, note that the field reference must be either numeric or
|
||||
-- a csv field name declared by a `fields` rule; anything else will raise an error
|
||||
-- (to avoid confusion when a hledger field name doesn't work, see #2289).
|
||||
-- a csv field name declared by a `fields` rule; anything else will emit a warning to stderr
|
||||
-- (to reduce confusion when a hledger field name doesn't work; not an error, to avoid breaking legacy rules; see #2289).
|
||||
--
|
||||
-- In the latter case, the matched value will be a synthetic CSV record.
|
||||
-- Note this will not necessarily be the same as the original CSV record:
|
||||
@ -784,14 +788,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
|
||||
-- (This means that a field containing a comma will now look like two fields.)
|
||||
--
|
||||
matcherMatches :: Matcher -> Bool
|
||||
matcherMatches m =
|
||||
case m of
|
||||
RecordMatcher prefix pat -> maybeNegate prefix $ match pat val
|
||||
where val = T.intercalate "," record
|
||||
FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat val
|
||||
where val = replaceCsvFieldReference rules record csvfieldref
|
||||
matcherMatches = \case
|
||||
RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
|
||||
FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $
|
||||
fromMaybe (warn "'if %CSVFIELD' should use a name declared with 'fields', or a number" "") $
|
||||
replaceCsvFieldReference rules record csvfieldref
|
||||
where
|
||||
match pat val = regexMatchText (dbg7 "regex" pat) (dbg7 "value" val)
|
||||
match p v = regexMatchText (dbg7 "regex" p) (dbg7 "value" v)
|
||||
|
||||
-- | Group matchers into associative pairs based on prefix, e.g.:
|
||||
-- A
|
||||
@ -817,7 +820,7 @@ renderTemplate rules record t =
|
||||
(many
|
||||
( literaltextp
|
||||
<|> (matchrefp <&> replaceRegexGroupReference rules record)
|
||||
<|> (fieldrefp <&> replaceCsvFieldReference rules record)
|
||||
<|> (fieldrefp <&> replaceCsvFieldReference rules record <&> fromMaybe "")
|
||||
)
|
||||
)
|
||||
t
|
||||
@ -850,20 +853,18 @@ regexMatchValue rules record sgroup = let
|
||||
in atMay matchgroups group
|
||||
|
||||
getMatchGroups :: CsvRules -> CsvRecord -> Matcher -> [Text]
|
||||
getMatchGroups _ record (RecordMatcher _ regex) = let
|
||||
txt = T.intercalate "," record -- see caveats of wholecsvline, in `isBlockActive`
|
||||
in regexMatchTextGroups regex txt
|
||||
getMatchGroups rules record (FieldMatcher _ fieldref regex) = let
|
||||
txt = replaceCsvFieldReference rules record fieldref
|
||||
in regexMatchTextGroups regex txt
|
||||
getMatchGroups _ record (RecordMatcher _ regex) =
|
||||
regexMatchTextGroups regex $ T.intercalate "," record -- see caveats in matcherMatches
|
||||
getMatchGroups rules record (FieldMatcher _ fieldref regex) =
|
||||
regexMatchTextGroups regex $ fromMaybe "" $ replaceCsvFieldReference rules record fieldref
|
||||
|
||||
-- | Replace something that looks like a reference to a csv field ("%date" or "%1")
|
||||
-- with that field's value. If it doesn't look like a field reference, or if we
|
||||
-- can't find such a field, replace it with the empty string.
|
||||
replaceCsvFieldReference :: CsvRules -> CsvRecord -> CsvFieldReference -> Text
|
||||
-- can't find a csv field with that name, return nothing.
|
||||
replaceCsvFieldReference :: CsvRules -> CsvRecord -> CsvFieldReference -> Maybe Text
|
||||
replaceCsvFieldReference rules record s = case T.uncons s of
|
||||
Just ('%', fieldname) -> fromMaybe "" $ csvFieldValue rules record fieldname
|
||||
_ -> s
|
||||
Just ('%', fieldname) -> csvFieldValue rules record fieldname
|
||||
_ -> Nothing
|
||||
|
||||
-- | Get the (whitespace-stripped) value of a CSV field, identified by its name or
|
||||
-- column number, ("date" or "1"), from the given CSV record, if such a field exists.
|
||||
|
||||
@ -3655,44 +3655,44 @@ if ,,,,
|
||||
|
||||
## Matchers
|
||||
|
||||
There are two kinds:
|
||||
There are two kinds of matcher:
|
||||
|
||||
1. A record matcher is a word or single-line text fragment or regular expression (`REGEX`),
|
||||
which hledger will try to match case-insensitively anywhere within the CSV record.\
|
||||
Eg: `whole foods`
|
||||
1. A whole record matcher is simplest: it is just a word, single-line text fragment, or other regular expression,
|
||||
which hledger will try to match case-insensitively anywhere within the CSV record. Eg: `whole foods`.
|
||||
|
||||
2. A field matcher is preceded with a percent sign and [CSV field name](#field-names) (`%CSVFIELD REGEX`).
|
||||
hledger will try to match these just within the named CSV field.\
|
||||
Eg: `%date 2023`
|
||||
2. A field matcher has a percent-prefixed CSV field number or name before the pattern.
|
||||
Eg: `%3 whole foods` or `%description whole foods`.
|
||||
hledger will try to match the pattern just within that CSV field.
|
||||
|
||||
The regular expression is (as usual in hledger) a POSIX extended regular expression,
|
||||
that also supports GNU word boundaries (`\b`, `\B`, `\<`, `\>`),
|
||||
and nothing else.
|
||||
If you have trouble, see "Regular expressions" in the hledger manual (<https://hledger.org/hledger.html#regular-expressions>).
|
||||
When using these, there are two things to be aware of:
|
||||
|
||||
### What matchers match
|
||||
1. Whole record matchers see a synthetic reconstruction of the record, not the original data;
|
||||
values will be comma-separated, and quotes enclosing values and whitespace outside those quotes will be removed.\
|
||||
Eg when reading an SSV record like: `2023-01-01 ; "Acme, Inc. " ; 1,000`\
|
||||
the whole record matcher sees instead: `2023-01-01,Acme, Inc. ,1,000`
|
||||
|
||||
With record matchers, it's important to know that the record matched is not the original CSV record, but a modified one:
|
||||
separators will be converted to commas, and enclosing double quotes (but not enclosing whitespace) are removed.
|
||||
So for example, when reading an SSV file, if the original record was:
|
||||
```ssv
|
||||
2023-01-01; "Acme, Inc."; 1,000
|
||||
```
|
||||
the regex would see, and try to match, this modified record text:
|
||||
```
|
||||
2023-01-01,Acme, Inc., 1,000
|
||||
```
|
||||
2. In field matchers you must use either a CSV field number,
|
||||
or a [CSV field name](#field-names) which has been set by a [`fields` list](#fields-list).
|
||||
Anything else will print a warning, to avoid [confusion](https://github.com/simonmichael/hledger/issues/2289);
|
||||
if you see it, you should adjust your matchers. This might become an error in future.
|
||||
|
||||
### Combining matchers
|
||||
You can also prefix a matcher with `!` (and optional space) to negate it.
|
||||
Eg `! whole foods`, `! %3 whole foods`, `!%description whole foods` will match if "whole foods" is not present.
|
||||
*Added in 1.32.*
|
||||
|
||||
When an if block has multiple matchers, they are combined as follows:
|
||||
The pattern is, as usual in hledger, a POSIX extended regular expression
|
||||
that also supports GNU word boundaries (`\b`, `\B`, `\<`, `\>`) and nothing else.
|
||||
If you have trouble with it, see "Regular expressions" in the hledger manual (<https://hledger.org/hledger.html#regular-expressions>).
|
||||
|
||||
- By default they are OR'd (any of them can match)
|
||||
- When a matcher is preceded by ampersand (`&`, at the start of the line) it will be AND'ed with the previous matcher (all in the AND'ed group must match)
|
||||
- *Added in 1.32* When a matcher is preceded by an exclamation mark (`!`), it is negated (it must not match).
|
||||
### Multiple matchers
|
||||
|
||||
Note [currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398) there is a limitation:
|
||||
you can't use both `&` and `!` on the same line (you can't AND a negated matcher).
|
||||
When an if block has multiple matchers, each on its own line:
|
||||
|
||||
- By default they are OR'd (any of them can match).
|
||||
- Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match).
|
||||
|
||||
You can't use both `&` and `!` on the same line (you can't AND a negated matcher),
|
||||
[currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398).
|
||||
|
||||
### Match groups
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user