diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs index 245e56e49..9dcaa3c96 100644 --- a/hledger-lib/Hledger/Read/RulesReader.hs +++ b/hledger-lib/Hledger/Read/RulesReader.hs @@ -635,6 +635,9 @@ conditionaltablep = do -- A single matcher, on one line. +-- This tries to parse first as a field matcher, then if that fails, as a whole-record matcher; +-- the goal was to not break legacy whole-record patterns that happened to look a bit like a field matcher +-- (eg, beginning with %, possibly preceded by & or !), or at least not to raise an error. matcherp' :: CsvRulesParser () -> CsvRulesParser Matcher matcherp' end = try (fieldmatcherp end) <|> recordmatcherp end @@ -686,6 +689,7 @@ csvfieldreferencep = do lift $ dbgparse 8 "trying csvfieldreferencep" char '%' T.cons '%' . textQuoteIfNeeded <$> fieldnamep + -- XXX this parses any generic field name, which may not actually be a valid CSV field name [#2289] -- A single regular expression regexp :: CsvRulesParser () -> CsvRulesParser Regexp @@ -774,8 +778,8 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c -- A matcher's target can be a specific CSV field, or the "whole record". -- -- In the former case, note that the field reference must be either numeric or - -- a csv field name declared by a `fields` rule; anything else will raise an error - -- (to avoid confusion when a hledger field name doesn't work, see #2289). + -- a csv field name declared by a `fields` rule; anything else will emit a warning to stderr + -- (to reduce confusion when a hledger field name doesn't work; not an error, to avoid breaking legacy rules; see #2289). -- -- In the latter case, the matched value will be a synthetic CSV record. 
-- Note this will not necessarily be the same as the original CSV record: @@ -784,14 +788,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c -- (This means that a field containing a comma will now look like two fields.) -- matcherMatches :: Matcher -> Bool - matcherMatches m = - case m of - RecordMatcher prefix pat -> maybeNegate prefix $ match pat val - where val = T.intercalate "," record - FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat val - where val = replaceCsvFieldReference rules record csvfieldref + matcherMatches = \case + RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record + FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $ + fromMaybe (warn "'if %CSVFIELD' should use a name declared with 'fields', or a number" "") $ + replaceCsvFieldReference rules record csvfieldref where - match pat val = regexMatchText (dbg7 "regex" pat) (dbg7 "value" val) + match p v = regexMatchText (dbg7 "regex" p) (dbg7 "value" v) -- | Group matchers into associative pairs based on prefix, e.g.: -- A @@ -817,7 +820,7 @@ renderTemplate rules record t = (many ( literaltextp <|> (matchrefp <&> replaceRegexGroupReference rules record) - <|> (fieldrefp <&> replaceCsvFieldReference rules record) + <|> (fieldrefp <&> replaceCsvFieldReference rules record <&> fromMaybe "") ) ) t @@ -850,20 +853,18 @@ regexMatchValue rules record sgroup = let in atMay matchgroups group getMatchGroups :: CsvRules -> CsvRecord -> Matcher -> [Text] -getMatchGroups _ record (RecordMatcher _ regex) = let - txt = T.intercalate "," record -- see caveats of wholecsvline, in `isBlockActive` - in regexMatchTextGroups regex txt -getMatchGroups rules record (FieldMatcher _ fieldref regex) = let - txt = replaceCsvFieldReference rules record fieldref - in regexMatchTextGroups regex txt +getMatchGroups _ record (RecordMatcher _ regex) = + regexMatchTextGroups regex $ T.intercalate "," record -- see caveats 
in matcherMatches +getMatchGroups rules record (FieldMatcher _ fieldref regex) = + regexMatchTextGroups regex $ fromMaybe "" $ replaceCsvFieldReference rules record fieldref -- | Replace something that looks like a reference to a csv field ("%date" or "%1) -- with that field's value. If it doesn't look like a field reference, or if we --- can't find such a field, replace it with the empty string. -replaceCsvFieldReference :: CsvRules -> CsvRecord -> CsvFieldReference -> Text +-- can't find a csv field with that name, return nothing. +replaceCsvFieldReference :: CsvRules -> CsvRecord -> CsvFieldReference -> Maybe Text replaceCsvFieldReference rules record s = case T.uncons s of - Just ('%', fieldname) -> fromMaybe "" $ csvFieldValue rules record fieldname - _ -> s + Just ('%', fieldname) -> csvFieldValue rules record fieldname + _ -> Nothing -- | Get the (whitespace-stripped) value of a CSV field, identified by its name or -- column number, ("date" or "1"), from the given CSV record, if such a field exists. diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md index f1a1de0f5..64af02625 100644 --- a/hledger/hledger.m4.md +++ b/hledger/hledger.m4.md @@ -3655,44 +3655,44 @@ if ,,,, ## Matchers -There are two kinds: +There are two kinds of matcher: -1. A record matcher is a word or single-line text fragment or regular expression (`REGEX`), - which hledger will try to match case-insensitively anywhere within the CSV record.\ - Eg: `whole foods` +1. A whole record matcher is simplest: it is just a word, single-line text fragment, or other regular expression, + which hledger will try to match case-insensitively anywhere within the CSV record. Eg: `whole foods`. -2. A field matcher is preceded with a percent sign and [CSV field name](#field-names) (`%CSVFIELD REGEX`). - hledger will try to match these just within the named CSV field.\ - Eg: `%date 2023` +2. A field matcher has a percent-prefixed CSV field number or name before the pattern. 
+ Eg: `%3 whole foods` or `%description whole foods`. + hledger will try to match the pattern just within the named CSV field. -The regular expression is (as usual in hledger) a POSIX extended regular expression, -that also supports GNU word boundaries (`\b`, `\B`, `\<`, `\>`), -and nothing else. -If you have trouble, see "Regular expressions" in the hledger manual (). +When using these, there are two things to be aware of: -### What matchers match +1. Whole record matchers see a synthetic reconstruction of the record, not the original data; + values will be comma-separated, and quotes enclosing values and whitespace outside those quotes will be removed.\ + Eg when reading an SSV record like: `2023-01-01 ; "Acme, Inc. " ; 1,000`\ + the whole record matcher sees instead: `2023-01-01,Acme, Inc. ,1,000` -With record matchers, it's important to know that the record matched is not the original CSV record, but a modified one: -separators will be converted to commas, and enclosing double quotes (but not enclosing whitespace) are removed. -So for example, when reading an SSV file, if the original record was: -```ssv -2023-01-01; "Acme, Inc."; 1,000 -``` -the regex would see, and try to match, this modified record text: -``` -2023-01-01,Acme, Inc., 1,000 -``` +2. In field matchers you must use either a CSV field number, + or a [CSV field name](#field-names) which has been set by a [`fields` list](#fields-list). + Anything else will print a warning, to avoid [confusion](https://github.com/simonmichael/hledger/issues/2289); + if you see it, you should adjust your matchers. This might become an error in future. -### Combining matchers +You can also prefix a matcher with `!` (and optional space) to negate it. +Eg `! whole foods`, `! %3 whole foods`, `!%description whole foods` will match if "whole foods" is not present. 
+*Added in 1.32.* -When an if block has multiple matchers, they are combined as follows: +The pattern is, as usual in hledger, a POSIX extended regular expression +that also supports GNU word boundaries (`\b`, `\B`, `\<`, `\>`) and nothing else. +If you have trouble with it, see "Regular expressions" in the hledger manual (<https://hledger.org/hledger.html#regular-expressions>). -- By default they are OR'd (any of them can match) -- When a matcher is preceded by ampersand (`&`, at the start of the line) it will be AND'ed with the previous matcher (all in the AND'ed group must match) -- *Added in 1.32* When a matcher is preceded by an exclamation mark (`!`), it is negated (it must not match). +### Multiple matchers -Note [currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398) there is a limitation: -you can't use both `&` and `!` on the same line (you can't AND a negated matcher). +When an if block has multiple matchers, each on its own line: + +- By default they are OR'd (any of them can match). +- Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match). + +You can't use both `&` and `!` on the same line (you can't AND a negated matcher), +[currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398). ### Match groups