feat: import: interpolate regex matches in field templates (#2009)
Replace occurrences of '\N' (where N is a positive number) in field
templates with the corresponding regular expression match group, if it
exists.
E.g. Warp the date to the first of the month for the second posting
if %date (....-..)-..
comment2 date:\1-01
E.g. Strip a prefix from an imported account name
if %account1 liabilities:jon:(.*)
account1 \1
Fixes #2009.
Signed-off-by: Jonathan Dowland <jon@dow.land>
This commit is contained in:
parent
549c47bca8
commit
8bfa382c68
@ -372,10 +372,12 @@ JOURNAL-PSEUDO-FIELD: amount-in | amount-out | currency
|
|||||||
|
|
||||||
ASSIGNMENT-SEPARATOR: SPACE | ( : SPACE? )
|
ASSIGNMENT-SEPARATOR: SPACE | ( : SPACE? )
|
||||||
|
|
||||||
FIELD-VALUE: VALUE (possibly containing CSV-FIELD-REFERENCEs)
|
FIELD-VALUE: VALUE (possibly containing CSV-FIELD-REFERENCEs and REGEX-MATCHGROUP-REFERENCEs)
|
||||||
|
|
||||||
CSV-FIELD-REFERENCE: % CSV-FIELD
|
CSV-FIELD-REFERENCE: % CSV-FIELD
|
||||||
|
|
||||||
|
REGEX-MATCHGROUP-REFERENCE: \ DIGIT+
|
||||||
|
|
||||||
CSV-FIELD: ( FIELD-NAME | FIELD-NUMBER ) (corresponding to a CSV field)
|
CSV-FIELD: ( FIELD-NAME | FIELD-NUMBER ) (corresponding to a CSV field)
|
||||||
|
|
||||||
FIELD-NUMBER: DIGIT+
|
FIELD-NUMBER: DIGIT+
|
||||||
@ -707,7 +709,7 @@ hledgerField = getEffectiveAssignment
|
|||||||
-- | Look up the final value assigned to a hledger field, with csv field
|
-- | Look up the final value assigned to a hledger field, with csv field
|
||||||
-- references interpolated.
|
-- references interpolated.
|
||||||
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe Text
|
hledgerFieldValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Maybe Text
|
||||||
hledgerFieldValue rules record = fmap (renderTemplate rules record) . hledgerField rules record
|
hledgerFieldValue rules record f = (fmap (renderTemplate rules record f) . hledgerField rules record) f
|
||||||
|
|
||||||
maybeNegate :: MatcherPrefix -> Bool -> Bool
|
maybeNegate :: MatcherPrefix -> Bool -> Bool
|
||||||
maybeNegate Not origbool = not origbool
|
maybeNegate Not origbool = not origbool
|
||||||
@ -725,61 +727,88 @@ getEffectiveAssignment rules record f = lastMay $ map snd $ assignments
|
|||||||
where
|
where
|
||||||
-- all active assignments to field f, in order
|
-- all active assignments to field f, in order
|
||||||
assignments = dbg9 "csv assignments" $ filter ((==f).fst) $ toplevelassignments ++ conditionalassignments
|
assignments = dbg9 "csv assignments" $ filter ((==f).fst) $ toplevelassignments ++ conditionalassignments
|
||||||
where
|
-- all top level field assignments
|
||||||
-- all top level field assignments
|
toplevelassignments = rassignments rules
|
||||||
toplevelassignments = rassignments rules
|
-- all field assignments in conditional blocks assigning to field f and active for the current csv record
|
||||||
-- all field assignments in conditional blocks assigning to field f and active for the current csv record
|
conditionalassignments = concatMap cbAssignments $ filter (isBlockActive rules record) $ (rblocksassigning rules) f
|
||||||
conditionalassignments = concatMap cbAssignments $ filter isBlockActive $ (rblocksassigning rules) f
|
|
||||||
where
|
|
||||||
-- does this conditional block match the current csv record ?
|
|
||||||
isBlockActive :: ConditionalBlock -> Bool
|
|
||||||
isBlockActive CB{..} = any (all matcherMatches) $ groupedMatchers cbMatchers
|
|
||||||
where
|
|
||||||
-- does this individual matcher match the current csv record ?
|
|
||||||
matcherMatches :: Matcher -> Bool
|
|
||||||
matcherMatches (RecordMatcher prefix pat) = maybeNegate prefix origbool
|
|
||||||
where
|
|
||||||
pat' = dbg7 "regex" pat
|
|
||||||
-- A synthetic whole CSV record to match against. Note, this can be
|
|
||||||
-- different from the original CSV data:
|
|
||||||
-- - any whitespace surrounding field values is preserved
|
|
||||||
-- - any quotes enclosing field values are removed
|
|
||||||
-- - and the field separator is always comma
|
|
||||||
-- which means that a field containing a comma will look like two fields.
|
|
||||||
wholecsvline = dbg7 "wholecsvline" $ T.intercalate "," record
|
|
||||||
origbool = regexMatchText pat' wholecsvline
|
|
||||||
matcherMatches (FieldMatcher prefix csvfieldref pat) = maybeNegate prefix origbool
|
|
||||||
where
|
|
||||||
-- the value of the referenced CSV field to match against.
|
|
||||||
csvfieldvalue = dbg7 "csvfieldvalue" $ replaceCsvFieldReference rules record csvfieldref
|
|
||||||
origbool = regexMatchText pat csvfieldvalue
|
|
||||||
|
|
||||||
-- | Group matchers into associative pairs based on prefix, e.g.:
|
-- does this conditional block match the current csv record ?
|
||||||
-- A
|
isBlockActive :: CsvRules -> CsvRecord -> ConditionalBlock -> Bool
|
||||||
-- & B
|
isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers cbMatchers
|
||||||
-- C
|
where
|
||||||
-- D
|
-- does this individual matcher match the current csv record ?
|
||||||
-- & E
|
matcherMatches :: Matcher -> Bool
|
||||||
-- => [[A, B], [C], [D, E]]
|
matcherMatches (RecordMatcher prefix pat) = maybeNegate prefix origbool
|
||||||
groupedMatchers :: [Matcher] -> [[Matcher]]
|
where
|
||||||
groupedMatchers [] = []
|
pat' = dbg7 "regex" pat
|
||||||
groupedMatchers (x:xs) = (x:ys) : groupedMatchers zs
|
-- A synthetic whole CSV record to match against. Note, this can be
|
||||||
where
|
-- different from the original CSV data:
|
||||||
(ys, zs) = span (\y -> matcherPrefix y == And) xs
|
-- - any whitespace surrounding field values is preserved
|
||||||
matcherPrefix :: Matcher -> MatcherPrefix
|
-- - any quotes enclosing field values are removed
|
||||||
matcherPrefix (RecordMatcher prefix _) = prefix
|
-- - and the field separator is always comma
|
||||||
matcherPrefix (FieldMatcher prefix _ _) = prefix
|
-- which means that a field containing a comma will look like two fields.
|
||||||
|
wholecsvline = dbg7 "wholecsvline" $ T.intercalate "," record
|
||||||
|
origbool = regexMatchText pat' wholecsvline
|
||||||
|
matcherMatches (FieldMatcher prefix csvfieldref pat) = maybeNegate prefix origbool
|
||||||
|
where
|
||||||
|
-- the value of the referenced CSV field to match against.
|
||||||
|
csvfieldvalue = dbg7 "csvfieldvalue" $ replaceCsvFieldReference rules record csvfieldref
|
||||||
|
origbool = regexMatchText pat csvfieldvalue
|
||||||
|
|
||||||
|
-- | Group matchers into associative pairs based on prefix, e.g.:
|
||||||
|
-- A
|
||||||
|
-- & B
|
||||||
|
-- C
|
||||||
|
-- D
|
||||||
|
-- & E
|
||||||
|
-- => [[A, B], [C], [D, E]]
|
||||||
|
groupedMatchers :: [Matcher] -> [[Matcher]]
|
||||||
|
groupedMatchers [] = []
|
||||||
|
groupedMatchers (x:xs) = (x:ys) : groupedMatchers zs
|
||||||
|
where
|
||||||
|
(ys, zs) = span (\y -> matcherPrefix y == And) xs
|
||||||
|
matcherPrefix :: Matcher -> MatcherPrefix
|
||||||
|
matcherPrefix (RecordMatcher prefix _) = prefix
|
||||||
|
matcherPrefix (FieldMatcher prefix _ _) = prefix
|
||||||
|
|
||||||
-- | Render a field assignment's template, possibly interpolating referenced
|
-- | Render a field assignment's template, possibly interpolating referenced
|
||||||
-- CSV field values. Outer whitespace is removed from interpolated values.
|
-- CSV field values. Outer whitespace is removed from interpolated values.
|
||||||
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> Text
|
renderTemplate :: CsvRules -> CsvRecord -> HledgerFieldName -> FieldTemplate -> Text
|
||||||
renderTemplate rules record t = maybe t mconcat $ parseMaybe
|
renderTemplate rules record f t = maybe t mconcat $ parseMaybe
|
||||||
(many $ takeWhile1P Nothing (/='%')
|
(many $ takeWhile1P Nothing isNotEscapeChar
|
||||||
|
<|> replaceRegexGroupReference rules record f <$> matchp
|
||||||
<|> replaceCsvFieldReference rules record <$> referencep)
|
<|> replaceCsvFieldReference rules record <$> referencep)
|
||||||
t
|
t
|
||||||
where
|
where
|
||||||
|
-- XXX: can we return a parsed Int here?
|
||||||
|
matchp = liftA2 T.cons (char '\\') (takeWhile1P (Just "matchref") isDigit) :: Parsec HledgerParseErrorData Text Text
|
||||||
referencep = liftA2 T.cons (char '%') (takeWhile1P (Just "reference") isFieldNameChar) :: Parsec HledgerParseErrorData Text Text
|
referencep = liftA2 T.cons (char '%') (takeWhile1P (Just "reference") isFieldNameChar) :: Parsec HledgerParseErrorData Text Text
|
||||||
isFieldNameChar c = isAlphaNum c || c == '_' || c == '-'
|
isFieldNameChar c = isAlphaNum c || c == '_' || c == '-'
|
||||||
|
isNotEscapeChar c = c /='%' && c /= '\\'
|
||||||
|
|
||||||
|
-- | Replace something that looks like a Regex match group reference with the
|
||||||
|
-- resulting match group value after applying the Regex.
|
||||||
|
replaceRegexGroupReference :: CsvRules -> CsvRecord -> HledgerFieldName -> FieldTemplate -> Text
|
||||||
|
replaceRegexGroupReference rules record f s = case T.uncons s of
|
||||||
|
Just ('\\', group) -> fromMaybe "" $ regexMatchValue rules record f group
|
||||||
|
_ -> s
|
||||||
|
|
||||||
|
regexMatchValue :: CsvRules -> CsvRecord -> HledgerFieldName -> Text -> Maybe Text
|
||||||
|
regexMatchValue rules record f sgroup = let
|
||||||
|
matchgroups = concatMap (getMatchGroups rules record)
|
||||||
|
$ concatMap cbMatchers
|
||||||
|
$ filter (isBlockActive rules record)
|
||||||
|
$ rblocksassigning rules f
|
||||||
|
group = (read (T.unpack sgroup) :: Int) - 1 -- adjust to 0-indexing
|
||||||
|
in atMay matchgroups group
|
||||||
|
|
||||||
|
getMatchGroups :: CsvRules -> CsvRecord -> Matcher -> [Text]
|
||||||
|
getMatchGroups _ record (RecordMatcher _ regex) = let
|
||||||
|
txt = T.intercalate "," record -- see caveats of wholecsvline, in `isBlockActive`
|
||||||
|
in regexMatchTextGroups regex txt
|
||||||
|
getMatchGroups rules record (FieldMatcher _ fieldref regex) = let
|
||||||
|
txt = replaceCsvFieldReference rules record fieldref
|
||||||
|
in regexMatchTextGroups regex txt
|
||||||
|
|
||||||
-- | Replace something that looks like a reference to a csv field ("%date" or "%1)
|
-- | Replace something that looks like a reference to a csv field ("%date" or "%1)
|
||||||
-- with that field's value. If it doesn't look like a field reference, or if we
|
-- with that field's value. If it doesn't look like a field reference, or if we
|
||||||
@ -1159,7 +1188,7 @@ getAmount rules record currency p1IsVirtual n =
|
|||||||
|
|
||||||
-- assignments to any of these field names with non-empty values
|
-- assignments to any of these field names with non-empty values
|
||||||
assignments = [(f,a') | f <- fieldnames
|
assignments = [(f,a') | f <- fieldnames
|
||||||
, Just v <- [T.strip . renderTemplate rules record <$> hledgerField rules record f]
|
, Just v <- [T.strip . renderTemplate rules record f <$> hledgerField rules record f]
|
||||||
, not $ T.null v
|
, not $ T.null v
|
||||||
-- XXX maybe ignore rule-generated values like "", "-", "$", "-$", "$-" ? cf CSV FORMAT -> "amount", "Setting amounts",
|
-- XXX maybe ignore rule-generated values like "", "-", "$", "-$", "$-" ? cf CSV FORMAT -> "amount", "Setting amounts",
|
||||||
, let a = parseAmount rules record currency v
|
, let a = parseAmount rules record currency v
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user