csv: allow single field matching; more docs and tests
This commit is contained in:
parent
becd891dd1
commit
d537f1fe07
@ -3,6 +3,10 @@
|
|||||||
A reader for CSV data, using an extra rules file to help interpret the data.
|
A reader for CSV data, using an extra rules file to help interpret the data.
|
||||||
|
|
||||||
-}
|
-}
|
||||||
|
-- Lots of haddocks in this file are for non-exported types.
|
||||||
|
-- Here's a command that will render them:
|
||||||
|
-- stack haddock hledger-lib --fast --no-haddock-deps --haddock-arguments='--ignore-all-exports' --open
|
||||||
|
|
||||||
{-# LANGUAGE CPP #-}
|
{-# LANGUAGE CPP #-}
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
@ -12,6 +16,8 @@ A reader for CSV data, using an extra rules file to help interpret the data.
|
|||||||
{-# LANGUAGE TypeSynonymInstances #-}
|
{-# LANGUAGE TypeSynonymInstances #-}
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
{-# LANGUAGE PackageImports #-}
|
{-# LANGUAGE PackageImports #-}
|
||||||
|
{-# LANGUAGE MultiWayIf #-}
|
||||||
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
{-# LANGUAGE RecordWildCards #-}
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
|
|
||||||
module Hledger.Read.CsvReader (
|
module Hledger.Read.CsvReader (
|
||||||
@ -115,15 +121,19 @@ parseSeparator = specials . map toLower
|
|||||||
|
|
||||||
-- | Read a Journal from the given CSV data (and filename, used for error
|
-- | Read a Journal from the given CSV data (and filename, used for error
|
||||||
-- messages), or return an error. Proceed as follows:
|
-- messages), or return an error. Proceed as follows:
|
||||||
-- @
|
--
|
||||||
-- 1. parse CSV conversion rules from the specified rules file, or from
|
-- 1. parse CSV conversion rules from the specified rules file, or from
|
||||||
-- the default rules file for the specified CSV file, if it exists,
|
-- the default rules file for the specified CSV file, if it exists,
|
||||||
-- or throw a parse error; if it doesn't exist, use built-in default rules
|
-- or throw a parse error; if it doesn't exist, use built-in default rules
|
||||||
|
--
|
||||||
-- 2. parse the CSV data, or throw a parse error
|
-- 2. parse the CSV data, or throw a parse error
|
||||||
|
--
|
||||||
-- 3. convert the CSV records to transactions using the rules
|
-- 3. convert the CSV records to transactions using the rules
|
||||||
|
--
|
||||||
-- 4. if the rules file didn't exist, create it with the default rules and filename
|
-- 4. if the rules file didn't exist, create it with the default rules and filename
|
||||||
|
--
|
||||||
-- 5. return the transactions as a Journal
|
-- 5. return the transactions as a Journal
|
||||||
-- @
|
--
|
||||||
readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
|
readJournalFromCsv :: Maybe FilePath -> FilePath -> Text -> IO (Either String Journal)
|
||||||
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
|
readJournalFromCsv Nothing "-" _ = return $ Left "please use --rules-file when reading CSV from stdin"
|
||||||
readJournalFromCsv mrulesfile csvfile csvdata =
|
readJournalFromCsv mrulesfile csvfile csvdata =
|
||||||
@ -379,30 +389,44 @@ convert a particular CSV data file into meaningful journal transactions.
|
|||||||
-}
|
-}
|
||||||
data CsvRules = CsvRules {
|
data CsvRules = CsvRules {
|
||||||
rdirectives :: [(DirectiveName,String)],
|
rdirectives :: [(DirectiveName,String)],
|
||||||
|
-- ^ top-level rules, as (keyword, value) pairs
|
||||||
rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
|
rcsvfieldindexes :: [(CsvFieldName, CsvFieldIndex)],
|
||||||
|
-- ^ csv field names and their column number, if declared by a fields list
|
||||||
rassignments :: [(JournalFieldName, FieldTemplate)],
|
rassignments :: [(JournalFieldName, FieldTemplate)],
|
||||||
|
-- ^ top-level assignments to hledger fields, as (field name, value template) pairs
|
||||||
rconditionalblocks :: [ConditionalBlock]
|
rconditionalblocks :: [ConditionalBlock]
|
||||||
|
-- ^ conditional blocks, which contain additional assignments/rules to apply to matched csv records
|
||||||
} deriving (Show, Eq)
|
} deriving (Show, Eq)
|
||||||
|
|
||||||
type CsvRulesParser a = StateT CsvRules SimpleTextParser a
|
type CsvRulesParser a = StateT CsvRules SimpleTextParser a
|
||||||
|
|
||||||
|
-- | The keyword of a CSV rule - "fields", "skip", "if", etc.
|
||||||
type DirectiveName = String
|
type DirectiveName = String
|
||||||
|
-- | CSV field name.
|
||||||
type CsvFieldName = String
|
type CsvFieldName = String
|
||||||
|
-- | 1-based CSV column number.
|
||||||
type CsvFieldIndex = Int
|
type CsvFieldIndex = Int
|
||||||
type JournalFieldName = String
|
-- | Percent symbol followed by a CSV field name or column number. Eg: %date, %1.
|
||||||
|
type CsvFieldReference = String
|
||||||
|
-- | One of the standard hledger field names that can be assigned to.
|
||||||
|
type JournalFieldName = String -- XXX rename to HledgerFieldName
|
||||||
|
-- | A text value to be assigned to a hledger field, possibly
|
||||||
|
-- containing csv field references to be interpolated.
|
||||||
type FieldTemplate = String
|
type FieldTemplate = String
|
||||||
|
-- | A strptime date parsing pattern, as supported by Data.Time.Format.
|
||||||
type DateFormat = String
|
type DateFormat = String
|
||||||
|
-- | A regular expression.
|
||||||
type RegexpPattern = String
|
type RegexpPattern = String
|
||||||
|
|
||||||
-- | A single test for matching a CSV record, in one way or another.
|
-- | A single test for matching a CSV record, in one way or another.
|
||||||
data Matcher =
|
data Matcher =
|
||||||
RecordMatcher RegexpPattern -- ^ match if this regexp matches the overall CSV record
|
RecordMatcher RegexpPattern -- ^ match if this regexp matches the overall CSV record
|
||||||
-- | FieldMatcher CsvFieldName RegexpPattern -- ^ match if this regexp matches the named CSV field
|
| FieldMatcher CsvFieldReference RegexpPattern -- ^ match if this regexp matches the referenced CSV field's value
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
-- | A conditional block: a set of CSV record matchers, and a sequence
|
-- | A conditional block: a set of CSV record matchers, and a sequence
|
||||||
-- of zero or more rules which will be enabled only when one or of the
|
-- of rules which will be enabled only if one or more of the matchers
|
||||||
-- matchers succeeds.
|
-- succeeds.
|
||||||
--
|
--
|
||||||
-- Three types of rule are allowed inside conditional blocks: field
|
-- Three types of rule are allowed inside conditional blocks: field
|
||||||
-- assignments, skip, end. (A skip or end rule is stored as if it was
|
-- assignments, skip, end. (A skip or end rule is stored as if it was
|
||||||
@ -634,6 +658,7 @@ fieldvalp = do
|
|||||||
lift $ dbgparse 2 "trying fieldvalp"
|
lift $ dbgparse 2 "trying fieldvalp"
|
||||||
anySingle `manyTill` lift eolof
|
anySingle `manyTill` lift eolof
|
||||||
|
|
||||||
|
-- A conditional block: one or more matchers, one per line, followed by one or more indented rules.
|
||||||
conditionalblockp :: CsvRulesParser ConditionalBlock
|
conditionalblockp :: CsvRulesParser ConditionalBlock
|
||||||
conditionalblockp = do
|
conditionalblockp = do
|
||||||
lift $ dbgparse 3 "trying conditionalblockp"
|
lift $ dbgparse 3 "trying conditionalblockp"
|
||||||
@ -645,10 +670,14 @@ conditionalblockp = do
|
|||||||
return $ CB{cbMatchers=ms, cbAssignments=as}
|
return $ CB{cbMatchers=ms, cbAssignments=as}
|
||||||
<?> "conditional block"
|
<?> "conditional block"
|
||||||
|
|
||||||
-- A single matcher, on one line
|
-- A single matcher, on one line.
|
||||||
-- XXX Currently only parses a RecordMatcher
|
|
||||||
matcherp :: CsvRulesParser Matcher
|
matcherp :: CsvRulesParser Matcher
|
||||||
matcherp = do
|
matcherp = try fieldmatcherp <|> recordmatcherp
|
||||||
|
|
||||||
|
-- A single whole-record matcher.
|
||||||
|
-- A pattern on the whole line, not containing any of the match operators (~).
|
||||||
|
recordmatcherp :: CsvRulesParser Matcher
|
||||||
|
recordmatcherp = do
|
||||||
lift $ dbgparse 2 "trying matcherp"
|
lift $ dbgparse 2 "trying matcherp"
|
||||||
-- pos <- currentPos
|
-- pos <- currentPos
|
||||||
_ <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline)
|
_ <- optional (matchoperatorp >> lift (skipMany spacenonewline) >> optional newline)
|
||||||
@ -658,8 +687,35 @@ matcherp = do
|
|||||||
return $ RecordMatcher r
|
return $ RecordMatcher r
|
||||||
<?> "record matcher"
|
<?> "record matcher"
|
||||||
|
|
||||||
-- An operator indicating the type of match
|
-- | A single matcher for a specific field. A csv field reference (like %date or %1),
|
||||||
-- XXX currently only ~ (regex), unused
|
-- a match operator (~), and a pattern on the rest of the line, optionally
|
||||||
|
-- space-separated. Eg:
|
||||||
|
-- %description ~ chez jacques
|
||||||
|
fieldmatcherp :: CsvRulesParser Matcher
|
||||||
|
fieldmatcherp = do
|
||||||
|
lift $ dbgparse 2 "trying fieldmatcher"
|
||||||
|
-- An optional fieldname (default: "all")
|
||||||
|
-- f <- fromMaybe "all" `fmap` (optional $ do
|
||||||
|
-- f' <- fieldnamep
|
||||||
|
-- lift (skipMany spacenonewline)
|
||||||
|
-- return f')
|
||||||
|
f <- csvfieldreferencep <* lift (skipMany spacenonewline)
|
||||||
|
-- optional operator.. just ~ (case insensitive infix regex) for now
|
||||||
|
_op <- fromMaybe "~" <$> optional matchoperatorp
|
||||||
|
lift (skipMany spacenonewline)
|
||||||
|
r <- regexp
|
||||||
|
return $ FieldMatcher f r
|
||||||
|
<?> "field matcher"
|
||||||
|
|
||||||
|
csvfieldreferencep :: CsvRulesParser CsvFieldReference
|
||||||
|
csvfieldreferencep = do
|
||||||
|
lift $ dbgparse 3 "trying csvfieldreferencep"
|
||||||
|
char '%'
|
||||||
|
f <- fieldnamep
|
||||||
|
return $ '%' : quoteIfNeeded f
|
||||||
|
|
||||||
|
-- A match operator, indicating the type of match to perform.
|
||||||
|
-- Currently just ~ meaning case insensitive infix regex match.
|
||||||
matchoperatorp :: CsvRulesParser String
|
matchoperatorp :: CsvRulesParser String
|
||||||
matchoperatorp = fmap T.unpack $ choiceInState $ map string
|
matchoperatorp = fmap T.unpack $ choiceInState $ map string
|
||||||
["~"
|
["~"
|
||||||
@ -677,19 +733,6 @@ regexp = do
|
|||||||
cs <- anySingle `manyTill` lift eolof
|
cs <- anySingle `manyTill` lift eolof
|
||||||
return $ strip $ c:cs
|
return $ strip $ c:cs
|
||||||
|
|
||||||
-- fieldmatcher = do
|
|
||||||
-- dbgparse 2 "trying fieldmatcher"
|
|
||||||
-- f <- fromMaybe "all" `fmap` (optional $ do
|
|
||||||
-- f' <- fieldname
|
|
||||||
-- lift (skipMany spacenonewline)
|
|
||||||
-- return f')
|
|
||||||
-- char '~'
|
|
||||||
-- lift (skipMany spacenonewline)
|
|
||||||
-- ps <- patterns
|
|
||||||
-- let r = "(" ++ intercalate "|" ps ++ ")"
|
|
||||||
-- return (f,r)
|
|
||||||
-- <?> "field matcher"
|
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- Converting CSV records to journal transactions
|
-- Converting CSV records to journal transactions
|
||||||
|
|
||||||
@ -962,40 +1005,64 @@ showRecord :: CsvRecord -> String
|
|||||||
showRecord r = "the CSV record is: "++intercalate "," (map show r)
|
showRecord r = "the CSV record is: "++intercalate "," (map show r)
|
||||||
|
|
||||||
-- | Given the conversion rules, a CSV record and a journal entry field name, find
|
-- | Given the conversion rules, a CSV record and a journal entry field name, find
|
||||||
-- the template value ultimately assigned to this field, either at top
|
-- the template value ultimately assigned to this field, if any,
|
||||||
-- level or in a matching conditional block. Conditional blocks'
|
-- by a field assignment at top level or in a conditional block matching this record.
|
||||||
-- patterns are matched against an approximation of the original CSV
|
--
|
||||||
-- record: all the field values with commas intercalated.
|
-- Note conditional blocks' patterns are matched against an approximation of the
|
||||||
|
-- CSV record: all the field values, without enclosing quotes, comma-separated.
|
||||||
|
--
|
||||||
getEffectiveAssignment :: CsvRules -> CsvRecord -> JournalFieldName -> Maybe FieldTemplate
|
getEffectiveAssignment :: CsvRules -> CsvRecord -> JournalFieldName -> Maybe FieldTemplate
|
||||||
getEffectiveAssignment rules record f = lastMay $ assignmentsFor f
|
getEffectiveAssignment rules record f = lastMay $ map snd $ assignments
|
||||||
where
|
where
|
||||||
assignmentsFor f = map snd $ filter ((==f).fst) $ toplevelassignments ++ conditionalassignments
|
-- all active assignments to field f, in order
|
||||||
|
assignments = dbg2 "assignments" $ filter ((==f).fst) $ toplevelassignments ++ conditionalassignments
|
||||||
where
|
where
|
||||||
|
-- all top level field assignments
|
||||||
toplevelassignments = rassignments rules
|
toplevelassignments = rassignments rules
|
||||||
conditionalassignments = concatMap cbAssignments $ filter blockMatches $ blocksAssigning f
|
-- all field assignments in conditional blocks assigning to field f and active for the current csv record
|
||||||
|
conditionalassignments = concatMap cbAssignments $ filter isBlockActive $ blocksAssigning f
|
||||||
where
|
where
|
||||||
|
-- all conditional blocks which can potentially assign field f
|
||||||
blocksAssigning f = filter (any ((==f).fst) . cbAssignments) $ rconditionalblocks rules
|
blocksAssigning f = filter (any ((==f).fst) . cbAssignments) $ rconditionalblocks rules
|
||||||
blockMatches :: ConditionalBlock -> Bool
|
-- does this conditional block match the current csv record ?
|
||||||
blockMatches CB{..} = any matcherMatches cbMatchers
|
isBlockActive :: ConditionalBlock -> Bool
|
||||||
|
isBlockActive CB{..} = any matcherMatches cbMatchers
|
||||||
where
|
where
|
||||||
|
-- does this individual matcher match the current csv record ?
|
||||||
matcherMatches :: Matcher -> Bool
|
matcherMatches :: Matcher -> Bool
|
||||||
matcherMatches (RecordMatcher pat) = regexMatchesCI pat csvline
|
matcherMatches (RecordMatcher pat) = regexMatchesCI pat wholecsvline
|
||||||
where
|
where
|
||||||
csvline = intercalate "," record
|
-- a synthetic whole CSV record to match against; note, it has
|
||||||
-- matcherMatches (FieldMatcher field pat) = undefined
|
-- no quotes enclosing fields, and is always comma-separated,
|
||||||
|
-- so may differ from the actual record, and may not be valid CSV.
|
||||||
|
wholecsvline = dbg3 "wholecsvline" $ intercalate "," record
|
||||||
|
matcherMatches (FieldMatcher csvfieldref pat) = regexMatchesCI pat csvfieldvalue
|
||||||
|
where
|
||||||
|
-- the value of the referenced CSV field to match against.
|
||||||
|
csvfieldvalue = dbg3 "csvfieldvalue" $ replaceCsvFieldReference rules record csvfieldref
|
||||||
|
|
||||||
-- | Render a field assignment's template, possibly interpolating referenced
|
-- | Render a field assignment's template, possibly interpolating referenced
|
||||||
-- CSV field values. Outer whitespace is removed from interpolated values.
|
-- CSV field values. Outer whitespace is removed from interpolated values.
|
||||||
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
|
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
|
||||||
renderTemplate rules record t = regexReplaceBy "%[A-z0-9_-]+" replace t
|
renderTemplate rules record t = regexReplaceBy "%[A-z0-9_-]+" (replaceCsvFieldReference rules record) t
|
||||||
where
|
|
||||||
replace ('%':pat) = maybe pat (\i -> strip $ atDef "" record (i-1)) mindex
|
|
||||||
where
|
|
||||||
mindex | all isDigit pat = readMay pat
|
|
||||||
| otherwise = lookup (map toLower pat) $ rcsvfieldindexes rules
|
|
||||||
replace pat = pat
|
|
||||||
|
|
||||||
-- Parse the date string using the specified date-format, or if unspecified try these default formats:
|
-- | Replace something that looks like a reference to a csv field ("%date" or "%1")
|
||||||
|
-- with that field's value. If it doesn't look like a field reference, or if we
|
||||||
|
-- can't find such a field, leave it unchanged.
|
||||||
|
replaceCsvFieldReference :: CsvRules -> CsvRecord -> CsvFieldReference -> String
|
||||||
|
replaceCsvFieldReference rules record s@('%':fieldname) = fromMaybe s $ csvFieldValue rules record fieldname
|
||||||
|
replaceCsvFieldReference _ _ s = s
|
||||||
|
|
||||||
|
-- | Get the (whitespace-stripped) value of a CSV field, identified by its name or
|
||||||
|
-- column number, ("date" or "1"), from the given CSV record, if such a field exists.
|
||||||
|
csvFieldValue :: CsvRules -> CsvRecord -> CsvFieldName -> Maybe String
|
||||||
|
csvFieldValue rules record fieldname = do
|
||||||
|
fieldindex <- if | all isDigit fieldname -> readMay fieldname
|
||||||
|
| otherwise -> lookup (map toLower fieldname) $ rcsvfieldindexes rules
|
||||||
|
fieldvalue <- strip <$> atMay record (fieldindex-1)
|
||||||
|
return fieldvalue
|
||||||
|
|
||||||
|
-- | Parse the date string using the specified date-format, or if unspecified try these default formats:
|
||||||
-- YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD, MM/DD/YYYY (month and day can be 1 or 2 digits, year must be 4).
|
-- YYYY/MM/DD, YYYY-MM-DD, YYYY.MM.DD, MM/DD/YYYY (month and day can be 1 or 2 digits, year must be 4).
|
||||||
parseDateWithFormatOrDefaultFormats :: Maybe DateFormat -> String -> Maybe Day
|
parseDateWithFormatOrDefaultFormats :: Maybe DateFormat -> String -> Maybe Day
|
||||||
parseDateWithFormatOrDefaultFormats mformat s = firstJust $ map parsewith formats
|
parseDateWithFormatOrDefaultFormats mformat s = firstJust $ map parsewith formats
|
||||||
@ -1025,26 +1092,60 @@ parseDateWithFormatOrDefaultFormats mformat s = firstJust $ map parsewith format
|
|||||||
|
|
||||||
tests_CsvReader = tests "CsvReader" [
|
tests_CsvReader = tests "CsvReader" [
|
||||||
tests "parseCsvRules" [
|
tests "parseCsvRules" [
|
||||||
test"empty file" $
|
test "empty file" $
|
||||||
parseCsvRules "unknown" "" @?= Right defrules
|
parseCsvRules "unknown" "" @?= Right defrules
|
||||||
]
|
]
|
||||||
,tests "rulesp" [
|
,tests "rulesp" [
|
||||||
test"trailing comments" $
|
test "trailing comments" $
|
||||||
parseWithState' defrules rulesp "skip\n# \n#\n" @?= Right defrules{rdirectives = [("skip","")]}
|
parseWithState' defrules rulesp "skip\n# \n#\n" @?= Right defrules{rdirectives = [("skip","")]}
|
||||||
|
|
||||||
,test"trailing blank lines" $
|
,test "trailing blank lines" $
|
||||||
parseWithState' defrules rulesp "skip\n\n \n" @?= (Right defrules{rdirectives = [("skip","")]})
|
parseWithState' defrules rulesp "skip\n\n \n" @?= (Right defrules{rdirectives = [("skip","")]})
|
||||||
|
|
||||||
,test"no final newline" $
|
,test "no final newline" $
|
||||||
parseWithState' defrules rulesp "skip" @?= (Right defrules{rdirectives=[("skip","")]})
|
parseWithState' defrules rulesp "skip" @?= (Right defrules{rdirectives=[("skip","")]})
|
||||||
|
|
||||||
,test"assignment with empty value" $
|
,test "assignment with empty value" $
|
||||||
parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
|
parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
|
||||||
(Right defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher "foo"],cbAssignments=[("account2","foo")]}]})
|
(Right defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher "foo"],cbAssignments=[("account2","foo")]}]})
|
||||||
]
|
]
|
||||||
,tests "conditionalblockp" [
|
,tests "conditionalblockp" [
|
||||||
test"space after conditional" $ -- #1120
|
test "space after conditional" $ -- #1120
|
||||||
parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
|
parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
|
||||||
(Right $ CB{cbMatchers=[RecordMatcher "a"],cbAssignments=[("account2","b")]})
|
(Right $ CB{cbMatchers=[RecordMatcher "a"],cbAssignments=[("account2","b")]})
|
||||||
|
|
||||||
|
,tests "csvfieldreferencep" [
|
||||||
|
test "number" $ parseWithState' defrules csvfieldreferencep "%1" @?= (Right "%1")
|
||||||
|
,test "name" $ parseWithState' defrules csvfieldreferencep "%date" @?= (Right "%date")
|
||||||
|
,test "quoted name" $ parseWithState' defrules csvfieldreferencep "%\"csv date\"" @?= (Right "%\"csv date\"")
|
||||||
]
|
]
|
||||||
|
|
||||||
|
,tests "matcherp" [
|
||||||
|
|
||||||
|
test "recordmatcherp" $
|
||||||
|
parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher "A A")
|
||||||
|
|
||||||
|
,test "fieldmatcherp.starts-with-%" $
|
||||||
|
parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher "description A A")
|
||||||
|
|
||||||
|
,test "fieldmatcherp" $
|
||||||
|
parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher "%description" "A A")
|
||||||
|
|
||||||
|
,test "fieldmatcherp with operator" $
|
||||||
|
parseWithState' defrules matcherp "%description ~ A A\n" @?= (Right $ FieldMatcher "%description" "A A")
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
,tests "getEffectiveAssignment" [
|
||||||
|
let rules = defrules{rcsvfieldindexes=[("csvdate",1)],rassignments=[("date","%csvdate")]}
|
||||||
|
|
||||||
|
in test "toplevel" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")
|
||||||
|
|
||||||
|
,let rules = defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher "%csvdate" "a"] [("date","%csvdate")]]}
|
||||||
|
in test "conditional" $ getEffectiveAssignment rules ["a","b"] "date" @?= (Just "%csvdate")
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|||||||
@ -495,13 +495,13 @@ See also: [File Extension](#file-extension).
|
|||||||
## `if`
|
## `if`
|
||||||
|
|
||||||
```rules
|
```rules
|
||||||
if PATTERN
|
if MATCHER
|
||||||
RULE
|
RULE
|
||||||
|
|
||||||
if
|
if
|
||||||
PATTERN
|
MATCHER
|
||||||
PATTERN
|
MATCHER
|
||||||
PATTERN
|
MATCHER
|
||||||
RULE
|
RULE
|
||||||
RULE
|
RULE
|
||||||
```
|
```
|
||||||
@ -510,22 +510,29 @@ Conditional blocks ("if blocks") are a block of rules that are applied
|
|||||||
only to CSV records which match certain patterns. They are often used
|
only to CSV records which match certain patterns. They are often used
|
||||||
for customising account names based on transaction descriptions.
|
for customising account names based on transaction descriptions.
|
||||||
|
|
||||||
A single pattern can be written on the same line as the "if";
|
Each MATCHER can be a record matcher, which looks like this:
|
||||||
or multiple patterns can be written on the following lines, non-indented.
|
|
||||||
Multiple patterns are OR'd (any one of them can match).
|
|
||||||
Patterns are case-insensitive regular expressions
|
|
||||||
which try to match anywhere within the whole CSV record
|
|
||||||
(POSIX extended regular expressions with some additions, see https://hledger.org/hledger.html#regular-expressions).
|
|
||||||
Note the CSV record they see is close to, but not identical to, the one in the CSV file;
|
|
||||||
enclosing double quotes will be removed, and the separator character is always comma.
|
|
||||||
|
|
||||||
It's not yet easy to match within a specific field.
|
|
||||||
If the data does not contain commas, you can hack it with a regular expression like:
|
|
||||||
```rules
|
```rules
|
||||||
# match "foo" in the fourth field
|
REGEX
|
||||||
if ^([^,]*,){3}foo
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
REGEX is a case-insensitive regular expression which tries to match anywhere within the CSV record.
|
||||||
|
It is a POSIX extended regular expression with some additions (see
|
||||||
|
[Regular expressions](https://hledger.org/hledger.html#regular-expressions) in the hledger manual).
|
||||||
|
Note: the "CSV record" it is matched against is not the original record, but a synthetic one,
|
||||||
|
with enclosing double quotes or whitespace removed, and always comma-separated.
|
||||||
|
(Eg, an SSV record `2020-01-01; "Acme, Inc."; 1,000` appears to REGEX as `2020-01-01,Acme, Inc.,1,000`).
|
||||||
|
|
||||||
|
Or, MATCHER can be a field matcher, like this:
|
||||||
|
```rules
|
||||||
|
%CSVFIELD REGEX
|
||||||
|
```
|
||||||
|
which matches just the content of a particular CSV field.
|
||||||
|
CSVFIELD is a percent sign followed by the field's name or column number, like `%date` or `%1`.
|
||||||
|
|
||||||
|
A single matcher can be written on the same line as the "if";
|
||||||
|
or multiple matchers can be written on the following lines, non-indented.
|
||||||
|
Multiple matchers are OR'd (any one of them can match).
|
||||||
|
|
||||||
After the matchers there should be one or more rules to apply, all
|
After the matchers there should be one or more rules to apply, all
|
||||||
indented by at least one space. Three kinds of rule are allowed in
|
indented by at least one space. Three kinds of rule are allowed in
|
||||||
conditional blocks:
|
conditional blocks:
|
||||||
|
|||||||
@ -560,8 +560,26 @@ $ ./hledger-csv
|
|||||||
|
|
||||||
>=0
|
>=0
|
||||||
|
|
||||||
|
# 27. match a specific field
|
||||||
|
<
|
||||||
|
2020-01-01, 1
|
||||||
|
2020-01-01, 2
|
||||||
|
RULES
|
||||||
|
fields date, desc
|
||||||
|
if %desc 1
|
||||||
|
description one
|
||||||
|
|
||||||
## 26. A single unbalanced posting with number other than 1 also should not generate a balancing posting.
|
$ ./hledger-csv desc:one
|
||||||
|
2020-01-01 one
|
||||||
|
|
||||||
|
>=0
|
||||||
|
|
||||||
|
## .
|
||||||
|
#<
|
||||||
|
#$ ./hledger-csv
|
||||||
|
#>=0
|
||||||
|
|
||||||
|
## . A single unbalanced posting with number other than 1 also should not generate a balancing posting.
|
||||||
#<
|
#<
|
||||||
#2019-01-01,1
|
#2019-01-01,1
|
||||||
#
|
#
|
||||||
@ -575,7 +593,7 @@ $ ./hledger-csv
|
|||||||
#
|
#
|
||||||
#>=0
|
#>=0
|
||||||
#
|
#
|
||||||
## 27. A single posting that's zero also should not generate a balancing posting.
|
## . A single posting that's zero also should not generate a balancing posting.
|
||||||
#<
|
#<
|
||||||
#2019-01-01,0
|
#2019-01-01,0
|
||||||
#
|
#
|
||||||
@ -589,7 +607,7 @@ $ ./hledger-csv
|
|||||||
#
|
#
|
||||||
#>=0
|
#>=0
|
||||||
|
|
||||||
## 28. With a bracketed account name, the auto-generated second posting should also be bracketed.
|
## . With a bracketed account name, the auto-generated second posting should also be bracketed.
|
||||||
#<
|
#<
|
||||||
#2019-01-01,1
|
#2019-01-01,1
|
||||||
#
|
#
|
||||||
@ -604,8 +622,6 @@ $ ./hledger-csv
|
|||||||
#
|
#
|
||||||
#>=0
|
#>=0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# . TODO: without --separator gives obscure error
|
# . TODO: without --separator gives obscure error
|
||||||
# |
|
# |
|
||||||
# 1 | 10/2009/09;Flubber Co🎅;50;
|
# 1 | 10/2009/09;Flubber Co🎅;50;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user