Merge branch 'csv-mega-pack' (#1095)

This commit is contained in:
Simon Michael 2019-11-06 13:13:11 -08:00
commit dcfc833d92
5 changed files with 678 additions and 140 deletions

View File

@ -140,7 +140,7 @@ readJournalFromCsv separator mrulesfile csvfile csvdata =
-- parsec seems to fail if you pass it "-" here TODO: try again with megaparsec
let parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
records <- (either throwerr id .
dbg2 "validateCsv" . validateCsv skiplines .
dbg2 "validateCsv" . validateCsv rules skiplines .
dbg2 "parseCsv")
`fmap` parseCsv separator parsecfilename csvdata
dbg1IO "first 3 csv records" $ take 3 records
@ -216,11 +216,22 @@ printCSV records = unlined (printRecord `map` records)
unlined = concat . intersperse "\n"
-- | Return the cleaned up and validated CSV data (can be empty), or an error.
validateCsv :: Int -> Either String CSV -> Either String [CsvRecord]
validateCsv _ (Left err) = Left err
validateCsv numhdrlines (Right rs) = validate $ drop numhdrlines $ filternulls rs
validateCsv :: CsvRules -> Int -> Either String CSV -> Either String [CsvRecord]
validateCsv _ _ (Left err) = Left err
validateCsv rules numhdrlines (Right rs) = validate $ applyConditionalSkips $ drop numhdrlines $ filternulls rs
where
filternulls = filter (/=[""])
skipCount r =
case (getEffectiveAssignment rules r "end", getEffectiveAssignment rules r "skip") of
(Nothing, Nothing) -> Nothing
(Just _, _) -> Just maxBound
(Nothing, Just "") -> Just 1
(Nothing, Just x) -> Just (read x)
applyConditionalSkips [] = []
applyConditionalSkips (r:rest) =
case skipCount r of
Nothing -> r:(applyConditionalSkips rest)
Just cnt -> applyConditionalSkips (drop (cnt-1) rest)
validate [] = Right []
validate rs@(_first:_)
| isJust lessthan2 = let r = fromJust lessthan2 in
@ -266,7 +277,7 @@ defaultRulesText csvfile = T.pack $ unlines
,""
,"account1 assets:bank:checking"
,""
,"fields date, description, amount"
,"fields date, description, amount1"
,""
,"#skip 1"
,"#newest-first"
@ -454,20 +465,8 @@ parseCsvRules rulesfile s =
validateRules :: CsvRules -> Either String CsvRules
validateRules rules = do
unless (isAssigned "date") $ Left "Please specify (at top level) the date field. Eg: date %1\n"
unless ((amount && not (amountin || amountout)) ||
(not amount && (amountin && amountout)) ||
balance)
$ Left $ unlines [
"Please specify (as a top level CSV rule) either the amount field,"
,"both the amount-in and amount-out fields, or the balance field. Eg:"
,"amount %2\n"
]
Right rules
where
amount = isAssigned "amount"
amountin = isAssigned "amount-in"
amountout = isAssigned "amount-out"
balance = isAssigned "balance" || isAssigned "balance1" || isAssigned "balance2"
isAssigned f = isJust $ getEffectiveAssignment rules [] f
-- parsers
@ -553,8 +552,9 @@ fieldassignmentp :: CsvRulesParser (JournalFieldName, FieldTemplate)
fieldassignmentp = do
lift $ dbgparse 3 "trying fieldassignmentp"
f <- journalfieldnamep
assignmentseparatorp
v <- fieldvalp
v <- choiceInState [ assignmentseparatorp >> fieldvalp
, lift eolof >> return ""
]
return (f,v)
<?> "field assignment"
@ -566,14 +566,19 @@ journalfieldnamep = do
-- Transaction fields and pseudo fields for CSV conversion.
-- Names must precede any other name they contain, for the parser
-- (amount-in before amount; date2 before date). TODO: fix
journalfieldnames = [
"account1"
,"account2"
,"amount-in"
journalfieldnames =
concat [[ "account" ++ i
,"amount" ++ i ++ "-in"
,"amount" ++ i ++ "-out"
,"amount" ++ i
,"balance" ++ i
,"comment" ++ i
,"currency" ++ i
] | x <- [1..9], let i = show x]
++
["amount-in"
,"amount-out"
,"amount"
,"balance1"
,"balance2"
,"balance"
,"code"
,"comment"
@ -582,17 +587,16 @@ journalfieldnames = [
,"date"
,"description"
,"status"
,"skip" -- skip and end are not really fields, but we list it here to allow conditional rules that skip records
,"end"
]
assignmentseparatorp :: CsvRulesParser ()
assignmentseparatorp = do
lift $ dbgparse 3 "trying assignmentseparatorp"
choice [
-- try (lift (skipMany spacenonewline) >> oneOf ":="),
try (lift (skipMany spacenonewline) >> char ':'),
spaceChar
]
_ <- lift (skipMany spacenonewline)
_ <- choiceInState [ lift (skipMany spacenonewline) >> char ':' >> lift (skipMany spacenonewline)
, lift (skipSome spacenonewline)
]
return ()
fieldvalp :: CsvRulesParser String
@ -662,8 +666,9 @@ regexp = do
type CsvRecord = [String]
-- Convert a CSV record to a transaction using the rules, or raise an
-- error if the data can not be parsed.
showRules rules record =
unlines $ catMaybes [ (("the "++fld++" rule is: ")++) <$> getEffectiveAssignment rules record fld | fld <- journalfieldnames]
transactionFromCsvRecord :: SourcePos -> CsvRules -> CsvRecord -> Transaction
transactionFromCsvRecord sourcepos rules record = t
where
@ -679,11 +684,11 @@ transactionFromCsvRecord sourcepos rules record = t
mdateformat = mdirective "date-format"
date = render $ fromMaybe "" $ mfieldtemplate "date"
date' = fromMaybe (error' $ dateerror "date" date mdateformat) $ mparsedate date
mdate2 = maybe Nothing (Just . render) $ mfieldtemplate "date2"
mdate2 = render <$> mfieldtemplate "date2"
mdate2' = maybe Nothing (maybe (error' $ dateerror "date2" (fromMaybe "" mdate2) mdateformat) Just . mparsedate) mdate2
dateerror datefield value mdateformat = unlines
["error: could not parse \""++value++"\" as a date using date format "++maybe "\"YYYY/M/D\", \"YYYY-M-D\" or \"YYYY.M.D\"" show mdateformat
,"the CSV record is: "++showRecord record
, showRecord record
,"the "++datefield++" rule is: "++(fromMaybe "required, but missing" $ mfieldtemplate datefield)
,"the date-format is: "++fromMaybe "unspecified" mdateformat
,"you may need to "
@ -703,58 +708,110 @@ transactionFromCsvRecord sourcepos rules record = t
["error: could not parse \""++str++"\" as a cleared status (should be *, ! or empty)"
,"the parse error is: "++customErrorBundlePretty err
]
code = maybe "" render $ mfieldtemplate "code"
description = maybe "" render $ mfieldtemplate "description"
comment = maybe "" render $ mfieldtemplate "comment"
precomment = maybe "" render $ mfieldtemplate "precomment"
currency = maybe (fromMaybe "" mdefaultcurrency) render $ mfieldtemplate "currency"
amountstr = (currency++) <$> simplifySign <$> getAmountStr rules record
maybeamount = either amounterror (Mixed . (:[])) <$> runParser (evalStateT (amountp <* eof) mempty) "" <$> T.pack <$> amountstr
amounterror err = error' $ unlines
["error: could not parse \""++fromJust amountstr++"\" as an amount"
,showRecord record
,"the amount rule is: "++(fromMaybe "" $ mfieldtemplate "amount")
,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency")
,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
,"the parse error is: "++customErrorBundlePretty err
,"you may need to "
++"change your amount or currency rules, "
++"or "++maybe "add a" (const "change your") mskip++" skip rule"
]
amount1 = case maybeamount of
Just a -> a
Nothing | balance1 /= Nothing || balance2 /= Nothing -> nullmixedamt
Nothing -> error' $ "amount and balance have no value\n"++showRecord record
-- convert balancing amount to cost like hledger print, so eg if
-- amount1 is "10 GBP @@ 15 USD", amount2 will be "-15 USD".
amount2 = costOfMixedAmount (-amount1)
code = singleline $ maybe "" render $ mfieldtemplate "code"
description = singleline $ maybe "" render $ mfieldtemplate "description"
comment = singleline $ maybe "" render $ mfieldtemplate "comment"
precomment = singleline $ maybe "" render $ mfieldtemplate "precomment"
s `or` def = if null s then def else s
defaccount1 = fromMaybe "unknown" $ mdirective "default-account1"
defaccount2 = case isNegativeMixedAmount amount2 of
Just True -> "income:unknown"
_ -> "expenses:unknown"
account1 = T.pack $ maybe "" render (mfieldtemplate "account1") `or` defaccount1
account2 = T.pack $ maybe "" render (mfieldtemplate "account2") `or` defaccount2
balance1template =
case (mfieldtemplate "balance", mfieldtemplate "balance1") of
(Nothing, Nothing) -> Nothing
(balance, Nothing) -> balance
(Nothing, balance1) -> balance1
(Just _, Just _) -> error' "Please use either balance or balance1, but not both"
balance1 = maybe Nothing (parsebalance "1".render) $ balance1template
balance2 = maybe Nothing (parsebalance "2".render) $ mfieldtemplate "balance2"
parsebalance n str
parsebalance currency n str
| all isSpace str = Nothing
| otherwise = Just $ (either (balanceerror n str) id $ runParser (evalStateT (amountp <* eof) mempty) "" $ T.pack $ (currency++) $ simplifySign str, nullsourcepos)
balanceerror n str err = error' $ unlines
["error: could not parse \""++str++"\" as balance"++n++" amount"
,showRecord record
,"the balance"++n++" rule is: "++(fromMaybe "" $ mfieldtemplate ("balance"++n))
,"the currency rule is: "++(fromMaybe "unspecified" $ mfieldtemplate "currency")
,showRules rules record
,"the default-currency is: "++fromMaybe "unspecified" mdefaultcurrency
,"the parse error is: "++customErrorBundlePretty err
]
parsePosting' number accountFld amountFld amountInFld amountOutFld balanceFld commentFld =
let currency = maybe (fromMaybe "" mdefaultcurrency) render $
(mfieldtemplate ("currency"++number) `or `mfieldtemplate "currency")
amount = chooseAmount rules record currency amountFld amountInFld amountOutFld
account' = ((T.pack . render) <$> (mfieldtemplate accountFld
`or` mdirective ("default-account" ++ number)))
balance = (parsebalance currency number.render) =<< mfieldtemplate balanceFld
comment = T.pack $ maybe "" render $ mfieldtemplate commentFld
account =
case account' of
-- If account is explicitly "unassigned", suppress posting
-- Otherwise, generate posting with "expenses:unknown" account if we have amount/balance information
Just "" -> Nothing
Just account -> Just account
Nothing ->
-- If we have amount or balance assertion (which implies potential amount change),
-- but no account name, lets generate "expenses:unknown" account name.
case (amount, balance) of
(Just _, _ ) -> Just "expenses:unknown"
(_, Just _) -> Just "expenses:unknown"
(Nothing, Nothing) -> Nothing
in
case account of
Nothing -> Nothing
Just account ->
Just $ (number, posting {paccount=account, pamount=fromMaybe missingmixedamt amount, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance, pcomment = comment})
parsePosting number =
parsePosting' number
("account"++number)
("amount"++number)
("amount"++number++"-in")
("amount"++number++"-out")
("balance"++number)
("comment" ++ number)
postingLegacy = parsePosting' "" "account1" "amount" "amount-in" "amount-out" "balance" "comment1"
posting1' = parsePosting "1"
posting1 =
case (postingLegacy,posting1') of
(Just (_,legacy), Nothing) -> Just ("1", legacy)
(Nothing, Just (_,posting1)) -> Just ("1", posting1)
(Just (_,legacy), Just (_,posting1)) ->
-- Here we merge legacy fields such as "amount" with "amount1", etc
-- Account and Comment would be the same by construction
let balanceassertion = (pbalanceassertion legacy) `or` (pbalanceassertion posting1)
amount =
let al = pamount legacy
a1 = pamount posting1
in
if al == a1 then al
else
case (isZeroMixedAmount al, isZeroMixedAmount a1) of
(True, _) -> a1
(False, True) -> al
(False, False) ->
error' $ unlines [ "amount/amount-in/amount-out and amount1/amount1-in/amount1-out produced conflicting values"
, showRecord record
, showRules rules record
, "amount/amount-in/amount-out is " ++ showMixedAmount al
, "amount1/amount1-in/amount1-out is " ++ showMixedAmount a1
]
in Just $ ("1", posting {paccount=paccount posting1, pamount=amount, ptransaction=Just t, pbalanceassertion=balanceassertion, pcomment = pcomment posting1})
(Nothing, Nothing) -> Nothing
postings' = catMaybes $ posting1:[ parsePosting i | x<-[2..9], let i = show x]
improveUnknownAccountName p =
if paccount p /="expenses:unknown"
then p
else case isNegativeMixedAmount (pamount p) of
Just True -> p{paccount = "income:unknown"}
Just False -> p{paccount = "expenses:unknown"}
_ -> p
postings =
case postings' of
-- To be compatible with the behavior of the old code which allowed two postings only, we enforce
-- second posting when rules generated just first of them.
-- When we have srictly first and second posting, but second posting does not have amount, we fill it in.
[("1",posting1)] ->
[posting1,improveUnknownAccountName (posting{paccount="expenses:unknown", pamount=costOfMixedAmount(-(pamount posting1)), ptransaction=Just t})]
[("1",posting1),("2",posting2)] ->
case (pamount posting1 == missingmixedamt , pamount posting2 == missingmixedamt) of
(False, True) -> [posting1, improveUnknownAccountName (posting2{pamount=costOfMixedAmount(-(pamount posting1))})]
_ -> [posting1, posting2]
_ -> map snd postings'
-- build the transaction
t = nulltransaction{
tsourcepos = genericSourcePos sourcepos,
@ -764,39 +821,56 @@ transactionFromCsvRecord sourcepos rules record = t
tcode = T.pack code,
tdescription = T.pack description,
tcomment = T.pack comment,
tprecedingcomment = T.pack precomment,
tpostings =
[posting {paccount=account1, pamount=amount1, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance1}
,posting {paccount=account2, pamount=amount2, ptransaction=Just t, pbalanceassertion=toAssertion <$> balance2}
]
tprecedingcomment = T.pack precomment,
tpostings = postings
}
toAssertion (a, b) = assertion{
baamount = a,
baposition = b
}
getAmountStr :: CsvRules -> CsvRecord -> Maybe String
getAmountStr rules record =
chooseAmount :: CsvRules -> CsvRecord -> String -> String -> String -> String -> Maybe MixedAmount
chooseAmount rules record currency amountFld amountInFld amountOutFld =
let
mamount = getEffectiveAssignment rules record "amount"
mamountin = getEffectiveAssignment rules record "amount-in"
mamountout = getEffectiveAssignment rules record "amount-out"
render = fmap (strip . renderTemplate rules record)
mamount = getEffectiveAssignment rules record amountFld
mamountin = getEffectiveAssignment rules record amountInFld
mamountout = getEffectiveAssignment rules record amountOutFld
parse amt = notZero =<< (parseAmount currency <$> notEmpty =<< (strip . renderTemplate rules record) <$> amt)
in
case (render mamount, render mamountin, render mamountout) of
(Just "", Nothing, Nothing) -> Nothing
case (parse mamount, parse mamountin, parse mamountout) of
(Nothing, Nothing, Nothing) -> Nothing
(Just a, Nothing, Nothing) -> Just a
(Nothing, Just "", Just "") -> error' $ "neither amount-in or amount-out has a value\n"
++ " record: " ++ showRecord record
(Nothing, Just i, Just "") -> Just i
(Nothing, Just "", Just o) -> Just $ negateStr o
(Nothing, Just i, Just o) -> error' $ "both amount-in and amount-out have a value\n"
++ " amount-in: " ++ i ++ "\n"
++ " amount-out: " ++ o ++ "\n"
(Nothing, Just i, Nothing) -> Just i
(Nothing, Nothing, Just o) -> Just $ negate o
(Nothing, Just i, Just o) -> error' $ "both "++amountInFld++" and "++amountOutFld++" have a value\n"
++ " "++amountInFld++": " ++ show i ++ "\n"
++ " "++amountOutFld++": " ++ show o ++ "\n"
++ " record: " ++ showRecord record
_ -> error' $ "found values for amount and for amount-in/amount-out\n"
++ "please use either amount or amount-in/amount-out\n"
_ -> error' $ "found values for "++amountFld++" and for "++amountInFld++"/"++amountOutFld++"\n"
++ "please use either "++amountFld++" or "++amountInFld++"/"++amountOutFld++"\n"
++ " record: " ++ showRecord record
where
notZero amt = if isZeroMixedAmount amt then Nothing else Just amt
notEmpty str = if str=="" then Nothing else Just str
parseAmount currency amountstr =
either (amounterror amountstr) (Mixed . (:[]))
<$> runParser (evalStateT (amountp <* eof) mempty) ""
<$> T.pack
<$> (currency++)
<$> simplifySign
<$> amountstr
amounterror amountstr err = error' $ unlines
["error: could not parse \""++fromJust amountstr++"\" as an amount"
,showRecord record
,showRules rules record
,"the default-currency is: "++fromMaybe "unspecified" (getDirective "default-currency" rules)
,"the parse error is: "++customErrorBundlePretty err
,"you may need to "
++"change your amount or currency rules, "
++"or add or change your skip rule"
]
type CsvAmountString = String
@ -861,7 +935,7 @@ getEffectiveAssignment rules record f = lastMay $ assignmentsFor f
-- | Render a field assigment's template, possibly interpolating referenced
-- CSV field values. Outer whitespace is removed from interpolated values.
renderTemplate :: CsvRules -> CsvRecord -> FieldTemplate -> String
renderTemplate rules record t = regexReplaceBy "%[A-z0-9]+" replace t
renderTemplate rules record t = regexReplaceBy "%[A-z0-9_-]+" replace t
where
replace ('%':pat) = maybe pat (\i -> strip $ atDef "" record (i-1)) mindex
where

View File

@ -21,6 +21,7 @@ module Hledger.Utils.String (
lstrip,
rstrip,
chomp,
singleline,
elideLeft,
elideRight,
formatString,
@ -76,6 +77,10 @@ rstrip = reverse . lstrip . reverse
chomp :: String -> String
chomp = reverse . dropWhile (`elem` "\r\n") . reverse
-- | Remove consequtive line breaks, replacing them with single space
singleline :: String -> String
singleline = unwords . filter (/="") . (map strip) . lines
stripbrackets :: String -> String
stripbrackets = dropWhile (`elem` "([") . reverse . dropWhile (`elem` "])") . reverse :: String -> String

View File

@ -38,7 +38,7 @@ At minimum, the rules file must identify the date and amount fields.
It's often necessary to specify the date format, and the number of header lines to skip, also.
Eg:
```
fields date, _, _, amount
fields date, _, _, amount1
date-format %d/%m/%Y
skip 1
```
@ -55,7 +55,7 @@ A more complete example:
skip 1
# name the csv fields (and assign the transaction's date, amount and code)
fields date, _, toorfrom, name, amzstatus, amount, fees, code
fields date, _, toorfrom, name, amzstatus, amount1, fees, code
# how to parse the date
date-format %b %-d, %Y
@ -64,7 +64,7 @@ date-format %b %-d, %Y
description %toorfrom %name
# save these fields as tags
comment status:%amzstatus, fees:%fees
comment status:%amzstatus
# set the base account for all transactions
account1 assets:amazon
@ -72,6 +72,9 @@ account1 assets:amazon
# flip the sign on the amount
amount -%amount
# Put fees in a separate posting
amount3 %fees
comment3 fees
```
For more examples, see [Convert CSV files](https://github.com/simonmichael/hledger/wiki/Convert-CSV-files).
@ -92,7 +95,7 @@ You'll need this whenever your CSV data contains header lines. Eg:
<!-- XXX -->
<!-- hledger tries to skip initial CSV header lines automatically. -->
<!-- If it guesses wrong, use this directive to skip exactly N lines. -->
<!-- This can also be used in a conditional block to ignore certain CSV records. -->
This can also be used in a conditional block to ignore certain CSV records.
```rules
# ignore the first CSV line
skip 1
@ -133,7 +136,13 @@ date-format %-m/%-d/%Y %l:%M %p
This (a) names the CSV fields, in order (names may not contain whitespace; uninteresting names may be left blank),
and (b) assigns them to journal entry fields if you use any of these standard field names:
`date`, `date2`, `status`, `code`, `description`, `comment`, `account1`, `account2`, `amount`, `amount-in`, `amount-out`, `currency`, `balance`, `balance1`, `balance2`.
Fields `date`, `date2`, `status`, `code`, `description` will form transaction description.
An assignment to any of `accountN`, `amountN`, `amountN-in`, `amountN-out`, `balanceN` or `currencyN` will generate a posting (though it's your responsibility to ensure it is a well formed one). Normally the `N`'s are consecutive starting from 1 but it's not required. One posting will be generated for each unique `N`. If you wish to supply a comment for the posting, use `commentN`, though comment on its own will not cause posting to be generated.
Fields `amount`, `amount-in`, `amount-out`, `currency`, `balance` and `comment` are treated as aliases for `amount1`, and so on. If your rules file leads to both aliased fields having different values, `hledger` will raise an error.
Eg:
```rules
# use the 1st, 2nd and 4th CSV fields as the entry's date, description and amount,
@ -142,9 +151,11 @@ Eg:
# CSV field:
# 1 2 3 4 5 6 7 8
# entry field:
fields date, description, , amount, , , somefield, anotherfield
fields date, description, , amount1, , , somefield, anotherfield
```
For backwards compatibility, we treat posting 1 specially. If your rules generated just posting 1, another posting would be added to your transaction to balance it. If your rules generated posting 1 and posting 2, but amount in the posting 2 is empty, hledger will fill it out with the opposite of posting 1. This special handling is needed to ensure smooth upgrade path from version 1.15.
## field assignment
*`ENTRYFIELDNAME`* *`FIELDVALUE`*
@ -177,12 +188,23 @@ Note, interpolation strips any outer whitespace, so a CSV value like
*`PATTERN`*...\
&nbsp;&nbsp;&nbsp;&nbsp;*`FIELDASSIGNMENTS`*...
`if` *`PATTERN`*\
*`PATTERN`*...\
&nbsp;&nbsp;&nbsp;&nbsp;*`skip N`*...
`if` *`PATTERN`*\
*`PATTERN`*...\
&nbsp;&nbsp;&nbsp;&nbsp;*`end`*...
This applies one or more field assignments, only to those CSV records matched by one of the PATTERNs.
The patterns are case-insensitive regular expressions which match anywhere
within the whole CSV record (it's not yet possible to match within a
specific field). When there are multiple patterns they can be written
on separate lines, unindented.
The field assignments are on separate lines indented by at least one space.
Instead of field assignments you can specify `skip` or `skip 1` to skip this record, `skip N` to skip the next N records (including the one that matchied) or `end` to skip the rest of the file.
Examples:
```rules
# if the CSV record contains "groceries", set account2 to "expenses:groceries"
@ -231,22 +253,21 @@ The order of same-day entries will be preserved
## CSV accounts
Each journal entry will have two [postings](journal.html#postings), to `account1` and `account2` respectively.
It's not yet possible to generate entries with more than two postings.
Each journal entry will have at least two [postings](journal.html#postings), to `account1` and some other account (usually `account2`).
It's conventional and recommended to use `account1` for the account whose CSV we are reading.
## CSV amounts
A transaction [amount](journal.html#amounts) must be set, in one of these ways:
A posting [amount](journal.html#amounts) could be set in one of these ways:
- with an `amount` field assignment, which sets the first posting's amount
- with an `amountN` field assignment, which sets the Nth posting's amount
- (When the CSV has debit and credit amounts in separate fields:)\
with field assignments for the `amount-in` and `amount-out` pseudo
with field assignments for the `amountN-in` and `amountN-out` pseudo
fields (both of them). Whichever one has a value will be used, with
appropriate sign. If both contain a value, it might not work so well.
- or implicitly by means of a [balance assignment](journal.html#balance-assignments) (see below).
- with `balanceN` field assignment that creates a [balance assignment](journal.html#balance-assignments) (see below).
There is some special handling for sign in amounts:
@ -254,24 +275,53 @@ There is some special handling for sign in amounts:
- If an amount value begins with a double minus sign, those will cancel out and be removed.
If the currency/commodity symbol is provided as a separate CSV field,
assign it to the `currency` pseudo field; the symbol will be prepended
assign it to the `currency` pseudo field (applicable to the whole transaction) or `currencyN` (applicable to Nth posting only); the symbol will be prepended
to the amount
(TODO: <s>when there is an amount</s>).
Or, you can use an `amount` [field assignment](#field-assignment) for more control, eg:
Or, you can use an `amountN` [field assignment](#field-assignment) for more control, eg:
```
fields date,description,currency,amount
amount %amount %currency
fields date,description,currency,amount1
amount1 %amount1 %currency
```
## CSV balance assertions/assignments
If the CSV includes a running balance, you can assign that to one of the pseudo fields
`balance` (or `balance1`) or `balance2`.
`balance` (or `balance1`), `balance2`, ... up to `balance9`.
This will generate a [balance assertion](journal.html#balance-assertions)
(or if the amount is left empty, a [balance assignment](journal.html#balance-assignments)),
on the first or second posting,
whenever the running balance field is non-empty.
(TODO: [#1000](https://github.com/simonmichael/hledger/issues/1000))
on the appropriate posting, whenever the running balance field is non-empty.
## References to other fields and evaluation order
Field assignments could include references to other fields or even to the same field you are trying to assign:
```
fields date,description,currency,amount1
amount1 %amount1 USD
amount1 %amount1 EUR
amount1 %amount1 %currency
if SOME_REGEXP
amount1 %amount1 GBP
```
This is how this file would be evaluated.
First, parts of CVS record are assigned according to `fields` directive.
Then all other field assignments -- written at top level, or included in `if` blocks -- are considered to see if they should be applied. They are checked in the order they are written, with later assignment overwriting earlier ones.
Once full set of field assignments that should be applied is known, their values are computed, and this is when all `%<fieldname>` references are evaluated.
So for a particular row from CSV file, value from fourth column would be assigned to `amount1`.
Then `hledger` will decide that `amount1` would have to be amended to `%amount1 USD`, but this will not happen immediately. This choice would be replaced by decision to rewrite `amount1` to `%amount EUR`, which will in turn be thrown away in favor of `%amount1 %currency`. If the `if` block condition will match the row, it will assign `amount1` to `%amount1 GBP`.
Overall, we will end up with one of the two alternatives for `amount1` - either `%amount1 %currency` or `%amount1 GBP`.
Now substitution of all referenced values will happen, using the current values for `%amount1` and `currency`, which were provided by the `fields` directive.
## Reading multiple CSV files

View File

@ -1,11 +1,13 @@
# These tests read CSV from stdin for convenience, so to ensure we get the CSV reader's
# error, the csv: prefix is used.
#
# The final cleanup command is chained with && so as not to mask hledger's exit code,
# but this means a temp file is left behind whenever hledger fails. What TODO ?
# 1. read CSV to hledger journal format
$ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<
10/2009/09,Flubber Co,50
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50
income:unknown $-50
@ -16,7 +18,16 @@ $ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $
<
10/2009/09,Flubber Co🎅,50,
11/2009/09,Flubber Co🎅,,50
$ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >t.$$.csv.rules ; hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
RULES
account1 Assets:MyAccount
date %1
date-format %d/%Y/%m
description %2
amount-in %3
amount-out %4
currency $
$ ./hledger-csv
2009/09/10 Flubber Co🎅
Assets:MyAccount $50
income:unknown $-50
@ -28,7 +39,18 @@ $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescrip
>=0
# 3. handle conditions assigning multiple fields
$ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\nif Flubber\n account2 acct\n comment cmt' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<
10/2009/09,Flubber Co,50
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if Flubber
account2 acct
comment cmt
$ ./hledger-csv
2009/09/10 Flubber Co ; cmt
assets:myacct $50
acct $-50
@ -36,7 +58,16 @@ $ printf 'fields date, description, amount\ndate-format %%d/%%Y/%%m\ncurrency $
>=0
# 4. read CSV with balance field
$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<
10/2009/09,Flubber Co,50,123
RULES
fields date, description, amount, balance
date-format %d/%Y/%m
currency $
account1 assets:myacct
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $123
income:unknown $-50
@ -44,7 +75,17 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc
>=0
# 5. read CSV with empty balance field
$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n11/2009/09,Blubber Co,60,\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<
10/2009/09,Flubber Co,50,123
11/2009/09,Blubber Co,60,
RULES
fields date, description, amount, balance
date-format %d/%Y/%m
currency $
account1 assets:myacct
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $123
income:unknown $-50
@ -56,7 +97,17 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc
>=0
# 6. read CSV with only whitespace in balance field
$ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n11/2009/09,Blubber Co,60, \n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<
10/2009/09,Flubber Co,50,123
11/2009/09,Blubber Co,60,
RULES
fields date, description, amount, balance
date-format %d/%Y/%m
currency $
account1 assets:myacct
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $123
income:unknown $-50
@ -68,7 +119,22 @@ $ printf 'fields date, description, amount, balance\ndate-format %%d/%%Y/%%m\nc
>=0
# 7. read CSV with rule double-negating column
$ printf 'skip 1\n\ncurrency $\n\nfields date, payee, payment\n\namount -%%payment\naccount1 liabilities:bank\naccount2 expense:other' >t.$$.csv.rules; printf 'date,payee,amount\n2009/10/9,Flubber Co,50\n2009/11/09,Merchant Credit,-60\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<
date,payee,amount
2009/10/9,Flubber Co,50
2009/11/09,Merchant Credit,-60
RULES
skip 1
currency $
fields date, payee, payment
amount -%payment
account1 liabilities:bank
account2 expense:other
$ ./hledger-csv
2009/10/09
liabilities:bank $-50
expense:other $50
@ -83,7 +149,16 @@ $ printf 'skip 1\n\ncurrency $\n\nfields date, payee, payment\n\namount -%%paym
<
10/2009/09;Flubber Co🎅;50;
11/2009/09;Flubber Co🎅;;50
$ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger --separator ';' -f csv:- --rules-file rules.$$ print && rm -rf rules.$$
RULES
account1 Assets:MyAccount
date %1
date-format %d/%Y/%m
description %2
amount-in %3
amount-out %4
currency $
$ ./hledger-csv --separator ';'
2009/09/10 Flubber Co🎅
Assets:MyAccount $50
income:unknown $-50
@ -95,7 +170,16 @@ $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescrip
>=0
# 9. read CSV with balance2 field
$ printf 'fields date, description, amount, balance2\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<
10/2009/09,Flubber Co,50,123
RULES
fields date, description, amount, balance2
date-format %d/%Y/%m
currency $
account1 assets:myacct
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50
income:unknown $-50 = $123
@ -103,7 +187,16 @@ $ printf 'fields date, description, amount, balance2\ndate-format %%d/%%Y/%%m\n
>=0
# 10. read CSV with balance1 and balance2 fields
$ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d/%%Y/%%m\ncurrency $\naccount1 assets:myacct\n' >t.$$.csv.rules; printf '10/2009/09,Flubber Co,50,321,123\n' | hledger -f csv:- --rules-file t.$$.csv.rules print && rm -rf t.$$.csv.rules
<
10/2009/09,Flubber Co,50,321,123
RULES
fields date, description, amount, balance1, balance2
date-format %d/%Y/%m
currency $
account1 assets:myacct
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $321
income:unknown $-50 = $123
@ -111,6 +204,291 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d
>=0
# 11. More than two postings
<
10/2009/09,Flubber Co,50,321,123,0.234,VAT
RULES
fields date, description, amount, balance1, balance2, amount3,comment3
date-format %d/%Y/%m
currency $
account1 assets:myacct
account3 expenses:tax
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $321
expenses:unknown = $123
expenses:tax $0.234 ; VAT
>=0
# 12. More than two postings and different currencies
<
10/2009/09,Flubber Co,50,321,123,£,0.234,VAT
RULES
fields date, description, amount, balance1, balance2, currency3, amount3,comment3
date-format %d/%Y/%m
currency $
account1 assets:myacct
account3 expenses:tax
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50 = $321
expenses:unknown = $123
expenses:tax £0.234 ; VAT
>=0
# 13. reading CSV with in-field and out-field, where one could be zero
<
10/2009/09,Flubber Co🎅,50,0
11/2009/09,Flubber Co🎅,0.00,50
RULES
account1 Assets:MyAccount
date %1
date-format %d/%Y/%m
description %2
amount-in %3
amount-out %4
currency $
$ ./hledger-csv
2009/09/10 Flubber Co🎅
Assets:MyAccount $50
income:unknown $-50
2009/09/11 Flubber Co🎅
Assets:MyAccount $-50
expenses:unknown $50
>=0
# 14. multiline descriptions
<
10/2009/09,"Flubber Co
Co
Co
",50
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
$ ./hledger-csv
2009/09/10 Flubber Co Co Co
assets:myacct $50
income:unknown $-50
>=0
# 15. recursive interpolation
<
myacct,10/2009/09,Flubber Co,50,
RULES
fields account1, date, description, amount-in, amount-out
date-format %d/%Y/%m
currency $
if Flubber
account1 assets:%account1
amount-in (%amount-in)
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $-50
expenses:unknown $50
>=0
# 16. Real life-ish paypal parsing example
<
"12/22/2018","06:22:50","PST","Someone","Subscription Payment","Completed","USD","10.00","-0.59","9.41","someone@some.where","simon@joyful.com","123456789","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","57.60",""
RULES
fields date, time, timezone, description, type, status_, currency, grossamount, feeamount, netamount, fromemail, toemail, code, itemtitle, itemid, referencetxnid, receiptid, balance, note
account1 sm:assets:online:paypal
amount1 %netamount
account2 sm:expenses:unknown
account3 JS:expenses:banking:paypal
amount3 %feeamount
balance %18
code %13
currency $
date %1
date-format %m/%d/%Y
description %description for %itemtitle
$ ./hledger-csv
2018/12/22 (123456789) Someone for Joyful Systems
sm:assets:online:paypal $9.41 = $57.60
sm:expenses:unknown
JS:expenses:banking:paypal $-0.59
>=0
# 17. Show that #415 is fixed
<
"2016/01/01","$1"
"2016/02/02","$1,000.00"
RULES
account1 unknown
amount %2
date %1
date-format %Y/%m/%d
$ ./hledger-csv | hledger balance -f - --no-total
$-1,001.00 income:unknown
$1,001.00 unknown
>=0
# 18. Conditional skips
<
HEADER
10/2009/09,Flubber Co,50
MIDDLE SKIP THIS LINE
AND THIS
AND THIS ONE
10/2009/09,Flubber Co,50
*** END OF FILE ***
More lines of the trailer here
They all should be ignored
RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if HEADER
skip
if
END OF FILE
end
if MIDDLE
skip 3
$ ./hledger-csv
2009/09/10 Flubber Co
assets:myacct $50
income:unknown $-50
2009/09/10 Flubber Co
assets:myacct $50
income:unknown $-50
>=0
# 19. Lines with just balance, no amount (#1000)
<
2018-10-15,100
2018-10-16,200
2018-10-17,300
RULES
fields date,bal
balance EUR %bal
date-format %Y-%m-%d
description Assets Update
account1 assets
account2 income
$ ./hledger-csv
2018/10/15 Assets Update
assets = EUR 100
income
2018/10/16 Assets Update
assets = EUR 200
income
2018/10/17 Assets Update
assets = EUR 300
income
>=0
# 20. Test for #1001 - empty assignment to amount show not eat next line
<
2018-10-15,100
2018-10-16,200
2018-10-17,300
RULES
fields date,bal
balance EUR %bal
date-format %Y-%m-%d
description Assets Update
account1 assets
account2 income
if 2018
amount
comment Dont eat me
balance
comment Dont eat me
$ ./hledger-csv
2018/10/15 Assets Update ; Dont eat me
assets
income
2018/10/16 Assets Update ; Dont eat me
assets
income
2018/10/17 Assets Update ; Dont eat me
assets
income
>=0
# 21. Amountless postings and conditional third posting
<
"12/22/2018","06:22:50","PST","Someone","Subscription Payment","Completed","USD","10.00","-0.59","9.41","someone@some.where","simon@joyful.com","123456789","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","57.60",""
"12/22/2018","06:22:50","PST","Someone","Empty fee","Completed","USD","10.00","","6.66","someone@some.where","simon@joyful.com","987654321","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","99.60",""
"12/22/2018","06:22:50","PST","Someone","Conditional Empty fee","Completed","USD","10.00","-1.23","7.77","someone@some.where","simon@joyful.com","10101010101","Joyful Systems","","9KCXINCOME:UNKNOWNZXXAX","","88.66",""
RULES
fields date, time, timezone, description, type, status_, currency, grossamount, feeamount, netamount, fromemail, toemail, code, itemtitle, itemid, referencetxnid, receiptid, balance, note
account1 sm:assets:online:paypal
amount1 %netamount
account2 sm:expenses:unknown
account3 JS:expenses:banking:paypal
amount3 %feeamount
balance %18
code %13
currency $
date %1
date-format %m/%d/%Y
description %description for %itemtitle
if Conditional Empty Fee
account3
$ ./hledger-csv
2018/12/22 (123456789) Someone for Joyful Systems
sm:assets:online:paypal $9.41 = $57.60
sm:expenses:unknown
JS:expenses:banking:paypal $-0.59
2018/12/22 (987654321) Someone for Joyful Systems
sm:assets:online:paypal $6.66 = $99.60
sm:expenses:unknown
JS:expenses:banking:paypal
2018/12/22 (10101010101) Someone for Joyful Systems
sm:assets:online:paypal $7.77 = $88.66
sm:expenses:unknown $-7.77
>=0
# . TODO: without --separator gives obscure error
# |
# 1 | 10/2009/09;Flubber Co🎅;50;
@ -119,10 +497,18 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d
# <
# 10/2009/09;Flubber Co🎅;50;
# 11/2009/09;Flubber Co🎅;;50
# $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger -f csv:- --rules-file rules.$$ print && rm -rf rules.$$
# RULES
# account1 Assets:MyAccount
# date %1
# date-format %d/%Y/%m
# description %2
# amount-in %3
# amount-out %4
# currency $
# $ ./hledger-csv
# 2009/09/10 Flubber Co🎅
# Assets:MyAccount $50
# income:unknown $-50
# expenses:unknown $-50
#
# 2009/09/11 Flubber Co🎅
# Assets:MyAccount $-50
@ -134,10 +520,18 @@ $ printf 'fields date, description, amount, balance1, balance2\ndate-format %%d
# <
# 10/2009/09 Flubber Co🎅 50
# 11/2009/09 Flubber Co🎅 50
# $ printf 'account1 Assets:MyAccount\ndate %%1\ndate-format %%d/%%Y/%%m\ndescription %%2\namount-in %%3\namount-out %%4\ncurrency $\n' >rules.$$ ; hledger --separator "\t" -f csv:- --rules-file rules.$$ print && rm -rf rules.$$
# RULES
# account1 Assets:MyAccount
# date %1
# date-format %d/%Y/%m
# description %2
# amount-in %3
# amount-out %4
# currency $
# $ ./hledger-csv
# 2009/09/10 Flubber Co🎅
# Assets:MyAccount $50
# income:unknown $-50
# expenses:unknown $-50
#
# 2009/09/11 Flubber Co🎅
# Assets:MyAccount $-50

15
tests/hledger-csv Executable file
View File

@ -0,0 +1,15 @@
#!/bin/bash
#
# This scripts expects stdin formatted like this:
# <multi-line csv file>
# RULES
# <multi-line rules>
#
awk -vCSV="t.$$.csv" -vRULES="t.$$.csv.rules" '
BEGIN{output=CSV}
/^RULES/{output=RULES}
!/^RULES/{print $0 >output}'
trap "rm -f t.$$.csv t.$$.csv.rules" EXIT ERR
hledger -f csv:t.$$.csv --rules-file t.$$.csv.rules print "$@"