csv: add newest-first directive for more robust same-day ordering
This commit is contained in:
parent
28e8c76a8e
commit
6614aab5d7
@ -130,8 +130,9 @@ readJournalFromCsv mrulesfile csvfile csvdata =
|
||||
-- let (headerlines, datalines) = identifyHeaderLines records
|
||||
-- mfieldnames = lastMay headerlines
|
||||
|
||||
-- convert to transactions and return as a journal
|
||||
let txns = snd $ mapAccumL
|
||||
let
|
||||
-- convert CSV records to transactions
|
||||
txns = snd $ mapAccumL
|
||||
(\pos r -> (pos,
|
||||
transactionFromCsvRecord
|
||||
(let SourcePos name line col = pos in
|
||||
@ -140,17 +141,27 @@ readJournalFromCsv mrulesfile csvfile csvdata =
|
||||
r))
|
||||
(initialPos parsecfilename) records
|
||||
|
||||
-- heuristic: if the records appear to have been in reverse date order,
|
||||
-- reverse them all as well as doing a txn date sort,
|
||||
-- so that same-day txns' original order is preserved
|
||||
txns' | length txns > 1 && tdate (head txns) > tdate (last txns) = reverse txns
|
||||
| otherwise = txns
|
||||
-- Ensure transactions are ordered chronologically.
|
||||
-- First, reverse them to get same-date transactions ordered chronologically,
|
||||
-- if the CSV records seem to be most-recent-first, ie if there's an explicit
|
||||
-- "newest-first" directive, or if there's more than one date and the first date
|
||||
-- is more recent than the last.
|
||||
txns' =
|
||||
(if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns
|
||||
where
|
||||
newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules
|
||||
mseemsnewestfirst = dbg3 "mseemsnewestfirst" $
|
||||
case nub $ map tdate txns of
|
||||
ds | length ds > 1 -> Just $ head ds > last ds
|
||||
_ -> Nothing
|
||||
-- Second, sort by date.
|
||||
txns'' = sortBy (comparing tdate) txns'
|
||||
|
||||
when (not rulesfileexists) $ do
|
||||
hPrintf stderr "created default conversion rules file %s, edit this for better results\n" rulesfile
|
||||
writeFile rulesfile $ T.unpack rulestext
|
||||
|
||||
return $ Right nulljournal{jtxns=sortBy (comparing tdate) txns'}
|
||||
return $ Right nulljournal{jtxns=txns''}
|
||||
|
||||
parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV)
|
||||
parseCsv path csvdata =
|
||||
@ -209,6 +220,7 @@ defaultRulesText csvfile = T.pack $ unlines
|
||||
,"fields date, description, amount"
|
||||
,""
|
||||
,"#skip 1"
|
||||
,"#newest-first"
|
||||
,""
|
||||
,"#date-format %-d/%-m/%Y"
|
||||
,"#date-format %-m/%-d/%Y"
|
||||
@ -231,7 +243,7 @@ Grammar for the CSV conversion rules, more or less:
|
||||
|
||||
RULES: RULE*
|
||||
|
||||
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | DATE-FORMAT | COMMENT | BLANK ) NEWLINE
|
||||
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | NEWEST-FIRST | DATE-FORMAT | COMMENT | BLANK ) NEWLINE
|
||||
|
||||
FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )*
|
||||
|
||||
@ -453,6 +465,7 @@ directives =
|
||||
-- ,"default-currency"
|
||||
-- ,"skip-lines" -- old
|
||||
,"skip"
|
||||
,"newest-first"
|
||||
-- ,"base-account"
|
||||
-- ,"base-currency"
|
||||
]
|
||||
|
||||
@ -31,7 +31,7 @@ To learn about *exporting* CSV, see [CSV output](hledger.html#csv-output).
|
||||
|
||||
# CSV RULES
|
||||
|
||||
The following six kinds of rule can appear in the rules file, in any order.
|
||||
The following seven kinds of rule can appear in the rules file, in any order.
|
||||
Blank lines and lines beginning with `#` or `;` are ignored.
|
||||
|
||||
## skip
|
||||
@ -157,6 +157,17 @@ a path relative to the current file's directory. Eg:
|
||||
include common.rules
|
||||
```
|
||||
|
||||
## newest-first
|
||||
|
||||
`newest-first`
|
||||
|
||||
Consider adding this rule if all of the following apply:
|
||||
your CSV records are in reverse chronological order (newest first),
|
||||
and you care about preserving the order of same-day transactions,
|
||||
and you might be processing just one day of data.
|
||||
It usually isn't needed, because hledger autodetects the CSV order,
|
||||
but if all the CSV records have the same date, it cannot detect the order and assumes they are oldest first.
|
||||
|
||||
# CSV TIPS
|
||||
|
||||
Each generated journal entry will have two postings, to `account1` and `account2` respectively.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user