csv: add newest-first directive for more robust same-day ordering

This commit is contained in:
Simon Michael 2017-07-05 15:24:17 +01:00
parent 28e8c76a8e
commit 6614aab5d7
2 changed files with 34 additions and 10 deletions

View File

@ -130,8 +130,9 @@ readJournalFromCsv mrulesfile csvfile csvdata =
-- let (headerlines, datalines) = identifyHeaderLines records
-- mfieldnames = lastMay headerlines
-- convert to transactions and return as a journal
let txns = snd $ mapAccumL
let
-- convert CSV records to transactions
txns = snd $ mapAccumL
(\pos r -> (pos,
transactionFromCsvRecord
(let SourcePos name line col = pos in
@ -140,17 +141,27 @@ readJournalFromCsv mrulesfile csvfile csvdata =
r))
(initialPos parsecfilename) records
-- heuristic: if the records appear to have been in reverse date order,
-- reverse them all as well as doing a txn date sort,
-- so that same-day txns' original order is preserved
txns' | length txns > 1 && tdate (head txns) > tdate (last txns) = reverse txns
| otherwise = txns
-- Ensure transactions are ordered chronologically.
-- First, reverse them to get same-date transactions ordered chronologically,
-- if the CSV records seem to be most-recent-first, ie if there's an explicit
-- "newest-first" directive, or if there's more than one date and the first date
-- is more recent than the last.
txns' =
(if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns
where
newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules
mseemsnewestfirst = dbg3 "mseemsnewestfirst" $
case nub $ map tdate txns of
ds | length ds > 1 -> Just $ head ds > last ds
_ -> Nothing
-- Second, sort by date.
txns'' = sortBy (comparing tdate) txns'
when (not rulesfileexists) $ do
hPrintf stderr "created default conversion rules file %s, edit this for better results\n" rulesfile
writeFile rulesfile $ T.unpack rulestext
return $ Right nulljournal{jtxns=sortBy (comparing tdate) txns'}
return $ Right nulljournal{jtxns=txns''}
parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV)
parseCsv path csvdata =
@ -209,6 +220,7 @@ defaultRulesText csvfile = T.pack $ unlines
,"fields date, description, amount"
,""
,"#skip 1"
,"#newest-first"
,""
,"#date-format %-d/%-m/%Y"
,"#date-format %-m/%-d/%Y"
@ -231,7 +243,7 @@ Grammar for the CSV conversion rules, more or less:
RULES: RULE*
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | DATE-FORMAT | COMMENT | BLANK ) NEWLINE
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | NEWEST-FIRST | DATE-FORMAT | COMMENT | BLANK ) NEWLINE
FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )*
@ -453,6 +465,7 @@ directives =
-- ,"default-currency"
-- ,"skip-lines" -- old
,"skip"
,"newest-first"
-- ,"base-account"
-- ,"base-currency"
]

View File

@ -31,7 +31,7 @@ To learn about *exporting* CSV, see [CSV output](hledger.html#csv-output).
# CSV RULES
The following six kinds of rule can appear in the rules file, in any order.
The following seven kinds of rule can appear in the rules file, in any order.
Blank lines and lines beginning with `#` or `;` are ignored.
## skip
@ -157,6 +157,17 @@ a path relative to the current file's directory. Eg:
include common.rules
```
## newest-first
`newest-first`
Consider adding this rule if
your CSV records are in reverse chronological order (newest first),
you care about preserving the order of same-day transactions,
and you might be processing just one day of data.
It usually isn't needed, because hledger autodetects the CSV order by comparing the first and last dates.
But if all the CSV records have the same date, there is nothing to compare, and hledger assumes they are oldest first.
# CSV TIPS
Each generated journal entry will have two postings, to `account1` and `account2` respectively.