csv: add newest-first directive for more robust same-day ordering

This commit is contained in:
Simon Michael 2017-07-05 15:24:17 +01:00
parent 28e8c76a8e
commit 6614aab5d7
2 changed files with 34 additions and 10 deletions

View File

@ -130,8 +130,9 @@ readJournalFromCsv mrulesfile csvfile csvdata =
-- let (headerlines, datalines) = identifyHeaderLines records -- let (headerlines, datalines) = identifyHeaderLines records
-- mfieldnames = lastMay headerlines -- mfieldnames = lastMay headerlines
-- convert to transactions and return as a journal let
let txns = snd $ mapAccumL -- convert CSV records to transactions
txns = snd $ mapAccumL
(\pos r -> (pos, (\pos r -> (pos,
transactionFromCsvRecord transactionFromCsvRecord
(let SourcePos name line col = pos in (let SourcePos name line col = pos in
@ -140,17 +141,27 @@ readJournalFromCsv mrulesfile csvfile csvdata =
r)) r))
(initialPos parsecfilename) records (initialPos parsecfilename) records
-- heuristic: if the records appear to have been in reverse date order, -- Ensure transactions are ordered chronologically.
-- reverse them all as well as doing a txn date sort, -- First, reverse them to get same-date transactions ordered chronologically,
-- so that same-day txns' original order is preserved -- if the CSV records seem to be most-recent-first, ie if there's an explicit
txns' | length txns > 1 && tdate (head txns) > tdate (last txns) = reverse txns -- "newest-first" directive, or if there's more than one date and the first date
| otherwise = txns -- is more recent than the last.
txns' =
(if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns
where
newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules
mseemsnewestfirst = dbg3 "mseemsnewestfirst" $
case nub $ map tdate txns of
ds | length ds > 1 -> Just $ head ds > last ds
_ -> Nothing
-- Second, sort by date.
txns'' = sortBy (comparing tdate) txns'
when (not rulesfileexists) $ do when (not rulesfileexists) $ do
hPrintf stderr "created default conversion rules file %s, edit this for better results\n" rulesfile hPrintf stderr "created default conversion rules file %s, edit this for better results\n" rulesfile
writeFile rulesfile $ T.unpack rulestext writeFile rulesfile $ T.unpack rulestext
return $ Right nulljournal{jtxns=sortBy (comparing tdate) txns'} return $ Right nulljournal{jtxns=txns''}
parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV) parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV)
parseCsv path csvdata = parseCsv path csvdata =
@ -209,6 +220,7 @@ defaultRulesText csvfile = T.pack $ unlines
,"fields date, description, amount" ,"fields date, description, amount"
,"" ,""
,"#skip 1" ,"#skip 1"
,"#newest-first"
,"" ,""
,"#date-format %-d/%-m/%Y" ,"#date-format %-d/%-m/%Y"
,"#date-format %-m/%-d/%Y" ,"#date-format %-m/%-d/%Y"
@ -231,7 +243,7 @@ Grammar for the CSV conversion rules, more or less:
RULES: RULE* RULES: RULE*
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | DATE-FORMAT | COMMENT | BLANK ) NEWLINE RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | NEWEST-FIRST | DATE-FORMAT | COMMENT | BLANK ) NEWLINE
FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )* FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )*
@ -453,6 +465,7 @@ directives =
-- ,"default-currency" -- ,"default-currency"
-- ,"skip-lines" -- old -- ,"skip-lines" -- old
,"skip" ,"skip"
,"newest-first"
-- ,"base-account" -- ,"base-account"
-- ,"base-currency" -- ,"base-currency"
] ]

View File

@ -31,7 +31,7 @@ To learn about *exporting* CSV, see [CSV output](hledger.html#csv-output).
# CSV RULES # CSV RULES
The following six kinds of rule can appear in the rules file, in any order. The following seven kinds of rule can appear in the rules file, in any order.
Blank lines and lines beginning with `#` or `;` are ignored. Blank lines and lines beginning with `#` or `;` are ignored.
## skip ## skip
@ -157,6 +157,17 @@ a path relative to the current file's directory. Eg:
include common.rules include common.rules
``` ```
## newest-first
`newest-first`
Consider adding this rule if all of the following apply:

- your CSV records are in reverse chronological order (newest first),
- you care about preserving the order of same-day transactions,
- you might be processing just one day of data.

It usually isn't needed, because hledger autodetects the CSV order by comparing the first and last dates.
But if all the CSV records have the same date, the order can't be detected, and hledger assumes they are oldest first.
# CSV TIPS # CSV TIPS
Each generated journal entry will have two postings, to `account1` and `account2` respectively. Each generated journal entry will have two postings, to `account1` and `account2` respectively.