csv: add newest-first directive for more robust same-day ordering

2017-07-05 15:24:17 +01:00 · 2017-07-05 15:24:17 +01:00 · 6614aab5d7
commit 6614aab5d7
parent 28e8c76a8e
2 changed files with 34 additions and 10 deletions
--- a/hledger-lib/Hledger/Read/CsvReader.hs
+++ b/hledger-lib/Hledger/Read/CsvReader.hs
@ -130,8 +130,9 @@ readJournalFromCsv mrulesfile csvfile csvdata =
  -- let (headerlines, datalines) = identifyHeaderLines records
  --     mfieldnames = lastMay headerlines

-  -- convert to transactions and return as a journal
-  let txns = snd $ mapAccumL
+  let 
+    -- convert CSV records to transactions
+    txns = snd $ mapAccumL
                     (\pos r -> (pos,
                                 transactionFromCsvRecord
                                   (let SourcePos name line col =  pos in
@ -140,17 +141,27 @@ readJournalFromCsv mrulesfile csvfile csvdata =
                                    r))
                     (initialPos parsecfilename) records

-  -- heuristic: if the records appear to have been in reverse date order,
-  -- reverse them all as well as doing a txn date sort,
-  -- so that same-day txns' original order is preserved
-      txns' | length txns > 1 && tdate (head txns) > tdate (last txns) = reverse txns
-            | otherwise = txns
+    -- Ensure transactions are ordered chronologically.
+    -- First, if the CSV records seem to be most-recent-first, reverse them so that
+    -- same-date transactions end up in chronological order. The records are treated
+    -- as most-recent-first if there's an explicit "newest-first" directive, or if
+    -- there's more than one distinct date and the first date is more recent than the last.
+    txns' = 
+      (if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns
+      where
+        newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules
+        mseemsnewestfirst = dbg3 "mseemsnewestfirst" $  
+          case nub $ map tdate txns of 
+            ds | length ds > 1 -> Just $ head ds > last ds 
+            _                  -> Nothing
+    -- Second, sort by date (the sort is stable, so the same-date order from the previous step is kept).
+    txns'' = sortBy (comparing tdate) txns'

  when (not rulesfileexists) $ do
    hPrintf stderr "created default conversion rules file %s, edit this for better results\n" rulesfile
    writeFile rulesfile $ T.unpack rulestext

-  return $ Right nulljournal{jtxns=sortBy (comparing tdate) txns'}
+  return $ Right nulljournal{jtxns=txns''}

 parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV)
 parseCsv path csvdata =
@ -209,6 +220,7 @@ defaultRulesText csvfile = T.pack $ unlines
  ,"fields date, description, amount"
  ,""
  ,"#skip 1"
+  ,"#newest-first"
  ,""
  ,"#date-format %-d/%-m/%Y"
  ,"#date-format %-m/%-d/%Y"
@ -231,7 +243,7 @@ Grammar for the CSV conversion rules, more or less:

 RULES: RULE*

-RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | DATE-FORMAT | COMMENT | BLANK ) NEWLINE
+RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | NEWEST-FIRST | DATE-FORMAT | COMMENT | BLANK ) NEWLINE

 FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )*

@ -453,6 +465,7 @@ directives =
  -- ,"default-currency"
  -- ,"skip-lines" -- old
  ,"skip"
+  ,"newest-first"
   -- ,"base-account"
   -- ,"base-currency"
  ]
--- a/hledger-lib/doc/hledger_csv.5.m4.md
+++ b/hledger-lib/doc/hledger_csv.5.m4.md
@ -31,7 +31,7 @@ To learn about *exporting* CSV, see [CSV output](hledger.html#csv-output).

 # CSV RULES

-The following six kinds of rule can appear in the rules file, in any order.
+The following seven kinds of rule can appear in the rules file, in any order.
 Blank lines and lines beginning with `#` or `;` are ignored.

 ## skip
@ -157,6 +157,17 @@ a path relative to the current file's directory. Eg:
 include common.rules
 ```

+## newest-first
+
+`newest-first`
+
+Consider adding this rule if all of the following are true:
+your CSV records are in reverse chronological order (newest first),
+you care about preserving the order of same-day transactions,
+and you might be processing just one day of data.
+It usually isn't needed, because hledger autodetects the CSV order;
+but if all the CSV records have the same date, it assumes they are oldest first.
+
 # CSV TIPS

 Each generated journal entry will have two postings, to `account1` and `account2` respectively.