csv: add newest-first directive for more robust same-day ordering
This commit is contained in:
		
							parent
							
								
									28e8c76a8e
								
							
						
					
					
						commit
						6614aab5d7
					
				| @ -130,8 +130,9 @@ readJournalFromCsv mrulesfile csvfile csvdata = | ||||
|   -- let (headerlines, datalines) = identifyHeaderLines records | ||||
|   --     mfieldnames = lastMay headerlines | ||||
| 
 | ||||
|   -- convert to transactions and return as a journal | ||||
|   let txns = snd $ mapAccumL | ||||
|   let  | ||||
|     -- convert CSV records to transactions | ||||
|     txns = snd $ mapAccumL | ||||
|                      (\pos r -> (pos, | ||||
|                                  transactionFromCsvRecord | ||||
|                                    (let SourcePos name line col =  pos in | ||||
| @ -140,17 +141,27 @@ readJournalFromCsv mrulesfile csvfile csvdata = | ||||
|                                     r)) | ||||
|                      (initialPos parsecfilename) records | ||||
| 
 | ||||
|   -- heuristic: if the records appear to have been in reverse date order, | ||||
|   -- reverse them all as well as doing a txn date sort, | ||||
|   -- so that same-day txns' original order is preserved | ||||
|       txns' | length txns > 1 && tdate (head txns) > tdate (last txns) = reverse txns | ||||
|             | otherwise = txns | ||||
|     -- Ensure transactions are ordered chronologically. | ||||
|     -- First, if the CSV records seem to be most-recent-first, reverse them so that | ||||
|     -- same-date transactions end up in chronological order. The records are treated | ||||
|     -- as most-recent-first if there's an explicit "newest-first" directive, or if | ||||
|     -- there's more than one distinct date and the first date is more recent than the last. | ||||
|     txns' =  | ||||
|       (if newestfirst || mseemsnewestfirst == Just True then reverse else id) txns | ||||
|       where | ||||
|         newestfirst = dbg3 "newestfirst" $ isJust $ getDirective "newest-first" rules | ||||
|         mseemsnewestfirst = dbg3 "mseemsnewestfirst" $   | ||||
|           case nub $ map tdate txns of  | ||||
|             ds | length ds > 1 -> Just $ head ds > last ds  | ||||
|             _                  -> Nothing | ||||
|     -- Second, sort by date. | ||||
|     txns'' = sortBy (comparing tdate) txns' | ||||
| 
 | ||||
|   when (not rulesfileexists) $ do | ||||
|     hPrintf stderr "created default conversion rules file %s, edit this for better results\n" rulesfile | ||||
|     writeFile rulesfile $ T.unpack rulestext | ||||
| 
 | ||||
|   return $ Right nulljournal{jtxns=sortBy (comparing tdate) txns'} | ||||
|   return $ Right nulljournal{jtxns=txns''} | ||||
| 
 | ||||
| parseCsv :: FilePath -> String -> IO (Either Parsec.ParseError CSV) | ||||
| parseCsv path csvdata = | ||||
| @ -209,6 +220,7 @@ defaultRulesText csvfile = T.pack $ unlines | ||||
|   ,"fields date, description, amount" | ||||
|   ,"" | ||||
|   ,"#skip 1" | ||||
|   ,"#newest-first" | ||||
|   ,"" | ||||
|   ,"#date-format %-d/%-m/%Y" | ||||
|   ,"#date-format %-m/%-d/%Y" | ||||
| @ -231,7 +243,7 @@ Grammar for the CSV conversion rules, more or less: | ||||
| 
 | ||||
| RULES: RULE* | ||||
| 
 | ||||
| RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | DATE-FORMAT | COMMENT | BLANK ) NEWLINE | ||||
| RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | NEWEST-FIRST | DATE-FORMAT | COMMENT | BLANK ) NEWLINE | ||||
| 
 | ||||
| FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )* | ||||
| 
 | ||||
| @ -453,6 +465,7 @@ directives = | ||||
|   -- ,"default-currency" | ||||
|   -- ,"skip-lines" -- old | ||||
|   ,"skip" | ||||
|   ,"newest-first" | ||||
|    -- ,"base-account" | ||||
|    -- ,"base-currency" | ||||
|   ] | ||||
|  | ||||
| @ -31,7 +31,7 @@ To learn about *exporting* CSV, see [CSV output](hledger.html#csv-output). | ||||
| 
 | ||||
| # CSV RULES | ||||
| 
 | ||||
| The following six kinds of rule can appear in the rules file, in any order. | ||||
| The following seven kinds of rule can appear in the rules file, in any order. | ||||
| Blank lines and lines beginning with `#` or `;` are ignored. | ||||
| 
 | ||||
| ## skip | ||||
| @ -157,6 +157,17 @@ a path relative to the current file's directory. Eg: | ||||
| include common.rules | ||||
| ``` | ||||
| 
 | ||||
| ## newest-first | ||||
| 
 | ||||
| `newest-first` | ||||
| 
 | ||||
| Consider adding this rule if all of the following are true: | ||||
| your CSV records are in reverse chronological order (newest first), | ||||
| you care about preserving the order of same-day transactions, | ||||
| and you might be processing just one day of data. | ||||
| It usually isn't needed, because hledger autodetects the CSV order | ||||
| when the records span more than one date; | ||||
| but if all the CSV records have the same date, it assumes they are oldest first. | ||||
| 
 | ||||
| # CSV TIPS | ||||
| 
 | ||||
| Each generated journal entry will have two postings, to `account1` and `account2` respectively. | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user