feat: csv: intra-day-reversed compensates when days' txns are reversed
As in, e.g., Vanguard CSV.
This commit is contained in:
parent
360ef833ae
commit
01387548e7
@ -45,10 +45,10 @@ import Control.Monad.Trans.Class (lift)
|
|||||||
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord)
|
import Data.Char (toLower, isDigit, isSpace, isAlphaNum, ord)
|
||||||
import Data.Bifunctor (first)
|
import Data.Bifunctor (first)
|
||||||
import Data.Functor ((<&>))
|
import Data.Functor ((<&>))
|
||||||
import Data.List (elemIndex, foldl', intersperse, mapAccumL, nub, sortBy)
|
import Data.List (elemIndex, foldl', intersperse, mapAccumL, nub, sortOn)
|
||||||
|
import Data.List.Extra (groupOn)
|
||||||
import Data.Maybe (catMaybes, fromMaybe, isJust)
|
import Data.Maybe (catMaybes, fromMaybe, isJust)
|
||||||
import Data.MemoUgly (memo)
|
import Data.MemoUgly (memo)
|
||||||
import Data.Ord (comparing)
|
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
@ -358,7 +358,7 @@ Grammar for the CSV conversion rules, more or less:
|
|||||||
|
|
||||||
RULES: RULE*
|
RULES: RULE*
|
||||||
|
|
||||||
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | NEWEST-FIRST | DATE-FORMAT | DECIMAL-MARK | COMMENT | BLANK ) NEWLINE
|
RULE: ( FIELD-LIST | FIELD-ASSIGNMENT | CONDITIONAL-BLOCK | SKIP | TIMEZONE | NEWEST-FIRST | INTRA-DAY-REVERSED | DATE-FORMAT | DECIMAL-MARK | COMMENT | BLANK ) NEWLINE
|
||||||
|
|
||||||
FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )*
|
FIELD-LIST: fields SPACE FIELD-NAME ( SPACE? , SPACE? FIELD-NAME )*
|
||||||
|
|
||||||
@ -462,6 +462,7 @@ directives =
|
|||||||
,"skip"
|
,"skip"
|
||||||
,"timezone"
|
,"timezone"
|
||||||
,"newest-first"
|
,"newest-first"
|
||||||
|
,"intra-day-reversed"
|
||||||
, "balance-type"
|
, "balance-type"
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -750,29 +751,35 @@ readJournalFromCsv mrulesfile csvfile csvdata = do
|
|||||||
Just f | any (`T.isInfixOf` f) ["%Z","%z","%EZ","%Ez"] -> True
|
Just f | any (`T.isInfixOf` f) ["%Z","%z","%EZ","%Ez"] -> True
|
||||||
_ -> False
|
_ -> False
|
||||||
|
|
||||||
-- Ensure transactions are ordered chronologically.
|
-- Do our best to ensure transactions will be ordered chronologically,
|
||||||
-- First, if the CSV records seem to be most-recent-first (because
|
-- from oldest to newest. This is done in several steps:
|
||||||
-- there's an explicit "newest-first" directive, or there's more
|
-- 1. Intra-day order: if there's an "intra-day-reversed" rule,
|
||||||
-- than one date and the first date is more recent than the last):
|
-- assume each day's CSV records were ordered in reverse of the overall date order,
|
||||||
-- reverse them to get same-date transactions ordered chronologically.
|
-- so reverse each day's txns.
|
||||||
txns' =
|
intradayreversed = dbg6 "intra-day-reversed" $ isJust $ getDirective "intra-day-reversed" rules
|
||||||
(if newestfirst || mdataseemsnewestfirst == Just True
|
txns1 = dbg7 "txns1" $
|
||||||
then dbg7 "reversed csv txns" . reverse else id)
|
(if intradayreversed then concatMap reverse . groupOn tdate else id) txns
|
||||||
txns
|
-- 2. Overall date order: now if there's a "newest-first" rule,
|
||||||
where
|
-- or if there are multiple dates and the first is more recent than the last,
|
||||||
newestfirst = dbg6 "newestfirst" $ isJust $ getDirective "newest-first" rules
|
-- assume CSV records were ordered newest dates first,
|
||||||
mdataseemsnewestfirst = dbg6 "mdataseemsnewestfirst" $
|
-- so reverse all txns.
|
||||||
case nub $ map tdate txns of
|
newestfirst = dbg6 "newest-first" $ isJust $ getDirective "newest-first" rules
|
||||||
ds | length ds > 1 -> Just $ head ds > last ds
|
mdatalooksnewestfirst = dbg6 "mdatalooksnewestfirst" $
|
||||||
_ -> Nothing
|
case nub $ map tdate txns of
|
||||||
-- Second, sort by date.
|
ds | length ds > 1 -> Just $ head ds > last ds
|
||||||
txns'' = dbg7 "date-sorted csv txns" $ sortBy (comparing tdate) txns'
|
_ -> Nothing
|
||||||
|
txns2 = dbg7 "txns2" $
|
||||||
|
(if newestfirst || mdatalooksnewestfirst == Just True then reverse else id) txns1
|
||||||
|
-- 3. Disordered dates: in case the CSV records were ordered by chaos,
|
||||||
|
-- do a final sort by date. If it was only a few records out of order,
|
||||||
|
-- this will hopefully refine any good ordering done by steps 1 and 2.
|
||||||
|
txns3 = dbg7 "date-sorted csv txns" $ sortOn tdate txns2
|
||||||
|
|
||||||
liftIO $ unless rulesfileexists $ do
|
liftIO $ unless rulesfileexists $ do
|
||||||
dbg1IO "creating conversion rules file" rulesfile
|
dbg1IO "creating conversion rules file" rulesfile
|
||||||
T.writeFile rulesfile rulestext
|
T.writeFile rulesfile rulestext
|
||||||
|
|
||||||
return nulljournal{jtxns=txns''}
|
return nulljournal{jtxns=txns3}
|
||||||
|
|
||||||
-- | Parse special separator names TAB and SPACE, or return the first
|
-- | Parse special separator names TAB and SPACE, or return the first
|
||||||
-- character. Return Nothing on empty string
|
-- character. Return Nothing on empty string
|
||||||
|
|||||||
@ -3910,7 +3910,8 @@ these are described more fully below, after the examples:
|
|||||||
| [**`end`**](#end) | skip the remaining CSV records |
|
| [**`end`**](#end) | skip the remaining CSV records |
|
||||||
| [**`date-format`**](#date-format) | how to parse dates in CSV records |
|
| [**`date-format`**](#date-format) | how to parse dates in CSV records |
|
||||||
| [**`decimal-mark`**](#decimal-mark) | the decimal mark used in CSV amounts, if ambiguous |
|
| [**`decimal-mark`**](#decimal-mark) | the decimal mark used in CSV amounts, if ambiguous |
|
||||||
| [**`newest-first`**](#newest-first) | disambiguate record order when there's only one date |
|
| [**`newest-first`**](#newest-first) | improve txn order when there are multiple records, newest first, all with the same date |
|
||||||
|
| [**`intra-day-reversed`**](#intra-day-reversed) | improve txn order when each day's txns are in reverse of the overall date order |
|
||||||
| [**`include`**](#include) | inline another CSV rules file |
|
| [**`include`**](#include) | inline another CSV rules file |
|
||||||
| [**`balance-type`**](#balance-type) | choose which type of balance assignments to use |
|
| [**`balance-type`**](#balance-type) | choose which type of balance assignments to use |
|
||||||
|
|
||||||
@ -4674,6 +4675,24 @@ then, you should add the `newest-first` rule as a hint. Eg:
|
|||||||
newest-first
|
newest-first
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### `intra-day-reversed`
|
||||||
|
|
||||||
|
CSV records for each day are sometimes ordered in reverse compared to the overall date order.
|
||||||
|
Eg, here dates are newest first, but the transactions on each date are oldest first:
|
||||||
|
```csv
|
||||||
|
2022-10-02, txn 3...
|
||||||
|
2022-10-02, txn 4...
|
||||||
|
2022-10-01, txn 1...
|
||||||
|
2022-10-01, txn 2...
|
||||||
|
```
|
||||||
|
In this situation, add the `intra-day-reversed` rule, and hledger will compensate,
|
||||||
|
improving the order of transactions.
|
||||||
|
```rules
|
||||||
|
# transactions within each day are reversed, so reverse them back
|
||||||
|
intra-day-reversed
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### `include`
|
### `include`
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user