fix:PeriodData: use Integer keys to avoid date wraparound bugs [#2479]
PeriodData's use of Int keys caused wrong results with periodic reports involving dates outside the machine-specific limits of Int. Those were: 64 bits: -25252734927764696-04-22..25252734927768413-06-12 32 bits: -5877752-05-08..5881469-05-27 16 bits: 1769-02-28..1948-08-04 8 bits: 1858-07-12..1859-03-24 32 bits is supported by MicroHS; 16 and 8 bits aren't supported by any known haskell version, but that could change in future. For example, on 64 bit machines we got: 25252734927768413-06-12 PeriodData's max date (expenses) 1 25252734927768414-01-01 next year past PeriodData's max date (expenses) 2 $ hledger reg -O csv --yearly "txnidx","date","code","description","account","amount","total" "0","-25252734927764696-11-10","","","expenses","1","1" Now it uses Integer (like the time package), fixing the bug. And benchmarking shows memory and time usage slightly improved (surprisingly; tested with up to 500 subperiods, eg hledger -f examples/10ktxns-1kaccts.journal reg -1 cur:A -D >/dev/null)
This commit is contained in:
parent
70e9e7b060
commit
438c4a0469
@ -44,7 +44,6 @@ module Hledger.Data.Account
|
||||
import Control.Applicative ((<|>))
|
||||
import Data.HashSet qualified as HS
|
||||
import Data.HashMap.Strict qualified as HM
|
||||
import Data.IntMap qualified as IM
|
||||
import Data.List (find, sortOn)
|
||||
#if !MIN_VERSION_base(4,20,0)
|
||||
import Data.List (foldl')
|
||||
@ -388,7 +387,7 @@ tests_Account = testGroup "Account" [
|
||||
testCase "no postings, no days" $
|
||||
accountFromPostings undefined [] @?= accountTree "root" []
|
||||
,testCase "no postings, only 2000-01-01" $
|
||||
allAccounts (all (\d -> (ModifiedJulianDay $ toInteger d) == fromGregorian 2000 01 01) . IM.keys . pdperiods . adata)
|
||||
allAccounts (all (\d -> (ModifiedJulianDay $ toInteger d) == fromGregorian 2000 01 01) . M.keys . pdperiods . adata)
|
||||
(accountFromPostings undefined []) @? "Not all adata have exactly 2000-01-01"
|
||||
]
|
||||
]
|
||||
|
||||
@ -22,10 +22,10 @@ module Hledger.Data.DayPartition
|
||||
, tests_DayPartition
|
||||
) where
|
||||
|
||||
import qualified Data.IntMap.Strict as IM
|
||||
import Data.List.NonEmpty (NonEmpty(..))
|
||||
import qualified Data.List.NonEmpty as NE
|
||||
import Data.Time (Day(..), addDays, addGregorianMonthsClip, addGregorianYearsClip, fromGregorian)
|
||||
import Data.Map qualified as M
|
||||
import Data.Time (Day (..), addDays, addGregorianMonthsClip, addGregorianYearsClip, fromGregorian)
|
||||
|
||||
import Hledger.Data.Dates
|
||||
import Hledger.Data.PeriodData
|
||||
@ -122,13 +122,13 @@ unionDayPartitions (DayPartition (PeriodData h as)) (DayPartition (PeriodData h'
|
||||
if equalIntersection as as' && isValidDayPartition union then Just union else Nothing
|
||||
where
|
||||
union = DayPartition . PeriodData (min h h') $ as <> as'
|
||||
equalIntersection x y = and $ IM.intersectionWith (==) x y
|
||||
equalIntersection x y = and $ M.intersectionWith (==) x y
|
||||
|
||||
-- | Get this DayPartition's overall start date and end date (both inclusive).
|
||||
dayPartitionStartEnd :: DayPartition -> (Day, Day)
|
||||
dayPartitionStartEnd (DayPartition (PeriodData _ ds)) =
|
||||
-- Guaranteed not to error because the IntMap is non-empty.
|
||||
(intToDay . fst $ IM.findMin ds, snd $ IM.findMax ds)
|
||||
(intToDay . fst $ M.findMin ds, snd $ M.findMax ds)
|
||||
|
||||
-- | Find the start and end dates of the period within a 'DayPartition' which contains a given day.
|
||||
-- If the day is after the end of the last period, it is assumed to be within the last period.
|
||||
|
||||
@ -22,11 +22,11 @@ import Data.Aeson.Encode.Pretty (Config(..), Indent(..), NumberFormat(
|
||||
--import Data.Aeson.TH
|
||||
import Data.ByteString.Lazy qualified as BL
|
||||
import Data.Decimal (DecimalRaw(..), roundTo)
|
||||
import Data.IntMap qualified as IM
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Text.Lazy qualified as TL
|
||||
import Data.Text.Lazy.Builder qualified as TB
|
||||
import Data.Time (Day(..))
|
||||
import Data.Map qualified as M
|
||||
import Data.Time (Day (..))
|
||||
import Text.Megaparsec (Pos, SourcePos, mkPos, unPos)
|
||||
|
||||
import Hledger.Data.Types
|
||||
@ -161,7 +161,7 @@ instance ToJSON BalanceData
|
||||
instance ToJSON a => ToJSON (PeriodData a) where
|
||||
toJSON a = object
|
||||
[ "pdpre" .= pdpre a
|
||||
, "pdperiods" .= map (\(d, x) -> (ModifiedJulianDay (toInteger d), x)) (IM.toList $ pdperiods a)
|
||||
, "pdperiods" .= map (\(d, x) -> (ModifiedJulianDay (toInteger d), x)) (M.toList $ pdperiods a)
|
||||
]
|
||||
|
||||
instance ToJSON a => ToJSON (Account a) where
|
||||
@ -227,7 +227,7 @@ instance FromJSON BalanceData
|
||||
instance FromJSON a => FromJSON (PeriodData a) where
|
||||
parseJSON = withObject "PeriodData" $ \v -> PeriodData
|
||||
<$> v .: "pdpre"
|
||||
<*> (IM.fromList . map (\(d, x) -> (fromInteger $ toModifiedJulianDay d, x)) <$> v .: "pdperiods")
|
||||
<*> (M.fromList . map (\(d, x) -> (fromInteger $ toModifiedJulianDay d, x)) <$> v .: "pdperiods")
|
||||
|
||||
-- XXX The ToJSON instance replaces subaccounts with just names.
|
||||
-- Here we should try to make use of those to reconstruct the
|
||||
|
||||
@ -27,11 +27,11 @@ import Data.Foldable1 (Foldable1(..))
|
||||
import Control.Applicative (liftA2)
|
||||
#endif
|
||||
import Data.Bifunctor (first)
|
||||
import Data.IntMap.Strict qualified as IM
|
||||
#if !MIN_VERSION_base(4,20,0)
|
||||
import Data.List (foldl')
|
||||
#endif
|
||||
import Data.Time (Day(..), fromGregorian)
|
||||
import Data.Map qualified as M
|
||||
import Data.Time (Day (..), fromGregorian)
|
||||
|
||||
import Hledger.Data.Amount
|
||||
import Hledger.Data.Types
|
||||
@ -44,7 +44,7 @@ instance Show a => Show (PeriodData a) where
|
||||
showString "PeriodData"
|
||||
. showString "{ pdpre = " . shows h
|
||||
. showString ", pdperiods = "
|
||||
. showString "fromList " . shows (map (\(day, x) -> (intToDay day, x)) $ IM.toList ds)
|
||||
. showString "fromList " . shows (map (\(day, x) -> (intToDay day, x)) $ M.toList ds)
|
||||
. showChar '}'
|
||||
|
||||
instance Foldable PeriodData where
|
||||
@ -66,24 +66,24 @@ instance Traversable PeriodData where
|
||||
-- keys in the date map section. This may not be the result you want if the
|
||||
-- keys are not identical.
|
||||
instance Semigroup a => Semigroup (PeriodData a) where
|
||||
PeriodData h1 as1 <> PeriodData h2 as2 = PeriodData (h1 <> h2) $ IM.unionWith (<>) as1 as2
|
||||
PeriodData h1 as1 <> PeriodData h2 as2 = PeriodData (h1 <> h2) $ M.unionWith (<>) as1 as2
|
||||
|
||||
instance Monoid a => Monoid (PeriodData a) where
|
||||
mempty = PeriodData mempty mempty
|
||||
|
||||
-- | Construct a 'PeriodData' from a historical data value and a list of (period start, period data) pairs.
|
||||
periodDataFromList :: a -> [(Day, a)] -> PeriodData a
|
||||
periodDataFromList h = PeriodData h . IM.fromList . map (\(d, a) -> (dayToInt d, a))
|
||||
periodDataFromList h = PeriodData h . M.fromList . map (\(d, a) -> (dayToInt d, a))
|
||||
|
||||
-- | Convert 'PeriodData' to a historical data value and a list of (period start, period data) pairs.
|
||||
periodDataToList :: PeriodData a -> (a, [(Day, a)])
|
||||
periodDataToList (PeriodData h as) = (h, map (\(s, e) -> (intToDay s, e)) $ IM.toList as)
|
||||
periodDataToList (PeriodData h as) = (h, map (\(s, e) -> (intToDay s, e)) $ M.toList as)
|
||||
|
||||
-- | Get the data for the period containing the given 'Day', and that period's start date.
|
||||
-- If the day is after the end of the last period, it is assumed to be within the last period.
|
||||
-- If the day is before the start of the first period (ie, in the historical period), return Nothing.
|
||||
lookupPeriodData :: Day -> PeriodData a -> Maybe (Day, a)
|
||||
lookupPeriodData d (PeriodData _ as) = first intToDay <$> IM.lookupLE (dayToInt d) as
|
||||
lookupPeriodData d (PeriodData _ as) = first intToDay <$> M.lookupLE (dayToInt d) as
|
||||
|
||||
-- | Get the data for the period containing the given 'Day', and that period's start date.
|
||||
-- If the day is after the end of the last period, it is assumed to be within the last period.
|
||||
@ -98,14 +98,14 @@ lookupPeriodDataOrHistorical d pd@(PeriodData h _) = case lookupPeriodData d pd
|
||||
insertPeriodData :: Semigroup a => Maybe Day -> a -> PeriodData a -> PeriodData a
|
||||
insertPeriodData mday b balances = case mday of
|
||||
Nothing -> balances{pdpre = pdpre balances <> b}
|
||||
Just day -> balances{pdperiods = IM.insertWith (<>) (dayToInt day) b $ pdperiods balances}
|
||||
Just day -> balances{pdperiods = M.insertWith (<>) (dayToInt day) b $ pdperiods balances}
|
||||
|
||||
-- | Merge two 'PeriodData', using the given operation to combine their data values.
|
||||
--
|
||||
-- This will drop keys if they are not present in both 'PeriodData'.
|
||||
opPeriodData :: (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c
|
||||
opPeriodData f (PeriodData h1 as1) (PeriodData h2 as2) =
|
||||
PeriodData (f h1 h2) $ IM.intersectionWith f as1 as2
|
||||
PeriodData (f h1 h2) $ M.intersectionWith f as1 as2
|
||||
|
||||
-- | Merge two 'PeriodData', using the given operations for combining data
|
||||
-- that's only in the first, only in the second, or in both, respectively.
|
||||
@ -113,7 +113,7 @@ mergePeriodData :: (a -> c) -> (b -> c) -> (a -> b -> c) -> PeriodData a -> Peri
|
||||
mergePeriodData only1 only2 f = \(PeriodData h1 as1) (PeriodData h2 as2) ->
|
||||
PeriodData (f h1 h2) $ merge as1 as2
|
||||
where
|
||||
merge = IM.mergeWithKey (\_ x y -> Just $ f x y) (fmap only1) (fmap only2)
|
||||
merge = M.mergeWithKey (\_ x y -> Just $ f x y) (fmap only1) (fmap only2)
|
||||
|
||||
-- | Pad out the date map of a 'PeriodData' so that every key from another 'PeriodData' is present.
|
||||
padPeriodData :: a -> PeriodData b -> PeriodData a -> PeriodData a
|
||||
|
||||
@ -40,7 +40,6 @@ import Data.Bifunctor (first)
|
||||
import Data.Decimal (Decimal, DecimalRaw(..))
|
||||
import Data.Default (Default(..))
|
||||
import Data.Functor (($>))
|
||||
import Data.IntMap.Strict qualified as IM
|
||||
import Data.List (intercalate, sortBy)
|
||||
--XXX https://hackage.haskell.org/package/containers/docs/Data-Map.html
|
||||
--Note: You should use Data.Map.Strict instead of this module if:
|
||||
@ -754,20 +753,11 @@ data Account a = Account {
|
||||
-- contiguous report (sub)periods, and with the (open ended) pre-report period.
|
||||
-- The report periods are typically all the same length, but need not be.
|
||||
--
|
||||
-- Report periods are represented only by their start dates, used as the keys of an 'IntMap'.
|
||||
-- Like the Integer inside the Day type, these Int keys are a count of days before or after 1858-11-17.
|
||||
--
|
||||
-- Note the use of Int limits the dates this type can represent.
|
||||
-- On a 64 bit machine, the range is about 25 quadrillion years into past and future
|
||||
-- (-25252734927764696-04-22 to 25252734927768413-06-12).
|
||||
-- On a 32 bit machine, it is about 5 million years into past and future
|
||||
-- (-5877752-05-08 to 5881469-05-27).
|
||||
-- Exceeding the machine's Int range here will silently wrap around,
|
||||
-- causing this type (and periodic reports) to give wrong results.
|
||||
--
|
||||
-- Report periods are represented only by their start dates, used as the keys of a Map.
|
||||
-- (As integers, like the one inside the Day type, representing days before/after 1858-11-17.)
|
||||
data PeriodData a = PeriodData {
|
||||
pdpre :: a -- ^ data for the period before the report
|
||||
,pdperiods :: IM.IntMap a -- ^ data for each period within the report
|
||||
pdpre :: a -- ^ data for the period before the report
|
||||
,pdperiods :: M.Map Integer a -- ^ data for each period within the report
|
||||
} deriving (Eq, Ord, Functor, Generic)
|
||||
|
||||
-- | Data that's useful in "balance" reports:
|
||||
|
||||
@ -39,10 +39,10 @@ import Control.Applicative (liftA2)
|
||||
#endif
|
||||
import Control.Monad (guard)
|
||||
import Data.Foldable (toList)
|
||||
import Data.HashSet qualified as HS
|
||||
import Data.List (sortOn)
|
||||
import Data.List.NonEmpty (NonEmpty((:|)))
|
||||
import Data.HashSet qualified as HS
|
||||
import Data.IntMap.Strict qualified as IM
|
||||
import Data.Map qualified as M
|
||||
import Data.Maybe (fromMaybe, isJust)
|
||||
import Data.Ord (Down(..))
|
||||
import Data.Semigroup (sconcat)
|
||||
@ -394,7 +394,7 @@ generatePeriodicReport makeRow treeAmt flatAmt ropts colspans acct =
|
||||
amt = mixedAmountStripCosts . sortKey . fmap treeAmt . pdperiods . adata
|
||||
sortKey = case balanceaccum_ ropts of
|
||||
PerPeriod -> maSum
|
||||
_ -> maybe nullmixedamt snd . IM.lookupMax
|
||||
_ -> maybe nullmixedamt snd . M.lookupMax
|
||||
|
||||
sortFlatByAmount = case fromMaybe NormallyPositive $ normalbalance_ ropts of
|
||||
NormallyPositive -> sortOn (\r -> (Down $ amt r, prrFullName r))
|
||||
@ -454,7 +454,7 @@ makeMultiBalanceReportRow = makePeriodicReportRow nullmixedamt sumAndAverageMixe
|
||||
-- | Build a report row.
|
||||
--
|
||||
-- Calculate the column totals. These are always the sum of column amounts.
|
||||
makePeriodicReportRow :: c -> (IM.IntMap c -> (c, c))
|
||||
makePeriodicReportRow :: c -> (M.Map Integer c -> (c, c))
|
||||
-> ReportOpts -> (b -> c)
|
||||
-> a -> Account b -> PeriodicReportRow a c
|
||||
makePeriodicReportRow nullEntry totalAndAverage ropts balance name acct =
|
||||
@ -465,7 +465,7 @@ makePeriodicReportRow nullEntry totalAndAverage ropts balance name acct =
|
||||
-- Total for a cumulative/historical report is always the last column.
|
||||
rowtotal = case balanceaccum_ ropts of
|
||||
PerPeriod -> total
|
||||
_ -> maybe nullEntry snd $ IM.lookupMax rowbals
|
||||
_ -> maybe nullEntry snd $ M.lookupMax rowbals
|
||||
|
||||
-- | Map the report rows to percentages if needed
|
||||
reportPercent :: ReportOpts -> MultiBalanceReport -> MultiBalanceReport
|
||||
|
||||
Loading…
Reference in New Issue
Block a user