fix:PeriodData: use Integer keys to avoid date wraparound bugs [#2479]

PeriodData's use of Int keys caused wrong results with periodic
reports involving dates outside the machine-specific limits of Int.
Those were:

64 bits: -25252734927764696-04-22..25252734927768413-06-12
32 bits: -5877752-05-08..5881469-05-27
16 bits:  1769-02-28..1948-08-04
 8 bits:  1858-07-12..1859-03-24

32 bits is supported by MicroHS; 16 and 8 bits aren't supported by
any known haskell version, but that could change in future.

For example, on 64 bit machines we got:

25252734927768413-06-12 PeriodData's max date
   (expenses)   1

25252734927768414-01-01 next year past PeriodData's max date
   (expenses)   2

$ hledger reg -O csv --yearly
"txnidx","date","code","description","account","amount","total"
"0","-25252734927764696-11-10","","","expenses","1","1"

Now it uses Integer (like the time package), fixing the bug.
And benchmarking shows memory and time usage slightly improved
(surprisingly; tested with up to 500 subperiods, eg
hledger -f examples/10ktxns-1kaccts.journal reg -1 cur:A -D >/dev/null)
This commit is contained in:
Simon Michael 2025-10-11 07:04:39 -10:00
parent 70e9e7b060
commit 438c4a0469
6 changed files with 28 additions and 39 deletions

View File

@ -44,7 +44,6 @@ module Hledger.Data.Account
import Control.Applicative ((<|>))
import Data.HashSet qualified as HS
import Data.HashMap.Strict qualified as HM
import Data.IntMap qualified as IM
import Data.List (find, sortOn)
#if !MIN_VERSION_base(4,20,0)
import Data.List (foldl')
@ -388,7 +387,7 @@ tests_Account = testGroup "Account" [
testCase "no postings, no days" $
accountFromPostings undefined [] @?= accountTree "root" []
,testCase "no postings, only 2000-01-01" $
allAccounts (all (\d -> (ModifiedJulianDay $ toInteger d) == fromGregorian 2000 01 01) . IM.keys . pdperiods . adata)
allAccounts (all (\d -> (ModifiedJulianDay $ toInteger d) == fromGregorian 2000 01 01) . M.keys . pdperiods . adata)
(accountFromPostings undefined []) @? "Not all adata have exactly 2000-01-01"
]
]

View File

@ -22,10 +22,10 @@ module Hledger.Data.DayPartition
, tests_DayPartition
) where
import qualified Data.IntMap.Strict as IM
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Time (Day(..), addDays, addGregorianMonthsClip, addGregorianYearsClip, fromGregorian)
import Data.Map qualified as M
import Data.Time (Day (..), addDays, addGregorianMonthsClip, addGregorianYearsClip, fromGregorian)
import Hledger.Data.Dates
import Hledger.Data.PeriodData
@ -122,13 +122,13 @@ unionDayPartitions (DayPartition (PeriodData h as)) (DayPartition (PeriodData h'
if equalIntersection as as' && isValidDayPartition union then Just union else Nothing
where
union = DayPartition . PeriodData (min h h') $ as <> as'
equalIntersection x y = and $ IM.intersectionWith (==) x y
equalIntersection x y = and $ M.intersectionWith (==) x y
-- | Get this DayPartition's overall start date and end date (both inclusive).
dayPartitionStartEnd :: DayPartition -> (Day, Day)
dayPartitionStartEnd (DayPartition (PeriodData _ ds)) =
-- Guaranteed not to error because the IntMap is non-empty.
(intToDay . fst $ IM.findMin ds, snd $ IM.findMax ds)
(intToDay . fst $ M.findMin ds, snd $ M.findMax ds)
-- | Find the start and end dates of the period within a 'DayPartition' which contains a given day.
-- If the day is after the end of the last period, it is assumed to be within the last period.

View File

@ -22,11 +22,11 @@ import Data.Aeson.Encode.Pretty (Config(..), Indent(..), NumberFormat(
--import Data.Aeson.TH
import Data.ByteString.Lazy qualified as BL
import Data.Decimal (DecimalRaw(..), roundTo)
import Data.IntMap qualified as IM
import Data.Maybe (fromMaybe)
import Data.Text.Lazy qualified as TL
import Data.Text.Lazy.Builder qualified as TB
import Data.Time (Day(..))
import Data.Map qualified as M
import Data.Time (Day (..))
import Text.Megaparsec (Pos, SourcePos, mkPos, unPos)
import Hledger.Data.Types
@ -161,7 +161,7 @@ instance ToJSON BalanceData
instance ToJSON a => ToJSON (PeriodData a) where
toJSON a = object
[ "pdpre" .= pdpre a
, "pdperiods" .= map (\(d, x) -> (ModifiedJulianDay (toInteger d), x)) (IM.toList $ pdperiods a)
, "pdperiods" .= map (\(d, x) -> (ModifiedJulianDay (toInteger d), x)) (M.toList $ pdperiods a)
]
instance ToJSON a => ToJSON (Account a) where
@ -227,7 +227,7 @@ instance FromJSON BalanceData
instance FromJSON a => FromJSON (PeriodData a) where
parseJSON = withObject "PeriodData" $ \v -> PeriodData
<$> v .: "pdpre"
<*> (IM.fromList . map (\(d, x) -> (fromInteger $ toModifiedJulianDay d, x)) <$> v .: "pdperiods")
<*> (M.fromList . map (\(d, x) -> (fromInteger $ toModifiedJulianDay d, x)) <$> v .: "pdperiods")
-- XXX The ToJSON instance replaces subaccounts with just names.
-- Here we should try to make use of those to reconstruct the

View File

@ -27,11 +27,11 @@ import Data.Foldable1 (Foldable1(..))
import Control.Applicative (liftA2)
#endif
import Data.Bifunctor (first)
import Data.IntMap.Strict qualified as IM
#if !MIN_VERSION_base(4,20,0)
import Data.List (foldl')
#endif
import Data.Time (Day(..), fromGregorian)
import Data.Map qualified as M
import Data.Time (Day (..), fromGregorian)
import Hledger.Data.Amount
import Hledger.Data.Types
@ -44,7 +44,7 @@ instance Show a => Show (PeriodData a) where
showString "PeriodData"
. showString "{ pdpre = " . shows h
. showString ", pdperiods = "
. showString "fromList " . shows (map (\(day, x) -> (intToDay day, x)) $ IM.toList ds)
. showString "fromList " . shows (map (\(day, x) -> (intToDay day, x)) $ M.toList ds)
. showChar '}'
instance Foldable PeriodData where
@ -66,24 +66,24 @@ instance Traversable PeriodData where
-- keys in the date map section. This may not be the result you want if the
-- keys are not identical.
instance Semigroup a => Semigroup (PeriodData a) where
PeriodData h1 as1 <> PeriodData h2 as2 = PeriodData (h1 <> h2) $ IM.unionWith (<>) as1 as2
PeriodData h1 as1 <> PeriodData h2 as2 = PeriodData (h1 <> h2) $ M.unionWith (<>) as1 as2
instance Monoid a => Monoid (PeriodData a) where
mempty = PeriodData mempty mempty
-- | Construct a 'PeriodData' from a historical data value and a list of (period start, period data) pairs.
periodDataFromList :: a -> [(Day, a)] -> PeriodData a
periodDataFromList h = PeriodData h . IM.fromList . map (\(d, a) -> (dayToInt d, a))
periodDataFromList h = PeriodData h . M.fromList . map (\(d, a) -> (dayToInt d, a))
-- | Convert 'PeriodData' to a historical data value and a list of (period start, period data) pairs.
periodDataToList :: PeriodData a -> (a, [(Day, a)])
periodDataToList (PeriodData h as) = (h, map (\(s, e) -> (intToDay s, e)) $ IM.toList as)
periodDataToList (PeriodData h as) = (h, map (\(s, e) -> (intToDay s, e)) $ M.toList as)
-- | Get the data for the period containing the given 'Day', and that period's start date.
-- If the day is after the end of the last period, it is assumed to be within the last period.
-- If the day is before the start of the first period (ie, in the historical period), return Nothing.
lookupPeriodData :: Day -> PeriodData a -> Maybe (Day, a)
lookupPeriodData d (PeriodData _ as) = first intToDay <$> IM.lookupLE (dayToInt d) as
lookupPeriodData d (PeriodData _ as) = first intToDay <$> M.lookupLE (dayToInt d) as
-- | Get the data for the period containing the given 'Day', and that period's start date.
-- If the day is after the end of the last period, it is assumed to be within the last period.
@ -98,14 +98,14 @@ lookupPeriodDataOrHistorical d pd@(PeriodData h _) = case lookupPeriodData d pd
insertPeriodData :: Semigroup a => Maybe Day -> a -> PeriodData a -> PeriodData a
insertPeriodData mday b balances = case mday of
Nothing -> balances{pdpre = pdpre balances <> b}
Just day -> balances{pdperiods = IM.insertWith (<>) (dayToInt day) b $ pdperiods balances}
Just day -> balances{pdperiods = M.insertWith (<>) (dayToInt day) b $ pdperiods balances}
-- | Merge two 'PeriodData', using the given operation to combine their data values.
--
-- This will drop keys if they are not present in both 'PeriodData'.
opPeriodData :: (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c
opPeriodData f (PeriodData h1 as1) (PeriodData h2 as2) =
PeriodData (f h1 h2) $ IM.intersectionWith f as1 as2
PeriodData (f h1 h2) $ M.intersectionWith f as1 as2
-- | Merge two 'PeriodData', using the given operations for combining data
-- that's only in the first, only in the second, or in both, respectively.
@ -113,7 +113,7 @@ mergePeriodData :: (a -> c) -> (b -> c) -> (a -> b -> c) -> PeriodData a -> Peri
mergePeriodData only1 only2 f = \(PeriodData h1 as1) (PeriodData h2 as2) ->
PeriodData (f h1 h2) $ merge as1 as2
where
merge = IM.mergeWithKey (\_ x y -> Just $ f x y) (fmap only1) (fmap only2)
merge = M.mergeWithKey (\_ x y -> Just $ f x y) (fmap only1) (fmap only2)
-- | Pad out the date map of a 'PeriodData' so that every key from another 'PeriodData' is present.
padPeriodData :: a -> PeriodData b -> PeriodData a -> PeriodData a

View File

@ -40,7 +40,6 @@ import Data.Bifunctor (first)
import Data.Decimal (Decimal, DecimalRaw(..))
import Data.Default (Default(..))
import Data.Functor (($>))
import Data.IntMap.Strict qualified as IM
import Data.List (intercalate, sortBy)
--XXX https://hackage.haskell.org/package/containers/docs/Data-Map.html
--Note: You should use Data.Map.Strict instead of this module if:
@ -754,20 +753,11 @@ data Account a = Account {
-- contiguous report (sub)periods, and with the (open ended) pre-report period.
-- The report periods are typically all the same length, but need not be.
--
-- Report periods are represented only by their start dates, used as the keys of an 'IntMap'.
-- Like the Integer inside the Day type, these Int keys are a count of days before or after 1858-11-17.
--
-- Note the use of Int limits the dates this type can represent.
-- On a 64 bit machine, the range is about 25 quadrillion years into past and future
-- (-25252734927764696-04-22 to 25252734927768413-06-12).
-- On a 32 bit machine, it is about 5 million years into past and future
-- (-5877752-05-08 to 5881469-05-27).
-- Exceeding the machine's Int range here will silently wrap around,
-- causing this type (and periodic reports) to give wrong results.
--
-- Report periods are represented only by their start dates, used as the keys of a Map.
-- (As integers, like the one inside the Day type, representing days before/after 1858-11-17.)
data PeriodData a = PeriodData {
pdpre :: a -- ^ data for the period before the report
,pdperiods :: IM.IntMap a -- ^ data for each period within the report
pdpre :: a -- ^ data for the period before the report
,pdperiods :: M.Map Integer a -- ^ data for each period within the report
} deriving (Eq, Ord, Functor, Generic)
-- | Data that's useful in "balance" reports:

View File

@ -39,10 +39,10 @@ import Control.Applicative (liftA2)
#endif
import Control.Monad (guard)
import Data.Foldable (toList)
import Data.HashSet qualified as HS
import Data.List (sortOn)
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.HashSet qualified as HS
import Data.IntMap.Strict qualified as IM
import Data.Map qualified as M
import Data.Maybe (fromMaybe, isJust)
import Data.Ord (Down(..))
import Data.Semigroup (sconcat)
@ -394,7 +394,7 @@ generatePeriodicReport makeRow treeAmt flatAmt ropts colspans acct =
amt = mixedAmountStripCosts . sortKey . fmap treeAmt . pdperiods . adata
sortKey = case balanceaccum_ ropts of
PerPeriod -> maSum
_ -> maybe nullmixedamt snd . IM.lookupMax
_ -> maybe nullmixedamt snd . M.lookupMax
sortFlatByAmount = case fromMaybe NormallyPositive $ normalbalance_ ropts of
NormallyPositive -> sortOn (\r -> (Down $ amt r, prrFullName r))
@ -454,7 +454,7 @@ makeMultiBalanceReportRow = makePeriodicReportRow nullmixedamt sumAndAverageMixe
-- | Build a report row.
--
-- Calculate the column totals. These are always the sum of column amounts.
makePeriodicReportRow :: c -> (IM.IntMap c -> (c, c))
makePeriodicReportRow :: c -> (M.Map Integer c -> (c, c))
-> ReportOpts -> (b -> c)
-> a -> Account b -> PeriodicReportRow a c
makePeriodicReportRow nullEntry totalAndAverage ropts balance name acct =
@ -465,7 +465,7 @@ makePeriodicReportRow nullEntry totalAndAverage ropts balance name acct =
-- Total for a cumulative/historical report is always the last column.
rowtotal = case balanceaccum_ ropts of
PerPeriod -> total
_ -> maybe nullEntry snd $ IM.lookupMax rowbals
_ -> maybe nullEntry snd $ M.lookupMax rowbals
-- | Map the report rows to percentages if needed
reportPercent :: ReportOpts -> MultiBalanceReport -> MultiBalanceReport