From 8779f2481adf9ecdc359d296e15887d28c3b1383 Mon Sep 17 00:00:00 2001
From: Simon Michael <simon@joyful.com>
Date: Thu, 9 Oct 2025 17:07:59 -1000
Subject: [PATCH] ;dev: PeriodData, DayPartition: haddock updates

Clarify some things. Also note an example of PeriodData wrapping around.
---
 examples/farfuture.j                     |  20 +++
 hledger-lib/Hledger/Data/DayPartition.hs | 148 ++++++++++++-----------
 hledger-lib/Hledger/Data/PeriodData.hs   |  43 +++----
 hledger-lib/Hledger/Data/Types.hs        |  23 ++--
 4 files changed, 136 insertions(+), 98 deletions(-)
 create mode 100644 examples/farfuture.j

diff --git a/examples/farfuture.j b/examples/farfuture.j
new file mode 100644
index 000000000..63d580607
--- /dev/null
+++ b/examples/farfuture.j
@@ -0,0 +1,20 @@
+; Testing behaviour at PeriodData's limit.
+; PeriodData is used in multi-period reports.
+; On a 64 bit machine it can work with dates from -25252734927764696-04-22 to 25252734927768413-06-12.
+; Dates outside that range wrap around, giving wrong results:
+;
+; $ hledger -f examples/farfuture.j reg -O csv -Y
+; "txnidx","date","code","description","account","amount","total"
+; "0","-25252734927764696-11-10","","","expenses","6","6"
+
+25252734927768413-06-12 PeriodData's max date
+   (expenses)   1
+
+25252734927768413-06-13 one day past PeriodData's max date
+   (expenses)   2
+
+25252734927768413-12-01 farther past PeriodData's max date, same year
+   (expenses)   3
+
+25252734927768414-01-01 next year past PeriodData's max date
+   (expenses)   4
diff --git a/hledger-lib/Hledger/Data/DayPartition.hs b/hledger-lib/Hledger/Data/DayPartition.hs
index b4d23adc7..959263cea 100644
--- a/hledger-lib/Hledger/Data/DayPartition.hs
+++ b/hledger-lib/Hledger/Data/DayPartition.hs
@@ -3,22 +3,22 @@ A partition of time into contiguous spans, for defining reporting periods.
 -}
 module Hledger.Data.DayPartition
 ( DayPartition
+-- * constructors
 , boundariesToDayPartition
 , boundariesToMaybeDayPartition
-
-, lookupDayPartition
-, unionDayPartitions
-
+-- * conversions
 , dayPartitionToNonEmpty
 , dayPartitionToList
-, dayPartitionSpans
 , dayPartitionToDateSpans
 , dayPartitionToPeriodData
 , maybeDayPartitionToDateSpans
-
+-- * operations
+, unionDayPartitions
+, dayPartitionSpans
+, lookupDayPartition
 , splitSpan
 , intervalBoundaryBefore
-
+-- * tests
 , tests_DayPartition
 ) where
 
@@ -33,97 +33,110 @@ import Hledger.Data.Types
 import Hledger.Utils
 
 
--- | A partition of time into contiguous spans, along with a historical period
--- before any of the spans.
+-- | A partition of time into one or more contiguous periods,
+-- plus a historical period that precedes them.
+-- Note 'DayPartition' does not store per-period data - only the periods' start/end dates.
+
+-- Each period is at least one day in length.
+-- The historical period is open ended, with no start date.
+-- The last period has an end date, but note some queries (like 'dayPartitionFind') ignore that, acting as if the last period is open ended.
+-- Only smart constructors are exported, so that a DayPartition always satisfies these invariants.
 --
--- This is a newtype wrapper around 'PeriodData Day', where the start dates are
--- the keys and the end dates are the values. Spans are stored in inclusive format
--- [start, end]. Note that this differs from 'DateSpan' which uses [start, end)
--- format.
+-- This is implemented as a newtype wrapper around 'PeriodData Day', which is a map from date to date.
+-- The map's keys are the period start dates, and the values are the corresponding period end dates.
+-- Note unlike 'DateSpan', which stores exclusive end dates ( @[start, end)@ ),
+-- here both start and end dates are inclusive ( @[start, end]@ ).
 --
--- The constructor is not exported so that we can ensure the spans are valid
--- partitions of time.
 newtype DayPartition = DayPartition { dayPartitionToPeriodData :: PeriodData Day } deriving (Eq, Ord, Show)
 
--- Developer's note. All constructors must guarantee that:
---   1. The value stored in pdperiods has at least one key.
---   2. The value stored in pdpre equals one day before the smallest key in pdperiods.
---   3. The value stored in each entry of pdperiods equals one day before the
---      next largest key, except for the value associated to the largest key.
-isValidDayPartition :: DayPartition -> Bool
-isValidDayPartition (DayPartition pd) = case ds of
-    [] -> False  -- Must be at least one key in pdperiods
-    xs -> and $ zipWith isContiguous ((nulldate, h) : xs) xs
-  where
-    (h, ds) = periodDataToList pd
-    isContiguous (_, e) (s, _) = addDays 1 e == s
 
+-- constructors:
 
--- | Construct a 'DayPartition' from a non-empty list of boundary days.
+-- | Construct a 'DayPartition' from a non-empty list of period boundary dates (start dates plus a final exclusive end date).
+--
+-- >>> boundariesToDayPartition (fromGregorian 2025 01 01 :| [fromGregorian 2025 02 01])
+-- DayPartition {dayPartitionToPeriodData = PeriodData{ pdpre = 2024-12-31, pdperiods = fromList [(2025-01-01,2025-01-31)]}}
+--
 boundariesToDayPartition :: NonEmpty Day -> DayPartition
 boundariesToDayPartition xs = DayPartition . periodDataFromList (addDays (-1) b) $ case bs of
     []  -> [(b, b)]  -- If only one boundary is supplied, it ends on the same day
     _:_ -> zip (b:bs) $ map (addDays (-1)) bs  -- Guaranteed non-empty
   where b:|bs = NE.nub $ NE.sort xs
 
--- | Construct a 'DayPartition' from a list of boundary days, returning
--- 'Nothing' for the empty list.
+-- | Construct a 'DayPartition' from a list of period boundary dates (start dates plus a final exclusive end date),
+-- if it's a non-empty list.
 boundariesToMaybeDayPartition :: [Day] -> Maybe DayPartition
 boundariesToMaybeDayPartition = fmap boundariesToDayPartition . NE.nonEmpty
 
 
--- | Find the span of a 'DayPartition' which contains a given day.
-lookupDayPartition :: Day -> DayPartition -> (Maybe Day, Day)
-lookupDayPartition d (DayPartition xs) = lookupPeriodDataOrHistorical d xs
+-- conversions:
 
--- | Return the union of two 'DayPartition's if they are consistent, or 'Nothing' otherwise.
-unionDayPartitions :: DayPartition -> DayPartition -> Maybe DayPartition
-unionDayPartitions (DayPartition (PeriodData h as)) (DayPartition (PeriodData h' as')) =
-    if equalIntersection as as' && isValidDayPartition union then Just union else Nothing
-  where
-    union = DayPartition . PeriodData (min h h') $ as <> as'
-    equalIntersection x y = and $ IM.intersectionWith (==) x y
-
-
--- | Convert 'DayPartition' to a non-empty list of start and end dates for the periods.
---
--- Note that the end date of each period will be one day before the start date
--- of the next period.
+-- | Convert 'DayPartition' to a non-empty list of period start and end dates (both inclusive).
+-- Each end date will be one day before the next period's start date.
 dayPartitionToNonEmpty :: DayPartition -> NonEmpty (Day, Day)
 dayPartitionToNonEmpty (DayPartition xs) = NE.fromList . snd $ periodDataToList xs  -- Constructors guarantee this is non-empty
 
--- | Convert 'DayPartition' to a list of start and end dates for the periods.
---
--- Note that the end date of each period will be one day before the start date
--- of the next period.
+-- | Convert 'DayPartition' to a list (which will always be non-empty) of period start and end dates (both inclusive).
+-- Each end date will be one day before the next period's start date.
 dayPartitionToList :: DayPartition -> [(Day, Day)]
 dayPartitionToList = NE.toList . dayPartitionToNonEmpty
 
--- | Return the whole day range spanned by a `PeriodData Day`.
-dayPartitionSpans :: DayPartition -> (Day, Day)
-dayPartitionSpans (DayPartition (PeriodData _ ds)) =
-    -- Guaranteed not to error because the IntMap in non-empty.
-    (intToDay . fst $ IM.findMin ds, snd $ IM.findMax ds)
-
 -- | Convert 'DayPartition' to a list of 'DateSpan's.
---
--- Note that the end date of each period will be equal to the start date of
--- the next period.
+-- Each span will end one day before the next span begins
+-- (the span's exclusive end date will be equal to the next span's start date).
 dayPartitionToDateSpans :: DayPartition -> [DateSpan]
 dayPartitionToDateSpans = map toDateSpan . dayPartitionToList
   where
     toDateSpan (s, e) = DateSpan (toEFDay s) (toEFDay $ addDays 1 e)
     toEFDay = Just . Exact
 
--- Convert a periodic report 'Maybe DayPartition' to a list of 'DateSpans',
--- replacing the empty case with an appropriate placeholder.
---
--- Note that the end date of each period will be equal to the start date of
--- the next period.
+-- Convert a 'Maybe DayPartition' to a list of one or more 'DateSpans'.
+-- Each span will end one day before the next span begins
+-- (the span's exclusive end date will be equal to the next span's start date).
+-- If given Nothing, it returns a single open-ended span.
 maybeDayPartitionToDateSpans :: Maybe DayPartition -> [DateSpan]
 maybeDayPartitionToDateSpans = maybe [DateSpan Nothing Nothing] dayPartitionToDateSpans
 
 
+-- operations:
+
+-- | Check that a DayPartition has been constructed correctly,
+-- with internal invariants satisfied, as well as the external ones described in 'DayPartition'.
+-- Internally, all constructors must guarantee:
+-- 1. The pdperiods map contains at least one key and value.
+-- 2. The value stored in pdpre is one day before pdperiods' smallest key.
+-- 3. Each value stored in pdperiods is one day before the next largest key,
+--    (except for the value associated with the largest key).
+isValidDayPartition :: DayPartition -> Bool
+isValidDayPartition (DayPartition pd) = case ds of
+  [] -> False
+  xs -> and $ zipWith isContiguous ((nulldate, h) : xs) xs
+ where
+  (h, ds) = periodDataToList pd
+  isContiguous (_, e) (s, _) = addDays 1 e == s
+
+-- | Return the union of two 'DayPartition's if that is a valid 'DayPartition',
+-- or 'Nothing' otherwise.
+unionDayPartitions :: DayPartition -> DayPartition -> Maybe DayPartition
+unionDayPartitions (DayPartition (PeriodData h as)) (DayPartition (PeriodData h' as')) =
+  if equalIntersection as as' && isValidDayPartition union then Just union else Nothing
+ where
+  union = DayPartition . PeriodData (min h h') $ as <> as'
+  equalIntersection x y = and $ IM.intersectionWith (==) x y
+
+-- | Get this DayPartition's overall start date and end date (both inclusive).
+dayPartitionSpans :: DayPartition -> (Day, Day)
+dayPartitionSpans (DayPartition (PeriodData _ ds)) =
+  -- Guaranteed not to error because the IntMap is non-empty.
+  (intToDay . fst $ IM.findMin ds, snd $ IM.findMax ds)
+
+lookupDayPartition :: Day -> DayPartition -> (Maybe Day, Day)
+lookupDayPartition d (DayPartition xs) = lookupPeriodDataOrHistorical d xs
+-- | Find the start and end dates of the period within a 'DayPartition' which contains a given day.
+-- If the day is after the end of the last period, it is assumed to be within the last period.
+-- If the day is before the start of the first period (ie, in the historical period),
+-- only the historical period's end date is returned.
+
 -- | Split a 'DateSpan' into a 'DayPartition' consisting of consecutive exact
 -- spans of the specified Interval, or `Nothing` if the span is invalid.
 -- If no interval is specified, the original span is returned.
@@ -193,7 +206,6 @@ splitSpan _      (DaysOfWeek days@(n:_))  ds = do
         starts = map (\d -> addDays (toInteger $ d - n) $ nthdayofweekcontaining n s) days
     spansFromBoundaries e bdrys
 
-
 -- | Fill in missing start/end dates for calculating 'splitSpan'.
 dateSpanSplitLimits :: (Day -> Day) -> (Day -> Day) -> DateSpan -> Maybe (Day, Day)
 dateSpanSplitLimits _     _    (DateSpan Nothing   Nothing) = Nothing
@@ -221,8 +233,7 @@ spansFromBoundaries _ []     = Nothing
 spansFromBoundaries e (x:_)  | x >= e = Nothing
 spansFromBoundaries e (x:xs) = Just . boundariesToDayPartition $ takeUntilFailsNE (<e) (x:|xs)
 
-
--- Get the natural start for the given interval that falls on or before the given day,
+-- | Get the natural start for the given interval that falls on or before the given day,
 -- when applicable. Works for Weeks, Months, Quarters, Years, eg.
 intervalBoundaryBefore :: Interval -> Day -> Day
 intervalBoundaryBefore i d =
@@ -230,10 +241,11 @@ intervalBoundaryBefore i d =
     Just ((start, _) :| _ ) -> start
     _ -> d
 
-
 intToDay = ModifiedJulianDay . toInteger
 
 
+-- tests:
+
 tests_DayPartition =
   testGroup "splitSpan" [
       testCase "weekday" $ do
diff --git a/hledger-lib/Hledger/Data/PeriodData.hs b/hledger-lib/Hledger/Data/PeriodData.hs
index 162897c09..98298c978 100644
--- a/hledger-lib/Hledger/Data/PeriodData.hs
+++ b/hledger-lib/Hledger/Data/PeriodData.hs
@@ -62,7 +62,7 @@ instance Foldable1 PeriodData where
 instance Traversable PeriodData where
   traverse f (PeriodData h as) = liftA2 PeriodData (f h) $ traverse f as
 
--- | The Semigroup instance for 'AccountBalance' will simply take the union of
+-- | The Semigroup instance for 'PeriodData' simply takes the union of
 -- keys in the date map section. This may not be the result you want if the
 -- keys are not identical.
 instance Semigroup a => Semigroup (PeriodData a) where
@@ -71,57 +71,54 @@ instance Semigroup a => Semigroup (PeriodData a) where
 instance Monoid a => Monoid (PeriodData a) where
   mempty = PeriodData mempty mempty
 
--- | Construct an 'PeriodData' from a list.
+-- | Construct a 'PeriodData' from a historical data value and a list of (period start, period data) pairs.
 periodDataFromList :: a -> [(Day, a)] -> PeriodData a
 periodDataFromList h = PeriodData h . IM.fromList . map (\(d, a) -> (dayToInt d, a))
 
--- | Convert 'PeriodData' to a list of pairs.
+-- | Convert 'PeriodData' to a historical data value and a list of (period start, period data) pairs.
 periodDataToList :: PeriodData a -> (a, [(Day, a)])
 periodDataToList (PeriodData h as) = (h, map (\(s, e) -> (intToDay s, e)) $ IM.toList as)
 
-
--- | Get account balance information for the period containing a given 'Day',
--- along with the start of the period, or 'Nothing' if this day lies in the
--- historical period.
+-- | Get the data for the period containing the given 'Day', and that period's start date.
+-- If the day is after the end of the last period, it is assumed to be within the last period.
+-- If the day is before the start of the first period (ie, in the historical period), return Nothing.
 lookupPeriodData :: Day -> PeriodData a -> Maybe (Day, a)
 lookupPeriodData d (PeriodData _ as) = first intToDay <$> IM.lookupLE (dayToInt d) as
 
--- | Get account balance information for the period containing a given 'Day'
--- or the historical data if this day lies in the historical period, along with
--- the start of the period or 'Nothing' if it lies in the historical period.
+-- | Get the data for the period containing the given 'Day', and that period's start date.
+-- If the day is after the end of the last period, it is assumed to be within the last period.
+-- If the day is before the start of the first period (ie, in the historical period),
+-- return the data for the historical period and no start date.
 lookupPeriodDataOrHistorical :: Day -> PeriodData a -> (Maybe Day, a)
 lookupPeriodDataOrHistorical d pd@(PeriodData h _) = case lookupPeriodData d pd of
-    Nothing     -> (Nothing, h)
-    Just (a, b) -> (Just a, b)
+  Nothing     -> (Nothing, h)
+  Just (a, b) -> (Just a, b)
 
--- | Add account balance information to the appropriate location in 'PeriodData'.
+-- | Set historical or period data in the appropriate location in a 'PeriodData'.
 insertPeriodData :: Semigroup a => Maybe Day -> a -> PeriodData a -> PeriodData a
 insertPeriodData mday b balances = case mday of
     Nothing  -> balances{pdpre = pdpre balances <> b}
     Just day -> balances{pdperiods = IM.insertWith (<>) (dayToInt day) b $ pdperiods balances}
 
--- | Merges two 'PeriodData', using the given operation to combine their balance information.
+-- | Merge two 'PeriodData', using the given operation to combine their data values.
 --
 -- This will drop keys if they are not present in both 'PeriodData'.
 opPeriodData :: (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c
 opPeriodData f (PeriodData h1 as1) (PeriodData h2 as2) =
-    PeriodData (f h1 h2) $ IM.intersectionWith f as1 as2
+  PeriodData (f h1 h2) $ IM.intersectionWith f as1 as2
 
--- | Merges two 'PeriodData', using the given operations for balance
--- information only in the first, only in the second, or in both
--- 'PeriodData', respectively.
-mergePeriodData :: (a -> c) -> (b -> c) -> (a -> b -> c)
-                -> PeriodData a -> PeriodData b -> PeriodData c
+-- | Merge two 'PeriodData', using the given operations for combining data
+-- that's only in the first, only in the second, or in both, respectively.
+mergePeriodData :: (a -> c) -> (b -> c) -> (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c
 mergePeriodData only1 only2 f = \(PeriodData h1 as1) (PeriodData h2 as2) ->
-    PeriodData (f h1 h2) $ merge as1 as2
+  PeriodData (f h1 h2) $ merge as1 as2
   where
     merge = IM.mergeWithKey (\_ x y -> Just $ f x y) (fmap only1) (fmap only2)
 
--- | Pad out the datemap of a 'PeriodData' so that every key from another 'PeriodData' is present.
+-- | Pad out the date map of a 'PeriodData' so that every key from another 'PeriodData' is present.
 padPeriodData :: a -> PeriodData b -> PeriodData a -> PeriodData a
 padPeriodData x pad bal = bal{pdperiods = pdperiods bal <> (x <$ pdperiods pad)}
 
-
 intToDay = ModifiedJulianDay . toInteger
 dayToInt = fromInteger . toModifiedJulianDay
 
diff --git a/hledger-lib/Hledger/Data/Types.hs b/hledger-lib/Hledger/Data/Types.hs
index 4d2e3627d..44abe0c9f 100644
--- a/hledger-lib/Hledger/Data/Types.hs
+++ b/hledger-lib/Hledger/Data/Types.hs
@@ -750,15 +750,24 @@ data Account a = Account {
   ,adata                     :: PeriodData a       -- ^ associated data per report period
   } deriving (Generic, Functor)
 
--- | A general container for storing data values associated to zero or more
--- report periods, and for the pre-report period. Report periods are assumed to
--- be contiguous, and represented only by start dates.
+-- | A general container for storing data values associated with zero or more
+-- contiguous report (sub)periods, and with the (open ended) pre-report period.
+-- The report periods are typically all the same length, but need not be.
+--
+-- Report periods are represented only by their start dates, used as the keys of an 'IntMap'.
+-- Like the Integer inside the Day type, these Int keys are a count of days before or after 1858-11-17.
+--
+-- Note the use of Int limits the dates this type can represent.
+-- On a 64 bit machine, the range is about 25 quadrillion years into past and future
+-- (-25252734927764696-04-22 to 25252734927768413-06-12).
+-- On a 32 bit machine, it is about 5 million years into past and future
+-- (-5877752-05-08 to 5881469-05-27).
+-- Exceeding the machine's Int range here will silently wrap around,
+-- causing this type (and periodic reports) to give wrong results.
 --
--- Data is stored in an 'IntMap' for efficiency, where Days are stored as as
--- Int representing the underlying modified Julian date.
 data PeriodData a = PeriodData {
-   pdpre     :: a            -- ^ data from the pre-report period (e.g. historical balances)
-  ,pdperiods :: IM.IntMap a  -- ^ data for the periods
+   pdpre     :: a            -- ^ data for the period before the report
+  ,pdperiods :: IM.IntMap a  -- ^ data for each period within the report
   } deriving (Eq, Ord, Functor, Generic)
 
 -- | Data that's useful in "balance" reports: