;dev: PeriodData, DayPartition: haddock updates

Clarify some things. Also note an example of PeriodData wrapping around.
This commit is contained in:
Simon Michael 2025-10-09 17:07:59 -10:00
parent cef35b97bd
commit 8779f2481a
4 changed files with 136 additions and 98 deletions

20
examples/farfuture.j Normal file
View File

@ -0,0 +1,20 @@
; Testing behaviour at PeriodData's limit.
; PeriodData is used in multi-period reports.
; On a 64 bit machine it can work with dates from -25252734927764696-04-22 to 25252734927768413-06-12.
; Dates outside that range wrap around, giving wrong results:
;
; $ hledger -f examples/farfuture.j reg -O csv -Y
; "txnidx","date","code","description","account","amount","total"
; "0","-25252734927764696-11-10","","","expenses","6","6"
25252734927768413-06-12 PeriodData's max date
(expenses) 1
25252734927768413-06-13 one day past PeriodData's max date
(expenses) 2
25252734927768413-12-01 farther past PeriodData's max date, same year
(expenses) 3
25252734927768414-01-01 next year past PeriodData's max date
(expenses) 4

View File

@ -3,22 +3,22 @@ A partition of time into contiguous spans, for defining reporting periods.
-} -}
module Hledger.Data.DayPartition module Hledger.Data.DayPartition
( DayPartition ( DayPartition
-- * constructors
, boundariesToDayPartition , boundariesToDayPartition
, boundariesToMaybeDayPartition , boundariesToMaybeDayPartition
-- * conversions
, lookupDayPartition
, unionDayPartitions
, dayPartitionToNonEmpty , dayPartitionToNonEmpty
, dayPartitionToList , dayPartitionToList
, dayPartitionSpans
, dayPartitionToDateSpans , dayPartitionToDateSpans
, dayPartitionToPeriodData , dayPartitionToPeriodData
, maybeDayPartitionToDateSpans , maybeDayPartitionToDateSpans
-- * operations
, unionDayPartitions
, dayPartitionSpans
, lookupDayPartition
, splitSpan , splitSpan
, intervalBoundaryBefore , intervalBoundaryBefore
-- * tests
, tests_DayPartition , tests_DayPartition
) where ) where
@ -33,97 +33,110 @@ import Hledger.Data.Types
import Hledger.Utils import Hledger.Utils
-- | A partition of time into contiguous spans, along with a historical period -- | A partition of time into one or more contiguous periods,
-- before any of the spans. -- plus a historical period that precedes them.
-- Note 'DayPartition' does not store per-period data - only the periods' start/end dates.
-- Each period is at least one day in length.
-- The historical period is open ended, with no start date.
-- The last period has an end date, but note some queries (like 'dayPartitionFind') ignore that, acting as if the last period is open ended.
-- Only smart constructors are exported, so that a DayPartition always satisfies these invariants.
-- --
-- This is a newtype wrapper around 'PeriodData Day', where the start dates are -- This is implemented as a newtype wrapper around 'PeriodData Day', which is a map from date to date.
-- the keys and the end dates are the values. Spans are stored in inclusive format -- The map's keys are the period start dates, and the values are the corresponding period end dates.
-- [start, end]. Note that this differs from 'DateSpan' which uses [start, end) -- Note unlike 'DateSpan', which stores exclusive end dates ( @[start, end)@ ),
-- format. -- here both start and end dates are inclusive ( @[start, end]@ ).
-- --
-- The constructor is not exported so that we can ensure the spans are valid
-- partitions of time.
newtype DayPartition = DayPartition { dayPartitionToPeriodData :: PeriodData Day } deriving (Eq, Ord, Show) newtype DayPartition = DayPartition { dayPartitionToPeriodData :: PeriodData Day } deriving (Eq, Ord, Show)
-- Developer's note. All constructors must guarantee that:
-- 1. The value stored in pdperiods has at least one key.
-- 2. The value stored in pdpre equals one day before the smallest key in pdperiods.
-- 3. The value stored in each entry of pdperiods equals one day before the
-- next largest key, except for the value associated to the largest key.
isValidDayPartition :: DayPartition -> Bool
isValidDayPartition (DayPartition pd) = case ds of
[] -> False -- Must be at least one key in pdperiods
xs -> and $ zipWith isContiguous ((nulldate, h) : xs) xs
where
(h, ds) = periodDataToList pd
isContiguous (_, e) (s, _) = addDays 1 e == s
-- constructors:
-- | Construct a 'DayPartition' from a non-empty list of boundary days. -- | Construct a 'DayPartition' from a non-empty list of period boundary dates (start dates plus a final exclusive end date).
--
-- >>> boundariesToDayPartition (fromGregorian 2025 01 01 :| [fromGregorian 2025 02 01])
-- DayPartition {dayPartitionToPeriodData = PeriodData{ pdpre = 2024-12-31, pdperiods = fromList [(2025-01-01,2025-01-31)]}}
--
boundariesToDayPartition :: NonEmpty Day -> DayPartition boundariesToDayPartition :: NonEmpty Day -> DayPartition
boundariesToDayPartition xs = DayPartition . periodDataFromList (addDays (-1) b) $ case bs of boundariesToDayPartition xs = DayPartition . periodDataFromList (addDays (-1) b) $ case bs of
[] -> [(b, b)] -- If only one boundary is supplied, it ends on the same day [] -> [(b, b)] -- If only one boundary is supplied, it ends on the same day
_:_ -> zip (b:bs) $ map (addDays (-1)) bs -- Guaranteed non-empty _:_ -> zip (b:bs) $ map (addDays (-1)) bs -- Guaranteed non-empty
where b:|bs = NE.nub $ NE.sort xs where b:|bs = NE.nub $ NE.sort xs
-- | Construct a 'DayPartition' from a list of boundary days, returning -- | Construct a 'DayPartition' from a list of period boundary dates (start dates plus a final exclusive end date),
-- 'Nothing' for the empty list. -- if it's a non-empty list.
boundariesToMaybeDayPartition :: [Day] -> Maybe DayPartition boundariesToMaybeDayPartition :: [Day] -> Maybe DayPartition
boundariesToMaybeDayPartition = fmap boundariesToDayPartition . NE.nonEmpty boundariesToMaybeDayPartition = fmap boundariesToDayPartition . NE.nonEmpty
-- | Find the span of a 'DayPartition' which contains a given day. -- conversions:
lookupDayPartition :: Day -> DayPartition -> (Maybe Day, Day)
lookupDayPartition d (DayPartition xs) = lookupPeriodDataOrHistorical d xs
-- | Return the union of two 'DayPartition's if they are consistent, or 'Nothing' otherwise. -- | Convert 'DayPartition' to a non-empty list of period start and end dates (both inclusive).
unionDayPartitions :: DayPartition -> DayPartition -> Maybe DayPartition -- Each end date will be one day before the next period's start date.
unionDayPartitions (DayPartition (PeriodData h as)) (DayPartition (PeriodData h' as')) =
if equalIntersection as as' && isValidDayPartition union then Just union else Nothing
where
union = DayPartition . PeriodData (min h h') $ as <> as'
equalIntersection x y = and $ IM.intersectionWith (==) x y
-- | Convert 'DayPartition' to a non-empty list of start and end dates for the periods.
--
-- Note that the end date of each period will be one day before the start date
-- of the next period.
dayPartitionToNonEmpty :: DayPartition -> NonEmpty (Day, Day) dayPartitionToNonEmpty :: DayPartition -> NonEmpty (Day, Day)
dayPartitionToNonEmpty (DayPartition xs) = NE.fromList . snd $ periodDataToList xs -- Constructors guarantee this is non-empty dayPartitionToNonEmpty (DayPartition xs) = NE.fromList . snd $ periodDataToList xs -- Constructors guarantee this is non-empty
-- | Convert 'DayPartition' to a list of start and end dates for the periods. -- | Convert 'DayPartition' to a list (which will always be non-empty) of period start and end dates (both inclusive).
-- -- Each end date will be one day before the next period's start date.
-- Note that the end date of each period will be one day before the start date
-- of the next period.
dayPartitionToList :: DayPartition -> [(Day, Day)] dayPartitionToList :: DayPartition -> [(Day, Day)]
dayPartitionToList = NE.toList . dayPartitionToNonEmpty dayPartitionToList = NE.toList . dayPartitionToNonEmpty
-- | Return the whole day range spanned by a `PeriodData Day`.
dayPartitionSpans :: DayPartition -> (Day, Day)
dayPartitionSpans (DayPartition (PeriodData _ ds)) =
-- Guaranteed not to error because the IntMap in non-empty.
(intToDay . fst $ IM.findMin ds, snd $ IM.findMax ds)
-- | Convert 'DayPartition' to a list of 'DateSpan's. -- | Convert 'DayPartition' to a list of 'DateSpan's.
-- -- Each span will end one day before the next span begins
-- Note that the end date of each period will be equal to the start date of -- (the span's exclusive end date will be equal to the next span's start date).
-- the next period.
dayPartitionToDateSpans :: DayPartition -> [DateSpan] dayPartitionToDateSpans :: DayPartition -> [DateSpan]
dayPartitionToDateSpans = map toDateSpan . dayPartitionToList dayPartitionToDateSpans = map toDateSpan . dayPartitionToList
where where
toDateSpan (s, e) = DateSpan (toEFDay s) (toEFDay $ addDays 1 e) toDateSpan (s, e) = DateSpan (toEFDay s) (toEFDay $ addDays 1 e)
toEFDay = Just . Exact toEFDay = Just . Exact
-- Convert a periodic report 'Maybe DayPartition' to a list of 'DateSpans', -- Convert a 'Maybe DayPartition' to a list of one or more 'DateSpans'.
-- replacing the empty case with an appropriate placeholder. -- Each span will end one day before the next span begins
-- -- (the span's exclusive end date will be equal to the next span's start date).
-- Note that the end date of each period will be equal to the start date of -- If given Nothing, it returns a single open-ended span.
-- the next period.
maybeDayPartitionToDateSpans :: Maybe DayPartition -> [DateSpan] maybeDayPartitionToDateSpans :: Maybe DayPartition -> [DateSpan]
maybeDayPartitionToDateSpans = maybe [DateSpan Nothing Nothing] dayPartitionToDateSpans maybeDayPartitionToDateSpans = maybe [DateSpan Nothing Nothing] dayPartitionToDateSpans
-- operations:
-- | Check that a DayPartition has been constructed correctly,
-- with internal invariants satisfied, as well as the external ones described in 'DayPartition'.
-- Internally, all constructors must guarantee:
-- 1. The pdperiods map contains at least one key and value.
-- 2. The value stored in pdpre is one day before pdperiods' smallest key.
-- 3. Each value stored in pdperiods is one day before the next largest key,
-- (except for the value associated with the largest key).
isValidDayPartition :: DayPartition -> Bool
isValidDayPartition (DayPartition pd) = case ds of
[] -> False
xs -> and $ zipWith isContiguous ((nulldate, h) : xs) xs
where
(h, ds) = periodDataToList pd
isContiguous (_, e) (s, _) = addDays 1 e == s
-- | Return the union of two 'DayPartition's if that is a valid 'DayPartition',
-- or 'Nothing' otherwise.
unionDayPartitions :: DayPartition -> DayPartition -> Maybe DayPartition
unionDayPartitions (DayPartition (PeriodData h as)) (DayPartition (PeriodData h' as')) =
if equalIntersection as as' && isValidDayPartition union then Just union else Nothing
where
union = DayPartition . PeriodData (min h h') $ as <> as'
equalIntersection x y = and $ IM.intersectionWith (==) x y
-- | Get this DayPartition's overall start date and end date (both inclusive).
dayPartitionSpans :: DayPartition -> (Day, Day)
dayPartitionSpans (DayPartition (PeriodData _ ds)) =
-- Guaranteed not to error because the IntMap is non-empty.
(intToDay . fst $ IM.findMin ds, snd $ IM.findMax ds)
lookupDayPartition :: Day -> DayPartition -> (Maybe Day, Day)
lookupDayPartition d (DayPartition xs) = lookupPeriodDataOrHistorical d xs
-- | Find the start and end dates of the period within a 'DayPartition' which contains a given day.
-- If the day is after the end of the last period, it is assumed to be within the last period.
-- If the day is before the start of the first period (ie, in the historical period),
-- only the historical period's end date is returned.
-- | Split a 'DateSpan' into a 'DayPartition' consisting of consecutive exact -- | Split a 'DateSpan' into a 'DayPartition' consisting of consecutive exact
-- spans of the specified Interval, or `Nothing` if the span is invalid. -- spans of the specified Interval, or `Nothing` if the span is invalid.
-- If no interval is specified, the original span is returned. -- If no interval is specified, the original span is returned.
@ -193,7 +206,6 @@ splitSpan _ (DaysOfWeek days@(n:_)) ds = do
starts = map (\d -> addDays (toInteger $ d - n) $ nthdayofweekcontaining n s) days starts = map (\d -> addDays (toInteger $ d - n) $ nthdayofweekcontaining n s) days
spansFromBoundaries e bdrys spansFromBoundaries e bdrys
-- | Fill in missing start/end dates for calculating 'splitSpan'. -- | Fill in missing start/end dates for calculating 'splitSpan'.
dateSpanSplitLimits :: (Day -> Day) -> (Day -> Day) -> DateSpan -> Maybe (Day, Day) dateSpanSplitLimits :: (Day -> Day) -> (Day -> Day) -> DateSpan -> Maybe (Day, Day)
dateSpanSplitLimits _ _ (DateSpan Nothing Nothing) = Nothing dateSpanSplitLimits _ _ (DateSpan Nothing Nothing) = Nothing
@ -221,8 +233,7 @@ spansFromBoundaries _ [] = Nothing
spansFromBoundaries e (x:_) | x >= e = Nothing spansFromBoundaries e (x:_) | x >= e = Nothing
spansFromBoundaries e (x:xs) = Just . boundariesToDayPartition $ takeUntilFailsNE (<e) (x:|xs) spansFromBoundaries e (x:xs) = Just . boundariesToDayPartition $ takeUntilFailsNE (<e) (x:|xs)
-- | Get the natural start for the given interval that falls on or before the given day,
-- Get the natural start for the given interval that falls on or before the given day,
-- when applicable. Works for Weeks, Months, Quarters, Years, eg. -- when applicable. Works for Weeks, Months, Quarters, Years, eg.
intervalBoundaryBefore :: Interval -> Day -> Day intervalBoundaryBefore :: Interval -> Day -> Day
intervalBoundaryBefore i d = intervalBoundaryBefore i d =
@ -230,10 +241,11 @@ intervalBoundaryBefore i d =
Just ((start, _) :| _ ) -> start Just ((start, _) :| _ ) -> start
_ -> d _ -> d
intToDay = ModifiedJulianDay . toInteger intToDay = ModifiedJulianDay . toInteger
-- tests:
tests_DayPartition = tests_DayPartition =
testGroup "splitSpan" [ testGroup "splitSpan" [
testCase "weekday" $ do testCase "weekday" $ do

View File

@ -62,7 +62,7 @@ instance Foldable1 PeriodData where
instance Traversable PeriodData where instance Traversable PeriodData where
traverse f (PeriodData h as) = liftA2 PeriodData (f h) $ traverse f as traverse f (PeriodData h as) = liftA2 PeriodData (f h) $ traverse f as
-- | The Semigroup instance for 'AccountBalance' will simply take the union of -- | The Semigroup instance for 'PeriodData' simply takes the union of
-- keys in the date map section. This may not be the result you want if the -- keys in the date map section. This may not be the result you want if the
-- keys are not identical. -- keys are not identical.
instance Semigroup a => Semigroup (PeriodData a) where instance Semigroup a => Semigroup (PeriodData a) where
@ -71,57 +71,54 @@ instance Semigroup a => Semigroup (PeriodData a) where
instance Monoid a => Monoid (PeriodData a) where instance Monoid a => Monoid (PeriodData a) where
mempty = PeriodData mempty mempty mempty = PeriodData mempty mempty
-- | Construct an 'PeriodData' from a list. -- | Construct a 'PeriodData' from a historical data value and a list of (period start, period data) pairs.
periodDataFromList :: a -> [(Day, a)] -> PeriodData a periodDataFromList :: a -> [(Day, a)] -> PeriodData a
periodDataFromList h = PeriodData h . IM.fromList . map (\(d, a) -> (dayToInt d, a)) periodDataFromList h = PeriodData h . IM.fromList . map (\(d, a) -> (dayToInt d, a))
-- | Convert 'PeriodData' to a list of pairs. -- | Convert 'PeriodData' to a historical data value and a list of (period start, period data) pairs.
periodDataToList :: PeriodData a -> (a, [(Day, a)]) periodDataToList :: PeriodData a -> (a, [(Day, a)])
periodDataToList (PeriodData h as) = (h, map (\(s, e) -> (intToDay s, e)) $ IM.toList as) periodDataToList (PeriodData h as) = (h, map (\(s, e) -> (intToDay s, e)) $ IM.toList as)
-- | Get the data for the period containing the given 'Day', and that period's start date.
-- | Get account balance information for the period containing a given 'Day', -- If the day is after the end of the last period, it is assumed to be within the last period.
-- along with the start of the period, or 'Nothing' if this day lies in the -- If the day is before the start of the first period (ie, in the historical period), return Nothing.
-- historical period.
lookupPeriodData :: Day -> PeriodData a -> Maybe (Day, a) lookupPeriodData :: Day -> PeriodData a -> Maybe (Day, a)
lookupPeriodData d (PeriodData _ as) = first intToDay <$> IM.lookupLE (dayToInt d) as lookupPeriodData d (PeriodData _ as) = first intToDay <$> IM.lookupLE (dayToInt d) as
-- | Get account balance information for the period containing a given 'Day' -- | Get the data for the period containing the given 'Day', and that period's start date.
-- or the historical data if this day lies in the historical period, along with -- If the day is after the end of the last period, it is assumed to be within the last period.
-- the start of the period or 'Nothing' if it lies in the historical period. -- If the day is before the start of the first period (ie, in the historical period),
-- return the data for the historical period and no start date.
lookupPeriodDataOrHistorical :: Day -> PeriodData a -> (Maybe Day, a) lookupPeriodDataOrHistorical :: Day -> PeriodData a -> (Maybe Day, a)
lookupPeriodDataOrHistorical d pd@(PeriodData h _) = case lookupPeriodData d pd of lookupPeriodDataOrHistorical d pd@(PeriodData h _) = case lookupPeriodData d pd of
Nothing -> (Nothing, h) Nothing -> (Nothing, h)
Just (a, b) -> (Just a, b) Just (a, b) -> (Just a, b)
-- | Add account balance information to the appropriate location in 'PeriodData'. -- | Set historical or period data in the appropriate location in a 'PeriodData'.
insertPeriodData :: Semigroup a => Maybe Day -> a -> PeriodData a -> PeriodData a insertPeriodData :: Semigroup a => Maybe Day -> a -> PeriodData a -> PeriodData a
insertPeriodData mday b balances = case mday of insertPeriodData mday b balances = case mday of
Nothing -> balances{pdpre = pdpre balances <> b} Nothing -> balances{pdpre = pdpre balances <> b}
Just day -> balances{pdperiods = IM.insertWith (<>) (dayToInt day) b $ pdperiods balances} Just day -> balances{pdperiods = IM.insertWith (<>) (dayToInt day) b $ pdperiods balances}
-- | Merges two 'PeriodData', using the given operation to combine their balance information. -- | Merge two 'PeriodData', using the given operation to combine their data values.
-- --
-- This will drop keys if they are not present in both 'PeriodData'. -- This will drop keys if they are not present in both 'PeriodData'.
opPeriodData :: (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c opPeriodData :: (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c
opPeriodData f (PeriodData h1 as1) (PeriodData h2 as2) = opPeriodData f (PeriodData h1 as1) (PeriodData h2 as2) =
PeriodData (f h1 h2) $ IM.intersectionWith f as1 as2 PeriodData (f h1 h2) $ IM.intersectionWith f as1 as2
-- | Merges two 'PeriodData', using the given operations for balance -- | Merge two 'PeriodData', using the given operations for combining data
-- information only in the first, only in the second, or in both -- that's only in the first, only in the second, or in both, respectively.
-- 'PeriodData', respectively. mergePeriodData :: (a -> c) -> (b -> c) -> (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c
mergePeriodData :: (a -> c) -> (b -> c) -> (a -> b -> c)
-> PeriodData a -> PeriodData b -> PeriodData c
mergePeriodData only1 only2 f = \(PeriodData h1 as1) (PeriodData h2 as2) -> mergePeriodData only1 only2 f = \(PeriodData h1 as1) (PeriodData h2 as2) ->
PeriodData (f h1 h2) $ merge as1 as2 PeriodData (f h1 h2) $ merge as1 as2
where where
merge = IM.mergeWithKey (\_ x y -> Just $ f x y) (fmap only1) (fmap only2) merge = IM.mergeWithKey (\_ x y -> Just $ f x y) (fmap only1) (fmap only2)
-- | Pad out the datemap of a 'PeriodData' so that every key from another 'PeriodData' is present. -- | Pad out the date map of a 'PeriodData' so that every key from another 'PeriodData' is present.
padPeriodData :: a -> PeriodData b -> PeriodData a -> PeriodData a padPeriodData :: a -> PeriodData b -> PeriodData a -> PeriodData a
padPeriodData x pad bal = bal{pdperiods = pdperiods bal <> (x <$ pdperiods pad)} padPeriodData x pad bal = bal{pdperiods = pdperiods bal <> (x <$ pdperiods pad)}
intToDay = ModifiedJulianDay . toInteger intToDay = ModifiedJulianDay . toInteger
dayToInt = fromInteger . toModifiedJulianDay dayToInt = fromInteger . toModifiedJulianDay

View File

@ -750,15 +750,24 @@ data Account a = Account {
,adata :: PeriodData a -- ^ associated data per report period ,adata :: PeriodData a -- ^ associated data per report period
} deriving (Generic, Functor) } deriving (Generic, Functor)
-- | A general container for storing data values associated to zero or more -- | A general container for storing data values associated with zero or more
-- report periods, and for the pre-report period. Report periods are assumed to -- contiguous report (sub)periods, and with the (open ended) pre-report period.
-- be contiguous, and represented only by start dates. -- The report periods are typically all the same length, but need not be.
--
-- Report periods are represented only by their start dates, used as the keys of an 'IntMap'.
-- Like the Integer inside the Day type, these Int keys are a count of days before or after 1858-11-17.
--
-- Note the use of Int limits the dates this type can represent.
-- On a 64 bit machine, the range is about 25 quadrillion years into past and future
-- (-25252734927764696-04-22 to 25252734927768413-06-12).
-- On a 32 bit machine, it is about 5 million years into past and future
-- (-5877752-05-08 to 5881469-05-27).
-- Exceeding the machine's Int range here will silently wrap around,
-- causing this type (and periodic reports) to give wrong results.
-- --
-- Data is stored in an 'IntMap' for efficiency, where Days are stored as as
-- Int representing the underlying modified Julian date.
data PeriodData a = PeriodData { data PeriodData a = PeriodData {
pdpre :: a -- ^ data from the pre-report period (e.g. historical balances) pdpre :: a -- ^ data for the period before the report
,pdperiods :: IM.IntMap a -- ^ data for the periods ,pdperiods :: IM.IntMap a -- ^ data for each period within the report
} deriving (Eq, Ord, Functor, Generic) } deriving (Eq, Ord, Functor, Generic)
-- | Data that's useful in "balance" reports: -- | Data that's useful in "balance" reports: