From 8779f2481adf9ecdc359d296e15887d28c3b1383 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Thu, 9 Oct 2025 17:07:59 -1000 Subject: [PATCH] ;dev: PeriodData, DayPartition: haddock updates Clarify some things. Also note an example of PeriodData wrapping around. --- examples/farfuture.j | 20 +++ hledger-lib/Hledger/Data/DayPartition.hs | 148 ++++++++++++----------- hledger-lib/Hledger/Data/PeriodData.hs | 43 +++---- hledger-lib/Hledger/Data/Types.hs | 23 ++-- 4 files changed, 136 insertions(+), 98 deletions(-) create mode 100644 examples/farfuture.j diff --git a/examples/farfuture.j b/examples/farfuture.j new file mode 100644 index 000000000..63d580607 --- /dev/null +++ b/examples/farfuture.j @@ -0,0 +1,20 @@ +; Testing behaviour at PeriodData's limit. +; PeriodData is used in multi-period reports. +; On a 64 bit machine it can work with dates from -25252734927764696-04-22 to 25252734927768413-06-12. +; Dates outside that range wrap around, giving wrong results: +; +; $ hledger -f examples/farfuture.j reg -O csv -Y +; "txnidx","date","code","description","account","amount","total" +; "0","-25252734927764696-11-10","","","expenses","6","6" + +25252734927768413-06-12 PeriodData's max date + (expenses) 1 + +25252734927768413-06-13 one day past PeriodData's max date + (expenses) 2 + +25252734927768413-12-01 farther past PeriodData's max date, same year + (expenses) 3 + +25252734927768414-01-01 next year past PeriodData's max date + (expenses) 4 diff --git a/hledger-lib/Hledger/Data/DayPartition.hs b/hledger-lib/Hledger/Data/DayPartition.hs index b4d23adc7..959263cea 100644 --- a/hledger-lib/Hledger/Data/DayPartition.hs +++ b/hledger-lib/Hledger/Data/DayPartition.hs @@ -3,22 +3,22 @@ A partition of time into contiguous spans, for defining reporting periods. -} module Hledger.Data.DayPartition ( DayPartition +-- * constructors , boundariesToDayPartition , boundariesToMaybeDayPartition - -, lookupDayPartition -, unionDayPartitions - +-- * conversions , dayPartitionToNonEmpty , dayPartitionToList -, dayPartitionSpans , dayPartitionToDateSpans , dayPartitionToPeriodData , maybeDayPartitionToDateSpans - +-- * operations +, unionDayPartitions +, dayPartitionSpans +, lookupDayPartition , splitSpan , intervalBoundaryBefore - +-- * tests , tests_DayPartition ) where @@ -33,97 +33,110 @@ import Hledger.Data.Types import Hledger.Utils --- | A partition of time into contiguous spans, along with a historical period --- before any of the spans. +-- | A partition of time into one or more contiguous periods, +-- plus a historical period that precedes them. +-- Note 'DayPartition' does not store per-period data - only the periods' start/end dates. + +-- Each period is at least one day in length. +-- The historical period is open ended, with no start date. +-- The last period has an end date, but note some queries (like 'dayPartitionFind') ignore that, acting as if the last period is open ended. +-- Only smart constructors are exported, so that a DayPartition always satisfies these invariants. -- --- This is a newtype wrapper around 'PeriodData Day', where the start dates are --- the keys and the end dates are the values. Spans are stored in inclusive format --- [start, end]. Note that this differs from 'DateSpan' which uses [start, end) --- format. +-- This is implemented as a newtype wrapper around 'PeriodData Day', which is a map from date to date. +-- The map's keys are the period start dates, and the values are the corresponding period end dates. +-- Note unlike 'DateSpan', which stores exclusive end dates ( @[start, end)@ ), +-- here both start and end dates are inclusive ( @[start, end]@ ). -- --- The constructor is not exported so that we can ensure the spans are valid --- partitions of time. newtype DayPartition = DayPartition { dayPartitionToPeriodData :: PeriodData Day } deriving (Eq, Ord, Show) --- Developer's note. All constructors must guarantee that: --- 1. The value stored in pdperiods has at least one key. --- 2. The value stored in pdpre equals one day before the smallest key in pdperiods. --- 3. The value stored in each entry of pdperiods equals one day before the --- next largest key, except for the value associated to the largest key. -isValidDayPartition :: DayPartition -> Bool -isValidDayPartition (DayPartition pd) = case ds of - [] -> False -- Must be at least one key in pdperiods - xs -> and $ zipWith isContiguous ((nulldate, h) : xs) xs - where - (h, ds) = periodDataToList pd - isContiguous (_, e) (s, _) = addDays 1 e == s +-- constructors: --- | Construct a 'DayPartition' from a non-empty list of boundary days. +-- | Construct a 'DayPartition' from a non-empty list of period boundary dates (start dates plus a final exclusive end date). +-- +-- >>> boundariesToDayPartition (fromGregorian 2025 01 01 :| [fromGregorian 2025 02 01]) +-- DayPartition {dayPartitionToPeriodData = PeriodData{ pdpre = 2024-12-31, pdperiods = fromList [(2025-01-01,2025-01-31)]}} +-- boundariesToDayPartition :: NonEmpty Day -> DayPartition boundariesToDayPartition xs = DayPartition . periodDataFromList (addDays (-1) b) $ case bs of [] -> [(b, b)] -- If only one boundary is supplied, it ends on the same day _:_ -> zip (b:bs) $ map (addDays (-1)) bs -- Guaranteed non-empty where b:|bs = NE.nub $ NE.sort xs --- | Construct a 'DayPartition' from a list of boundary days, returning --- 'Nothing' for the empty list. +-- | Construct a 'DayPartition' from a list of period boundary dates (start dates plus a final exclusive end date), +-- if it's a non-empty list. boundariesToMaybeDayPartition :: [Day] -> Maybe DayPartition boundariesToMaybeDayPartition = fmap boundariesToDayPartition . NE.nonEmpty --- | Find the span of a 'DayPartition' which contains a given day. -lookupDayPartition :: Day -> DayPartition -> (Maybe Day, Day) -lookupDayPartition d (DayPartition xs) = lookupPeriodDataOrHistorical d xs +-- conversions: --- | Return the union of two 'DayPartition's if they are consistent, or 'Nothing' otherwise. -unionDayPartitions :: DayPartition -> DayPartition -> Maybe DayPartition -unionDayPartitions (DayPartition (PeriodData h as)) (DayPartition (PeriodData h' as')) = - if equalIntersection as as' && isValidDayPartition union then Just union else Nothing - where - union = DayPartition . PeriodData (min h h') $ as <> as' - equalIntersection x y = and $ IM.intersectionWith (==) x y - - --- | Convert 'DayPartition' to a non-empty list of start and end dates for the periods. --- --- Note that the end date of each period will be one day before the start date --- of the next period. +-- | Convert 'DayPartition' to a non-empty list of period start and end dates (both inclusive). +-- Each end date will be one day before the next period's start date. dayPartitionToNonEmpty :: DayPartition -> NonEmpty (Day, Day) dayPartitionToNonEmpty (DayPartition xs) = NE.fromList . snd $ periodDataToList xs -- Constructors guarantee this is non-empty --- | Convert 'DayPartition' to a list of start and end dates for the periods. --- --- Note that the end date of each period will be one day before the start date --- of the next period. +-- | Convert 'DayPartition' to a list (which will always be non-empty) of period start and end dates (both inclusive). +-- Each end date will be one day before the next period's start date. dayPartitionToList :: DayPartition -> [(Day, Day)] dayPartitionToList = NE.toList . dayPartitionToNonEmpty --- | Return the whole day range spanned by a `PeriodData Day`. -dayPartitionSpans :: DayPartition -> (Day, Day) -dayPartitionSpans (DayPartition (PeriodData _ ds)) = - -- Guaranteed not to error because the IntMap in non-empty. - (intToDay . fst $ IM.findMin ds, snd $ IM.findMax ds) - -- | Convert 'DayPartition' to a list of 'DateSpan's. --- --- Note that the end date of each period will be equal to the start date of --- the next period. +-- Each span will end one day before the next span begins +-- (the span's exclusive end date will be equal to the next span's start date). dayPartitionToDateSpans :: DayPartition -> [DateSpan] dayPartitionToDateSpans = map toDateSpan . dayPartitionToList where toDateSpan (s, e) = DateSpan (toEFDay s) (toEFDay $ addDays 1 e) toEFDay = Just . Exact --- Convert a periodic report 'Maybe DayPartition' to a list of 'DateSpans', --- replacing the empty case with an appropriate placeholder. --- --- Note that the end date of each period will be equal to the start date of --- the next period. +-- Convert a 'Maybe DayPartition' to a list of one or more 'DateSpans'. +-- Each span will end one day before the next span begins +-- (the span's exclusive end date will be equal to the next span's start date). +-- If given Nothing, it returns a single open-ended span. maybeDayPartitionToDateSpans :: Maybe DayPartition -> [DateSpan] maybeDayPartitionToDateSpans = maybe [DateSpan Nothing Nothing] dayPartitionToDateSpans +-- operations: + +-- | Check that a DayPartition has been constructed correctly, +-- with internal invariants satisfied, as well as the external ones described in 'DayPartition'. +-- Internally, all constructors must guarantee: +-- 1. The pdperiods map contains at least one key and value. +-- 2. The value stored in pdpre is one day before pdperiods' smallest key. +-- 3. Each value stored in pdperiods is one day before the next largest key, +-- (except for the value associated with the largest key). +isValidDayPartition :: DayPartition -> Bool +isValidDayPartition (DayPartition pd) = case ds of + [] -> False + xs -> and $ zipWith isContiguous ((nulldate, h) : xs) xs + where + (h, ds) = periodDataToList pd + isContiguous (_, e) (s, _) = addDays 1 e == s + +-- | Return the union of two 'DayPartition's if that is a valid 'DayPartition', +-- or 'Nothing' otherwise. +unionDayPartitions :: DayPartition -> DayPartition -> Maybe DayPartition +unionDayPartitions (DayPartition (PeriodData h as)) (DayPartition (PeriodData h' as')) = + if equalIntersection as as' && isValidDayPartition union then Just union else Nothing + where + union = DayPartition . PeriodData (min h h') $ as <> as' + equalIntersection x y = and $ IM.intersectionWith (==) x y + +-- | Get this DayPartition's overall start date and end date (both inclusive). +dayPartitionSpans :: DayPartition -> (Day, Day) +dayPartitionSpans (DayPartition (PeriodData _ ds)) = + -- Guaranteed not to error because the IntMap is non-empty. + (intToDay . fst $ IM.findMin ds, snd $ IM.findMax ds) + +lookupDayPartition :: Day -> DayPartition -> (Maybe Day, Day) +lookupDayPartition d (DayPartition xs) = lookupPeriodDataOrHistorical d xs +-- | Find the start and end dates of the period within a 'DayPartition' which contains a given day. +-- If the day is after the end of the last period, it is assumed to be within the last period. +-- If the day is before the start of the first period (ie, in the historical period), +-- only the historical period's end date is returned. + -- | Split a 'DateSpan' into a 'DayPartition' consisting of consecutive exact -- spans of the specified Interval, or `Nothing` if the span is invalid. -- If no interval is specified, the original span is returned. @@ -193,7 +206,6 @@ splitSpan _ (DaysOfWeek days@(n:_)) ds = do starts = map (\d -> addDays (toInteger $ d - n) $ nthdayofweekcontaining n s) days spansFromBoundaries e bdrys - -- | Fill in missing start/end dates for calculating 'splitSpan'. dateSpanSplitLimits :: (Day -> Day) -> (Day -> Day) -> DateSpan -> Maybe (Day, Day) dateSpanSplitLimits _ _ (DateSpan Nothing Nothing) = Nothing @@ -221,8 +233,7 @@ spansFromBoundaries _ [] = Nothing spansFromBoundaries e (x:_) | x >= e = Nothing spansFromBoundaries e (x:xs) = Just . boundariesToDayPartition $ takeUntilFailsNE ( Day -> Day intervalBoundaryBefore i d = @@ -230,10 +241,11 @@ intervalBoundaryBefore i d = Just ((start, _) :| _ ) -> start _ -> d - intToDay = ModifiedJulianDay . toInteger +-- tests: + tests_DayPartition = testGroup "splitSpan" [ testCase "weekday" $ do diff --git a/hledger-lib/Hledger/Data/PeriodData.hs b/hledger-lib/Hledger/Data/PeriodData.hs index 162897c09..98298c978 100644 --- a/hledger-lib/Hledger/Data/PeriodData.hs +++ b/hledger-lib/Hledger/Data/PeriodData.hs @@ -62,7 +62,7 @@ instance Foldable1 PeriodData where instance Traversable PeriodData where traverse f (PeriodData h as) = liftA2 PeriodData (f h) $ traverse f as --- | The Semigroup instance for 'AccountBalance' will simply take the union of +-- | The Semigroup instance for 'PeriodData' simply takes the union of -- keys in the date map section. This may not be the result you want if the -- keys are not identical. instance Semigroup a => Semigroup (PeriodData a) where @@ -71,57 +71,54 @@ instance Semigroup a => Semigroup (PeriodData a) where instance Monoid a => Monoid (PeriodData a) where mempty = PeriodData mempty mempty --- | Construct an 'PeriodData' from a list. +-- | Construct a 'PeriodData' from a historical data value and a list of (period start, period data) pairs. periodDataFromList :: a -> [(Day, a)] -> PeriodData a periodDataFromList h = PeriodData h . IM.fromList . map (\(d, a) -> (dayToInt d, a)) --- | Convert 'PeriodData' to a list of pairs. +-- | Convert 'PeriodData' to a historical data value and a list of (period start, period data) pairs. periodDataToList :: PeriodData a -> (a, [(Day, a)]) periodDataToList (PeriodData h as) = (h, map (\(s, e) -> (intToDay s, e)) $ IM.toList as) - --- | Get account balance information for the period containing a given 'Day', --- along with the start of the period, or 'Nothing' if this day lies in the --- historical period. +-- | Get the data for the period containing the given 'Day', and that period's start date. +-- If the day is after the end of the last period, it is assumed to be within the last period. +-- If the day is before the start of the first period (ie, in the historical period), return Nothing. lookupPeriodData :: Day -> PeriodData a -> Maybe (Day, a) lookupPeriodData d (PeriodData _ as) = first intToDay <$> IM.lookupLE (dayToInt d) as --- | Get account balance information for the period containing a given 'Day' --- or the historical data if this day lies in the historical period, along with --- the start of the period or 'Nothing' if it lies in the historical period. +-- | Get the data for the period containing the given 'Day', and that period's start date. +-- If the day is after the end of the last period, it is assumed to be within the last period. +-- If the day is before the start of the first period (ie, in the historical period), +-- return the data for the historical period and no start date. lookupPeriodDataOrHistorical :: Day -> PeriodData a -> (Maybe Day, a) lookupPeriodDataOrHistorical d pd@(PeriodData h _) = case lookupPeriodData d pd of - Nothing -> (Nothing, h) - Just (a, b) -> (Just a, b) + Nothing -> (Nothing, h) + Just (a, b) -> (Just a, b) --- | Add account balance information to the appropriate location in 'PeriodData'. +-- | Set historical or period data in the appropriate location in a 'PeriodData'. insertPeriodData :: Semigroup a => Maybe Day -> a -> PeriodData a -> PeriodData a insertPeriodData mday b balances = case mday of Nothing -> balances{pdpre = pdpre balances <> b} Just day -> balances{pdperiods = IM.insertWith (<>) (dayToInt day) b $ pdperiods balances} --- | Merges two 'PeriodData', using the given operation to combine their balance information. +-- | Merge two 'PeriodData', using the given operation to combine their data values. -- -- This will drop keys if they are not present in both 'PeriodData'. opPeriodData :: (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c opPeriodData f (PeriodData h1 as1) (PeriodData h2 as2) = - PeriodData (f h1 h2) $ IM.intersectionWith f as1 as2 + PeriodData (f h1 h2) $ IM.intersectionWith f as1 as2 --- | Merges two 'PeriodData', using the given operations for balance --- information only in the first, only in the second, or in both --- 'PeriodData', respectively. -mergePeriodData :: (a -> c) -> (b -> c) -> (a -> b -> c) - -> PeriodData a -> PeriodData b -> PeriodData c +-- | Merge two 'PeriodData', using the given operations for combining data +-- that's only in the first, only in the second, or in both, respectively. +mergePeriodData :: (a -> c) -> (b -> c) -> (a -> b -> c) -> PeriodData a -> PeriodData b -> PeriodData c mergePeriodData only1 only2 f = \(PeriodData h1 as1) (PeriodData h2 as2) -> - PeriodData (f h1 h2) $ merge as1 as2 + PeriodData (f h1 h2) $ merge as1 as2 where merge = IM.mergeWithKey (\_ x y -> Just $ f x y) (fmap only1) (fmap only2) --- | Pad out the datemap of a 'PeriodData' so that every key from another 'PeriodData' is present. +-- | Pad out the date map of a 'PeriodData' so that every key from another 'PeriodData' is present. padPeriodData :: a -> PeriodData b -> PeriodData a -> PeriodData a padPeriodData x pad bal = bal{pdperiods = pdperiods bal <> (x <$ pdperiods pad)} - intToDay = ModifiedJulianDay . toInteger dayToInt = fromInteger . toModifiedJulianDay diff --git a/hledger-lib/Hledger/Data/Types.hs b/hledger-lib/Hledger/Data/Types.hs index 4d2e3627d..44abe0c9f 100644 --- a/hledger-lib/Hledger/Data/Types.hs +++ b/hledger-lib/Hledger/Data/Types.hs @@ -750,15 +750,24 @@ data Account a = Account { ,adata :: PeriodData a -- ^ associated data per report period } deriving (Generic, Functor) --- | A general container for storing data values associated to zero or more --- report periods, and for the pre-report period. Report periods are assumed to --- be contiguous, and represented only by start dates. +-- | A general container for storing data values associated with zero or more +-- contiguous report (sub)periods, and with the (open ended) pre-report period. +-- The report periods are typically all the same length, but need not be. +-- +-- Report periods are represented only by their start dates, used as the keys of an 'IntMap'. +-- Like the Integer inside the Day type, these Int keys are a count of days before or after 1858-11-17. +-- +-- Note the use of Int limits the dates this type can represent. +-- On a 64 bit machine, the range is about 25 quadrillion years into past and future +-- (-25252734927764696-04-22 to 25252734927768413-06-12). +-- On a 32 bit machine, it is about 5 million years into past and future +-- (-5877752-05-08 to 5881469-05-27). +-- Exceeding the machine's Int range here will silently wrap around, +-- causing this type (and periodic reports) to give wrong results. -- --- Data is stored in an 'IntMap' for efficiency, where Days are stored as as --- Int representing the underlying modified Julian date. data PeriodData a = PeriodData { - pdpre :: a -- ^ data from the pre-report period (e.g. historical balances) - ,pdperiods :: IM.IntMap a -- ^ data for the periods + pdpre :: a -- ^ data for the period before the report + ,pdperiods :: IM.IntMap a -- ^ data for each period within the report } deriving (Eq, Ord, Functor, Generic) -- | Data that's useful in "balance" reports: