Calculate MultiReportBalance columns more efficiently.

Only calculate posting date once for each posting, and calculate their
columns instead of checking each DateSpan separately.
This commit is contained in:
Stephen Morgan 2019-11-12 12:14:21 +11:00 committed by Simon Michael
parent f55f814155
commit 38904372b2
4 changed files with 67 additions and 40 deletions

View File

@ -63,6 +63,8 @@ module Hledger.Data.Dates (
spanDefaultsFrom, spanDefaultsFrom,
spanUnion, spanUnion,
spansUnion, spansUnion,
daysSpan,
latestSpanContaining,
smartdate, smartdate,
splitSpan, splitSpan,
fixSmartDate, fixSmartDate,
@ -79,10 +81,11 @@ import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (fail) import "base-compat-batteries" Prelude.Compat hiding (fail)
import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (MonadFail, fail) import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (MonadFail, fail)
import Control.Applicative.Permutations import Control.Applicative.Permutations
import Control.Monad (unless) import Control.Monad (guard, unless)
import "base-compat-batteries" Data.List.Compat import "base-compat-batteries" Data.List.Compat
import Data.Default import Data.Default
import Data.Maybe import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
#if MIN_VERSION_time(1,5,0) #if MIN_VERSION_time(1,5,0)
@ -95,7 +98,7 @@ import Data.Time.Calendar
import Data.Time.Calendar.OrdinalDate import Data.Time.Calendar.OrdinalDate
import Data.Time.Clock import Data.Time.Clock
import Data.Time.LocalTime import Data.Time.LocalTime
import Safe (headMay, lastMay, readMay) import Safe (headMay, lastMay, readMay, maximumMay, minimumMay)
import Text.Megaparsec import Text.Megaparsec
import Text.Megaparsec.Char import Text.Megaparsec.Char
import Text.Megaparsec.Custom import Text.Megaparsec.Custom
@ -231,9 +234,8 @@ daysInSpan _ = Nothing
-- | Is this an empty span, ie closed with the end date on or before the start date ? -- | Is this an empty span, ie closed with the end date on or before the start date ?
isEmptySpan :: DateSpan -> Bool isEmptySpan :: DateSpan -> Bool
isEmptySpan s = case daysInSpan s of isEmptySpan (DateSpan (Just s) (Just e)) = e <= s
Just n -> n < 1 isEmptySpan _ = False
Nothing -> False
-- | Does the span include the given date ? -- | Does the span include the given date ?
spanContainsDate :: DateSpan -> Day -> Bool spanContainsDate :: DateSpan -> Day -> Bool
@ -287,6 +289,36 @@ earliest d Nothing = d
earliest Nothing d = d earliest Nothing d = d
earliest (Just d1) (Just d2) = Just $ min d1 d2 earliest (Just d1) (Just d2) = Just $ min d1 d2
-- | Calculate the minimal DateSpan containing all of the given Days (in the
-- usual exclusive-end-date sense: beginning on the earliest, and ending on
-- the day after the latest).
daysSpan :: [Day] -> DateSpan
daysSpan ds = DateSpan (minimumMay ds) (addDays 1 <$> maximumMay ds)
-- | Select the DateSpan containing a given Day, if any, from a given list of
-- DateSpans.
--
-- If the DateSpans are non-overlapping, this returns the unique containing
-- DateSpan, if it exists. If the DateSpans are overlapping, it will return the
-- containing DateSpan with the latest start date, and then latest end date.
-- Note: This will currently return `DateSpan (Just s) (Just e)` before it will
-- return `DateSpan (Just s) Nothing`. It's unclear which behaviour is desired.
-- This is irrelevant at the moment as it's never applied to any list with
-- overlapping DateSpans.
latestSpanContaining :: [DateSpan] -> Day -> Maybe DateSpan
latestSpanContaining datespans = go
where
go day = do
span <- Set.lookupLT supSpan spanSet
guard $ spanContainsDate span day
return span
where
-- The smallest DateSpan larger than any DateSpan containing day.
supSpan = DateSpan (Just $ addDays 1 day) Nothing
spanSet = Set.fromList $ filter (not . isEmptySpan) datespans
-- | Parse a period expression to an Interval and overall DateSpan using -- | Parse a period expression to an Interval and overall DateSpan using
-- the provided reference date, or return a parse error. -- the provided reference date, or return a parse error.
parsePeriodExpr parsePeriodExpr

View File

@ -34,8 +34,9 @@ import Text.Printf
import Hledger.Utils.Test import Hledger.Utils.Test
import Hledger.Data.Types import Hledger.Data.Types
import Hledger.Data.Account import Hledger.Data.Account
import Hledger.Data.Dates (daysSpan)
import Hledger.Data.Journal import Hledger.Data.Journal
import Hledger.Data.Posting import Hledger.Data.Posting (postingDate)
import Hledger.Query import Hledger.Query
@ -100,7 +101,7 @@ ledgerPostings = journalPostings . ljournal
-- | The (fully specified) date span containing all the ledger's (filtered) transactions, -- | The (fully specified) date span containing all the ledger's (filtered) transactions,
-- or DateSpan Nothing Nothing if there are none. -- or DateSpan Nothing Nothing if there are none.
ledgerDateSpan :: Ledger -> DateSpan ledgerDateSpan :: Ledger -> DateSpan
ledgerDateSpan = postingsDateSpan . ledgerPostings ledgerDateSpan = daysSpan . map postingDate . ledgerPostings
-- | All commodities used in this ledger. -- | All commodities used in this ledger.
ledgerCommodities :: Ledger -> [CommoditySymbol] ledgerCommodities :: Ledger -> [CommoditySymbol]

View File

@ -42,8 +42,6 @@ module Hledger.Data.Posting (
postingDate2, postingDate2,
isPostingInDateSpan, isPostingInDateSpan,
isPostingInDateSpan', isPostingInDateSpan',
postingsDateSpan,
postingsDateSpan',
-- * account name operations -- * account name operations
accountNamesFromPostings, accountNamesFromPostings,
accountNamePostingType, accountNamePostingType,
@ -69,6 +67,8 @@ module Hledger.Data.Posting (
tests_Posting tests_Posting
) )
where where
import Data.Foldable (asum)
import Data.List import Data.List
import qualified Data.Map as M import qualified Data.Map as M
import Data.Maybe import Data.Maybe
@ -204,20 +204,19 @@ removePrices p = p{ pamount = Mixed $ remove <$> amounts (pamount p) }
-- otherwise the parent transaction's primary date, or the null date if -- otherwise the parent transaction's primary date, or the null date if
-- there is no parent transaction. -- there is no parent transaction.
postingDate :: Posting -> Day postingDate :: Posting -> Day
postingDate p = fromMaybe txndate $ pdate p postingDate p = fromMaybe nulldate $ asum dates
where where dates = [ pdate p, tdate <$> ptransaction p ]
txndate = maybe nulldate tdate $ ptransaction p
-- | Get a posting's secondary (secondary) date, which is the first of: -- | Get a posting's secondary (secondary) date, which is the first of:
-- posting's secondary date, transaction's secondary date, posting's -- posting's secondary date, transaction's secondary date, posting's
-- primary date, transaction's primary date, or the null date if there is -- primary date, transaction's primary date, or the null date if there is
-- no parent transaction. -- no parent transaction.
postingDate2 :: Posting -> Day postingDate2 :: Posting -> Day
postingDate2 p = headDef nulldate $ catMaybes dates postingDate2 p = fromMaybe nulldate $ asum dates
where dates = [pdate2 p where dates = [ pdate2 p
,maybe Nothing tdate2 $ ptransaction p , tdate2 =<< ptransaction p
,pdate p , pdate p
,fmap tdate (ptransaction p) , tdate <$> ptransaction p
] ]
-- | Get a posting's status. This is cleared or pending if those are -- | Get a posting's status. This is cleared or pending if those are
@ -246,7 +245,7 @@ relatedPostings _ = []
-- | Does this posting fall within the given date span ? -- | Does this posting fall within the given date span ?
isPostingInDateSpan :: DateSpan -> Posting -> Bool isPostingInDateSpan :: DateSpan -> Posting -> Bool
isPostingInDateSpan s = spanContainsDate s . postingDate isPostingInDateSpan = isPostingInDateSpan' PrimaryDate
-- --date2-sensitive version, separate for now to avoid disturbing multiBalanceReport. -- --date2-sensitive version, separate for now to avoid disturbing multiBalanceReport.
isPostingInDateSpan' :: WhichDate -> DateSpan -> Posting -> Bool isPostingInDateSpan' :: WhichDate -> DateSpan -> Posting -> Bool
@ -256,21 +255,6 @@ isPostingInDateSpan' SecondaryDate s = spanContainsDate s . postingDate2
isEmptyPosting :: Posting -> Bool isEmptyPosting :: Posting -> Bool
isEmptyPosting = isZeroMixedAmount . pamount isEmptyPosting = isZeroMixedAmount . pamount
-- | Get the minimal date span which contains all the postings, or the
-- null date span if there are none.
postingsDateSpan :: [Posting] -> DateSpan
postingsDateSpan [] = DateSpan Nothing Nothing
postingsDateSpan ps = DateSpan (Just $ postingDate $ head ps') (Just $ addDays 1 $ postingDate $ last ps')
where ps' = sortOn postingDate ps
-- --date2-sensitive version, as above.
postingsDateSpan' :: WhichDate -> [Posting] -> DateSpan
postingsDateSpan' _ [] = DateSpan Nothing Nothing
postingsDateSpan' wd ps = DateSpan (Just $ postingdate $ head ps') (Just $ addDays 1 $ postingdate $ last ps')
where
ps' = sortOn postingdate ps
postingdate = if wd == PrimaryDate then postingDate else postingDate2
-- AccountName stuff that depends on PostingType -- AccountName stuff that depends on PostingType
accountNamePostingType :: AccountName -> PostingType accountNamePostingType :: AccountName -> PostingType

View File

@ -24,6 +24,7 @@ where
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Control.DeepSeq (NFData) import Control.DeepSeq (NFData)
import Data.List import Data.List
import qualified Data.Map as M
import Data.Maybe import Data.Maybe
import Data.Ord import Data.Ord
import Data.Time.Calendar import Data.Time.Calendar
@ -150,7 +151,7 @@ multiBalanceReportWith ropts@ReportOpts{..} q j priceoracle =
displayspan displayspan
| empty_ = dbg1 "displayspan (-E)" reportspan -- all the requested intervals | empty_ = dbg1 "displayspan (-E)" reportspan -- all the requested intervals
| otherwise = dbg1 "displayspan" $ requestedspan `spanIntersect` matchedspan -- exclude leading/trailing empty intervals | otherwise = dbg1 "displayspan" $ requestedspan `spanIntersect` matchedspan -- exclude leading/trailing empty intervals
matchedspan = dbg1 "matchedspan" $ postingsDateSpan' (whichDateFromOpts ropts) ps matchedspan = dbg1 "matchedspan" . daysSpan $ map snd ps
-- If doing cost valuation, convert amounts to cost. -- If doing cost valuation, convert amounts to cost.
j' = journalSelectingAmountFromOpts ropts j j' = journalSelectingAmountFromOpts ropts j
@ -187,17 +188,26 @@ multiBalanceReportWith ropts@ReportOpts{..} q j priceoracle =
-- 3. Gather postings for each column. -- 3. Gather postings for each column.
-- Postings matching the query within the report period. -- Postings matching the query within the report period.
ps :: [Posting] = ps :: [(Posting, Day)] =
dbg1 "ps" $ dbg1 "ps" $
map postingWithDate $
journalPostings $ journalPostings $
filterJournalAmounts symq $ -- remove amount parts excluded by cur: filterJournalAmounts symq $ -- remove amount parts excluded by cur:
filterJournalPostings reportq $ -- remove postings not matched by (adjusted) query filterJournalPostings reportq $ -- remove postings not matched by (adjusted) query
j' j'
where
postingWithDate p = case whichDateFromOpts ropts of
PrimaryDate -> (p, postingDate p)
SecondaryDate -> (p, postingDate2 p)
-- Group postings into their columns, with the column end dates. -- Group postings into their columns, with the column end dates.
colps :: [([Posting], Maybe Day)] = colps :: [([Posting], Maybe Day)] =
dbg1 "colps" dbg1 "colps"
[(filter (isPostingInDateSpan' (whichDateFromOpts ropts) s) ps, spanEnd s) | s <- colspans] [ (posts, end) | (DateSpan _ end, posts) <- M.toList colMap ]
where
colMap = foldr addPosting emptyMap ps
addPosting (p, d) = maybe id (M.adjust (p:)) $ latestSpanContaining colspans d
emptyMap = M.fromList . zip colspans $ repeat []
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- 4. Calculate account balance changes in each column. -- 4. Calculate account balance changes in each column.
@ -228,13 +238,13 @@ multiBalanceReportWith ropts@ReportOpts{..} q j priceoracle =
then nub $ sort $ startaccts ++ allpostedaccts then nub $ sort $ startaccts ++ allpostedaccts
else allpostedaccts else allpostedaccts
where where
allpostedaccts :: [AccountName] = dbg1 "allpostedaccts" $ sort $ accountNamesFromPostings ps allpostedaccts :: [AccountName] =
dbg1 "allpostedaccts" . sort . accountNamesFromPostings $ map fst ps
-- Each column's balance changes for each account, adding zeroes where needed. -- Each column's balance changes for each account, adding zeroes where needed.
colallacctchanges :: [[(ClippedAccountName, MixedAmount)]] = colallacctchanges :: [[(ClippedAccountName, MixedAmount)]] =
dbg1 "colallacctchanges" dbg1 "colallacctchanges"
[sortBy (comparing fst) $ [ sortOn fst $ unionBy (\(a,_) (a',_) -> a == a') postedacctchanges zeroes
unionBy (\(a,_) (a',_) -> a == a') postedacctchanges zeroes | postedacctchanges <- colacctchanges ]
| postedacctchanges <- colacctchanges]
where zeroes = [(a, nullmixedamt) | a <- displayaccts] where zeroes = [(a, nullmixedamt) | a <- displayaccts]
-- Transpose to get each account's balance changes across all columns. -- Transpose to get each account's balance changes across all columns.
acctchanges :: [(ClippedAccountName, [MixedAmount])] = acctchanges :: [(ClippedAccountName, [MixedAmount])] =