fix: a slowdown with many txns and many accounts since 1.29 [#2153]

When processing costs and equity postings in transactions during
journal finalisation, we now pass just the conversion account name(s)
rather than the entire map of account types. This slowdown was severe
for some users/data/machines.
This commit is contained in:
Simon Michael 2024-01-25 11:45:32 -10:00
parent 53431d9ac6
commit a38af98c9e
4 changed files with 38 additions and 24 deletions

View File

@ -96,6 +96,7 @@ module Hledger.Data.Journal (
journalPostingsAddAccountTags,
-- journalPrices,
journalConversionAccount,
journalConversionAccounts,
-- * Misc
canonicalStyleFrom,
nulljournal,
@ -519,13 +520,6 @@ letterPairs :: String -> [String]
letterPairs (a:b:rest) = [a,b] : letterPairs (b:rest)
letterPairs _ = []
-- | The 'AccountName' to use for automatically generated conversion postings.
journalConversionAccount :: Journal -> AccountName
journalConversionAccount =
headDef (T.pack "equity:conversion")
. M.findWithDefault [] Conversion
. jdeclaredaccounttypes
-- Newer account type code.
journalAccountType :: Journal -> AccountName -> Maybe AccountType
@ -557,7 +551,7 @@ journalAccountTypes j = M.fromList [(a,acctType) | (a, Just (acctType,_)) <- fla
then mparenttype
else (,False) <$> accountNameInferType a <|> mparenttype
-- | Build a map of the account types explicitly declared.
-- | Build a map of the account types explicitly declared for each account.
journalDeclaredAccountTypes :: Journal -> M.Map AccountName AccountType
journalDeclaredAccountTypes Journal{jdeclaredaccounttypes} =
M.fromList $ concat [map (,t) as | (t,as) <- M.toList jdeclaredaccounttypes]
@ -569,6 +563,19 @@ journalPostingsAddAccountTags :: Journal -> Journal
journalPostingsAddAccountTags j = journalMapPostings addtags j
where addtags p = p `postingAddTags` (journalInheritedAccountTags j $ paccount p)
-- | The account to use for automatically generated conversion postings in this journal:
-- the first of the journalConversionAccounts.
journalConversionAccount :: Journal -> AccountName
journalConversionAccount = headDef defaultConversionAccount . journalConversionAccounts
-- | All the accounts declared or inferred as Conversion type in this journal.
journalConversionAccounts :: Journal -> [AccountName]
journalConversionAccounts = M.keys . M.filter (==Conversion) . jaccounttypes
-- The fallback account to use for automatically generated conversion postings
-- if no account is declared with the Conversion type.
defaultConversionAccount = "equity:conversion"
-- Various kinds of filtering on journals. We do it differently depending
-- on the command.
@ -943,9 +950,11 @@ journalInferEquityFromCosts verbosetags j = journalMapTransactions (transactionA
-- See hledger manual > Cost reporting.
journalInferCostsFromEquity :: Journal -> Either String Journal
journalInferCostsFromEquity j = do
ts <- mapM (transactionInferCostsFromEquity False $ jaccounttypes j) $ jtxns j
return j{jtxns=ts}
ts <- mapM (transactionInferCostsFromEquity False conversionaccts) $ jtxns j
return j{jtxns=ts}
where conversionaccts = journalConversionAccounts j
-- XXX duplication of the above
-- | Do just the internal tagging that is normally done by journalInferCostsFromEquity,
-- identifying equity conversion postings and, in particular, postings which have redundant costs.
-- Tagging the latter is useful as it allows them to be ignored during transaction balancedness checking.
@ -953,8 +962,9 @@ journalInferCostsFromEquity j = do
-- when it will have more information (amounts) to work with.
journalMarkRedundantCosts :: Journal -> Either String Journal
journalMarkRedundantCosts j = do
ts <- mapM (transactionInferCostsFromEquity True $ jaccounttypes j) $ jtxns j
return j{jtxns=ts}
ts <- mapM (transactionInferCostsFromEquity True conversionaccts) $ jtxns j
return j{jtxns=ts}
where conversionaccts = journalConversionAccounts j
-- -- | Get this journal's unique, display-preference-canonicalised commodities, by symbol.
-- journalCanonicalCommodities :: Journal -> M.Map String CommoditySymbol

View File

@ -219,11 +219,12 @@ builtinTags = [
-- | In each tranaction, check that any conversion postings occur in adjacent pairs.
journalCheckPairedConversionPostings :: Journal -> Either String ()
journalCheckPairedConversionPostings j =
mapM_ (transactionCheckPairedConversionPostings (jaccounttypes j)) $ jtxns j
mapM_ (transactionCheckPairedConversionPostings conversionaccts) $ jtxns j
where conversionaccts = journalConversionAccounts j
transactionCheckPairedConversionPostings :: M.Map AccountName AccountType -> Transaction -> Either String ()
transactionCheckPairedConversionPostings accttypes t =
case partitionAndCheckConversionPostings True accttypes (zip [0..] $ tpostings t) of
transactionCheckPairedConversionPostings :: [AccountName] -> Transaction -> Either String ()
transactionCheckPairedConversionPostings conversionaccts t =
case partitionAndCheckConversionPostings True conversionaccts (zip [0..] $ tpostings t) of
Left err -> Left $ T.unpack err
Right _ -> Right ()

View File

@ -274,6 +274,7 @@ type IdxPosting = (Int, Posting)
label s = ((s <> ": ")++)
-- | Add costs inferred from equity postings in this transaction.
-- The name(s) of conversion equity accounts should be provided.
-- For every adjacent pair of conversion postings, it will first search the postings
-- with costs to see if any match. If so, it will tag these as matched.
-- If no postings with costs match, it will then search the postings without costs,
@ -281,13 +282,13 @@ label s = ((s <> ": ")++)
-- If it finds a match, it will add a cost and then tag it.
-- If the first argument is true, do a dry run instead: identify and tag
-- the costful and conversion postings, but don't add costs.
transactionInferCostsFromEquity :: Bool -> M.Map AccountName AccountType -> Transaction -> Either String Transaction
transactionInferCostsFromEquity dryrun acctTypes t = first (annotateErrorWithTransaction t . T.unpack) $ do
transactionInferCostsFromEquity :: Bool -> [AccountName] -> Transaction -> Either String Transaction
transactionInferCostsFromEquity dryrun conversionaccts t = first (annotateErrorWithTransaction t . T.unpack) $ do
-- number the postings
let npostings = zip [0..] $ tpostings t
-- Identify all pairs of conversion postings and all other postings (with and without costs) in the transaction.
(conversionPairs, otherps) <- partitionAndCheckConversionPostings False acctTypes npostings
(conversionPairs, otherps) <- partitionAndCheckConversionPostings False conversionaccts npostings
-- Generate a pure function that can be applied to each of this transaction's postings,
-- possibly modifying it, to produce the following end result:
@ -434,13 +435,13 @@ dbgShowAmountPrecision a =
Precision n -> show n
NaturalPrecision -> show $ decimalPlaces $ normalizeDecimal $ aquantity a
-- Using the provided account types map, sort the given indexed postings
-- Given the names of conversion equity accounts, sort the given indexed postings
-- into three lists of posting numbers (stored in two pairs), like so:
-- (conversion postings, (costful other postings, costless other postings)).
-- A true first argument activates its secondary function: check that all
-- conversion postings occur in adjacent pairs, otherwise return an error.
partitionAndCheckConversionPostings :: Bool -> M.Map AccountName AccountType -> [IdxPosting] -> Either Text ( [(IdxPosting, IdxPosting)], ([IdxPosting], [IdxPosting]) )
partitionAndCheckConversionPostings check acctTypes =
partitionAndCheckConversionPostings :: Bool -> [AccountName] -> [IdxPosting] -> Either Text ( [(IdxPosting, IdxPosting)], ([IdxPosting], [IdxPosting]) )
partitionAndCheckConversionPostings check conversionaccts =
-- Left fold processes postings in parse order, so that eg inferred costs
-- will be added to the first (top-most) posting, not the last one.
foldlM select (([], ([], [])), Nothing)
@ -455,7 +456,7 @@ partitionAndCheckConversionPostings check acctTypes =
| isConversion p = Right (((lst, np):cs, others), Nothing)
| check = Left "Conversion postings must occur in adjacent pairs"
| otherwise = Right ((cs, (ps, np:os)), Nothing)
isConversion p = M.lookup (paccount p) acctTypes == Just Conversion
isConversion p = paccount p `elem` conversionaccts
hasCost p = isJust $ acost =<< postingSingleAmount p
-- | Get a posting's amount if it is single-commodity.

View File

@ -332,7 +332,9 @@ journalFinalise iopts@InputOpts{..} f txt pj = do
& journalStyleAmounts -- Infer and apply commodity styles (but don't round) - should be done early
<&> journalAddForecast (verbose_tags_) (forecastPeriod iopts pj) -- Add forecast transactions if enabled
<&> journalPostingsAddAccountTags -- Add account tags to postings, so they can be matched by auto postings.
>>= journalMarkRedundantCosts -- Mark redundant costs, to help journalBalanceTransactions ignore them
>>= journalMarkRedundantCosts -- Mark redundant costs, to help journalBalanceTransactions ignore them.
-- (Later, journalInferEquityFromCosts will do a similar pass, adding missing equity postings.)
>>= (if auto_ && not (null $ jtxnmodifiers pj)
then journalAddAutoPostings verbose_tags_ _ioDay balancingopts_ -- Add auto postings if enabled, and account tags if needed. Does preliminary transaction balancing.
else pure)