lib: In sorting account names, perform lookups on HashSets and HashMaps,

rather than lists. This is probably not an enormous performance sink in real
situations, but it takes a huge amount of time and memory in our
benchmarks (specifically 10000x10000x10.journal).

For bal -f examples/10000x10000x10.journal, this results in
- A 23% reduction in heap allocation, from 27GiB to 21GiB
- A 33% reduction in (profiled) time running, from 26.5s to 17.9s
This commit is contained in:
Stephen Morgan 2021-02-22 22:45:36 +11:00 committed by Simon Michael
parent 13589aca2e
commit 522c8a6ad3

View File

@ -1,4 +1,6 @@
{-# LANGUAGE RecordWildCards, OverloadedStrings #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-| {-|
@ -9,12 +11,13 @@ account, and subaccounting-excluding and -including balances.
module Hledger.Data.Account module Hledger.Data.Account
where where
import qualified Data.HashSet as HS
import qualified Data.HashMap.Strict as HM
import Data.List (find, sortOn) import Data.List (find, sortOn)
import Data.List.Extra (groupSort, groupOn) import Data.List.Extra (groupOn)
import Data.Maybe (fromMaybe)
import Data.Ord (Down(..))
import qualified Data.Map as M import qualified Data.Map as M
import Safe (headMay, lookupJustDef) import Data.Ord (Down(..))
import Safe (headMay)
import Text.Printf import Text.Printf
import Hledger.Data.AccountName import Hledger.Data.AccountName
@ -63,12 +66,12 @@ nullacct = Account
accountsFromPostings :: [Posting] -> [Account] accountsFromPostings :: [Posting] -> [Account]
accountsFromPostings ps = accountsFromPostings ps =
let let
grouped = groupSort [(paccount p,pamount p) | p <- ps] summed = foldr (\p -> HM.insertWith addAndIncrement (paccount p) (1, pamount p)) mempty ps
counted = [(aname, length amts) | (aname, amts) <- grouped] where addAndIncrement (n, a) (m, b) = (n + m, a `maPlus` b)
summed = [(aname, maSum amts) | (aname, amts) <- grouped] -- always non-empty acctstree = accountTree "root" $ HM.keys summed
acctstree = accountTree "root" $ map fst summed acctswithebals = mapAccounts setnumpsebalance acctstree
acctswithnumps = mapAccounts setnumps acctstree where setnumps a = a{anumpostings=fromMaybe 0 $ lookup (aname a) counted} where setnumpsebalance a = a{anumpostings=numps, aebalance=total}
acctswithebals = mapAccounts setebalance acctswithnumps where setebalance a = a{aebalance=lookupJustDef nullmixedamt (aname a) summed} where (numps, total) = HM.lookupDefault (0, nullmixedamt) (aname a) summed
acctswithibals = sumAccounts acctswithebals acctswithibals = sumAccounts acctswithebals
acctswithparents = tieAccountParents acctswithibals acctswithparents = tieAccountParents acctswithibals
acctsflattened = flattenAccounts acctswithparents acctsflattened = flattenAccounts acctswithparents
@ -224,14 +227,14 @@ accountSetDeclarationInfo j a@Account{..} =
-- --
sortAccountNamesByDeclaration :: Journal -> Bool -> [AccountName] -> [AccountName] sortAccountNamesByDeclaration :: Journal -> Bool -> [AccountName] -> [AccountName]
sortAccountNamesByDeclaration j keepparents as = sortAccountNamesByDeclaration j keepparents as =
(if keepparents then id else filter (`elem` as)) $ -- maybe discard missing parents that were added (if keepparents then id else filter (`HS.member` HS.fromList as)) $ -- maybe discard missing parents that were added
map aname $ -- keep just the names map aname $ -- keep just the names
drop 1 $ -- drop the root node that was added drop 1 $ -- drop the root node that was added
flattenAccounts $ -- convert to an account list flattenAccounts $ -- convert to an account list
sortAccountTreeByDeclaration $ -- sort by declaration order (and name) sortAccountTreeByDeclaration $ -- sort by declaration order (and name)
mapAccounts (accountSetDeclarationInfo j) $ -- add declaration order info mapAccounts (accountSetDeclarationInfo j) $ -- add declaration order info
accountTree "root" -- convert to an account tree accountTree "root" -- convert to an account tree
as as
-- | Sort each group of siblings in an account tree by declaration order, then account name. -- | Sort each group of siblings in an account tree by declaration order, then account name.
-- So each group will contain first the declared accounts, -- So each group will contain first the declared accounts,