fix: really fix slowdown with many accounts [#2153]

The previous #2153 fix used accountNameTreeFrom, but it turns out this
has always had O(n^2) performance, so our tests with 10k accounts ran
even slower than before. Now it's faster, the main #2153 slowdown
should really be fixed, and other commands which build an account tree
should also be free of this slowdown when there are very many accounts.
This commit is contained in:
Simon Michael 2024-01-26 13:53:47 -10:00
parent a38af98c9e
commit 21adfe2c25

View File

@ -44,6 +44,7 @@ module Hledger.Data.AccountName (
,parentAccountNames ,parentAccountNames
,subAccountNamesFrom ,subAccountNamesFrom
,topAccountNames ,topAccountNames
,topAccountName
,unbudgetedAccountName ,unbudgetedAccountName
,accountNamePostingType ,accountNamePostingType
,accountNameWithoutPostingType ,accountNameWithoutPostingType
@ -67,13 +68,14 @@ import Data.MemoUgly (memo)
import qualified Data.Set as S import qualified Data.Set as S
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Tree (Tree(..)) import Data.Tree (Tree(..), unfoldTree)
import Safe import Safe
import Text.DocLayout (realLength) import Text.DocLayout (realLength)
import Hledger.Data.Types import Hledger.Data.Types hiding (asubs)
import Hledger.Utils import Hledger.Utils
import Data.Char (isDigit, isLetter) import Data.Char (isDigit, isLetter)
import Data.List (partition)
-- $setup -- $setup
-- >>> :set -XOverloadedStrings -- >>> :set -XOverloadedStrings
@ -234,6 +236,10 @@ expandAccountName = map accountNameFromComponents . NE.tail . NE.inits . account
topAccountNames :: [AccountName] -> [AccountName] topAccountNames :: [AccountName] -> [AccountName]
topAccountNames = filter ((1==) . accountNameLevel) . expandAccountNames topAccountNames = filter ((1==) . accountNameLevel) . expandAccountNames
-- | "a:b:c" -> "a"
topAccountName :: AccountName -> AccountName
topAccountName = T.takeWhile (/= acctsepchar)
parentAccountName :: AccountName -> AccountName parentAccountName :: AccountName -> AccountName
parentAccountName = accountNameFromComponents . init . accountNameComponents parentAccountName = accountNameFromComponents . init . accountNameComponents
@ -249,24 +255,28 @@ isAccountNamePrefixOf = T.isPrefixOf . (<> acctsep)
isSubAccountNameOf :: AccountName -> AccountName -> Bool isSubAccountNameOf :: AccountName -> AccountName -> Bool
s `isSubAccountNameOf` p = s `isSubAccountNameOf` p =
(p `isAccountNamePrefixOf` s) && (accountNameLevel s == (accountNameLevel p + 1)) (p `isAccountNamePrefixOf` s) && (accountNameLevel s == (accountNameLevel p + 1))
-- | From a list of account names, select those which are direct -- | From a list of account names, select those which are direct
-- subaccounts of the given account name. -- subaccounts of the given account name.
subAccountNamesFrom :: [AccountName] -> AccountName -> [AccountName] subAccountNamesFrom :: [AccountName] -> AccountName -> [AccountName]
subAccountNamesFrom accts a = filter (`isSubAccountNameOf` a) accts subAccountNamesFrom accts a = filter (`isSubAccountNameOf` a) accts
-- | Convert a list of account names to a tree. -- | Convert a list of account names to a tree, efficiently.
accountNameTreeFrom :: [AccountName] -> Tree AccountName accountNameTreeFrom :: [AccountName] -> Tree AccountName
accountNameTreeFrom accts = accountNameTreeFrom accts = unfoldTree grow ("root", expandAccountNames accts)
Node "root" (accounttreesfrom (topAccountNames accts)) where
where -- unfoldTree :: (b -> (a, [b])) -> b -> Tree a
accounttreesfrom :: [AccountName] -> [Tree AccountName] -- grow :: (b -> (a, [b]))
accounttreesfrom [] = [] -- a = AccountName - the label at each node of the tree
accounttreesfrom as = [Node a (accounttreesfrom $ subs a) | a <- as] -- b = (AccountName, [AccountName]) - the next node's account, and the accounts remaining to consume under it
subs = subAccountNamesFrom (expandAccountNames accts) grow :: ((AccountName, [AccountName]) -> (AccountName, [(AccountName, [AccountName])]))
grow (a,[]) = (a,[])
--nullaccountnametree = Node "root" [] grow (a,rest) = (a, [(s, filter (s `isAccountNamePrefixOf`) deepersubs) | s <- asubs])
where
(asubs, deepersubs) = partition (isChildOf a) rest
isChildOf "root" = (1==) . accountNameLevel
isChildOf acct = (`isSubAccountNameOf` acct)
-- | Elide an account name to fit in the specified width. -- | Elide an account name to fit in the specified width.
-- From the ledger 2.6 news: -- From the ledger 2.6 news: