This refactoring fixes an O(n^2) slowdown in the balance command with
large numbers of accounts. It's now speedy, and the implementation is
clearer. To facilitate this, the Account type now represents a tree of
accounts which can easily be traversed up or down (and/or flattened
into a list).
Benchmark on a 2010 macbook:
    +-------------------------------------------++--------------+------------+--------+
    |                                           || before:      | after:     |        |
    |                                           || hledger-0.18 | hledgeropt | ledger |
    +===========================================++==============+============+========+
    | -f data/100x100x10.journal     balance    ||         0.21 |       0.07 |   0.09 |
    | -f data/1000x1000x10.journal   balance    ||        10.13 |       0.47 |   0.62 |
    | -f data/1000x10000x10.journal  balance    ||        40.67 |       0.67 |   1.01 |
    | -f data/10000x1000x10.journal  balance    ||        15.01 |       3.22 |   2.36 |
    | -f data/10000x1000x10.journal  balance aa ||         4.77 |       4.40 |   2.33 |
    +-------------------------------------------++--------------+------------+--------+
		
	
			
		
			
				
	
	
		
			158 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			158 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
{-# LANGUAGE NoMonomorphismRestriction#-}
 | 
						|
{-|
 | 
						|
 | 
						|
'AccountName's are strings like @assets:cash:petty@, with multiple
 | 
						|
components separated by ':'.  From a set of these we derive the account
 | 
						|
hierarchy.
 | 
						|
 | 
						|
-}
 | 
						|
 | 
						|
module Hledger.Data.AccountName
 | 
						|
where
 | 
						|
import Data.List
 | 
						|
import Data.Tree
 | 
						|
import Test.HUnit
 | 
						|
import Text.Printf
 | 
						|
 | 
						|
import Hledger.Data.Types
 | 
						|
import Hledger.Utils
 | 
						|
 | 
						|
 | 
						|
 | 
						|
-- change to use a different separator for nested accounts
 | 
						|
acctsepchar = ':'
 | 
						|
 | 
						|
accountNameComponents :: AccountName -> [String]
 | 
						|
accountNameComponents = splitAtElement acctsepchar
 | 
						|
 | 
						|
accountNameFromComponents :: [String] -> AccountName
 | 
						|
accountNameFromComponents = concat . intersperse [acctsepchar]
 | 
						|
 | 
						|
accountLeafName :: AccountName -> String
 | 
						|
accountLeafName = last . accountNameComponents
 | 
						|
 | 
						|
accountNameLevel :: AccountName -> Int
 | 
						|
accountNameLevel "" = 0
 | 
						|
accountNameLevel a = length (filter (==acctsepchar) a) + 1
 | 
						|
 | 
						|
accountNameDrop :: Int -> AccountName -> AccountName
 | 
						|
accountNameDrop n = accountNameFromComponents . drop n . accountNameComponents
 | 
						|
 | 
						|
-- | ["a:b:c","d:e"] -> ["a","a:b","a:b:c","d","d:e"]
 | 
						|
expandAccountNames :: [AccountName] -> [AccountName]
 | 
						|
expandAccountNames as = nub $ concatMap expandAccountName as
 | 
						|
 | 
						|
-- | "a:b:c" -> ["a","a:b","a:b:c"]
 | 
						|
expandAccountName :: AccountName -> [AccountName]
 | 
						|
expandAccountName = map accountNameFromComponents . tail . inits . accountNameComponents
 | 
						|
 | 
						|
-- | ["a:b:c","d:e"] -> ["a","d"]
 | 
						|
topAccountNames :: [AccountName] -> [AccountName]
 | 
						|
topAccountNames as = [a | a <- expandAccountNames as, accountNameLevel a == 1]
 | 
						|
 | 
						|
parentAccountName :: AccountName -> AccountName
 | 
						|
parentAccountName = accountNameFromComponents . init . accountNameComponents
 | 
						|
 | 
						|
parentAccountNames :: AccountName -> [AccountName]
 | 
						|
parentAccountNames a = parentAccountNames' $ parentAccountName a
 | 
						|
    where
 | 
						|
      parentAccountNames' "" = []
 | 
						|
      parentAccountNames' a = a : parentAccountNames' (parentAccountName a)
 | 
						|
 | 
						|
isAccountNamePrefixOf :: AccountName -> AccountName -> Bool
 | 
						|
isAccountNamePrefixOf = isPrefixOf . (++ [acctsepchar])
 | 
						|
 | 
						|
isSubAccountNameOf :: AccountName -> AccountName -> Bool
 | 
						|
s `isSubAccountNameOf` p = 
 | 
						|
    (p `isAccountNamePrefixOf` s) && (accountNameLevel s == (accountNameLevel p + 1))
 | 
						|
 | 
						|
-- | From a list of account names, select those which are direct
 | 
						|
-- subaccounts of the given account name.
 | 
						|
subAccountNamesFrom :: [AccountName] -> AccountName -> [AccountName]
 | 
						|
subAccountNamesFrom accts a = filter (`isSubAccountNameOf` a) accts
 | 
						|
 | 
						|
-- | Convert a list of account names to a tree.
 | 
						|
accountNameTreeFrom :: [AccountName] -> Tree AccountName
 | 
						|
accountNameTreeFrom accts = 
 | 
						|
    Node "root" (accounttreesfrom (topAccountNames accts))
 | 
						|
        where
 | 
						|
          accounttreesfrom :: [AccountName] -> [Tree AccountName]
 | 
						|
          accounttreesfrom [] = []
 | 
						|
          accounttreesfrom as = [Node a (accounttreesfrom $ subs a) | a <- as]
 | 
						|
          subs = subAccountNamesFrom (expandAccountNames accts)
 | 
						|
 | 
						|
nullaccountnametree = Node "root" []
 | 
						|
 | 
						|
-- | Elide an account name to fit in the specified width.
 | 
						|
-- From the ledger 2.6 news:
 | 
						|
-- 
 | 
						|
-- @
 | 
						|
--   What Ledger now does is that if an account name is too long, it will
 | 
						|
--   start abbreviating the first parts of the account name down to two
 | 
						|
--   letters in length.  If this results in a string that is still too
 | 
						|
--   long, the front will be elided -- not the end.  For example:
 | 
						|
--
 | 
						|
--     Expenses:Cash           ; OK, not too long
 | 
						|
--     Ex:Wednesday:Cash       ; "Expenses" was abbreviated to fit
 | 
						|
--     Ex:We:Afternoon:Cash    ; "Expenses" and "Wednesday" abbreviated
 | 
						|
--     ; Expenses:Wednesday:Afternoon:Lunch:Snack:Candy:Chocolate:Cash
 | 
						|
--     ..:Af:Lu:Sn:Ca:Ch:Cash  ; Abbreviated and elided!
 | 
						|
-- @
 | 
						|
elideAccountName :: Int -> AccountName -> AccountName
 | 
						|
elideAccountName width s = 
 | 
						|
    elideLeft width $ accountNameFromComponents $ elideparts width [] $ accountNameComponents s
 | 
						|
      where
 | 
						|
        elideparts :: Int -> [String] -> [String] -> [String]
 | 
						|
        elideparts width done ss
 | 
						|
          | length (accountNameFromComponents $ done++ss) <= width = done++ss
 | 
						|
          | length ss > 1 = elideparts width (done++[take 2 $ head ss]) (tail ss)
 | 
						|
          | otherwise = done++ss
 | 
						|
 | 
						|
clipAccountName :: Int -> AccountName -> AccountName
 | 
						|
clipAccountName n = accountNameFromComponents . take n . accountNameComponents
 | 
						|
 | 
						|
-- | Convert an account name to a regular expression matching it and its subaccounts.
 | 
						|
accountNameToAccountRegex :: String -> String
 | 
						|
accountNameToAccountRegex "" = ""
 | 
						|
accountNameToAccountRegex a = printf "^%s(:|$)" a
 | 
						|
 | 
						|
-- | Convert an account name to a regular expression matching it but not its subaccounts.
 | 
						|
accountNameToAccountOnlyRegex :: String -> String
 | 
						|
accountNameToAccountOnlyRegex "" = ""
 | 
						|
accountNameToAccountOnlyRegex a = printf "^%s$" a
 | 
						|
 | 
						|
-- | Convert an exact account-matching regular expression to a plain account name.
 | 
						|
accountRegexToAccountName :: String -> String
 | 
						|
accountRegexToAccountName = regexReplace "^\\^(.*?)\\(:\\|\\$\\)$" "\\1"
 | 
						|
 | 
						|
-- | Does this string look like an exact account-matching regular expression ?
 | 
						|
isAccountRegex  :: String -> Bool
 | 
						|
isAccountRegex s = take 1 s == "^" && (take 5 $ reverse s) == ")$|:("
 | 
						|
 | 
						|
tests_Hledger_Data_AccountName = TestList
 | 
						|
 [
 | 
						|
  "accountNameTreeFrom" ~: do
 | 
						|
    accountNameTreeFrom ["a"]       `is` Node "root" [Node "a" []]
 | 
						|
    accountNameTreeFrom ["a","b"]   `is` Node "root" [Node "a" [], Node "b" []]
 | 
						|
    accountNameTreeFrom ["a","a:b"] `is` Node "root" [Node "a" [Node "a:b" []]]
 | 
						|
    accountNameTreeFrom ["a:b:c"]   `is` Node "root" [Node "a" [Node "a:b" [Node "a:b:c" []]]]
 | 
						|
 | 
						|
  ,"expandAccountNames" ~:
 | 
						|
    expandAccountNames ["assets:cash","assets:checking","expenses:vacation"] `is`
 | 
						|
     ["assets","assets:cash","assets:checking","expenses","expenses:vacation"]
 | 
						|
 | 
						|
  ,"isAccountNamePrefixOf" ~: do
 | 
						|
    "assets" `isAccountNamePrefixOf` "assets" `is` False
 | 
						|
    "assets" `isAccountNamePrefixOf` "assets:bank" `is` True
 | 
						|
    "assets" `isAccountNamePrefixOf` "assets:bank:checking" `is` True
 | 
						|
    "my assets" `isAccountNamePrefixOf` "assets:bank" `is` False
 | 
						|
 | 
						|
  ,"isSubAccountNameOf" ~: do
 | 
						|
    "assets" `isSubAccountNameOf` "assets" `is` False
 | 
						|
    "assets:bank" `isSubAccountNameOf` "assets" `is` True
 | 
						|
    "assets:bank:checking" `isSubAccountNameOf` "assets" `is` False
 | 
						|
    "assets:bank" `isSubAccountNameOf` "my assets" `is` False
 | 
						|
 | 
						|
 ]
 | 
						|
 |