hledger/hledger-lib/Hledger/Data/AccountName.hs
Simon Michael 00f22819ae balance report speedup
This refactoring fixes an O(n^2) slowdown in the balance command with
large numbers of accounts. It's now speedy, and the implementation is
clearer. To facilitate this, the Account type now represents a tree of
accounts which can easily be traversed up or down (and/or flattened
into a list).

Benchmark on a 2010 macbook:

    +-------------------------------------------++--------------+------------+--------+
    |                                           || before:      | after:     |        |
    |                                           || hledger-0.18 | hledgeropt | ledger |
    +===========================================++==============+============+========+
    | -f data/100x100x10.journal     balance    ||         0.21 |       0.07 |   0.09 |
    | -f data/1000x1000x10.journal   balance    ||        10.13 |       0.47 |   0.62 |
    | -f data/1000x10000x10.journal  balance    ||        40.67 |       0.67 |   1.01 |
    | -f data/10000x1000x10.journal  balance    ||        15.01 |       3.22 |   2.36 |
    | -f data/10000x1000x10.journal  balance aa ||         4.77 |       4.40 |   2.33 |
    +-------------------------------------------++--------------+------------+--------+
2012-10-21 17:18:18 +00:00

158 lines
6.1 KiB
Haskell

{-# LANGUAGE NoMonomorphismRestriction#-}
{-|
'AccountName's are strings like @assets:cash:petty@, with multiple
components separated by ':'. From a set of these we derive the account
hierarchy.
-}
module Hledger.Data.AccountName
where
import Data.List
import Data.Tree
import Test.HUnit
import Text.Printf
import Hledger.Data.Types
import Hledger.Utils
-- change to use a different separator for nested accounts
acctsepchar = ':'
accountNameComponents :: AccountName -> [String]
accountNameComponents = splitAtElement acctsepchar
accountNameFromComponents :: [String] -> AccountName
accountNameFromComponents = concat . intersperse [acctsepchar]
accountLeafName :: AccountName -> String
accountLeafName = last . accountNameComponents
accountNameLevel :: AccountName -> Int
accountNameLevel "" = 0
accountNameLevel a = length (filter (==acctsepchar) a) + 1
accountNameDrop :: Int -> AccountName -> AccountName
accountNameDrop n = accountNameFromComponents . drop n . accountNameComponents
-- | ["a:b:c","d:e"] -> ["a","a:b","a:b:c","d","d:e"]
expandAccountNames :: [AccountName] -> [AccountName]
expandAccountNames as = nub $ concatMap expandAccountName as
-- | "a:b:c" -> ["a","a:b","a:b:c"]
expandAccountName :: AccountName -> [AccountName]
expandAccountName = map accountNameFromComponents . tail . inits . accountNameComponents
-- | ["a:b:c","d:e"] -> ["a","d"]
topAccountNames :: [AccountName] -> [AccountName]
topAccountNames as = [a | a <- expandAccountNames as, accountNameLevel a == 1]
parentAccountName :: AccountName -> AccountName
parentAccountName = accountNameFromComponents . init . accountNameComponents
parentAccountNames :: AccountName -> [AccountName]
parentAccountNames a = parentAccountNames' $ parentAccountName a
where
parentAccountNames' "" = []
parentAccountNames' a = a : parentAccountNames' (parentAccountName a)
isAccountNamePrefixOf :: AccountName -> AccountName -> Bool
isAccountNamePrefixOf = isPrefixOf . (++ [acctsepchar])
isSubAccountNameOf :: AccountName -> AccountName -> Bool
s `isSubAccountNameOf` p =
(p `isAccountNamePrefixOf` s) && (accountNameLevel s == (accountNameLevel p + 1))
-- | From a list of account names, select those which are direct
-- subaccounts of the given account name.
subAccountNamesFrom :: [AccountName] -> AccountName -> [AccountName]
subAccountNamesFrom accts a = filter (`isSubAccountNameOf` a) accts
-- | Convert a list of account names to a tree.
accountNameTreeFrom :: [AccountName] -> Tree AccountName
accountNameTreeFrom accts =
Node "root" (accounttreesfrom (topAccountNames accts))
where
accounttreesfrom :: [AccountName] -> [Tree AccountName]
accounttreesfrom [] = []
accounttreesfrom as = [Node a (accounttreesfrom $ subs a) | a <- as]
subs = subAccountNamesFrom (expandAccountNames accts)
nullaccountnametree = Node "root" []
-- | Elide an account name to fit in the specified width.
-- From the ledger 2.6 news:
--
-- @
-- What Ledger now does is that if an account name is too long, it will
-- start abbreviating the first parts of the account name down to two
-- letters in length. If this results in a string that is still too
-- long, the front will be elided -- not the end. For example:
--
-- Expenses:Cash ; OK, not too long
-- Ex:Wednesday:Cash ; "Expenses" was abbreviated to fit
-- Ex:We:Afternoon:Cash ; "Expenses" and "Wednesday" abbreviated
-- ; Expenses:Wednesday:Afternoon:Lunch:Snack:Candy:Chocolate:Cash
-- ..:Af:Lu:Sn:Ca:Ch:Cash ; Abbreviated and elided!
-- @
elideAccountName :: Int -> AccountName -> AccountName
elideAccountName width s =
elideLeft width $ accountNameFromComponents $ elideparts width [] $ accountNameComponents s
where
elideparts :: Int -> [String] -> [String] -> [String]
elideparts width done ss
| length (accountNameFromComponents $ done++ss) <= width = done++ss
| length ss > 1 = elideparts width (done++[take 2 $ head ss]) (tail ss)
| otherwise = done++ss
clipAccountName :: Int -> AccountName -> AccountName
clipAccountName n = accountNameFromComponents . take n . accountNameComponents
-- | Convert an account name to a regular expression matching it and its subaccounts.
accountNameToAccountRegex :: String -> String
accountNameToAccountRegex "" = ""
accountNameToAccountRegex a = printf "^%s(:|$)" a
-- | Convert an account name to a regular expression matching it but not its subaccounts.
accountNameToAccountOnlyRegex :: String -> String
accountNameToAccountOnlyRegex "" = ""
accountNameToAccountOnlyRegex a = printf "^%s$" a
-- | Convert an exact account-matching regular expression to a plain account name.
accountRegexToAccountName :: String -> String
accountRegexToAccountName = regexReplace "^\\^(.*?)\\(:\\|\\$\\)$" "\\1"
-- | Does this string look like an exact account-matching regular expression ?
isAccountRegex :: String -> Bool
isAccountRegex s = take 1 s == "^" && (take 5 $ reverse s) == ")$|:("
tests_Hledger_Data_AccountName = TestList
[
"accountNameTreeFrom" ~: do
accountNameTreeFrom ["a"] `is` Node "root" [Node "a" []]
accountNameTreeFrom ["a","b"] `is` Node "root" [Node "a" [], Node "b" []]
accountNameTreeFrom ["a","a:b"] `is` Node "root" [Node "a" [Node "a:b" []]]
accountNameTreeFrom ["a:b:c"] `is` Node "root" [Node "a" [Node "a:b" [Node "a:b:c" []]]]
,"expandAccountNames" ~:
expandAccountNames ["assets:cash","assets:checking","expenses:vacation"] `is`
["assets","assets:cash","assets:checking","expenses","expenses:vacation"]
,"isAccountNamePrefixOf" ~: do
"assets" `isAccountNamePrefixOf` "assets" `is` False
"assets" `isAccountNamePrefixOf` "assets:bank" `is` True
"assets" `isAccountNamePrefixOf` "assets:bank:checking" `is` True
"my assets" `isAccountNamePrefixOf` "assets:bank" `is` False
,"isSubAccountNameOf" ~: do
"assets" `isSubAccountNameOf` "assets" `is` False
"assets:bank" `isSubAccountNameOf` "assets" `is` True
"assets:bank:checking" `isSubAccountNameOf` "assets" `is` False
"assets:bank" `isSubAccountNameOf` "my assets" `is` False
]