imp:lib:valuation: optimize price lookup with pre-built indexes [#2511]

Replace O(n log n) re-sorting of all prices on every valuation date
with O(log n) indexed lookups. By pre-building sorted price indexes
once at startup using O(n log n) time, we avoid redundant work
during reports.

This significantly improves performance for --value=end,COMM with daily
reports over long periods and large price databases.

Implementation:
- PriceIndex maps commodity pairs to a Map from date to effective
  price, enabling O(log n) temporal lookups via M.lookupLE.
- DefaultValuationIndex provides efficient resolution of destination
  commodities using the same temporal logic.
- makePriceGraph is updated to consume these indexes.

Signed-off-by: Oleg Bulatov <oleg@bulatov.me>
This commit is contained in:
Oleg Bulatov 2025-12-30 00:02:52 +01:00 committed by Simon Michael
parent 1a5f10f928
commit abd7d60884

View File

@ -32,13 +32,13 @@ module Hledger.Data.Valuation (
where where
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import Data.Function ((&), on) import Data.Function ((&))
import Data.List (partition, intercalate, sortBy) import Data.List (partition, intercalate, sortBy)
import Data.List.Extra (nubSortBy)
import Data.Map qualified as M import Data.Map qualified as M
import Data.Set qualified as S import Data.Set qualified as S
import Data.Text qualified as T import Data.Text qualified as T
import Data.Time.Calendar (Day, fromGregorian) import Data.Time.Calendar (Day, fromGregorian)
import Data.Maybe (fromMaybe, mapMaybe)
import Data.MemoUgly (memo) import Data.MemoUgly (memo)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Safe (headMay, lastMay) import Safe (headMay, lastMay)
@ -84,6 +84,92 @@ valuationTypeValuationCommodity = \case
-- given date. -- given date.
type PriceOracle = (Day, CommoditySymbol, Maybe CommoditySymbol) -> Maybe (CommoditySymbol, Quantity) type PriceOracle = (Day, CommoditySymbol, Maybe CommoditySymbol) -> Maybe (CommoditySymbol, Quantity)
-- | An index of market prices for efficient lookup by commodity pair and date.
-- Maps each (from, to) commodity pair to a Map from date to the effective price,
-- where declared prices take precedence over inferred prices on the same day.
-- This allows O(log n) lookup per pair via M.lookupLE.
type PriceIndex = M.Map (CommoditySymbol, CommoditySymbol) (M.Map Day MarketPrice)
-- | Build a price index from declared and inferred market prices.
-- This is O(n log n) but done only once, enabling fast lookups later.
buildPriceIndex :: [MarketPrice] -> [MarketPrice] -> PriceIndex
buildPriceIndex declaredprices inferredprices =
let
-- Label each price with precedence (declared=True > inferred=False) and parse order
declaredprices' = [(mpdate p, True, i, p) | (i, p) <- zip [1..] declaredprices]
inferredprices' = [(mpdate p, False, i, p) | (i, p) <- zip [1..] inferredprices]
allprices = declaredprices' ++ inferredprices'
-- Group by commodity pair
grouped = M.fromListWith (++)
[((mpfrom p, mpto p), [(d, prec, order, p)]) | (d, prec, order, p) <- allprices]
-- Build inner Map: sort ascending by (date, prec, order), then M.fromList
-- keeps the last entry per date (highest precedence/parseorder wins)
buildInnerMap prices =
prices
& sortBy compare
& map (\(d, _, _, p) -> (d, p))
& M.fromList
in
M.map buildInnerMap grouped
-- | Look up effective prices for all commodity pairs at a given date using the index.
-- Returns at most one price per commodity pair: the latest price on or before the date.
-- O(pairs × log n) where n is the number of prices per pair.
lookupEffectivePricesFromIndex :: Day -> PriceIndex -> [MarketPrice]
lookupEffectivePricesFromIndex d idx =
mapMaybe (fmap snd . M.lookupLE d) (M.elems idx)
-- | Index for default valuation commodity lookup.
-- Maps source commodity to a map of (date -> destination), supporting O(log n) lookup
-- of the latest destination commodity on or before any given date.
type DefaultValuationIndex = M.Map CommoditySymbol (M.Map Day CommoditySymbol)
-- | Build an index for default valuation commodity lookup from a list of market prices.
buildDefaultValuationIndex :: [MarketPrice] -> DefaultValuationIndex
buildDefaultValuationIndex prices =
let
-- Label with parse order
labeled = [(mpfrom p, mpdate p, i, mpto p) | (i, p) <- zip [1..] prices]
-- Group by source commodity
grouped = M.fromListWith (++) [(from, [(d, ord, to)]) | (from, d, ord, to) <- labeled]
-- Build inner Map: sort by (date, parseorder), then M.fromList keeps last (highest parseorder per date)
buildInnerMap entries = M.fromList [(d, to) | (d, _, to) <- sortBy compare entries]
in
M.map buildInnerMap grouped
-- | Combined indexes for efficient price lookup.
data PriceIndexes = PriceIndexes
{ piForward :: !PriceIndex -- ^ Index for forward prices (declared + inferred)
, piDeclaredDefault :: !DefaultValuationIndex -- ^ Index for declared prices (for default valuation)
, piInferredDefault :: !DefaultValuationIndex -- ^ Index for inferred prices (fallback for default valuation)
}
-- | Build all price indexes from declared and inferred market prices.
-- This is O(n log n) but done only once.
buildPriceIndexes :: [MarketPrice] -> [MarketPrice] -> PriceIndexes
buildPriceIndexes declaredprices inferredprices = PriceIndexes
{ piForward = buildPriceIndex declaredprices inferredprices
, piDeclaredDefault = buildDefaultValuationIndex declaredprices
, piInferredDefault = buildDefaultValuationIndex inferredprices
}
-- | Look up default valuation commodities for all source commodities at a given date.
-- Fallback logic: declared at date d, then declared at any date, then inferred at date d.
lookupDefaultValuations :: Day -> PriceIndexes -> M.Map CommoditySymbol CommoditySymbol
lookupDefaultValuations d PriceIndexes{..} =
fromMaybe fallback (tryDeclaredAtDate <|> tryDeclaredLatest)
where
nonEmpty m = if M.null m then Nothing else Just m
tryDeclaredAtDate = nonEmpty $ lookupDefaults (Just d) piDeclaredDefault
tryDeclaredLatest = nonEmpty $ lookupDefaults Nothing piDeclaredDefault
fallback = lookupDefaults (Just d) piInferredDefault
lookupDefaults mdate = M.mapMaybe $ \innerMap ->
case mdate of
Nothing -> snd <$> M.lookupMax innerMap
Just dt -> snd <$> M.lookupLE dt innerMap
-- | Generate a price oracle (memoising price lookup function) from a -- | Generate a price oracle (memoising price lookup function) from a
-- journal's directive-declared and transaction-inferred market -- journal's directive-declared and transaction-inferred market
-- prices. For best performance, generate this only once per journal, -- prices. For best performance, generate this only once per journal,
@ -98,7 +184,9 @@ journalPriceOracle infer Journal{jpricedirectives, jinferredmarketprices} =
inferredprices = inferredprices =
(if infer then jinferredmarketprices else []) (if infer then jinferredmarketprices else [])
& dbg2Msg ("use prices inferred from costs? " <> if infer then "yes" else "no") & dbg2Msg ("use prices inferred from costs? " <> if infer then "yes" else "no")
makepricegraph = memo $ makePriceGraph declaredprices inferredprices -- Build indexes once for all lookups
indexes = buildPriceIndexes declaredprices inferredprices
makepricegraph = memo $ makePriceGraph indexes
in in
memo $ uncurry3 $ priceLookup makepricegraph memo $ uncurry3 $ priceLookup makepricegraph
@ -318,7 +406,7 @@ tests_priceLookup =
,p 2000 01 01 "E" 2 "D" ,p 2000 01 01 "E" 2 "D"
,p 2001 01 01 "A" 11 "B" ,p 2001 01 01 "A" 11 "B"
] ]
makepricegraph = makePriceGraph ps1 [] makepricegraph = makePriceGraph (buildPriceIndexes ps1 [])
in testCase "priceLookup" $ do in testCase "priceLookup" $ do
priceLookup makepricegraph (fromGregorian 1999 01 01) "A" Nothing @?= Nothing priceLookup makepricegraph (fromGregorian 1999 01 01) "A" Nothing @?= Nothing
priceLookup makepricegraph (fromGregorian 2000 01 01) "A" Nothing @?= Just ("B",10) priceLookup makepricegraph (fromGregorian 2000 01 01) "A" Nothing @?= Just ("B",10)
@ -466,8 +554,9 @@ prefix l = if null l then (""++) else ((l++": ")++)
-- the `--infer-market-prices` flag is used, then the price commodity from -- the `--infer-market-prices` flag is used, then the price commodity from
-- the latest transaction price for A on or before valuation date." -- the latest transaction price for A on or before valuation date."
-- --
makePriceGraph :: [MarketPrice] -> [MarketPrice] -> Day -> PriceGraph -- | Build the price graph using pre-built indexes for O(pairs × log n) lookup.
makePriceGraph alldeclaredprices allinferredprices d = makePriceGraph :: PriceIndexes -> Day -> PriceGraph
makePriceGraph indexes d =
dbg9 ("makePriceGraph "++show d) $ dbg9 ("makePriceGraph "++show d) $
PriceGraph{ PriceGraph{
pgDate = d pgDate = d
@ -478,10 +567,9 @@ makePriceGraph alldeclaredprices allinferredprices d =
where where
-- XXX logic duplicated in Hledger.Cli.Commands.Prices.prices, keep synced -- XXX logic duplicated in Hledger.Cli.Commands.Prices.prices, keep synced
-- prices in effect on date d, either declared or inferred -- get the latest effective price for each commodity pair on or before date d
visibledeclaredprices = dbg9 "visibledeclaredprices" $ filter ((<=d).mpdate) alldeclaredprices forwardprices = dbg9 "effective forward prices" $
visibleinferredprices = dbg9 "visibleinferredprices" $ filter ((<=d).mpdate) allinferredprices lookupEffectivePricesFromIndex d (piForward indexes)
forwardprices = effectiveMarketPrices visibledeclaredprices visibleinferredprices
-- infer any additional reverse prices not already declared or inferred -- infer any additional reverse prices not already declared or inferred
reverseprices = dbg9 "additional reverse prices" $ reverseprices = dbg9 "additional reverse prices" $
@ -492,42 +580,9 @@ makePriceGraph alldeclaredprices allinferredprices d =
forwardpairs = S.fromList [(mpfrom,mpto) | MarketPrice{..} <- forwardprices] forwardpairs = S.fromList [(mpfrom,mpto) | MarketPrice{..} <- forwardprices]
allprices = forwardprices ++ reverseprices allprices = forwardprices ++ reverseprices
-- determine a default valuation commodity for each source commodity -- use indexed lookup for default valuation commodities
-- somewhat but not quite like effectiveMarketPrices defaultdests = dbg9 "default valuation commodities" $
defaultdests = M.fromList [(mpfrom,mpto) | MarketPrice{..} <- pricesfordefaultcomms] lookupDefaultValuations d indexes
where
pricesfordefaultcomms = dbg9 "prices for choosing default valuation commodities, by date then parse order" $
ps
& zip [1..] -- label items with their parse order
& sortBy (compare `on` (\(parseorder,MarketPrice{..})->(mpdate,parseorder))) -- sort by increasing date then increasing parse order
& map snd -- discard labels
where
ps | not $ null visibledeclaredprices = visibledeclaredprices
| not $ null alldeclaredprices = alldeclaredprices
| otherwise = visibleinferredprices -- will be null without --infer-market-prices
-- | Given a list of P-declared market prices in parse order and a
-- list of transaction-inferred market prices in parse order, select
-- just the latest prices that are in effect for each commodity pair.
-- That is, for each commodity pair, the latest price by date then
-- parse order, with declared prices having precedence over inferred
-- prices on the same day.
effectiveMarketPrices :: [MarketPrice] -> [MarketPrice] -> [MarketPrice]
effectiveMarketPrices declaredprices inferredprices =
let
-- label each item with its same-day precedence, then parse order
declaredprices' = [(1, i, p) | (i,p) <- zip [1..] declaredprices]
inferredprices' = [(0, i, p) | (i,p) <- zip [1..] inferredprices]
in
dbg9 "effective forward prices" $
-- combine
declaredprices' ++ inferredprices'
-- sort by decreasing date then decreasing precedence then decreasing parse order
& sortBy (flip compare `on` (\(precedence,parseorder,mp)->(mpdate mp,precedence,parseorder)))
-- discard the sorting labels
& map third3
-- keep only the first (ie the newest, highest precedence, latest parsed) price for each pair
& nubSortBy (compare `on` (\(MarketPrice{..})->(mpfrom,mpto)))
marketPriceReverse :: MarketPrice -> MarketPrice marketPriceReverse :: MarketPrice -> MarketPrice
marketPriceReverse mp@MarketPrice{..} = marketPriceReverse mp@MarketPrice{..} =