imp:journal: fix a slight pessimisation of include directives

Since 1.50.3, canonicalizePath was being called wastefully when
processing journals with many nested include files and/or many matches
for include glob paths. On a slow filesystem, with unusually
many includes, this might have been quite noticeable.

Now we canonicalise each file path just once as it is encountered,
avoiding the wasted IO work.
This commit is contained in:
Simon Michael 2025-12-10 19:14:39 -10:00
parent 0d336dc3f8
commit ebaabe4305
4 changed files with 17 additions and 15 deletions

View File

@ -245,7 +245,7 @@ journalConcat :: Journal -> Journal -> Journal
journalConcat j1 j2 = journalConcat j1 j2 =
let let
f1 = takeFileName $ journalFilePath j1 f1 = takeFileName $ journalFilePath j1
f2 = maybe "(unknown)" takeFileName $ headMay $ jincludefilestack j2 -- XXX more accurate than journalFilePath for some reason f2 = maybe "(unknown)" takeFileName $ fmap fst $ headMay $ jincludefilestack j2 -- XXX more accurate than journalFilePath for some reason
in in
dbgJournalAcctDeclOrder ("journalConcat: " <> f1 <> " <> " <> f2 <> ", acct decls renumbered: ") $ dbgJournalAcctDeclOrder ("journalConcat: " <> f1 <> " <> " <> f2 <> ", acct decls renumbered: ") $
journalRenumberAccountDeclarations $ journalRenumberAccountDeclarations $

View File

@ -633,8 +633,8 @@ data Journal = Journal {
,jparsealiases :: [AccountAlias] -- ^ the current account name aliases in effect, specified by alias directives (& options ?) ,jparsealiases :: [AccountAlias] -- ^ the current account name aliases in effect, specified by alias directives (& options ?)
-- ,jparsetransactioncount :: Integer -- ^ the current count of transactions parsed so far (only journal format txns, currently) -- ,jparsetransactioncount :: Integer -- ^ the current count of transactions parsed so far (only journal format txns, currently)
,jparsetimeclockentries :: [TimeclockEntry] -- ^ timeclock sessions which have not been clocked out ,jparsetimeclockentries :: [TimeclockEntry] -- ^ timeclock sessions which have not been clocked out
,jincludefilestack :: [FilePath] ,jincludefilestack :: [(FilePath, FilePath)] -- ^ (absolute path, canonical path) of included files, most recent first
-- principal data -- principal data
,jdeclaredpayees :: [(Payee,PayeeDeclarationInfo)] -- ^ Payees declared by payee directives, in parse order. ,jdeclaredpayees :: [(Payee,PayeeDeclarationInfo)] -- ^ Payees declared by payee directives, in parse order.
,jdeclaredtags :: [(TagName,TagDeclarationInfo)] -- ^ Tags declared by tag directives, in parse order. ,jdeclaredtags :: [(TagName,TagDeclarationInfo)] -- ^ Tags declared by tag directives, in parse order.
,jdeclaredaccounts :: [(AccountName,AccountDeclarationInfo)] -- ^ Accounts declared by account directives, in parse order. ,jdeclaredaccounts :: [(AccountName,AccountDeclarationInfo)] -- ^ Accounts declared by account directives, in parse order.

View File

@ -151,6 +151,7 @@ import Data.Time.Calendar (Day, fromGregorianValid, toGregorian)
import Data.Time.Clock.POSIX (getPOSIXTime) import Data.Time.Clock.POSIX (getPOSIXTime)
import Data.Time.LocalTime (LocalTime(..), TimeOfDay(..)) import Data.Time.LocalTime (LocalTime(..), TimeOfDay(..))
import Data.Word (Word8) import Data.Word (Word8)
import System.Directory (canonicalizePath)
import System.FilePath (takeFileName) import System.FilePath (takeFileName)
import System.IO (Handle) import System.IO (Handle)
import Text.Megaparsec import Text.Megaparsec
@ -314,11 +315,12 @@ parseAndFinaliseJournal parser iopts f txt =
-- Timeclock and Timedot files. -- Timeclock and Timedot files.
initialiseAndParseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts initialiseAndParseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts
-> FilePath -> Text -> ExceptT String IO Journal -> FilePath -> Text -> ExceptT String IO Journal
initialiseAndParseJournal parser iopts f txt = initialiseAndParseJournal parser iopts f txt = do
prettyParseErrors $ runParserT (evalStateT parser initJournal) f txt cf <- liftIO $ canonicalizePath f
prettyParseErrors $ runParserT (evalStateT parser (initJournal cf)) f txt
where where
y = first3 . toGregorian $ _ioDay iopts y = first3 . toGregorian $ _ioDay iopts
initJournal = nulljournal{jparsedefaultyear = Just y, jincludefilestack = [f]} initJournal cf = nulljournal{jparsedefaultyear = Just y, jincludefilestack = [(f, cf)]}
-- Flatten parse errors and final parse errors, and output each as a pretty String. -- Flatten parse errors and final parse errors, and output each as a pretty String.
prettyParseErrors :: ExceptT FinalParseError IO (Either (ParseErrorBundle Text HledgerParseErrorData) a) prettyParseErrors :: ExceptT FinalParseError IO (Either (ParseErrorBundle Text HledgerParseErrorData) a)
-> ExceptT String IO a -> ExceptT String IO a

View File

@ -410,12 +410,11 @@ includedirectivep iopts = do
-- Throw an error if one of these files is among the grandparent files, forming a cycle. -- Throw an error if one of these files is among the grandparent files, forming a cycle.
-- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ? -- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ?
-- We used to store the canonical paths, then switched to non-canonical paths for more useful output, -- Use canonical paths for cycle detection, show nominal absolute paths in error messages.
-- which means for each include directive we must re-canonicalise everything here; noticeable ? XXX
parentj <- get parentj <- get
let parentfiles = jincludefilestack parentj let parentfiles = jincludefilestack parentj
cparentfiles <- liftIO $ mapM canonicalizePath parentfiles cparentfiles = map snd parentfiles
let cparentf = take 1 parentfiles cparentf = take 1 cparentfiles
files2 <- forM files $ \f -> do files2 <- forM files $ \f -> do
cf <- liftIO $ canonicalizePath f cf <- liftIO $ canonicalizePath f
if if
@ -442,8 +441,9 @@ includedirectivep iopts = do
-- Read the file's content, or throw an error -- Read the file's content, or throw an error
childInput <- lift $ readFilePortably filepath & handleIOError off "failed to read a file" childInput <- lift $ readFilePortably filepath & handleIOError off "failed to read a file"
cfilepath <- liftIO $ canonicalizePath filepath
parentj <- get parentj <- get
let initChildj = newJournalWithParseStateFrom filepath parentj let initChildj = newJournalWithParseStateFrom filepath cfilepath parentj
-- Choose a reader based on the file path prefix or file extension, -- Choose a reader based on the file path prefix or file extension,
-- defaulting to JournalReader. Duplicating readJournal a bit here. -- defaulting to JournalReader. Duplicating readJournal a bit here.
@ -470,14 +470,14 @@ includedirectivep iopts = do
where where
childfilename = takeFileName filepath childfilename = takeFileName filepath
parentfilename = maybe "(unknown)" takeFileName $ headMay $ jincludefilestack parentj -- XXX more accurate than journalFilePath for some reason parentfilename = maybe "(unknown)" takeFileName $ fmap fst $ headMay $ jincludefilestack parentj -- XXX more accurate than journalFilePath for some reason
-- And update the current parse state. -- And update the current parse state.
put parentj' put parentj'
where where
newJournalWithParseStateFrom :: FilePath -> Journal -> Journal newJournalWithParseStateFrom :: FilePath -> FilePath -> Journal -> Journal
newJournalWithParseStateFrom filepath j = nulljournal{ newJournalWithParseStateFrom filepath cfilepath j = nulljournal{
jparsedefaultyear = jparsedefaultyear j jparsedefaultyear = jparsedefaultyear j
,jparsedefaultcommodity = jparsedefaultcommodity j ,jparsedefaultcommodity = jparsedefaultcommodity j
,jparseparentaccounts = jparseparentaccounts j ,jparseparentaccounts = jparseparentaccounts j
@ -486,7 +486,7 @@ includedirectivep iopts = do
,jdeclaredcommodities = jdeclaredcommodities j ,jdeclaredcommodities = jdeclaredcommodities j
-- ,jparsetransactioncount = jparsetransactioncount j -- ,jparsetransactioncount = jparsetransactioncount j
,jparsetimeclockentries = jparsetimeclockentries j ,jparsetimeclockentries = jparsetimeclockentries j
,jincludefilestack = filepath : jincludefilestack j ,jincludefilestack = (filepath, cfilepath) : jincludefilestack j
} }
-- Get the absolute path of the file referenced by this parse position. -- Get the absolute path of the file referenced by this parse position.