imp:journal: fix a slight pessimisation of include directives

Since 1.50.3, canonicalizePath was being called wastefully when
processing journals with many nested include files and/or many matches
for include glob paths. On a slow filesystem, with unusually
many includes, this might have been quite noticeable.

Now we canonicalise each file path just once as it is encountered,
avoiding the wasted IO work.
This commit is contained in:
Simon Michael 2025-12-10 19:14:39 -10:00
parent 0d336dc3f8
commit ebaabe4305
4 changed files with 17 additions and 15 deletions

View File

@ -245,7 +245,7 @@ journalConcat :: Journal -> Journal -> Journal
journalConcat j1 j2 =
let
f1 = takeFileName $ journalFilePath j1
f2 = maybe "(unknown)" takeFileName $ headMay $ jincludefilestack j2 -- XXX more accurate than journalFilePath for some reason
f2 = maybe "(unknown)" takeFileName $ fmap fst $ headMay $ jincludefilestack j2 -- XXX more accurate than journalFilePath for some reason
in
dbgJournalAcctDeclOrder ("journalConcat: " <> f1 <> " <> " <> f2 <> ", acct decls renumbered: ") $
journalRenumberAccountDeclarations $

View File

@ -633,8 +633,8 @@ data Journal = Journal {
,jparsealiases :: [AccountAlias] -- ^ the current account name aliases in effect, specified by alias directives (& options ?)
-- ,jparsetransactioncount :: Integer -- ^ the current count of transactions parsed so far (only journal format txns, currently)
,jparsetimeclockentries :: [TimeclockEntry] -- ^ timeclock sessions which have not been clocked out
,jincludefilestack :: [FilePath]
-- principal data
,jincludefilestack :: [(FilePath, FilePath)] -- ^ (absolute path, canonical path) of included files, most recent first
-- principal data
,jdeclaredpayees :: [(Payee,PayeeDeclarationInfo)] -- ^ Payees declared by payee directives, in parse order.
,jdeclaredtags :: [(TagName,TagDeclarationInfo)] -- ^ Tags declared by tag directives, in parse order.
,jdeclaredaccounts :: [(AccountName,AccountDeclarationInfo)] -- ^ Accounts declared by account directives, in parse order.

View File

@ -151,6 +151,7 @@ import Data.Time.Calendar (Day, fromGregorianValid, toGregorian)
import Data.Time.Clock.POSIX (getPOSIXTime)
import Data.Time.LocalTime (LocalTime(..), TimeOfDay(..))
import Data.Word (Word8)
import System.Directory (canonicalizePath)
import System.FilePath (takeFileName)
import System.IO (Handle)
import Text.Megaparsec
@ -314,11 +315,12 @@ parseAndFinaliseJournal parser iopts f txt =
-- Timeclock and Timedot files.
initialiseAndParseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts
-> FilePath -> Text -> ExceptT String IO Journal
initialiseAndParseJournal parser iopts f txt =
prettyParseErrors $ runParserT (evalStateT parser initJournal) f txt
initialiseAndParseJournal parser iopts f txt = do
cf <- liftIO $ canonicalizePath f
prettyParseErrors $ runParserT (evalStateT parser (initJournal cf)) f txt
where
y = first3 . toGregorian $ _ioDay iopts
initJournal = nulljournal{jparsedefaultyear = Just y, jincludefilestack = [f]}
initJournal cf = nulljournal{jparsedefaultyear = Just y, jincludefilestack = [(f, cf)]}
-- Flatten parse errors and final parse errors, and output each as a pretty String.
prettyParseErrors :: ExceptT FinalParseError IO (Either (ParseErrorBundle Text HledgerParseErrorData) a)
-> ExceptT String IO a

View File

@ -410,12 +410,11 @@ includedirectivep iopts = do
-- Throw an error if one of these files is among the grandparent files, forming a cycle.
-- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ?
-- We used to store the canonical paths, then switched to non-canonical paths for more useful output,
-- which means for each include directive we must re-canonicalise everything here; noticeable ? XXX
-- Use canonical paths for cycle detection, show nominal absolute paths in error messages.
parentj <- get
let parentfiles = jincludefilestack parentj
cparentfiles <- liftIO $ mapM canonicalizePath parentfiles
let cparentf = take 1 parentfiles
cparentfiles = map snd parentfiles
cparentf = take 1 cparentfiles
files2 <- forM files $ \f -> do
cf <- liftIO $ canonicalizePath f
if
@ -442,8 +441,9 @@ includedirectivep iopts = do
-- Read the file's content, or throw an error
childInput <- lift $ readFilePortably filepath & handleIOError off "failed to read a file"
cfilepath <- liftIO $ canonicalizePath filepath
parentj <- get
let initChildj = newJournalWithParseStateFrom filepath parentj
let initChildj = newJournalWithParseStateFrom filepath cfilepath parentj
-- Choose a reader based on the file path prefix or file extension,
-- defaulting to JournalReader. Duplicating readJournal a bit here.
@ -470,14 +470,14 @@ includedirectivep iopts = do
where
childfilename = takeFileName filepath
parentfilename = maybe "(unknown)" takeFileName $ headMay $ jincludefilestack parentj -- XXX more accurate than journalFilePath for some reason
parentfilename = maybe "(unknown)" takeFileName $ fmap fst $ headMay $ jincludefilestack parentj -- XXX more accurate than journalFilePath for some reason
-- And update the current parse state.
put parentj'
where
newJournalWithParseStateFrom :: FilePath -> Journal -> Journal
newJournalWithParseStateFrom filepath j = nulljournal{
newJournalWithParseStateFrom :: FilePath -> FilePath -> Journal -> Journal
newJournalWithParseStateFrom filepath cfilepath j = nulljournal{
jparsedefaultyear = jparsedefaultyear j
,jparsedefaultcommodity = jparsedefaultcommodity j
,jparseparentaccounts = jparseparentaccounts j
@ -486,7 +486,7 @@ includedirectivep iopts = do
,jdeclaredcommodities = jdeclaredcommodities j
-- ,jparsetransactioncount = jparsetransactioncount j
,jparsetimeclockentries = jparsetimeclockentries j
,jincludefilestack = filepath : jincludefilestack j
,jincludefilestack = (filepath, cfilepath) : jincludefilestack j
}
-- Get the absolute path of the file referenced by this parse position.