diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index 4a06a09c3..cbc382e2e 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -32,6 +32,7 @@ Hledger.Read.Common, to avoid import cycles. {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PackageImports #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE MultiWayIf #-} --- ** exports module Hledger.Read.JournalReader ( @@ -72,13 +73,14 @@ where --- ** imports import Control.Exception qualified as C -import Control.Monad (forM_, when, void, unless, filterM) +import Control.Monad (forM_, when, void, unless, filterM, forM) import Control.Monad.IO.Class (MonadIO, liftIO) import Control.Monad.Except (ExceptT(..), runExceptT) import Control.Monad.State.Strict (evalStateT,get,modify',put) import Control.Monad.Trans.Class (lift) import Data.Char (toLower) import Data.Either (isRight, lefts) +import Data.Functor ((<&>)) import Data.Map.Strict qualified as M import Data.Text (Text) import Data.String @@ -91,6 +93,7 @@ import Safe import Text.Megaparsec hiding (parse) import Text.Megaparsec.Char import Text.Printf +import System.Directory (canonicalizePath, doesFileExist, makeAbsolute) import System.FilePath import "Glob" System.FilePath.Glob hiding (match) -- import "filepattern" System.FilePattern.Directory @@ -103,8 +106,6 @@ import Hledger.Read.CsvReader qualified as CsvReader (reader) import Hledger.Read.RulesReader qualified as RulesReader (reader) import Hledger.Read.TimeclockReader qualified as TimeclockReader (reader) import Hledger.Read.TimedotReader qualified as TimedotReader (reader) -import System.Directory (canonicalizePath, doesFileExist) -import Data.Functor ((<&>)) --- ** doctest setup -- $setup @@ -341,7 +342,7 @@ includedirectivep iopts = do Just fmt -> map ((show fmt++":")++) paths -- Parse each one, as if inlined here. - forM_ prefixedpaths $ parseIncludedFile iopts eoff + forM_ prefixedpaths $ parseIncludedFile iopts where @@ -407,9 +408,30 @@ includedirectivep iopts = do -- Exclude any directories or symlinks to directories, and canonicalise, and sort. files <- liftIO $ filterM doesFileExist paths - >>= mapM canonicalizePath + >>= mapM makeAbsolute <&> sort + -- -- If a glob was used, exclude the current file, for convenience. + -- let + -- files3 = + -- dbg6 (parentf <> " include: matched files" <> if isglob then " (excluding current file)" else "") $ + -- (if isglob then filter (/= parentf) else id) files + + -- Throw an error if one of these files is among the grandparent files, forming a cycle. + -- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ? + -- We used to store the canonical paths, then switched to non-canonical paths for more useful output, + -- which means for each include directive we must re-canonicalise everything here; noticeable ? XXX + parentj <- get + let parentfiles = jincludefilestack parentj + cparentfiles <- liftIO $ mapM canonicalizePath parentfiles + let cparentf = take 1 parentfiles + files2 <- forM files $ \f -> do + cf <- liftIO $ canonicalizePath f + if + | [cf] == cparentf -> return cf -- current file - return canonicalised, will be excluded later + | cf `elem` drop 1 cparentfiles -> customFailure $ parseErrorAt off $ "This included file forms a cycle: " ++ f + | otherwise -> return f + -- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs, -- it does search dot dirs in between those two (Glob #49). -- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree. @@ -418,38 +440,33 @@ includedirectivep iopts = do -- things in dot dirs. An --old-glob command line flag disables this workaround, for backward compatibility. oldglobflag <- liftIO $ getFlag ["old-glob"] let - files2 = (if isglob && not oldglobflag then filter (not.hasdotdir) else id) files + files3 = (if isglob && not oldglobflag then filter (not.hasdotdir) else id) files2 where hasdotdir p = any isdotdir $ splitPath p where isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c -- Throw an error if no files were matched. - when (null files2) $ - customFailure $ parseErrorAt off $ "No files were matched by glob pattern: " ++ globpattern + when (null files3) $ customFailure $ parseErrorAt off $ "No files were matched by: " ++ globpattern - -- If a glob was used, exclude the current file, for convenience. + -- If the current file got included, ignore it. + -- This is done last to avoid triggering the error above. let - files3 = - dbg6 (parentf <> " include: matched files" <> if isglob then " (excluding current file)" else "") $ - (if isglob then filter (/= parentf) else id) files2 + files4 = + dbg6 (parentf <> " include: matched files (excluding current file)") $ + filter (not.(`elem` cparentf)) files3 - return files3 + return files4 -- Parse the given included file (and any deeper includes, recursively) as if it was inlined in the current (parent) file. -- The offset of the start of the include directive in the parent file is provided for error messages. - parseIncludedFile :: MonadIO m => InputOpts -> Int -> PrefixedFilePath -> ErroringJournalParser m () - parseIncludedFile iopts1 eoff prefixedpath = do + parseIncludedFile :: MonadIO m => InputOpts -> PrefixedFilePath -> ErroringJournalParser m () + parseIncludedFile iopts1 prefixedpath = do let (_mprefix,filepath) = splitReaderPrefix prefixedpath - -- Throw an error if a cycle is detected - parentj <- get - let parentfilestack = jincludefilestack parentj - when (dbg7 "parseIncludedFile: reading" filepath `elem` parentfilestack) $ - customFailure $ parseErrorAt eoff $ "This included file forms a cycle: " ++ filepath - -- Read the file's content, or throw an error childInput <- lift $ readFilePortably filepath `orRethrowIOError` "failed to read a file" + parentj <- get let initChildj = newJournalWithParseStateFrom filepath parentj -- Choose a reader based on the file path prefix or file extension, @@ -496,13 +513,11 @@ includedirectivep iopts = do ,jincludefilestack = filepath : jincludefilestack j } --- Get the canonical path of the file referenced by this parse position. --- Symbolic links will be dereferenced. This probably will always succeed --- (since the parse file's path is probably always absolute). +-- Get the absolute path of the file referenced by this parse position. +-- (Symbolic links will not be dereferenced.) +-- This probably will always succeed, since the parse file's path is probably always absolute. sourcePosFilePath :: (MonadIO m) => SourcePos -> m FilePath -sourcePosFilePath = liftIO . canonicalizePath . sourceName --- "canonicalizePath is a very big hammer. If you only need an absolute path, makeAbsolute is sufficient" --- but we only do this once per include directive, seems ok to leave it as is. +sourcePosFilePath = liftIO . makeAbsolute . sourceName -- | Lift an IO action into the exception monad, rethrowing any IO -- error with the given message prepended. @@ -1263,8 +1278,8 @@ tests_JournalReader = testGroup "JournalReader" [ assertParse ignoredpricecommoditydirectivep "N $\n" ,testGroup "includedirectivep" [ - testCase "include" $ assertParseErrorE (includedirectivep definputopts) "include nosuchfile\n" "No files were matched by glob pattern: nosuchfile" - ,testCase "glob" $ assertParseErrorE (includedirectivep definputopts) "include nosuchfile*\n" "No files were matched by glob pattern: nosuchfile*" + testCase "include" $ assertParseErrorE (includedirectivep definputopts) "include nosuchfile\n" "No files were matched by: nosuchfile" + ,testCase "glob" $ assertParseErrorE (includedirectivep definputopts) "include nosuchfile*\n" "No files were matched by: nosuchfile*" ] ,testCase "marketpricedirectivep" $ assertParseEq marketpricedirectivep diff --git a/hledger/test/journal/include/include.test b/hledger/test/journal/include/include.test index 21f30ed52..252f6614b 100644 --- a/hledger/test/journal/include/include.test +++ b/hledger/test/journal/include/include.test @@ -35,7 +35,7 @@ include ~/a.j $ HOME="$PWD" hledger -f - print 2025-01-01 a ->= 0 +>= # ** 4. include followed by whitespace and/or comment -> missing argument error < @@ -79,10 +79,9 @@ $ hledger -f- files >2 /Invalid glob/ >=1 -# ** 10. Including the current file literally -> cycle error. -$ hledger -f self.j files ->2 /included file forms a cycle/ ->=1 +# ** 10. Including the current file literally -> harmless, current file is ignored. +$ hledger -f self.j files | sed -E 's|.*/||' +self.j # ** 11. Including a multi-file cycle, all literally -> cycle error. $ hledger -f cycle/cycle.j files @@ -90,7 +89,7 @@ $ hledger -f cycle/cycle.j files >=1 # ** 12. Including the current file via glob -> harmless, globs ignore current file. -$ hledger -f glob-self.j files | sed -E 's|.*hledger/test/journal/include/||' +$ hledger -f glob-self.j files | sed -E 's|.*/||' glob-self.j # ** 13. Including a cycle, involving globs -> cycle error @@ -101,6 +100,7 @@ $ hledger -f cycle/globcycle.j files # ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or non-dot subdirs. < include glob-deep/**/*.j + $ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|' - glob-deep/a.j @@ -112,8 +112,6 @@ glob-deep/b/bb/bb.j glob-deep/c/c.j # ** 15. Simpler deep glob **.j -> same as above. -< -include glob-deep/**/*.j $ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|' - glob-deep/a.j @@ -124,25 +122,18 @@ glob-deep/b/b.j glob-deep/b/bb/bb.j glob-deep/c/c.j -# ** 16. --old-glob preserves pre-1.50 glob behaviour: avoiding dot things at top (.c/) and bottom (b/.d.j), -# but searching intermediate dot dirs (b/.e/). +# ** 16. --old-glob preserves pre-1.50 glob behaviour: avoiding dot things at top (.c/) +# and at bottom (b/.d.j), searching intermediate dot dirs (b/.e/). < include glob-dot/**.j + $ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||' - a.j b/.e/e.j b/b.j -# ** 17. Otherwise, globs avoid all paths involving dot dirs (.c/, b/.d.j, b/.e/) (a workaround). -< -include glob-dot/**.j -$ hledger -f - files | sed -E 's|.*/glob-dot/||' -- -a.j -b/b.j - -# ** 18. A non-glob path can match dot things. +# ** 17. Dot things can be matched by an explicit non-glob path. < include glob-dot/.c/c.j include glob-dot/b/.d.j @@ -153,27 +144,36 @@ $ hledger -f - files | sed -E 's|.*/glob-dot/||' b/.d.j b/.e/e.j -# ** 19. A glob follows a symlink to a regular file (and shows the target file's path). +# ** 18. They can't be matched by a glob path, even if explicitly mentioned, by default. +< +include glob-dot/.c/c.j* +$ hledger -f - files | sed -E 's|.*/glob-dot/||' +>2 /No files were matched/ +#>=1 # sed hides the exit code + +# ** 19. They can be matched by an explicit glob path if --old-glob is used. +< +include glob-dot/.c/c.j* +$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||' +- +.c/c.j + +# ** 20. A glob follows a symlink to a regular file. (And shows the symlink's path.) $ hledger -f glob-symlinked-file/a.j files | sed -E 's|.*/glob-symlinked-file/||' a.j -c.j +b.j -# ** 20. A glob follows a symlink to a regular dir. +# ** 21. A glob follows a symlink to a regular dir. $ hledger -f glob-symlinked-dir/a.j files | sed -E 's|.*/glob-symlinked-dir/||' a.j -c/c.j +b/c.j -# ** 21. A glob follows a symlink to a dot file. +# ** 22. A glob follows a symlink to a dot file. $ hledger -f glob-symlinked-dotfile/a.j files | sed -E 's|.*/glob-symlinked-dotfile/||' a.j -.c.j +b.j -# ** 22. A glob does not follow a symlink to a dot dir. +# ** 23. A glob follows a symlink to a dot dir. $ hledger -f glob-symlinked-dotdir/a.j files | sed -E 's|.*/glob-symlinked-dotdir/||' ->2 /No files were matched by glob pattern/ -#>=1 # sed hides this - -# ** 23. A glob does follow a symlink to a dot dir if --old-glob is used. -$ hledger -f glob-symlinked-dotdir/a.j files --old-glob | sed -E 's|.*/glob-symlinked-dotdir/||' a.j -.c/c.j +b/c.j