fix:journal:include: fix some regressions with glob matching in 1.50-1.50.3
Before 1.50, journal's include directive's handling of glob patterns (*, **, ?, etc.) had these limitations: - ** always searched intermediate dot directories - ** matched only directories, not files In 1.50-1.50.3, it had different limitations, some unintended: - it ignored all dot files, dot dirs, and symbolic links to dot dirs, even when explicitly mentioned in the pattern (unless using --old-glob) - it showed symbolic links dereferenced, eg in `hledger files` output Now it has fewer limitations, mainly this: - it ignores all dot files and dot dirs, even when explicitly mentioned (unless using --old-glob) Ie it no longer ignores symbolic links to dot dirs, and it no longer shows symbolic links dereferenced. Also: including the current file is now always harmless, whether using a glob pattern or not. Internally, file paths in the "include file stack" (jincludefilestack) are now just absolute, but not canonicalised; showing symbolic links un-dereferenced in output and error messages seems generally more useful. This might affect output elsewhere also. (Those paths are still canonicalised on the fly when checking for include cycles, not so efficiently: each time an include directive is parsed, all the current parent files and all the new glob-matched include files will be re-canonicalised. Hopefully this is unnoticeable.)
This commit is contained in:
parent
00f6a832d4
commit
81744d81a1
@ -32,6 +32,7 @@ Hledger.Read.Common, to avoid import cycles.
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE PackageImports #-}
|
{-# LANGUAGE PackageImports #-}
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE MultiWayIf #-}
|
||||||
|
|
||||||
--- ** exports
|
--- ** exports
|
||||||
module Hledger.Read.JournalReader (
|
module Hledger.Read.JournalReader (
|
||||||
@ -72,13 +73,14 @@ where
|
|||||||
|
|
||||||
--- ** imports
|
--- ** imports
|
||||||
import Control.Exception qualified as C
|
import Control.Exception qualified as C
|
||||||
import Control.Monad (forM_, when, void, unless, filterM)
|
import Control.Monad (forM_, when, void, unless, filterM, forM)
|
||||||
import Control.Monad.IO.Class (MonadIO, liftIO)
|
import Control.Monad.IO.Class (MonadIO, liftIO)
|
||||||
import Control.Monad.Except (ExceptT(..), runExceptT)
|
import Control.Monad.Except (ExceptT(..), runExceptT)
|
||||||
import Control.Monad.State.Strict (evalStateT,get,modify',put)
|
import Control.Monad.State.Strict (evalStateT,get,modify',put)
|
||||||
import Control.Monad.Trans.Class (lift)
|
import Control.Monad.Trans.Class (lift)
|
||||||
import Data.Char (toLower)
|
import Data.Char (toLower)
|
||||||
import Data.Either (isRight, lefts)
|
import Data.Either (isRight, lefts)
|
||||||
|
import Data.Functor ((<&>))
|
||||||
import Data.Map.Strict qualified as M
|
import Data.Map.Strict qualified as M
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.String
|
import Data.String
|
||||||
@ -91,6 +93,7 @@ import Safe
|
|||||||
import Text.Megaparsec hiding (parse)
|
import Text.Megaparsec hiding (parse)
|
||||||
import Text.Megaparsec.Char
|
import Text.Megaparsec.Char
|
||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
import System.Directory (canonicalizePath, doesFileExist, makeAbsolute)
|
||||||
import System.FilePath
|
import System.FilePath
|
||||||
import "Glob" System.FilePath.Glob hiding (match)
|
import "Glob" System.FilePath.Glob hiding (match)
|
||||||
-- import "filepattern" System.FilePattern.Directory
|
-- import "filepattern" System.FilePattern.Directory
|
||||||
@ -103,8 +106,6 @@ import Hledger.Read.CsvReader qualified as CsvReader (reader)
|
|||||||
import Hledger.Read.RulesReader qualified as RulesReader (reader)
|
import Hledger.Read.RulesReader qualified as RulesReader (reader)
|
||||||
import Hledger.Read.TimeclockReader qualified as TimeclockReader (reader)
|
import Hledger.Read.TimeclockReader qualified as TimeclockReader (reader)
|
||||||
import Hledger.Read.TimedotReader qualified as TimedotReader (reader)
|
import Hledger.Read.TimedotReader qualified as TimedotReader (reader)
|
||||||
import System.Directory (canonicalizePath, doesFileExist)
|
|
||||||
import Data.Functor ((<&>))
|
|
||||||
|
|
||||||
--- ** doctest setup
|
--- ** doctest setup
|
||||||
-- $setup
|
-- $setup
|
||||||
@ -341,7 +342,7 @@ includedirectivep iopts = do
|
|||||||
Just fmt -> map ((show fmt++":")++) paths
|
Just fmt -> map ((show fmt++":")++) paths
|
||||||
|
|
||||||
-- Parse each one, as if inlined here.
|
-- Parse each one, as if inlined here.
|
||||||
forM_ prefixedpaths $ parseIncludedFile iopts eoff
|
forM_ prefixedpaths $ parseIncludedFile iopts
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
@ -407,9 +408,30 @@ includedirectivep iopts = do
|
|||||||
-- Exclude any directories or symlinks to directories, and canonicalise, and sort.
|
-- Exclude any directories or symlinks to directories, and canonicalise, and sort.
|
||||||
files <- liftIO $
|
files <- liftIO $
|
||||||
filterM doesFileExist paths
|
filterM doesFileExist paths
|
||||||
>>= mapM canonicalizePath
|
>>= mapM makeAbsolute
|
||||||
<&> sort
|
<&> sort
|
||||||
|
|
||||||
|
-- -- If a glob was used, exclude the current file, for convenience.
|
||||||
|
-- let
|
||||||
|
-- files3 =
|
||||||
|
-- dbg6 (parentf <> " include: matched files" <> if isglob then " (excluding current file)" else "") $
|
||||||
|
-- (if isglob then filter (/= parentf) else id) files
|
||||||
|
|
||||||
|
-- Throw an error if one of these files is among the grandparent files, forming a cycle.
|
||||||
|
-- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ?
|
||||||
|
-- We used to store the canonical paths, then switched to non-canonical paths for more useful output,
|
||||||
|
-- which means for each include directive we must re-canonicalise everything here; noticeable ? XXX
|
||||||
|
parentj <- get
|
||||||
|
let parentfiles = jincludefilestack parentj
|
||||||
|
cparentfiles <- liftIO $ mapM canonicalizePath parentfiles
|
||||||
|
let cparentf = take 1 parentfiles
|
||||||
|
files2 <- forM files $ \f -> do
|
||||||
|
cf <- liftIO $ canonicalizePath f
|
||||||
|
if
|
||||||
|
| [cf] == cparentf -> return cf -- current file - return canonicalised, will be excluded later
|
||||||
|
| cf `elem` drop 1 cparentfiles -> customFailure $ parseErrorAt off $ "This included file forms a cycle: " ++ f
|
||||||
|
| otherwise -> return f
|
||||||
|
|
||||||
-- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs,
|
-- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs,
|
||||||
-- it does search dot dirs in between those two (Glob #49).
|
-- it does search dot dirs in between those two (Glob #49).
|
||||||
-- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree.
|
-- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree.
|
||||||
@ -418,38 +440,33 @@ includedirectivep iopts = do
|
|||||||
-- things in dot dirs. An --old-glob command line flag disables this workaround, for backward compatibility.
|
-- things in dot dirs. An --old-glob command line flag disables this workaround, for backward compatibility.
|
||||||
oldglobflag <- liftIO $ getFlag ["old-glob"]
|
oldglobflag <- liftIO $ getFlag ["old-glob"]
|
||||||
let
|
let
|
||||||
files2 = (if isglob && not oldglobflag then filter (not.hasdotdir) else id) files
|
files3 = (if isglob && not oldglobflag then filter (not.hasdotdir) else id) files2
|
||||||
where
|
where
|
||||||
hasdotdir p = any isdotdir $ splitPath p
|
hasdotdir p = any isdotdir $ splitPath p
|
||||||
where
|
where
|
||||||
isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c
|
isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c
|
||||||
|
|
||||||
-- Throw an error if no files were matched.
|
-- Throw an error if no files were matched.
|
||||||
when (null files2) $
|
when (null files3) $ customFailure $ parseErrorAt off $ "No files were matched by: " ++ globpattern
|
||||||
customFailure $ parseErrorAt off $ "No files were matched by glob pattern: " ++ globpattern
|
|
||||||
|
|
||||||
-- If a glob was used, exclude the current file, for convenience.
|
-- If the current file got included, ignore it.
|
||||||
|
-- This is done last to avoid triggering the error above.
|
||||||
let
|
let
|
||||||
files3 =
|
files4 =
|
||||||
dbg6 (parentf <> " include: matched files" <> if isglob then " (excluding current file)" else "") $
|
dbg6 (parentf <> " include: matched files (excluding current file)") $
|
||||||
(if isglob then filter (/= parentf) else id) files2
|
filter (not.(`elem` cparentf)) files3
|
||||||
|
|
||||||
return files3
|
return files4
|
||||||
|
|
||||||
-- Parse the given included file (and any deeper includes, recursively) as if it was inlined in the current (parent) file.
|
-- Parse the given included file (and any deeper includes, recursively) as if it was inlined in the current (parent) file.
|
||||||
-- The offset of the start of the include directive in the parent file is provided for error messages.
|
-- The offset of the start of the include directive in the parent file is provided for error messages.
|
||||||
parseIncludedFile :: MonadIO m => InputOpts -> Int -> PrefixedFilePath -> ErroringJournalParser m ()
|
parseIncludedFile :: MonadIO m => InputOpts -> PrefixedFilePath -> ErroringJournalParser m ()
|
||||||
parseIncludedFile iopts1 eoff prefixedpath = do
|
parseIncludedFile iopts1 prefixedpath = do
|
||||||
let (_mprefix,filepath) = splitReaderPrefix prefixedpath
|
let (_mprefix,filepath) = splitReaderPrefix prefixedpath
|
||||||
|
|
||||||
-- Throw an error if a cycle is detected
|
|
||||||
parentj <- get
|
|
||||||
let parentfilestack = jincludefilestack parentj
|
|
||||||
when (dbg7 "parseIncludedFile: reading" filepath `elem` parentfilestack) $
|
|
||||||
customFailure $ parseErrorAt eoff $ "This included file forms a cycle: " ++ filepath
|
|
||||||
|
|
||||||
-- Read the file's content, or throw an error
|
-- Read the file's content, or throw an error
|
||||||
childInput <- lift $ readFilePortably filepath `orRethrowIOError` "failed to read a file"
|
childInput <- lift $ readFilePortably filepath `orRethrowIOError` "failed to read a file"
|
||||||
|
parentj <- get
|
||||||
let initChildj = newJournalWithParseStateFrom filepath parentj
|
let initChildj = newJournalWithParseStateFrom filepath parentj
|
||||||
|
|
||||||
-- Choose a reader based on the file path prefix or file extension,
|
-- Choose a reader based on the file path prefix or file extension,
|
||||||
@ -496,13 +513,11 @@ includedirectivep iopts = do
|
|||||||
,jincludefilestack = filepath : jincludefilestack j
|
,jincludefilestack = filepath : jincludefilestack j
|
||||||
}
|
}
|
||||||
|
|
||||||
-- Get the canonical path of the file referenced by this parse position.
|
-- Get the absolute path of the file referenced by this parse position.
|
||||||
-- Symbolic links will be dereferenced. This probably will always succeed
|
-- (Symbolic links will not be dereferenced.)
|
||||||
-- (since the parse file's path is probably always absolute).
|
-- This probably will always succeed, since the parse file's path is probably always absolute.
|
||||||
sourcePosFilePath :: (MonadIO m) => SourcePos -> m FilePath
|
sourcePosFilePath :: (MonadIO m) => SourcePos -> m FilePath
|
||||||
sourcePosFilePath = liftIO . canonicalizePath . sourceName
|
sourcePosFilePath = liftIO . makeAbsolute . sourceName
|
||||||
-- "canonicalizePath is a very big hammer. If you only need an absolute path, makeAbsolute is sufficient"
|
|
||||||
-- but we only do this once per include directive, seems ok to leave it as is.
|
|
||||||
|
|
||||||
-- | Lift an IO action into the exception monad, rethrowing any IO
|
-- | Lift an IO action into the exception monad, rethrowing any IO
|
||||||
-- error with the given message prepended.
|
-- error with the given message prepended.
|
||||||
@ -1263,8 +1278,8 @@ tests_JournalReader = testGroup "JournalReader" [
|
|||||||
assertParse ignoredpricecommoditydirectivep "N $\n"
|
assertParse ignoredpricecommoditydirectivep "N $\n"
|
||||||
|
|
||||||
,testGroup "includedirectivep" [
|
,testGroup "includedirectivep" [
|
||||||
testCase "include" $ assertParseErrorE (includedirectivep definputopts) "include nosuchfile\n" "No files were matched by glob pattern: nosuchfile"
|
testCase "include" $ assertParseErrorE (includedirectivep definputopts) "include nosuchfile\n" "No files were matched by: nosuchfile"
|
||||||
,testCase "glob" $ assertParseErrorE (includedirectivep definputopts) "include nosuchfile*\n" "No files were matched by glob pattern: nosuchfile*"
|
,testCase "glob" $ assertParseErrorE (includedirectivep definputopts) "include nosuchfile*\n" "No files were matched by: nosuchfile*"
|
||||||
]
|
]
|
||||||
|
|
||||||
,testCase "marketpricedirectivep" $ assertParseEq marketpricedirectivep
|
,testCase "marketpricedirectivep" $ assertParseEq marketpricedirectivep
|
||||||
|
|||||||
@ -35,7 +35,7 @@ include ~/a.j
|
|||||||
$ HOME="$PWD" hledger -f - print
|
$ HOME="$PWD" hledger -f - print
|
||||||
2025-01-01 a
|
2025-01-01 a
|
||||||
|
|
||||||
>= 0
|
>=
|
||||||
|
|
||||||
# ** 4. include followed by whitespace and/or comment -> missing argument error
|
# ** 4. include followed by whitespace and/or comment -> missing argument error
|
||||||
<
|
<
|
||||||
@ -79,10 +79,9 @@ $ hledger -f- files
|
|||||||
>2 /Invalid glob/
|
>2 /Invalid glob/
|
||||||
>=1
|
>=1
|
||||||
|
|
||||||
# ** 10. Including the current file literally -> cycle error.
|
# ** 10. Including the current file literally -> harmless, current file is ignored.
|
||||||
$ hledger -f self.j files
|
$ hledger -f self.j files | sed -E 's|.*/||'
|
||||||
>2 /included file forms a cycle/
|
self.j
|
||||||
>=1
|
|
||||||
|
|
||||||
# ** 11. Including a multi-file cycle, all literally -> cycle error.
|
# ** 11. Including a multi-file cycle, all literally -> cycle error.
|
||||||
$ hledger -f cycle/cycle.j files
|
$ hledger -f cycle/cycle.j files
|
||||||
@ -90,7 +89,7 @@ $ hledger -f cycle/cycle.j files
|
|||||||
>=1
|
>=1
|
||||||
|
|
||||||
# ** 12. Including the current file via glob -> harmless, globs ignore current file.
|
# ** 12. Including the current file via glob -> harmless, globs ignore current file.
|
||||||
$ hledger -f glob-self.j files | sed -E 's|.*hledger/test/journal/include/||'
|
$ hledger -f glob-self.j files | sed -E 's|.*/||'
|
||||||
glob-self.j
|
glob-self.j
|
||||||
|
|
||||||
# ** 13. Including a cycle, involving globs -> cycle error
|
# ** 13. Including a cycle, involving globs -> cycle error
|
||||||
@ -101,6 +100,7 @@ $ hledger -f cycle/globcycle.j files
|
|||||||
# ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or non-dot subdirs.
|
# ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or non-dot subdirs.
|
||||||
<
|
<
|
||||||
include glob-deep/**/*.j
|
include glob-deep/**/*.j
|
||||||
|
|
||||||
$ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|'
|
$ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|'
|
||||||
-
|
-
|
||||||
glob-deep/a.j
|
glob-deep/a.j
|
||||||
@ -112,8 +112,6 @@ glob-deep/b/bb/bb.j
|
|||||||
glob-deep/c/c.j
|
glob-deep/c/c.j
|
||||||
|
|
||||||
# ** 15. Simpler deep glob **.j -> same as above.
|
# ** 15. Simpler deep glob **.j -> same as above.
|
||||||
<
|
|
||||||
include glob-deep/**/*.j
|
|
||||||
$ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|'
|
$ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|'
|
||||||
-
|
-
|
||||||
glob-deep/a.j
|
glob-deep/a.j
|
||||||
@ -124,25 +122,18 @@ glob-deep/b/b.j
|
|||||||
glob-deep/b/bb/bb.j
|
glob-deep/b/bb/bb.j
|
||||||
glob-deep/c/c.j
|
glob-deep/c/c.j
|
||||||
|
|
||||||
# ** 16. --old-glob preserves pre-1.50 glob behaviour: avoiding dot things at top (.c/) and bottom (b/.d.j),
|
# ** 16. --old-glob preserves pre-1.50 glob behaviour: avoiding dot things at top (.c/)
|
||||||
# but searching intermediate dot dirs (b/.e/).
|
# and at bottom (b/.d.j), searching intermediate dot dirs (b/.e/).
|
||||||
<
|
<
|
||||||
include glob-dot/**.j
|
include glob-dot/**.j
|
||||||
|
|
||||||
$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||'
|
$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||'
|
||||||
-
|
-
|
||||||
a.j
|
a.j
|
||||||
b/.e/e.j
|
b/.e/e.j
|
||||||
b/b.j
|
b/b.j
|
||||||
|
|
||||||
# ** 17. Otherwise, globs avoid all paths involving dot dirs (.c/, b/.d.j, b/.e/) (a workaround).
|
# ** 17. Dot things can be matched by an explicit non-glob path.
|
||||||
<
|
|
||||||
include glob-dot/**.j
|
|
||||||
$ hledger -f - files | sed -E 's|.*/glob-dot/||'
|
|
||||||
-
|
|
||||||
a.j
|
|
||||||
b/b.j
|
|
||||||
|
|
||||||
# ** 18. A non-glob path can match dot things.
|
|
||||||
<
|
<
|
||||||
include glob-dot/.c/c.j
|
include glob-dot/.c/c.j
|
||||||
include glob-dot/b/.d.j
|
include glob-dot/b/.d.j
|
||||||
@ -153,27 +144,36 @@ $ hledger -f - files | sed -E 's|.*/glob-dot/||'
|
|||||||
b/.d.j
|
b/.d.j
|
||||||
b/.e/e.j
|
b/.e/e.j
|
||||||
|
|
||||||
# ** 19. A glob follows a symlink to a regular file (and shows the target file's path).
|
# ** 18. They can't be matched by a glob path, even if explicitly mentioned, by default.
|
||||||
|
<
|
||||||
|
include glob-dot/.c/c.j*
|
||||||
|
$ hledger -f - files | sed -E 's|.*/glob-dot/||'
|
||||||
|
>2 /No files were matched/
|
||||||
|
#>=1 # sed hides the exit code
|
||||||
|
|
||||||
|
# ** 19. They can be matched by an explicit glob path if --old-glob is used.
|
||||||
|
<
|
||||||
|
include glob-dot/.c/c.j*
|
||||||
|
$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||'
|
||||||
|
-
|
||||||
|
.c/c.j
|
||||||
|
|
||||||
|
# ** 20. A glob follows a symlink to a regular file. (And shows the symlink's path.)
|
||||||
$ hledger -f glob-symlinked-file/a.j files | sed -E 's|.*/glob-symlinked-file/||'
|
$ hledger -f glob-symlinked-file/a.j files | sed -E 's|.*/glob-symlinked-file/||'
|
||||||
a.j
|
a.j
|
||||||
c.j
|
b.j
|
||||||
|
|
||||||
# ** 20. A glob follows a symlink to a regular dir.
|
# ** 21. A glob follows a symlink to a regular dir.
|
||||||
$ hledger -f glob-symlinked-dir/a.j files | sed -E 's|.*/glob-symlinked-dir/||'
|
$ hledger -f glob-symlinked-dir/a.j files | sed -E 's|.*/glob-symlinked-dir/||'
|
||||||
a.j
|
a.j
|
||||||
c/c.j
|
b/c.j
|
||||||
|
|
||||||
# ** 21. A glob follows a symlink to a dot file.
|
# ** 22. A glob follows a symlink to a dot file.
|
||||||
$ hledger -f glob-symlinked-dotfile/a.j files | sed -E 's|.*/glob-symlinked-dotfile/||'
|
$ hledger -f glob-symlinked-dotfile/a.j files | sed -E 's|.*/glob-symlinked-dotfile/||'
|
||||||
a.j
|
a.j
|
||||||
.c.j
|
b.j
|
||||||
|
|
||||||
# ** 22. A glob does not follow a symlink to a dot dir.
|
# ** 23. A glob follows a symlink to a dot dir.
|
||||||
$ hledger -f glob-symlinked-dotdir/a.j files | sed -E 's|.*/glob-symlinked-dotdir/||'
|
$ hledger -f glob-symlinked-dotdir/a.j files | sed -E 's|.*/glob-symlinked-dotdir/||'
|
||||||
>2 /No files were matched by glob pattern/
|
|
||||||
#>=1 # sed hides this
|
|
||||||
|
|
||||||
# ** 23. A glob does follow a symlink to a dot dir if --old-glob is used.
|
|
||||||
$ hledger -f glob-symlinked-dotdir/a.j files --old-glob | sed -E 's|.*/glob-symlinked-dotdir/||'
|
|
||||||
a.j
|
a.j
|
||||||
.c/c.j
|
b/c.j
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user