fix:journal:include: drop 1.50's exclusion of glob-matched dot paths [#2498]

1.50* attempted to work around Glob's implicit searching of non-top-level dot dirs.
This was overzealous; it meant that journal's include completely
excluded paths involving a glob and a dot dir or dot file anywhere in the path.

Now, the pre-1.50 behaviour is restored:
`*` and `**` won't implicitly match dot files or top-level dot directories.
They will implicitly search non-top-level dot directories, as before (Glob#49).
This commit is contained in:
Simon Michael 2025-12-04 05:54:54 -10:00
parent ade7ff83eb
commit 8cd113389a
9 changed files with 26 additions and 81 deletions

View File

@ -322,9 +322,9 @@ includedirectivep iopts = do
when (null $ dbg6 (parentf <> " include: glob pattern") glb) errorNoArg when (null $ dbg6 (parentf <> " include: glob pattern") glb) errorNoArg
-- Find the file or glob-matched files (just the ones from this include directive), with some IO error checking. -- Find the file or glob-matched files (just the ones from this include directive), with some IO error checking.
paths <- findMatchedFiles eoff parentf glb
-- Also report whether a glob pattern was used, and not just a literal file path. -- Also report whether a glob pattern was used, and not just a literal file path.
-- (paths, isglob) <- findMatchedFiles off pos glb -- (paths, isglob) <- findMatchedFiles off pos glb
paths <- findMatchedFiles eoff parentf glb
-- XXX worth the trouble ? no -- XXX worth the trouble ? no
-- Comprehensively exclude files already processed. Some complexities here: -- Comprehensively exclude files already processed. Some complexities here:
@ -354,12 +354,8 @@ includedirectivep iopts = do
-- Converts ** without a slash to **/*, like zsh's GLOB_STAR_SHORT, so ** also matches file name parts. -- Converts ** without a slash to **/*, like zsh's GLOB_STAR_SHORT, so ** also matches file name parts.
-- Checks if any matched paths are directories and excludes those. -- Checks if any matched paths are directories and excludes those.
-- Converts all matched paths to their canonical form. -- Converts all matched paths to their canonical form.
-- -- Note * and ** mostly won't implicitly match dot files or dot directories,
-- Glob patterns never match dot files or files under dot directories, -- but ** will implicitly search non-top-level dot directories (see #2498, Glob#49).
-- even if it seems like they should; this is a workaround for Glob bug #49.
-- This workaround is disabled if the --old-glob flag is present in the command line
-- (detected with unsafePerformIO; it's not worth a ton of boilerplate).
-- In that case, be aware ** recursive globs will search intermediate dot directories.
findMatchedFiles :: (MonadIO m) => Int -> FilePath -> FilePath -> JournalParser m [FilePath] findMatchedFiles :: (MonadIO m) => Int -> FilePath -> FilePath -> JournalParser m [FilePath]
findMatchedFiles off parentf globpattern = do findMatchedFiles off parentf globpattern = do
@ -375,8 +371,8 @@ includedirectivep iopts = do
-- * at the start of a file name ignores dot-named files and directories, by default. -- * at the start of a file name ignores dot-named files and directories, by default.
-- ** (or zero or more consecutive *'s) not followed by slash is equivalent to *. -- ** (or zero or more consecutive *'s) not followed by slash is equivalent to *.
-- A **/ component matches any number of directory parts. -- A **/ component matches any number of directory parts.
-- A **/ ignores dot-named directories in its starting and ending directories, by default. -- A **/ does not implicitly search top-level dot directories or implicitly match dot files,
-- But **/ does search intermediate dot-named directories. Eg it can find a/.b/c. -- but it does search non-top-level dot directories. Eg ** will find the c file in a/.b/c.
-- expand a tilde at the start of the glob pattern, or throw an error -- expand a tilde at the start of the glob pattern, or throw an error
expandedglob <- lift $ expandHomePath globpattern `orRethrowIOError` "failed to expand ~" expandedglob <- lift $ expandHomePath globpattern `orRethrowIOError` "failed to expand ~"
@ -400,7 +396,6 @@ includedirectivep iopts = do
g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob' of g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob' of
Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e
Right x -> pure x Right x -> pure x
let isglob = not $ isLiteral g
-- Find all matched paths. These might include directories or the current file. -- Find all matched paths. These might include directories or the current file.
paths <- liftIO $ globDir1 g cwd paths <- liftIO $ globDir1 g cwd
@ -411,12 +406,6 @@ includedirectivep iopts = do
>>= mapM makeAbsolute >>= mapM makeAbsolute
<&> sort <&> sort
-- -- If a glob was used, exclude the current file, for convenience.
-- let
-- files3 =
-- dbg6 (parentf <> " include: matched files" <> if isglob then " (excluding current file)" else "") $
-- (if isglob then filter (/= parentf) else id) files
-- Throw an error if one of these files is among the grandparent files, forming a cycle. -- Throw an error if one of these files is among the grandparent files, forming a cycle.
-- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ? -- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ?
-- We used to store the canonical paths, then switched to non-canonical paths for more useful output, -- We used to store the canonical paths, then switched to non-canonical paths for more useful output,
@ -432,31 +421,16 @@ includedirectivep iopts = do
| cf `elem` drop 1 cparentfiles -> customFailure $ parseErrorAt off $ "This included file forms a cycle: " ++ f | cf `elem` drop 1 cparentfiles -> customFailure $ parseErrorAt off $ "This included file forms a cycle: " ++ f
| otherwise -> return f | otherwise -> return f
-- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs,
-- it does search dot dirs in between those two (Glob #49).
-- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree.
-- We work around as follows: when any glob was used, paths involving dot dirs are excluded in post processing.
-- Unfortunately this means valid globs like .dotdir/* can't be used; only literal paths can match
-- things in dot dirs. An --old-glob command line flag disables this workaround, for backward compatibility.
oldglobflag <- liftIO $ getFlag ["old-glob"]
let
files3 = (if isglob && not oldglobflag then filter (not.hasdotdir) else id) files2
where
hasdotdir p = any isdotdir $ splitPath p
where
isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c
-- Throw an error if no files were matched. -- Throw an error if no files were matched.
when (null files3) $ customFailure $ parseErrorAt off $ "No files were matched by: " ++ globpattern when (null files2) $ customFailure $ parseErrorAt off $ "No files were matched by: " ++ globpattern
-- If the current file got included, ignore it. -- If the current file got included, ignore it (last, to avoid triggering the error above).
-- This is done last to avoid triggering the error above.
let let
files4 = files3 =
dbg6 (parentf <> " include: matched files (excluding current file)") $ dbg6 (parentf <> " include: matched files (excluding current file)") $
filter (not.(`elem` cparentf)) files3 filter (not.(`elem` cparentf)) files2
return files4 return files3
-- Parse the given included file (and any deeper includes, recursively) as if it was inlined in the current (parent) file. -- Parse the given included file (and any deeper includes, recursively) as if it was inlined in the current (parent) file.
-- The offset of the start of the include directive in the parent file is provided for error messages. -- The offset of the start of the include directive in the parent file is provided for error messages.

View File

@ -308,7 +308,7 @@ hiddenflagsformainmode = [
,flagNone ["anon"] (setboolopt "anon") "deprecated, renamed to --obfuscate" -- #2133, handled by anonymiseByOpts ,flagNone ["anon"] (setboolopt "anon") "deprecated, renamed to --obfuscate" -- #2133, handled by anonymiseByOpts
,flagNone ["obfuscate"] (setboolopt "obfuscate") "slightly obfuscate hledger's output. Warning, does not give privacy. Formerly --anon." -- #2133, handled by maybeObfuscate ,flagNone ["obfuscate"] (setboolopt "obfuscate") "slightly obfuscate hledger's output. Warning, does not give privacy. Formerly --anon." -- #2133, handled by maybeObfuscate
,flagNone ["old-timeclock", "timeclock-old"] (setboolopt "oldtimeclock") "don't pair timeclock entries by account name" ,flagNone ["old-timeclock", "timeclock-old"] (setboolopt "oldtimeclock") "don't pair timeclock entries by account name"
,flagNone ["old-glob"] (setboolopt "oldglob") "don't always exclude dot files/dirs to work around Glob bug" ,flagNone ["old-glob"] (setboolopt "oldglob") "deprecated, no longer used as of 1.50.4" -- #2498
,flagReq ["rules-file"] (\s opts -> Right $ setopt "rules" s opts) "RULESFILE" "was renamed to --rules" ,flagReq ["rules-file"] (\s opts -> Right $ setopt "rules" s opts) "RULESFILE" "was renamed to --rules"
] ]

View File

@ -2625,9 +2625,9 @@ So, you can do
- `include **.journal` to include all other journal files in this directory and below (excluding dot directories/files) - `include **.journal` to include all other journal files in this directory and below (excluding dot directories/files)
- `include timelogs/2???.timedot` to include all timedot files named like a year number. - `include timelogs/2???.timedot` to include all timedot files named like a year number.
There is a limitation: hledger's globs always exclude paths involving dot files or dot directories. Note `*` and `**` mostly won't implicitly match dot files or dot directories,
This is a workaround for unavoidable dot directory traversal; but `**` does implicitly search non-top-level dot directories.
you can disable it and revert to older behaviour with the `--old-glob` flag, for now. If this causes problems, make your glob pattern more specific (eg `**.journal` instead of `**`).
If you are using many, or deeply nested, include files, and have an error that's hard to pinpoint: If you are using many, or deeply nested, include files, and have an error that's hard to pinpoint:
a good troubleshooting command is `hledger files --debug=6` (or 7). a good troubleshooting command is `hledger files --debug=6` (or 7).

View File

@ -97,7 +97,8 @@ $ hledger -f cycle/globcycle.j files
>2 /cycle/ >2 /cycle/
>=1 >=1
# ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or non-dot subdirs. # ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or subdirs,
# excluding top-level dot dirs.
< <
include glob-deep/**/*.j include glob-deep/**/*.j
@ -105,9 +106,12 @@ $ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|'
- -
glob-deep/a.j glob-deep/a.j
glob-deep/b/b.j glob-deep/b/b.j
glob-deep/b/bb/.dotdir/dotdirbb.j
glob-deep/b/bb/bb.j glob-deep/b/bb/bb.j
glob-deep/c/c.j glob-deep/c/c.j
glob-deep/b/.dotdir/dotdirb.j
glob-deep/b/b.j glob-deep/b/b.j
glob-deep/b/bb/.dotdir/dotdirbb.j
glob-deep/b/bb/bb.j glob-deep/b/bb/bb.j
glob-deep/c/c.j glob-deep/c/c.j
@ -116,64 +120,31 @@ $ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|'
- -
glob-deep/a.j glob-deep/a.j
glob-deep/b/b.j glob-deep/b/b.j
glob-deep/b/bb/.dotdir/dotdirbb.j
glob-deep/b/bb/bb.j glob-deep/b/bb/bb.j
glob-deep/c/c.j glob-deep/c/c.j
glob-deep/b/.dotdir/dotdirb.j
glob-deep/b/b.j glob-deep/b/b.j
glob-deep/b/bb/.dotdir/dotdirbb.j
glob-deep/b/bb/bb.j glob-deep/b/bb/bb.j
glob-deep/c/c.j glob-deep/c/c.j
# ** 16. --old-glob preserves pre-1.50 glob behaviour: avoiding dot things at top (.c/) # ** 16. A glob follows a symlink to a regular file. (And shows the symlink's path.)
# and at bottom (b/.d.j), searching intermediate dot dirs (b/.e/).
<
include glob-dot/**.j
$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||'
-
a.j
b/.e/e.j
b/b.j
# ** 17. Dot things can be matched by an explicit non-glob path.
<
include glob-dot/.c/c.j
include glob-dot/b/.d.j
include glob-dot/b/.e/e.j
$ hledger -f - files | sed -E 's|.*/glob-dot/||'
-
.c/c.j
b/.d.j
b/.e/e.j
# ** 18. They can't be matched by a glob path, even if explicitly mentioned, by default.
<
include glob-dot/.c/c.j*
$ hledger -f - files | sed -E 's|.*/glob-dot/||'
>2 /No files were matched/
#>=1 # sed hides the exit code
# ** 19. They can be matched by an explicit glob path if --old-glob is used.
<
include glob-dot/.c/c.j*
$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||'
-
.c/c.j
# ** 20. A glob follows a symlink to a regular file. (And shows the symlink's path.)
$ hledger -f glob-symlinked-file/a.j files | sed -E 's|.*/glob-symlinked-file/||' $ hledger -f glob-symlinked-file/a.j files | sed -E 's|.*/glob-symlinked-file/||'
a.j a.j
b.j b.j
# ** 21. A glob follows a symlink to a regular dir. # ** 17. A glob follows a symlink to a regular dir.
$ hledger -f glob-symlinked-dir/a.j files | sed -E 's|.*/glob-symlinked-dir/||' $ hledger -f glob-symlinked-dir/a.j files | sed -E 's|.*/glob-symlinked-dir/||'
a.j a.j
b/c.j b/c.j
# ** 22. A glob follows a symlink to a dot file. # ** 18. A glob follows a symlink to a dot file.
$ hledger -f glob-symlinked-dotfile/a.j files | sed -E 's|.*/glob-symlinked-dotfile/||' $ hledger -f glob-symlinked-dotfile/a.j files | sed -E 's|.*/glob-symlinked-dotfile/||'
a.j a.j
b.j b.j
# ** 23. A glob follows a symlink to a dot dir. # ** 19. A glob follows a symlink to a dot dir.
$ hledger -f glob-symlinked-dotdir/a.j files | sed -E 's|.*/glob-symlinked-dotdir/||' $ hledger -f glob-symlinked-dotdir/a.j files | sed -E 's|.*/glob-symlinked-dotdir/||'
a.j a.j
b/c.j b/c.j