From 8cd113389aedb833447884ad328fe71fed56adb9 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Thu, 4 Dec 2025 05:54:54 -1000 Subject: [PATCH] fix:journal:include: drop 1.50's exclusion of glob-matched dot paths [#2498] 1.50* attempted to work around Glob's implicit searching of non-top-level dot dirs. This was overzealous; it meant that journal's include completely excluded paths involving a glob and a dot dir or dot file anywhere in the path. Now, the pre-1.50 behaviour is restored: `*` and `**` won't implicitly match dot files or top-level dot directories. They will implicitly search non-top-level dot directories, as before (#Glob#49). --- hledger-lib/Hledger/Read/JournalReader.hs | 46 ++++------------ hledger/Hledger/Cli/CliOptions.hs | 2 +- hledger/hledger.m4.md | 6 +-- .../{glob-dot/.c/c.j => glob-deep/.d/d.j} | 0 hledger/test/journal/include/glob-dot/a.j | 0 hledger/test/journal/include/glob-dot/b/.d.j | 0 .../test/journal/include/glob-dot/b/.e/e.j | 0 hledger/test/journal/include/glob-dot/b/b.j | 0 hledger/test/journal/include/include.test | 53 +++++-------------- 9 files changed, 26 insertions(+), 81 deletions(-) rename hledger/test/journal/include/{glob-dot/.c/c.j => glob-deep/.d/d.j} (100%) delete mode 100644 hledger/test/journal/include/glob-dot/a.j delete mode 100644 hledger/test/journal/include/glob-dot/b/.d.j delete mode 100644 hledger/test/journal/include/glob-dot/b/.e/e.j delete mode 100644 hledger/test/journal/include/glob-dot/b/b.j diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index cbc382e2e..1d7bc7e0a 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -322,9 +322,9 @@ includedirectivep iopts = do when (null $ dbg6 (parentf <> " include: glob pattern") glb) errorNoArg -- Find the file or glob-matched files (just the ones from this include directive), with some IO error checking. 
+ paths <- findMatchedFiles eoff parentf glb -- Also report whether a glob pattern was used, and not just a literal file path. -- (paths, isglob) <- findMatchedFiles off pos glb - paths <- findMatchedFiles eoff parentf glb -- XXX worth the trouble ? no -- Comprehensively exclude files already processed. Some complexities here: @@ -354,12 +354,8 @@ includedirectivep iopts = do -- Converts ** without a slash to **/*, like zsh's GLOB_STAR_SHORT, so ** also matches file name parts. -- Checks if any matched paths are directories and excludes those. -- Converts all matched paths to their canonical form. - -- - -- Glob patterns never match dot files or files under dot directories, - -- even if it seems like they should; this is a workaround for Glob bug #49. - -- This workaround is disabled if the --old-glob flag is present in the command line - -- (detected with unsafePerformIO; it's not worth a ton of boilerplate). - -- In that case, be aware ** recursive globs will search intermediate dot directories. + -- Note * and ** mostly won't implicitly match dot files or dot directories, + -- but ** will implicitly search non-top-level dot directories (see #2498, Glob#49). findMatchedFiles :: (MonadIO m) => Int -> FilePath -> FilePath -> JournalParser m [FilePath] findMatchedFiles off parentf globpattern = do @@ -375,8 +371,8 @@ includedirectivep iopts = do -- * at the start of a file name ignores dot-named files and directories, by default. -- ** (or zero or more consecutive *'s) not followed by slash is equivalent to *. -- A **/ component matches any number of directory parts. - -- A **/ ignores dot-named directories in its starting and ending directories, by default. - -- But **/ does search intermediate dot-named directories. Eg it can find a/.b/c. + -- A **/ does not implicitly search top-level dot directories or implicitly match dot files, + -- but it does search non-top-level dot directories. Eg ** will find the c file in a/.b/c. 
-- expand a tilde at the start of the glob pattern, or throw an error expandedglob <- lift $ expandHomePath globpattern `orRethrowIOError` "failed to expand ~" @@ -400,7 +396,6 @@ includedirectivep iopts = do g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob' of Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e Right x -> pure x - let isglob = not $ isLiteral g -- Find all matched paths. These might include directories or the current file. paths <- liftIO $ globDir1 g cwd @@ -411,12 +406,6 @@ includedirectivep iopts = do >>= mapM makeAbsolute <&> sort - -- -- If a glob was used, exclude the current file, for convenience. - -- let - -- files3 = - -- dbg6 (parentf <> " include: matched files" <> if isglob then " (excluding current file)" else "") $ - -- (if isglob then filter (/= parentf) else id) files - -- Throw an error if one of these files is among the grandparent files, forming a cycle. -- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ? -- We used to store the canonical paths, then switched to non-canonical paths for more useful output, @@ -432,31 +421,16 @@ includedirectivep iopts = do | cf `elem` drop 1 cparentfiles -> customFailure $ parseErrorAt off $ "This included file forms a cycle: " ++ f | otherwise -> return f - -- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs, - -- it does search dot dirs in between those two (Glob #49). - -- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree. - -- We work around as follows: when any glob was used, paths involving dot dirs are excluded in post processing. - -- Unfortunately this means valid globs like .dotdir/* can't be used; only literal paths can match - -- things in dot dirs. An --old-glob command line flag disables this workaround, for backward compatibility. 
- oldglobflag <- liftIO $ getFlag ["old-glob"] - let - files3 = (if isglob && not oldglobflag then filter (not.hasdotdir) else id) files2 - where - hasdotdir p = any isdotdir $ splitPath p - where - isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c - -- Throw an error if no files were matched. - when (null files3) $ customFailure $ parseErrorAt off $ "No files were matched by: " ++ globpattern + when (null files2) $ customFailure $ parseErrorAt off $ "No files were matched by: " ++ globpattern - -- If the current file got included, ignore it. - -- This is done last to avoid triggering the error above. + -- If the current file got included, ignore it (last, to avoid triggering the error above). let - files4 = + files3 = dbg6 (parentf <> " include: matched files (excluding current file)") $ - filter (not.(`elem` cparentf)) files3 + filter (not.(`elem` cparentf)) files2 - return files4 + return files3 -- Parse the given included file (and any deeper includes, recursively) as if it was inlined in the current (parent) file. -- The offset of the start of the include directive in the parent file is provided for error messages. diff --git a/hledger/Hledger/Cli/CliOptions.hs b/hledger/Hledger/Cli/CliOptions.hs index e4d5ceb0e..306d55e5f 100644 --- a/hledger/Hledger/Cli/CliOptions.hs +++ b/hledger/Hledger/Cli/CliOptions.hs @@ -308,7 +308,7 @@ hiddenflagsformainmode = [ ,flagNone ["anon"] (setboolopt "anon") "deprecated, renamed to --obfuscate" -- #2133, handled by anonymiseByOpts ,flagNone ["obfuscate"] (setboolopt "obfuscate") "slightly obfuscate hledger's output. Warning, does not give privacy. Formerly --anon." 
-- #2133, handled by maybeObfuscate ,flagNone ["old-timeclock", "timeclock-old"] (setboolopt "oldtimeclock") "don't pair timeclock entries by account name" - ,flagNone ["old-glob"] (setboolopt "oldglob") "don't always exclude dot files/dirs to work around Glob bug" + ,flagNone ["old-glob"] (setboolopt "oldglob") "deprecated, no longer used as of 1.50.4" -- #2498 ,flagReq ["rules-file"] (\s opts -> Right $ setopt "rules" s opts) "RULESFILE" "was renamed to --rules" ] diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md index b0a6dbb09..cc44acaaa 100644 --- a/hledger/hledger.m4.md +++ b/hledger/hledger.m4.md @@ -2625,9 +2625,9 @@ So, you can do - `include **.journal` to include all other journal files in this directory and below (excluding dot directories/files) - `include timelogs/2???.timedot` to include all timedot files named like a year number. -There is a limitation: hledger's globs always exclude paths involving dot files or dot directories. -This is a workaround for unavoidable dot directory traversal; -you can disable it and revert to older behaviour with the `--old-glob` flag, for now. +Note `*` and `**` mostly won't implicitly match dot files or dot directories, +but `**` does implicitly search non-top-level dot directories. +If this causes problems, make your glob pattern more specific (eg `**.journal` instead of `**`). If you are using many, or deeply nested, include files, and have an error that's hard to pinpoint: a good troubleshooting command is `hledger files --debug=6` (or 7). 
diff --git a/hledger/test/journal/include/glob-dot/.c/c.j b/hledger/test/journal/include/glob-deep/.d/d.j similarity index 100% rename from hledger/test/journal/include/glob-dot/.c/c.j rename to hledger/test/journal/include/glob-deep/.d/d.j diff --git a/hledger/test/journal/include/glob-dot/a.j b/hledger/test/journal/include/glob-dot/a.j deleted file mode 100644 index e69de29bb..000000000 diff --git a/hledger/test/journal/include/glob-dot/b/.d.j b/hledger/test/journal/include/glob-dot/b/.d.j deleted file mode 100644 index e69de29bb..000000000 diff --git a/hledger/test/journal/include/glob-dot/b/.e/e.j b/hledger/test/journal/include/glob-dot/b/.e/e.j deleted file mode 100644 index e69de29bb..000000000 diff --git a/hledger/test/journal/include/glob-dot/b/b.j b/hledger/test/journal/include/glob-dot/b/b.j deleted file mode 100644 index e69de29bb..000000000 diff --git a/hledger/test/journal/include/include.test b/hledger/test/journal/include/include.test index 252f6614b..9a4835399 100644 --- a/hledger/test/journal/include/include.test +++ b/hledger/test/journal/include/include.test @@ -97,7 +97,8 @@ $ hledger -f cycle/globcycle.j files >2 /cycle/ >=1 -# ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or non-dot subdirs. +# ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or subdirs, +# excluding top-level dot dirs. 
< include glob-deep/**/*.j @@ -105,9 +106,12 @@ $ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|' - glob-deep/a.j glob-deep/b/b.j +glob-deep/b/bb/.dotdir/dotdirbb.j glob-deep/b/bb/bb.j glob-deep/c/c.j +glob-deep/b/.dotdir/dotdirb.j glob-deep/b/b.j +glob-deep/b/bb/.dotdir/dotdirbb.j glob-deep/b/bb/bb.j glob-deep/c/c.j @@ -116,64 +120,31 @@ $ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|' - glob-deep/a.j glob-deep/b/b.j +glob-deep/b/bb/.dotdir/dotdirbb.j glob-deep/b/bb/bb.j glob-deep/c/c.j +glob-deep/b/.dotdir/dotdirb.j glob-deep/b/b.j +glob-deep/b/bb/.dotdir/dotdirbb.j glob-deep/b/bb/bb.j glob-deep/c/c.j -# ** 16. --old-glob preserves pre-1.50 glob behaviour: avoiding dot things at top (.c/) -# and at bottom (b/.d.j), searching intermediate dot dirs (b/.e/). -< -include glob-dot/**.j - -$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||' -- -a.j -b/.e/e.j -b/b.j - -# ** 17. Dot things can be matched by an explicit non-glob path. -< -include glob-dot/.c/c.j -include glob-dot/b/.d.j -include glob-dot/b/.e/e.j -$ hledger -f - files | sed -E 's|.*/glob-dot/||' -- -.c/c.j -b/.d.j -b/.e/e.j - -# ** 18. They can't be matched by a glob path, even if explicitly mentioned, by default. -< -include glob-dot/.c/c.j* -$ hledger -f - files | sed -E 's|.*/glob-dot/||' ->2 /No files were matched/ -#>=1 # sed hides the exit code - -# ** 19. They can be matched by an explicit glob path if --old-glob is used. -< -include glob-dot/.c/c.j* -$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||' -- -.c/c.j - -# ** 20. A glob follows a symlink to a regular file. (And shows the symlink's path.) +# ** 16. A glob follows a symlink to a regular file. (And shows the symlink's path.) $ hledger -f glob-symlinked-file/a.j files | sed -E 's|.*/glob-symlinked-file/||' a.j b.j -# ** 21. A glob follows a symlink to a regular dir. +# ** 17. A glob follows a symlink to a regular dir. 
$ hledger -f glob-symlinked-dir/a.j files | sed -E 's|.*/glob-symlinked-dir/||' a.j b/c.j -# ** 22. A glob follows a symlink to a dot file. +# ** 18. A glob follows a symlink to a dot file. $ hledger -f glob-symlinked-dotfile/a.j files | sed -E 's|.*/glob-symlinked-dotfile/||' a.j b.j -# ** 23. A glob follows a symlink to a dot dir. +# ** 19. A glob follows a symlink to a dot dir. $ hledger -f glob-symlinked-dotdir/a.j files | sed -E 's|.*/glob-symlinked-dotdir/||' a.j b/c.j