From b7e35f84a21fea3e6517daea14c80c08685a080a Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Thu, 17 Jul 2025 08:00:08 -0700 Subject: [PATCH] imp: include: add hidden --old-glob flag to restore old dot behaviour This disables the workaround for Glob#49, allowing glob patterns to find dot files and traverse dot directories again (sometimes too much). --- hledger-lib/Hledger/Read/JournalReader.hs | 35 ++++++++++++++--------- hledger/Hledger/Cli/CliOptions.hs | 1 + hledger/test/journal/include/include.test | 8 +++++- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index abf65fa27..3343e4cf3 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -304,10 +304,8 @@ directivep = (do ) "directive" -- | Parse an include directive, and the file(s) it refers to, possibly recursively. --- include's argument is a file path or glob pattern, optionally with a file type prefix. --- ~ at the start is expanded to the user's home directory. --- Relative paths are relative to the current file. --- Examples: foo.j, ../foo/bar.j, timedot:/foo/2020*, *.journal +-- include's argument is a file path or glob pattern (see findMatchedFiles for details), +-- optionally with a file type prefix. Relative paths are relative to the current file. includedirectivep :: MonadIO m => ErroringJournalParser m () includedirectivep = do -- save the position @@ -330,19 +328,19 @@ includedirectivep = do paths <- findMatchedFiles off pos glb -- XXX worth the trouble ? no - -- Handle duplicates. Some complexities here: + -- Comprehensively exclude files already processed. Some complexities here: -- If this include directive uses a glob pattern, remove duplicates. 
-- Ie if this glob pattern matches any files we have already processed (or the current file), -- due to multiple includes in sequence or in a cycle, exclude those files so they're not processed again. -- If this include directive uses a literal file path, don't remove duplicates. -- Multiple includes in sequence will cause the included file to be processed multiple times. -- Multiple includes forming a cycle will be detected and reported as an error in parseIncludedFile. - let paths' = paths -- if isglob then filter (const True) paths else paths + -- let paths' = if isglob then filter (...) paths else paths -- if there was a reader prefix, apply it to all the file paths let prefixedpaths = case mprefix of - Nothing -> paths' - Just fmt -> map ((show fmt++":")++) paths' + Nothing -> paths + Just fmt -> map ((show fmt++":")++) paths -- Parse each one, as if inlined here. -- Reset the position to the `include` line, for error messages. @@ -352,13 +350,21 @@ includedirectivep = do where -- | Find the files matched by a literal path or a glob pattern. + -- Examples: foo.j, ../foo/bar.j, timedot:/foo/2020*, *.journal, **.journal. + -- -- Uses the current parse context for detecting the current directory and for error messages. -- Expands a leading tilde to the user's home directory. -- Converts ** without a slash to **/*, like zsh's GLOB_STAR_SHORT, so ** also matches file name parts. - -- Glob patterns at the start of a path component will exclude dot-named files and directories. -- Checks if any matched paths are directories and excludes those. -- Converts all matched paths to their canonical form. - findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath] + -- + -- Glob patterns never match dot files or files under dot directories, + -- even if it seems like they should; this is a workaround for Glob bug #49. 
+ -- This workaround is disabled if the --old-glob flag is present in the command line + -- (detected by checking the command line directly with getFlag; it's not worth a ton of boilerplate). + -- In that case, be aware ** recursive globs will search intermediate dot directories. + + findMatchedFiles :: (MonadIO m) => Int -> SourcePos -> FilePath -> JournalParser m [FilePath] findMatchedFiles off pos globpattern = do -- Some notes about the Glob library that we use (related: https://github.com/Deewiant/glob/issues/49): @@ -410,13 +416,14 @@ includedirectivep = do <&> sort -- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs, - -- it does search dot dirs in between those two (something that should be fixed in Glob ?). + -- it does search dot dirs in between those two (Glob #49). -- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree. - -- To work around: when any glob was used, paths involving dot dirs are excluded in post processing. + -- We work around as follows: when any glob was used, paths involving dot dirs are excluded in post processing. -- Unfortunately this means valid globs like .dotdir/* can't be used; only literal paths can match - -- things in dot dirs. + -- things in dot dirs. An --old-glob command line flag disables this workaround, for backward compatibility. 
+ oldglobflag <- liftIO $ getFlag ["old-glob"] let - files2 = (if isglob then filter (not.hasdotdir) else id) files + files2 = (if isglob && not oldglobflag then filter (not.hasdotdir) else id) files where hasdotdir p = any isdotdir $ splitPath p where diff --git a/hledger/Hledger/Cli/CliOptions.hs b/hledger/Hledger/Cli/CliOptions.hs index 091c703b9..42d797b56 100644 --- a/hledger/Hledger/Cli/CliOptions.hs +++ b/hledger/Hledger/Cli/CliOptions.hs @@ -299,6 +299,7 @@ hiddenflagsformainmode = [ ,flagNone ["anon"] (setboolopt "anon") "deprecated, renamed to --obfuscate" -- #2133, handled by anonymiseByOpts ,flagNone ["obfuscate"] (setboolopt "obfuscate") "slightly obfuscate hledger's output. Warning, does not give privacy. Formerly --anon." -- #2133, handled by maybeObfuscate ,flagNone ["old-timeclock", "timeclock-old"] (setboolopt "oldtimeclock") "don't pair timeclock entries by account name" + ,flagNone ["old-glob"] (setboolopt "oldglob") "don't always exclude dot files/dirs to work around Glob bug" ,flagReq ["rules-file"] (\s opts -> Right $ setopt "rules" s opts) "RULESFILE" "was renamed to --rules" ] diff --git a/hledger/test/journal/include/include.test b/hledger/test/journal/include/include.test index 7ecaec9b4..70932b359 100644 --- a/hledger/test/journal/include/include.test +++ b/hledger/test/journal/include/include.test @@ -123,9 +123,15 @@ $ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||' >2 /No files were matched/ # sed hides the non-zero exit code -# ** 17. Only a literal path can find it. +# ** 17. This workaround can be disabled with --old-glob, for now. +$ hledger -f - files --old-glob | sed -E 's|.*hledger/test/journal/include/||' +- +b/.dotdir/dotdirb.j + +# ** 18. A literal path can always match dot files/dirs. < include b/.dotdir/dotdirb.j $ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||' - b/.dotdir/dotdirb.j +