fix:journal:include: drop 1.50's exclusion of glob-matched dot paths [#2498]
1.50* attempted to work around Glob's implicit searching of non-top-level dot dirs. This was overzealous; it meant that journal's include completely excluded paths involving a glob and a dot dir or dot file anywhere in the path. Now, the pre-1.50 behaviour is restored: `*` and `**` won't implicitly match dot files or top-level dot directories. They will implicitly search non-top-level dot directories, as before (Glob#49).
This commit is contained in:
parent
ade7ff83eb
commit
8cd113389a
@ -322,9 +322,9 @@ includedirectivep iopts = do
|
|||||||
when (null $ dbg6 (parentf <> " include: glob pattern") glb) errorNoArg
|
when (null $ dbg6 (parentf <> " include: glob pattern") glb) errorNoArg
|
||||||
|
|
||||||
-- Find the file or glob-matched files (just the ones from this include directive), with some IO error checking.
|
-- Find the file or glob-matched files (just the ones from this include directive), with some IO error checking.
|
||||||
|
paths <- findMatchedFiles eoff parentf glb
|
||||||
-- Also report whether a glob pattern was used, and not just a literal file path.
|
-- Also report whether a glob pattern was used, and not just a literal file path.
|
||||||
-- (paths, isglob) <- findMatchedFiles off pos glb
|
-- (paths, isglob) <- findMatchedFiles off pos glb
|
||||||
paths <- findMatchedFiles eoff parentf glb
|
|
||||||
|
|
||||||
-- XXX worth the trouble ? no
|
-- XXX worth the trouble ? no
|
||||||
-- Comprehensively exclude files already processed. Some complexities here:
|
-- Comprehensively exclude files already processed. Some complexities here:
|
||||||
@ -354,12 +354,8 @@ includedirectivep iopts = do
|
|||||||
-- Converts ** without a slash to **/*, like zsh's GLOB_STAR_SHORT, so ** also matches file name parts.
|
-- Converts ** without a slash to **/*, like zsh's GLOB_STAR_SHORT, so ** also matches file name parts.
|
||||||
-- Checks if any matched paths are directories and excludes those.
|
-- Checks if any matched paths are directories and excludes those.
|
||||||
-- Converts all matched paths to their canonical form.
|
-- Converts all matched paths to their canonical form.
|
||||||
--
|
-- Note * and ** mostly won't implicitly match dot files or dot directories,
|
||||||
-- Glob patterns never match dot files or files under dot directories,
|
-- but ** will implicitly search non-top-level dot directories (see #2498, Glob#49).
|
||||||
-- even if it seems like they should; this is a workaround for Glob bug #49.
|
|
||||||
-- This workaround is disabled if the --old-glob flag is present in the command line
|
|
||||||
-- (detected with unsafePerformIO; it's not worth a ton of boilerplate).
|
|
||||||
-- In that case, be aware ** recursive globs will search intermediate dot directories.
|
|
||||||
|
|
||||||
findMatchedFiles :: (MonadIO m) => Int -> FilePath -> FilePath -> JournalParser m [FilePath]
|
findMatchedFiles :: (MonadIO m) => Int -> FilePath -> FilePath -> JournalParser m [FilePath]
|
||||||
findMatchedFiles off parentf globpattern = do
|
findMatchedFiles off parentf globpattern = do
|
||||||
@ -375,8 +371,8 @@ includedirectivep iopts = do
|
|||||||
-- * at the start of a file name ignores dot-named files and directories, by default.
|
-- * at the start of a file name ignores dot-named files and directories, by default.
|
||||||
-- ** (or zero or more consecutive *'s) not followed by slash is equivalent to *.
|
-- ** (or zero or more consecutive *'s) not followed by slash is equivalent to *.
|
||||||
-- A **/ component matches any number of directory parts.
|
-- A **/ component matches any number of directory parts.
|
||||||
-- A **/ ignores dot-named directories in its starting and ending directories, by default.
|
-- A **/ does not implicitly search top-level dot directories or implicitly match dot files,
|
||||||
-- But **/ does search intermediate dot-named directories. Eg it can find a/.b/c.
|
-- but it does search non-top-level dot directories. Eg ** will find the c file in a/.b/c.
|
||||||
|
|
||||||
-- expand a tilde at the start of the glob pattern, or throw an error
|
-- expand a tilde at the start of the glob pattern, or throw an error
|
||||||
expandedglob <- lift $ expandHomePath globpattern `orRethrowIOError` "failed to expand ~"
|
expandedglob <- lift $ expandHomePath globpattern `orRethrowIOError` "failed to expand ~"
|
||||||
@ -400,7 +396,6 @@ includedirectivep iopts = do
|
|||||||
g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob' of
|
g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob' of
|
||||||
Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e
|
Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e
|
||||||
Right x -> pure x
|
Right x -> pure x
|
||||||
let isglob = not $ isLiteral g
|
|
||||||
|
|
||||||
-- Find all matched paths. These might include directories or the current file.
|
-- Find all matched paths. These might include directories or the current file.
|
||||||
paths <- liftIO $ globDir1 g cwd
|
paths <- liftIO $ globDir1 g cwd
|
||||||
@ -411,12 +406,6 @@ includedirectivep iopts = do
|
|||||||
>>= mapM makeAbsolute
|
>>= mapM makeAbsolute
|
||||||
<&> sort
|
<&> sort
|
||||||
|
|
||||||
-- -- If a glob was used, exclude the current file, for convenience.
|
|
||||||
-- let
|
|
||||||
-- files3 =
|
|
||||||
-- dbg6 (parentf <> " include: matched files" <> if isglob then " (excluding current file)" else "") $
|
|
||||||
-- (if isglob then filter (/= parentf) else id) files
|
|
||||||
|
|
||||||
-- Throw an error if one of these files is among the grandparent files, forming a cycle.
|
-- Throw an error if one of these files is among the grandparent files, forming a cycle.
|
||||||
-- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ?
|
-- Though, ignore the immediate parent file for convenience. XXX inconsistent - should it ignore all cyclic includes ?
|
||||||
-- We used to store the canonical paths, then switched to non-canonical paths for more useful output,
|
-- We used to store the canonical paths, then switched to non-canonical paths for more useful output,
|
||||||
@ -432,31 +421,16 @@ includedirectivep iopts = do
|
|||||||
| cf `elem` drop 1 cparentfiles -> customFailure $ parseErrorAt off $ "This included file forms a cycle: " ++ f
|
| cf `elem` drop 1 cparentfiles -> customFailure $ parseErrorAt off $ "This included file forms a cycle: " ++ f
|
||||||
| otherwise -> return f
|
| otherwise -> return f
|
||||||
|
|
||||||
-- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs,
|
|
||||||
-- it does search dot dirs in between those two (Glob #49).
|
|
||||||
-- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree.
|
|
||||||
-- We work around as follows: when any glob was used, paths involving dot dirs are excluded in post processing.
|
|
||||||
-- Unfortunately this means valid globs like .dotdir/* can't be used; only literal paths can match
|
|
||||||
-- things in dot dirs. An --old-glob command line flag disables this workaround, for backward compatibility.
|
|
||||||
oldglobflag <- liftIO $ getFlag ["old-glob"]
|
|
||||||
let
|
|
||||||
files3 = (if isglob && not oldglobflag then filter (not.hasdotdir) else id) files2
|
|
||||||
where
|
|
||||||
hasdotdir p = any isdotdir $ splitPath p
|
|
||||||
where
|
|
||||||
isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c
|
|
||||||
|
|
||||||
-- Throw an error if no files were matched.
|
-- Throw an error if no files were matched.
|
||||||
when (null files3) $ customFailure $ parseErrorAt off $ "No files were matched by: " ++ globpattern
|
when (null files2) $ customFailure $ parseErrorAt off $ "No files were matched by: " ++ globpattern
|
||||||
|
|
||||||
-- If the current file got included, ignore it.
|
-- If the current file got included, ignore it (last, to avoid triggering the error above).
|
||||||
-- This is done last to avoid triggering the error above.
|
|
||||||
let
|
let
|
||||||
files4 =
|
files3 =
|
||||||
dbg6 (parentf <> " include: matched files (excluding current file)") $
|
dbg6 (parentf <> " include: matched files (excluding current file)") $
|
||||||
filter (not.(`elem` cparentf)) files3
|
filter (not.(`elem` cparentf)) files2
|
||||||
|
|
||||||
return files4
|
return files3
|
||||||
|
|
||||||
-- Parse the given included file (and any deeper includes, recursively) as if it was inlined in the current (parent) file.
|
-- Parse the given included file (and any deeper includes, recursively) as if it was inlined in the current (parent) file.
|
||||||
-- The offset of the start of the include directive in the parent file is provided for error messages.
|
-- The offset of the start of the include directive in the parent file is provided for error messages.
|
||||||
|
|||||||
@ -308,7 +308,7 @@ hiddenflagsformainmode = [
|
|||||||
,flagNone ["anon"] (setboolopt "anon") "deprecated, renamed to --obfuscate" -- #2133, handled by anonymiseByOpts
|
,flagNone ["anon"] (setboolopt "anon") "deprecated, renamed to --obfuscate" -- #2133, handled by anonymiseByOpts
|
||||||
,flagNone ["obfuscate"] (setboolopt "obfuscate") "slightly obfuscate hledger's output. Warning, does not give privacy. Formerly --anon." -- #2133, handled by maybeObfuscate
|
,flagNone ["obfuscate"] (setboolopt "obfuscate") "slightly obfuscate hledger's output. Warning, does not give privacy. Formerly --anon." -- #2133, handled by maybeObfuscate
|
||||||
,flagNone ["old-timeclock", "timeclock-old"] (setboolopt "oldtimeclock") "don't pair timeclock entries by account name"
|
,flagNone ["old-timeclock", "timeclock-old"] (setboolopt "oldtimeclock") "don't pair timeclock entries by account name"
|
||||||
,flagNone ["old-glob"] (setboolopt "oldglob") "don't always exclude dot files/dirs to work around Glob bug"
|
,flagNone ["old-glob"] (setboolopt "oldglob") "deprecated, no longer used as of 1.50.4" -- #2498
|
||||||
,flagReq ["rules-file"] (\s opts -> Right $ setopt "rules" s opts) "RULESFILE" "was renamed to --rules"
|
,flagReq ["rules-file"] (\s opts -> Right $ setopt "rules" s opts) "RULESFILE" "was renamed to --rules"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@ -2625,9 +2625,9 @@ So, you can do
|
|||||||
- `include **.journal` to include all other journal files in this directory and below (excluding dot directories/files)
|
- `include **.journal` to include all other journal files in this directory and below (excluding dot directories/files)
|
||||||
- `include timelogs/2???.timedot` to include all timedot files named like a year number.
|
- `include timelogs/2???.timedot` to include all timedot files named like a year number.
|
||||||
|
|
||||||
There is a limitation: hledger's globs always exclude paths involving dot files or dot directories.
|
Note `*` and `**` mostly won't implicitly match dot files or dot directories,
|
||||||
This is a workaround for unavoidable dot directory traversal;
|
but `**` does implicitly search non-top-level dot directories.
|
||||||
you can disable it and revert to older behaviour with the `--old-glob` flag, for now.
|
If this causes problems, make your glob pattern more specific (eg `**.journal` instead of `**`).
|
||||||
|
|
||||||
If you are using many, or deeply nested, include files, and have an error that's hard to pinpoint:
|
If you are using many, or deeply nested, include files, and have an error that's hard to pinpoint:
|
||||||
a good troubleshooting command is `hledger files --debug=6` (or 7).
|
a good troubleshooting command is `hledger files --debug=6` (or 7).
|
||||||
|
|||||||
@ -97,7 +97,8 @@ $ hledger -f cycle/globcycle.j files
|
|||||||
>2 /cycle/
|
>2 /cycle/
|
||||||
>=1
|
>=1
|
||||||
|
|
||||||
# ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or non-dot subdirs.
|
# ** 14. Old-style deep glob **/*.j -> all non-dot .j files in current dir or subdirs,
|
||||||
|
# excluding top-level dot dirs.
|
||||||
<
|
<
|
||||||
include glob-deep/**/*.j
|
include glob-deep/**/*.j
|
||||||
|
|
||||||
@ -105,9 +106,12 @@ $ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|'
|
|||||||
-
|
-
|
||||||
glob-deep/a.j
|
glob-deep/a.j
|
||||||
glob-deep/b/b.j
|
glob-deep/b/b.j
|
||||||
|
glob-deep/b/bb/.dotdir/dotdirbb.j
|
||||||
glob-deep/b/bb/bb.j
|
glob-deep/b/bb/bb.j
|
||||||
glob-deep/c/c.j
|
glob-deep/c/c.j
|
||||||
|
glob-deep/b/.dotdir/dotdirb.j
|
||||||
glob-deep/b/b.j
|
glob-deep/b/b.j
|
||||||
|
glob-deep/b/bb/.dotdir/dotdirbb.j
|
||||||
glob-deep/b/bb/bb.j
|
glob-deep/b/bb/bb.j
|
||||||
glob-deep/c/c.j
|
glob-deep/c/c.j
|
||||||
|
|
||||||
@ -116,64 +120,31 @@ $ hledger -f - files | sed -E 's|.*/glob-deep/|glob-deep/|'
|
|||||||
-
|
-
|
||||||
glob-deep/a.j
|
glob-deep/a.j
|
||||||
glob-deep/b/b.j
|
glob-deep/b/b.j
|
||||||
|
glob-deep/b/bb/.dotdir/dotdirbb.j
|
||||||
glob-deep/b/bb/bb.j
|
glob-deep/b/bb/bb.j
|
||||||
glob-deep/c/c.j
|
glob-deep/c/c.j
|
||||||
|
glob-deep/b/.dotdir/dotdirb.j
|
||||||
glob-deep/b/b.j
|
glob-deep/b/b.j
|
||||||
|
glob-deep/b/bb/.dotdir/dotdirbb.j
|
||||||
glob-deep/b/bb/bb.j
|
glob-deep/b/bb/bb.j
|
||||||
glob-deep/c/c.j
|
glob-deep/c/c.j
|
||||||
|
|
||||||
# ** 16. --old-glob preserves pre-1.50 glob behaviour: avoiding dot things at top (.c/)
|
# ** 16. A glob follows a symlink to a regular file. (And shows the symlink's path.)
|
||||||
# and at bottom (b/.d.j), searching intermediate dot dirs (b/.e/).
|
|
||||||
<
|
|
||||||
include glob-dot/**.j
|
|
||||||
|
|
||||||
$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||'
|
|
||||||
-
|
|
||||||
a.j
|
|
||||||
b/.e/e.j
|
|
||||||
b/b.j
|
|
||||||
|
|
||||||
# ** 17. Dot things can be matched by an explicit non-glob path.
|
|
||||||
<
|
|
||||||
include glob-dot/.c/c.j
|
|
||||||
include glob-dot/b/.d.j
|
|
||||||
include glob-dot/b/.e/e.j
|
|
||||||
$ hledger -f - files | sed -E 's|.*/glob-dot/||'
|
|
||||||
-
|
|
||||||
.c/c.j
|
|
||||||
b/.d.j
|
|
||||||
b/.e/e.j
|
|
||||||
|
|
||||||
# ** 18. They can't be matched by a glob path, even if explicitly mentioned, by default.
|
|
||||||
<
|
|
||||||
include glob-dot/.c/c.j*
|
|
||||||
$ hledger -f - files | sed -E 's|.*/glob-dot/||'
|
|
||||||
>2 /No files were matched/
|
|
||||||
#>=1 # sed hides the exit code
|
|
||||||
|
|
||||||
# ** 19. They can be matched by an explicit glob path if --old-glob is used.
|
|
||||||
<
|
|
||||||
include glob-dot/.c/c.j*
|
|
||||||
$ hledger -f - files --old-glob | sed -E 's|.*/glob-dot/||'
|
|
||||||
-
|
|
||||||
.c/c.j
|
|
||||||
|
|
||||||
# ** 20. A glob follows a symlink to a regular file. (And shows the symlink's path.)
|
|
||||||
$ hledger -f glob-symlinked-file/a.j files | sed -E 's|.*/glob-symlinked-file/||'
|
$ hledger -f glob-symlinked-file/a.j files | sed -E 's|.*/glob-symlinked-file/||'
|
||||||
a.j
|
a.j
|
||||||
b.j
|
b.j
|
||||||
|
|
||||||
# ** 21. A glob follows a symlink to a regular dir.
|
# ** 17. A glob follows a symlink to a regular dir.
|
||||||
$ hledger -f glob-symlinked-dir/a.j files | sed -E 's|.*/glob-symlinked-dir/||'
|
$ hledger -f glob-symlinked-dir/a.j files | sed -E 's|.*/glob-symlinked-dir/||'
|
||||||
a.j
|
a.j
|
||||||
b/c.j
|
b/c.j
|
||||||
|
|
||||||
# ** 22. A glob follows a symlink to a dot file.
|
# ** 18. A glob follows a symlink to a dot file.
|
||||||
$ hledger -f glob-symlinked-dotfile/a.j files | sed -E 's|.*/glob-symlinked-dotfile/||'
|
$ hledger -f glob-symlinked-dotfile/a.j files | sed -E 's|.*/glob-symlinked-dotfile/||'
|
||||||
a.j
|
a.j
|
||||||
b.j
|
b.j
|
||||||
|
|
||||||
# ** 23. A glob follows a symlink to a dot dir.
|
# ** 19. A glob follows a symlink to a dot dir.
|
||||||
$ hledger -f glob-symlinked-dotdir/a.j files | sed -E 's|.*/glob-symlinked-dotdir/||'
|
$ hledger -f glob-symlinked-dotdir/a.j files | sed -E 's|.*/glob-symlinked-dotdir/||'
|
||||||
a.j
|
a.j
|
||||||
b/c.j
|
b/c.j
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user