From 5ec770badde9eb5a0b6cf93ee61dbe6b9fde7b40 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Tue, 15 Jul 2025 15:05:28 -0700 Subject: [PATCH] imp: include: more flexible **; show the correct line in read errors --- hledger-lib/Hledger/Read/JournalReader.hs | 60 ++++++++++++----------- hledger/test/journal/include/include.test | 30 ++++++------ 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index ef7209161..abf65fa27 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -344,7 +344,8 @@ includedirectivep = do Nothing -> paths' Just fmt -> map ((show fmt++":")++) paths' - -- parse each one, as if inlined here + -- Parse each one, as if inlined here. + -- Reset the position to the `include` line, for error messages. setOffset off forM_ prefixedpaths $ parseIncludedFile off pos @@ -353,24 +354,23 @@ includedirectivep = do -- | Find the files matched by a literal path or a glob pattern. -- Uses the current parse context for detecting the current directory and for error messages. -- Expands a leading tilde to the user's home directory. - -- Glob patterns at the start of a path component exclude dot-named files and directories. - -- + -- Converts ** without a slash to **/*, like zsh's GLOB_STAR_SHORT, so ** also matches file name parts. + -- Glob patterns at the start of a path component will exclude dot-named files and directories. -- Checks if any matched paths are directories and excludes those. -- Converts all matched paths to their canonical form. - -- findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m ([FilePath], Bool) findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath] findMatchedFiles off pos globpattern = do - -- Some notes about the Glob library: - -- ---------------------------------- + + -- Some notes about the Glob library that we use (related: https://github.com/Deewiant/glob/issues/49): -- It does not expand tilde. -- It does not canonicalise paths. -- The results are not in any particular order. -- The results can include directories. -- DIRPAT/ is equivalent to DIRPAT, except results will end with // (double slash). - -- . or .. at the start of a pattern can match the current or parent directories. - -- * matches any number of characters in a file or directory name. + -- A . or .. path component can match the current or parent directories (including them in the results). + -- * matches zero or more characters in a file or directory name. -- * at the start of a file name ignores dot-named files and directories, by default. - -- ** (or any number of consecutive *'s) not followed by slash is equivalent to *. + -- ** (or zero or more consecutive *'s) not followed by slash is equivalent to *. -- A **/ component matches any number of directory parts. -- A **/ ignores dot-named directories in its starting and ending directories, by default. -- But **/ does search intermediate dot-named directories. Eg it can find a/.b/c. @@ -382,22 +382,26 @@ includedirectivep = do parentfile <- sourcePosFilePath pos let cwd = takeDirectory parentfile - -- Compile as a Glob Pattern (and do some extra error checking). Can throw an error. - g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob of - Left e -> - customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e - Right _ | regexMatch (toRegex' "\\*\\*[^/]") expandedglob -> - customFailure $ parseErrorAt off $ "Invalid glob pattern: double star requires slash, use * or **/" - Right _ | "***" `isInfixOf` expandedglob -> - customFailure $ parseErrorAt off $ "Invalid glob pattern: too many stars, use * or **/" + -- Don't allow 3 or more stars. + when ("***" `isInfixOf` expandedglob) $ + customFailure $ parseErrorAt off $ "Invalid glob pattern: too many stars, use * or **" + + -- Make ** also match file name parts like zsh's GLOB_STAR_SHORT. + let + expandedglob' = + -- ** without a slash is equivalent to **/* + case regexReplace (toRegex' $ T.pack "\\*\\*([^/\\])") "**/*\\1" expandedglob of + Right s -> s + Left _ -> expandedglob -- ignore any error, there should be none + + -- Compile as a Pattern. Can throw an error. + g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob' of + Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e Right x -> pure x let isglob = not $ isLiteral g - -- Find all matched paths, in lexicographic order (the order ls would normally show them). - -- These might include directories or the current file. - paths <- liftIO $ - -- (dbg6 (parentfile <> " include: matched paths") . sort) <$> - globDir1 g cwd + -- Find all matched paths. These might include directories or the current file. + paths <- liftIO $ globDir1 g cwd -- Exclude any directories or symlinks to directories, and canonicalise, and sort. files <- liftIO $ @@ -405,13 +409,12 @@ includedirectivep = do >>= mapM canonicalizePath <&> sort - -- If a glob was used, exclude any intermediate dot directories that were searched. - -- As noted above, while **/ ignores dot dirs in the starting and ending dirs, + -- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs, -- it does search dot dirs in between those two (something that should be fixed in Glob ?). - -- This seems likely to be inconvenient, eg when trying to avoid .git directories in subrepos. - -- So as an imperfect workaround: when using any glob, exclude all paths involving dot dirs. - -- Unfortunately this means valid globs like .dotdir/* will not succeed; only a literal - -- .dotdir/foo would work there. + -- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree. + -- To work around: when any glob was used, paths involving dot dirs are excluded in post processing. + -- Unfortunately this means valid globs like .dotdir/* can't be used; only literal paths can match + -- things in dot dirs. let files2 = (if isglob then filter (not.hasdotdir) else id) files where @@ -429,7 +432,6 @@ includedirectivep = do dbg6 (parentfile <> " include: matched files" <> if isglob then " (excluding current file)" else "") $ (if isglob then filter (/= parentfile) else id) files2 - -- return (files3, isglob) return files3 -- Parse the given included file (and any deeper includes, recursively) diff --git a/hledger/test/journal/include/include.test b/hledger/test/journal/include/include.test index d0f5a5b8d..7ecaec9b4 100644 --- a/hledger/test/journal/include/include.test +++ b/hledger/test/journal/include/include.test @@ -66,45 +66,38 @@ $ hledger -f - print >2 /Invalid glob/ >= 1 -# ** 8. Two *'s without / -> invalid glob error -< -include ** -$ hledger -f- files ->2 /Invalid glob/ ->=1 - -# ** 9. Three or more *'s -> invalid glob error +# ** 8. Three or more *'s -> invalid glob error < include *** $ hledger -f- files >2 /Invalid glob/ >=1 -# ** 10. Including the current file literally -> cycle error. +# ** 9. Including the current file literally -> cycle error. $ hledger -f self.j files >2 /cycle/ >=1 -# ** 11. Including the current file via glob -> harmless, globs ignore current file. +# ** 10. Including the current file via glob -> harmless, globs ignore current file. $ hledger -f selfglob.j files | sed -E 's|.*hledger/test/journal/include/||' selfglob.j -# ** 12. Including a cycle, all literally -> cycle error +# ** 11. Including a cycle, all literally -> cycle error $ hledger -f .cycle/cycle.j files >2 /cycle/ >=1 -# ** 13. Including a cycle, involving globs -> cycle error +# ** 12. Including a cycle, involving globs -> cycle error $ hledger -f .cycle/cycleglob.j files >2 /cycle/ >=1 -# ** 14. Glob patterns ignore the current file (once). +# ** 13. Glob patterns ignore the current file (once). $ hledger -f a.j files | sed -E 's|.*hledger/test/journal/include/||' a.j a2.j -# ** 15. Include */**/*.j -> all non-dot .j files in or below non-dot subdirectories. +# ** 14. Include */**/*.j -> all non-dot .j files in or below non-dot subdirectories. < include */**/*.j $ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||' @@ -113,6 +106,15 @@ b/b.j b/bb/bb.j c/c.j +# ** 15. ** without a slash can also match filename start, equivalent to **/* -> same result as above. +< +include */**.j +$ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||' +- +b/b.j +b/bb/bb.j +c/c.j + # ** 16. To avoid intermediate dot dirs in the above, we exclude all glob-matched paths involving dot dirs. # So this does not find b/bb/.dotdir/dotdirbb.j, unfortunately: <