imp: include: more flexible **; show the correct line in read errors

This commit is contained in:
Simon Michael 2025-07-15 15:05:28 -07:00
parent b35dcfda28
commit 5ec770badd
2 changed files with 47 additions and 43 deletions

View File

@ -344,7 +344,8 @@ includedirectivep = do
Nothing -> paths' Nothing -> paths'
Just fmt -> map ((show fmt++":")++) paths' Just fmt -> map ((show fmt++":")++) paths'
-- parse each one, as if inlined here -- Parse each one, as if inlined here.
-- Reset the position to the `include` line, for error messages.
setOffset off setOffset off
forM_ prefixedpaths $ parseIncludedFile off pos forM_ prefixedpaths $ parseIncludedFile off pos
@ -353,24 +354,23 @@ includedirectivep = do
-- | Find the files matched by a literal path or a glob pattern. -- | Find the files matched by a literal path or a glob pattern.
-- Uses the current parse context for detecting the current directory and for error messages. -- Uses the current parse context for detecting the current directory and for error messages.
-- Expands a leading tilde to the user's home directory. -- Expands a leading tilde to the user's home directory.
-- Glob patterns at the start of a path component exclude dot-named files and directories. -- Converts ** without a slash to **/*, like zsh's GLOB_STAR_SHORT, so ** also matches file name parts.
-- -- Glob patterns at the start of a path component will exclude dot-named files and directories.
-- Checks if any matched paths are directories and excludes those. -- Checks if any matched paths are directories and excludes those.
-- Converts all matched paths to their canonical form. -- Converts all matched paths to their canonical form.
-- findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m ([FilePath], Bool)
findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath] findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath]
findMatchedFiles off pos globpattern = do findMatchedFiles off pos globpattern = do
-- Some notes about the Glob library:
-- ---------------------------------- -- Some notes about the Glob library that we use (related: https://github.com/Deewiant/glob/issues/49):
-- It does not expand tilde. -- It does not expand tilde.
-- It does not canonicalise paths. -- It does not canonicalise paths.
-- The results are not in any particular order. -- The results are not in any particular order.
-- The results can include directories. -- The results can include directories.
-- DIRPAT/ is equivalent to DIRPAT, except results will end with // (double slash). -- DIRPAT/ is equivalent to DIRPAT, except results will end with // (double slash).
-- . or .. at the start of a pattern can match the current or parent directories. -- A . or .. path component can match the current or parent directories (including them in the results).
-- * matches any number of characters in a file or directory name. -- * matches zero or more characters in a file or directory name.
-- * at the start of a file name ignores dot-named files and directories, by default. -- * at the start of a file name ignores dot-named files and directories, by default.
-- ** (or any number of consecutive *'s) not followed by slash is equivalent to *. -- ** (or two or more consecutive *'s) not followed by slash is equivalent to *.
-- A **/ component matches any number of directory parts. -- A **/ component matches any number of directory parts.
-- A **/ ignores dot-named directories in its starting and ending directories, by default. -- A **/ ignores dot-named directories in its starting and ending directories, by default.
-- But **/ does search intermediate dot-named directories. Eg it can find a/.b/c. -- But **/ does search intermediate dot-named directories. Eg it can find a/.b/c.
@ -382,22 +382,26 @@ includedirectivep = do
parentfile <- sourcePosFilePath pos parentfile <- sourcePosFilePath pos
let cwd = takeDirectory parentfile let cwd = takeDirectory parentfile
-- Compile as a Glob Pattern (and do some extra error checking). Can throw an error. -- Don't allow 3 or more stars.
g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob of when ("***" `isInfixOf` expandedglob) $
Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: too many stars, use * or **"
customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e
Right _ | regexMatch (toRegex' "\\*\\*[^/]") expandedglob -> -- Make ** also match file name parts like zsh's GLOB_STAR_SHORT.
customFailure $ parseErrorAt off $ "Invalid glob pattern: double star requires slash, use * or **/" let
Right _ | "***" `isInfixOf` expandedglob -> expandedglob' =
customFailure $ parseErrorAt off $ "Invalid glob pattern: too many stars, use * or **/" -- ** without a slash is equivalent to **/*
case regexReplace (toRegex' $ T.pack "\\*\\*([^/\\])") "**/*\\1" expandedglob of
Right s -> s
Left _ -> expandedglob -- ignore any error, there should be none
-- Compile as a Pattern. Can throw an error.
g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob' of
Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e
Right x -> pure x Right x -> pure x
let isglob = not $ isLiteral g let isglob = not $ isLiteral g
-- Find all matched paths, in lexicographic order (the order ls would normally show them). -- Find all matched paths. These might include directories or the current file.
-- These might include directories or the current file. paths <- liftIO $ globDir1 g cwd
paths <- liftIO $
-- (dbg6 (parentfile <> " include: matched paths") . sort) <$>
globDir1 g cwd
-- Exclude any directories or symlinks to directories, and canonicalise, and sort. -- Exclude any directories or symlinks to directories, and canonicalise, and sort.
files <- liftIO $ files <- liftIO $
@ -405,13 +409,12 @@ includedirectivep = do
>>= mapM canonicalizePath >>= mapM canonicalizePath
<&> sort <&> sort
-- If a glob was used, exclude any intermediate dot directories that were searched. -- Work around a Glob bug with dot dirs: while **/ ignores dot dirs in the starting and ending dirs,
-- As noted above, while **/ ignores dot dirs in the starting and ending dirs,
-- it does search dot dirs in between those two (something that should be fixed in Glob ?). -- it does search dot dirs in between those two (something that should be fixed in Glob ?).
-- This seems likely to be inconvenient, eg when trying to avoid .git directories in subrepos. -- This could be inconvenient, eg making it hard to avoid VCS directories in a source tree.
-- So as an imperfect workaround: when using any glob, exclude all paths involving dot dirs. -- To work around: when any glob was used, paths involving dot dirs are excluded in post processing.
-- Unfortunately this means valid globs like .dotdir/* will not succeed; only a literal -- Unfortunately this means valid globs like .dotdir/* can't be used; only literal paths can match
-- .dotdir/foo would work there. -- things in dot dirs.
let let
files2 = (if isglob then filter (not.hasdotdir) else id) files files2 = (if isglob then filter (not.hasdotdir) else id) files
where where
@ -429,7 +432,6 @@ includedirectivep = do
dbg6 (parentfile <> " include: matched files" <> if isglob then " (excluding current file)" else "") $ dbg6 (parentfile <> " include: matched files" <> if isglob then " (excluding current file)" else "") $
(if isglob then filter (/= parentfile) else id) files2 (if isglob then filter (/= parentfile) else id) files2
-- return (files3, isglob)
return files3 return files3
-- Parse the given included file (and any deeper includes, recursively) -- Parse the given included file (and any deeper includes, recursively)

View File

@ -66,45 +66,38 @@ $ hledger -f - print
>2 /Invalid glob/ >2 /Invalid glob/
>= 1 >= 1
# ** 8. Two *'s without / -> invalid glob error # ** 8. Three or more *'s -> invalid glob error
<
include **
$ hledger -f- files
>2 /Invalid glob/
>=1
# ** 9. Three or more *'s -> invalid glob error
< <
include *** include ***
$ hledger -f- files $ hledger -f- files
>2 /Invalid glob/ >2 /Invalid glob/
>=1 >=1
# ** 10. Including the current file literally -> cycle error. # ** 9. Including the current file literally -> cycle error.
$ hledger -f self.j files $ hledger -f self.j files
>2 /cycle/ >2 /cycle/
>=1 >=1
# ** 11. Including the current file via glob -> harmless, globs ignore current file. # ** 10. Including the current file via glob -> harmless, globs ignore current file.
$ hledger -f selfglob.j files | sed -E 's|.*hledger/test/journal/include/||' $ hledger -f selfglob.j files | sed -E 's|.*hledger/test/journal/include/||'
selfglob.j selfglob.j
# ** 12. Including a cycle, all literally -> cycle error # ** 11. Including a cycle, all literally -> cycle error
$ hledger -f .cycle/cycle.j files $ hledger -f .cycle/cycle.j files
>2 /cycle/ >2 /cycle/
>=1 >=1
# ** 13. Including a cycle, involving globs -> cycle error # ** 12. Including a cycle, involving globs -> cycle error
$ hledger -f .cycle/cycleglob.j files $ hledger -f .cycle/cycleglob.j files
>2 /cycle/ >2 /cycle/
>=1 >=1
# ** 14. Glob patterns ignore the current file (once). # ** 13. Glob patterns ignore the current file (once).
$ hledger -f a.j files | sed -E 's|.*hledger/test/journal/include/||' $ hledger -f a.j files | sed -E 's|.*hledger/test/journal/include/||'
a.j a.j
a2.j a2.j
# ** 15. Include */**/*.j -> all non-dot .j files in or below non-dot subdirectories. # ** 14. Include */**/*.j -> all non-dot .j files in or below non-dot subdirectories.
< <
include */**/*.j include */**/*.j
$ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||' $ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||'
@ -113,6 +106,15 @@ b/b.j
b/bb/bb.j b/bb/bb.j
c/c.j c/c.j
# ** 15. ** without a slash can also match filename start, equivalent to **/* -> same result as above.
<
include */**.j
$ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||'
-
b/b.j
b/bb/bb.j
c/c.j
# ** 16. To avoid intermediate dot dirs in the above, we exclude all glob-matched paths involving dot dirs. # ** 16. To avoid intermediate dot dirs in the above, we exclude all glob-matched paths involving dot dirs.
# So this does not find b/bb/.dotdir/dotdirbb.j, unfortunately: # So this does not find b/bb/.dotdir/dotdirbb.j, unfortunately:
< <