imp: include: globs exclude current file; more cleanup

This commit is contained in:
Simon Michael 2025-07-14 21:14:01 -07:00
parent b4a1add267
commit 460ae28826
5 changed files with 32 additions and 25 deletions

View File

@ -326,20 +326,19 @@ includedirectivep = do
customFailure $ parseErrorAt off $ "include needs a file path or glob pattern argument" customFailure $ parseErrorAt off $ "include needs a file path or glob pattern argument"
-- Find the file or glob-matched files (just the ones from this include directive), with some IO error checking. -- Find the file or glob-matched files (just the ones from this include directive), with some IO error checking.
-- Also report whether it was a literal path and not a glob pattern. -- Also report whether a glob pattern was used, and not just a literal file path.
(paths, isliteral) <- findMatchedFiles off pos glb -- (paths, isglob) <- findMatchedFiles off pos glb
paths <- findMatchedFiles off pos glb
-- XXX worth the troublel ? -- XXX worth the trouble ? no
-- Handle duplicates. Some complexities here: -- Handle duplicates. Some complexities here:
--
-- If this include directive uses a glob pattern, remove duplicates. -- If this include directive uses a glob pattern, remove duplicates.
-- Ie if this glob pattern matches any files we have already processed (or the current file), -- Ie if this glob pattern matches any files we have already processed (or the current file),
-- due to multiple includes in sequence or in a cycle, exclude those files so they're not processed again. -- due to multiple includes in sequence or in a cycle, exclude those files so they're not processed again.
--
-- If this include directive uses a literal file path, don't remove duplicates. -- If this include directive uses a literal file path, don't remove duplicates.
-- Multiple includes in sequence will cause the included file to be processed multiple times. -- Multiple includes in sequence will cause the included file to be processed multiple times.
-- Multiple includes forming a cycle will be detected and reported as an error in parseIncludedFile. -- Multiple includes forming a cycle will be detected and reported as an error in parseIncludedFile.
let paths' = if isliteral then paths else filter (const True) paths let paths' = paths -- if isglob then filter (const True) paths else paths
-- if there was a reader prefix, apply it to all the file paths -- if there was a reader prefix, apply it to all the file paths
let prefixedpaths = case mprefix of let prefixedpaths = case mprefix of
@ -358,7 +357,8 @@ includedirectivep = do
-- --
-- Checks if any matched paths are directories and excludes those. -- Checks if any matched paths are directories and excludes those.
-- Converts all matched paths to their canonical form. -- Converts all matched paths to their canonical form.
findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m ([FilePath], Bool) -- findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m ([FilePath], Bool)
findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath]
findMatchedFiles off pos globpattern = do findMatchedFiles off pos globpattern = do
-- Some notes about the Glob library: -- Some notes about the Glob library:
-- ---------------------------------- -- ----------------------------------
@ -387,6 +387,7 @@ includedirectivep = do
Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e
Right _ | "***" `isInfixOf` expandedglob -> customFailure $ parseErrorAt off $ "Invalid glob pattern: too many stars" Right _ | "***" `isInfixOf` expandedglob -> customFailure $ parseErrorAt off $ "Invalid glob pattern: too many stars"
Right x -> pure x Right x -> pure x
let isglob = not $ isLiteral g
-- Find all matched paths, in lexicographic order (the order ls would normally show them). -- Find all matched paths, in lexicographic order (the order ls would normally show them).
-- These might include directories or the current file. -- These might include directories or the current file.
@ -394,13 +395,13 @@ includedirectivep = do
-- (dbg6 (parentfile <> " include: matched paths") . sort) <$> -- (dbg6 (parentfile <> " include: matched paths") . sort) <$>
globDir1 g cwd globDir1 g cwd
-- Exclude any directories or symlinks to directories, and canonicalise -- Exclude any directories or symlinks to directories, and canonicalise, and sort.
files <- liftIO $ files <- liftIO $
filterM doesFileExist paths filterM doesFileExist paths
>>= mapM canonicalizePath >>= mapM canonicalizePath
<&> (dbg6 (parentfile <> " include: matched files") . sort) <&> sort
-- If a glob was used: exclude any intermediate dot directories that were searched. -- If a glob was used, exclude any intermediate dot directories that were searched.
-- As noted above, while **/ ignores dot dirs in the starting and ending dirs, -- As noted above, while **/ ignores dot dirs in the starting and ending dirs,
-- it does search dot dirs in between those two (something that should be fixed in Glob ?). -- it does search dot dirs in between those two (something that should be fixed in Glob ?).
-- This seems likely to be inconvenient, eg when trying to avoid .git directories in subrepos. -- This seems likely to be inconvenient, eg when trying to avoid .git directories in subrepos.
@ -408,17 +409,24 @@ includedirectivep = do
-- Unfortunately this means valid globs like .dotdir/* will not succeed; only a literal -- Unfortunately this means valid globs like .dotdir/* will not succeed; only a literal
-- .dotdir/foo would work there. -- .dotdir/foo would work there.
let let
files' = if isLiteral g then files else filter (not.hasdotdir) files files2 = (if isglob then filter (not.hasdotdir) else id) files
where where
hasdotdir p = any isdotdir $ splitPath p hasdotdir p = any isdotdir $ splitPath p
where where
isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c
-- Throw an error if no files were matched. -- Throw an error if no files were matched.
when (null files') $ when (null files2) $
customFailure $ parseErrorAt off $ "No files were matched by glob pattern: " ++ globpattern customFailure $ parseErrorAt off $ "No files were matched by glob pattern: " ++ globpattern
return (files', isLiteral g) -- If a glob was used, exclude the current file, for convenience.
let
files3 =
dbg6 (parentfile <> " include: matched files" <> if isglob then " (excluding current file)" else "") $
(if isglob then filter (/= parentfile) else id) files2
-- return (files3, isglob)
return files3
-- Parse the given included file (and any deeper includes, recursively) -- Parse the given included file (and any deeper includes, recursively)
-- as if it was inlined in the current (parent) file. -- as if it was inlined in the current (parent) file.

View File

@ -1,2 +1,4 @@
include a*.j
2016-01-01 2016-01-01
(a) 1 (a) 1

View File

@ -1 +0,0 @@
include **

View File

@ -1 +0,0 @@
include */**/*.j

View File

@ -78,10 +78,9 @@ $ hledger -f self.j files
>2 /cycle/ >2 /cycle/
>=1 >=1
# ** 10. Including the current file via glob -> cycle error. # ** 10. Including the current file via glob -> harmless, globs ignore current file.
$ hledger -f selfglob.j files $ hledger -f selfglob.j files | sed -E 's|.*hledger/test/journal/include/||'
>2 /cycle/ selfglob.j
>=1
# ** 11. Including a cycle, all literally -> cycle error # ** 11. Including a cycle, all literally -> cycle error
$ hledger -f .cycle/cycle.j files $ hledger -f .cycle/cycle.j files
@ -93,12 +92,12 @@ $ hledger -f .cycle/cycleglob.j files
>2 /cycle/ >2 /cycle/
>=1 >=1
# ** 13. Include ** -> cycle error (includes current file) # ** 13. Glob patterns ignore the current file (once).
$ hledger -f glob1.j files $ hledger -f a.j files | sed -E 's|.*hledger/test/journal/include/||'
>2 /cycle/ a.j
>=1 a2.j
# ** 14. Include **.j -> cycle error (includes current file) # ** 14. Include **.j -> cycle error (globs ignore current file, but other files include it)
$ hledger -f glob2.j files $ hledger -f glob2.j files
>2 /cycle/ >2 /cycle/
>=1 >=1
@ -109,7 +108,7 @@ glob3.j
b/b.j b/b.j
c/c.j c/c.j
# ** 16. Include **/*.j -> cycle error (includes current file) # ** 16. Include **/*.j -> cycle error (too many mutual includes)
$ hledger -f glob4.j files $ hledger -f glob4.j files
>2 /cycle/ >2 /cycle/
>=1 >=1