imp: include: more robust ** patterns, and ignore dotted directories

** now ignores anything under dotted directories, ie directories whose
name begins with a dot. Eg .git/, foo/.secret/, etc.

Switched from Glob to filepattern lib.
This commit is contained in:
Simon Michael 2025-07-11 13:02:19 -07:00
parent b1f416dee7
commit b71e001c51
3 changed files with 40 additions and 5 deletions

View File

@ -94,6 +94,7 @@ import Text.Megaparsec.Char
import Text.Printf import Text.Printf
import System.FilePath import System.FilePath
import "Glob" System.FilePath.Glob hiding (match) import "Glob" System.FilePath.Glob hiding (match)
import "filepattern" System.FilePattern.Directory
import Hledger.Data import Hledger.Data
import Hledger.Read.Common import Hledger.Read.Common
@ -301,7 +302,7 @@ includedirectivep = do
let (mprefix,glb) = splitReaderPrefix prefixedglob let (mprefix,glb) = splitReaderPrefix prefixedglob
when (null $ dbg7 "glob pattern" glb) $ when (null $ dbg7 "glob pattern" glb) $
customFailure $ parseErrorAt off $ "include needs a file path or glob pattern argument" customFailure $ parseErrorAt off $ "include needs a file path or glob pattern argument"
paths <- getFilePaths off pos glb paths <- getFilePaths2 off pos glb
let prefixedpaths = case mprefix of let prefixedpaths = case mprefix of
Nothing -> paths Nothing -> paths
Just fmt -> map ((show fmt++":")++) paths Just fmt -> map ((show fmt++":")++) paths
@ -310,10 +311,11 @@ includedirectivep = do
where where
-- Find the files matched by a glob pattern, using the current parse context -- XXX keep providing via --old-glob for a bit ?
-- for detecting the current directory and for error messages. -- Find the files matched by a glob pattern, using Glob.
getFilePaths :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath] -- Uses the current parse context for detecting the current directory and for error messages.
getFilePaths parseroff parserpos fileglobpattern = do _getFilePaths1 :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath]
_getFilePaths1 parseroff parserpos fileglobpattern = do
-- Expand a ~ at the start of the glob pattern, if any. -- Expand a ~ at the start of the glob pattern, if any.
fileglobpattern' <- lift $ expandHomePath fileglobpattern fileglobpattern' <- lift $ expandHomePath fileglobpattern
`orRethrowIOError` (show parserpos ++ " locating " ++ fileglobpattern) `orRethrowIOError` (show parserpos ++ " locating " ++ fileglobpattern)
@ -331,6 +333,35 @@ includedirectivep = do
then pure filepaths then pure filepaths
else customFailure $ parseErrorAt parseroff $ "No files were matched by file pattern: " ++ fileglobpattern else customFailure $ parseErrorAt parseroff $ "No files were matched by file pattern: " ++ fileglobpattern
-- Find the files matched by a glob pattern, using filepattern.
-- Uses the current parse context for detecting the current directory and for error messages.
-- This one also ignores all dotted directories (anything under .git/, foo/.secret/, etc.)
getFilePaths2 :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath]
getFilePaths2 off pos globpattern = do
-- expand a tilde at the start of the glob pattern, or throw an error
expandedglob <- lift $ expandHomePath globpattern
`orRethrowIOError` (show pos ++ " locating " ++ globpattern)
-- get the directory of the including file, to resolve relative paths
let parentfilepath = sourceName pos
realparentfilepath <- liftIO $ canonicalizePath parentfilepath -- Follow a symlink. If the path is already absolute, the operation never fails.
let cwd = takeDirectory realparentfilepath
-- find all matched files, in lexicographic order (the order ls would normally show them)
filepaths <- liftIO $
dbg7 "include: matched files"
. map (cwd </>)
-- . sort -- XXX needed ?
<$>
getDirectoryFilesIgnore cwd [expandedglob] ["**/.*/**"]
-- throw an error if no files matched
when (null filepaths) $
customFailure $ parseErrorAt off $ "No files were matched by file pattern: " ++ globpattern
pure filepaths
-- Parse the given included file (and any deeper includes, recursively) -- Parse the given included file (and any deeper includes, recursively)
-- as if it was inlined in the current (parent) file. -- as if it was inlined in the current (parent) file.
-- The position in the parent file is provided for error messages. -- The position in the parent file is provided for error messages.

View File

@ -148,6 +148,7 @@ library
, extra >=1.7.11 , extra >=1.7.11
, file-embed >=0.0.10 , file-embed >=0.0.10
, filepath , filepath
, filepattern >=0.1.3
, hashtables >=1.2.3.1 , hashtables >=1.2.3.1
, lucid , lucid
, megaparsec >=7.0.0 && <9.8 , megaparsec >=7.0.0 && <9.8
@ -208,6 +209,7 @@ test-suite doctest
, extra >=1.7.11 , extra >=1.7.11
, file-embed >=0.0.10 , file-embed >=0.0.10
, filepath , filepath
, filepattern >=0.1.3
, hashtables >=1.2.3.1 , hashtables >=1.2.3.1
, lucid , lucid
, megaparsec >=7.0.0 && <9.8 , megaparsec >=7.0.0 && <9.8
@ -269,6 +271,7 @@ test-suite unittest
, extra >=1.7.11 , extra >=1.7.11
, file-embed >=0.0.10 , file-embed >=0.0.10
, filepath , filepath
, filepattern >=0.1.3
, hashtables >=1.2.3.1 , hashtables >=1.2.3.1
, hledger-lib , hledger-lib
, lucid , lucid

View File

@ -94,6 +94,7 @@ dependencies:
- utf8-string >=0.3.5 - utf8-string >=0.3.5
- extra >=1.7.11 - extra >=1.7.11
- Glob >= 0.9 - Glob >= 0.9
- filepattern >=0.1.3
ghc-options: ghc-options:
- -Wall - -Wall