imp: include: more robust tests and glob pattern handling
This switches from filepattern back to Glob, which is more powerful. New notes, implementation, workarounds and tests.
This commit is contained in:
parent
1046f652b1
commit
b4a1add267
@ -73,7 +73,7 @@ where
|
|||||||
--- ** imports
|
--- ** imports
|
||||||
import qualified Control.Monad.Fail as Fail (fail)
|
import qualified Control.Monad.Fail as Fail (fail)
|
||||||
import qualified Control.Exception as C
|
import qualified Control.Exception as C
|
||||||
import Control.Monad (forM_, when, void, unless)
|
import Control.Monad (forM_, when, void, unless, filterM)
|
||||||
import Control.Monad.IO.Class (MonadIO, liftIO)
|
import Control.Monad.IO.Class (MonadIO, liftIO)
|
||||||
import Control.Monad.Except (ExceptT(..), runExceptT)
|
import Control.Monad.Except (ExceptT(..), runExceptT)
|
||||||
import Control.Monad.State.Strict (evalStateT,get,modify',put)
|
import Control.Monad.State.Strict (evalStateT,get,modify',put)
|
||||||
@ -94,7 +94,7 @@ import Text.Megaparsec.Char
|
|||||||
import Text.Printf
|
import Text.Printf
|
||||||
import System.FilePath
|
import System.FilePath
|
||||||
import "Glob" System.FilePath.Glob hiding (match)
|
import "Glob" System.FilePath.Glob hiding (match)
|
||||||
import "filepattern" System.FilePattern.Directory
|
-- import "filepattern" System.FilePattern.Directory
|
||||||
|
|
||||||
import Hledger.Data
|
import Hledger.Data
|
||||||
import Hledger.Read.Common
|
import Hledger.Read.Common
|
||||||
@ -104,7 +104,8 @@ import qualified Hledger.Read.CsvReader as CsvReader (reader)
|
|||||||
import qualified Hledger.Read.RulesReader as RulesReader (reader)
|
import qualified Hledger.Read.RulesReader as RulesReader (reader)
|
||||||
import qualified Hledger.Read.TimeclockReader as TimeclockReader (reader)
|
import qualified Hledger.Read.TimeclockReader as TimeclockReader (reader)
|
||||||
import qualified Hledger.Read.TimedotReader as TimedotReader (reader)
|
import qualified Hledger.Read.TimedotReader as TimedotReader (reader)
|
||||||
import System.Directory (canonicalizePath)
|
import System.Directory (canonicalizePath, doesFileExist)
|
||||||
|
import Data.Functor ((<&>))
|
||||||
|
|
||||||
--- ** doctest setup
|
--- ** doctest setup
|
||||||
-- $setup
|
-- $setup
|
||||||
@ -313,30 +314,66 @@ includedirectivep = do
|
|||||||
-- save the position
|
-- save the position
|
||||||
off <- getOffset
|
off <- getOffset
|
||||||
pos <- getSourcePos
|
pos <- getSourcePos
|
||||||
-- parse
|
|
||||||
|
-- parse the directive
|
||||||
string "include"
|
string "include"
|
||||||
lift skipNonNewlineSpaces1
|
lift skipNonNewlineSpaces1
|
||||||
prefixedglob <- rstrip . T.unpack <$> takeWhileP Nothing (`notElem` [';','\n'])
|
prefixedglob <- rstrip . T.unpack <$> takeWhileP Nothing (`notElem` [';','\n'])
|
||||||
lift followingcommentp
|
lift followingcommentp
|
||||||
-- find file(s)
|
|
||||||
let (mprefix,glb) = splitReaderPrefix prefixedglob
|
let (mprefix,glb) = splitReaderPrefix prefixedglob
|
||||||
f <- sourcePosFilePath pos
|
f <- sourcePosFilePath pos
|
||||||
when (null $ dbg6 (f <> " include: glob pattern") glb) $
|
when (null $ dbg6 (f <> " include: glob pattern") glb) $
|
||||||
customFailure $ parseErrorAt off $ "include needs a file path or glob pattern argument"
|
customFailure $ parseErrorAt off $ "include needs a file path or glob pattern argument"
|
||||||
paths <- getFilePaths off pos glb
|
|
||||||
|
-- Find the file or glob-matched files (just the ones from this include directive), with some IO error checking.
|
||||||
|
-- Also report whether it was a literal path and not a glob pattern.
|
||||||
|
(paths, isliteral) <- findMatchedFiles off pos glb
|
||||||
|
|
||||||
|
-- XXX worth the troublel ?
|
||||||
|
-- Handle duplicates. Some complexities here:
|
||||||
|
--
|
||||||
|
-- If this include directive uses a glob pattern, remove duplicates.
|
||||||
|
-- Ie if this glob pattern matches any files we have already processed (or the current file),
|
||||||
|
-- due to multiple includes in sequence or in a cycle, exclude those files so they're not processed again.
|
||||||
|
--
|
||||||
|
-- If this include directive uses a literal file path, don't remove duplicates.
|
||||||
|
-- Multiple includes in sequence will cause the included file to be processed multiple times.
|
||||||
|
-- Multiple includes forming a cycle will be detected and reported as an error in parseIncludedFile.
|
||||||
|
let paths' = if isliteral then paths else filter (const True) paths
|
||||||
|
|
||||||
|
-- if there was a reader prefix, apply it to all the file paths
|
||||||
let prefixedpaths = case mprefix of
|
let prefixedpaths = case mprefix of
|
||||||
Nothing -> paths
|
Nothing -> paths'
|
||||||
Just fmt -> map ((show fmt++":")++) paths
|
Just fmt -> map ((show fmt++":")++) paths'
|
||||||
-- parse them inline
|
|
||||||
|
-- parse each one, as if inlined here
|
||||||
forM_ prefixedpaths $ parseIncludedFile off pos
|
forM_ prefixedpaths $ parseIncludedFile off pos
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
-- Find the files matched by a glob pattern, if any, using filepattern.
|
-- | Find the files matched by a literal path or a glob pattern.
|
||||||
-- Uses the current parse context for detecting the current directory and for error messages.
|
-- Uses the current parse context for detecting the current directory and for error messages.
|
||||||
-- This one also ignores all dotted directories (anything under .git/, foo/.secret/, etc.)
|
-- Expands a leading tilde to the user's home directory.
|
||||||
getFilePaths :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m [FilePath]
|
-- Glob patterns at the start of a path component exclude dot-named files and directories.
|
||||||
getFilePaths off pos globpattern = do
|
--
|
||||||
|
-- Checks if any matched paths are directories and excludes those.
|
||||||
|
-- Converts all matched paths to their canonical form.
|
||||||
|
findMatchedFiles :: MonadIO m => Int -> SourcePos -> FilePath -> JournalParser m ([FilePath], Bool)
|
||||||
|
findMatchedFiles off pos globpattern = do
|
||||||
|
-- Some notes about the Glob library:
|
||||||
|
-- ----------------------------------
|
||||||
|
-- It does not expand tilde.
|
||||||
|
-- It does not canonicalise paths.
|
||||||
|
-- The results are not in any particular order.
|
||||||
|
-- The results can include directories.
|
||||||
|
-- DIRPAT/ is equivalent to DIRPAT, except results will end with // (double slash).
|
||||||
|
-- . or .. at the start of a pattern can match the current or parent directories.
|
||||||
|
-- * matches any number of characters in a file or directory name.
|
||||||
|
-- * at the start of a file name ignores dot-named files and directories, by default.
|
||||||
|
-- ** (or any number of consecutive *'s) not followed by slash is equivalent to *.
|
||||||
|
-- A **/ component matches any number of directory parts.
|
||||||
|
-- A **/ ignores dot-named directories in its starting and ending directories, by default.
|
||||||
|
-- But **/ does search intermediate dot-named directories. Eg it can find a/.b/c.
|
||||||
|
|
||||||
-- expand a tilde at the start of the glob pattern, or throw an error
|
-- expand a tilde at the start of the glob pattern, or throw an error
|
||||||
expandedglob <- lift $ expandHomePath globpattern `orRethrowIOError` "failed to expand ~"
|
expandedglob <- lift $ expandHomePath globpattern `orRethrowIOError` "failed to expand ~"
|
||||||
@ -345,29 +382,43 @@ includedirectivep = do
|
|||||||
parentfile <- sourcePosFilePath pos
|
parentfile <- sourcePosFilePath pos
|
||||||
let cwd = takeDirectory parentfile
|
let cwd = takeDirectory parentfile
|
||||||
|
|
||||||
-- Find all matched files, in lexicographic order (the order ls would normally show them).
|
-- Compile as a Glob Pattern. Can throw an error.
|
||||||
-- (This might include the current file.)
|
g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob of
|
||||||
-- 1. Old implementation, using Glob XXX keep providing this via --old-glob for a bit
|
|
||||||
_g <- case tryCompileWith compDefault{errorRecovery=False} expandedglob of
|
|
||||||
Right x -> pure x
|
|
||||||
Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e
|
Left e -> customFailure $ parseErrorAt off $ "Invalid glob pattern: " ++ e
|
||||||
_filepaths <- liftIO $ (dbg6 (parentfile <> " include: matched files") . sort) <$> globDir1 _g cwd
|
Right _ | "***" `isInfixOf` expandedglob -> customFailure $ parseErrorAt off $ "Invalid glob pattern: too many stars"
|
||||||
-- 2. New implementation, using filepattern
|
Right x -> pure x
|
||||||
filepaths <- liftIO $
|
|
||||||
map (dbg6 "cwd" cwd </>)
|
|
||||||
-- . sort -- XXX needed ?
|
|
||||||
<$>
|
|
||||||
getDirectoryFilesIgnore cwd [expandedglob] ["**/.*/**"]
|
|
||||||
|
|
||||||
-- Throw an error if no files (not even the current file) were matched.
|
-- Find all matched paths, in lexicographic order (the order ls would normally show them).
|
||||||
when (null filepaths) $
|
-- These might include directories or the current file.
|
||||||
customFailure $ parseErrorAt off $ "No files were matched by file pattern: " ++ globpattern
|
paths <- liftIO $
|
||||||
|
-- (dbg6 (parentfile <> " include: matched paths") . sort) <$>
|
||||||
|
globDir1 g cwd
|
||||||
|
|
||||||
-- If the current file was matched, exclude it now.
|
-- Exclude any directories or symlinks to directories, and canonicalise
|
||||||
let filepaths' = filter (/= parentfile) filepaths
|
files <- liftIO $
|
||||||
dbg6IO (parentfile <> " include: matched files (excluding current file)") filepaths'
|
filterM doesFileExist paths
|
||||||
|
>>= mapM canonicalizePath
|
||||||
|
<&> (dbg6 (parentfile <> " include: matched files") . sort)
|
||||||
|
|
||||||
pure filepaths'
|
-- If a glob was used: exclude any intermediate dot directories that were searched.
|
||||||
|
-- As noted above, while **/ ignores dot dirs in the starting and ending dirs,
|
||||||
|
-- it does search dot dirs in between those two (something that should be fixed in Glob ?).
|
||||||
|
-- This seems likely to be inconvenient, eg when trying to avoid .git directories in subrepos.
|
||||||
|
-- So as an imperfect workaround: when using any glob, exclude all paths involving dot dirs.
|
||||||
|
-- Unfortunately this means valid globs like .dotdir/* will not succeed; only a literal
|
||||||
|
-- .dotdir/foo would work there.
|
||||||
|
let
|
||||||
|
files' = if isLiteral g then files else filter (not.hasdotdir) files
|
||||||
|
where
|
||||||
|
hasdotdir p = any isdotdir $ splitPath p
|
||||||
|
where
|
||||||
|
isdotdir c = "." `isPrefixOf` c && "/" `isSuffixOf` c
|
||||||
|
|
||||||
|
-- Throw an error if no files were matched.
|
||||||
|
when (null files') $
|
||||||
|
customFailure $ parseErrorAt off $ "No files were matched by glob pattern: " ++ globpattern
|
||||||
|
|
||||||
|
return (files', isLiteral g)
|
||||||
|
|
||||||
-- Parse the given included file (and any deeper includes, recursively)
|
-- Parse the given included file (and any deeper includes, recursively)
|
||||||
-- as if it was inlined in the current (parent) file.
|
-- as if it was inlined in the current (parent) file.
|
||||||
@ -1195,8 +1246,8 @@ tests_JournalReader = testGroup "JournalReader" [
|
|||||||
assertParse ignoredpricecommoditydirectivep "N $\n"
|
assertParse ignoredpricecommoditydirectivep "N $\n"
|
||||||
|
|
||||||
,testGroup "includedirectivep" [
|
,testGroup "includedirectivep" [
|
||||||
testCase "include" $ assertParseErrorE includedirectivep "include nosuchfile\n" "No existing files match pattern: nosuchfile"
|
testCase "include" $ assertParseErrorE includedirectivep "include nosuchfile\n" "No files were matched by glob pattern: nosuchfile"
|
||||||
,testCase "glob" $ assertParseErrorE includedirectivep "include nosuchfile*\n" "No existing files match pattern: nosuchfile*"
|
,testCase "glob" $ assertParseErrorE includedirectivep "include nosuchfile*\n" "No files were matched by glob pattern: nosuchfile*"
|
||||||
]
|
]
|
||||||
|
|
||||||
,testCase "marketpricedirectivep" $ assertParseEq marketpricedirectivep
|
,testCase "marketpricedirectivep" $ assertParseEq marketpricedirectivep
|
||||||
|
|||||||
@ -148,7 +148,6 @@ library
|
|||||||
, extra >=1.7.11
|
, extra >=1.7.11
|
||||||
, file-embed >=0.0.10
|
, file-embed >=0.0.10
|
||||||
, filepath
|
, filepath
|
||||||
, filepattern >=0.1.3
|
|
||||||
, hashtables >=1.2.3.1
|
, hashtables >=1.2.3.1
|
||||||
, lucid
|
, lucid
|
||||||
, megaparsec >=7.0.0 && <9.8
|
, megaparsec >=7.0.0 && <9.8
|
||||||
@ -209,7 +208,6 @@ test-suite doctest
|
|||||||
, extra >=1.7.11
|
, extra >=1.7.11
|
||||||
, file-embed >=0.0.10
|
, file-embed >=0.0.10
|
||||||
, filepath
|
, filepath
|
||||||
, filepattern >=0.1.3
|
|
||||||
, hashtables >=1.2.3.1
|
, hashtables >=1.2.3.1
|
||||||
, lucid
|
, lucid
|
||||||
, megaparsec >=7.0.0 && <9.8
|
, megaparsec >=7.0.0 && <9.8
|
||||||
@ -271,7 +269,6 @@ test-suite unittest
|
|||||||
, extra >=1.7.11
|
, extra >=1.7.11
|
||||||
, file-embed >=0.0.10
|
, file-embed >=0.0.10
|
||||||
, filepath
|
, filepath
|
||||||
, filepattern >=0.1.3
|
|
||||||
, hashtables >=1.2.3.1
|
, hashtables >=1.2.3.1
|
||||||
, hledger-lib
|
, hledger-lib
|
||||||
, lucid
|
, lucid
|
||||||
|
|||||||
@ -94,7 +94,6 @@ dependencies:
|
|||||||
- utf8-string >=0.3.5
|
- utf8-string >=0.3.5
|
||||||
- extra >=1.7.11
|
- extra >=1.7.11
|
||||||
- Glob >= 0.9
|
- Glob >= 0.9
|
||||||
- filepattern >=0.1.3
|
|
||||||
|
|
||||||
ghc-options:
|
ghc-options:
|
||||||
- -Wall
|
- -Wall
|
||||||
|
|||||||
@ -29,37 +29,7 @@ $ printf '2018/01/01\n (A) 1\n' >f.journal; hledger -f - print; rm -f f.journa
|
|||||||
|
|
||||||
>=
|
>=
|
||||||
|
|
||||||
# ** 3. include with no argument: argument error
|
# ** 3. A leading tilde is expanded to $HOME.
|
||||||
<
|
|
||||||
include
|
|
||||||
$ hledger -f- files
|
|
||||||
>2 /include needs a.*argument/
|
|
||||||
>=1
|
|
||||||
|
|
||||||
# ** 4. include a nonexistent file: no files matched error
|
|
||||||
<
|
|
||||||
include nosuchfile
|
|
||||||
$ hledger -f- files
|
|
||||||
>2 /No files were matched/
|
|
||||||
>=1
|
|
||||||
|
|
||||||
# # ** 5. include a directory: no files matched error
|
|
||||||
# <
|
|
||||||
# include b
|
|
||||||
# $ hledger -f- files
|
|
||||||
# >2 /No files were matched/
|
|
||||||
# >=1
|
|
||||||
|
|
||||||
# ** 6. Including the current file is harmless, it's ignored automatically.
|
|
||||||
$ hledger -f self.j files
|
|
||||||
> /self\.j$/
|
|
||||||
|
|
||||||
# # ** 7. Including a cycle is harmless, the cycle is pruned automatically.
|
|
||||||
# $ hledger -f .cycle/cycle.j files
|
|
||||||
# > /cyc/
|
|
||||||
# >=1
|
|
||||||
|
|
||||||
# ** 8. A leading tilde is expanded to $HOME.
|
|
||||||
<
|
<
|
||||||
include ~/included.journal
|
include ~/included.journal
|
||||||
$ printf '2018/01/01\n (A) 1\n' >included.journal; HOME="$PWD" hledger -f - print; rm -rf included.journal
|
$ printf '2018/01/01\n (A) 1\n' >included.journal; HOME="$PWD" hledger -f - print; rm -rf included.journal
|
||||||
@ -68,31 +38,102 @@ $ printf '2018/01/01\n (A) 1\n' >included.journal; HOME="$PWD" hledger -f - pr
|
|||||||
|
|
||||||
>= 0
|
>= 0
|
||||||
|
|
||||||
# # ** 9. Glob patterns. Include a malformed glob -> glob error
|
# ** 4. include with no argument -> argument error
|
||||||
# <
|
<
|
||||||
# include ***
|
include
|
||||||
# $ hledger -f- files
|
$ hledger -f- files
|
||||||
# >2 /malformed/
|
>2 /include needs a.*argument/
|
||||||
# >=1
|
>=1
|
||||||
|
|
||||||
# ** 10. Include invalid glob patterns
|
# ** 5. include a nonexistent file -> no files matched error
|
||||||
|
<
|
||||||
|
include nosuchfile
|
||||||
|
$ hledger -f- files
|
||||||
|
>2 /No files were matched/
|
||||||
|
>=1
|
||||||
|
|
||||||
|
# ** 6. Including a directory literally -> no files matched error
|
||||||
|
<
|
||||||
|
include b
|
||||||
|
$ hledger -f- files
|
||||||
|
>2 /No files were matched/
|
||||||
|
>=1
|
||||||
|
|
||||||
|
# ** 7. Include invalid glob patterns -> invalid glob error
|
||||||
<
|
<
|
||||||
include [.journal
|
include [.journal
|
||||||
$ hledger -f - print
|
$ hledger -f - print
|
||||||
>2 //
|
>2 /Invalid glob/
|
||||||
>= 1
|
>= 1
|
||||||
|
|
||||||
# # ** 11. Include ** -> all files
|
# ** 8. Three or more *'s -> glob error
|
||||||
# $ hledger -f glob1.j files
|
<
|
||||||
|
include ***
|
||||||
|
$ hledger -f- files
|
||||||
|
>2 /Invalid glob/
|
||||||
|
>=1
|
||||||
|
|
||||||
# # ** 12. Include **.EXT -> ?
|
# ** 9. Including the current file literally -> cycle error.
|
||||||
# $ hledger -f glob2.j files
|
$ hledger -f self.j files
|
||||||
|
>2 /cycle/
|
||||||
|
>=1
|
||||||
|
|
||||||
# # ** 13. Include */**.EXT -> ?
|
# ** 10. Including the current file via glob -> cycle error.
|
||||||
# $ hledger -f glob3.j files
|
$ hledger -f selfglob.j files
|
||||||
|
>2 /cycle/
|
||||||
|
>=1
|
||||||
|
|
||||||
# # ** 14. Include **/*.EXT -> all files in or below cwd with .EXT
|
# ** 11. Including a cycle, all literally -> cycle error
|
||||||
# $ hledger -f glob4.j files
|
$ hledger -f .cycle/cycle.j files
|
||||||
|
>2 /cycle/
|
||||||
|
>=1
|
||||||
|
|
||||||
# # ** 15. Include */**/*.EXT -> all files below cwd with .EXT
|
# ** 12. Including a cycle, involving globs -> cycle error
|
||||||
# $ hledger -f glob5.j files
|
$ hledger -f .cycle/cycleglob.j files
|
||||||
|
>2 /cycle/
|
||||||
|
>=1
|
||||||
|
|
||||||
|
# ** 13. Include ** -> cycle error (includes current file)
|
||||||
|
$ hledger -f glob1.j files
|
||||||
|
>2 /cycle/
|
||||||
|
>=1
|
||||||
|
|
||||||
|
# ** 14. Include **.j -> cycle error (includes current file)
|
||||||
|
$ hledger -f glob2.j files
|
||||||
|
>2 /cycle/
|
||||||
|
>=1
|
||||||
|
|
||||||
|
# ** 15. Include */**.j -> all .j files in subdirectories (**.j is same as *.j)
|
||||||
|
$ hledger -f glob3.j files | sed -E 's|.*hledger/test/journal/include/||'
|
||||||
|
glob3.j
|
||||||
|
b/b.j
|
||||||
|
c/c.j
|
||||||
|
|
||||||
|
# ** 16. Include **/*.j -> cycle error (includes current file)
|
||||||
|
$ hledger -f glob4.j files
|
||||||
|
>2 /cycle/
|
||||||
|
>=1
|
||||||
|
|
||||||
|
# ** 17. Include */**/*.j -> all non-dot .j files in or below non-dot subdirectories.
|
||||||
|
<
|
||||||
|
include */**/*.j
|
||||||
|
$ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||'
|
||||||
|
-
|
||||||
|
b/b.j
|
||||||
|
b/bb/bb.j
|
||||||
|
c/c.j
|
||||||
|
|
||||||
|
# ** 18. To avoid intermediate dot dirs in the above, we exclude all glob-matched paths involving dot dirs.
|
||||||
|
# So this does not find b/bb/.dotdir/dotdirbb.j, unfortunately:
|
||||||
|
<
|
||||||
|
include b/.dotdir/*.j
|
||||||
|
$ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||'
|
||||||
|
>2 /No files were matched/
|
||||||
|
# sed hides the non-zero exit code
|
||||||
|
|
||||||
|
# ** 19. Only a literal path can find it.
|
||||||
|
<
|
||||||
|
include b/.dotdir/dotdirb.j
|
||||||
|
$ hledger -f - files | sed -E 's|.*hledger/test/journal/include/||'
|
||||||
|
-
|
||||||
|
b/.dotdir/dotdirb.j
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user