dev:rules reader: drop "fall back to reading latest archived"
This commit is contained in:
parent
c60ec90756
commit
b64ddfe813
@ -62,8 +62,9 @@ import qualified Data.Csv.Parser.Megaparsec as CassavaMegaparsec
|
||||
import Data.Encoding (encodingFromStringExplicit)
|
||||
import Data.Either (fromRight)
|
||||
import Data.Functor ((<&>))
|
||||
import Data.List (elemIndex, mapAccumL, nub, sortOn, isPrefixOf, sortBy)
|
||||
import Data.Ord (Down(..), comparing)
|
||||
import Data.List (elemIndex, mapAccumL, nub, sortOn)
|
||||
-- import Data.List (elemIndex, mapAccumL, nub, sortOn, isPrefixOf, sortBy)
|
||||
-- import Data.Ord (Down(..), comparing)
|
||||
#if !MIN_VERSION_base(4,20,0)
|
||||
import Data.List (foldl')
|
||||
#endif
|
||||
@ -77,8 +78,9 @@ import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.IO as T
|
||||
import Data.Time ( Day, TimeZone, UTCTime, LocalTime, ZonedTime(ZonedTime),
|
||||
defaultTimeLocale, getCurrentTimeZone, localDay, parseTimeM, utcToLocalTime, localTimeToUTC, zonedTimeToUTC, utctDay)
|
||||
import Safe (atMay, headMay, lastMay, readMay, headDef)
|
||||
import System.Directory (createDirectoryIfMissing, doesFileExist, getHomeDirectory, getModificationTime, listDirectory, renameFile, doesDirectoryExist)
|
||||
import Safe (atMay, headMay, lastMay, readMay)
|
||||
import System.Directory (createDirectoryIfMissing, doesFileExist, getHomeDirectory, getModificationTime, renameFile)
|
||||
-- import System.Directory (createDirectoryIfMissing, doesFileExist, getHomeDirectory, getModificationTime, listDirectory, renameFile, doesDirectoryExist)
|
||||
import System.Exit (ExitCode(..))
|
||||
import System.FilePath (stripExtension, takeBaseName, takeDirectory, takeExtension, takeFileName, (<.>), (</>))
|
||||
import System.IO (Handle, hClose, hPutStrLn, stderr, hGetContents')
|
||||
@ -189,12 +191,17 @@ parse iopts rulesfile h = do
|
||||
where err = error' $ "could not infer a data file for " <> rulesfile
|
||||
Just glb -> do
|
||||
let (dir,desc) = if isFileName glb then (dldir," in download directory") else (rulesdir,"")
|
||||
globmatches <- expandGlob dir (dbg4 "source rule" glb) >>= sortByModTime <&> dbg4 ("matched files"<>desc<>", oldest first")
|
||||
case globmatches of
|
||||
-- if the source rule matched no files, and we are reading not importing, use the most recent archive file
|
||||
[] | archive && not cmdisimport -> do
|
||||
archivesFor archivedir rulesfile <&> take 1 <&> dbg4 "latest file in archive directory"
|
||||
_ -> return globmatches
|
||||
expandGlob dir (dbg4 "source rule" glb) >>= sortByModTime <&> dbg4 ("matched files"<>desc<>", oldest first")
|
||||
-- XXX disabled for now (too much complication). The disabled feature, for easy review of recently imported data:
|
||||
-- `archive` also affects non-`import` commands reading the rules file:
|
||||
-- when the `source` rule's glob pattern matches no files (no new downloads are available),
|
||||
-- they will use the archive as a fallback (reading the newest archived file, if any).
|
||||
-- if the source rule matched no files and we are reading not importing, use the most recent archived file.
|
||||
-- case globmatches of
|
||||
-- [] | archive && not cmdisimport -> do
|
||||
-- archivesFor archivedir rulesfile <&> take 1 <&> dbg4 "latest file in archive directory"
|
||||
-- _ -> return globmatches -- XXX don't let it be cleaned again
|
||||
|
||||
return $ case datafiles of
|
||||
[] -> (Nothing, Nothing)
|
||||
[f] | cmdisimport -> dbg4 "importing" (Just f , mcmd)
|
||||
@ -285,25 +292,25 @@ archiveFileName rulesfile datafile = do
|
||||
,base <.> moddate <.> ext
|
||||
)
|
||||
|
||||
-- | List the archived data files that belong to the given rules file, looking in the given archive directory.
-- When that directory does not exist, the result is the empty list.
-- Paths come back in descending name order, which is newest first as long as the usual archive file names are used.
--
-- The data files' extension is not known here, so a file is accepted when its name begins with
-- the rules file's base name and a dot, and the following part (up to the next dot) is accepted by 'parsedate'.
--
archivesFor :: FilePath -> FilePath -> IO [FilePath]
archivesFor archivedir rulesfile = do
  direxists <- doesDirectoryExist archivedir
  if direxists
    then do
      names <- listDirectory archivedir
      return $ map (archivedir </>) $ sortBy (comparing Down) $ filter isArchiveName names
    else return []
  where
    -- The rules file's base name plus the dot that precedes the date part.
    stem = takeBaseName rulesfile <> "."
    -- True when the name is the stem followed by a part that parses as a date.
    isArchiveName name =
      stem `isPrefixOf` name
        && isJust (parsedate $ takeWhile (/= '.') $ drop (length stem) name)
|
||||
-- -- | In the given archive directory, if it exists, find the paths of data files saved for the given rules file.
|
||||
-- -- They will be reverse sorted by name, ie newest first, assuming normal archive file names.
|
||||
-- --
|
||||
-- -- We don't know which extension the data files use, but we look for file names beginning with
|
||||
-- -- the rules file's base name followed by .YYYY-MM-DD, which will normally be good enough.
|
||||
-- --
|
||||
-- archivesFor :: FilePath -> FilePath -> IO [FilePath]
|
||||
-- archivesFor archivedir rulesfile = do
|
||||
-- exists <- doesDirectoryExist archivedir
|
||||
-- if not exists then return []
|
||||
-- else do
|
||||
-- let prefix = takeBaseName rulesfile <> "."
|
||||
-- fs <- listDirectory archivedir
|
||||
-- return $ map (archivedir </>) $ sortBy (comparing Down)
|
||||
-- [f | f <- fs,
|
||||
-- prefix `isPrefixOf` f,
|
||||
-- let nextpart = takeWhile (/= '.') $ drop (length prefix) f,
|
||||
-- isJust $ parsedate nextpart
|
||||
-- ]
|
||||
|
||||
--- ** reading rules files
|
||||
--- *** rules utilities
|
||||
|
||||
@ -3303,15 +3303,16 @@ and should output zero or more lines of character-separated-values, ready for co
|
||||
|
||||
## `archive`
|
||||
|
||||
Adding the `archive` rule causes `import` to archive imported data files to a nearby `data/` directory.
|
||||
This is optional, but can be useful for troubleshooting, regenerating with improved rules, etc.
|
||||
Adding `archive` to a rules file causes the `import` command
|
||||
to archive (move and rename) each imported data file into a nearby `data/` directory.
|
||||
Also, `import` will prefer the oldest of the `source` rule's glob-matched files rather than the newest.
|
||||
(So if there are multiple downloads, they will be imported and archived oldest first.)
|
||||
|
||||
Also, it causes `import` to prefer the oldest data file, when the `source` rule's glob pattern matches multiple files.
|
||||
So multiple downloads will be imported and archived in chronological order (oldest first).
|
||||
|
||||
`archive` also affects non-`import` commands reading the rules file:
|
||||
when the `source` rule's glob pattern matches no files (no new downloads are available),
|
||||
they will use the archive as a fallback (reading the newest archived file, if any).
|
||||
Archiving imported data is optional, but it can be useful for
|
||||
troubleshooting your CSV rules,
|
||||
regenerating entries with improved rules,
|
||||
checking for variations in your bank's CSV,
|
||||
etc.
|
||||
|
||||
## `encoding`
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user