journal, lib: the include directive no longer guesses the format

The include directive now tries just one reader, based on the file
extension and defaulting to journal, like the rest of hledger.
(It doesn't yet handle a reader prefix.)

Reader-finding utilities have moved from Hledger.Read to
Hledger.Read.JournalReader so the include directive can use them.

Reader changes:
- rExperimental flag removed
- old rParser renamed to rReadFn
- new rParser field provides the actual parser.
  This seems to require making Reader a higher-kinded type, unfortunately.
This commit is contained in:
Simon Michael 2020-03-01 10:16:52 -08:00
parent b1f3880c3d
commit b9954bff60
6 changed files with 92 additions and 73 deletions

View File

@ -30,7 +30,6 @@ module Hledger.Read (
readJournalFile, readJournalFile,
requireJournalFileExists, requireJournalFileExists,
ensureJournalFileExists, ensureJournalFileExists,
splitReaderPrefix,
-- * Journal parsing -- * Journal parsing
readJournal, readJournal,
@ -39,6 +38,8 @@ module Hledger.Read (
-- * Re-exported -- * Re-exported
JournalReader.accountaliasp, JournalReader.accountaliasp,
JournalReader.postingp, JournalReader.postingp,
findReader,
splitReaderPrefix,
module Hledger.Read.Common, module Hledger.Read.Common,
-- * Tests -- * Tests
@ -70,10 +71,10 @@ import Text.Printf
import Hledger.Data.Dates (getCurrentDay, parsedate, showDate) import Hledger.Data.Dates (getCurrentDay, parsedate, showDate)
import Hledger.Data.Types import Hledger.Data.Types
import Hledger.Read.Common import Hledger.Read.Common
import Hledger.Read.JournalReader as JournalReader import Hledger.Read.JournalReader as JournalReader
import qualified Hledger.Read.TimedotReader as TimedotReader import Hledger.Read.CsvReader (tests_CsvReader)
import qualified Hledger.Read.TimeclockReader as TimeclockReader -- import Hledger.Read.TimedotReader (tests_TimedotReader)
import Hledger.Read.CsvReader as CsvReader -- import Hledger.Read.TimeclockReader (tests_TimeclockReader)
import Hledger.Utils import Hledger.Utils
import Prelude hiding (getContents, writeFile) import Prelude hiding (getContents, writeFile)
@ -85,19 +86,6 @@ journalDefaultFilename = ".hledger.journal"
-- ** journal reading -- ** journal reading
-- The available journal readers, each one handling a particular data format.
readers :: [Reader]
readers = [
JournalReader.reader
,TimeclockReader.reader
,TimedotReader.reader
,CsvReader.reader
-- ,LedgerReader.reader
]
readerNames :: [String]
readerNames = map rFormat readers
-- | Read a Journal from the given text, assuming journal format; or -- | Read a Journal from the given text, assuming journal format; or
-- throw an error. -- throw an error.
readJournal' :: Text -> IO Journal readJournal' :: Text -> IO Journal
@ -120,28 +108,10 @@ readJournal' t = readJournal def Nothing t >>= either error' return
-- since hledger 1.17, we prefer predictability.) -- since hledger 1.17, we prefer predictability.)
readJournal :: InputOpts -> Maybe FilePath -> Text -> IO (Either String Journal) readJournal :: InputOpts -> Maybe FilePath -> Text -> IO (Either String Journal)
readJournal iopts mpath txt = do readJournal iopts mpath txt = do
let r :: Reader IO =
fromMaybe JournalReader.reader $ findReader (mformat_ iopts) mpath
dbg1IO "trying reader" (rFormat r) dbg1IO "trying reader" (rFormat r)
ej <- (runExceptT . (rParser r) iopts (fromMaybe "(string)" mpath)) txt (runExceptT . (rReadFn r) iopts (fromMaybe "(string)" mpath)) txt
dbg1IO "reader result" (' ':show ej)
return ej
where
r = fromMaybe JournalReader.reader $ findReader (mformat_ iopts) mpath
-- | @findReader mformat mpath@
--
-- Find the reader named by @mformat@, if provided.
-- Or, if a file path is provided, find the first reader that handles
-- its file extension, if any.
findReader :: Maybe StorageFormat -> Maybe FilePath -> Maybe Reader
findReader Nothing Nothing = Nothing
findReader (Just fmt) _ = headMay [r | r <- readers, rFormat r == fmt]
findReader Nothing (Just path) =
case prefix of
Just fmt -> headMay [r | r <- readers, rFormat r == fmt]
Nothing -> headMay [r | r <- readers, ext `elem` rExtensions r]
where
(prefix,path') = splitReaderPrefix path
ext = drop 1 $ takeExtension path'
-- | Read the default journal file specified by the environment, or raise an error. -- | Read the default journal file specified by the environment, or raise an error.
defaultJournal :: IO Journal defaultJournal :: IO Journal
@ -218,13 +188,6 @@ readJournalFile iopts prefixedfile = do
-- ** utilities -- ** utilities
-- | If a filepath is prefixed by one of the reader names and a colon,
-- split that off. Eg "csv:-" -> (Just "csv", "-").
splitReaderPrefix :: PrefixedFilePath -> (Maybe String, FilePath)
splitReaderPrefix f =
headDef (Nothing, f)
[(Just r, drop (length r + 1) f) | r <- readerNames, (r++":") `isPrefixOf` f]
-- | If the specified journal file does not exist (and is not "-"), -- | If the specified journal file does not exist (and is not "-"),
-- give a helpful error and quit. -- give a helpful error and quit.
requireJournalFileExists :: FilePath -> IO () requireJournalFileExists :: FilePath -> IO ()

View File

@ -21,6 +21,7 @@ Some of these might belong in Hledger.Read.JournalReader or Hledger.Read.
{-# LANGUAGE NoMonoLocalBinds #-} {-# LANGUAGE NoMonoLocalBinds #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-}
@ -150,7 +151,9 @@ import Hledger.Utils
-- | A hledger journal reader is a triple of storage format name, a -- | A hledger journal reader is a triple of storage format name, a
-- detector of that format, and a parser from that format to Journal. -- detector of that format, and a parser from that format to Journal.
data Reader = Reader { -- The type variable m appears here so that rParserr can hold a
-- journal parser, which depends on it.
data Reader m = Reader {
-- The canonical name of the format handled by this reader -- The canonical name of the format handled by this reader
rFormat :: StorageFormat rFormat :: StorageFormat
@ -158,16 +161,17 @@ data Reader = Reader {
-- The file extensions recognised as containing this format -- The file extensions recognised as containing this format
,rExtensions :: [String] ,rExtensions :: [String]
-- A text parser for this format, accepting input options, file -- The entry point for reading this format, accepting input options, file
-- path for error messages and file contents, producing an exception-raising IO -- path for error messages and file contents, producing an exception-raising IO
-- action that returns a journal or error message. -- action that produces a journal or error message.
,rParser :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal ,rReadFn :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
-- Experimental readers are never tried automatically. -- The actual megaparsec parser called by the above, in case
,rExperimental :: Bool -- another parser (includedirectivep) wants to use it directly.
,rParser :: MonadIO m => ErroringJournalParser m ParsedJournal
} }
instance Show Reader where show r = rFormat r ++ " reader" instance Show (Reader m) where show r = rFormat r ++ " reader"
-- $setup -- $setup
@ -570,6 +574,7 @@ accountnamep = singlespacedtextp
-- | Parse any text beginning with a non-whitespace character, until a -- | Parse any text beginning with a non-whitespace character, until a
-- double space or the end of input. -- double space or the end of input.
-- TODO including characters which normally start a comment (;#) - exclude those ?
singlespacedtextp :: TextParser m T.Text singlespacedtextp :: TextParser m T.Text
singlespacedtextp = singlespacedtextsatisfyingp (const True) singlespacedtextp = singlespacedtextsatisfyingp (const True)

View File

@ -50,7 +50,7 @@ import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail
import Control.Exception (IOException, handle, throw) import Control.Exception (IOException, handle, throw)
import Control.Monad (liftM, unless, when) import Control.Monad (liftM, unless, when)
import Control.Monad.Except (ExceptT, throwError) import Control.Monad.Except (ExceptT, throwError)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.State.Strict (StateT, get, modify', evalStateT) import Control.Monad.State.Strict (StateT, get, modify', evalStateT)
import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.Class (lift)
import Data.Char (toLower, isDigit, isSpace, ord) import Data.Char (toLower, isDigit, isSpace, ord)
@ -95,12 +95,12 @@ type CsvValue = String
-- ** reader -- ** reader
reader :: Reader reader :: MonadIO m => Reader m
reader = Reader reader = Reader
{rFormat = "csv" {rFormat = "csv"
,rExtensions = ["csv","tsv","ssv"] ,rExtensions = ["csv","tsv","ssv"]
,rParser = parse ,rReadFn = parse
,rExperimental = False ,rParser = error' "sorry, CSV files can't be included yet"
} }
-- | Parse and post-process a "Journal" from CSV data, or give an error. -- | Parse and post-process a "Journal" from CSV data, or give an error.

View File

@ -43,6 +43,10 @@ Hledger.Read.Common, to avoid import cycles.
-- ** exports -- ** exports
module Hledger.Read.JournalReader ( module Hledger.Read.JournalReader (
-- * Reader-finding utils
findReader,
splitReaderPrefix,
-- * Reader -- * Reader
reader, reader,
@ -89,6 +93,7 @@ import Data.Monoid ((<>))
import Data.Text (Text) import Data.Text (Text)
import Data.String import Data.String
import Data.List import Data.List
import Data.Maybe
import qualified Data.Text as T import qualified Data.Text as T
import Data.Time.Calendar import Data.Time.Calendar
import Data.Time.LocalTime import Data.Time.LocalTime
@ -102,18 +107,64 @@ import "Glob" System.FilePath.Glob hiding (match)
import Hledger.Data import Hledger.Data
import Hledger.Read.Common import Hledger.Read.Common
import Hledger.Read.TimeclockReader (timeclockfilep)
import Hledger.Read.TimedotReader (timedotfilep)
import Hledger.Utils import Hledger.Utils
import qualified Hledger.Read.TimedotReader as TimedotReader (reader)
import qualified Hledger.Read.TimeclockReader as TimeclockReader (reader)
import qualified Hledger.Read.CsvReader as CsvReader (reader)
-- ** reader finding utilities
-- Defined here rather than Hledger.Read so that we can use them in includedirectivep below.
-- The available journal readers, each one handling a particular data format.
readers' :: MonadIO m => [Reader m]
readers' = [
reader
,TimeclockReader.reader
,TimedotReader.reader
,CsvReader.reader
-- ,LedgerReader.reader
]
readerNames :: [String]
readerNames = map rFormat (readers'::[Reader IO])
-- | @findReader mformat mpath@
--
-- Find the reader named by @mformat@, if provided.
-- Or, if a file path is provided, find the first reader that handles
-- its file extension, if any.
findReader :: MonadIO m => Maybe StorageFormat -> Maybe FilePath -> Maybe (Reader m)
findReader Nothing Nothing = Nothing
findReader (Just fmt) _ = headMay [r | r <- readers', rFormat r == fmt]
findReader Nothing (Just path) =
case prefix of
Just fmt -> headMay [r | r <- readers', rFormat r == fmt]
Nothing -> headMay [r | r <- readers', ext `elem` rExtensions r]
where
(prefix,path') = splitReaderPrefix path
ext = drop 1 $ takeExtension path'
-- | A file path optionally prefixed by a reader name and colon
-- (journal:, csv:, timedot:, etc.).
type PrefixedFilePath = FilePath
-- | If a filepath is prefixed by one of the reader names and a colon,
-- split that off. Eg "csv:-" -> (Just "csv", "-").
splitReaderPrefix :: PrefixedFilePath -> (Maybe String, FilePath)
splitReaderPrefix f =
headDef (Nothing, f)
[(Just r, drop (length r + 1) f) | r <- readerNames, (r++":") `isPrefixOf` f]
-- ** reader -- ** reader
reader :: Reader reader :: MonadIO m => Reader m
reader = Reader reader = Reader
{rFormat = "journal" {rFormat = "journal"
,rExtensions = ["journal", "j", "hledger", "ledger"] ,rExtensions = ["journal", "j", "hledger", "ledger"]
,rParser = parse ,rReadFn = parse
,rExperimental = False ,rParser = journalp -- no need to add command line aliases like journalp'
-- when called as a subparser I think
} }
-- | Parse and post-process a "Journal" from hledger's journal file -- | Parse and post-process a "Journal" from hledger's journal file
@ -234,11 +285,11 @@ includedirectivep = do
`orRethrowIOError` (show parentpos ++ " reading " ++ filepath) `orRethrowIOError` (show parentpos ++ " reading " ++ filepath)
let initChildj = newJournalWithParseStateFrom filepath parentj let initChildj = newJournalWithParseStateFrom filepath parentj
let parser = choiceInState -- Choose a reader/format based on the file path, or fall back
[ journalp -- on journal. Duplicating readJournal a bit here.
, timeclockfilep let r = fromMaybe reader $ findReader Nothing (Just filepath)
, timedotfilep parser = rParser r
] -- can't include a csv file yet, that reader is special dbg1IO "trying reader" (rFormat r)
updatedChildj <- journalAddFile (filepath, childInput) <$> updatedChildj <- journalAddFile (filepath, childInput) <$>
parseIncludeFile parser initChildj filepath childInput parseIncludeFile parser initChildj filepath childInput

View File

@ -78,12 +78,12 @@ import Hledger.Utils
-- ** reader -- ** reader
reader :: Reader reader :: MonadIO m => Reader m
reader = Reader reader = Reader
{rFormat = "timeclock" {rFormat = "timeclock"
,rExtensions = ["timeclock"] ,rExtensions = ["timeclock"]
,rParser = parse ,rReadFn = parse
,rExperimental = False ,rParser = timeclockfilep
} }
-- | Parse and post-process a "Journal" from timeclock.el's timeclock -- | Parse and post-process a "Journal" from timeclock.el's timeclock

View File

@ -63,12 +63,12 @@ import Hledger.Utils
-- ** reader -- ** reader
reader :: Reader reader :: MonadIO m => Reader m
reader = Reader reader = Reader
{rFormat = "timedot" {rFormat = "timedot"
,rExtensions = ["timedot"] ,rExtensions = ["timedot"]
,rParser = parse ,rReadFn = parse
,rExperimental = False ,rParser = timedotp
} }
-- | Parse and post-process a "Journal" from the timedot format, or give an error. -- | Parse and post-process a "Journal" from the timedot format, or give an error.