From 855a8f19850de3d4fcbb40715a27edd193aefc19 Mon Sep 17 00:00:00 2001 From: Alex Chen Date: Tue, 25 Sep 2018 16:07:58 -0600 Subject: [PATCH] lib: Re-implement the 'ExceptT' layer of the parser We previously had another parser type, 'type ErroringJournalParser = ExceptT String ...' for throwing parse errors without the possibility of backtracking. This parser type was removed under the assumption that it would be possible to write our parser without this capability. However, after a hairy backtracking bug, we would now prefer to have the option to prevent backtracking. - Define a 'FinalParseError' type specifically for the 'ExceptT' layer - Any parse error can be raised as a "final" parse error - Tracks the stack of include files for parser errors, anticipating the removal of the tracking of stacks of include files in megaparsec 7 - Although a stack of include files is also tracked in the 'StateT Journal' layer of the parser, it seems easier to guarantee correct error messages in the 'ExceptT FinalParserError' layer - This does not make the 'StateT Journal' stack redundant because the 'ExceptT FinalParseError' stack cannot be used to detect cycles of include files --- hledger-lib/Hledger/Read/Common.hs | 43 +++++- hledger-lib/Hledger/Read/JournalReader.hs | 2 +- hledger-lib/Hledger/Read/TimeclockReader.hs | 2 +- hledger-lib/Hledger/Read/TimedotReader.hs | 2 +- hledger-lib/Hledger/Utils/Parse.hs | 7 + hledger-lib/Text/Megaparsec/Custom.hs | 154 +++++++++++++++++++- 6 files changed, 203 insertions(+), 7 deletions(-) diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs index 2b0a475f2..89c1f1880 100644 --- a/hledger-lib/Hledger/Read/Common.hs +++ b/hledger-lib/Hledger/Read/Common.hs @@ -29,10 +29,13 @@ module Hledger.Read.Common ( rtp, runJournalParser, rjp, + runErroringJournalParser, + rejp, genericSourcePos, journalSourcePos, applyTransactionModifiers, parseAndFinaliseJournal, + parseAndFinaliseJournal', setYear, getYear, setDefaultCommodityAndStyle, @@ -99,7 +102,7 @@ where import Prelude () import "base-compat-batteries" Prelude.Compat hiding (readFile) import "base-compat-batteries" Control.Monad.Compat -import Control.Monad.Except (ExceptT(..), throwError) +import Control.Monad.Except (ExceptT(..), runExceptT, throwError) import Control.Monad.State.Strict import Data.Bifunctor (bimap, second) import Data.Char @@ -200,6 +203,16 @@ runJournalParser, rjp :: Monad m => JournalParser m a -> Text -> m (Either (Pars runJournalParser p t = runParserT (evalStateT p mempty) "" t rjp = runJournalParser +-- | Run an erroring journal parser in some monad. See also: parseWithState. +runErroringJournalParser, rejp + :: Monad m + => ErroringJournalParser m a + -> Text + -> m (Either FinalParseError (Either (ParseError Char CustomErr) a)) +runErroringJournalParser p t = + runExceptT $ runParserT (evalStateT p mempty) "" t +rejp = runErroringJournalParser + genericSourcePos :: SourcePos -> GenericSourcePos genericSourcePos p = GenericSourcePos (sourceName p) (fromIntegral . unPos $ sourceLine p) (fromIntegral . unPos $ sourceColumn p) @@ -221,9 +234,32 @@ applyTransactionModifiers j = j { jtxns = map applyallmodifiers $ jtxns j } -- | Given a megaparsec ParsedJournal parser, input options, file -- path and file content: parse and post-process a Journal, or give an error. -parseAndFinaliseJournal :: JournalParser IO ParsedJournal -> InputOpts +parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal parseAndFinaliseJournal parser iopts f txt = do + t <- liftIO getClockTime + y <- liftIO getCurrentYear + let initJournal = nulljournal + { jparsedefaultyear = Just y + , jincludefilestack = [f] } + eep <- liftIO $ runExceptT $ + runParserT (evalStateT parser initJournal) f txt + case eep of + Left finalParseError -> + throwError $ finalParseErrorPretty $ attachSource f txt finalParseError + + Right ep -> case ep of + Left e -> throwError $ customParseErrorPretty txt e + + Right pj -> + let pj' = if auto_ iopts then applyTransactionModifiers pj else pj in + case journalFinalise t f txt (not $ ignore_assertions_ iopts) pj' of + Right j -> return j + Left e -> throwError e + +parseAndFinaliseJournal' :: JournalParser IO ParsedJournal -> InputOpts + -> FilePath -> Text -> ExceptT String IO Journal +parseAndFinaliseJournal' parser iopts f txt = do t <- liftIO getClockTime y <- liftIO getCurrentYear let initJournal = nulljournal @@ -231,12 +267,13 @@ parseAndFinaliseJournal parser iopts f txt = do , jincludefilestack = [f] } ep <- liftIO $ runParserT (evalStateT parser initJournal) f txt case ep of + Left e -> throwError $ customParseErrorPretty txt e + Right pj -> let pj' = if auto_ iopts then applyTransactionModifiers pj else pj in case journalFinalise t f txt (not $ ignore_assertions_ iopts) pj' of Right j -> return j Left e -> throwError e - Left e -> throwError $ customParseErrorPretty txt e setYear :: Year -> JournalParser m () setYear y = modify' (\j -> j{jparsedefaultyear=Just y}) diff --git a/hledger-lib/Hledger/Read/JournalReader.hs b/hledger-lib/Hledger/Read/JournalReader.hs index 1a4f1f84c..91a2bf868 100644 --- a/hledger-lib/Hledger/Read/JournalReader.hs +++ b/hledger-lib/Hledger/Read/JournalReader.hs @@ -106,7 +106,7 @@ reader = Reader -- | Parse and post-process a "Journal" from hledger's journal file -- format, or give an error. parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal -parse iopts = parseAndFinaliseJournal journalp' iopts +parse iopts = parseAndFinaliseJournal' journalp' iopts where journalp' = do -- reverse parsed aliases to ensure that they are applied in order given on commandline diff --git a/hledger-lib/Hledger/Read/TimeclockReader.hs b/hledger-lib/Hledger/Read/TimeclockReader.hs index 22bc2901c..597c2648d 100644 --- a/hledger-lib/Hledger/Read/TimeclockReader.hs +++ b/hledger-lib/Hledger/Read/TimeclockReader.hs @@ -78,7 +78,7 @@ reader = Reader -- format, saving the provided file path and the current time, or give an -- error. parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal -parse = parseAndFinaliseJournal timeclockfilep +parse = parseAndFinaliseJournal' timeclockfilep timeclockfilep :: MonadIO m => JournalParser m ParsedJournal timeclockfilep = do many timeclockitemp diff --git a/hledger-lib/Hledger/Read/TimedotReader.hs b/hledger-lib/Hledger/Read/TimedotReader.hs index 77fb37b7a..aca5776e6 100644 --- a/hledger-lib/Hledger/Read/TimedotReader.hs +++ b/hledger-lib/Hledger/Read/TimedotReader.hs @@ -64,7 +64,7 @@ reader = Reader -- | Parse and post-process a "Journal" from the timedot format, or give an error. parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal -parse = parseAndFinaliseJournal timedotfilep +parse = parseAndFinaliseJournal' timedotfilep timedotfilep :: JournalParser m ParsedJournal timedotfilep = do many timedotfileitemp diff --git a/hledger-lib/Hledger/Utils/Parse.hs b/hledger-lib/Hledger/Utils/Parse.hs index 409ee40e3..eb723ef6d 100644 --- a/hledger-lib/Hledger/Utils/Parse.hs +++ b/hledger-lib/Hledger/Utils/Parse.hs @@ -5,6 +5,7 @@ module Hledger.Utils.Parse ( SimpleTextParser, TextParser, JournalParser, + ErroringJournalParser, choice', choiceInState, @@ -27,6 +28,7 @@ module Hledger.Utils.Parse ( ) where +import Control.Monad.Except (ExceptT) import Control.Monad.State.Strict (StateT, evalStateT) import Data.Char import Data.Functor.Identity (Identity(..)) @@ -52,6 +54,11 @@ type TextParser m a = ParsecT CustomErr Text m a -- | A parser of text in some monad, with a journal as state. type JournalParser m a = StateT Journal (ParsecT CustomErr Text m) a +-- | A parser of text in some monad, with a journal as state, that can throw a +-- "final" parse error that does not backtrack. +type ErroringJournalParser m a = + StateT Journal (ParsecT CustomErr Text (ExceptT FinalParseError m)) a + -- | Backtracking choice, use this when alternatives share a prefix. -- Consumes no input if all choices fail. choice' :: [TextParser m a] -> TextParser m a diff --git a/hledger-lib/Text/Megaparsec/Custom.hs b/hledger-lib/Text/Megaparsec/Custom.hs index 804b62ab4..c19195836 100644 --- a/hledger-lib/Text/Megaparsec/Custom.hs +++ b/hledger-lib/Text/Megaparsec/Custom.hs @@ -15,13 +15,35 @@ module Text.Megaparsec.Custom ( withSource, -- * Pretty-printing custom parse errors - customParseErrorPretty + customParseErrorPretty, + + + -- * Final parse error types + FinalParseError, + FinalParseError', + FinalParseErrorBundle, + FinalParseErrorBundle', + + -- * Constructing final parse errors + errorFinal, + finalFancyFailure, + finalFail, + finalCustomFailure, + + -- * Handling errors from include files with final parse errors + parseIncludeFile, + attachSource, + + -- * Pretty-printing final parse errors + finalParseErrorPretty, ) where import Prelude () import "base-compat-batteries" Prelude.Compat hiding (readFile) +import Control.Monad.Except +import Control.Monad.State.Strict (StateT, evalStateT) import Data.Foldable (asum, toList) import qualified Data.List.NonEmpty as NE import Data.Proxy (Proxy (Proxy)) @@ -129,6 +151,136 @@ customParseErrorPretty source err = case findCustomError err of finds f = asum . map f . toList +--- * Final parse error types + +-- | A parse error type intended for throwing parse errors without the +-- possiblity of backtracking. Intended for use as the error type in an +-- 'ExceptT' layer of the parser. +-- +-- In order to pretty-print a parse error, we must bundle it with the +-- source text and its filepaths (the 'ErrorBundle' constructor). However, +-- when an error is thrown from within a parser, we do not have access to +-- the (full) source, so we must hold the parse error until it can be +-- joined with the source text and its filepath by the parser's caller +-- (the 'ErrorFinal' constructor). + +data FinalParseError' e + = ErrorFinal (ParseError Char e) + | ErrorBundle (FinalParseErrorBundle' e) + deriving (Show) + +type FinalParseError = FinalParseError' CustomErr + +-- A 'Monoid' instance is necessary for 'ExceptT (FinalParseError'' e)' to +-- be an instance of Alternative and MonadPlus, which are required for the +-- use of e.g. the 'many' parser combinator. This monoid instance simply +-- takes the first (left-most) error. + +instance Semigroup (FinalParseError' e) where + e <> _ = e + +instance Monoid (FinalParseError' e) where + mempty = ErrorFinal $ + FancyError (initialPos "" NE.:| []) + (S.singleton (ErrorFail "default parse error")) + mappend = (<>) + +-- | A type bundling a 'ParseError' with its source file and a stack of +-- include file paths (for pretty printing). Although Megaparsec 6 +-- maintains a stack of source files, making a field of this type +-- redundant, this capability will be removed in Megaparsec 7. Therefore, +-- we implement stacks of source files here for a smoother transition in +-- the future. + +data FinalParseErrorBundle' e = FinalParseErrorBundle' + { finalParseError :: ParseError Char e + , errorSource :: Text + , sourceFileStack :: NE.NonEmpty FilePath + } deriving (Show) + +type FinalParseErrorBundle = FinalParseErrorBundle' CustomErr + +--- * Constructing and throwing final parse errors + +-- | Convert a "regular" parse error into a "final" parse error. + +errorFinal :: ParseError Char e -> FinalParseError' e +errorFinal = ErrorFinal + +-- | Like 'fancyFailure', but as a "final" parse error. + +finalFancyFailure + :: (MonadParsec e s m, MonadError (FinalParseError' e) m) + => S.Set (ErrorFancy e) -> m a +finalFancyFailure errSet = do + pos <- getPosition + let parseErr = FancyError (pos NE.:| []) errSet + throwError $ ErrorFinal parseErr + +-- | Like 'fail', but as a "final" parse error. + +finalFail + :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => String -> m a +finalFail = finalFancyFailure . S.singleton . ErrorFail + +-- | Like 'customFailure', but as a "final" parse error. + +finalCustomFailure + :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => e -> m a +finalCustomFailure = finalFancyFailure . S.singleton . ErrorCustom + +--- * Handling errors from include files with "final" parse errors + +-- Some care must be taken for sources to be attached to the right parse +-- errors when dealing with include files, so we capture the necessary +-- procedure in this function. + +parseIncludeFile + :: forall st m a. Monad m + => StateT st (ParsecT CustomErr Text (ExceptT FinalParseError m)) a + -> st + -> FilePath + -> Text + -> StateT st (ParsecT CustomErr Text (ExceptT FinalParseError m)) a +parseIncludeFile parser initState filepath text = + catchError parser' handler + where + parser' = do + eResult <- lift $ lift $ + runParserT (evalStateT parser initState) filepath text + case eResult of + Left parseError -> throwError $ errorFinal parseError + Right result -> pure result + + handler e = throwError $ ErrorBundle $ attachSource filepath text e + + +attachSource + :: FilePath -> Text -> FinalParseError' e -> FinalParseErrorBundle' e +attachSource filePath sourceText finalParseError = + case finalParseError of + ErrorFinal parseError -> FinalParseErrorBundle' + { finalParseError = parseError + , errorSource = sourceText + , sourceFileStack = filePath NE.:| [] + } + ErrorBundle bundle -> bundle + { sourceFileStack = filePath NE.<| sourceFileStack bundle + } + +--- * Pretty-printing final parse errors + +-- | Pretty-print a "final" parse error: print the stack of include files, +-- then apply the pretty-printer for custom parse errors. + +finalParseErrorPretty :: FinalParseErrorBundle' CustomErr -> String +finalParseErrorPretty bundle = + concatMap printIncludeFile (NE.init (sourceFileStack bundle)) + <> customParseErrorPretty (errorSource bundle) (finalParseError bundle) + where + printIncludeFile path = "in file included from " <> path <> ",\n" + + --- * Modified Megaparsec source -- The below code has been copied from Megaparsec (v.6.4.1,