dev: refactor: merge Text.Megaparsec.Custom into Hledger.Utils.Parse

This commit is contained in:
Simon Michael 2024-06-18 09:03:57 +01:00
parent 07a4b21620
commit f5c2ec681c
8 changed files with 437 additions and 452 deletions

View File

@ -135,7 +135,6 @@ import Data.Time.Clock.POSIX (POSIXTime)
import Data.Tree (Tree(..), flatten) import Data.Tree (Tree(..), flatten)
import Text.Printf (printf) import Text.Printf (printf)
import Text.Megaparsec (ParsecT) import Text.Megaparsec (ParsecT)
import Text.Megaparsec.Custom (FinalParseError)
import Hledger.Utils import Hledger.Utils
import Hledger.Data.Types import Hledger.Data.Types

View File

@ -151,9 +151,6 @@ import System.FilePath (takeFileName)
import Text.Megaparsec import Text.Megaparsec
import Text.Megaparsec.Char (char, char', digitChar, newline, string) import Text.Megaparsec.Char (char, char', digitChar, newline, string)
import Text.Megaparsec.Char.Lexer (decimal) import Text.Megaparsec.Char.Lexer (decimal)
import Text.Megaparsec.Custom
(FinalParseError, attachSource, finalErrorBundlePretty, parseErrorAt, parseErrorAtRegion)
-- import Text.Megaparsec.Debug (dbg) -- from megaparsec 9.3+
import Hledger.Data import Hledger.Data
import Hledger.Query (Query(..), filterQuery, parseQueryTerm, queryEndDate, queryStartDate, queryIsDate, simplifyQuery) import Hledger.Query (Query(..), filterQuery, parseQueryTerm, queryEndDate, queryStartDate, queryIsDate, simplifyQuery)

View File

@ -91,7 +91,6 @@ import Data.Time.LocalTime
import Safe import Safe
import Text.Megaparsec hiding (parse) import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char import Text.Megaparsec.Char
import Text.Megaparsec.Custom
import Text.Printf import Text.Printf
import System.FilePath import System.FilePath
import "Glob" System.FilePath.Glob hiding (match) import "Glob" System.FilePath.Glob hiding (match)

View File

@ -72,7 +72,6 @@ import qualified Data.ByteString.Lazy as BL
import Data.Foldable (asum, toList) import Data.Foldable (asum, toList)
import Text.Megaparsec hiding (match, parse) import Text.Megaparsec hiding (match, parse)
import Text.Megaparsec.Char (char, newline, string, digitChar) import Text.Megaparsec.Char (char, newline, string, digitChar)
import Text.Megaparsec.Custom (parseErrorAt)
import Text.Printf (printf) import Text.Printf (printf)
import Hledger.Data import Hledger.Data

View File

@ -78,8 +78,6 @@ import Data.Time.Calendar (Day, addDays)
import Data.Default (Default(..)) import Data.Default (Default(..))
import Safe (headMay, lastDef, lastMay, maximumMay, readMay) import Safe (headMay, lastDef, lastMay, maximumMay, readMay)
import Text.Megaparsec.Custom
import Hledger.Data import Hledger.Data
import Hledger.Query import Hledger.Query
import Hledger.Utils import Hledger.Utils

View File

@ -1,8 +1,15 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Hledger.Utils.Parse ( module Hledger.Utils.Parse (
-- * Some basic hledger parser flavours
SimpleStringParser, SimpleStringParser,
SimpleTextParser, SimpleTextParser,
TextParser, TextParser,
@ -15,6 +22,7 @@ module Hledger.Utils.Parse (
sourcePosPretty, sourcePosPretty,
sourcePosPairPretty, sourcePosPairPretty,
-- * Parsers and helpers
choice', choice',
choiceInState, choiceInState,
surroundedBy, surroundedBy,
@ -32,7 +40,6 @@ module Hledger.Utils.Parse (
isNonNewlineSpace, isNonNewlineSpace,
restofline, restofline,
eolof, eolof,
spacenonewline, spacenonewline,
skipNonNewlineSpaces, skipNonNewlineSpaces,
skipNonNewlineSpaces1, skipNonNewlineSpaces1,
@ -42,10 +49,44 @@ module Hledger.Utils.Parse (
dbgparse, dbgparse,
traceOrLogParse, traceOrLogParse,
-- * re-exports -- * More helpers, previously in Text.Megaparsec.Custom
HledgerParseErrors,
-- ** Custom parse error types
HledgerParseErrorData, HledgerParseErrorData,
HledgerParseErrors,
-- ** Failing with an arbitrary source position
parseErrorAt,
parseErrorAtRegion,
-- ** Re-parsing
SourceExcerpt,
getExcerptText,
excerpt_,
reparseExcerpt,
-- ** Pretty-printing custom parse errors
customErrorBundlePretty, customErrorBundlePretty,
-- ** "Final" parse errors
FinalParseError,
FinalParseError',
FinalParseErrorBundle,
FinalParseErrorBundle',
-- *** Constructing "final" parse errors
finalError,
finalFancyFailure,
finalFail,
finalCustomFailure,
-- *** Pretty-printing "final" parse errors
finalErrorBundlePretty,
attachSource,
-- *** Handling parse errors from include files with "final" parse errors
parseIncludeFile,
) )
where where
@ -61,7 +102,15 @@ import Data.Functor.Identity (Identity(..))
import Data.List import Data.List
import Data.Text (Text) import Data.Text (Text)
import Text.Megaparsec.Char import Text.Megaparsec.Char
import Text.Megaparsec.Custom -- import Text.Megaparsec.Debug (dbg) -- from megaparsec 9.3+
import Control.Monad.Except (ExceptT, MonadError, catchError, throwError)
-- import Control.Monad.State.Strict (StateT, evalStateT)
import Control.Monad.Trans.Class (lift)
import qualified Data.List.NonEmpty as NE
import Data.Monoid (Alt(..))
import qualified Data.Set as S
import Hledger.Utils.Debug (debugLevel, traceOrLog) import Hledger.Utils.Debug (debugLevel, traceOrLog)
-- | A parser of string to some type. -- | A parser of string to some type.
@ -200,3 +249,384 @@ skipNonNewlineSpaces' = True <$ skipNonNewlineSpaces1 <|> pure False
eolof :: TextParser m () eolof :: TextParser m ()
eolof = void newline <|> eof eolof = void newline <|> eof
-- A bunch of megaparsec helpers, eg for re-parsing (formerly in Text.Megaparsec.Custom).
-- I think these are generic apart from the HledgerParseError name.
--- * Custom parse error types
-- | Custom error data for hledger parsers. Specialised for a 'Text' parse stream.
-- ReparseableTextParseErrorData ?
data HledgerParseErrorData
-- | Fail with a message at a specific source position interval. The
-- interval must be contained within a single line.
= ErrorFailAt Int -- Starting offset
Int -- Ending offset
String -- Error message
-- | Re-throw parse errors obtained from the "re-parsing" of an excerpt
-- of the source text.
| ErrorReparsing
(NE.NonEmpty (ParseError Text HledgerParseErrorData)) -- Source fragment parse errors
deriving (Show, Eq, Ord)
-- | A specialised version of ParseErrorBundle:
-- a non-empty collection of hledger parse errors,
-- equipped with PosState to help pretty-print them.
-- Specialised for a 'Text' parse stream.
type HledgerParseErrors = ParseErrorBundle Text HledgerParseErrorData
-- We require an 'Ord' instance for 'CustomError' so that they may be
-- stored in a 'Set'. The actual instance is inconsequential, so we just
-- derive it, but the derived instance requires an (orphan) instance for
-- 'ParseError'. Hopefully this does not cause any trouble.
deriving instance Ord (ParseError Text HledgerParseErrorData)
-- Note: the pretty-printing of our 'HledgerParseErrorData' type is only partally
-- defined in its 'ShowErrorComponent' instance; we perform additional
-- adjustments in 'customErrorBundlePretty'.
instance ShowErrorComponent HledgerParseErrorData where
showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg
showErrorComponent (ErrorReparsing _) = "" -- dummy value
errorComponentLen (ErrorFailAt startOffset endOffset _) =
endOffset - startOffset
errorComponentLen (ErrorReparsing _) = 1 -- dummy value
--- * Failing with an arbitrary source position
-- | Fail at a specific source position, given by the raw offset from the
-- start of the input stream (the number of tokens processed at that
-- point).
parseErrorAt :: Int -> String -> HledgerParseErrorData
parseErrorAt offset = ErrorFailAt offset (offset+1)
-- | Fail at a specific source interval, given by the raw offsets of its
-- endpoints from the start of the input stream (the numbers of tokens
-- processed at those points).
--
-- Note that care must be taken to ensure that the specified interval does
-- not span multiple lines of the input source. This will not be checked.
parseErrorAtRegion
:: Int -- ^ Start offset
-> Int -- ^ End end offset
-> String -- ^ Error message
-> HledgerParseErrorData
parseErrorAtRegion startOffset endOffset msg =
if startOffset < endOffset
then ErrorFailAt startOffset endOffset msg'
else ErrorFailAt startOffset (startOffset+1) msg'
where
msg' = "\n" ++ msg
--- * Re-parsing
-- | A fragment of source suitable for "re-parsing". The purpose of this
-- data type is to preserve the content and source position of the excerpt
-- so that parse errors raised during "re-parsing" may properly reference
-- the original source.
data SourceExcerpt = SourceExcerpt Int -- Offset of beginning of excerpt
Text -- Fragment of source file
-- | Get the raw text of a source excerpt.
getExcerptText :: SourceExcerpt -> Text
getExcerptText (SourceExcerpt _ txt) = txt
-- | 'excerpt_ p' applies the given parser 'p' and extracts the portion of
-- the source consumed by 'p', along with the source position of this
-- portion. This is the only way to create a source excerpt suitable for
-- "re-parsing" by 'reparseExcerpt'.
-- This function could be extended to return the result of 'p', but we don't
-- currently need this.
excerpt_ :: MonadParsec HledgerParseErrorData Text m => m a -> m SourceExcerpt
excerpt_ p = do
offset <- getOffset
(!txt, _) <- match p
pure $ SourceExcerpt offset txt
-- | 'reparseExcerpt s p' "re-parses" the source excerpt 's' using the
-- parser 'p'. Parse errors raised by 'p' will be re-thrown at the source
-- position of the source excerpt.
--
-- In order for the correct source file to be displayed when re-throwing
-- parse errors, we must ensure that the source file during the use of
-- 'reparseExcerpt s p' is the same as that during the use of 'excerpt_'
-- that generated the source excerpt 's'. However, we can usually expect
-- this condition to be satisfied because, at the time of writing, the
-- only changes of source file in the codebase take place through include
-- files, and the parser for include files neither accepts nor returns
-- 'SourceExcerpt's.
reparseExcerpt
:: Monad m
=> SourceExcerpt
-> ParsecT HledgerParseErrorData Text m a
-> ParsecT HledgerParseErrorData Text m a
reparseExcerpt (SourceExcerpt offset txt) p = do
(_, res) <- lift $ runParserT' p (offsetInitialState offset txt)
case res of
Right result -> pure result
Left errBundle -> customFailure $ ErrorReparsing $ bundleErrors errBundle
where
offsetInitialState :: Int -> s ->
#if MIN_VERSION_megaparsec(8,0,0)
State s e
#else
State s
#endif
offsetInitialState initialOffset s = State
{ stateInput = s
, stateOffset = initialOffset
, statePosState = PosState
{ pstateInput = s
, pstateOffset = initialOffset
, pstateSourcePos = initialPos ""
, pstateTabWidth = defaultTabWidth
, pstateLinePrefix = ""
}
#if MIN_VERSION_megaparsec(8,0,0)
, stateParseErrors = []
#endif
}
--- * Pretty-printing custom parse errors
-- | Pretty-print our custom parse errors. It is necessary to use this
-- instead of 'errorBundlePretty' when custom parse errors are thrown.
--
-- This function intercepts our custom parse errors and applies final
-- adjustments ('finalizeCustomError') before passing them to
-- 'errorBundlePretty'. These adjustments are part of the implementation
-- of the behaviour of our custom parse errors.
--
-- Note: We must ensure that the offset of the 'PosState' of the provided
-- 'ParseErrorBundle' is no larger than the offset specified by a
-- 'ErrorFailAt' constructor. This is guaranteed if this offset is set to
-- 0 (that is, the beginning of the source file), which is the
-- case for 'ParseErrorBundle's returned from 'runParserT'.
customErrorBundlePretty :: HledgerParseErrors -> String
customErrorBundlePretty errBundle =
let errBundle' = errBundle { bundleErrors =
NE.sortWith errorOffset $ -- megaparsec requires that the list of errors be sorted by their offsets
bundleErrors errBundle >>= finalizeCustomError }
in errorBundlePretty errBundle'
where
finalizeCustomError
:: ParseError Text HledgerParseErrorData -> NE.NonEmpty (ParseError Text HledgerParseErrorData)
finalizeCustomError err = case findCustomError err of
Nothing -> pure err
Just errFailAt@(ErrorFailAt startOffset _ _) ->
-- Adjust the offset
pure $ FancyError startOffset $ S.singleton $ ErrorCustom errFailAt
Just (ErrorReparsing errs) ->
-- Extract and finalize the inner errors
errs >>= finalizeCustomError
-- If any custom errors are present, arbitrarily take the first one
-- (since only one custom error should be used at a time).
findCustomError :: ParseError Text HledgerParseErrorData -> Maybe HledgerParseErrorData
findCustomError err = case err of
FancyError _ errSet ->
finds (\case {ErrorCustom e -> Just e; _ -> Nothing}) errSet
_ -> Nothing
finds :: (Foldable t) => (a -> Maybe b) -> t a -> Maybe b
finds f = getAlt . foldMap (Alt . f)
--- * "Final" parse errors
--
-- | A type representing "final" parse errors that cannot be backtracked
-- from and are guaranteed to halt parsing. The anti-backtracking
-- behaviour is implemented by an 'ExceptT' layer in the parser's monad
-- stack, using this type as the 'ExceptT' error type.
--
-- We have three goals for this type:
-- (1) it should be possible to convert any parse error into a "final"
-- parse error,
-- (2) it should be possible to take a parse error thrown from an include
-- file and re-throw it in the parent file, and
-- (3) the pretty-printing of "final" parse errors should be consistent
-- with that of ordinary parse errors, but should also report a stack of
-- files for errors thrown from include files.
--
-- In order to pretty-print a "final" parse error (goal 3), it must be
-- bundled with include filepaths and its full source text. When a "final"
-- parse error is thrown from within a parser, we do not have access to
-- the full source, so we must hold the parse error until it can be joined
-- with its source (and include filepaths, if it was thrown from an
-- include file) by the parser's caller.
--
-- A parse error with include filepaths and its full source text is
-- represented by the 'FinalParseErrorBundle' type, while a parse error in
-- need of either include filepaths, full source text, or both is
-- represented by the 'FinalParseError' type.
data FinalParseError' e
-- a parse error thrown as a "final" parse error
= FinalError (ParseError Text e)
-- a parse error obtained from running a parser, e.g. using 'runParserT'
| FinalBundle (ParseErrorBundle Text e)
-- a parse error thrown from an include file
| FinalBundleWithStack (FinalParseErrorBundle' e)
deriving (Show)
type FinalParseError = FinalParseError' HledgerParseErrorData
-- We need a 'Monoid' instance for 'FinalParseError' so that 'ExceptT
-- FinalParseError m' is an instance of Alternative and MonadPlus, which
-- is needed to use some parser combinators, e.g. 'many'.
--
-- This monoid instance simply takes the first (left-most) error.
instance Semigroup (FinalParseError' e) where
e <> _ = e
instance Monoid (FinalParseError' e) where
mempty = FinalError $ FancyError 0 $
S.singleton (ErrorFail "default parse error")
mappend = (<>)
-- | A type bundling a 'ParseError' with its full source text, filepath,
-- and stack of include files. Suitable for pretty-printing.
--
-- Megaparsec's 'ParseErrorBundle' type already bundles a parse error with
-- its full source text and filepath, so we just add a stack of include
-- files.
data FinalParseErrorBundle' e = FinalParseErrorBundle'
{ finalErrorBundle :: ParseErrorBundle Text e
, includeFileStack :: [FilePath]
} deriving (Show)
type FinalParseErrorBundle = FinalParseErrorBundle' HledgerParseErrorData
--- * Constructing and throwing final parse errors
-- | Convert a "regular" parse error into a "final" parse error.
finalError :: ParseError Text e -> FinalParseError' e
finalError = FinalError
-- | Like megaparsec's 'fancyFailure', but as a "final" parse error.
finalFancyFailure
:: (MonadParsec e s m, MonadError (FinalParseError' e) m)
=> S.Set (ErrorFancy e) -> m a
finalFancyFailure errSet = do
offset <- getOffset
throwError $ FinalError $ FancyError offset errSet
-- | Like 'fail', but as a "final" parse error.
finalFail
:: (MonadParsec e s m, MonadError (FinalParseError' e) m) => String -> m a
finalFail = finalFancyFailure . S.singleton . ErrorFail
-- | Like megaparsec's 'customFailure', but as a "final" parse error.
finalCustomFailure
:: (MonadParsec e s m, MonadError (FinalParseError' e) m) => e -> m a
finalCustomFailure = finalFancyFailure . S.singleton . ErrorCustom
--- * Pretty-printing "final" parse errors
-- | Pretty-print a "final" parse error: print the stack of include files,
-- then apply the pretty-printer for parse error bundles. Note that
-- 'attachSource' must be used on a "final" parse error before it can be
-- pretty-printed.
finalErrorBundlePretty :: FinalParseErrorBundle' HledgerParseErrorData -> String
finalErrorBundlePretty bundle =
concatMap showIncludeFilepath (includeFileStack bundle)
<> customErrorBundlePretty (finalErrorBundle bundle)
where
showIncludeFilepath path = "in file included from " <> path <> ",\n"
-- | Supply a filepath and source text to a "final" parse error so that it
-- can be pretty-printed. You must ensure that you provide the appropriate
-- source text and filepath.
attachSource
:: FilePath -> Text -> FinalParseError' e -> FinalParseErrorBundle' e
attachSource filePath sourceText finalParseError = case finalParseError of
-- A parse error thrown directly with the 'FinalError' constructor
-- requires both source and filepath.
FinalError err ->
let bundle = ParseErrorBundle
{ bundleErrors = err NE.:| []
, bundlePosState = initialPosState filePath sourceText }
in FinalParseErrorBundle'
{ finalErrorBundle = bundle
, includeFileStack = [] }
-- A 'ParseErrorBundle' already has the appropriate source and filepath
-- and so needs neither.
FinalBundle peBundle -> FinalParseErrorBundle'
{ finalErrorBundle = peBundle
, includeFileStack = [] }
-- A parse error from a 'FinalParseErrorBundle' was thrown from an
-- include file, so we add the filepath to the stack.
FinalBundleWithStack fpeBundle -> fpeBundle
{ includeFileStack = filePath : includeFileStack fpeBundle }
--- * Handling parse errors from include files with "final" parse errors
-- | Parse a file with the given parser and initial state, discarding the
-- final state and re-throwing any parse errors as "final" parse errors.
parseIncludeFile
:: Monad m
=> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
-> st
-> FilePath
-> Text
-> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
parseIncludeFile parser initialState filepath text =
catchError parser' handler
where
parser' = do
eResult <- lift $ lift $
runParserT (evalStateT parser initialState) filepath text
case eResult of
Left parseErrorBundle -> throwError $ FinalBundle parseErrorBundle
Right result -> pure result
-- Attach source and filepath of the include file to its parse errors
handler e = throwError $ FinalBundleWithStack $ attachSource filepath text e
--- * Helpers
-- Like megaparsec's 'initialState', but instead for 'PosState'. Used when
-- constructing 'ParseErrorBundle's. The values for "tab width" and "line
-- prefix" are taken from 'initialState'.
initialPosState :: FilePath -> Text -> PosState Text
initialPosState filePath sourceText = PosState
{ pstateInput = sourceText
, pstateOffset = 0
, pstateSourcePos = initialPos filePath
, pstateTabWidth = defaultTabWidth
, pstateLinePrefix = "" }

View File

@ -31,7 +31,9 @@ import Test.Tasty.HUnit
-- import Test.Tasty.QuickCheck as QC -- import Test.Tasty.QuickCheck as QC
-- import Test.Tasty.SmallCheck as SC -- import Test.Tasty.SmallCheck as SC
import Text.Megaparsec import Text.Megaparsec
import Text.Megaparsec.Custom
import Hledger.Utils.IO (pshow)
import Hledger.Utils.Parse
( HledgerParseErrorData, ( HledgerParseErrorData,
FinalParseError, FinalParseError,
attachSource, attachSource,
@ -39,8 +41,6 @@ import Text.Megaparsec.Custom
finalErrorBundlePretty, finalErrorBundlePretty,
) )
import Hledger.Utils.IO (pshow)
-- * tasty helpers -- * tasty helpers
-- TODO: pretty-print values in failure messages -- TODO: pretty-print values in failure messages

View File

@ -1,437 +0,0 @@
-- A bunch of megaparsec helpers for re-parsing etc.
-- I think these are generic apart from the HledgerParseError name.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-} -- new
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-} -- new
module Text.Megaparsec.Custom (
-- * Custom parse error types
HledgerParseErrorData,
HledgerParseErrors,
-- * Failing with an arbitrary source position
parseErrorAt,
parseErrorAtRegion,
-- * Re-parsing
SourceExcerpt,
getExcerptText,
excerpt_,
reparseExcerpt,
-- * Pretty-printing custom parse errors
customErrorBundlePretty,
-- * "Final" parse errors
FinalParseError,
FinalParseError',
FinalParseErrorBundle,
FinalParseErrorBundle',
-- * Constructing "final" parse errors
finalError,
finalFancyFailure,
finalFail,
finalCustomFailure,
-- * Pretty-printing "final" parse errors
finalErrorBundlePretty,
attachSource,
-- * Handling parse errors from include files with "final" parse errors
parseIncludeFile,
)
where
import Control.Monad.Except (ExceptT, MonadError, catchError, throwError)
import Control.Monad.State.Strict (StateT, evalStateT)
import Control.Monad.Trans.Class (lift)
import qualified Data.List.NonEmpty as NE
import Data.Monoid (Alt(..))
import qualified Data.Set as S
import Data.Text (Text)
import Text.Megaparsec
--- * Custom parse error types
-- | Custom error data for hledger parsers. Specialised for a 'Text' parse stream.
-- ReparseableTextParseErrorData ?
data HledgerParseErrorData
-- | Fail with a message at a specific source position interval. The
-- interval must be contained within a single line.
= ErrorFailAt Int -- Starting offset
Int -- Ending offset
String -- Error message
-- | Re-throw parse errors obtained from the "re-parsing" of an excerpt
-- of the source text.
| ErrorReparsing
(NE.NonEmpty (ParseError Text HledgerParseErrorData)) -- Source fragment parse errors
deriving (Show, Eq, Ord)
-- | A specialised version of ParseErrorBundle:
-- a non-empty collection of hledger parse errors,
-- equipped with PosState to help pretty-print them.
-- Specialised for a 'Text' parse stream.
type HledgerParseErrors = ParseErrorBundle Text HledgerParseErrorData
-- We require an 'Ord' instance for 'CustomError' so that they may be
-- stored in a 'Set'. The actual instance is inconsequential, so we just
-- derive it, but the derived instance requires an (orphan) instance for
-- 'ParseError'. Hopefully this does not cause any trouble.
deriving instance Ord (ParseError Text HledgerParseErrorData)
-- Note: the pretty-printing of our 'HledgerParseErrorData' type is only partally
-- defined in its 'ShowErrorComponent' instance; we perform additional
-- adjustments in 'customErrorBundlePretty'.
instance ShowErrorComponent HledgerParseErrorData where
showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg
showErrorComponent (ErrorReparsing _) = "" -- dummy value
errorComponentLen (ErrorFailAt startOffset endOffset _) =
endOffset - startOffset
errorComponentLen (ErrorReparsing _) = 1 -- dummy value
--- * Failing with an arbitrary source position
-- | Fail at a specific source position, given by the raw offset from the
-- start of the input stream (the number of tokens processed at that
-- point).
parseErrorAt :: Int -> String -> HledgerParseErrorData
parseErrorAt offset = ErrorFailAt offset (offset+1)
-- | Fail at a specific source interval, given by the raw offsets of its
-- endpoints from the start of the input stream (the numbers of tokens
-- processed at those points).
--
-- Note that care must be taken to ensure that the specified interval does
-- not span multiple lines of the input source. This will not be checked.
parseErrorAtRegion
:: Int -- ^ Start offset
-> Int -- ^ End end offset
-> String -- ^ Error message
-> HledgerParseErrorData
parseErrorAtRegion startOffset endOffset msg =
if startOffset < endOffset
then ErrorFailAt startOffset endOffset msg'
else ErrorFailAt startOffset (startOffset+1) msg'
where
msg' = "\n" ++ msg
--- * Re-parsing
-- | A fragment of source suitable for "re-parsing". The purpose of this
-- data type is to preserve the content and source position of the excerpt
-- so that parse errors raised during "re-parsing" may properly reference
-- the original source.
data SourceExcerpt = SourceExcerpt Int -- Offset of beginning of excerpt
Text -- Fragment of source file
-- | Get the raw text of a source excerpt.
getExcerptText :: SourceExcerpt -> Text
getExcerptText (SourceExcerpt _ txt) = txt
-- | 'excerpt_ p' applies the given parser 'p' and extracts the portion of
-- the source consumed by 'p', along with the source position of this
-- portion. This is the only way to create a source excerpt suitable for
-- "re-parsing" by 'reparseExcerpt'.
-- This function could be extended to return the result of 'p', but we don't
-- currently need this.
excerpt_ :: MonadParsec HledgerParseErrorData Text m => m a -> m SourceExcerpt
excerpt_ p = do
offset <- getOffset
(!txt, _) <- match p
pure $ SourceExcerpt offset txt
-- | 'reparseExcerpt s p' "re-parses" the source excerpt 's' using the
-- parser 'p'. Parse errors raised by 'p' will be re-thrown at the source
-- position of the source excerpt.
--
-- In order for the correct source file to be displayed when re-throwing
-- parse errors, we must ensure that the source file during the use of
-- 'reparseExcerpt s p' is the same as that during the use of 'excerpt_'
-- that generated the source excerpt 's'. However, we can usually expect
-- this condition to be satisfied because, at the time of writing, the
-- only changes of source file in the codebase take place through include
-- files, and the parser for include files neither accepts nor returns
-- 'SourceExcerpt's.
reparseExcerpt
:: Monad m
=> SourceExcerpt
-> ParsecT HledgerParseErrorData Text m a
-> ParsecT HledgerParseErrorData Text m a
reparseExcerpt (SourceExcerpt offset txt) p = do
(_, res) <- lift $ runParserT' p (offsetInitialState offset txt)
case res of
Right result -> pure result
Left errBundle -> customFailure $ ErrorReparsing $ bundleErrors errBundle
where
offsetInitialState :: Int -> s ->
#if MIN_VERSION_megaparsec(8,0,0)
State s e
#else
State s
#endif
offsetInitialState initialOffset s = State
{ stateInput = s
, stateOffset = initialOffset
, statePosState = PosState
{ pstateInput = s
, pstateOffset = initialOffset
, pstateSourcePos = initialPos ""
, pstateTabWidth = defaultTabWidth
, pstateLinePrefix = ""
}
#if MIN_VERSION_megaparsec(8,0,0)
, stateParseErrors = []
#endif
}
--- * Pretty-printing custom parse errors
-- | Pretty-print our custom parse errors. It is necessary to use this
-- instead of 'errorBundlePretty' when custom parse errors are thrown.
--
-- This function intercepts our custom parse errors and applies final
-- adjustments ('finalizeCustomError') before passing them to
-- 'errorBundlePretty'. These adjustments are part of the implementation
-- of the behaviour of our custom parse errors.
--
-- Note: We must ensure that the offset of the 'PosState' of the provided
-- 'ParseErrorBundle' is no larger than the offset specified by a
-- 'ErrorFailAt' constructor. This is guaranteed if this offset is set to
-- 0 (that is, the beginning of the source file), which is the
-- case for 'ParseErrorBundle's returned from 'runParserT'.
customErrorBundlePretty :: HledgerParseErrors -> String
customErrorBundlePretty errBundle =
let errBundle' = errBundle { bundleErrors =
NE.sortWith errorOffset $ -- megaparsec requires that the list of errors be sorted by their offsets
bundleErrors errBundle >>= finalizeCustomError }
in errorBundlePretty errBundle'
where
finalizeCustomError
:: ParseError Text HledgerParseErrorData -> NE.NonEmpty (ParseError Text HledgerParseErrorData)
finalizeCustomError err = case findCustomError err of
Nothing -> pure err
Just errFailAt@(ErrorFailAt startOffset _ _) ->
-- Adjust the offset
pure $ FancyError startOffset $ S.singleton $ ErrorCustom errFailAt
Just (ErrorReparsing errs) ->
-- Extract and finalize the inner errors
errs >>= finalizeCustomError
-- If any custom errors are present, arbitrarily take the first one
-- (since only one custom error should be used at a time).
findCustomError :: ParseError Text HledgerParseErrorData -> Maybe HledgerParseErrorData
findCustomError err = case err of
FancyError _ errSet ->
finds (\case {ErrorCustom e -> Just e; _ -> Nothing}) errSet
_ -> Nothing
finds :: (Foldable t) => (a -> Maybe b) -> t a -> Maybe b
finds f = getAlt . foldMap (Alt . f)
--- * "Final" parse errors
--
-- | A type representing "final" parse errors that cannot be backtracked
-- from and are guaranteed to halt parsing. The anti-backtracking
-- behaviour is implemented by an 'ExceptT' layer in the parser's monad
-- stack, using this type as the 'ExceptT' error type.
--
-- We have three goals for this type:
-- (1) it should be possible to convert any parse error into a "final"
-- parse error,
-- (2) it should be possible to take a parse error thrown from an include
-- file and re-throw it in the parent file, and
-- (3) the pretty-printing of "final" parse errors should be consistent
-- with that of ordinary parse errors, but should also report a stack of
-- files for errors thrown from include files.
--
-- In order to pretty-print a "final" parse error (goal 3), it must be
-- bundled with include filepaths and its full source text. When a "final"
-- parse error is thrown from within a parser, we do not have access to
-- the full source, so we must hold the parse error until it can be joined
-- with its source (and include filepaths, if it was thrown from an
-- include file) by the parser's caller.
--
-- A parse error with include filepaths and its full source text is
-- represented by the 'FinalParseErrorBundle' type, while a parse error in
-- need of either include filepaths, full source text, or both is
-- represented by the 'FinalParseError' type.
data FinalParseError' e
-- a parse error thrown as a "final" parse error
= FinalError (ParseError Text e)
-- a parse error obtained from running a parser, e.g. using 'runParserT'
| FinalBundle (ParseErrorBundle Text e)
-- a parse error thrown from an include file
| FinalBundleWithStack (FinalParseErrorBundle' e)
deriving (Show)
type FinalParseError = FinalParseError' HledgerParseErrorData
-- We need a 'Monoid' instance for 'FinalParseError' so that 'ExceptT
-- FinalParseError m' is an instance of Alternative and MonadPlus, which
-- is needed to use some parser combinators, e.g. 'many'.
--
-- This monoid instance simply takes the first (left-most) error.
instance Semigroup (FinalParseError' e) where
e <> _ = e
instance Monoid (FinalParseError' e) where
mempty = FinalError $ FancyError 0 $
S.singleton (ErrorFail "default parse error")
mappend = (<>)
-- | A type bundling a 'ParseError' with its full source text, filepath,
-- and stack of include files. Suitable for pretty-printing.
--
-- Megaparsec's 'ParseErrorBundle' type already bundles a parse error with
-- its full source text and filepath, so we just add a stack of include
-- files.
data FinalParseErrorBundle' e = FinalParseErrorBundle'
{ finalErrorBundle :: ParseErrorBundle Text e
, includeFileStack :: [FilePath]
} deriving (Show)
type FinalParseErrorBundle = FinalParseErrorBundle' HledgerParseErrorData
--- * Constructing and throwing final parse errors
-- | Convert a "regular" parse error into a "final" parse error.
finalError :: ParseError Text e -> FinalParseError' e
finalError = FinalError
-- | Like megaparsec's 'fancyFailure', but as a "final" parse error.
finalFancyFailure
:: (MonadParsec e s m, MonadError (FinalParseError' e) m)
=> S.Set (ErrorFancy e) -> m a
finalFancyFailure errSet = do
offset <- getOffset
throwError $ FinalError $ FancyError offset errSet
-- | Like 'fail', but as a "final" parse error.
finalFail
:: (MonadParsec e s m, MonadError (FinalParseError' e) m) => String -> m a
finalFail = finalFancyFailure . S.singleton . ErrorFail
-- | Like megaparsec's 'customFailure', but as a "final" parse error.
finalCustomFailure
:: (MonadParsec e s m, MonadError (FinalParseError' e) m) => e -> m a
finalCustomFailure = finalFancyFailure . S.singleton . ErrorCustom
--- * Pretty-printing "final" parse errors
-- | Pretty-print a "final" parse error: print the stack of include files,
-- then apply the pretty-printer for parse error bundles. Note that
-- 'attachSource' must be used on a "final" parse error before it can be
-- pretty-printed.
finalErrorBundlePretty :: FinalParseErrorBundle' HledgerParseErrorData -> String
finalErrorBundlePretty bundle =
concatMap showIncludeFilepath (includeFileStack bundle)
<> customErrorBundlePretty (finalErrorBundle bundle)
where
showIncludeFilepath path = "in file included from " <> path <> ",\n"
-- | Supply a filepath and source text to a "final" parse error so that it
-- can be pretty-printed. You must ensure that you provide the appropriate
-- source text and filepath.
attachSource
:: FilePath -> Text -> FinalParseError' e -> FinalParseErrorBundle' e
attachSource filePath sourceText finalParseError = case finalParseError of
-- A parse error thrown directly with the 'FinalError' constructor
-- requires both source and filepath.
FinalError err ->
let bundle = ParseErrorBundle
{ bundleErrors = err NE.:| []
, bundlePosState = initialPosState filePath sourceText }
in FinalParseErrorBundle'
{ finalErrorBundle = bundle
, includeFileStack = [] }
-- A 'ParseErrorBundle' already has the appropriate source and filepath
-- and so needs neither.
FinalBundle peBundle -> FinalParseErrorBundle'
{ finalErrorBundle = peBundle
, includeFileStack = [] }
-- A parse error from a 'FinalParseErrorBundle' was thrown from an
-- include file, so we add the filepath to the stack.
FinalBundleWithStack fpeBundle -> fpeBundle
{ includeFileStack = filePath : includeFileStack fpeBundle }
--- * Handling parse errors from include files with "final" parse errors
-- | Parse a file with the given parser and initial state, discarding the
-- final state and re-throwing any parse errors as "final" parse errors.
parseIncludeFile
:: Monad m
=> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
-> st
-> FilePath
-> Text
-> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
parseIncludeFile parser initialState filepath text =
catchError parser' handler
where
parser' = do
eResult <- lift $ lift $
runParserT (evalStateT parser initialState) filepath text
case eResult of
Left parseErrorBundle -> throwError $ FinalBundle parseErrorBundle
Right result -> pure result
-- Attach source and filepath of the include file to its parse errors
handler e = throwError $ FinalBundleWithStack $ attachSource filepath text e
--- * Helpers
-- Like megaparsec's 'initialState', but instead for 'PosState'. Used when
-- constructing 'ParseErrorBundle's. The values for "tab width" and "line
-- prefix" are taken from 'initialState'.
initialPosState :: FilePath -> Text -> PosState Text
initialPosState filePath sourceText = PosState
{ pstateInput = sourceText
, pstateOffset = 0
, pstateSourcePos = initialPos filePath
, pstateTabWidth = defaultTabWidth
, pstateLinePrefix = "" }