lib: add a custom parse error for "re-parsing"

- for pretty-printing parse errors thrown from the parsing of excerpts
  of the source text as if they were thrown from the parsing of the
  source text itself
This commit is contained in:
Alex Chen 2018-11-10 18:02:52 -07:00 committed by Simon Michael
parent a711ae60fb
commit 880e6e0a32

View File

@ -1,16 +1,26 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-} -- new
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-} -- new
module Text.Megaparsec.Custom ( module Text.Megaparsec.Custom (
-- * Custom parse error type -- * Custom parse error type
CustomErr, CustomErr,
-- * Constructing custom parse errors -- * Failing with an arbitrary source position
parseErrorAt, parseErrorAt,
parseErrorAtRegion, parseErrorAtRegion,
-- * Re-parsing
SourceExcerpt,
getExcerptText,
excerpt_,
reparseExcerpt,
-- * Pretty-printing custom parse errors -- * Pretty-printing custom parse errors
customErrorBundlePretty, customErrorBundlePretty,
@ -59,20 +69,33 @@ data CustomErr
= ErrorFailAt Int -- Starting offset = ErrorFailAt Int -- Starting offset
Int -- Ending offset Int -- Ending offset
String -- Error message String -- Error message
-- | Re-throw parse errors obtained from the "re-parsing" of an excerpt
-- of the source text.
| ErrorReparsing
(NE.NonEmpty (ParseError Text CustomErr)) -- Source fragment parse errors
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
-- We require an 'Ord' instance for 'CustomError' so that they may be -- We require an 'Ord' instance for 'CustomError' so that they may be
-- stored in a 'Set'. The actual instance is inconsequential, so we just -- stored in a 'Set'. The actual instance is inconsequential, so we just
-- derive it, but this requires an (orphan) instance for 'ParseError'. -- derive it, but the derived instance requires an (orphan) instance for
-- Hopefully this does not cause any trouble. -- 'ParseError'. Hopefully this does not cause any trouble.
deriving instance Ord (ParseError Text CustomErr)
-- Note: the pretty-printing of our 'CustomErr' type is only partally
-- defined in its 'ShowErrorComponent' instance; we perform additional
-- adjustments in 'customErrorBundlePretty'.
instance ShowErrorComponent CustomErr where instance ShowErrorComponent CustomErr where
showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg
showErrorComponent (ErrorReparsing _) = "" -- dummy value
errorComponentLen (ErrorFailAt startOffset endOffset _) = errorComponentLen (ErrorFailAt startOffset endOffset _) =
endOffset - startOffset endOffset - startOffset
errorComponentLen (ErrorReparsing _) = 1 -- dummy value
--- * Constructing custom parse errors --- * Failing with an arbitrary source position
-- | Fail at a specific source position, given by the raw offset from the -- | Fail at a specific source position, given by the raw offset from the
-- start of the input stream (the number of tokens processed at that -- start of the input stream (the number of tokens processed at that
@ -86,8 +109,7 @@ parseErrorAt offset msg = ErrorFailAt offset (offset+1) msg
-- processed at those points). -- processed at those points).
-- --
-- Note that care must be taken to ensure that the specified interval does -- Note that care must be taken to ensure that the specified interval does
-- not span multiple lines of the input source, as this will not be -- not span multiple lines of the input source. This will not be checked.
-- checked.
parseErrorAtRegion parseErrorAtRegion
:: Int -- ^ Start offset :: Int -- ^ Start offset
@ -100,28 +122,109 @@ parseErrorAtRegion startOffset endOffset msg =
else ErrorFailAt startOffset (startOffset+1) msg else ErrorFailAt startOffset (startOffset+1) msg
--- * Re-parsing
-- | A fragment of source suitable for "re-parsing". The purpose of this
-- data type is to preserve the content and source position of the excerpt
-- so that parse errors raised during "re-parsing" may properly reference
-- the original source.
data SourceExcerpt = SourceExcerpt Int -- Offset of beginning of excerpt
Text -- Fragment of source file
-- | Get the raw text of a source excerpt.
getExcerptText :: SourceExcerpt -> Text
getExcerptText (SourceExcerpt _ txt) = txt
-- | 'excerpt_ p' applies the given parser 'p' and extracts the portion of
-- the source consumed by 'p', along with the source position of this
-- portion. This is the only way to create a source excerpt suitable for
-- "re-parsing" by 'reparseExcerpt'.
-- This function could be extended to return the result of 'p', but we don't
-- currently need this.
excerpt_ :: MonadParsec CustomErr Text m => m a -> m SourceExcerpt
excerpt_ p = do
offset <- getOffset
(!txt, _) <- match p
pure $ SourceExcerpt offset txt
-- | 'reparseExcerpt s p' "re-parses" the source excerpt 's' using the
-- parser 'p'. Parse errors raised by 'p' will be re-thrown at the source
-- position of the source excerpt.
--
-- In order for the correct source file to be displayed when re-throwing
-- parse errors, we must ensure that the source file during the use of
-- 'reparseExcerpt s p' is the same as that during the use of 'excerpt_'
-- that generated the source excerpt 's'. However, we can usually expect
-- this condition to be satisfied because, at the time of writing, the
-- only changes of source file in the codebase take place through include
-- files, and the parser for include files neither accepts nor returns
-- 'SourceExcerpt's.
reparseExcerpt
:: Monad m
=> SourceExcerpt
-> ParsecT CustomErr Text m a
-> ParsecT CustomErr Text m a
reparseExcerpt (SourceExcerpt offset txt) p = do
(_, res) <- lift $ runParserT' p (offsetInitialState offset txt)
case res of
Right result -> pure result
Left errBundle -> customFailure $ ErrorReparsing $ bundleErrors errBundle
where
offsetInitialState :: Int -> s -> State s
offsetInitialState initialOffset s = State
{ stateInput = s
, stateOffset = initialOffset
, statePosState = PosState
{ pstateInput = s
, pstateOffset = initialOffset
, pstateSourcePos = initialPos ""
, pstateTabWidth = defaultTabWidth
, pstateLinePrefix = ""
}
}
--- * Pretty-printing custom parse errors --- * Pretty-printing custom parse errors
-- | Pretty-print our custom parse errors and display the line on which -- | Pretty-print our custom parse errors. It is necessary to use this
-- the parse error occured. -- instead of 'errorBundlePretty' when custom parse errors are thrown.
-- --
-- Use this instead of 'errorBundlePretty' when custom parse errors are -- This function intercepts our custom parse errors and applies final
-- thrown, otherwise the continuous highlighting in the pretty-printed -- adjustments ('finalizeCustomError') before passing them to
-- parse error will be displaced from its proper position. -- 'errorBundlePretty'. These adjustments are part of the implementation
-- of the behaviour of our custom parse errors.
--
-- Note: We must ensure that the offset of the 'PosState' of the provided
-- 'ParseErrorBundle' is no larger than the offset specified by a
-- 'ErrorFailAt' constructor. This is guaranteed if this offset is set to
-- 0 (that is, the beginning of the source file), which is the
-- case for 'ParseErrorBundle's returned from 'runParserT'.
customErrorBundlePretty :: ParseErrorBundle Text CustomErr -> String customErrorBundlePretty :: ParseErrorBundle Text CustomErr -> String
customErrorBundlePretty errBundle = customErrorBundlePretty errBundle =
let errBundle' = errBundle let errBundle' = errBundle { bundleErrors =
{ bundleErrors = fmap setCustomErrorOffset $ bundleErrors errBundle } NE.sortWith errorOffset $ -- megaparsec requires that the list of errors be sorted by their offsets
bundleErrors errBundle >>= finalizeCustomError }
in errorBundlePretty errBundle' in errorBundlePretty errBundle'
where where
setCustomErrorOffset finalizeCustomError
:: ParseError Text CustomErr -> ParseError Text CustomErr :: ParseError Text CustomErr -> NE.NonEmpty (ParseError Text CustomErr)
setCustomErrorOffset err = case findCustomError err of finalizeCustomError err = case findCustomError err of
Nothing -> err Nothing -> pure err
Just errFailAt@(ErrorFailAt startOffset _ _) -> Just errFailAt@(ErrorFailAt startOffset _ _) ->
FancyError startOffset $ S.singleton $ ErrorCustom errFailAt -- Adjust the offset
pure $ FancyError startOffset $ S.singleton $ ErrorCustom errFailAt
Just (ErrorReparsing errs) ->
-- Extract and finalize the inner errors
errs >>= finalizeCustomError
-- If any custom errors are present, arbitrarily take the first one -- If any custom errors are present, arbitrarily take the first one
-- (since only one custom error should be used at a time). -- (since only one custom error should be used at a time).