431 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			431 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
| {-# LANGUAGE BangPatterns #-}
 | |
| {-# LANGUAGE CPP #-}
 | |
| {-# LANGUAGE FlexibleContexts #-}
 | |
| {-# LANGUAGE FlexibleInstances #-} -- new
 | |
| {-# LANGUAGE LambdaCase #-}
 | |
| {-# LANGUAGE ScopedTypeVariables #-}
 | |
| {-# LANGUAGE StandaloneDeriving #-} -- new
 | |
| 
 | |
| module Text.Megaparsec.Custom (
 | |
|   -- * Custom parse error types
 | |
|   HledgerParseErrorData,
 | |
|   HledgerParseErrors,
 | |
| 
 | |
|   -- * Failing with an arbitrary source position
 | |
|   parseErrorAt,
 | |
|   parseErrorAtRegion,
 | |
| 
 | |
|   -- * Re-parsing
 | |
|   SourceExcerpt,
 | |
|   getExcerptText,
 | |
| 
 | |
|   excerpt_,
 | |
|   reparseExcerpt,
 | |
| 
 | |
|   -- * Pretty-printing custom parse errors
 | |
|   customErrorBundlePretty,
 | |
| 
 | |
| 
 | |
|   -- * "Final" parse errors
 | |
|   FinalParseError,
 | |
|   FinalParseError',
 | |
|   FinalParseErrorBundle,
 | |
|   FinalParseErrorBundle',
 | |
| 
 | |
|   -- * Constructing "final" parse errors
 | |
|   finalError,
 | |
|   finalFancyFailure,
 | |
|   finalFail,
 | |
|   finalCustomFailure,
 | |
| 
 | |
|   -- * Pretty-printing "final" parse errors
 | |
|   finalErrorBundlePretty,
 | |
|   attachSource,
 | |
| 
 | |
|   -- * Handling parse errors from include files with "final" parse errors
 | |
|   parseIncludeFile,
 | |
| )
 | |
| where
 | |
| 
 | |
| import Control.Monad.Except
 | |
| import Control.Monad.State.Strict (StateT, evalStateT)
 | |
| import qualified Data.List.NonEmpty as NE
 | |
| import Data.Monoid (Alt(..))
 | |
| import qualified Data.Set as S
 | |
| import Data.Text (Text)
 | |
| import Text.Megaparsec
 | |
| 
 | |
| 
 | |
| --- * Custom parse error types
 | |
| 
 | |
| -- | Custom error data for hledger parsers. Specialised for a 'Text' parse stream.
 | |
| data HledgerParseErrorData
 | |
|   -- | Fail with a message at a specific source position interval. The
 | |
|   -- interval must be contained within a single line.
 | |
|   = ErrorFailAt Int -- Starting offset
 | |
|                 Int -- Ending offset
 | |
|                 String -- Error message
 | |
|   -- | Re-throw parse errors obtained from the "re-parsing" of an excerpt
 | |
|   -- of the source text.
 | |
|   | ErrorReparsing
 | |
|       (NE.NonEmpty (ParseError Text HledgerParseErrorData)) -- Source fragment parse errors
 | |
|   deriving (Show, Eq, Ord)
 | |
| 
 | |
| -- | A specialised version of ParseErrorBundle: 
 | |
| -- a non-empty collection of hledger parse errors, 
 | |
| -- equipped with PosState to help pretty-print them.
 | |
| -- Specialised for a 'Text' parse stream.
 | |
| type HledgerParseErrors = ParseErrorBundle Text HledgerParseErrorData
 | |
| 
 | |
| -- We require an 'Ord' instance for 'CustomError' so that they may be
 | |
| -- stored in a 'Set'. The actual instance is inconsequential, so we just
 | |
| -- derive it, but the derived instance requires an (orphan) instance for
 | |
| -- 'ParseError'. Hopefully this does not cause any trouble.
 | |
| 
 | |
| deriving instance Ord (ParseError Text HledgerParseErrorData)
 | |
| 
 | |
| -- Note: the pretty-printing of our 'HledgerParseErrorData' type is only partally
 | |
| -- defined in its 'ShowErrorComponent' instance; we perform additional
 | |
| -- adjustments in 'customErrorBundlePretty'.
 | |
| 
 | |
| instance ShowErrorComponent HledgerParseErrorData where
 | |
|   showErrorComponent (ErrorFailAt _ _ errMsg) = errMsg
 | |
|   showErrorComponent (ErrorReparsing _) = "" -- dummy value
 | |
| 
 | |
|   errorComponentLen (ErrorFailAt startOffset endOffset _) =
 | |
|     endOffset - startOffset
 | |
|   errorComponentLen (ErrorReparsing _) = 1 -- dummy value
 | |
| 
 | |
| 
 | |
| --- * Failing with an arbitrary source position
 | |
| 
 | |
| -- | Fail at a specific source position, given by the raw offset from the
 | |
| -- start of the input stream (the number of tokens processed at that
 | |
| -- point).
 | |
| 
 | |
| parseErrorAt :: Int -> String -> HledgerParseErrorData
 | |
| parseErrorAt offset = ErrorFailAt offset (offset+1)
 | |
| 
 | |
| -- | Fail at a specific source interval, given by the raw offsets of its
 | |
| -- endpoints from the start of the input stream (the numbers of tokens
 | |
| -- processed at those points).
 | |
| --
 | |
| -- Note that care must be taken to ensure that the specified interval does
 | |
| -- not span multiple lines of the input source. This will not be checked.
 | |
| 
 | |
| parseErrorAtRegion
 | |
|   :: Int    -- ^ Start offset
 | |
|   -> Int    -- ^ End end offset
 | |
|   -> String -- ^ Error message
 | |
|   -> HledgerParseErrorData
 | |
| parseErrorAtRegion startOffset endOffset msg =
 | |
|   if startOffset < endOffset
 | |
|     then ErrorFailAt startOffset endOffset msg
 | |
|     else ErrorFailAt startOffset (startOffset+1) msg
 | |
| 
 | |
| 
 | |
| --- * Re-parsing
 | |
| 
 | |
| -- | A fragment of source suitable for "re-parsing". The purpose of this
 | |
| -- data type is to preserve the content and source position of the excerpt
 | |
| -- so that parse errors raised during "re-parsing" may properly reference
 | |
| -- the original source.
 | |
| 
 | |
| data SourceExcerpt = SourceExcerpt Int  -- Offset of beginning of excerpt
 | |
|                                    Text -- Fragment of source file
 | |
| 
 | |
| -- | Get the raw text of a source excerpt.
 | |
| 
 | |
| getExcerptText :: SourceExcerpt -> Text
 | |
| getExcerptText (SourceExcerpt _ txt) = txt
 | |
| 
 | |
| -- | 'excerpt_ p' applies the given parser 'p' and extracts the portion of
 | |
| -- the source consumed by 'p', along with the source position of this
 | |
| -- portion. This is the only way to create a source excerpt suitable for
 | |
| -- "re-parsing" by 'reparseExcerpt'.
 | |
| 
 | |
| -- This function could be extended to return the result of 'p', but we don't
 | |
| -- currently need this.
 | |
| 
 | |
| excerpt_ :: MonadParsec HledgerParseErrorData Text m => m a -> m SourceExcerpt
 | |
| excerpt_ p = do
 | |
|   offset <- getOffset
 | |
|   (!txt, _) <- match p
 | |
|   pure $ SourceExcerpt offset txt
 | |
| 
 | |
| -- | 'reparseExcerpt s p' "re-parses" the source excerpt 's' using the
 | |
| -- parser 'p'. Parse errors raised by 'p' will be re-thrown at the source
 | |
| -- position of the source excerpt.
 | |
| --
 | |
| -- In order for the correct source file to be displayed when re-throwing
 | |
| -- parse errors, we must ensure that the source file during the use of
 | |
| -- 'reparseExcerpt s p' is the same as that during the use of 'excerpt_'
 | |
| -- that generated the source excerpt 's'. However, we can usually expect
 | |
| -- this condition to be satisfied because, at the time of writing, the
 | |
| -- only changes of source file in the codebase take place through include
 | |
| -- files, and the parser for include files neither accepts nor returns
 | |
| -- 'SourceExcerpt's.
 | |
| 
 | |
| reparseExcerpt
 | |
|   :: Monad m
 | |
|   => SourceExcerpt
 | |
|   -> ParsecT HledgerParseErrorData Text m a
 | |
|   -> ParsecT HledgerParseErrorData Text m a
 | |
| reparseExcerpt (SourceExcerpt offset txt) p = do
 | |
|   (_, res) <- lift $ runParserT' p (offsetInitialState offset txt)
 | |
|   case res of
 | |
|     Right result -> pure result
 | |
|     Left errBundle -> customFailure $ ErrorReparsing $ bundleErrors errBundle
 | |
| 
 | |
|   where
 | |
|     offsetInitialState :: Int -> s ->
 | |
| #if MIN_VERSION_megaparsec(8,0,0)
 | |
|       State s e
 | |
| #else
 | |
|       State s
 | |
| #endif
 | |
|     offsetInitialState initialOffset s = State
 | |
|       { stateInput  = s
 | |
|       , stateOffset = initialOffset
 | |
|       , statePosState = PosState
 | |
|         { pstateInput = s
 | |
|         , pstateOffset = initialOffset
 | |
|         , pstateSourcePos = initialPos ""
 | |
|         , pstateTabWidth = defaultTabWidth
 | |
|         , pstateLinePrefix = ""
 | |
|         }
 | |
| #if MIN_VERSION_megaparsec(8,0,0)
 | |
|       , stateParseErrors = []
 | |
| #endif
 | |
|       }
 | |
| 
 | |
| --- * Pretty-printing custom parse errors
 | |
| 
 | |
| -- | Pretty-print our custom parse errors. It is necessary to use this
 | |
| -- instead of 'errorBundlePretty' when custom parse errors are thrown.
 | |
| --
 | |
| -- This function intercepts our custom parse errors and applies final
 | |
| -- adjustments ('finalizeCustomError') before passing them to
 | |
| -- 'errorBundlePretty'. These adjustments are part of the implementation
 | |
| -- of the behaviour of our custom parse errors.
 | |
| --
 | |
| -- Note: We must ensure that the offset of the 'PosState' of the provided
 | |
| -- 'ParseErrorBundle' is no larger than the offset specified by a
 | |
| -- 'ErrorFailAt' constructor. This is guaranteed if this offset is set to
 | |
| -- 0 (that is, the beginning of the source file), which is the
 | |
| -- case for 'ParseErrorBundle's returned from 'runParserT'.
 | |
| 
 | |
| customErrorBundlePretty :: HledgerParseErrors -> String
 | |
| customErrorBundlePretty errBundle =
 | |
|   let errBundle' = errBundle { bundleErrors =
 | |
|         NE.sortWith errorOffset $ -- megaparsec requires that the list of errors be sorted by their offsets
 | |
|         bundleErrors errBundle >>= finalizeCustomError }
 | |
|   in  errorBundlePretty errBundle'
 | |
| 
 | |
|   where
 | |
|     finalizeCustomError
 | |
|       :: ParseError Text HledgerParseErrorData -> NE.NonEmpty (ParseError Text HledgerParseErrorData)
 | |
|     finalizeCustomError err = case findCustomError err of
 | |
|       Nothing -> pure err
 | |
| 
 | |
|       Just errFailAt@(ErrorFailAt startOffset _ _) ->
 | |
|         -- Adjust the offset
 | |
|         pure $ FancyError startOffset $ S.singleton $ ErrorCustom errFailAt
 | |
| 
 | |
|       Just (ErrorReparsing errs) ->
 | |
|         -- Extract and finalize the inner errors
 | |
|         errs >>= finalizeCustomError
 | |
| 
 | |
|     -- If any custom errors are present, arbitrarily take the first one
 | |
|     -- (since only one custom error should be used at a time).
 | |
|     findCustomError :: ParseError Text HledgerParseErrorData -> Maybe HledgerParseErrorData
 | |
|     findCustomError err = case err of
 | |
|       FancyError _ errSet ->
 | |
|         finds (\case {ErrorCustom e -> Just e; _ -> Nothing}) errSet
 | |
|       _ -> Nothing
 | |
| 
 | |
|     finds :: (Foldable t) => (a -> Maybe b) -> t a -> Maybe b
 | |
|     finds f = getAlt . foldMap (Alt . f)
 | |
| 
 | |
| 
 | |
| --- * "Final" parse errors
 | |
| --
 | |
| -- | A type representing "final" parse errors that cannot be backtracked
 | |
| -- from and are guaranteed to halt parsing. The anti-backtracking
 | |
| -- behaviour is implemented by an 'ExceptT' layer in the parser's monad
 | |
| -- stack, using this type as the 'ExceptT' error type.
 | |
| --
 | |
| -- We have three goals for this type:
 | |
| -- (1) it should be possible to convert any parse error into a "final"
 | |
| -- parse error,
 | |
| -- (2) it should be possible to take a parse error thrown from an include
 | |
| -- file and re-throw it in the parent file, and
 | |
| -- (3) the pretty-printing of "final" parse errors should be consistent
 | |
| -- with that of ordinary parse errors, but should also report a stack of
 | |
| -- files for errors thrown from include files.
 | |
| --
 | |
| -- In order to pretty-print a "final" parse error (goal 3), it must be
 | |
| -- bundled with include filepaths and its full source text. When a "final"
 | |
| -- parse error is thrown from within a parser, we do not have access to
 | |
| -- the full source, so we must hold the parse error until it can be joined
 | |
| -- with its source (and include filepaths, if it was thrown from an
 | |
| -- include file) by the parser's caller.
 | |
| --
 | |
| -- A parse error with include filepaths and its full source text is
 | |
| -- represented by the 'FinalParseErrorBundle' type, while a parse error in
 | |
| -- need of either include filepaths, full source text, or both is
 | |
| -- represented by the 'FinalParseError' type.
 | |
| 
 | |
| data FinalParseError' e
 | |
|   -- a parse error thrown as a "final" parse error
 | |
|   = FinalError           (ParseError Text e)
 | |
|   -- a parse error obtained from running a parser, e.g. using 'runParserT'
 | |
|   | FinalBundle          (ParseErrorBundle Text e)
 | |
|   -- a parse error thrown from an include file
 | |
|   | FinalBundleWithStack (FinalParseErrorBundle' e)
 | |
|   deriving (Show)
 | |
| 
 | |
| type FinalParseError = FinalParseError' HledgerParseErrorData
 | |
| 
 | |
| -- We need a 'Monoid' instance for 'FinalParseError' so that 'ExceptT
 | |
| -- FinalParseError m' is an instance of Alternative and MonadPlus, which
 | |
| -- is needed to use some parser combinators, e.g. 'many'.
 | |
| --
 | |
| -- This monoid instance simply takes the first (left-most) error.
 | |
| 
 | |
| instance Semigroup (FinalParseError' e) where
 | |
|   e <> _ = e
 | |
| 
 | |
| instance Monoid (FinalParseError' e) where
 | |
|   mempty = FinalError $ FancyError 0 $
 | |
|             S.singleton (ErrorFail "default parse error")
 | |
|   mappend = (<>)
 | |
| 
 | |
| -- | A type bundling a 'ParseError' with its full source text, filepath,
 | |
| -- and stack of include files. Suitable for pretty-printing.
 | |
| --
 | |
| -- Megaparsec's 'ParseErrorBundle' type already bundles a parse error with
 | |
| -- its full source text and filepath, so we just add a stack of include
 | |
| -- files.
 | |
| 
 | |
| data FinalParseErrorBundle' e = FinalParseErrorBundle'
 | |
|   { finalErrorBundle :: ParseErrorBundle Text e
 | |
|   , includeFileStack :: [FilePath]
 | |
|   } deriving (Show)
 | |
| 
 | |
| type FinalParseErrorBundle = FinalParseErrorBundle' HledgerParseErrorData
 | |
| 
 | |
| 
 | |
| --- * Constructing and throwing final parse errors
 | |
| 
 | |
| -- | Convert a "regular" parse error into a "final" parse error.
 | |
| 
 | |
| finalError :: ParseError Text e -> FinalParseError' e
 | |
| finalError = FinalError
 | |
| 
 | |
| -- | Like megaparsec's 'fancyFailure', but as a "final" parse error.
 | |
| 
 | |
| finalFancyFailure
 | |
|   :: (MonadParsec e s m, MonadError (FinalParseError' e) m)
 | |
|   => S.Set (ErrorFancy e) -> m a
 | |
| finalFancyFailure errSet = do
 | |
|   offset <- getOffset
 | |
|   throwError $ FinalError $ FancyError offset errSet
 | |
| 
 | |
| -- | Like 'fail', but as a "final" parse error.
 | |
| 
 | |
| finalFail
 | |
|   :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => String -> m a
 | |
| finalFail = finalFancyFailure . S.singleton . ErrorFail
 | |
| 
 | |
| -- | Like megaparsec's 'customFailure', but as a "final" parse error.
 | |
| 
 | |
| finalCustomFailure
 | |
|   :: (MonadParsec e s m, MonadError (FinalParseError' e) m) => e -> m a
 | |
| finalCustomFailure = finalFancyFailure . S.singleton . ErrorCustom
 | |
| 
 | |
| 
 | |
| --- * Pretty-printing "final" parse errors
 | |
| 
 | |
| -- | Pretty-print a "final" parse error: print the stack of include files,
 | |
| -- then apply the pretty-printer for parse error bundles. Note that
 | |
| -- 'attachSource' must be used on a "final" parse error before it can be
 | |
| -- pretty-printed.
 | |
| 
 | |
| finalErrorBundlePretty :: FinalParseErrorBundle' HledgerParseErrorData -> String
 | |
| finalErrorBundlePretty bundle =
 | |
|      concatMap showIncludeFilepath (includeFileStack bundle)
 | |
|   <> customErrorBundlePretty (finalErrorBundle bundle)
 | |
|   where
 | |
|     showIncludeFilepath path = "in file included from " <> path <> ",\n"
 | |
| 
 | |
| -- | Supply a filepath and source text to a "final" parse error so that it
 | |
| -- can be pretty-printed. You must ensure that you provide the appropriate
 | |
| -- source text and filepath.
 | |
| 
 | |
| attachSource
 | |
|   :: FilePath -> Text -> FinalParseError' e -> FinalParseErrorBundle' e
 | |
| attachSource filePath sourceText finalParseError = case finalParseError of
 | |
| 
 | |
|   -- A parse error thrown directly with the 'FinalError' constructor
 | |
|   -- requires both source and filepath.
 | |
|   FinalError parseError ->
 | |
|     let bundle = ParseErrorBundle
 | |
|           { bundleErrors = parseError NE.:| []
 | |
|           , bundlePosState = initialPosState filePath sourceText }
 | |
|     in  FinalParseErrorBundle'
 | |
|           { finalErrorBundle = bundle
 | |
|           , includeFileStack  = [] }
 | |
| 
 | |
|   -- A 'ParseErrorBundle' already has the appropriate source and filepath
 | |
|   -- and so needs neither.
 | |
|   FinalBundle peBundle -> FinalParseErrorBundle'
 | |
|     { finalErrorBundle = peBundle
 | |
|     , includeFileStack = [] }
 | |
| 
 | |
|   -- A parse error from a 'FinalParseErrorBundle' was thrown from an
 | |
|   -- include file, so we add the filepath to the stack.
 | |
|   FinalBundleWithStack fpeBundle -> fpeBundle
 | |
|     { includeFileStack = filePath : includeFileStack fpeBundle }
 | |
| 
 | |
| 
 | |
| --- * Handling parse errors from include files with "final" parse errors
 | |
| 
 | |
| -- | Parse a file with the given parser and initial state, discarding the
 | |
| -- final state and re-throwing any parse errors as "final" parse errors.
 | |
| 
 | |
| parseIncludeFile
 | |
|   :: Monad m
 | |
|   => StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
 | |
|   -> st
 | |
|   -> FilePath
 | |
|   -> Text
 | |
|   -> StateT st (ParsecT HledgerParseErrorData Text (ExceptT FinalParseError m)) a
 | |
| parseIncludeFile parser initialState filepath text =
 | |
|   catchError parser' handler
 | |
|   where
 | |
|     parser' = do
 | |
|       eResult <- lift $ lift $
 | |
|                   runParserT (evalStateT parser initialState) filepath text
 | |
|       case eResult of
 | |
|         Left parseErrorBundle -> throwError $ FinalBundle parseErrorBundle
 | |
|         Right result -> pure result
 | |
| 
 | |
|     -- Attach source and filepath of the include file to its parse errors
 | |
|     handler e = throwError $ FinalBundleWithStack $ attachSource filepath text e
 | |
| 
 | |
| 
 | |
| --- * Helpers
 | |
| 
 | |
| -- Like megaparsec's 'initialState', but instead for 'PosState'. Used when
 | |
| -- constructing 'ParseErrorBundle's. The values for "tab width" and "line
 | |
| -- prefix" are taken from 'initialState'.
 | |
| 
 | |
| initialPosState :: FilePath -> Text -> PosState Text
 | |
| initialPosState filePath sourceText = PosState
 | |
|   { pstateInput      = sourceText
 | |
|   , pstateOffset     = 0
 | |
|   , pstateSourcePos  = initialPos filePath
 | |
|   , pstateTabWidth   = defaultTabWidth
 | |
|   , pstateLinePrefix = "" }
 |