-- | -- Module : Text.Megaparsec.Error -- Copyright : © 2015–2018 Megaparsec contributors -- License : FreeBSD -- -- Maintainer : Mark Karpov -- Stability : experimental -- Portability : portable -- -- Parse errors. The current version of Megaparsec supports well-typed -- errors instead of 'String'-based ones. This gives a lot of flexibility in -- describing what exactly went wrong as well as a way to return arbitrary -- data in case of failure. -- -- You probably do not want to import this module directly because -- "Text.Megaparsec" re-exports it anyway. {-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE UndecidableInstances#-} module Text.Megaparsec.Error ( -- * Parse error type ErrorItem (..) , ErrorFancy (..) , ParseError (..) , mapParseError , errorOffset , ParseErrorBundle (..) , attachSourcePos -- * Pretty-printing , ShowErrorComponent (..) , errorBundlePretty , parseErrorPretty , parseErrorTextPretty ) where import Control.DeepSeq import Control.Exception import Control.Monad.State.Strict import Data.Data (Data) import Data.List (intercalate) import Data.List.NonEmpty (NonEmpty (..)) import Data.Maybe (isNothing) import Data.Proxy import Data.Set (Set) import Data.Typeable (Typeable) import Data.Void import GHC.Generics import Text.Megaparsec.Pos import Text.Megaparsec.State import Text.Megaparsec.Stream import qualified Data.List.NonEmpty as NE import qualified Data.Set as E #if !MIN_VERSION_base(4,11,0) import Data.Semigroup #endif ---------------------------------------------------------------------------- -- Parse error type -- | Data type that is used to represent “unexpected\/expected” items in -- 'ParseError'. The data type is parametrized over the token type @t@. -- -- @since 5.0.0 data ErrorItem t = Tokens (NonEmpty t) -- ^ Non-empty stream of tokens | Label (NonEmpty Char) -- ^ Label (cannot be empty) | EndOfInput -- ^ End of input deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor) instance NFData t => NFData (ErrorItem t) -- | Additional error data, extendable by user. When no custom data is -- necessary, the type is typically indexed by 'Void' to “cancel” the -- 'ErrorCustom' constructor. -- -- @since 6.0.0 data ErrorFancy e = ErrorFail String -- ^ 'fail' has been used in parser monad | ErrorIndentation Ordering Pos Pos -- ^ Incorrect indentation error: desired ordering between reference -- level and actual level, reference indentation level, actual -- indentation level | ErrorCustom e -- ^ Custom error data deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor) instance NFData a => NFData (ErrorFancy a) where rnf (ErrorFail str) = rnf str rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act rnf (ErrorCustom a) = rnf a -- | @'ParseError' s e@ represents a parse error parametrized over the -- stream type @s@ and the custom data @e@. -- -- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse -- errors from different branches of parsing. When merging two -- 'ParseError's, the longest match is preferred; if positions are the same, -- custom data sets and collections of message items are combined. Note that -- fancy errors take precedence over trivial errors in merging. -- -- @since 7.0.0 data ParseError s e = TrivialError Int (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s))) -- ^ Trivial errors, generated by Megaparsec's machinery. The data -- constructor includes the offset of error, unexpected token (if any), -- and expected tokens. -- -- Type of the first argument was changed in the version /7.0.0/. | FancyError Int (Set (ErrorFancy e)) -- ^ Fancy, custom errors. -- -- Type of the first argument was changed in the version /7.0.0/. deriving (Typeable, Generic) deriving instance ( Show (Token s) , Show e ) => Show (ParseError s e) deriving instance ( Eq (Token s) , Eq e ) => Eq (ParseError s e) deriving instance ( Data s , Data (Token s) , Ord (Token s) , Data e , Ord e ) => Data (ParseError s e) instance ( NFData (Token s) , NFData e ) => NFData (ParseError s e) instance (Stream s, Ord e) => Semigroup (ParseError s e) where (<>) = mergeError {-# INLINE (<>) #-} instance (Stream s, Ord e) => Monoid (ParseError s e) where mempty = TrivialError 0 Nothing E.empty mappend = (<>) {-# INLINE mappend #-} instance ( Show s , Show (Token s) , Show e , ShowErrorComponent e , Stream s , Typeable s , Typeable e ) => Exception (ParseError s e) where displayException = parseErrorPretty -- | Modify the custom data component in a parse error. This could be done -- via 'fmap' if not for the 'Ord' constraint. -- -- @since 7.0.0 mapParseError :: Ord e' => (e -> e') -> ParseError s e -> ParseError s e' mapParseError _ (TrivialError o u p) = TrivialError o u p mapParseError f (FancyError o x) = FancyError o (E.map (fmap f) x) -- | Get offset of given 'ParseError'. -- -- @since 7.0.0 errorOffset :: ParseError s e -> Int errorOffset (TrivialError o _ _) = o errorOffset (FancyError o _) = o -- | Merge two error data structures into one joining their collections of -- message items and preferring the longest match. In other words, earlier -- error message is discarded. This may seem counter-intuitive, but -- 'mergeError' is only used to merge error messages of alternative branches -- of parsing and in this case longest match should be preferred. mergeError :: (Stream s, Ord e) => ParseError s e -> ParseError s e -> ParseError s e mergeError e1 e2 = case errorOffset e1 `compare` errorOffset e2 of LT -> e2 EQ -> case (e1, e2) of (TrivialError s1 u1 p1, TrivialError _ u2 p2) -> TrivialError s1 (n u1 u2) (E.union p1 p2) (FancyError {}, TrivialError {}) -> e1 (TrivialError {}, FancyError {}) -> e2 (FancyError s1 x1, FancyError _ x2) -> FancyError s1 (E.union x1 x2) GT -> e1 where -- NOTE The logic behind this merging is that since we only combine -- parse errors that happen at exactly the same position, all the -- unexpected items will be prefixes of input stream at that position or -- labels referring to the same thing. Our aim here is to choose the -- longest prefix (merging with labels and end of input is somewhat -- arbitrary, but is necessary because otherwise we can't make -- ParseError lawful Monoid and have nice parse errors at the same -- time). n Nothing Nothing = Nothing n (Just x) Nothing = Just x n Nothing (Just y) = Just y n (Just x) (Just y) = Just (max x y) {-# INLINE mergeError #-} -- | A non-empty collection of 'ParseError's equipped with 'PosState' that -- allows to pretty-print the errors efficiently and correctly. -- -- @since 7.0.0 data ParseErrorBundle s e = ParseErrorBundle { bundleErrors :: NonEmpty (ParseError s e) -- ^ A collection of 'ParseError's that is sorted by parse error offsets , bundlePosState :: PosState s -- ^ State that is used for line\/column calculation } deriving (Generic) deriving instance ( Show s , Show (Token s) , Show e ) => Show (ParseErrorBundle s e) deriving instance ( Eq s , Eq (Token s) , Eq e ) => Eq (ParseErrorBundle s e) deriving instance ( Typeable s , Typeable (Token s) , Typeable e ) => Typeable (ParseErrorBundle s e) deriving instance ( Data s , Data (Token s) , Ord (Token s) , Data e , Ord e ) => Data (ParseErrorBundle s e) instance ( NFData s , NFData (Token s) , NFData e ) => NFData (ParseErrorBundle s e) instance ( Show s , Show (Token s) , Show e , ShowErrorComponent e , Stream s , Typeable s , Typeable e ) => Exception (ParseErrorBundle s e) where displayException = errorBundlePretty -- | Attach 'SourcePos'es to items in a 'Traversable' container given that -- there is a projection allowing to get an offset per item. -- -- Items must be in ascending order with respect to their offsets. -- -- @since 7.0.0 attachSourcePos :: (Traversable t, Stream s) => (a -> Int) -- ^ How to project offset from an item (e.g. 'errorOffset') -> t a -- ^ The collection of items -> PosState s -- ^ Initial 'PosState' -> (t (a, SourcePos), PosState s) -- ^ The collection with 'SourcePos'es -- added and the final 'PosState' attachSourcePos projectOffset xs = runState (traverse f xs) where f a = do pst <- get let (spos, pst') = reachOffsetNoLine (projectOffset a) pst put pst' return (a, spos) {-# INLINEABLE attachSourcePos #-} ---------------------------------------------------------------------------- -- Pretty-printing -- | The type class defines how to print a custom component of 'ParseError'. -- -- @since 5.0.0 class Ord a => ShowErrorComponent a where -- | Pretty-print a component of 'ParseError'. showErrorComponent :: a -> String -- | Length of the error component in characters, used for highlighting of -- parse errors in input string. -- -- @since 7.0.0 errorComponentLen :: a -> Int errorComponentLen _ = 1 instance ShowErrorComponent Void where showErrorComponent = absurd -- | Pretty-print a 'ParseErrorBundle'. All 'ParseError's in the bundle will -- be pretty-printed in order together with the corresponding offending -- lines by doing a single efficient pass over the input stream. The -- rendered 'String' always ends with a newline. -- -- @since 7.0.0 errorBundlePretty :: forall s e. ( Stream s , ShowErrorComponent e ) => ParseErrorBundle s e -- ^ Parse error bundle to display -> String -- ^ Textual rendition of the bundle errorBundlePretty ParseErrorBundle {..} = let (r, _) = foldl f (id, bundlePosState) bundleErrors in drop 1 (r "") where f :: (ShowS, PosState s) -> ParseError s e -> (ShowS, PosState s) f (o, !pst) e = (o . (outChunk ++), pst') where (epos, sline, pst') = reachOffset (errorOffset e) pst outChunk = "\n" <> sourcePosPretty epos <> ":\n" <> padding <> "|\n" <> lineNumber <> " | " <> sline <> "\n" <> padding <> "| " <> rpadding <> pointer <> "\n" <> parseErrorTextPretty e lineNumber = (show . unPos . sourceLine) epos padding = replicate (length lineNumber + 1) ' ' rpadding = if pointerLen > 0 then replicate rpshift ' ' else "" rpshift = unPos (sourceColumn epos) - 1 pointer = replicate pointerLen '^' pointerLen = if rpshift + elen > slineLen then slineLen - rpshift + 1 else elen slineLen = length sline elen = case e of TrivialError _ Nothing _ -> 1 TrivialError _ (Just x) _ -> errorItemLength x FancyError _ xs -> E.foldl' (\a b -> max a (errorFancyLength b)) 1 xs -- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a -- newline. -- -- @since 5.0.0 parseErrorPretty :: (Stream s, ShowErrorComponent e) => ParseError s e -- ^ Parse error to render -> String -- ^ Result of rendering parseErrorPretty e = "offset=" <> show (errorOffset e) <> ":\n" <> parseErrorTextPretty e -- | Pretty-print a textual part of a 'ParseError', that is, everything -- except for its position. The rendered 'String' always ends with a -- newline. -- -- @since 5.1.0 parseErrorTextPretty :: forall s e. (Stream s, ShowErrorComponent e) => ParseError s e -- ^ Parse error to render -> String -- ^ Result of rendering parseErrorTextPretty (TrivialError _ us ps) = if isNothing us && E.null ps then "unknown parse error\n" else messageItemsPretty "unexpected " (showErrorItem pxy `E.map` maybe E.empty E.singleton us) <> messageItemsPretty "expecting " (showErrorItem pxy `E.map` ps) where pxy = Proxy :: Proxy s parseErrorTextPretty (FancyError _ xs) = if E.null xs then "unknown fancy parse error\n" else unlines (showErrorFancy <$> E.toAscList xs) ---------------------------------------------------------------------------- -- Helpers -- | Pretty-print an 'ErrorItem'. showErrorItem :: Stream s => Proxy s -> ErrorItem (Token s) -> String showErrorItem pxy = \case Tokens ts -> showTokens pxy ts Label label -> NE.toList label EndOfInput -> "end of input" -- | Get length of the “pointer” to display under a given 'ErrorItem'. errorItemLength :: ErrorItem t -> Int errorItemLength = \case Tokens ts -> NE.length ts _ -> 1 -- | Pretty-print an 'ErrorFancy'. showErrorFancy :: ShowErrorComponent e => ErrorFancy e -> String showErrorFancy = \case ErrorFail msg -> msg ErrorIndentation ord ref actual -> "incorrect indentation (got " <> show (unPos actual) <> ", should be " <> p <> show (unPos ref) <> ")" where p = case ord of LT -> "less than " EQ -> "equal to " GT -> "greater than " ErrorCustom a -> showErrorComponent a -- | Get length of the “pointer” to display under a given 'ErrorFancy'. errorFancyLength :: ShowErrorComponent e => ErrorFancy e -> Int errorFancyLength = \case ErrorCustom a -> errorComponentLen a _ -> 1 -- | Transforms a list of error messages into their textual representation. messageItemsPretty :: String -- ^ Prefix to prepend -> Set String -- ^ Collection of messages -> String -- ^ Result of rendering messageItemsPretty prefix ts | E.null ts = "" | otherwise = prefix <> (orList . NE.fromList . E.toAscList) ts <> "\n" -- | Print a pretty list where items are separated with commas and the word -- “or” according to the rules of English punctuation. orList :: NonEmpty String -> String orList (x:|[]) = x orList (x:|[y]) = x <> " or " <> y orList xs = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs