{-# LANGUAGE AllowAmbiguousTypes, ConstraintKinds, DefaultSignatures, DeriveDataTypeable, DeriveFunctor, FlexibleContexts, FlexibleInstances, GeneralizedNewtypeDeriving, OverloadedStrings, RankNTypes, ScopedTypeVariables, TypeApplications, TypeFamilies, TypeOperators, TypeSynonymInstances, UndecidableInstances #-} -- | The core classes supported by all the parsers in this library. module Text.Grampa.Class (MultiParsing(..), GrammarParsing(..), AmbiguousParsing(..), DeterministicParsing(..), InputParsing(..), InputCharParsing(..), CommittedParsing(..), ConsumedInputParsing(..), LexicalParsing(..), TailsParsing(..), ParseResults, ParseFailure(..), FailureDescription(..), Pos, Ambiguous(..), completeParser) where import Control.Applicative (Alternative(empty), liftA2) import Data.Char (isAlphaNum, isLetter, isSpace) import Data.Functor.Classes (Show1(..)) import Data.Functor.Compose (Compose(..)) import Data.Kind (Type) import Data.List.NonEmpty (NonEmpty((:|))) import Data.Data (Data) import Data.Typeable (Typeable) import Data.Monoid (Monoid(mempty, mappend)) import qualified Data.Monoid.Null as Null import Data.Monoid.Null (MonoidNull) import Data.Monoid.Factorial (FactorialMonoid) import Data.Monoid.Textual (TextualMonoid) import Data.Semigroup (Semigroup((<>))) import Data.Ord (Down(Down)) import Text.Parser.Combinators (Parsing(())) import Text.Parser.Token (TokenParsing) import Text.Parser.Deterministic (DeterministicParsing(..)) import Text.Parser.Input (ConsumedInputParsing(..), InputParsing(..), InputCharParsing(..)) import qualified Text.Parser.Char import Data.Kind (Constraint) import qualified Rank2 import Prelude hiding (takeWhile) type ParseResults s = Either (ParseFailure Pos s) -- | A 'ParseFailure' contains the offset of the parse failure and the list of things expected at that offset. data ParseFailure pos s = ParseFailure {failurePosition :: pos, expectedAlternatives :: FailureDescription s, -- ^ expected input alternatives errorAlternatives :: [String] -- ^ erroneous alternatives } deriving (Eq, Functor, Show) -- | A position in the input is represented as the length of its remainder. type Pos = Down Int -- | An expected or erroneous input can be described using 'String' or using the input type data FailureDescription s = FailureDescription {staticDescriptions :: [String], literalDescriptions :: [s]} deriving (Functor, Eq, Ord, Read, Show) instance (Ord pos, Ord s) => Semigroup (ParseFailure pos s) where f1@(ParseFailure pos1 exp1 err1) <> f2@(ParseFailure pos2 exp2 err2) = ParseFailure pos' exp' err' where ParseFailure pos' exp' err' | pos1 > pos2 = f1 | pos1 < pos2 = f2 | otherwise = ParseFailure pos1 (exp1 <> exp2) (mergeSorted err1 err2) instance Ord s => Semigroup (FailureDescription s) where exp1 <> exp2 = FailureDescription (mergeSorted (staticDescriptions exp1) (staticDescriptions exp2)) (mergeSorted (literalDescriptions exp1) (literalDescriptions exp2)) mergeSorted :: Ord a => [a] -> [a] -> [a] mergeSorted [] xs = xs mergeSorted xs [] = xs mergeSorted xs@(x:xs') ys@(y:ys') | x < y = x : mergeSorted xs' ys | x > y = y : mergeSorted xs ys' | otherwise = x : mergeSorted xs' ys' instance Ord s => Monoid (ParseFailure Pos s) where mempty = ParseFailure (Down maxBound) mempty [] mappend = (<>) instance Ord s => Monoid (FailureDescription s) where mempty = FailureDescription mempty mempty -- | An 'Ambiguous' parse result, produced by the 'ambiguous' combinator, contains a 'NonEmpty' list of -- alternative results. newtype Ambiguous a = Ambiguous{getAmbiguous :: NonEmpty a} deriving (Data, Eq, Ord, Show, Typeable) instance Show1 Ambiguous where liftShowsPrec sp sl d (Ambiguous (h :| l)) t | d > 5 = "(Ambiguous $ " <> sp 0 h (" :| " <> sl l (')' : t)) | otherwise = "Ambiguous (" <> sp 0 h (" :| " <> sl l (')' : t)) instance Functor Ambiguous where fmap f (Ambiguous a) = Ambiguous (fmap f a) instance Applicative Ambiguous where pure a = Ambiguous (pure a) Ambiguous f <*> Ambiguous a = Ambiguous (f <*> a) instance Monad Ambiguous where return = pure Ambiguous a >>= f = Ambiguous (a >>= getAmbiguous . f) instance Foldable Ambiguous where foldMap f (Ambiguous a) = foldMap f a instance Traversable Ambiguous where traverse f (Ambiguous a) = Ambiguous <$> traverse f a instance Semigroup a => Semigroup (Ambiguous a) where Ambiguous xs <> Ambiguous ys = Ambiguous (liftA2 (<>) xs ys) instance Monoid a => Monoid (Ambiguous a) where mempty = Ambiguous (mempty :| []) mappend = (<>) completeParser :: MonoidNull s => Compose (ParseResults s) (Compose [] ((,) s)) r -> Compose (ParseResults s) [] r completeParser (Compose (Left failure)) = Compose (Left failure) completeParser (Compose (Right (Compose results))) = case filter (Null.null . fst) results of [] -> Compose (Left $ ParseFailure 0 (FailureDescription ["a complete parse"] []) []) completeResults -> Compose (Right $ snd <$> completeResults) -- | Choose one of the instances of this class to parse with. class InputParsing m => MultiParsing m where -- | Some parser types produce a single result, others a list of results. type ResultFunctor m :: Type -> Type type GrammarConstraint m (g :: (Type -> Type) -> Type) :: Constraint type GrammarConstraint m g = Rank2.Functor g -- | Given a rank-2 record of parsers and input, produce a record of parses of the complete input. parseComplete :: (ParserInput m ~ s, GrammarConstraint m g, Eq s, FactorialMonoid s) => g m -> s -> g (ResultFunctor m) -- | Given a rank-2 record of parsers and input, produce a record of prefix parses paired with the remaining input -- suffix. parsePrefix :: (ParserInput m ~ s, GrammarConstraint m g, Eq s, FactorialMonoid s) => g m -> s -> g (Compose (ResultFunctor m) ((,) s)) -- | Parsers that belong to this class can memoize the parse results to avoid exponential performance complexity. class MultiParsing m => GrammarParsing m where -- | The record of grammar productions associated with the parser type ParserGrammar m :: (Type -> Type) -> Type -- | For internal use by 'notTerminal' type GrammarFunctor m :: Type -> Type -- | Converts the intermediate to final parsing result. parsingResult :: ParserInput m -> GrammarFunctor m a -> ResultFunctor m (ParserInput m, a) -- | Used to reference a grammar production, only necessary from outside the grammar itself nonTerminal :: (g ~ ParserGrammar m, GrammarConstraint m g) => (g (GrammarFunctor m) -> GrammarFunctor m a) -> m a -- | Construct a grammar whose every production refers to itself. selfReferring :: (g ~ ParserGrammar m, GrammarConstraint m g, Rank2.Distributive g) => g m -- | Convert a self-referring grammar function to a grammar. fixGrammar :: (g ~ ParserGrammar m, GrammarConstraint m g, Rank2.Distributive g) => (g m -> g m) -> g m -- | Mark a parser that relies on primitive recursion to prevent an infinite loop in 'fixGrammar'. recursive :: m a -> m a -- | Convert a left-recursive parser to a non-left-recursive one. For example, you can replace the left-recursive -- production -- -- > foo = BinOp <$> foo <*> bar <|> baz -- -- in the field @foo@ of grammar @g@ with -- -- > foo = chainRecursive (\x g-> g{foo = x}) baz (BinOp <$> foo <*> bar) -- -- This method works on individual parsers left-recursive on themselves, not on grammars with mutually -- left-recursive productions. Use "Text.Grampa.ContextFree.Memoizing.LeftRecursive" for the latter. chainRecursive :: (g ~ ParserGrammar m, f ~ GrammarFunctor m, GrammarConstraint m g) => (f a -> g f -> g f) -- ^ setter for the parsed results of each iteration -> m a -- ^ the non-recursive base case -> m a -- ^ the recursive case to iterate -> m a -- | Line 'chainRecursive' but produces only the longest possible parse. The modified example -- -- > foo = chainLongestRecursive (\x g-> g{foo = x}) baz (BinOp <$> foo <*> bar) -- -- would be equivalent to the left-recursive production with biased choice -- -- > foo = BinOp <$> foo <*> bar <<|> baz chainLongestRecursive :: (g ~ ParserGrammar m, f ~ GrammarFunctor m, GrammarConstraint m g) => (f a -> g f -> g f) -- ^ setter for the parsed results of each iteration -> m a -- ^ the non-recursive base case -> m a -- ^ the recursive case to iterate -> m a selfReferring = Rank2.cotraverse nonTerminal id {-# INLINE selfReferring #-} fixGrammar = ($ selfReferring) {-# INLINE fixGrammar #-} recursive = id class GrammarParsing m => TailsParsing m where -- | Parse the tails of the input together with memoized parse results parseTails :: GrammarConstraint m g => m r -> [(ParserInput m, g (GrammarFunctor m))] -> GrammarFunctor m r parseAllTails :: (GrammarConstraint m g, Rank2.Functor g) => g m -> [(ParserInput m, g (GrammarFunctor m))] -> [(ParserInput m, g (GrammarFunctor m))] parseAllTails _ [] = [] parseAllTails final parsed@((s, _):_) = (s, gd):parsed where gd = Rank2.fmap (`parseTails` parsed) final -- | Parsers that can produce alternative parses and collect them into an 'Ambiguous' node class Alternative m => AmbiguousParsing m where -- | Collect all alternative parses of the same length into a 'NonEmpty' list of results. ambiguous :: m a -> m (Ambiguous a) -- | Parsers that can temporarily package and delay failure, in a way dual to Parsec's @try@ combinator. Where Parsec -- would require something like -- -- > alternatives = try intro1 *> expected1 -- > <|> try intro2 *> expected2 -- > <|> fallback -- -- you can instead say -- -- > alternatives = admit $ intro1 *> commit expected1 -- > <|> intro2 *> commit expected2 -- > <|> commit fallback -- -- A parsing failure inside an @intro@ parser leaves the other alternatives open, a failure inside an @expected@ -- parser bubbles up and out of the whole @admit@ block. class Alternative m => CommittedParsing m where type CommittedResults m :: Type -> Type -- | Commits the argument parser to success. commit :: m a -> m (CommittedResults m a) -- | Admits a possible defeat of the argument parser. admit :: m (CommittedResults m a) -> m a -- | If a grammar is 'Lexical', its parsers can instantiate the 'TokenParsing' class. class (DeterministicParsing m, InputCharParsing m, TokenParsing m) => LexicalParsing m where -- | Always succeeds, consuming all white space and comments lexicalWhiteSpace :: m () -- | Consumes all whitespace and comments, failing if there are none someLexicalSpace :: m () -- | Consumes a single comment, defaults to 'empty' lexicalComment :: m () -- | Consumes a single semicolon and any trailing whitespace, returning the character |';'|. The method can be -- overridden for automatic semicolon insertion, but if it succeeds on semicolon or white space input it must -- consume it. lexicalSemicolon :: m Char -- | Applies the argument parser and consumes the trailing 'lexicalWhitespace' lexicalToken :: m a -> m a -- | Applies the argument parser, determines whether its result is a legal identifier, and consumes the trailing -- 'lexicalWhitespace' identifierToken :: m (ParserInput m) -> m (ParserInput m) -- | Determines whether the given character can start an identifier token, allows only a letter or underscore by -- default isIdentifierStartChar :: Char -> Bool -- | Determines whether the given character can be any part of an identifier token, also allows numbers isIdentifierFollowChar :: Char -> Bool -- | Parses a valid identifier and consumes the trailing 'lexicalWhitespace' identifier :: m (ParserInput m) -- | Parses the argument word whole, not followed by any identifier character, and consumes the trailing -- 'lexicalWhitespace' keyword :: ParserInput m -> m () default identifier :: TextualMonoid (ParserInput m) => m (ParserInput m) default keyword :: (Show (ParserInput m), TextualMonoid (ParserInput m)) => ParserInput m -> m () lexicalWhiteSpace = takeCharsWhile isSpace *> skipAll (lexicalComment *> takeCharsWhile isSpace) someLexicalSpace = takeCharsWhile1 isSpace *> (lexicalComment *> lexicalWhiteSpace <<|> pure ()) <<|> lexicalComment *> lexicalWhiteSpace "whitespace" lexicalComment = empty lexicalSemicolon = lexicalToken (Text.Parser.Char.char ';') lexicalToken p = p <* lexicalWhiteSpace isIdentifierStartChar c = isLetter c || c == '_' isIdentifierFollowChar c = isAlphaNum c || c == '_' identifier = identifierToken (liftA2 mappend (satisfyCharInput (isIdentifierStartChar @m)) (takeCharsWhile (isIdentifierFollowChar @m))) "an identifier" identifierToken = lexicalToken keyword s = lexicalToken (string s *> notSatisfyChar (isIdentifierFollowChar @m)) ("keyword " <> show s)