{-# LANGUAGE MultiParamTypeClasses, UndecidableInstances #-} {-| Module : Parsley.Precedence Description : The precedence parser functionality License : BSD-3-Clause Maintainer : Jamie Willis Stability : stable This module exposes the required machinery for parsing expressions given by a precedence table. Unlike those found in [parser-combinators](https://hackage.haskell.org/package/parser-combinators-1.3.0/docs/Control-Monad-Combinators-Expr.html) or [parsec](https://hackage.haskell.org/package/parsec-3.1.14.0/docs/Text-Parsec-Expr.html), this implementation allows the precedence layers to change type in the table. This implementation is based off of /Design Patterns for Parser Combinators (Willis and Wu 21)/. @since 0.1.0.0 -} module Parsley.Precedence ( -- * Main Precedence Combinators -- $prec-doc precedence, precHomo, -- * Operator Fixity Fixity(..), -- * Level Construction -- $ops-doc Op, GOps(..), sops, ops, -- * Level Combining -- $levels-doc Prec(..), (>+), (+<), -- * Subtype Relation Subtype(..), ) where import Prelude hiding ((<$>), (<*>), pure) import Data.List (foldl') import Parsley.Alternative (choice, (<|>)) import Parsley.Applicative ((<$>), (<*>), pure, (<**>)) import Parsley.Fold (prefix, postfix, infixl1, infixr1) import Parsley.Internal.Common.Utils (WQ(WQ)) import Parsley.Internal.Core (Parser, Defunc(BLACK, ID, FLIP)) --import qualified Data.Generics.Internal.Profunctor.Prism as GLens --import qualified Data.Generics.Sum.Internal.Subtype as GLens {- $prec-doc Compared to using chain combinators to construct parsers for expression grammars, precedence combinators provide a light-weight and convenient representation of a precedence table. In @parsley@ these take two forms: a more traditional version called `precHomo` which takes a list of precedence levels which are all of the same, monolithic, type; and a heterogeneous version called `precedence`. In /Design Patterns for Parser Combinators/ it is mentioned that the homogeneous approach to encoding precedence tables can inadvertently misrepresent the grammar: say by changing left-associative operators into right-associative ones. When the grammar doesn't specify associativities, then `precHomo` is perfectly appropriate, as it becomes an implementation decision. Otherwise, `precedence` uses a heterogeneous list to form a precedence table, allowing each layer of operators to form part of a different datatype. By encoding the resulting AST as a hierarchy of independent datatypes for each grammar rule, the precedence table can be made to be very strict: reordering levels or switching their associativities will fail to compile! -} {-| This combinator will construct and expression parser will provided with a table of precedence. @since 2.0.0.0 -} precedence :: Prec a -> Parser a precedence :: forall a. Prec a -> Parser a precedence (Atom Parser a atom) = Parser a atom precedence (Level Prec a lvls Op a a op) = forall a b. Parser a -> Op a b -> Parser b level (forall a. Prec a -> Parser a precedence Prec a lvls) Op a a op where level :: Parser a -> Op a b -> Parser b level :: forall a b. Parser a -> Op a b -> Parser b level Parser a atom (Op Fixity a b sig InfixL Parser sig op Defunc (a -> b) wrap) = forall (rep :: Type -> Type) a b. ParserOps rep => rep (a -> b) -> Parser a -> Parser (b -> a -> b) -> Parser b infixl1 Defunc (a -> b) wrap Parser a atom Parser sig op level Parser a atom (Op Fixity a b sig InfixR Parser sig op Defunc (a -> b) wrap) = forall (rep :: Type -> Type) a b. ParserOps rep => rep (a -> b) -> Parser a -> Parser (a -> b -> b) -> Parser b infixr1 Defunc (a -> b) wrap Parser a atom Parser sig op level Parser a atom (Op Fixity a b sig InfixN Parser sig op Defunc (a -> b) wrap) = Parser a atom forall a b. Parser a -> Parser (a -> b) -> Parser b <**> (forall a1 b c. Defunc ((a1 -> b -> c) -> b -> a1 -> c) FLIP forall (rep :: Type -> Type) a b. ParserOps rep => rep (a -> b) -> Parser a -> Parser b <$> Parser sig op forall a b. Parser (a -> b) -> Parser a -> Parser b <*> Parser a atom forall a. Parser a -> Parser a -> Parser a <|> forall (rep :: Type -> Type) a. ParserOps rep => rep a -> Parser a pure Defunc (a -> b) wrap) level Parser a atom (Op Fixity a b sig Prefix Parser sig op Defunc (a -> b) wrap) = forall a. Parser (a -> a) -> Parser a -> Parser a prefix Parser sig op (Defunc (a -> b) wrap forall (rep :: Type -> Type) a b. ParserOps rep => rep (a -> b) -> Parser a -> Parser b <$> Parser a atom) level Parser a atom (Op Fixity a b sig Postfix Parser sig op Defunc (a -> b) wrap) = forall a. Parser a -> Parser (a -> a) -> Parser a postfix (Defunc (a -> b) wrap forall (rep :: Type -> Type) a b. ParserOps rep => rep (a -> b) -> Parser a -> Parser b <$> Parser a atom) Parser sig op {-| A simplified version of `precedence` that does not use the heterogeneous list `Prec`, but instead requires all layers of the table to have the same type. The list encodes the precedence in strongest-to-weakest layout. @since 2.0.0.0 -} precHomo :: Parser a -- ^ The root atom of the precedence hierarchy -> [Op a a] -- ^ Each layer laid out strongest-to-weakest binding. -> Parser a precHomo :: forall a. Parser a -> [Op a a] -> Parser a precHomo Parser a atom = forall a. Prec a -> Parser a precedence forall b c a. (b -> c) -> (a -> b) -> a -> c . forall (t :: Type -> Type) b a. Foldable t => (b -> a -> b) -> b -> t a -> b foldl' forall a b. Prec a -> Op a b -> Prec b (>+) (forall a. Parser a -> Prec a Atom Parser a atom) {-| Denotes the fixity of a given level in a precedence table. The type parameter @sig@ encodes the types of the operators on this level, in a heterogeneous fashion. @since 2.0.0.0 -} data Fixity a b sig where -- | Denotes a left-associative binary operator. InfixL :: Fixity a b (b -> a -> b) -- | Denotes a right-associative binary operator. InfixR :: Fixity a b (a -> b -> b) -- | Denotes a non-associative binary operator. InfixN :: Fixity a b (a -> a -> b) -- | Denotes a prefix unary operator. Prefix :: Fixity a b (b -> b) -- | Denotes a postfix unary operator. Postfix :: Fixity a b (b -> b) {- $ops-doc By combining a `Fixity` with a `Parser` which can read the operators at a given level, a new level can be created, ready to add to the table. To provide uniformity and safety, the `Fixity` type exposes a @sig@ type that expresses the shape of operators that match it. The `Op` datatype, which is not constructed directly, ties the operators to this signature using existentials. There are three ways to create a value of type `Op`: `ops`, `sops`, and `gops`. These functions represent different degrees of relations between this layer of the table, and the one that comes below: - `ops` says that the level below is the same type as this one, in other words the classic @a -> a -> a@ type for binary operators. - `sops` is stronger, and says that the level below must be a sub-type of this one (see `Subtype`). This means that there is a known canonical embedding from one layer into the other, called an `upcast`. - `gops` is the most general, and says that the level below is related to this one by some more complex transformation than the canonical sub-type embedding. Any arbitrary function from @underlying -> this@ can be provided to this level to handle the translation. -} {-| Packages together a level of a precedence table, by associating a `Fixity` with the operators that match that specific signature converting from one layer of type @a@ to a new layer of type @b@. See `ops`, `sops`, and `gops` for how to construct them. @since 2.0.0.0 -} data Op a b where Op :: Fixity a b sig -> Parser sig -> Defunc (a -> b) -> Op a b {-| This typeclass is used to allow abstraction of the representation of user-level functions. See the instances for information on what these representations are. @since 2.0.0.0 -} class GOps rep where {-| Sometimes, the relationship between two levels of a heterogeneous precedence hierarchy is non-trivial. By using `gops`, the conversion function can be used to adapt one layer into the type of the next. @since 2.0.0.0 -} gops :: Fixity a b sig -> [Parser sig] -> rep (a -> b) -> Op a b {-| This is the default representation used for user-level functions and values: plain old code. @since 2.0.0.0 -} instance {-# INCOHERENT #-} x ~ WQ => GOps x where gops :: forall a b sig. Fixity a b sig -> [Parser sig] -> x (a -> b) -> Op a b gops Fixity a b sig fixity [Parser sig] ps = forall (rep :: Type -> Type) a b sig. GOps rep => Fixity a b sig -> [Parser sig] -> rep (a -> b) -> Op a b gops Fixity a b sig fixity [Parser sig] ps forall b c a. (b -> c) -> (a -> b) -> a -> c . forall a. WQ a -> Defunc a BLACK {-| This is used to allow defunctionalised versions of many standard Haskell functions to be used directly as an argument to relevant combinators. @since 2.0.0.0 -} instance GOps Defunc where gops :: forall a b sig. Fixity a b sig -> [Parser sig] -> Defunc (a -> b) -> Op a b gops Fixity a b sig fixity [Parser sig] ps = forall a b sig. Fixity a b sig -> Parser sig -> Defunc (a -> b) -> Op a b Op Fixity a b sig fixity (forall a. [Parser a] -> Parser a choice [Parser sig] ps) {-| When two levels in a precedence hierarchy are the same type, they are trivially embedded using the identity function. @since 2.0.0.0 -} ops :: Fixity a a sig -> [Parser sig] -> Op a a ops :: forall a sig. Fixity a a sig -> [Parser sig] -> Op a a ops Fixity a a sig fixity [Parser sig] ps = forall a b sig. Fixity a b sig -> Parser sig -> Defunc (a -> b) -> Op a b Op Fixity a a sig fixity (forall a. [Parser a] -> Parser a choice [Parser sig] ps) forall a1. Defunc (a1 -> a1) ID {-| Encodes a subtyping relationship between two types @sub@ and @sup@. This allows for the conversion or embedding of one type into the other, as well as their extraction. It should be the case that: prop> fmap upcast . downcast = Just prop> downcast . upcast = Just @since 2.0.0.0 -} class Subtype sub sup where -- | Casts a value of the subtype into one of the supertype, likely by wrapping it in some constructor upcast :: sub -> sup -- | Attempts to extract a value of a subtype from a supertype downcast :: sup -> Maybe sub {-instance GLens.Context sub sup => Subtype sub sup where upcast = GLens.build GLens.derived downcast = either (const Nothing) Just . GLens.match GLens.derived-} {-| When two levels of a precedence hierarchy are in a subtyping relation, the conversion between the two can be trivially provided as the `upcast` function. @since 2.0.0.0 -} sops :: Subtype a b => Fixity a b sig -> [Parser sig] -> Op a b sops :: forall a b sig. Subtype a b => Fixity a b sig -> [Parser sig] -> Op a b sops Fixity a b sig fixity [Parser sig] ps = forall (rep :: Type -> Type) a b sig. GOps rep => Fixity a b sig -> [Parser sig] -> rep (a -> b) -> Op a b gops Fixity a b sig fixity [Parser sig] ps (forall a. a -> Code a -> WQ a WQ forall sub sup. Subtype sub sup => sub -> sup upcast [||upcast||]) {- $levels-doc Independently, `Op`s are meaningless: they must be combined together to form a table to be useful. The `Prec` datatype encodes the structure linking together each `Op` level in turn. The base case for the table is called `Atom`, which takes a parser for the root of the table: think numbers, variables, bracketed expressions, and the like. The `Level` constructor is used to add layers on top of the growing table. It's not designed to be used directly, which would be clunky, instead use the `(>+)` and `(+<)` operators. These operators are sugar for `Level`, but allow freedom over which way round the table should be expressed: use `(>+)` to build the table from strongest- to weakest-binding (with the atom at the front); use `(+<)` to build the table from weakest- to strongest-binding (with the atom at the end). The direction the table should be built in is purely stylistic. -} {-| A heterogeneous list that represents a precedence table so that @Prec a@ produces values of type @a@. @since 2.0.0.0 -} data Prec a where -- | A Level of the table, containing the sub-level and the operators. See `(>+)` and `(+<)`. Level :: Prec a -> Op a b -> Prec b -- | The terminal atom in the table. Atom :: Parser a -> Prec a infixl 5 >+ {-| Sugar for the `Level` constructor, this operator - along with its sibling - is hungry and greedy: it eats the levels with the higher precedence: in @lvls >+ lvl@, @lvl@ is lower precedence than @lvls@. @since 2.0.0.0 -} (>+) :: Prec a -> Op a b -> Prec b >+ :: forall a b. Prec a -> Op a b -> Prec b (>+) = forall a b. Prec a -> Op a b -> Prec b Level infixr 5 +< {-| Sugar for the `Level` constructor, this operator - along with its sibling - is hungry and greedy: it eats the levels with the higher precedence: in @lvl +< lvls@, @lvl@ is lower precedence than @lvls@. @since 2.0.0.0 -} (+<) :: Op a b -> Prec a -> Prec b +< :: forall a b. Op a b -> Prec a -> Prec b (+<) = forall a b c. (a -> b -> c) -> b -> a -> c flip forall a b. Prec a -> Op a b -> Prec b (>+)