{-# LANGUAGE StandaloneDeriving #-} -- UUAGC 0.9.52.1 (src/GLL/Types/Abstract.ag) module GLL.Types.Grammar where import Data.Text -- | Identifier for nonterminals. type Nt = Text -- Prod --------------------------------------------------------- -- | -- A production binds a nonterminal identifier (left-hand side) to a list of symbols --(the right-hand side of the production). data Prod t = Prod (Nt) (Symbols t) -- Prods -------------------------------------------------------- -- | A list of 'Prod's. type Prods t = [Prod t] -- Grammar ----------------------------------------------------- -- | -- A grammar is a start symbol and a list of productions. type Grammar t = (Nt, Prods t) -- Slot -------------------------------------------------------- -- | -- A grammar slot acts as a label to identify progress of matching a production. -- As such, a slot is a "Prod" with its right-hand side split in two: -- a part before and a part after 'the dot'. -- The dot indicates which part of the right-hand side has been processed thus far. data Slot t = Slot (Nt) (([Symbol t])) (([Symbol t])) -- Symbol ------------------------------------------------------ -- | -- A 'Symbol' is either a nonterminal or a terminal, -- where a terminal contains some arbitrary token. data Symbol t = Nt Nt | Term t -- | Error (Token) (Token) -- Symbols ----------------------------------------------------- -- | -- A list of 'Symbol's type Symbols t = [Symbol t] -- Token ------------------------------------------------------- -- | -- A datatype for representing tokens with some builtins -- and an aribitrary Token constructor. -- This datatype stores (optional) lexemes. data Token = Char Char | Keyword String | EOS | Epsilon | IntLit (Maybe Int) | FloatLit (Maybe Double) | BoolLit (Maybe Bool) | StringLit (Maybe String) | CharLit (Maybe Char) | IDLit (Maybe String) -- | alternative identifiers, for example functions vs. constructors (as in Haskell). | AltIDLit (Maybe String) | Token String (Maybe String) -- Tokens ------------------------------------------------------ -- | -- A list of 'Token's type Tokens = [Token] -- | Class that captures elements of an input string (tokens). -- -- * 'eos' is the end-of-string symbol -- * 'eps' is the empty-string symbol -- -- Both 'eos' and 'eps' must be distinct from eachother and from all -- tokens in the input string. -- The show instance is required to throw error messages. class (Ord a, Eq a, Show a) => Parseable a where eos :: a eps :: a -- | This function is used for matching grammar tokens and input tokens. -- Override this method if, for example, your input tokens store lexemes -- while the grammar tokens do not matches :: a -> a -> Bool -- | This function pretty-prints the Parseable type by displaying its lexeme. -- Default implementation is 'show', which should be replaced for prettier error messages. unlex :: a -> String unlex = show -- | Class whose members are super-types of 'Token'. class SubsumesToken a where upcast :: Token -> a downcast :: a -> Maybe Token instance SubsumesToken Token where upcast = id downcast = Just deriving instance Ord Token deriving instance Eq Token instance Show Token where show (Char c) = "keychar('" ++ [c] ++ "')" show (Keyword s) = "keyword(\"" ++ s ++ "\")" show (EOS) = "" show (Epsilon) = "" show (IntLit (Just i)) = "int(" ++ show i ++ ")" show (IntLit _) = "" show (FloatLit (Just i)) = "float(" ++ show i ++ ")" show (FloatLit _) = "" show (BoolLit (Just b)) = "bool(" ++ show b ++ ")" show (BoolLit _) = "" show (StringLit (Just s)) = "string(\"" ++ s ++ "\")" show (StringLit _) = "" show (CharLit (Just c)) = "char('" ++ [c] ++ "')" show (CharLit Nothing) = "" show (AltIDLit (Just id)) = "altid(\"" ++ id ++ "\")" show (AltIDLit Nothing) = "" show (IDLit (Just id)) = "id(\"" ++ id ++ "\")" show (IDLit Nothing) = "" show (Token nm (Just s)) = nm ++ "(\"" ++ s ++ "\")" show (Token nm _) = "<" ++ nm ++ ">" instance Parseable Token where eos = EOS eps = Epsilon unlex = unlexToken Token k _ `matches` Token k' _ = k' == k Char c `matches` Char c' = c' == c Keyword k `matches` Keyword k' = k' == k EOS `matches` EOS = True Epsilon `matches` Epsilon = True StringLit _ `matches` StringLit _ = True CharLit _ `matches` CharLit _ = True IntLit _ `matches` IntLit _ = True FloatLit _ `matches` FloatLit _ = True BoolLit _ `matches` BoolLit _ = True AltIDLit _ `matches` AltIDLit _ = True IDLit _ `matches` IDLit _ = True _ `matches` _ = False -- | Pretty-prints a list of 'Token's as a concatenation of their lexemes. unlexTokens :: [Token] -> String unlexTokens = Prelude.concatMap unlexToken unlexToken :: Token -> String unlexToken t = case t of Char c -> [c] Keyword s -> s IntLit (Just i) -> show i BoolLit (Just b) -> show b StringLit (Just s) -> s CharLit (Just c) -> [c] AltIDLit (Just s) -> s IDLit (Just s) -> s Token _ (Just s) -> s _ -> "" -- some helpers isNt (Nt _) = True isNt _ = False isTerm (Term _) = True isTerm _ = False instance (Show t) => Show (Slot t) where show (Slot x alpha beta) = show x ++ " ::= " ++ showRhs alpha ++ "." ++ showRhs beta where showRhs [] = "" showRhs ((Term t):rhs) = show t ++ showRhs rhs showRhs ((Nt x):rhs) = show x ++ showRhs rhs instance (Show t) => Show (Symbol t) where show (Nt s) = unpack s show (Term t) = show t deriving instance (Ord t) => Ord (Slot t) deriving instance (Eq t) => Eq (Slot t) deriving instance (Show t) => Show (Prod t) deriving instance (Ord t) => Ord (Prod t) deriving instance (Eq t) => Eq (Prod t) deriving instance (Eq t) => Eq (Symbol t) deriving instance (Ord t) => Ord (Symbol t)