-- | Context-free grammars. {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE ScopedTypeVariables #-} module Data.Cfg.Cfg( -- * Class Cfg(..), -- * Vocabulary V(..), Vs, isNT, isT, bimapV, bimapVs, vocabulary, usedVocabulary, undeclaredVocabulary, isFullyDeclared, -- * Productions Production, productions, -- * Utility functions eqCfg {- , compareCfg -}) where import Control.Monad(liftM4) import Control.Monad.Reader(ask) import Data.Cfg.CPretty import Data.Data(Data, Typeable) import qualified Data.Set as S import Text.PrettyPrint ------------------------------------------------------------ -- | Represents a context-free grammar with its nonterminal and -- terminal types. class Cfg cfg t nt where nonterminals :: cfg t nt -> S.Set nt -- ^ the nonterminals of the grammar terminals :: cfg t nt -> S.Set t -- ^ the terminals of the grammar productionRules :: cfg t nt -> nt -> S.Set (Vs t nt) -- ^ the productions of the grammar startSymbol :: cfg t nt -> nt -- ^ the start symbol of the grammar; must be an element of -- 'nonterminals' 'cfg' instance (Cfg cfg t nt) => CPretty (cfg t nt) (V t nt -> Doc) where cpretty cfg = liftM4 vcat' ss ts nts prods where vcat' a b c d = vcat [a, b, c, d] ss = do prettyV <- ask return (text "Start symbol:" <+> prettyV (NT $ startSymbol cfg)) ts = do prettyV <- ask return (text "Terminals:" <+> fsep (punctuate comma $ map (prettyV . T) (S.toList $ terminals cfg))) nts = do prettyV <- ask return (text "Nonterminals:" <+> fsep (punctuate comma $ map (prettyV . NT) (S.toList $ nonterminals cfg))) prods = do prettyV <- ask return (text "Productions:" $$ nest 4 (vcat (map (prettyProd prettyV) (zip [1..] $ productions cfg)))) where prettyProd pv (n, (hd, rhs)) = hsep [parens (int n), pv (NT hd), text "::=", rhs' <> text "."] where rhs' = hsep $ map pv rhs ------------------------------------------------------------ ------------------------------------------------------------ -- | Vocabulary symbols of the grammar. data V t nt = T t -- ^ a terminal | NT nt -- ^ a nonterminal deriving (Eq, Ord, Show, Data, Typeable) -- | Returns 'True' iff the vocabularly symbols is a terminal. isT :: V t nt -> Bool isT (T _) = True isT _ = False -- | Returns 'True' iff the vocabularly symbols is a nonterminal. isNT :: V t nt -> Bool isNT (NT _) = True isNT _ = False instance Functor (V t) where fmap _f (T t) = T t fmap f (NT nt) = NT $ f nt -- | Maps over the terminal and nonterminal symbols in a 'V'. bimapV :: (t -> t') -> (nt -> nt') -> V t nt -> V t' nt' bimapV f _g (T t) = T $ f t bimapV _f g (NT nt) = NT $ g nt -- | Returns the vocabulary symbols of the grammar: elements of -- 'terminals' and 'nonterminals'. vocabulary :: (Cfg cfg t nt, Ord nt, Ord t) => cfg t nt -> S.Set (V t nt) vocabulary cfg = S.map T (terminals cfg) `S.union` S.map NT (nonterminals cfg) -- | Synonym for lists of vocabulary symbols. type Vs t nt = [V t nt] -- | Maps over the terminal and nonterminal symbols in a list of 'V's. bimapVs :: (t -> t') -> (nt -> nt') -> Vs t nt -> Vs t' nt' bimapVs f g = map (bimapV f g) -- | Productions over vocabulary symbols type Production t nt = (nt, Vs t nt) -- | Returns the productions of the grammar. productions :: (Cfg cfg t nt) => cfg t nt -> [Production t nt] productions cfg = do nt <- S.toList $ nonterminals cfg vs <- S.toList $ productionRules cfg nt return (nt, vs) -- | Returns 'True' iff the two inhabitants of 'Cfg' are equal. eqCfg :: forall cfg cfg' t nt . (Cfg cfg t nt, Cfg cfg' t nt, Eq nt, Eq t) => cfg t nt -> cfg' t nt -> Bool eqCfg cfg cfg' = to4Tuple cfg == to4Tuple cfg' {------------------------------------------------------------ -- | Compares the two inhabitants of 'Cfg'. compareCfg :: forall cfg cfg' t nt . (Cfg cfg t nt, Cfg cfg' t nt, Ord nt, Ord t) => cfg t nt -> cfg' t nt -> Ordering compareCfg cfg cfg' = compare (to4Tuple cfg) (to4Tuple cfg') ------------------------------------------------------------} -- | Converts the 'Cfg' to a 4-tuple that inhabits both 'Eq' and 'Ord' -- if 't' and 'nt' do. to4Tuple :: forall cfg t nt . (Cfg cfg t nt) => cfg t nt -> (nt, S.Set nt, S.Set t, [Production t nt]) -- We move the start symbol first to optimize the operations -- since it's most likely to differ. to4Tuple cfg = ( startSymbol cfg, nonterminals cfg, terminals cfg, productions cfg) -- | Returns all vocabulary used in the productions plus the start -- symbol. usedVocabulary :: (Cfg cfg t nt, Ord nt, Ord t) => cfg t nt -> S.Set (V t nt) usedVocabulary cfg = S.fromList $ NT (startSymbol cfg) : concat [ NT nt : vs | (nt, vs) <- productions cfg] -- | Returns all vocabulary used in the productions plus the start -- symbol but not declared in 'nonterminals' or 'terminals'. undeclaredVocabulary :: (Cfg cfg t nt, Ord nt, Ord t) => cfg t nt -> S.Set (V t nt) undeclaredVocabulary cfg = usedVocabulary cfg S.\\ vocabulary cfg ------------------------------------------------------------ -- | Returns 'True' all the vocabulary used in the grammar is -- declared. isFullyDeclared :: (Cfg cfg t nt, Ord nt, Ord t) => cfg t nt -> Bool isFullyDeclared = S.null . undeclaredVocabulary