-- | Context-free grammars.
{-# LANGUAGE GADTs, RankNTypes #-}
module Text.Earley.Grammar
  ( Prod(..)
  , satisfy
  , (<?>)
  , Grammar(..)
  , rule
  ) where
import Control.Applicative
import Control.Monad
import Control.Monad.Fix

infixr 0 <?>

-- | A production.
--
-- The type parameters are:
--
-- @a@: The return type of the production.
--
-- @t@: The type of the terminals that the production operates on.
--
-- @e@: The type of names, used for example to report expected tokens.
--
-- @r@: The type of a non-terminal. This plays a role similar to the @s@ in the
--      type @ST s a@.  Since the 'parser' function expects the @r@ to be
--      universally quantified, there is not much to do with this parameter
--      other than leaving it universally quantified.
--
-- As an example, @'Prod' r 'String' 'Char' 'Int'@ is the type of a production that
-- returns an 'Int', operates on (lists of) characters and reports 'String'
-- names.
--
-- Most of the functionality of 'Prod's is obtained through its instances, e.g.
-- 'Functor', 'Applicative', and 'Alternative'.
data Prod r e t a where
  -- Applicative.
  Terminal    :: !(t -> Bool) -> !(Prod r e t (t -> b)) -> Prod r e t b
  NonTerminal :: !(r e t a) -> !(Prod r e t (a -> b)) -> Prod r e t b
  Pure        :: a -> Prod r e t a
  -- Monoid/Alternative. We have to special-case 'many' (though it can be done
  -- with rules) to be able to satisfy the Alternative interface.
  Plus        :: !(Prod r e t a) -> !(Prod r e t a) -> Prod r e t a
  Many        :: !(Prod r e t a) -> !(Prod r e t ([a] -> b)) -> Prod r e t b
  Empty       :: Prod r e t a
  -- Error reporting.
  Named       :: !(Prod r e t a) -> e -> Prod r e t a

-- | Match a token that satisfies the given predicate. Returns the matched token.
{-# INLINE satisfy #-}
satisfy :: (t -> Bool) -> Prod r e t t
satisfy p = Terminal p $ Pure id

-- | A named production (used for reporting expected things).
(<?>) :: Prod r e t a -> e -> Prod r e t a
(<?>) = Named

instance Monoid (Prod r e t a) where
  mempty  = empty
  mappend = (<|>)

instance Functor (Prod r e t) where
  {-# INLINE fmap #-}
  fmap f (Terminal b p)    = Terminal b $ fmap (f .) p
  fmap f (NonTerminal r p) = NonTerminal r $ fmap (f .) p
  fmap f (Pure x)          = Pure $ f x
  fmap f (Plus p q)        = Plus (fmap f p) (fmap f q)
  fmap f (Many p q)        = Many p $ fmap (f .) q
  fmap _ Empty             = Empty
  fmap f (Named p n)       = Named (fmap f p) n

instance Applicative (Prod r e t) where
  pure = Pure
  {-# INLINE (<*>) #-}
  Terminal b p    <*> q = Terminal b $ flip <$> p <*> q
  NonTerminal r p <*> q = NonTerminal r $ flip <$> p <*> q
  Pure f          <*> q = fmap f q
  Plus a b        <*> q = a <*> q <|> b <*> q
  Many a p        <*> q = Many a $ flip <$> p <*> q
  Empty           <*> _ = Empty
  Named p n       <*> q = Named (p <*> q) n

instance Alternative (Prod r e t) where
  empty = Empty
  Empty     <|> q         = q
  p         <|> Empty     = p
  Named p m <|> q         = Named (p <|> q) m
  p         <|> Named q n = Named (p <|> q) n
  p         <|> q         = Plus p q
  many Empty = pure []
  many p     = Many p $ Pure id
  some p     = (:) <$> p <*> many p

-- | A context-free grammar.
--
-- The type parameters are:
--
-- @a@: The return type of the grammar (often a 'Prod').
--
-- @e@: The type of names, used for example to report expected tokens.
--
-- @r@: The type of a non-terminal. This plays a role similar to the @s@ in the
--      type @ST s a@.  Since the 'parser' function expects the @r@ to be
--      universally quantified, there is not much to do with this parameter
--      other than leaving it universally quantified.
--
-- Most of the functionality of 'Grammar's is obtained through its instances,
-- e.g.  'Monad' and 'MonadFix'. Note that GHC has syntactic sugar for
-- 'MonadFix': use @{-\# LANGUAGE RecursiveDo \#-}@ and @mdo@ instead of
-- @do@.
data Grammar r e a where
  RuleBind :: Prod r e t a -> (Prod r e t a -> Grammar r e b) -> Grammar r e b
  FixBind  :: (a -> Grammar r e a) -> (a -> Grammar r e b) -> Grammar r e b
  Return   :: a -> Grammar r e a

instance Functor (Grammar r e) where
  fmap f (RuleBind ps h) = RuleBind ps (fmap f . h)
  fmap f (FixBind g h)   = FixBind g (fmap f . h)
  fmap f (Return x)      = Return $ f x

instance Applicative (Grammar r e) where
  pure  = return
  (<*>) = ap

instance Monad (Grammar r e) where
  return = Return
  RuleBind ps f >>= k = RuleBind ps (f >=> k)
  FixBind f g   >>= k = FixBind f (g >=> k)
  Return x      >>= k = k x

instance MonadFix (Grammar r e) where
  mfix f = FixBind f return

-- | Create a new non-terminal by giving its production.
rule :: Prod r e t a -> Grammar r e (Prod r e t a)
rule p = RuleBind p return