{-# OPTIONS  -fglasgow-exts #-}

{-| 
    Alternative approach of 'read' that composes grammars instead 
    of parsers. Grammars describing the data types are composed
    dynamically, removing possible left-recursion and combining
    common prefixes of alternatives.   

    The function 'gread' defined here is able to handle the 
    associativities defined for infix operators.

    The function 'gread' reads data in linear time, while the 
    function 'read' has an exponential behavior in some cases 
    of data types with infix operators.

    Non uniform data types are not supported, because they
    generate infinite grammars.

    The library is documented in the paper: /Haskell, do you read me?: constructing and composing efficient top-down parsers at runtime/

    Bibtex entry: <http://www.cs.uu.nl/wiki/bin/viewfile/Center/TTTAS?rev=1;filename=GRead.bib>

    For more documentation see the TTTAS webpage: <http://www.cs.uu.nl/wiki/bin/view/Center/TTTAS> .
 -}

module Text.GRead ( 
               -- * Grammar Description 
               module Text.GRead.Grammar, 

               -- * Reading Functions
               gread, 

               -- * Types
               GReadMsg, GReadResult(..) 

             ) where

import Language.AbstractSyntax.TTTAS
import Text.GRead.Grammar
import Text.GRead.Transformations.LeftFact
import Text.GRead.Transformations.LeftCorner
import Text.GRead.Transformations.Group
import UU.Parsing hiding (Symbol,parse,Ok)
import qualified UU.Parsing as UU
import List (findIndex)


-- | Type of error repair messages.
type GReadMsg = Message Token (Maybe Token)


-- | Type of 'gread' results.
data GReadResult a = Ok  a
                   | Rep a [GReadMsg] 
      deriving Show


-- | The 'gread' reads input from a string, which must be 
--   completely consumed by the input process.
--   Returns @Ok value@ on a successful parse.
--   Otherwise returns @Rep value msgs@, where the @value@
--   results of parsing a repaired input. The list @msgs@ 
--   contains the \"corrections\" done to the input.
--
--   For example, a 'read'-like implementation can be:
--
-- > read :: (Gram a) => String -> a
-- > read input = case gread input of
-- >                  Ok  a       -> a
-- >                  Rep _ (m:_) -> error $ show m


gread ::  (Gram a) => String -> GReadResult a
gread =   (  parse . compile 
          .  leftfactoring . leftcorner 
          .  group ) grammar



-- COMPILE --------------------------------------------------------------------

newtype Const f a s = C {unC :: f a}

compile :: forall a . Grammar a -> Parser Token a
compile (Grammar (start :: Ref a env) rules) 
                       = unC (lookupEnv start result)
  where  result  =  
          mapEnv 
          (\ (PS ps) -> C (foldr1 (<|>) [ comp p | p <- ps]))
          rules
         comp :: forall t . Prod t env -> Parser Token t
         comp (End x)   = pLow x 
         comp (Seq (Term t) ss) 
                       = (flip ($))  <$> pSym t <*> comp ss 
         comp (Seq (Nont n) ss) 
                       = (flip ($))  <$> unC (lookupEnv n result)
                                     <*> comp ss

mapEnv  ::  (forall a . f a s -> g a s)  
        ->  Env f s env -> Env g s env
mapEnv  _ Empty       = Empty
mapEnv  f (Ext r v)   = Ext (mapEnv f r) (f v)


instance Show Token where
    show (Keyw s) = s
    show Open     = "("
    show Close    = ")"

instance UU.Symbol Token where
  deleteCost _ = 5#


-- PARSE ----------------------------------------------------------------------

parse :: Parser Token a -> String -> GReadResult a
parse p input = case rparse p input of
                  (a,[]  ) -> Ok a
                  (a,msgs) -> Rep a msgs


rparse :: Parser Token a -> String -> (a, [GReadMsg])
rparse p input = let ((Pair a _),msgs) =  eval (UU.parse p (tokenize input))
                 in (a,msgs)
 where eval :: Steps a Token (Maybe Token) -> (a, [GReadMsg])
       eval (OkVal v        r) = let (a,msgs) = v `seq` (eval r) 
                                 in  (v a,msgs)
       eval (UU.Ok          r) = eval r
       eval (Cost  _        r) = eval  r
       eval (StRepair _ msg r) = let (v,msgs) = eval r 
                                 in  (v,msg:msgs)
       eval (Best _   r     _) = eval  r
       eval (NoMoreSteps v   ) = (v,[]) 

       tokenize []       = []
       tokenize ('(':xs) = Open  : (tokenize xs)
       tokenize (')':xs) = Close : (tokenize xs) 
       tokenize (' ':xs) = tokenize xs
       tokenize l        = case findIndex endTok l of
                            Just i -> let (ky,rs) = splitAt i l
                                      in  Keyw ky : (tokenize rs) 
                            Nothing -> [Keyw l] 
       
       endTok t = t == ')' || t == ' ' || t == '('