{-# LANGUAGE ScopedTypeVariables #-}
module Text.XML.Expat.Chunked (
  -- * Tree structure
  Node,
  Tree.NodeG(..),
  UNode,

  -- * Generic node manipulation
  module Text.XML.Expat.Internal.NodeClass,

  -- * Generic manipulation of the child list
  module Data.List.Class,

  -- * Qualified nodes
  QNode,
  module Text.XML.Expat.Internal.Qualified,

  -- * Namespaced nodes
  NNode,
  module Text.XML.Expat.Internal.Namespaced,

  -- * Parse to tree
  ParserOptions(..),
  defaultParserOptions,
  Encoding(..),
  Text.XML.Expat.Chunked.parse,
  XMLT(..),
  XMLParseError(..),
  XMLParseLocation(..)
  ) where

import Control.Monad.ListT
import qualified Text.XML.Expat.Internal.IO as IO
import Text.XML.Expat.Internal.Namespaced
import Text.XML.Expat.Internal.NodeClass
import Text.XML.Expat.Internal.Qualified
import Text.XML.Expat.SAX
import qualified Text.XML.Expat.Tree as Tree

import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import qualified Data.ByteString as B
import Data.IORef
import Data.Iteratee hiding (head, peek)
import Data.Iteratee.WrappedByteString
import Data.List.Class
import Data.Word


------ Types ------------------------------------------------------------------

-- | A tree representation that uses a monadic list as its child list type.
--
-- Note that you can use the type function 'ListOf' to give a list of
-- any node type, using that node's associated list type, e.g.
-- @ListOf (UNode Text)@
type Node m tag text = Tree.NodeG (ListT (XMLT m)) tag text

-- | Type alias for a single node with unqualified tag names where tag and
-- text are the same string type.
type UNode m text = Node m text text

-- | Type alias for a single annotated node where qualified names are used for tags
type QNode m a text = Node a (QName text) text

-- | Type alias for a single annotated node where namespaced names are used for tags
type NNode m text a = Node a (NName text) text

------ Queue ------------------------------------------------------------------

-- Mutable queue implemented as a linked list
data QueueCell a
    = Value a (QueueHead a)
    | End
    | Pending

newtype QueueHead a = QueueHead (IORef (QueueCell a))
newtype QueueTail a = QueueTail (IORef (IORef (QueueCell a)))

newQueue :: IO (QueueHead a, QueueTail a)
newQueue = do
    end <- newIORef Pending
    endRef <- newIORef end
    return (QueueHead end, QueueTail endRef)

peek :: QueueHead a -> IO (QueueCell a)
peek (QueueHead hRef) = readIORef hRef

push :: QueueTail a -> a -> IO ()
push (QueueTail qtRef) val = do
    tl <- readIORef qtRef
    newTl <- newIORef Pending
    writeIORef tl (Value val (QueueHead newTl))
    writeIORef qtRef newTl

pushEnd :: QueueTail a -> IO ()
pushEnd (QueueTail qtRef) = do
    tl <- readIORef qtRef
    writeIORef tl End


------ Handler ----------------------------------------------------------------

data Result m a = Yield (XMLT m a) | Result a | HandlerErr String

-- | The monad transformer used for writing your handler for chunked XML trees,
-- which executes as a co-routine.
data XMLT m a = XMLT {
        runXMLT :: m (Result m a)
    }

instance Monad m => Functor (XMLT m) where
    fmap = liftM

instance Monad m => Monad (XMLT m) where
    return a = XMLT $ return $ Result a
    f >>= g = XMLT $ do
        res1 <- runXMLT f
        case res1 of
            Yield c        -> return $ Yield (c >>= g)
            Result a       -> runXMLT (g a)
            HandlerErr err -> return $ HandlerErr err 
    fail err = XMLT $ return $ HandlerErr err

instance MonadTrans XMLT where
    lift m = XMLT $ do
        r <- m
        return $ Result r

instance MonadIO m => MonadIO (XMLT m) where
    liftIO m = XMLT $ do
        r <- liftIO m
        return $ Result r

yield :: Monad m => XMLT m ()
yield = XMLT $ return $ Yield (XMLT $ return $ Result ())

-------------------------------------------------------------------------------

-- | An iteratee that parses the input document, passing a representation of it
-- to the specified handler monad.  The monad runs lazily using co-routines, so
-- if it requests a part of the tree that hasn't been parsed yet, it will
-- be suspended, and continued when it's available.
--
-- This implementation does /not/ use Haskell's lazy I/O.
parse :: forall m a tag text . (
             MonadIO m,
             GenericXMLString tag,
             GenericXMLString text
         ) =>
         ParserOptions tag text
      -> (ListT (XMLT m) (Node m tag text) -> XMLT m a)
      -> m (IterateeG WrappedByteString Word8 m (Either ErrMsg a))
parse opts handler = do
    let enc = parserEncoding opts

    parser <- liftIO $ IO.newParser enc

    (rootHd, rootTl) <- liftIO $ newQueue
    stackRef <- liftIO $ newIORef [rootTl]

    liftIO $ IO.setStartElementHandler parser $ \_ cName cAttrs -> do
        name <- textFromCString cName
        attrs <- forM cAttrs $ \(cAttrName,cAttrValue) -> do
            attrName <- textFromCString cAttrName
            attrValue <- textFromCString cAttrValue
            return (attrName, attrValue)
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        (hd, tl) <- newQueue
        push (head stack) $ Tree.Element name attrs (ListT $ iter hd)
        writeIORef stackRef (tl:stack)
        return True
    liftIO $ IO.setEndElementHandler parser $ \_ _ -> do
        --name <- textFromCString cName
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        pushEnd (head stack)
        writeIORef stackRef (tail stack)
        return True
    liftIO $ IO.setCharacterDataHandler parser $ \_ cText -> do
        txt <- gxFromCStringLen cText
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        push (head stack) $ Tree.Text txt
        return True

    let nextIter :: XMLT m a
                 -> IterateeG WrappedByteString Word8 m (Either ErrMsg a)
        nextIter handler = IterateeG $ \str -> do
            case str of
                EOF (Just err) -> return $ Done (Left err) str
                _ -> do
                    mErr <- liftIO $ IO.withParser parser $ \pp -> case str of
                        Chunk (WrapBS blk) -> IO.parseChunk pp blk False
                        EOF _              -> IO.parseChunk pp B.empty True

                    res <- runXMLT handler

                    return $ case res of
                        Yield handler' -> do
                            case (str, mErr) of
                                (Chunk _, Just err) -> Cont (nextIter handler') (Just $ Err $ show err)
                                (Chunk _, Nothing)  -> Cont (nextIter handler') Nothing
                                (EOF _,   Just err) -> Done (Left $ Err $ show err) str
                                (EOF _,   Nothing)  -> Done (Left $ Err "EOF not handled") str
                        Result a -> do
                            case mErr of
                                Just err -> Done (Left $ Err $ show err) str
                                Nothing  -> Done (Right a) str
                        HandlerErr handlerErr -> do
                            case mErr of
                                Just parseErr -> Done (Left $ Err $ show parseErr)   str
                                Nothing       -> Done (Left $ Err handlerErr) str

    res <- runXMLT $ handler $ ListT $ iter rootHd
    return $ case res of
        Yield handler' -> nextIter handler'
        Result a       -> fail $ "handler returned before it demanded from the tree"
        HandlerErr err -> fail $ "handler error before it demanded from the tree: "++err
  where
    iter :: QueueHead (Node m tag text) -> XMLT m (ListItem (ListT (XMLT m)) (Node m tag text))
    iter hd = do
        cell <- liftIO $ peek hd
        case cell of
            Pending     -> yield >> iter hd
            End         -> return $ Nil
            Value a hd' -> return $ Cons a (ListT $ iter hd')