{-# LANGUAGE ScopedTypeVariables #-}
module Text.XML.Expat.Chunked (
        -- * Tree structure
        CNode,
        NodeG(..),

        -- * Generic node manipulation
        module Text.XML.Expat.NodeClass,

        -- * Generic manipulation of the child list
        module Data.List.Class,

        -- * Parse to tree
        ParserOptions(..),
        defaultParserOptions,
        Encoding(..),
        Text.XML.Expat.Chunked.parse,
        HandlerT(..),
        XMLParseError(..),
        XMLParseLocation(..)
    ) where

import Control.Monad.ListT
import qualified Text.XML.Expat.IO as IO
import Text.XML.Expat.NodeClass
import Text.XML.Expat.SAX
import Text.XML.Expat.Tree

import Control.Monad
import Control.Monad.Trans
import qualified Data.ByteString as B
import Data.IORef
import Data.Iteratee hiding (head, peek)
import Data.Iteratee.WrappedByteString
import Data.List.Class
import Data.Word


------ Types ------------------------------------------------------------------

-- | A tree representation that uses a monadic list as its child list type.
type CNode m tag text = NodeG (ListT (HandlerT m)) tag text


------ Queue ------------------------------------------------------------------

-- Mutable queue implemented as a linked list
data QueueCell a
    = Value a (QueueHead a)
    | End
    | Pending

newtype QueueHead a = QueueHead (IORef (QueueCell a))
newtype QueueTail a = QueueTail (IORef (IORef (QueueCell a)))

newQueue :: IO (QueueHead a, QueueTail a)
newQueue = do
    end <- newIORef Pending
    endRef <- newIORef end
    return (QueueHead end, QueueTail endRef)

peek :: QueueHead a -> IO (QueueCell a)
peek (QueueHead hRef) = readIORef hRef

push :: QueueTail a -> a -> IO ()
push (QueueTail qtRef) val = do
    tl <- readIORef qtRef
    newTl <- newIORef Pending
    writeIORef tl (Value val (QueueHead newTl))
    writeIORef qtRef newTl

pushEnd :: QueueTail a -> IO ()
pushEnd (QueueTail qtRef) = do
    tl <- readIORef qtRef
    writeIORef tl End


------ Handler ----------------------------------------------------------------

data Result m a = Yield (HandlerT m a) | Result a | HandlerErr String

data HandlerT m a = HandlerT {
        runHandlerT :: m (Result m a)
    }

instance Monad m => Functor (HandlerT m) where
    fmap = liftM

instance Monad m => Monad (HandlerT m) where
    return a = HandlerT $ return $ Result a
    f >>= g = HandlerT $ do
        res1 <- runHandlerT f
        case res1 of
            Yield c        -> return $ Yield (c >>= g)
            Result a       -> runHandlerT (g a)
            HandlerErr err -> return $ HandlerErr err 
    fail err = HandlerT $ return $ HandlerErr err

instance MonadTrans HandlerT where
    lift m = HandlerT $ do
        r <- m
        return $ Result r

instance MonadIO m => MonadIO (HandlerT m) where
    liftIO m = HandlerT $ do
        r <- liftIO m
        return $ Result r

yield :: Monad m => HandlerT m ()
yield = HandlerT $ return $ Yield (HandlerT $ return $ Result ())

-------------------------------------------------------------------------------

-- | An iteratee that parses the input document, passing a representation of it
-- to the specified handler monad.  The monad runs lazily using co-routines, so
-- if it requests a part of the tree that hasn't been parsed yet, it will
-- be suspended, and continued when it's available.
--
-- This implementation does /not/ use Haskell's lazy I/O.
parse :: forall m a tag text . (
             MonadIO m,
             GenericXMLString tag,
             GenericXMLString text
         ) =>
         ParserOptions tag text
      -> (CNode m tag text -> HandlerT m a)
      -> IterateeG WrappedByteString Word8 m (Either ErrMsg a)
parse opts code = IterateeG $ \str -> do
    let enc = parserEncoding opts

    parser <- liftIO $ IO.newParser enc

    (rootHd, rootTl) <- liftIO $ newQueue
    stackRef <- liftIO $ newIORef [rootTl]

    liftIO $ IO.setStartElementHandler parser $ \_ cName cAttrs -> do
        name <- textFromCString cName
        attrs <- forM cAttrs $ \(cAttrName,cAttrValue) -> do
            attrName <- textFromCString cAttrName
            attrValue <- textFromCString cAttrValue
            return (attrName, attrValue)
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        (hd, tl) <- newQueue
        push (head stack) $ Element name attrs (ListT $ iter hd)
        writeIORef stackRef (tl:stack)
        return True
    liftIO $ IO.setEndElementHandler parser $ \_ _ -> do
        --name <- textFromCString cName
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        pushEnd (head stack)
        writeIORef stackRef (tail stack)
        return True
    liftIO $ IO.setCharacterDataHandler parser $ \_ cText -> do
        txt <- gxFromCStringLen cText
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        push (head stack) $ Text txt
        return True

    let nextIter :: HandlerT m a
                 -> IterateeG WrappedByteString Word8 m (Either ErrMsg a)
        nextIter c = IterateeG $ \str -> do
            case str of
                EOF (Just err) -> return $ Done (Left err) str
                _ -> do
                    mErr <- liftIO $ IO.withParser parser $ \pp -> case str of
                        Chunk (WrapBS blk) -> IO.parseChunk pp blk False
                        EOF _              -> IO.parseChunk pp B.empty True

                    res <- runHandlerT c

                    return $ case res of
                        Yield c' -> do
                            case (str, mErr) of
                                (Chunk _, Just err) -> Cont (nextIter c') (Just $ Err $ show err)
                                (Chunk _, Nothing)  -> Cont (nextIter c') Nothing
                                (EOF _,   Just err) -> Done (Left $ Err $ show err) str
                                (EOF _,   Nothing)  -> Done (Left $ Err "EOF not handled") str
                        Result a -> do
                            case mErr of
                                Just err -> Done (Left $ Err $ show err) str
                                Nothing  -> Done (Right a) str
                        HandlerErr handlerErr -> do
                            case mErr of
                                Just parseErr -> Done (Left $ Err $ show parseErr)   str
                                Nothing       -> Done (Left $ Err $ show handlerErr) str

    let process = do
            elt <- iter rootHd
            case elt of
                Nil         -> fail "no root node"
                Cons node _ -> code node
    runIter (nextIter process) str

  where
    iter :: QueueHead (CNode m tag text) -> HandlerT m (ListItem (ListT (HandlerT m)) (CNode m tag text))
    iter hd = do
        cell <- liftIO $ peek hd
        case cell of
            Pending     -> yield >> iter hd
            End         -> return $ Nil
            Value a hd' -> return $ Cons a (ListT $ iter hd')