{-# LANGUAGE ScopedTypeVariables, Rank2Types #-}
module Text.XML.Expat.Chunked (
  -- * Tree structure
  Node,
  Tree.NodeG(..),
  UNode,

  -- * Generic node manipulation
  module Text.XML.Expat.Internal.NodeClass,

  -- * Generic manipulation of the child list
  module Data.List.Class,

  -- * Qualified nodes
  QNode,
  module Text.XML.Expat.Internal.Qualified,

  -- * Namespaced nodes
  NNode,
  module Text.XML.Expat.Internal.Namespaced,

  -- * Parse to tree
  ParseOptions(..),
  defaultParseOptions,
  Encoding(..),
  Text.XML.Expat.Chunked.parse,
  XMLT,
  XMLParseError(..),
  XMLParseLocation(..)
  ) where

import Control.Monad.ListT
import qualified Text.XML.Expat.Internal.IO as IO
import Text.XML.Expat.Internal.Namespaced
import Text.XML.Expat.Internal.NodeClass
import Text.XML.Expat.Internal.Qualified
import Text.XML.Expat.SAX
import qualified Text.XML.Expat.Tree as Tree

import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import qualified Data.ByteString as B
import Data.IORef
import Data.Iteratee hiding (head, peek)
import Data.Iteratee.WrappedByteString
import Data.List.Class
import Data.Word


------ Types ------------------------------------------------------------------

-- | A tree representation that uses a monadic list as its child list type.
--
-- Note that you can use the type function 'ListOf' to give a list of
-- any node type, using that node's associated list type, e.g.
-- @ListOf (UNode Text)@
--
-- The /s/ parameter is a dummy type used to prevent nodes escaping from the
-- handler.  See 's' for more explanation.
type Node s m tag text = Tree.NodeG (ListT (XMLT s m)) tag text

-- | Type alias for a single node with unqualified tag names where tag and
-- text are the same string type.
--
-- The /s/ parameter is a dummy type used to prevent nodes escaping from the
-- handler.  See 's' for more explanation.
type UNode s m text = Node s m text text

-- | Type alias for a single annotated node where qualified names are used for tags
--
-- The /s/ parameter is a dummy type used to prevent nodes escaping from the
-- handler.  See 's' for more explanation.
type QNode s m a text = Node s a (QName text) text

-- | Type alias for a single annotated node where namespaced names are used for tags
--
-- The /s/ parameter is a dummy type used to prevent nodes escaping from the
-- handler.  See 's' for more explanation.
type NNode s m text a = Node s a (NName text) text

------ Queue ------------------------------------------------------------------

-- Mutable queue implemented as a linked list
data QueueCell a
    = Value a (QueueHead a)
    | End
    | Pending

newtype QueueHead a = QueueHead (IORef (QueueCell a))
newtype QueueTail a = QueueTail (IORef (IORef (QueueCell a)))

newQueue :: IO (QueueHead a, QueueTail a)
newQueue = do
    end <- newIORef Pending
    endRef <- newIORef end
    return (QueueHead end, QueueTail endRef)

peek :: QueueHead a -> IO (QueueCell a)
peek (QueueHead hRef) = readIORef hRef

push :: QueueTail a -> a -> IO ()
push (QueueTail qtRef) val = do
    tl <- readIORef qtRef
    newTl <- newIORef Pending
    writeIORef tl (Value val (QueueHead newTl))
    writeIORef qtRef newTl

pushEnd :: QueueTail a -> IO ()
pushEnd (QueueTail qtRef) = do
    tl <- readIORef qtRef
    writeIORef tl End


------ Handler ----------------------------------------------------------------

data Result s m a = Yield (XMLT s m a) | Result a | HandlerErr String

-- | The monad transformer used for writing your handler for chunked XML trees,
-- which executes as a co-routine.
data XMLT s m a = XMLT {
        runXMLT :: m (Result s m a)
    }

instance Monad m => Functor (XMLT s m) where
    fmap = liftM

instance Monad m => Monad (XMLT s m) where
    return a = XMLT $ return $ Result a
    f >>= g = XMLT $ do
        res1 <- runXMLT f
        case res1 of
            Yield c        -> return $ Yield (c >>= g)
            Result a       -> runXMLT (g a)
            HandlerErr err -> return $ HandlerErr err 
    fail err = XMLT $ return $ HandlerErr err

instance MonadTrans (XMLT s) where
    lift m = XMLT $ do
        r <- m
        return $ Result r

instance MonadIO m => MonadIO (XMLT s m) where
    liftIO m = XMLT $ do
        r <- liftIO m
        return $ Result r

yield :: Monad m => XMLT s m ()
yield = XMLT $ return $ Yield (XMLT $ return $ Result ())

-------------------------------------------------------------------------------

-- | An iteratee that parses the input document, passing a representation of it
-- to the specified handler monad.  The monad runs lazily using co-routines, so
-- if it requests a part of the tree that hasn't been parsed yet, it will
-- be suspended, and continued when it's available.
--
-- This implementation does /not/ use Haskell's lazy I/O.
--
-- The /s/ type argument is a dummy type, which you should just leave polymorphic
-- by typing /s/ when using the type.  The \"forall s .\" in the type signature
-- prevents any parsed nodes escaping from the handler, because they may refer
-- to parts of the tree that haven't been parsed yet, and this parsing can't
-- take happen outside the handler.  If you need to extract nodes from your
-- handler, use a function like 'fromNodeContainer' to convert the container type.
parse :: forall m a tag text . (
             MonadIO m,
             GenericXMLString tag,
             GenericXMLString text
         ) =>
         ParseOptions tag text
      -> (forall s . ListT (XMLT s m) (Node s m tag text) -> XMLT s m a)   -- ^ Handler for parsed tree
      -> m (IterateeG WrappedByteString Word8 m (Either ErrMsg a))
parse opts handler = do
    let enc = defaultEncoding opts

    parser <- liftIO $ IO.newParser enc

    (rootHd, rootTl) <- liftIO $ newQueue
    stackRef <- liftIO $ newIORef [rootTl]

    liftIO $ IO.setStartElementHandler parser $ \_ cName cAttrs -> do
        name <- textFromCString cName
        attrs <- forM cAttrs $ \(cAttrName,cAttrValue) -> do
            attrName <- textFromCString cAttrName
            attrValue <- textFromCString cAttrValue
            return (attrName, attrValue)
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        (hd, tl) <- newQueue
        push (head stack) $ Tree.Element name attrs (ListT $ iter hd)
        case stack of   -- If this is the first startElement of the document,
                        -- then end the root queue now.  XML documents only have
                        -- one root tag.
            [_] -> pushEnd rootTl
            _   -> return ()
        writeIORef stackRef (tl:stack)
        return True
    liftIO $ IO.setEndElementHandler parser $ \_ _ -> do
        --name <- textFromCString cName
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        pushEnd (head stack)
        writeIORef stackRef (tail stack)
        return True
    liftIO $ IO.setCharacterDataHandler parser $ \_ cText -> do
        txt <- gxFromCStringLen cText
        --loc <- getParseLocation parser
        stack <- readIORef stackRef
        push (head stack) $ Tree.Text txt
        return True

    let nextIter :: XMLT s m a
                 -> IterateeG WrappedByteString Word8 m (Either ErrMsg a)
        nextIter handler = IterateeG $ \str -> do
            case str of
                EOF (Just err) -> return $ Done (Left err) str
                _ -> do
                    mErr <- liftIO $ IO.withParser parser $ \pp -> case str of
                        Chunk (WrapBS blk) -> IO.parseChunk pp blk False
                        EOF _              -> IO.parseChunk pp B.empty True

                    res <- runXMLT handler

                    return $ case res of
                        Yield handler' -> do
                            case (str, mErr) of
                                (Chunk _, Just err) -> Cont (nextIter handler') (Just $ Err $ show err)
                                (Chunk _, Nothing)  -> Cont (nextIter handler') Nothing
                                (EOF _,   Just err) -> Done (Left $ Err $ show err) str
                                (EOF _,   Nothing)  -> Done (Left $ Err "EOF not handled") str
                        Result a -> do
                            case mErr of
                                Just err -> Done (Left $ Err $ show err) str
                                Nothing  -> Done (Right a) str
                        HandlerErr handlerErr -> do
                            case mErr of
                                Just parseErr -> Done (Left $ Err $ show parseErr)   str
                                Nothing       -> Done (Left $ Err handlerErr) str

    res <- runXMLT $ handler $ ListT $ iter rootHd
    return $ case res of
        Yield handler' -> nextIter handler'
        Result a       -> fail $ "handler returned before it demanded from the tree"
        HandlerErr err -> fail $ "handler error before it demanded from the tree: "++err
  where
    iter :: QueueHead (Node s m tag text) -> XMLT s m (ListItem (ListT (XMLT s m)) (Node s m tag text))
    iter hd = do
        cell <- liftIO $ peek hd
        case cell of
            Pending     -> yield >> iter hd
            End         -> return $ Nil
            Value a hd' -> return $ Cons a (ListT $ iter hd')