{-# LANGUAGE ScopedTypeVariables #-} module Text.XML.Expat.Chunked ( -- * Tree structure Node, Tree.NodeG(..), UNode, -- * Generic node manipulation module Text.XML.Expat.Internal.NodeClass, -- * Generic manipulation of the child list module Data.List.Class, -- * Qualified nodes QNode, module Text.XML.Expat.Internal.Qualified, -- * Namespaced nodes NNode, module Text.XML.Expat.Internal.Namespaced, -- * Parse to tree ParserOptions(..), defaultParserOptions, Encoding(..), Text.XML.Expat.Chunked.parse, XMLT(..), XMLParseError(..), XMLParseLocation(..) ) where import Control.Monad.ListT import qualified Text.XML.Expat.Internal.IO as IO import Text.XML.Expat.Internal.Namespaced import Text.XML.Expat.Internal.NodeClass import Text.XML.Expat.Internal.Qualified import Text.XML.Expat.SAX import qualified Text.XML.Expat.Tree as Tree import Control.Monad import Control.Monad.IO.Class import Control.Monad.Trans.Class import qualified Data.ByteString as B import Data.IORef import Data.Iteratee hiding (head, peek) import Data.Iteratee.WrappedByteString import Data.List.Class import Data.Word ------ Types ------------------------------------------------------------------ -- | A tree representation that uses a monadic list as its child list type. -- -- Note that you can use the type function 'ListOf' to give a list of -- any node type, using that node's associated list type, e.g. -- @ListOf (UNode Text)@ type Node m tag text = Tree.NodeG (ListT (XMLT m)) tag text -- | Type alias for a single node with unqualified tag names where tag and -- text are the same string type. type UNode m text = Node m text text -- | Type alias for a single annotated node where qualified names are used for tags type QNode m a text = Node a (QName text) text -- | Type alias for a single annotated node where namespaced names are used for tags type NNode m text a = Node a (NName text) text ------ Queue ------------------------------------------------------------------ -- Mutable queue implemented as a linked list data QueueCell a = Value a (QueueHead a) | End | Pending newtype QueueHead a = QueueHead (IORef (QueueCell a)) newtype QueueTail a = QueueTail (IORef (IORef (QueueCell a))) newQueue :: IO (QueueHead a, QueueTail a) newQueue = do end <- newIORef Pending endRef <- newIORef end return (QueueHead end, QueueTail endRef) peek :: QueueHead a -> IO (QueueCell a) peek (QueueHead hRef) = readIORef hRef push :: QueueTail a -> a -> IO () push (QueueTail qtRef) val = do tl <- readIORef qtRef newTl <- newIORef Pending writeIORef tl (Value val (QueueHead newTl)) writeIORef qtRef newTl pushEnd :: QueueTail a -> IO () pushEnd (QueueTail qtRef) = do tl <- readIORef qtRef writeIORef tl End ------ Handler ---------------------------------------------------------------- data Result m a = Yield (XMLT m a) | Result a | HandlerErr String -- | The monad transformer used for writing your handler for chunked XML trees, -- which executes as a co-routine. data XMLT m a = XMLT { runXMLT :: m (Result m a) } instance Monad m => Functor (XMLT m) where fmap = liftM instance Monad m => Monad (XMLT m) where return a = XMLT $ return $ Result a f >>= g = XMLT $ do res1 <- runXMLT f case res1 of Yield c -> return $ Yield (c >>= g) Result a -> runXMLT (g a) HandlerErr err -> return $ HandlerErr err fail err = XMLT $ return $ HandlerErr err instance MonadTrans XMLT where lift m = XMLT $ do r <- m return $ Result r instance MonadIO m => MonadIO (XMLT m) where liftIO m = XMLT $ do r <- liftIO m return $ Result r yield :: Monad m => XMLT m () yield = XMLT $ return $ Yield (XMLT $ return $ Result ()) ------------------------------------------------------------------------------- -- | An iteratee that parses the input document, passing a representation of it -- to the specified handler monad. The monad runs lazily using co-routines, so -- if it requests a part of the tree that hasn't been parsed yet, it will -- be suspended, and continued when it's available. -- -- This implementation does /not/ use Haskell's lazy I/O. parse :: forall m a tag text . ( MonadIO m, GenericXMLString tag, GenericXMLString text ) => ParserOptions tag text -> (ListT (XMLT m) (Node m tag text) -> XMLT m a) -> m (IterateeG WrappedByteString Word8 m (Either ErrMsg a)) parse opts handler = do let enc = parserEncoding opts parser <- liftIO $ IO.newParser enc (rootHd, rootTl) <- liftIO $ newQueue stackRef <- liftIO $ newIORef [rootTl] liftIO $ IO.setStartElementHandler parser $ \_ cName cAttrs -> do name <- textFromCString cName attrs <- forM cAttrs $ \(cAttrName,cAttrValue) -> do attrName <- textFromCString cAttrName attrValue <- textFromCString cAttrValue return (attrName, attrValue) --loc <- getParseLocation parser stack <- readIORef stackRef (hd, tl) <- newQueue push (head stack) $ Tree.Element name attrs (ListT $ iter hd) writeIORef stackRef (tl:stack) return True liftIO $ IO.setEndElementHandler parser $ \_ _ -> do --name <- textFromCString cName --loc <- getParseLocation parser stack <- readIORef stackRef pushEnd (head stack) writeIORef stackRef (tail stack) return True liftIO $ IO.setCharacterDataHandler parser $ \_ cText -> do txt <- gxFromCStringLen cText --loc <- getParseLocation parser stack <- readIORef stackRef push (head stack) $ Tree.Text txt return True let nextIter :: XMLT m a -> IterateeG WrappedByteString Word8 m (Either ErrMsg a) nextIter handler = IterateeG $ \str -> do case str of EOF (Just err) -> return $ Done (Left err) str _ -> do mErr <- liftIO $ IO.withParser parser $ \pp -> case str of Chunk (WrapBS blk) -> IO.parseChunk pp blk False EOF _ -> IO.parseChunk pp B.empty True res <- runXMLT handler return $ case res of Yield handler' -> do case (str, mErr) of (Chunk _, Just err) -> Cont (nextIter handler') (Just $ Err $ show err) (Chunk _, Nothing) -> Cont (nextIter handler') Nothing (EOF _, Just err) -> Done (Left $ Err $ show err) str (EOF _, Nothing) -> Done (Left $ Err "EOF not handled") str Result a -> do case mErr of Just err -> Done (Left $ Err $ show err) str Nothing -> Done (Right a) str HandlerErr handlerErr -> do case mErr of Just parseErr -> Done (Left $ Err $ show parseErr) str Nothing -> Done (Left $ Err handlerErr) str res <- runXMLT $ handler $ ListT $ iter rootHd return $ case res of Yield handler' -> nextIter handler' Result a -> fail $ "handler returned before it demanded from the tree" HandlerErr err -> fail $ "handler error before it demanded from the tree: "++err where iter :: QueueHead (Node m tag text) -> XMLT m (ListItem (ListT (XMLT m)) (Node m tag text)) iter hd = do cell <- liftIO $ peek hd case cell of Pending -> yield >> iter hd End -> return $ Nil Value a hd' -> return $ Cons a (ListT $ iter hd')