{-# LANGUAGE ScopedTypeVariables, Rank2Types #-} module Text.XML.Expat.Chunked ( -- * Tree structure Node, Tree.NodeG(..), UNode, -- * Generic node manipulation module Text.XML.Expat.Internal.NodeClass, -- * Generic manipulation of the child list module Data.List.Class, -- * Qualified nodes QNode, module Text.XML.Expat.Internal.Qualified, -- * Namespaced nodes NNode, module Text.XML.Expat.Internal.Namespaced, -- * Parse to tree ParseOptions(..), defaultParseOptions, Encoding(..), Text.XML.Expat.Chunked.parse, XMLT, XMLParseError(..), XMLParseLocation(..) ) where import Control.Monad.ListT import qualified Text.XML.Expat.Internal.IO as IO import Text.XML.Expat.Internal.Namespaced import Text.XML.Expat.Internal.NodeClass import Text.XML.Expat.Internal.Qualified import Text.XML.Expat.SAX import qualified Text.XML.Expat.Tree as Tree import Control.Monad import Control.Monad.IO.Class import Control.Monad.Trans.Class import qualified Data.ByteString as B import Data.IORef import Data.Iteratee hiding (head, peek) import Data.Iteratee.WrappedByteString import Data.List.Class import Data.Word ------ Types ------------------------------------------------------------------ -- | A tree representation that uses a monadic list as its child list type. -- -- Note that you can use the type function 'ListOf' to give a list of -- any node type, using that node's associated list type, e.g. -- @ListOf (UNode Text)@ -- -- The /s/ parameter is a dummy type used to prevent nodes escaping from the -- handler. See 's' for more explanation. type Node s m tag text = Tree.NodeG (ListT (XMLT s m)) tag text -- | Type alias for a single node with unqualified tag names where tag and -- text are the same string type. -- -- The /s/ parameter is a dummy type used to prevent nodes escaping from the -- handler. See 's' for more explanation. type UNode s m text = Node s m text text -- | Type alias for a single annotated node where qualified names are used for tags -- -- The /s/ parameter is a dummy type used to prevent nodes escaping from the -- handler. See 's' for more explanation. type QNode s m a text = Node s a (QName text) text -- | Type alias for a single annotated node where namespaced names are used for tags -- -- The /s/ parameter is a dummy type used to prevent nodes escaping from the -- handler. See 's' for more explanation. type NNode s m text a = Node s a (NName text) text ------ Queue ------------------------------------------------------------------ -- Mutable queue implemented as a linked list data QueueCell a = Value a (QueueHead a) | End | Pending newtype QueueHead a = QueueHead (IORef (QueueCell a)) newtype QueueTail a = QueueTail (IORef (IORef (QueueCell a))) newQueue :: IO (QueueHead a, QueueTail a) newQueue = do end <- newIORef Pending endRef <- newIORef end return (QueueHead end, QueueTail endRef) peek :: QueueHead a -> IO (QueueCell a) peek (QueueHead hRef) = readIORef hRef push :: QueueTail a -> a -> IO () push (QueueTail qtRef) val = do tl <- readIORef qtRef newTl <- newIORef Pending writeIORef tl (Value val (QueueHead newTl)) writeIORef qtRef newTl pushEnd :: QueueTail a -> IO () pushEnd (QueueTail qtRef) = do tl <- readIORef qtRef writeIORef tl End ------ Handler ---------------------------------------------------------------- data Result s m a = Yield (XMLT s m a) | Result a | HandlerErr String -- | The monad transformer used for writing your handler for chunked XML trees, -- which executes as a co-routine. data XMLT s m a = XMLT { runXMLT :: m (Result s m a) } instance Monad m => Functor (XMLT s m) where fmap = liftM instance Monad m => Monad (XMLT s m) where return a = XMLT $ return $ Result a f >>= g = XMLT $ do res1 <- runXMLT f case res1 of Yield c -> return $ Yield (c >>= g) Result a -> runXMLT (g a) HandlerErr err -> return $ HandlerErr err fail err = XMLT $ return $ HandlerErr err instance MonadTrans (XMLT s) where lift m = XMLT $ do r <- m return $ Result r instance MonadIO m => MonadIO (XMLT s m) where liftIO m = XMLT $ do r <- liftIO m return $ Result r yield :: Monad m => XMLT s m () yield = XMLT $ return $ Yield (XMLT $ return $ Result ()) ------------------------------------------------------------------------------- -- | An iteratee that parses the input document, passing a representation of it -- to the specified handler monad. The monad runs lazily using co-routines, so -- if it requests a part of the tree that hasn't been parsed yet, it will -- be suspended, and continued when it's available. -- -- This implementation does /not/ use Haskell's lazy I/O. -- -- The /s/ type argument is a dummy type, which you should just leave polymorphic -- by typing /s/ when using the type. The \"forall s .\" in the type signature -- prevents any parsed nodes escaping from the handler, because they may refer -- to parts of the tree that haven't been parsed yet, and this parsing can't -- take happen outside the handler. If you need to extract nodes from your -- handler, use a function like 'fromNodeContainer' to convert the container type. parse :: forall m a tag text . ( MonadIO m, GenericXMLString tag, GenericXMLString text ) => ParseOptions tag text -> (forall s . ListT (XMLT s m) (Node s m tag text) -> XMLT s m a) -- ^ Handler for parsed tree -> m (IterateeG WrappedByteString Word8 m (Either ErrMsg a)) parse opts handler = do let enc = overrideEncoding opts parser <- liftIO $ IO.newParser enc (rootHd, rootTl) <- liftIO $ newQueue stackRef <- liftIO $ newIORef [rootTl] liftIO $ IO.setStartElementHandler parser $ \_ cName cAttrs -> do name <- textFromCString cName attrs <- forM cAttrs $ \(cAttrName,cAttrValue) -> do attrName <- textFromCString cAttrName attrValue <- textFromCString cAttrValue return (attrName, attrValue) --loc <- getParseLocation parser stack <- readIORef stackRef (hd, tl) <- newQueue push (head stack) $ Tree.Element name attrs (ListT $ iter hd) case stack of -- If this is the first startElement of the document, -- then end the root queue now. XML documents only have -- one root tag. [_] -> pushEnd rootTl _ -> return () writeIORef stackRef (tl:stack) return True liftIO $ IO.setEndElementHandler parser $ \_ _ -> do --name <- textFromCString cName --loc <- getParseLocation parser stack <- readIORef stackRef pushEnd (head stack) writeIORef stackRef (tail stack) return True liftIO $ IO.setCharacterDataHandler parser $ \_ cText -> do txt <- gxFromCStringLen cText --loc <- getParseLocation parser stack <- readIORef stackRef push (head stack) $ Tree.Text txt return True let nextIter :: XMLT s m a -> IterateeG WrappedByteString Word8 m (Either ErrMsg a) nextIter handler = IterateeG $ \str -> do case str of EOF (Just err) -> return $ Done (Left err) str _ -> do mErr <- liftIO $ IO.withParser parser $ \pp -> case str of Chunk (WrapBS blk) -> IO.parseChunk pp blk False EOF _ -> IO.parseChunk pp B.empty True res <- runXMLT handler return $ case res of Yield handler' -> do case (str, mErr) of (Chunk _, Just err) -> Cont (nextIter handler') (Just $ Err $ show err) (Chunk _, Nothing) -> Cont (nextIter handler') Nothing (EOF _, Just err) -> Done (Left $ Err $ show err) str (EOF _, Nothing) -> Done (Left $ Err "EOF not handled") str Result a -> do case mErr of Just err -> Done (Left $ Err $ show err) str Nothing -> Done (Right a) str HandlerErr handlerErr -> do case mErr of Just parseErr -> Done (Left $ Err $ show parseErr) str Nothing -> Done (Left $ Err handlerErr) str res <- runXMLT $ handler $ ListT $ iter rootHd return $ case res of Yield handler' -> nextIter handler' Result a -> fail $ "handler returned before it demanded from the tree" HandlerErr err -> fail $ "handler error before it demanded from the tree: "++err where iter :: QueueHead (Node s m tag text) -> XMLT s m (ListItem (ListT (XMLT s m)) (Node s m tag text)) iter hd = do cell <- liftIO $ peek hd case cell of Pending -> yield >> iter hd End -> return $ Nil Value a hd' -> return $ Cons a (ListT $ iter hd')