{-# OPTIONS_GHC -ddump-minimal-imports #-} {-# LANGUAGE DeriveDataTypeable, TypeSynonymInstances, FlexibleInstances, MultiParamTypeClasses #-} module Text.Roundtrip.Xml.Enumerator.Parser ( XmlParseIteratee, parseXml, parseXml', parseXml'' , XmlException(..) ) where import Control.Monad.State import Control.Exception import qualified Data.Enumerator as E import qualified Data.Enumerator.List as EL import qualified Data.Text as T import Data.XML.Types import Data.Typeable import qualified Debug.Trace import qualified Text.Parsec as P import Text.Roundtrip import Text.Roundtrip.Xml import Text.Roundtrip.Xml.ParserInternal type PStreamPos = Int type PStreamState = (PStreamPos, Maybe RtEventWithPos) -- last event read from stream with position type XmlParseIteratee m a = GenXmlParser PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) a data XmlException = ParseError P.ParseError | InvalidEntity T.Text P.SourcePos deriving (Typeable, Show) instance Exception XmlException instance Monad m => P.Stream PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) RtEventWithPos where uncons pos = do (lastPos, mLastTok) <- get case () of _| pos == lastPos -> do mx <- lift $ EL.head let pos' = pos + 1 put (pos', mx) ("returning " ++ show mx ++ " from underlying stream, position " ++ show pos') `debug` case mx of Just x -> return $ Just (x, pos') Nothing -> return Nothing | pos == lastPos - 1 -> ("returning " ++ show mLastTok ++ " from lookahead buffer, position " ++ show lastPos) `debug` case mLastTok of Just lastTok -> return $ Just (lastTok, lastPos) Nothing -> return Nothing | otherwise -> ("invalid lookahead, position request: " ++ show (pos + 1) ++ ", position of next token: " ++ show (lastPos + 1) ++ ", position of buffered token: " ++ show lastPos ++ " (" ++ show mLastTok ++ ")") `warn` return Nothing parseXml :: Monad m => P.SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee Event m a parseXml sourceName entityRenderer p = E.joinI $ EL.map eventWithoutPos E.$$ E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p parseXml' :: Monad m => EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a parseXml' entityRenderer p = do first <- E.peek let sourceName = case first of Just (WithPos _ pos) -> P.sourceName pos Nothing -> "" E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p parseXml'' :: Monad m => P.SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a parseXml'' sourceName entityRenderer p = E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p parseXml''' :: Monad m => P.SourceName -> XmlParseIteratee m a -> E.Iteratee RtEventWithPos m a parseXml''' sourceName p = let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc in do let startPos = -1 res <- evalStateT (P.runParserT q Nothing sourceName startPos) (startPos, Nothing) case res of Left err -> E.returnI (E.Error $ toException $ ParseError err) Right x -> E.yield x E.EOF simplify :: Monad m => (T.Text -> Maybe T.Text) -> E.Enumeratee EventWithPos RtEventWithPos m b simplify renderEntity = loop where loop = E.checkDone go go k = do x <- EL.head case x of Nothing -> k (E.Chunks []) E.>>== return Just (WithPos EventBeginDocument pos) -> k (E.Chunks [WithPos RtBeginDocument pos]) E.>>== loop Just (WithPos EventEndDocument pos) -> k (E.Chunks [WithPos RtEndDocument pos]) E.>>== loop Just (WithPos (EventInstruction{}) pos) -> go k Just (WithPos (EventBeginDoctype{}) pos) -> go k Just (WithPos (EventEndDoctype{}) pos) -> go k Just (WithPos (EventBeginElement n as) pos) -> let insertAttr m (k, vs) = do vs' <- mapM (contentToText pos) vs return $ (k, (T.concat vs')) : m in do as' <- foldM insertAttr [] as as' `seq` k (E.Chunks [WithPos (RtBeginElement n (reverse as')) pos]) E.>>== loop Just (WithPos (EventEndElement n) pos) -> k (E.Chunks [WithPos (RtEndElement n) pos]) E.>>== loop Just (WithPos (EventContent c) pos) -> do t <- contentToText pos c ts <- takeContents $ (:) t let text = T.strip $ T.concat $ ts [] if T.null text then go k else k (E.Chunks [WithPos (RtText text) pos]) E.>>== loop Just (WithPos (EventComment{}) _) -> go k contentToText pos c = case c of ContentEntity e -> case renderEntity e of Nothing -> E.throwError $ InvalidEntity e pos Just t -> return t ContentText t -> return t takeContents front = do do x <- E.peek case x of Nothing -> return front Just (WithPos EventBeginElement{} pos) -> return front Just (WithPos EventEndElement{} pos) -> return front Just (WithPos (EventContent c) pos) -> do EL.drop 1 t <- contentToText pos c takeContents $ front . (:) t Just (WithPos EventBeginDocument pos) -> return front Just (WithPos EventEndDocument pos) -> return front Just (WithPos EventInstruction{} pos) -> helper Just (WithPos EventBeginDoctype{} pos) -> helper Just (WithPos EventEndDoctype{} pos) -> helper Just (WithPos EventComment{} pos) -> helper where helper = EL.drop 1 >> takeContents front warn = Debug.Trace.trace -- debug = Debug.Trace.trace debug _ x = x