{-# OPTIONS_GHC -ddump-minimal-imports #-} {-# LANGUAGE DeriveDataTypeable, TypeSynonymInstances, FlexibleInstances, MultiParamTypeClasses #-} module Text.Roundtrip.Xml.Enumerator.Parser ( XmlParseIteratee, parseXml, parseXml', parseXml'' , XmlException(..) , parseXml_, XmlParseIteratee' ) where import Control.Monad.State import Control.Exception import qualified Data.Enumerator as E import qualified Data.Enumerator.List as EL import qualified Data.Text as T import Data.XML.Types import Data.Typeable import qualified Debug.Trace import Text.Roundtrip import Text.Roundtrip.Parser import Text.Roundtrip.Xml import Text.Roundtrip.Xml.ParserInternal import Data.Reference import Data.IORef data (Monad m, Reference r m) => Cursor r m a = Cursor (r (NextCursor r m a)) [a] data (Monad m, Reference r m) => NextCursor r m a = NextCursor (Cursor r m a) | None | Uneval liftIteratee :: Monad m => m b -> E.Iteratee a m b liftIteratee m = E.Iteratee (m >>= E.runIteratee . return) unconsStream :: (Monad m, Reference r m) => Cursor r m a -> E.Iteratee a m (Maybe (a, Cursor r m a)) unconsStream p@(Cursor r c) | null c = do x <- liftIteratee (readRef r) unconsCursor r x | otherwise = return $! unconsChunk p unconsCursor :: (Monad m, Reference r m) => r (NextCursor r m a) -> NextCursor r m a -> E.Iteratee a m (Maybe (a, Cursor r m a)) unconsCursor _ (NextCursor c) = return $! unconsChunk c unconsCursor _ None = return Nothing unconsCursor r Uneval = E.continue (extendCursor r) unconsChunk :: (Monad m, Reference r m) => Cursor r m a -> Maybe (a, Cursor r m a) unconsChunk (Cursor r c) = Just (head c, Cursor r (tail c)) extendCursor :: (Monad m, Reference r m) => r (NextCursor r m a) -> E.Stream a -> E.Iteratee a m (Maybe (a, Cursor r m a)) extendCursor r (E.Chunks l) | null l = E.continue (extendCursor r) | otherwise = do x <- liftIteratee (insertCursor r l) return $ unconsChunk x extendCursor r E.EOF = do liftIteratee (writeRef r None) return Nothing insertCursor :: (Monad m, Reference r m) => r (NextCursor r m a) -> [a] -> m (Cursor r m a) insertCursor r l = do r' <- newRef Uneval let c = Cursor r' l writeRef r (NextCursor c) return c mkCursor :: (Monad m, Reference r m) => m (Cursor r m a) mkCursor = do r <- newRef Uneval return (Cursor r []) instance (Monad m, Reference r m) => Stream (Cursor r m a) (E.Iteratee a m) a where uncons = unconsStream type XmlParseIteratee' r m a = GenXmlParser (Cursor r m RtEventWithPos) (E.Iteratee RtEventWithPos m) a concatCursor :: (Monad m, Reference r m) => Cursor r m a -> m [a] concatCursor (Cursor r l) = liftM (l ++) (readRef r >>= concatCursor') concatCursor' :: (Monad m, Reference r m) => NextCursor r m a -> m [a] concatCursor' (NextCursor n) = concatCursor n concatCursor' _ = return [] parseXml_''' :: (Reference r m, Monad m) => SourceName -> XmlParseIteratee' r m a -> E.Iteratee RtEventWithPos m a parseXml_''' sourceName p = let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc in do cursor <- liftIteratee mkCursor res <- runParserT (liftM2 (,) q getInput) Nothing sourceName cursor case res of Left err -> E.returnI (E.Error $ toException $ ParseError err) Right (x, restCursor) -> do rest <- liftIteratee $ concatCursor restCursor E.yield x (E.Chunks rest) parseXml_ :: (Reference r m, Monad m) => SourceName -> EntityRenderer -> XmlParseIteratee' r m a -> E.Iteratee Event m a parseXml_ sourceName entityRenderer p = E.joinI $ EL.map eventWithoutPos E.$$ E.joinI $ simplify entityRenderer E.$$ parseXml_''' sourceName p type PStreamPos = Int type PStreamState = (PStreamPos, Maybe RtEventWithPos) -- last event read from stream with position type XmlParseIteratee m a = GenXmlParser PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) a data XmlException = ParseError ParseError | InvalidEntity T.Text SourcePos deriving (Typeable, Show) instance Exception XmlException instance Monad m => Stream PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) RtEventWithPos where uncons pos = do (lastPos, mLastTok) <- get case () of _| pos == lastPos -> do mx <- lift $ EL.head let pos' = pos + 1 put (pos', mx) ("returning " ++ show mx ++ " from underlying stream, position " ++ show pos') `debug` case mx of Just x -> return $ Just (x, pos') Nothing -> return Nothing | pos == lastPos - 1 -> ("returning " ++ show mLastTok ++ " from lookahead buffer, position " ++ show lastPos) `debug` case mLastTok of Just lastTok -> return $ Just (lastTok, lastPos) Nothing -> return Nothing | otherwise -> ("invalid lookahead, position request: " ++ show (pos + 1) ++ ", position of next token: " ++ show (lastPos + 1) ++ ", position of buffered token: " ++ show lastPos ++ " (" ++ show mLastTok ++ ")") `warn` return Nothing parseXml :: Monad m => SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee Event m a parseXml sourceName entityRenderer p = E.joinI $ EL.map eventWithoutPos E.$$ E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p parseXml' :: Monad m => EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a parseXml' entityRenderer p = do first <- E.peek let src = case first of Just (WithPos _ pos) -> sourceName pos Nothing -> "" E.joinI $ simplify entityRenderer E.$$ parseXml''' src p parseXml'' :: Monad m => SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a parseXml'' sourceName entityRenderer p = E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p parseXml''' :: Monad m => SourceName -> XmlParseIteratee m a -> E.Iteratee RtEventWithPos m a parseXml''' sourceName p = let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc in do let startPos = -1 res <- evalStateT (runParserT q Nothing sourceName startPos) (startPos, Nothing) case res of Left err -> E.returnI (E.Error $ toException $ ParseError err) Right x -> E.yield x E.EOF simplify :: Monad m => (T.Text -> Maybe T.Text) -> E.Enumeratee EventWithPos RtEventWithPos m b simplify renderEntity = loop where loop = E.checkDone go go k = do x <- EL.head case x of Nothing -> k (E.Chunks []) E.>>== return Just (WithPos EventBeginDocument pos) -> k (E.Chunks [WithPos RtBeginDocument pos]) E.>>== loop Just (WithPos EventEndDocument pos) -> k (E.Chunks [WithPos RtEndDocument pos]) E.>>== loop Just (WithPos (EventInstruction{}) pos) -> go k Just (WithPos (EventBeginDoctype{}) pos) -> go k Just (WithPos (EventEndDoctype{}) pos) -> go k Just (WithPos (EventBeginElement n as) pos) -> let insertAttr m (k, vs) = do vs' <- mapM (contentToText pos) vs return $ (k, (T.concat vs')) : m in do as' <- foldM insertAttr [] as as' `seq` k (E.Chunks [WithPos (RtBeginElement n (reverse as')) pos]) E.>>== loop Just (WithPos (EventEndElement n) pos) -> k (E.Chunks [WithPos (RtEndElement n) pos]) E.>>== loop Just (WithPos (EventContent c) pos) -> do t <- contentToText pos c ts <- takeContents $ (:) t let text = T.strip $ T.concat $ ts [] if T.null text then go k else k (E.Chunks [WithPos (RtText text) pos]) E.>>== loop Just (WithPos (EventComment{}) _) -> go k contentToText pos c = case c of ContentEntity e -> case renderEntity e of Nothing -> E.throwError $ InvalidEntity e pos Just t -> return t ContentText t -> return t takeContents front = do do x <- E.peek case x of Nothing -> return front Just (WithPos EventBeginElement{} pos) -> return front Just (WithPos EventEndElement{} pos) -> return front Just (WithPos (EventContent c) pos) -> do EL.drop 1 t <- contentToText pos c takeContents $ front . (:) t Just (WithPos EventBeginDocument pos) -> return front Just (WithPos EventEndDocument pos) -> return front Just (WithPos EventInstruction{} pos) -> helper Just (WithPos EventBeginDoctype{} pos) -> helper Just (WithPos EventEndDoctype{} pos) -> helper Just (WithPos EventComment{} pos) -> helper where helper = EL.drop 1 >> takeContents front warn = Debug.Trace.trace -- debug = Debug.Trace.trace debug _ x = x