module Text.Roundtrip.Xml.Enumerator.Parser (
XmlParseIteratee, parseXml, parseXml', parseXml''
, XmlException(..)
) where
import Control.Monad.State
import Control.Exception
import qualified Data.Enumerator as E
import qualified Data.Enumerator.List as EL
import qualified Data.Text as T
import Data.XML.Types
import Data.Typeable
import qualified Debug.Trace
import qualified Text.Parsec as P
import Text.Roundtrip
import Text.Roundtrip.Xml
import Text.Roundtrip.Xml.ParserInternal
type PStreamPos = Int
type PStreamState = (PStreamPos, Maybe RtEventWithPos)
type XmlParseIteratee m a = GenXmlParser PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) a
data XmlException = ParseError P.ParseError
| InvalidEntity T.Text P.SourcePos
deriving (Typeable, Show)
instance Exception XmlException
instance Monad m => P.Stream PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) RtEventWithPos where
uncons pos =
do (lastPos, mLastTok) <- get
case () of
_| pos == lastPos ->
do mx <- lift $ EL.head
let pos' = pos + 1
put (pos', mx)
("returning " ++ show mx ++ " from underlying stream, position " ++ show pos') `debug`
case mx of
Just x -> return $ Just (x, pos')
Nothing -> return Nothing
| pos == lastPos 1 ->
("returning " ++ show mLastTok ++ " from lookahead buffer, position " ++ show lastPos) `debug`
case mLastTok of
Just lastTok -> return $ Just (lastTok, lastPos)
Nothing -> return Nothing
| otherwise -> ("invalid lookahead, position request: " ++ show (pos + 1) ++
", position of next token: " ++ show (lastPos + 1) ++
", position of buffered token: " ++ show lastPos ++
" (" ++ show mLastTok ++ ")") `warn` return Nothing
parseXml :: Monad m => P.SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee Event m a
parseXml sourceName entityRenderer p =
E.joinI $ EL.map eventWithoutPos E.$$
E.joinI $ simplify entityRenderer E.$$
parseXml''' sourceName p
parseXml' :: Monad m => EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a
parseXml' entityRenderer p =
do first <- E.peek
let sourceName = case first of
Just (WithPos _ pos) -> P.sourceName pos
Nothing -> ""
E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p
parseXml'' :: Monad m => P.SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a
parseXml'' sourceName entityRenderer p =
E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p
parseXml''' :: Monad m => P.SourceName -> XmlParseIteratee m a -> E.Iteratee RtEventWithPos m a
parseXml''' sourceName p =
let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc
in do let startPos = 1
res <- evalStateT (P.runParserT q Nothing sourceName startPos) (startPos, Nothing)
case res of
Left err -> E.returnI (E.Error $ toException $ ParseError err)
Right x -> E.yield x E.EOF
simplify :: Monad m => (T.Text -> Maybe T.Text) -> E.Enumeratee EventWithPos RtEventWithPos m b
simplify renderEntity = loop
where
loop = E.checkDone go
go k =
do x <- EL.head
case x of
Nothing -> k (E.Chunks []) E.>>== return
Just (WithPos EventBeginDocument pos) ->
k (E.Chunks [WithPos RtBeginDocument pos]) E.>>== loop
Just (WithPos EventEndDocument pos) ->
k (E.Chunks [WithPos RtEndDocument pos]) E.>>== loop
Just (WithPos (EventInstruction{}) pos) -> go k
Just (WithPos (EventBeginDoctype{}) pos) -> go k
Just (WithPos (EventEndDoctype{}) pos) -> go k
Just (WithPos (EventBeginElement n as) pos) ->
let insertAttr m (k, vs) =
do vs' <- mapM (contentToText pos) vs
return $ (k, (T.concat vs')) : m
in do as' <- foldM insertAttr [] as
as' `seq` k (E.Chunks [WithPos (RtBeginElement n (reverse as')) pos])
E.>>== loop
Just (WithPos (EventEndElement n) pos) ->
k (E.Chunks [WithPos (RtEndElement n) pos]) E.>>== loop
Just (WithPos (EventContent c) pos) -> do
t <- contentToText pos c
ts <- takeContents $ (:) t
let text = T.strip $ T.concat $ ts []
if T.null text
then go k
else k (E.Chunks [WithPos (RtText text) pos]) E.>>== loop
Just (WithPos (EventComment{}) _) -> go k
contentToText pos c =
case c of
ContentEntity e ->
case renderEntity e of
Nothing -> E.throwError $ InvalidEntity e pos
Just t -> return t
ContentText t -> return t
takeContents front = do
do x <- E.peek
case x of
Nothing -> return front
Just (WithPos EventBeginElement{} pos) -> return front
Just (WithPos EventEndElement{} pos) -> return front
Just (WithPos (EventContent c) pos) ->
do EL.drop 1
t <- contentToText pos c
takeContents $ front . (:) t
Just (WithPos EventBeginDocument pos) -> return front
Just (WithPos EventEndDocument pos) -> return front
Just (WithPos EventInstruction{} pos) -> helper
Just (WithPos EventBeginDoctype{} pos) -> helper
Just (WithPos EventEndDoctype{} pos) -> helper
Just (WithPos EventComment{} pos) -> helper
where
helper = EL.drop 1 >> takeContents front
warn = Debug.Trace.trace
debug _ x = x