module Text.Roundtrip.Xml.Enumerator.Parser (
XmlParseIteratee, PureXmlParseIteratee, parseXml, parseXml', parseXml''
, XmlException(..)
) where
import Control.Monad.State
import Control.Exception
import Control.Monad.ST
import Data.STRef
import qualified Data.Enumerator as E
import qualified Data.Enumerator.List as EL
import qualified Data.Text as T
import Data.XML.Types
import Data.Typeable
import qualified Debug.Trace
import Text.Roundtrip
import Text.Roundtrip.Parser
import Text.Roundtrip.Xml
import Text.Roundtrip.Xml.ParserInternal
import Data.Reference
import Data.IORef
data (Monad m, Reference r m) => Cursor r m a = Cursor (r (NextCursor r m a)) [a]
data (Monad m, Reference r m) => NextCursor r m a = NextCursor (Cursor r m a)
| None
| Uneval
liftIteratee :: Monad m => m b -> E.Iteratee a m b
liftIteratee m = E.Iteratee (m >>= E.runIteratee . return)
unconsStream :: (Monad m, Reference r m)
=> Cursor r m a -> E.Iteratee a m (Maybe (a, Cursor r m a))
unconsStream p@(Cursor r c)
| null c = do x <- liftIteratee (readRef r)
unconsCursor r x
| otherwise = return $! unconsChunk p
unconsCursor :: (Monad m, Reference r m)
=> r (NextCursor r m a)
-> NextCursor r m a
-> E.Iteratee a m (Maybe (a, Cursor r m a))
unconsCursor _ (NextCursor c) = return $! unconsChunk c
unconsCursor _ None = return Nothing
unconsCursor r Uneval = E.continue (extendCursor r)
unconsChunk :: (Monad m, Reference r m)
=> Cursor r m a -> Maybe (a, Cursor r m a)
unconsChunk (Cursor r c) = Just (head c, Cursor r (tail c))
extendCursor :: (Monad m, Reference r m)
=> r (NextCursor r m a)
-> E.Stream a
-> E.Iteratee a m (Maybe (a, Cursor r m a))
extendCursor r (E.Chunks l)
| null l = E.continue (extendCursor r)
| otherwise = do x <- liftIteratee (insertCursor r l)
return $ unconsChunk x
extendCursor r E.EOF =
do liftIteratee (writeRef r None)
return Nothing
insertCursor :: (Monad m, Reference r m)
=> r (NextCursor r m a)
-> [a]
-> m (Cursor r m a)
insertCursor r l =
do r' <- newRef Uneval
let c = Cursor r' l
writeRef r (NextCursor c)
return c
mkCursor :: (Monad m, Reference r m) => m (Cursor r m a)
mkCursor =
do r <- newRef Uneval
return (Cursor r [])
instance (Monad m, Reference r m) => Stream (Cursor r m a) (E.Iteratee a m) a where
uncons = unconsStream
type XmlParseIteratee r m a = GenXmlParser (Cursor r m RtEventWithPos) (E.Iteratee RtEventWithPos m) a
type PureXmlParseIteratee a = forall s . XmlParseIteratee (STRef s) (ST s) a
data XmlException = ParseError ParseError
| InvalidEntity T.Text SourcePos
deriving (Typeable, Show)
instance Exception XmlException
concatCursor :: (Monad m, Reference r m) => Cursor r m a -> m [a]
concatCursor (Cursor r l) =
liftM (l ++) (readRef r >>= concatCursor')
concatCursor' :: (Monad m, Reference r m) => NextCursor r m a -> m [a]
concatCursor' (NextCursor n) = concatCursor n
concatCursor' _ = return []
parseXml :: (Reference r m, Monad m) => SourceName -> EntityRenderer -> XmlParseIteratee r m a -> E.Iteratee Event m a
parseXml sourceName entityRenderer p =
E.joinI $ EL.map eventWithoutPos E.$$
E.joinI $ simplify entityRenderer E.$$
parseXml''' sourceName p
parseXml' :: (Reference r m, Monad m) => EntityRenderer -> XmlParseIteratee r m a -> E.Iteratee EventWithPos m a
parseXml' entityRenderer p =
do first <- E.peek
let src = case first of
Just (WithPos _ pos) -> sourceName pos
Nothing -> ""
E.joinI $ simplify entityRenderer E.$$ parseXml''' src p
parseXml'' :: (Reference r m, Monad m) => SourceName -> EntityRenderer -> XmlParseIteratee r m a -> E.Iteratee EventWithPos m a
parseXml'' sourceName entityRenderer p =
E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p
parseXml''' :: (Reference r m, Monad m) => SourceName -> XmlParseIteratee r m a -> E.Iteratee RtEventWithPos m a
parseXml''' sourceName p =
let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc
in do cursor <- liftIteratee mkCursor
res <- runParserT (liftM2 (,) q getInput) Nothing sourceName cursor
case res of
Left err -> E.returnI (E.Error $ toException $ ParseError err)
Right (x, restCursor) ->
do rest <- liftIteratee $ concatCursor restCursor
E.yield x (E.Chunks rest)
simplify :: Monad m => (T.Text -> Maybe T.Text) -> E.Enumeratee EventWithPos RtEventWithPos m b
simplify renderEntity = loop
where
loop = E.checkDone go
go k =
do x <- EL.head
case x of
Nothing -> k (E.Chunks []) E.>>== return
Just (WithPos EventBeginDocument pos) ->
k (E.Chunks [WithPos RtBeginDocument pos]) E.>>== loop
Just (WithPos EventEndDocument pos) ->
k (E.Chunks [WithPos RtEndDocument pos]) E.>>== loop
Just (WithPos (EventInstruction{}) pos) -> go k
Just (WithPos (EventBeginDoctype{}) pos) -> go k
Just (WithPos (EventEndDoctype{}) pos) -> go k
Just (WithPos (EventBeginElement n as) pos) ->
let insertAttr m (k, vs) =
do vs' <- mapM (contentToText pos) vs
return $ (k, (T.concat vs')) : m
in do as' <- foldM insertAttr [] as
as' `seq` k (E.Chunks [WithPos (RtBeginElement n (reverse as')) pos])
E.>>== loop
Just (WithPos (EventEndElement n) pos) ->
k (E.Chunks [WithPos (RtEndElement n) pos]) E.>>== loop
Just (WithPos (EventContent c) pos) -> do
t <- contentToText pos c
ts <- takeContents $ (:) t
let text = T.strip $ T.concat $ ts []
if T.null text
then go k
else k (E.Chunks [WithPos (RtText text) pos]) E.>>== loop
Just (WithPos (EventComment{}) _) -> go k
contentToText pos c =
case c of
ContentEntity e ->
case renderEntity e of
Nothing -> E.throwError $ InvalidEntity e pos
Just t -> return t
ContentText t -> return t
takeContents front = do
do x <- E.peek
case x of
Nothing -> return front
Just (WithPos EventBeginElement{} pos) -> return front
Just (WithPos EventEndElement{} pos) -> return front
Just (WithPos (EventContent c) pos) ->
do EL.drop 1
t <- contentToText pos c
takeContents $ front . (:) t
Just (WithPos EventBeginDocument pos) -> return front
Just (WithPos EventEndDocument pos) -> return front
Just (WithPos EventInstruction{} pos) -> helper
Just (WithPos EventBeginDoctype{} pos) -> helper
Just (WithPos EventEndDoctype{} pos) -> helper
Just (WithPos EventComment{} pos) -> helper
where
helper = EL.drop 1 >> takeContents front
warn = Debug.Trace.trace
debug _ x = x