module Text.Roundtrip.Xml.Enumerator.Parser (
XmlParseIteratee, parseXml, parseXml', parseXml''
, XmlException(..)
, parseXml_, XmlParseIteratee'
) where
import Control.Monad.State
import Control.Exception
import qualified Data.Enumerator as E
import qualified Data.Enumerator.List as EL
import qualified Data.Text as T
import Data.XML.Types
import Data.Typeable
import qualified Debug.Trace
import Text.Roundtrip
import Text.Roundtrip.Parser
import Text.Roundtrip.Xml
import Text.Roundtrip.Xml.ParserInternal
import Data.Reference
import Data.IORef
data (Monad m, Reference r m) => Cursor r m a = Cursor (r (NextCursor r m a)) [a]
data (Monad m, Reference r m) => NextCursor r m a = NextCursor (Cursor r m a)
| None
| Uneval
liftIteratee :: Monad m => m b -> E.Iteratee a m b
liftIteratee m = E.Iteratee (m >>= E.runIteratee . return)
unconsStream :: (Monad m, Reference r m)
=> Cursor r m a -> E.Iteratee a m (Maybe (a, Cursor r m a))
unconsStream p@(Cursor r c)
| null c = do x <- liftIteratee (readRef r)
unconsCursor r x
| otherwise = return $! unconsChunk p
unconsCursor :: (Monad m, Reference r m)
=> r (NextCursor r m a)
-> NextCursor r m a
-> E.Iteratee a m (Maybe (a, Cursor r m a))
unconsCursor _ (NextCursor c) = return $! unconsChunk c
unconsCursor _ None = return Nothing
unconsCursor r Uneval = E.continue (extendCursor r)
unconsChunk :: (Monad m, Reference r m)
=> Cursor r m a -> Maybe (a, Cursor r m a)
unconsChunk (Cursor r c) = Just (head c, Cursor r (tail c))
extendCursor :: (Monad m, Reference r m)
=> r (NextCursor r m a)
-> E.Stream a
-> E.Iteratee a m (Maybe (a, Cursor r m a))
extendCursor r (E.Chunks l)
| null l = E.continue (extendCursor r)
| otherwise = do x <- liftIteratee (insertCursor r l)
return $ unconsChunk x
extendCursor r E.EOF =
do liftIteratee (writeRef r None)
return Nothing
insertCursor :: (Monad m, Reference r m)
=> r (NextCursor r m a)
-> [a]
-> m (Cursor r m a)
insertCursor r l =
do r' <- newRef Uneval
let c = Cursor r' l
writeRef r (NextCursor c)
return c
mkCursor :: (Monad m, Reference r m) => m (Cursor r m a)
mkCursor =
do r <- newRef Uneval
return (Cursor r [])
instance (Monad m, Reference r m) => Stream (Cursor r m a) (E.Iteratee a m) a where
uncons = unconsStream
type XmlParseIteratee' r m a = GenXmlParser (Cursor r m RtEventWithPos) (E.Iteratee RtEventWithPos m) a
concatCursor :: (Monad m, Reference r m) => Cursor r m a -> m [a]
concatCursor (Cursor r l) =
liftM (l ++) (readRef r >>= concatCursor')
concatCursor' :: (Monad m, Reference r m) => NextCursor r m a -> m [a]
concatCursor' (NextCursor n) = concatCursor n
concatCursor' _ = return []
parseXml_''' :: (Reference r m, Monad m) => SourceName -> XmlParseIteratee' r m a -> E.Iteratee RtEventWithPos m a
parseXml_''' sourceName p =
let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc
in do cursor <- liftIteratee mkCursor
res <- runParserT (liftM2 (,) q getInput) Nothing sourceName cursor
case res of
Left err -> E.returnI (E.Error $ toException $ ParseError err)
Right (x, restCursor) ->
do rest <- liftIteratee $ concatCursor restCursor
E.yield x (E.Chunks rest)
parseXml_ :: (Reference r m, Monad m) => SourceName -> EntityRenderer -> XmlParseIteratee' r m a -> E.Iteratee Event m a
parseXml_ sourceName entityRenderer p =
E.joinI $ EL.map eventWithoutPos E.$$
E.joinI $ simplify entityRenderer E.$$
parseXml_''' sourceName p
type PStreamPos = Int
type PStreamState = (PStreamPos, Maybe RtEventWithPos)
type XmlParseIteratee m a = GenXmlParser PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) a
data XmlException = ParseError ParseError
| InvalidEntity T.Text SourcePos
deriving (Typeable, Show)
instance Exception XmlException
instance Monad m => Stream PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) RtEventWithPos where
uncons pos =
do (lastPos, mLastTok) <- get
case () of
_| pos == lastPos ->
do mx <- lift $ EL.head
let pos' = pos + 1
put (pos', mx)
("returning " ++ show mx ++ " from underlying stream, position " ++ show pos') `debug`
case mx of
Just x -> return $ Just (x, pos')
Nothing -> return Nothing
| pos == lastPos 1 ->
("returning " ++ show mLastTok ++ " from lookahead buffer, position " ++ show lastPos) `debug`
case mLastTok of
Just lastTok -> return $ Just (lastTok, lastPos)
Nothing -> return Nothing
| otherwise -> ("invalid lookahead, position request: " ++ show (pos + 1) ++
", position of next token: " ++ show (lastPos + 1) ++
", position of buffered token: " ++ show lastPos ++
" (" ++ show mLastTok ++ ")") `warn` return Nothing
parseXml :: Monad m => SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee Event m a
parseXml sourceName entityRenderer p =
E.joinI $ EL.map eventWithoutPos E.$$
E.joinI $ simplify entityRenderer E.$$
parseXml''' sourceName p
parseXml' :: Monad m => EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a
parseXml' entityRenderer p =
do first <- E.peek
let src = case first of
Just (WithPos _ pos) -> sourceName pos
Nothing -> ""
E.joinI $ simplify entityRenderer E.$$ parseXml''' src p
parseXml'' :: Monad m => SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a
parseXml'' sourceName entityRenderer p =
E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p
parseXml''' :: Monad m => SourceName -> XmlParseIteratee m a -> E.Iteratee RtEventWithPos m a
parseXml''' sourceName p =
let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc
in do let startPos = 1
res <- evalStateT (runParserT q Nothing sourceName startPos) (startPos, Nothing)
case res of
Left err -> E.returnI (E.Error $ toException $ ParseError err)
Right x -> E.yield x E.EOF
simplify :: Monad m => (T.Text -> Maybe T.Text) -> E.Enumeratee EventWithPos RtEventWithPos m b
simplify renderEntity = loop
where
loop = E.checkDone go
go k =
do x <- EL.head
case x of
Nothing -> k (E.Chunks []) E.>>== return
Just (WithPos EventBeginDocument pos) ->
k (E.Chunks [WithPos RtBeginDocument pos]) E.>>== loop
Just (WithPos EventEndDocument pos) ->
k (E.Chunks [WithPos RtEndDocument pos]) E.>>== loop
Just (WithPos (EventInstruction{}) pos) -> go k
Just (WithPos (EventBeginDoctype{}) pos) -> go k
Just (WithPos (EventEndDoctype{}) pos) -> go k
Just (WithPos (EventBeginElement n as) pos) ->
let insertAttr m (k, vs) =
do vs' <- mapM (contentToText pos) vs
return $ (k, (T.concat vs')) : m
in do as' <- foldM insertAttr [] as
as' `seq` k (E.Chunks [WithPos (RtBeginElement n (reverse as')) pos])
E.>>== loop
Just (WithPos (EventEndElement n) pos) ->
k (E.Chunks [WithPos (RtEndElement n) pos]) E.>>== loop
Just (WithPos (EventContent c) pos) -> do
t <- contentToText pos c
ts <- takeContents $ (:) t
let text = T.strip $ T.concat $ ts []
if T.null text
then go k
else k (E.Chunks [WithPos (RtText text) pos]) E.>>== loop
Just (WithPos (EventComment{}) _) -> go k
contentToText pos c =
case c of
ContentEntity e ->
case renderEntity e of
Nothing -> E.throwError $ InvalidEntity e pos
Just t -> return t
ContentText t -> return t
takeContents front = do
do x <- E.peek
case x of
Nothing -> return front
Just (WithPos EventBeginElement{} pos) -> return front
Just (WithPos EventEndElement{} pos) -> return front
Just (WithPos (EventContent c) pos) ->
do EL.drop 1
t <- contentToText pos c
takeContents $ front . (:) t
Just (WithPos EventBeginDocument pos) -> return front
Just (WithPos EventEndDocument pos) -> return front
Just (WithPos EventInstruction{} pos) -> helper
Just (WithPos EventBeginDoctype{} pos) -> helper
Just (WithPos EventEndDoctype{} pos) -> helper
Just (WithPos EventComment{} pos) -> helper
where
helper = EL.drop 1 >> takeContents front
warn = Debug.Trace.trace
debug _ x = x