module Text.Roundtrip.Xml.Parser (
GenXmlParser, XmlParser, runXmlParser, runXmlParser', runXmlParser''
, WithPos, EventWithPos, eventWithPos, eventWithoutPos
, P.SourceName, P.Line, P.Column, P.ParseError
, EntityRenderer, defaultEntityRenderer
) where
import Control.Monad (unless, foldM)
import Control.Monad.State
import Control.Monad.Identity (Identity)
import Control.Exception (ErrorCall(..), SomeException, Exception, toException)
import qualified Data.Map as Map
import Data.Map (Map)
import qualified Data.List as List
import Data.Typeable (Typeable)
import qualified Data.Text as T
import Data.Either (partitionEithers)
import qualified Debug.Trace
import qualified Text.Parsec as P
import qualified Text.Parsec.Pos as P
import qualified Text.PrettyPrint.HughesPJ as Pp
import Data.XML.Types
import Text.Roundtrip
import Text.Roundtrip.Xml.Classes
import Text.Roundtrip.Xml.ParserInternal
import Text.Roundtrip.Xml.Pretty
type EntityRenderer = T.Text -> Maybe T.Text
defaultEntityRenderer :: EntityRenderer
defaultEntityRenderer = const Nothing
type XmlParser a = GenXmlParser [RtEventWithPos] Identity a
runXmlParser :: P.SourceName -> EntityRenderer -> [Event] -> XmlParser a -> (Either P.ParseError a)
runXmlParser sourceName renderer events p =
runXmlParser'' sourceName renderer (map eventWithoutPos events) p
runXmlParser' :: EntityRenderer -> [EventWithPos] -> XmlParser a -> (Either P.ParseError a)
runXmlParser' renderer events p = runXmlParser'' sourceName renderer events p
where
sourceName = case events of
[] -> ""
(e:_) -> P.sourceName (wp_pos e)
runXmlParser'' :: P.SourceName -> EntityRenderer -> [EventWithPos] -> XmlParser a -> (Either P.ParseError a)
runXmlParser'' sourceName entityRenderer events p =
let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc
rtEvents = List.unfoldr (simplifyEvents entityRenderer) events
in P.runParser q Nothing sourceName rtEvents
simplifyEvents :: EntityRenderer -> [EventWithPos] -> Maybe (RtEventWithPos, [EventWithPos])
simplifyEvents renderEntity evs = go evs
where
go evs =
case evs of
[] -> Nothing
(WithPos EventBeginDocument pos : rest) -> Just (WithPos RtBeginDocument pos, rest)
(WithPos EventEndDocument pos : rest) -> Just (WithPos RtEndDocument pos, rest)
(WithPos (EventInstruction _) _ : rest) -> go rest
(WithPos (EventBeginDoctype _ _) _ : rest) -> go rest
(WithPos EventEndDoctype _ : rest) -> go rest
(WithPos (EventBeginElement n as) pos : rest) ->
let insertAttr :: Either T.Text AttrMap -> Attribute
-> Either T.Text AttrMap
insertAttr em (k, vs) =
case em of
Right m ->
case partitionEithers (map contentToText vs) of
((t:_), _) -> Left t
([], vs') -> Right ((k, T.concat vs') : m)
Left t -> Left t
in case Prelude.foldl insertAttr (Right []) as of
Right as' -> as' `seq` Just (WithPos (RtBeginElement n (reverse as')) pos, rest)
Left t -> Just (WithPos (RtInvalidEntity t) pos, [])
(WithPos (EventEndElement n) pos : rest) -> Just (WithPos (RtEndElement n) pos, rest)
(WithPos (EventContent c) pos : rest) ->
case contentToText c of
Left t -> Just (WithPos (RtInvalidEntity t) pos, [])
Right t ->
let (cs, rest') = splitContent rest
in case partitionEithers (map contentToText cs) of
((t:_), _) -> Just (WithPos (RtInvalidEntity t) pos, [])
([], ts) -> let text = T.strip $ t `T.append` T.concat ts
in if T.null text
then go rest'
else Just (WithPos (RtText text) pos, rest')
(WithPos (EventComment _) _ : rest) -> go rest
splitContent (WithPos (EventContent c) pos : rest) =
let (cs, rest') = splitContent rest
in (c:cs, rest')
splitContent l = ([], l)
contentToText c =
case c of
ContentText t -> Right t
ContentEntity t ->
case renderEntity t of
Just t' -> Right t'
Nothing -> Left t
instance (Monad m, P.Stream s m RtEventWithPos) => IsoFunctor (GenXmlParser s m) where
iso <$> (GenXmlParser p) = GenXmlParser $ xmlParserApply iso p
instance (Monad m, P.Stream s m RtEventWithPos) => ProductFunctor (GenXmlParser s m) where
(GenXmlParser p) <*> (GenXmlParser q) = GenXmlParser $ xmlParserConcat p q
instance (Monad m, P.Stream s m RtEventWithPos) => Alternative (GenXmlParser s m) where
GenXmlParser p <|> GenXmlParser q = GenXmlParser $ xmlParserAlternative1Lookahead p q
GenXmlParser p <||> GenXmlParser q = GenXmlParser $ xmlParserAlternativeInfLookahead p q
empty = GenXmlParser P.parserZero
xmlParserApply iso p =
do a <- p
case apply iso a of
Just b -> return b
Nothing -> fail $ isoFailedErrorMessageL iso a
xmlParserConcat p q =
do x <- p
y <- q
return (x, y)
xmlParserAlternative1Lookahead p q = p P.<|> q
xmlParserAlternativeInfLookahead p q = P.try p P.<|> q
instance (Monad m, P.Stream s m RtEventWithPos) => Syntax (GenXmlParser s m) where
pure x = GenXmlParser (return x)
instance (Monad m, P.Stream s m RtEventWithPos) => XmlSyntax (GenXmlParser s m) where
xmlBeginDoc = GenXmlParser xmlParserBeginDoc
xmlEndDoc = GenXmlParser xmlParserEndDoc
xmlBeginElem = GenXmlParser . xmlParserBeginElem
xmlAttrValue = GenXmlParser . xmlParserAttrValue
xmlTextNotEmpty = GenXmlParser xmlParserTextNotEmpty
xmlEndElem = GenXmlParser . xmlParserEndElem
matchEvent :: (Show a, Monad m, P.Stream s m RtEventWithPos)
=> (RtEvent -> Maybe a) -> String -> PParser s m a
matchEvent matcher desc =
do state <- P.getState
case state of
Just _ -> P.parserZero
Nothing -> P.tokenPrim show (\_ t _ -> wp_pos t) debugMatcher
where
debugMatcher ev =
let res = matcher (wp_data ev)
in ("matching " ++ show ev ++ " against " ++ desc ++ ", result: " ++ show res) `debug` res
mkPParser :: Monad m => String -> PParser s m a -> PParser s m a
mkPParser msg p = (p P.<?> msg)
xmlParserBeginDoc :: (Monad m, P.Stream s m RtEventWithPos) => PParser s m ()
xmlParserBeginDoc = mkPParser "begin-document" $
let f RtBeginDocument = Just ()
f _ = Nothing
in matchEvent f "begin-document"
xmlParserEndDoc :: (Monad m, P.Stream s m RtEventWithPos) => PParser s m ()
xmlParserEndDoc = mkPParser "end-document" $
let f RtEndDocument = Just ()
f _ = Nothing
in matchEvent f "end-document"
xmlParserBeginElem :: (Monad m, P.Stream s m RtEventWithPos) => Name -> PParser s m ()
xmlParserBeginElem name = mkPParser ("<" ++ ppStr name ++ " ...>") $
do let f (RtBeginElement name' attrs) | name == name' = Just attrs
f _ = Nothing
attrs <- matchEvent f ("begin-element " ++ ppStr name)
unless (null attrs) (P.putState $ Just attrs)
return ()
xmlParserAttrValue :: Monad m => Name -> PParser s m T.Text
xmlParserAttrValue name = mkPParser ("attribute " ++ ppStr name) $
do state <- P.getState
case state of
Nothing -> P.parserZero
Just m ->
case List.break (\(x,_) -> x == name) m of
(prefix, (_, t) : suffix) ->
do let m' = prefix ++ suffix
if null m'
then P.putState Nothing
else P.putState (Just m')
return t
_ -> P.parserZero
xmlParserEndElem :: (Monad m, P.Stream s m RtEventWithPos) => Name -> PParser s m ()
xmlParserEndElem name = mkPParser ("</" ++ ppStr name ++ ">") $
let f (RtEndElement name') | name == name' = Just ()
f _ = Nothing
in matchEvent f ("end-element " ++ ppStr name)
xmlParserTextNotEmpty :: (Monad m, P.Stream s m RtEventWithPos) => PParser s m T.Text
xmlParserTextNotEmpty = mkPParser "text node" $
let f (RtText t) = Just t
f _ = Nothing
in matchEvent f "text node"
debug _ x = x