{-# LANGUAGE FlexibleContexts, UndecidableInstances #-} module Text.Roundtrip.Xml.Parser ( GenXmlParser, XmlParser, runXmlParser, runXmlParser', runXmlParser'' , WithPos, EventWithPos, eventWithPos, eventWithoutPos , P.SourceName, P.Line, P.Column, P.ParseError , EntityRenderer, defaultEntityRenderer ) where import Control.Monad (unless, foldM) import Control.Monad.State import Control.Monad.Identity (Identity) import Control.Exception (ErrorCall(..), SomeException, Exception, toException) import qualified Data.Map as Map import Data.Map (Map) import qualified Data.List as List import Data.Typeable (Typeable) import qualified Data.Text as T import Data.Either (partitionEithers) import qualified Debug.Trace import qualified Text.Parsec as P import qualified Text.Parsec.Pos as P import qualified Text.PrettyPrint.HughesPJ as Pp import Data.XML.Types import Text.Roundtrip import Text.Roundtrip.Xml.Classes import Text.Roundtrip.Xml.ParserInternal import Text.Roundtrip.Xml.Pretty type EntityRenderer = T.Text -> Maybe T.Text defaultEntityRenderer :: EntityRenderer defaultEntityRenderer = const Nothing type XmlParser a = GenXmlParser [RtEventWithPos] Identity a runXmlParser :: P.SourceName -> EntityRenderer -> [Event] -> XmlParser a -> (Either P.ParseError a) runXmlParser sourceName renderer events p = runXmlParser'' sourceName renderer (map eventWithoutPos events) p runXmlParser' :: EntityRenderer -> [EventWithPos] -> XmlParser a -> (Either P.ParseError a) runXmlParser' renderer events p = runXmlParser'' sourceName renderer events p where sourceName = case events of [] -> "" (e:_) -> P.sourceName (wp_pos e) runXmlParser'' :: P.SourceName -> EntityRenderer -> [EventWithPos] -> XmlParser a -> (Either P.ParseError a) runXmlParser'' sourceName entityRenderer events p = let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc rtEvents = List.unfoldr (simplifyEvents entityRenderer) events in P.runParser q Nothing sourceName rtEvents simplifyEvents :: EntityRenderer -> [EventWithPos] -> Maybe (RtEventWithPos, [EventWithPos]) simplifyEvents renderEntity evs = go evs where go evs = case evs of [] -> Nothing (WithPos EventBeginDocument pos : rest) -> Just (WithPos RtBeginDocument pos, rest) (WithPos EventEndDocument pos : rest) -> Just (WithPos RtEndDocument pos, rest) (WithPos (EventInstruction _) _ : rest) -> go rest (WithPos (EventBeginDoctype _ _) _ : rest) -> go rest (WithPos EventEndDoctype _ : rest) -> go rest (WithPos (EventBeginElement n as) pos : rest) -> let insertAttr :: Either T.Text AttrMap -> Attribute -> Either T.Text AttrMap insertAttr em (k, vs) = case em of Right m -> case partitionEithers (map contentToText vs) of ((t:_), _) -> Left t ([], vs') -> Right ((k, T.concat vs') : m) Left t -> Left t in case Prelude.foldl insertAttr (Right []) as of Right as' -> as' `seq` Just (WithPos (RtBeginElement n (reverse as')) pos, rest) Left t -> Just (WithPos (RtInvalidEntity t) pos, []) (WithPos (EventEndElement n) pos : rest) -> Just (WithPos (RtEndElement n) pos, rest) (WithPos (EventContent c) pos : rest) -> case contentToText c of Left t -> Just (WithPos (RtInvalidEntity t) pos, []) Right t -> let (cs, rest') = splitContent rest in case partitionEithers (map contentToText cs) of ((t:_), _) -> Just (WithPos (RtInvalidEntity t) pos, []) ([], ts) -> let text = T.strip $ t `T.append` T.concat ts in if T.null text then go rest' else Just (WithPos (RtText text) pos, rest') (WithPos (EventComment _) _ : rest) -> go rest splitContent (WithPos (EventContent c) pos : rest) = let (cs, rest') = splitContent rest in (c:cs, rest') splitContent l = ([], l) contentToText c = case c of ContentText t -> Right t ContentEntity t -> case renderEntity t of Just t' -> Right t' Nothing -> Left t instance (Monad m, P.Stream s m RtEventWithPos) => IsoFunctor (GenXmlParser s m) where iso <$> (GenXmlParser p) = GenXmlParser $ xmlParserApply iso p instance (Monad m, P.Stream s m RtEventWithPos) => ProductFunctor (GenXmlParser s m) where (GenXmlParser p) <*> (GenXmlParser q) = GenXmlParser $ xmlParserConcat p q instance (Monad m, P.Stream s m RtEventWithPos) => Alternative (GenXmlParser s m) where GenXmlParser p <|> GenXmlParser q = GenXmlParser $ xmlParserAlternative1Lookahead p q GenXmlParser p <||> GenXmlParser q = GenXmlParser $ xmlParserAlternativeInfLookahead p q empty = GenXmlParser P.parserZero xmlParserApply iso p = do a <- p case apply iso a of Just b -> return b Nothing -> fail $ isoFailedErrorMessageL iso a xmlParserConcat p q = do x <- p y <- q return (x, y) xmlParserAlternative1Lookahead p q = p P.<|> q xmlParserAlternativeInfLookahead p q = P.try p P.<|> q instance (Monad m, P.Stream s m RtEventWithPos) => Syntax (GenXmlParser s m) where pure x = GenXmlParser (return x) instance (Monad m, P.Stream s m RtEventWithPos) => XmlSyntax (GenXmlParser s m) where xmlBeginDoc = GenXmlParser xmlParserBeginDoc xmlEndDoc = GenXmlParser xmlParserEndDoc xmlBeginElem = GenXmlParser . xmlParserBeginElem xmlAttrValue = GenXmlParser . xmlParserAttrValue xmlTextNotEmpty = GenXmlParser xmlParserTextNotEmpty xmlEndElem = GenXmlParser . xmlParserEndElem matchEvent :: (Show a, Monad m, P.Stream s m RtEventWithPos) => (RtEvent -> Maybe a) -> String -> PParser s m a matchEvent matcher desc = do state <- P.getState case state of Just _ -> P.parserZero Nothing -> P.tokenPrim show (\_ t _ -> wp_pos t) debugMatcher where debugMatcher ev = let res = matcher (wp_data ev) in ("matching " ++ show ev ++ " against " ++ desc ++ ", result: " ++ show res) `debug` res mkPParser :: Monad m => String -> PParser s m a -> PParser s m a mkPParser msg p = (p P. msg) xmlParserBeginDoc :: (Monad m, P.Stream s m RtEventWithPos) => PParser s m () xmlParserBeginDoc = mkPParser "begin-document" $ let f RtBeginDocument = Just () f _ = Nothing in matchEvent f "begin-document" xmlParserEndDoc :: (Monad m, P.Stream s m RtEventWithPos) => PParser s m () xmlParserEndDoc = mkPParser "end-document" $ let f RtEndDocument = Just () f _ = Nothing in matchEvent f "end-document" xmlParserBeginElem :: (Monad m, P.Stream s m RtEventWithPos) => Name -> PParser s m () xmlParserBeginElem name = mkPParser ("<" ++ ppStr name ++ " ...>") $ do let f (RtBeginElement name' attrs) | name == name' = Just attrs f _ = Nothing attrs <- matchEvent f ("begin-element " ++ ppStr name) unless (null attrs) (P.putState $ Just attrs) return () xmlParserAttrValue :: Monad m => Name -> PParser s m T.Text xmlParserAttrValue name = mkPParser ("attribute " ++ ppStr name) $ do state <- P.getState case state of Nothing -> P.parserZero Just m -> case List.break (\(x,_) -> x == name) m of (prefix, (_, t) : suffix) -> do let m' = prefix ++ suffix if null m' then P.putState Nothing else P.putState (Just m') return t _ -> P.parserZero xmlParserEndElem :: (Monad m, P.Stream s m RtEventWithPos) => Name -> PParser s m () xmlParserEndElem name = mkPParser ("") $ let f (RtEndElement name') | name == name' = Just () f _ = Nothing in matchEvent f ("end-element " ++ ppStr name) xmlParserTextNotEmpty :: (Monad m, P.Stream s m RtEventWithPos) => PParser s m T.Text xmlParserTextNotEmpty = mkPParser "text node" $ let f (RtText t) = Just t f _ = Nothing in matchEvent f "text node" -- debug = Debug.Trace.trace debug _ x = x