{-# LANGUAGE OverloadedStrings #-} -- | 'Enumeratee's to render XML 'Event's. Unlike libxml-enumerator and -- expat-enumerator, this module does not provide IO and ST variants, since the -- underlying rendering operations are pure functions. module Text.XML.Enumerator.Render ( renderBuilder , renderBytes , renderText , prettyBuilder , prettyBytes , prettyText ) where import Data.XML.Types (Event (..), Content (..), Name (..)) import Text.XML.Enumerator.Token import qualified Data.Enumerator as E import qualified Data.Enumerator.List as EL import qualified Data.Enumerator.Text as ET import Data.Enumerator ((>>==), ($$), Iteratee, Step (..)) import qualified Data.Text as T import Data.Text (Text) import Blaze.ByteString.Builder import Blaze.ByteString.Builder.Enumerator (builderToByteString) import qualified Data.Map as Map import Data.Map (Map) import Data.Maybe (fromMaybe) import Data.ByteString (ByteString) import Control.Monad.IO.Class (MonadIO) import Data.Char (isSpace) -- | Pretty prints a stream of 'Event's into a stream of 'Builder's. This -- changes the meaning of some documents, by inserting/modifying whitespace. prettyBuilder :: Monad m => E.Enumeratee Event Builder m b prettyBuilder step0 = E.joinI $ prettify 0 [] $$ loop [] step0 where loop stack = E.checkDone $ step stack step stack k = do x <- EL.head case x of Nothing -> E.yield (E.Continue k) E.EOF Just (EventBeginElement name as) -> do x' <- E.peek if x' == Just (EventEndElement name) then do EL.drop 1 go $ mkBeginToken True True stack name as else go $ mkBeginToken True False stack name as Just e -> go $ eventToToken stack e where go (ts, stack') = k (E.Chunks $ map tokenToBuilder $ ts []) >>== loop stack' -- | Same as 'prettyBuilder', but produces a stream of 'ByteString's. prettyBytes :: MonadIO m => E.Enumeratee Event ByteString m b prettyBytes s = E.joinI $ prettyBuilder $$ builderToByteString s -- | Same as 'prettyBuilder', but produces a stream of 'Text's. prettyText :: MonadIO m => E.Enumeratee Event Text m b prettyText s = E.joinI $ prettyBytes $$ ET.decode ET.utf8 s -- | Render a stream of 'Event's into a stream of 'ByteString's. This function -- wraps around 'renderBuilder' and 'builderToByteString', so it produces -- optimally sized 'ByteString's with minimal buffer copying. -- -- The output is UTF8 encoded. renderBytes :: MonadIO m => E.Enumeratee Event ByteString m b renderBytes s = E.joinI $ renderBuilder $$ builderToByteString s -- | Render a stream of 'Event's into a stream of 'ByteString's. This function -- wraps around 'renderBuilder', 'builderToByteString' and 'renderBytes', so it -- produces optimally sized 'ByteString's with minimal buffer copying. renderText :: MonadIO m => E.Enumeratee Event Text m b renderText s = E.joinI $ renderBytes $$ ET.decode ET.utf8 s -- | Render a stream of 'Event's into a stream of 'Builder's. Builders are from -- the blaze-builder package, and allow the create of optimally sized -- 'ByteString's with minimal buffer copying. renderBuilder :: Monad m => E.Enumeratee Event Builder m b renderBuilder = loop [] where loop stack = E.checkDone $ step stack step stack k = do x <- EL.head case x of Nothing -> E.yield (E.Continue k) E.EOF Just (EventBeginElement name as) -> do x' <- E.peek if x' == Just (EventEndElement name) then do EL.drop 1 go $ mkBeginToken False True stack name as else go $ mkBeginToken False False stack name as Just e -> go $ eventToToken stack e where go (ts, stack') = k (E.Chunks $ map tokenToBuilder $ ts []) >>== loop stack' eventToToken :: Stack -> Event -> ([Token] -> [Token], [NSLevel]) eventToToken s EventBeginDocument = ((:) (TokenBeginDocument [ ("version", [ContentText "1.0"]) , ("encoding", [ContentText "UTF-8"]) ]) , s) eventToToken s EventEndDocument = (id, s) eventToToken s (EventInstruction i) = ((:) (TokenInstruction i), s) eventToToken s (EventBeginDoctype n meid) = ((:) (TokenDoctype n meid), s) eventToToken s EventEndDoctype = (id, s) eventToToken s (EventCDATA t) = ((:) (TokenCDATA t), s) eventToToken s (EventEndElement name) = ((:) (TokenEndElement $ nameToTName sl name), s') where (sl:s') = s eventToToken s (EventContent c) = ((:) (TokenContent c), s) eventToToken s (EventComment t) = ((:) (TokenComment t), s) eventToToken _ EventBeginElement{} = error "eventToToken on EventBeginElement" -- mkBeginToken False s name attrs type Stack = [NSLevel] nameToTName :: NSLevel -> Name -> TName nameToTName _ (Name name _ (Just pref)) | pref == "xml" = TName (Just "xml") name nameToTName _ (Name name Nothing _) = TName Nothing name -- invariant that this is true nameToTName (NSLevel def sl) (Name name (Just ns) _) | def == Just ns = TName Nothing name | otherwise = case Map.lookup ns sl of Nothing -> error "nameToTName" Just pref -> TName (Just pref) name mkBeginToken :: Bool -- ^ pretty print attributes? -> Bool -> Stack -> Name -> [(Name, [Content])] -> ([Token] -> [Token], Stack) mkBeginToken isPretty isClosed s name attrs = ((:) (TokenBeginElement tname tattrs2 isClosed indent), if isClosed then s else sl2 : s) where indent = if isPretty then 2 + 4 * length s else 0 prevsl = case s of [] -> NSLevel Nothing Map.empty sl':_ -> sl' (sl1, tname, tattrs1) = newElemStack prevsl name (sl2, tattrs2) = foldr newAttrStack (sl1, tattrs1) attrs newElemStack :: NSLevel -> Name -> (NSLevel, TName, [TAttribute]) newElemStack nsl@(NSLevel def _) (Name local ns _) | def == ns = (nsl, TName Nothing local, []) newElemStack (NSLevel _ nsmap) (Name local Nothing _) = (NSLevel Nothing nsmap, TName Nothing local, [(TName Nothing "xmlns", [])]) newElemStack (NSLevel _ nsmap) (Name local (Just ns) Nothing) = (NSLevel (Just ns) nsmap, TName Nothing local, [(TName Nothing "xmlns", [ContentText ns])]) newElemStack (NSLevel def nsmap) (Name local (Just ns) (Just pref)) = case Map.lookup ns nsmap of Just pref' | pref == pref' -> ( NSLevel def nsmap , TName (Just pref) local , [] ) _ -> ( NSLevel def nsmap' , TName (Just pref) local , [(TName (Just "xmlns") pref, [ContentText ns])] ) where nsmap' = Map.insert ns pref nsmap newAttrStack :: (Name, [Content]) -> (NSLevel, [TAttribute]) -> (NSLevel, [TAttribute]) newAttrStack (name, value) (NSLevel def nsmap, attrs) = (NSLevel def nsmap', addNS $ (tname, value) : attrs) where (nsmap', tname, addNS) = case name of Name local Nothing _ -> (nsmap, TName Nothing local, id) Name local (Just ns) mpref -> let ppref = fromMaybe "ns" mpref (pref, addNS') = getPrefix ppref nsmap ns in (Map.insert ns pref nsmap, TName (Just pref) local, addNS') getPrefix :: Text -> Map Text Text -> Text -> (Text, [TAttribute] -> [TAttribute]) getPrefix _ _ "http://www.w3.org/XML/1998/namespace" = ("xml", id) getPrefix ppref nsmap ns = case Map.lookup ns nsmap of Just pref -> (pref, id) Nothing -> let pref = findUnused ppref $ Map.elems nsmap in (pref, (:) (TName (Just "xmlns") pref, [ContentText ns])) where findUnused x xs | x `elem` xs = findUnused (x `T.snoc` '_') xs | otherwise = x prettify :: Monad m => Int -> [Name] -> E.Enumeratee Event Event m a prettify level names (Continue k) = do mx <- eventHead case mx of Nothing -> return $ Continue k Just x -> do y <- E.peek (chunks, level', names') <- case (x, y) of (Left contents, _) -> do let es = map EventContent $ cleanWhite contents let es' = if null es then [] else before level : es ++ [after] return (es', level, names) (Right (EventBeginElement name attrs), Just (EventEndElement _)) -> do EL.drop 1 return ([before level, EventBeginElement name attrs, EventEndElement name, after], level, names) (Right (EventBeginElement name attrs), _) -> do return ([before level, EventBeginElement name attrs, after], level + 1, name : names) (Right (EventEndElement _), _) -> do let newLevel = level - 1 return ([before newLevel, EventEndElement $ head names, after], newLevel, tail names) (Right EventBeginDocument, _) -> do _ <- takeContents id return ([EventBeginDocument], level, names) (Right EventEndDocument, _) -> do _ <- takeContents id return ([EventEndDocument, after], level, names) (Right (EventComment t), _) -> do _ <- takeContents id return ([before level, EventComment $ T.map normalSpace t, after], level, names) (Right e, _) -> do _ <- takeContents id return ([before level, e, after], level, names) k (E.Chunks chunks) >>== prettify level' names' where before l = EventContent $ ContentText $ T.replicate l " " after = EventContent $ ContentText "\n" prettify _ _ step = return step eventHead :: Monad m => Iteratee Event m (Maybe (Either [Content] Event)) eventHead = do x <- EL.head case x of Just (EventContent e) -> do es <- takeContents id return $ Just $ Left $ e : es Nothing -> return Nothing Just e -> return $ Just $ Right e takeContents :: Monad m => ([Content] -> [Content]) -> Iteratee Event m [Content] takeContents front = do x <- E.peek case x of Just (EventContent e) -> do EL.drop 1 takeContents $ front . (:) e _ -> return $ front [] normalSpace :: Char -> Char normalSpace c | isSpace c = ' ' | otherwise = c cleanWhite :: [Content] -> [Content] cleanWhite x = go True [] $ go True [] x where go _ end (ContentEntity e:rest) = go False (ContentEntity e : end) rest go isFront end (ContentText t:rest) = if T.null t' then go isFront end rest else go False (ContentText t' : end) rest where t' = (if isFront then T.dropWhile isSpace else id) $ T.map normalSpace t go _ end [] = end