{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE StrictData #-} module Symantic.XML.Write where import Control.Applicative (Applicative(..), liftA2) import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), all) import Data.Function (($), (.), const) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.String (String, IsString(..)) import Data.Traversable (Traversable(..)) import System.IO (IO, FilePath) import Text.Show (Show(..)) import qualified Control.Monad.Trans.Reader as R import qualified Control.Monad.Trans.State as S import qualified Data.ByteString.Lazy as BSL import qualified Data.Char as Char import qualified Data.HashMap.Strict as HM import qualified Data.HashSet as HS import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as TLB import qualified Data.Text.Lazy.Encoding as TL import Symantic.XML.Document as XML writeXML :: NoSource src => XMLs src -> TL.Text writeXML xs = TLB.toLazyText $ write xs `R.runReader` def writeXMLIndented :: NoSource src => TL.Text -> XMLs src -> TL.Text writeXMLIndented ind xs = TLB.toLazyText $ write xs `R.runReader` def { reader_indent = if TL.null ind then mempty else "\n" , reader_indent_delta = ind } writeFile :: FilePath -> TL.Text -> IO () writeFile fp t = BSL.writeFile fp $ TL.encodeUtf8 t -- * Type 'Write' type Write = R.Reader Reader TLB.Builder instance Semigroup Write where (<>) = liftA2 (<>) instance Monoid Write where mempty = return "" mappend = (<>) instance IsString Write where fromString = return . fromString -- ** Type 'Reader' data Reader = Reader { reader_ns_scope :: Namespaces NCName , reader_indent :: TLB.Builder , reader_indent_delta :: TL.Text , reader_no_text :: Bool } instance Default Reader where def = Reader { reader_ns_scope = def , reader_indent = "" , reader_indent_delta = "" , reader_no_text = False } -- * Class 'Buildable' class Buildable a where build :: a -> TLB.Builder instance Buildable Char.Char where build = TLB.singleton instance Buildable String where build = TLB.fromString instance Buildable TL.Text where build = TLB.fromLazyText instance Buildable NCName where build = build . unNCName instance Buildable Name where build = build . unName instance Buildable PName where build PName{..} = case pNameSpace of Nothing -> build pNameLocal Just p -> build p<>":"<> build pNameLocal instance Buildable Namespace where build = build . unNamespace instance Buildable EntityRef where build EntityRef{..} = "&"<>build entityRef_name<>";" instance Buildable CharRef where build (CharRef c) = "&#"<>build (show (Char.ord c))<>";" instance Buildable EscapedText where build (EscapedText et) = (`foldMap` et) $ \case EscapedPlain t -> build t EscapedEntityRef r -> build r EscapedCharRef r -> build r -- * Class 'Writable' class Writeable a where write :: a -> Write instance Writeable NCName where write = return . TLB.fromLazyText . unNCName instance NoSource src => Writeable (XMLs src) where write xs = do ro <- R.ask if TL.null (reader_indent_delta ro) then foldMap write xs else R.local (const ro{reader_no_text}) $ foldMap write xs where reader_no_text = (`all` xs) $ \case Tree (Sourced _ (NodeText (EscapedText et))) _ts -> all (\case EscapedPlain t -> TL.all Char.isSpace t _ -> False) et _ -> True instance NoSource src => Writeable (XML src) where write (Tree (Sourced _src nod) xs) = do ro <- R.ask case nod of NodeAttr an | [Tree (Sourced _ (NodeText av)) _] <- toList xs -> do return $ " "<>buildAttr (prefixifyQName (reader_ns_scope ro) an) av | otherwise -> mempty NodeCDATA t -> return $ reader_indent ro <> "<[CDATA[["<>build t<>"]]>" NodeComment t -> return $ reader_indent ro <> "" NodeElem elemQName -> do let (elemAttrs, elemChilds) = (`Seq.spanl` xs) $ \case Tree (Sourced _ NodeAttr{}) _ -> True _ -> False let (usedNS, declNS) :: ( HS.HashSet Namespace , Namespaces NCName ) = foldl' go (initUsedNS, initDeclNS) elemAttrs where initUsedNS | qNameSpace elemQName == xmlns_empty = mempty | otherwise = HS.singleton $ qNameSpace elemQName initDeclNS = def{namespaces_default = namespaces_default $ reader_ns_scope ro} go (!uNS, !dNS) = \case Tree (Sourced _ (NodeAttr QName{..})) vs -- xmlns:prefix="namespace" | qNameSpace == xmlns_xmlns , [Tree (Sourced _ (NodeText t)) _] <- toList vs -> let n = unescapeText t in (uNS,) dNS { namespaces_prefixes = (if TL.null n then HM.delete -- NOTE: empty namespace means removal of the prefix from scope. else (`HM.insert` qNameLocal)) (Namespace n) (namespaces_prefixes dNS) } -- xmlns="namespace" | qNameSpace == xmlns_empty , qNameLocal == NCName "xmlns" , [Tree (Sourced _ (NodeText t)) _] <- toList vs -> (uNS,) dNS{namespaces_default = Namespace $ unescapeText t} -- name="value" | qNameSpace == xmlns_empty -> (uNS, dNS) -- {namespace}name="value" | otherwise -> (HS.insert qNameSpace uNS, dNS) _ -> (uNS, dNS) let inhNS = -- NOTE: the inherited namespaces, -- including those declared at this element. HM.union (namespaces_prefixes declNS) (namespaces_prefixes (reader_ns_scope ro)) let autoNS = -- NOTE: the namespaces used but not declared nor default, -- with fresh prefixes. HM.delete (namespaces_default declNS) $ (`S.evalState` HS.empty) $ traverse (\() -> S.gets freshNCName) (HS.toMap usedNS `HM.difference` inhNS) let autoAttrs = -- NOTE: XMLify autoNS HM.foldlWithKey' (\acc (Namespace v) p -> (acc Seq.|>) $ Tree (notSourced $ NodeAttr QName{qNameSpace=xmlns_xmlns, qNameLocal=p}) $ pure $ tree0 $ notSourced $ NodeText $ EscapedText $ pure $ EscapedPlain v ) mempty autoNS let scopeNS = declNS { namespaces_prefixes = autoNS <> inhNS } return $ let build_elemPName = build $ prefixifyQName scopeNS elemQName in let build_elemAttrs = (`foldMap` (autoAttrs <> elemAttrs)) $ \case Tree (Sourced _ (NodeAttr an)) vs | [Tree (Sourced _ (NodeText av)) _] <- toList vs -> " "<>buildAttr (prefixifyQName scopeNS{namespaces_default=""} an) av _ -> mempty in reader_indent ro <> "<"<>build_elemPName <> build_elemAttrs <> let build_elemChilds = write elemChilds `R.runReader` ro { reader_ns_scope = scopeNS , reader_indent = reader_indent ro <> build (reader_indent_delta ro) } in if null elemChilds then "/>" else ">" <> build_elemChilds <> ( if TL.null (reader_indent_delta ro) || noIndent elemChilds then mempty else reader_indent ro ) <> "build_elemPName<>">" where noIndent = all $ \case Tree (Sourced _ (NodeText _txt)) _ts -> True _ -> False NodePI pn pv | pn == "xml" -> do write_xs <- write xs return $ "build pn<>s<>write_xs<>"?>" | otherwise -> return $ reader_indent ro <> "build pn<>s<>build pv<>"?>" where s | TL.null pv = "" | otherwise = " " NodeText t -> do return $ if reader_no_text ro then mempty else build t buildAttr :: PName -> EscapedText -> TLB.Builder buildAttr n v = build n<>"=\""<>buildAttrValue v<>"\"" buildAttrValue :: EscapedText -> TLB.Builder buildAttrValue (EscapedText et) = (`foldMap` et) $ \case EscapedPlain p -> build p EscapedEntityRef EntityRef{..} -> build $ TL.replace "\"" """ entityRef_value EscapedCharRef (CharRef c) | c == '\"' -> """ | otherwise -> build c removeSpaces :: XMLs src -> XMLs src removeSpaces xs = if (`all` xs) $ \case Tree (Sourced _ (NodeText (EscapedText et))) _ts -> all (\case EscapedPlain t -> TL.all Char.isSpace t _ -> False) et _ -> True then (`Seq.filter` xs) $ \case Tree (Sourced _ NodeText{}) _ts -> False _ -> True else xs