{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StrictData #-}
module Symantic.XML.Write where
import Control.Applicative (Applicative(..), liftA2)
import Control.Monad (Monad(..))
import Data.Bool
import Data.Default.Class (Default(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), all)
import Data.Function (($), (.), const)
import Data.Maybe (Maybe(..))
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.String (String, IsString(..))
import Data.Traversable (Traversable(..))
import System.IO (IO, FilePath)
import Text.Show (Show(..))
import qualified Control.Monad.Trans.Reader as R
import qualified Control.Monad.Trans.State as S
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Char as Char
import qualified Data.HashMap.Strict as HM
import qualified Data.HashSet as HS
import qualified Data.Sequence as Seq
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy.Encoding as TL
import Symantic.XML.Document as XML
writeXML :: NoSource src => XMLs src -> TL.Text
writeXML xs = TLB.toLazyText $ write xs `R.runReader` def
writeXMLIndented :: NoSource src => TL.Text -> XMLs src -> TL.Text
writeXMLIndented ind xs =
TLB.toLazyText $
write xs `R.runReader` def
{ reader_indent = if TL.null ind then mempty else "\n"
, reader_indent_delta = ind
}
writeFile :: FilePath -> TL.Text -> IO ()
writeFile fp t = BSL.writeFile fp $ TL.encodeUtf8 t
type Write = R.Reader Reader TLB.Builder
instance Semigroup Write where
(<>) = liftA2 (<>)
instance Monoid Write where
mempty = return ""
mappend = (<>)
instance IsString Write where
fromString = return . fromString
data Reader = Reader
{ reader_ns_scope :: Namespaces NCName
, reader_indent :: TLB.Builder
, reader_indent_delta :: TL.Text
, reader_no_text :: Bool
}
instance Default Reader where
def = Reader
{ reader_ns_scope = def
, reader_indent = ""
, reader_indent_delta = ""
, reader_no_text = False
}
class Buildable a where
build :: a -> TLB.Builder
instance Buildable Char.Char where
build = TLB.singleton
instance Buildable String where
build = TLB.fromString
instance Buildable TL.Text where
build = TLB.fromLazyText
instance Buildable NCName where
build = build . unNCName
instance Buildable Name where
build = build . unName
instance Buildable PName where
build PName{..} =
case pNameSpace of
Nothing -> build pNameLocal
Just p -> build p<>":"<> build pNameLocal
instance Buildable Namespace where
build = build . unNamespace
instance Buildable EntityRef where
build EntityRef{..} = "&"<>build entityRef_name<>";"
instance Buildable CharRef where
build (CharRef c) = "&#"<>build (show (Char.ord c))<>";"
instance Buildable EscapedText where
build (EscapedText et) = (`foldMap` et) $ \case
EscapedPlain t -> build t
EscapedEntityRef r -> build r
EscapedCharRef r -> build r
class Writeable a where
write :: a -> Write
instance Writeable NCName where
write = return . TLB.fromLazyText . unNCName
instance NoSource src => Writeable (XMLs src) where
write xs = do
ro <- R.ask
if TL.null (reader_indent_delta ro)
then foldMap write xs
else
R.local (const ro{reader_no_text}) $
foldMap write xs
where reader_no_text =
(`all` xs) $ \case
Tree (Sourced _ (NodeText (EscapedText et))) _ts ->
all (\case
EscapedPlain t -> TL.all Char.isSpace t
_ -> False) et
_ -> True
instance NoSource src => Writeable (XML src) where
write (Tree (Sourced _src nod) xs) = do
ro <- R.ask
case nod of
NodeAttr an
| [Tree (Sourced _ (NodeText av)) _] <- toList xs -> do
return $ " "<>buildAttr (prefixifyQName (reader_ns_scope ro) an) av
| otherwise -> mempty
NodeCDATA t ->
return $
reader_indent ro <>
"<[CDATA[["<>build t<>"]]>"
NodeComment t ->
return $
reader_indent ro <>
"<!--"<>build t<>"-->"
NodeElem elemQName -> do
let (elemAttrs, elemChilds) =
(`Seq.spanl` xs) $ \case
Tree (Sourced _ NodeAttr{}) _ -> True
_ -> False
let (usedNS, declNS) ::
( HS.HashSet Namespace
, Namespaces NCName
) =
foldl' go (initUsedNS, initDeclNS) elemAttrs
where
initUsedNS
| qNameSpace elemQName == xmlns_empty = mempty
| otherwise = HS.singleton $ qNameSpace elemQName
initDeclNS = def{namespaces_default = namespaces_default $ reader_ns_scope ro}
go (!uNS, !dNS) = \case
Tree (Sourced _ (NodeAttr QName{..})) vs
| qNameSpace == xmlns_xmlns
, [Tree (Sourced _ (NodeText t)) _] <- toList vs ->
let n = unescapeText t in
(uNS,) dNS
{ namespaces_prefixes =
(if TL.null n
then HM.delete
else (`HM.insert` qNameLocal))
(Namespace n)
(namespaces_prefixes dNS)
}
| qNameSpace == xmlns_empty
, qNameLocal == NCName "xmlns"
, [Tree (Sourced _ (NodeText t)) _] <- toList vs ->
(uNS,)
dNS{namespaces_default = Namespace $ unescapeText t}
| qNameSpace == xmlns_empty -> (uNS, dNS)
| otherwise -> (HS.insert qNameSpace uNS, dNS)
_ -> (uNS, dNS)
let inhNS =
HM.union
(namespaces_prefixes declNS)
(namespaces_prefixes (reader_ns_scope ro))
let autoNS =
HM.delete (namespaces_default declNS) $
(`S.evalState` HS.empty) $
traverse
(\() -> S.gets freshNCName)
(HS.toMap usedNS `HM.difference` inhNS)
let autoAttrs =
HM.foldlWithKey'
(\acc (Namespace v) p ->
(acc Seq.|>) $
Tree (notSourced $ NodeAttr QName{qNameSpace=xmlns_xmlns, qNameLocal=p}) $
pure $ tree0 $ notSourced $ NodeText $ EscapedText $ pure $ EscapedPlain v
) mempty autoNS
let scopeNS = declNS { namespaces_prefixes = autoNS <> inhNS }
return $
let build_elemPName = build $ prefixifyQName scopeNS elemQName in
let build_elemAttrs =
(`foldMap` (autoAttrs <> elemAttrs)) $ \case
Tree (Sourced _ (NodeAttr an)) vs
| [Tree (Sourced _ (NodeText av)) _] <- toList vs ->
" "<>buildAttr (prefixifyQName scopeNS{namespaces_default=""} an) av
_ -> mempty in
reader_indent ro
<> "<"<>build_elemPName
<> build_elemAttrs <>
let build_elemChilds = write elemChilds
`R.runReader` ro
{ reader_ns_scope = scopeNS
, reader_indent = reader_indent ro <> build (reader_indent_delta ro)
} in
if null elemChilds
then "/>"
else ">"
<> build_elemChilds
<> (
if TL.null (reader_indent_delta ro)
|| noIndent elemChilds
then mempty
else reader_indent ro
)
<> "</"<>build_elemPName<>">"
where
noIndent =
all $ \case
Tree (Sourced _ (NodeText _txt)) _ts -> True
_ -> False
NodePI pn pv
| pn == "xml" -> do
write_xs <- write xs
return $
"<?"<>build pn<>s<>write_xs<>"?>"
| otherwise ->
return $
reader_indent ro <>
"<?"<>build pn<>s<>build pv<>"?>"
where s | TL.null pv = ""
| otherwise = " "
NodeText t -> do
return $
if reader_no_text ro
then mempty
else build t
buildAttr :: PName -> EscapedText -> TLB.Builder
buildAttr n v = build n<>"=\""<>buildAttrValue v<>"\""
buildAttrValue :: EscapedText -> TLB.Builder
buildAttrValue (EscapedText et) = (`foldMap` et) $ \case
EscapedPlain p -> build p
EscapedEntityRef EntityRef{..} ->
build $ TL.replace "\"" """ entityRef_value
EscapedCharRef (CharRef c)
| c == '\"' -> """
| otherwise -> build c
removeSpaces :: XMLs src -> XMLs src
removeSpaces xs =
if (`all` xs) $ \case
Tree (Sourced _ (NodeText (EscapedText et))) _ts ->
all (\case
EscapedPlain t -> TL.all Char.isSpace t
_ -> False) et
_ -> True
then (`Seq.filter` xs) $ \case
Tree (Sourced _ NodeText{}) _ts -> False
_ -> True
else xs