{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module Text.XML.Stream.Token
( tokenToBuilder
, TName (..)
, Token (..)
, TAttribute
, NSLevel (..)
) where
import Data.XML.Types (Instruction (..), Content (..), ExternalID (..))
import qualified Data.Text as T
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8Builder, encodeUtf8BuilderEscaped)
import Data.String (IsString (fromString))
import Data.ByteString.Builder (Builder)
import qualified Data.ByteString.Builder.Prim as E
import Data.ByteString.Builder.Prim ((>*<), (>$<), condB)
import Data.Monoid (mconcat, mempty, (<>))
import Data.Map (Map)
import qualified Data.Set as Set
import Data.List (foldl')
import Control.Arrow (first)
import Data.Word (Word8)
oneSpace :: Builder
oneSpace = " "
data Token = TokenXMLDeclaration [TAttribute]
| TokenInstruction Instruction
| TokenBeginElement TName [TAttribute] Bool Int -- ^ indent
| TokenEndElement TName
| TokenContent Content
| TokenComment Text
| TokenDoctype Text (Maybe ExternalID) [(Text, Text)]
| TokenCDATA Text
deriving Show
tokenToBuilder :: Token -> Builder
tokenToBuilder (TokenXMLDeclaration attrs) =
"
foldAttrs oneSpace attrs <>
"?>"
tokenToBuilder (TokenInstruction (Instruction target data_)) =
"" <>
encodeUtf8Builder target <>
" " <>
encodeUtf8Builder data_ <>
"?>"
tokenToBuilder (TokenBeginElement name attrs' isEmpty indent) =
"<" <>
tnameToText name <>
foldAttrs
(if indent == 0 || lessThan3 attrs
then oneSpace
else mconcat $ ("\n" : replicate indent " "))
attrs <>
(if isEmpty then "/>" else ">")
where
attrs = nubAttrs $ map (first splitTName) attrs'
lessThan3 [] = True
lessThan3 [_] = True
lessThan3 [_, _] = True
lessThan3 _ = False
tokenToBuilder (TokenEndElement name) = "" <> tnameToText name <> ">"
tokenToBuilder (TokenContent c) = contentToText c
tokenToBuilder (TokenCDATA t) = " escCDATA t <> "]]>"
tokenToBuilder (TokenComment t) = ""
tokenToBuilder (TokenDoctype name eid _) =
"
encodeUtf8Builder name <>
go eid <>
">"
where
go Nothing = mempty
go (Just (SystemID uri)) = " SYSTEM \"" <> encodeUtf8Builder uri <> "\""
go (Just (PublicID pid uri)) =
" PUBLIC \"" <>
encodeUtf8Builder pid <>
"\" \"" <>
encodeUtf8Builder uri <>
"\""
data TName = TName (Maybe Text) Text
deriving (Show, Eq, Ord)
tnameToText :: TName -> Builder
tnameToText (TName Nothing name) = encodeUtf8Builder name
tnameToText (TName (Just prefix) name) =
encodeUtf8Builder prefix <> ":" <> encodeUtf8Builder name
contentToText :: Content -> Builder
contentToText (ContentText t) = encodeUtf8BuilderEscaped (charUtf8XmlEscaped ECContent) t
contentToText (ContentEntity e) = "&" <> encodeUtf8Builder e <> ";"
-- | What usage are we escaping for?
data EscapeContext = ECContent -- ^ ..
| ECDoubleArg -- ^
| ECSingleArg -- ^
deriving (Show, Eq)
{-# INLINE charUtf8XmlEscaped #-}
charUtf8XmlEscaped :: EscapeContext -> E.BoundedPrim Word8
charUtf8XmlEscaped ec =
(condB (> _gt) (E.liftFixedToBounded E.word8)) $
(condB (== _lt) (fixed4 (_am,(_l,(_t,_sc))))) $ -- <
escapeFor ECContent (condB (== _gt) (fixed4 (_am,(_g,(_t,_sc))))) $ -- >
(condB (== _am) (fixed5 (_am,(_a,(_m,(_p,_sc)))))) $ -- &
escapeFor ECDoubleArg (condB (== _dq) (fixed6 (_am,(_q,(_u,(_o,(_t,_sc))))))) $ -- "
escapeFor ECSingleArg (condB (== _sq) (fixed6 (_am,(_a,(_p,(_o,(_s,_sc))))))) $ -- '
(E.liftFixedToBounded E.word8) -- fallback for Chars smaller than '>'
where
_gt = 62 -- >
_lt = 60 -- <
_am = 38 -- &
_dq = 34 -- "
_sq = 39 -- '
_l = 108 -- l
_t = 116 -- t
_g = 103 -- g
_a = 97 -- a
_m = 109 -- m
_p = 112 -- p
_o = 111 -- o
_s = 115 -- s
_q = 113 -- q
_u = 117 -- u
_sc = 59 -- ;
{-# INLINE escapeFor #-}
escapeFor :: EscapeContext -> (a -> a) -> a -> a
escapeFor ec' f a
| ec == ec' = f a
| otherwise = a
{-# INLINE fixed4 #-}
fixed4 x = E.liftFixedToBounded $ const x >$<
E.word8 >*< E.word8 >*< E.word8 >*< E.word8
{-# INLINE fixed5 #-}
fixed5 x = E.liftFixedToBounded $ const x >$<
E.word8 >*< E.word8 >*< E.word8 >*< E.word8 >*< E.word8
{-# INLINE fixed6 #-}
fixed6 x = E.liftFixedToBounded $ const x >$<
E.word8 >*< E.word8 >*< E.word8 >*< E.word8 >*< E.word8 >*< E.word8
type TAttribute = (TName, [Content])
foldAttrs :: Builder -- ^ before
-> [TAttribute]
-> Builder
foldAttrs before =
foldMap go
where
go (key, val) =
before <>
tnameToText key <>
"=\"" <>
foldMap go' val <>
"\""
go' (ContentText t) =
encodeUtf8BuilderEscaped (charUtf8XmlEscaped ECDoubleArg) t
go' (ContentEntity t) = "&" <> encodeUtf8Builder t <> ";"
instance IsString TName where
fromString = TName Nothing . T.pack
data NSLevel = NSLevel
{ defaultNS :: Maybe Text
, prefixes :: Map Text Text
}
deriving Show
nubAttrs :: [TAttribute] -> [TAttribute]
nubAttrs orig =
front []
where
(front, _) = foldl' go (id, Set.empty) orig
go (dlist, used) (k, v)
| k `Set.member` used = (dlist, used)
| otherwise = (dlist . ((k, v):), Set.insert k used)
splitTName :: TName -> TName
splitTName x@(TName Just{} _) = x
splitTName x@(TName Nothing t)
| T.null b = x
| otherwise = TName (Just a) $ T.drop 1 b
where
(a, b) = T.break (== ':') t
escCDATA :: Text -> Builder
escCDATA s = encodeUtf8Builder (T.replace "]]>" "]]]]>" s)