{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module Text.XML.Stream.Token
    ( tokenToBuilder
    , TName (..)
    , Token (..)
    , TAttribute
    , NSLevel (..)
    ) where
import Data.XML.Types (Instruction (..), Content (..), ExternalID (..))
import qualified Data.Text as T
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8Builder, encodeUtf8BuilderEscaped)
import Data.String (IsString (fromString))
import Data.ByteString.Builder (Builder)
import qualified Data.ByteString.Builder.Prim as E
import Data.ByteString.Builder.Prim ((>*<), (>$<), condB)
import Data.Monoid (mconcat, mempty, (<>))
import Data.Map (Map)
import qualified Data.Set as Set
import Data.List (foldl')
import Control.Arrow (first)
import Data.Word (Word8)
oneSpace :: Builder
oneSpace = " "
data Token = TokenXMLDeclaration [TAttribute]
           | TokenInstruction Instruction
           | TokenBeginElement TName [TAttribute] Bool Int 
           | TokenEndElement TName
           | TokenContent Content
           | TokenComment Text
           | TokenDoctype Text (Maybe ExternalID) [(Text, Text)]
           | TokenCDATA Text
    deriving Show
tokenToBuilder :: Token -> Builder
tokenToBuilder (TokenXMLDeclaration attrs) =
    "<?xml" <>
    foldAttrs oneSpace attrs <>
    "?>"
tokenToBuilder (TokenInstruction (Instruction target data_)) =
    "<?" <>
    encodeUtf8Builder target <>
    " " <>
    encodeUtf8Builder data_ <>
    "?>"
tokenToBuilder (TokenBeginElement name attrs' isEmpty indent) =
    "<" <>
    tnameToText name <>
    foldAttrs
        (if indent == 0 || lessThan3 attrs
            then oneSpace
            else mconcat $ ("\n" : replicate indent " "))
        attrs <>
    (if isEmpty then "/>" else ">")
  where
    attrs = nubAttrs $ map (first splitTName) attrs'
    lessThan3 [] = True
    lessThan3 [_] = True
    lessThan3 [_, _] = True
    lessThan3 _ = False
tokenToBuilder (TokenEndElement name) = "</" <> tnameToText name <> ">"
tokenToBuilder (TokenContent c) = contentToText c
tokenToBuilder (TokenCDATA t) = "<![CDATA[" <> escCDATA t <> "]]>"
tokenToBuilder (TokenComment t) = "<!--" <> encodeUtf8Builder t <> "-->"
tokenToBuilder (TokenDoctype name eid _) =
    "<!DOCTYPE " <>
    encodeUtf8Builder name <>
    go eid <>
    ">"
  where
    go Nothing = mempty
    go (Just (SystemID uri)) = " SYSTEM \"" <> encodeUtf8Builder uri <> "\""
    go (Just (PublicID pid uri)) =
        " PUBLIC \"" <>
        encodeUtf8Builder pid <>
        "\" \"" <>
        encodeUtf8Builder uri <>
        "\""
data TName = TName (Maybe Text) Text
    deriving (Show, Eq, Ord)
tnameToText :: TName -> Builder
tnameToText (TName Nothing name) = encodeUtf8Builder name
tnameToText (TName (Just prefix) name) =
  encodeUtf8Builder prefix <> ":" <> encodeUtf8Builder name
contentToText :: Content -> Builder
contentToText (ContentText t) = encodeUtf8BuilderEscaped charUtf8XmlEscaped t
contentToText (ContentEntity e) = "&" <> encodeUtf8Builder e <> ";"
{-# INLINE charUtf8XmlEscaped #-}
charUtf8XmlEscaped :: E.BoundedPrim Word8
charUtf8XmlEscaped =
    condB (>  _gt) (E.liftFixedToBounded E.word8) $
    condB (== _lt) (fixed4 (_am,(_l,(_t,_sc)))) $       
    condB (== _gt) (fixed4 (_am,(_g,(_t,_sc)))) $       
    condB (== _am) (fixed5 (_am,(_a,(_m,(_p,_sc))))) $  
    condB (== _dq) (fixed5 (_am,(_ha,(_3,(_4,_sc))))) $ 
    condB (== _sq) (fixed5 (_am,(_ha,(_3,(_9,_sc))))) $ 
    (E.liftFixedToBounded E.word8)         
  where
    _gt = 62 
    _lt = 60 
    _am = 38 
    _dq = 34 
    _sq = 39 
    _l  = 108 
    _t  = 116 
    _g  = 103 
    _a  = 97  
    _m  = 109 
    _p  = 112 
    _3  = 51  
    _4  = 52  
    _ha = 35  
    _9  = 57  
    _sc = 59  
    {-# INLINE fixed4 #-}
    fixed4 x = E.liftFixedToBounded $ const x >$<
      E.word8 >*< E.word8 >*< E.word8 >*< E.word8
    {-# INLINE fixed5 #-}
    fixed5 x = E.liftFixedToBounded $ const x >$<
      E.word8 >*< E.word8 >*< E.word8 >*< E.word8 >*< E.word8
type TAttribute = (TName, [Content])
foldAttrs :: Builder 
          -> [TAttribute]
          -> Builder
foldAttrs before =
    foldMap go
  where
    go (key, val) =
      before <>
      tnameToText key <>
      "=\"" <>
      foldMap go' val <>
      "\""
    go' (ContentText t) =
      encodeUtf8BuilderEscaped charUtf8XmlEscaped t
    go' (ContentEntity t) = "&" <> encodeUtf8Builder t <> ";"
instance IsString TName where
    fromString = TName Nothing . T.pack
data NSLevel = NSLevel
    { defaultNS :: Maybe Text
    , prefixes :: Map Text Text
    }
    deriving Show
nubAttrs :: [TAttribute] -> [TAttribute]
nubAttrs orig =
    front []
  where
    (front, _) = foldl' go (id, Set.empty) orig
    go (dlist, used) (k, v)
        | k `Set.member` used = (dlist, used)
        | otherwise = (dlist . ((k, v):), Set.insert k used)
splitTName :: TName -> TName
splitTName x@(TName Just{} _) = x
splitTName x@(TName Nothing t)
    | T.null b = x
    | otherwise = TName (Just a) $ T.drop 1 b
  where
    (a, b) = T.break (== ':') t
escCDATA :: Text -> Builder
escCDATA s = encodeUtf8Builder (T.replace "]]>" "]]]]><![CDATA[>" s)