module Text.XML.Expat.Format (
format,
format',
formatNode,
formatNode',
formatTree,
formatTree',
xmlHeader,
treeToSAX,
formatSAX,
formatSAX',
indent,
indent_
) where
import Text.XML.Expat.Tree
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Internal (c2w, w2c)
import Data.Char (isSpace)
import Data.List
import Data.Word
formatTree :: (GenericXMLString tag, GenericXMLString text) =>
Node tag text
-> L.ByteString
formatTree = format
format :: (GenericXMLString tag, GenericXMLString text) =>
Node tag text
-> L.ByteString
format node = xmlHeader `L.append` formatNode node
formatTree' :: (GenericXMLString tag, GenericXMLString text) =>
Node tag text
-> B.ByteString
formatTree' = B.concat . L.toChunks . formatTree
format' :: (GenericXMLString tag, GenericXMLString text) =>
Node tag text
-> B.ByteString
format' = B.concat . L.toChunks . formatTree
formatNode :: (GenericXMLString tag, GenericXMLString text) =>
Node tag text
-> L.ByteString
formatNode = formatSAX . treeToSAX
formatNode' :: (GenericXMLString tag, GenericXMLString text) =>
Node tag text
-> B.ByteString
formatNode' = B.concat . L.toChunks . formatNode
xmlHeader :: L.ByteString
xmlHeader = L.pack $ map c2w "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
treeToSAX :: Node tag text -> [SAXEvent tag text]
treeToSAX (Element name atts children) =
StartElement name atts : concatMap treeToSAX children ++ [EndElement name]
treeToSAX (Text txt) = [CharacterData txt]
formatSAX :: (GenericXMLString tag, GenericXMLString text) =>
[SAXEvent tag text]
-> L.ByteString
formatSAX = L.fromChunks . putSAX
formatSAX' :: (GenericXMLString tag, GenericXMLString text) =>
[SAXEvent tag text]
-> B.ByteString
formatSAX' = B.concat . L.toChunks . formatSAX
startTagHelper :: (GenericXMLString tag, GenericXMLString text) =>
tag
-> [(tag, text)]
-> [B.ByteString]
startTagHelper name atts =
B.singleton (c2w '<'):
gxToByteString name:
concatMap (
\(aname, avalue) ->
B.singleton (c2w ' '):
gxToByteString aname:
pack "=\"":
escapeText (gxToByteString avalue)++
[B.singleton (c2w '"')]
) atts
putSAX :: (GenericXMLString tag, GenericXMLString text) =>
[SAXEvent tag text]
-> [B.ByteString]
putSAX (StartElement name attrs:EndElement _:elts) =
B.concat (startTagHelper name attrs ++ [pack "/>"]):putSAX elts
putSAX (StartElement name attrs:elts) =
B.concat (startTagHelper name attrs ++ [B.singleton (c2w '>')]):putSAX elts
putSAX (EndElement name:elts) =
B.concat [pack "</", gxToByteString name, B.singleton (c2w '>')]:putSAX elts
putSAX (CharacterData txt:elts) =
B.concat (escapeText (gxToByteString txt)):putSAX elts
putSAX (FailDocument _:elts) = putSAX elts
putSAX [] = []
pack :: String -> B.ByteString
pack = B.pack . map c2w
escapees :: [Word8]
escapees = map c2w "&<\"'"
escapeText :: B.ByteString -> [B.ByteString]
escapeText str | B.null str = []
escapeText str =
let (good, bad) = B.span (`notElem` escapees) str
in if B.null good
then case w2c $ B.head str of
'&' -> pack "&":escapeText rema
'<' -> pack "<":escapeText rema
'"' -> pack """:escapeText rema
'\'' -> pack "'":escapeText rema
_ -> error "hexpat: impossible"
else good:escapeText bad
where
rema = B.tail str
indent :: (GenericXMLString tag, GenericXMLString text) =>
Int
-> Node tag text
-> Node tag text
indent = indent_ 0
indent_ :: (GenericXMLString tag, GenericXMLString text) =>
Int
-> Int
-> Node tag text
-> Node tag text
indent_ _ _ t@(Text _) = t
indent_ cur perLevel elt@(Element name attrs chs) =
if any isElement chs
then Element name attrs $
let (_, chs') = mapAccumL (\startOfText ch -> case ch of
Element _ _ _ ->
let cur' = cur + perLevel
in (
True,
[
Text (gxFromString ('\n':replicate cur' ' ')),
indent_ cur' perLevel ch
]
)
Text t | startOfText ->
case strip t of
Nothing -> (True, [])
Just t' -> (False, [Text t'])
Text _ -> (False, [ch])
) True chs
in concat chs' ++ [Text $ gxFromString ('\n':replicate cur ' ')]
else elt
where
strip t | gxNullString t = Nothing
strip t | isSpace (gxHead t) = strip (gxTail t)
strip t = Just t