-- hexpat, a Haskell wrapper for expat -- Copyright (C) 2008 Evan Martin -- Copyright (C) 2009 Stephen Blackheath -- | This module provides functions to format a tree -- structure or SAX stream as UTF-8 encoded XML. {-# LANGUAGE FlexibleContexts #-} module Text.XML.Expat.Format ( -- * High level format, format', formatNode, formatNode', -- * Deprecated names formatTree, formatTree', -- * Low level xmlHeader, treeToSAX, formatSAX, formatSAX' ) where import Text.XML.Expat.IO import Text.XML.Expat.Tree import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L import Data.ByteString.Internal (c2w, w2c) import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Data.Word -- | DEPRECATED: Renamed to 'format'. formatTree :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> L.ByteString formatTree = format -- | Format document with Node tag text -> L.ByteString format node = xmlHeader `L.append` formatNode node -- | DEPRECATED: Renamed to 'format''. formatTree' :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> B.ByteString formatTree' = B.concat . L.toChunks . formatTree -- | Format document with Node tag text -> B.ByteString format' = B.concat . L.toChunks . formatTree -- | Format XML node with no header - lazy variant that returns lazy ByteString. formatNode :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> L.ByteString formatNode = formatSAX . treeToSAX -- | Format XML node with no header - strict variant that returns strict ByteString. formatNode' :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> B.ByteString formatNode' = B.concat . L.toChunks . formatNode xmlHeader :: L.ByteString xmlHeader = L.pack $ map c2w "\n" -- | Flatten a tree structure into SAX events. treeToSAX :: Node tag text -> [SAXEvent tag text] treeToSAX (Element name atts children) = StartElement name atts : concatMap treeToSAX children ++ [EndElement name] treeToSAX (Text txt) = [CharacterData txt] -- | Format SAX events with no header - lazy variant that returns lazy ByteString. formatSAX :: (GenericXMLString tag, GenericXMLString text) => [SAXEvent tag text] -> L.ByteString formatSAX = L.fromChunks . putSAX -- | Format SAX events with no header - strict variant that returns strict ByteString. formatSAX' :: (GenericXMLString tag, GenericXMLString text) => [SAXEvent tag text] -> B.ByteString formatSAX' = B.concat . L.toChunks . formatSAX -- Do start tag and attributes but omit closing > startTagHelper :: (GenericXMLString tag, GenericXMLString text) => tag -> [(tag, text)] -> [B.ByteString] startTagHelper name atts = B.singleton (c2w '<'): gxToByteString name: concatMap ( \(aname, avalue) -> B.singleton (c2w ' '): gxToByteString aname: pack "=\"": escapeText (gxToByteString avalue)++ [B.singleton (c2w '"')] ) atts putSAX :: (GenericXMLString tag, GenericXMLString text) => [SAXEvent tag text] -> [B.ByteString] putSAX (StartElement name attrs:EndElement name2:elts) = B.concat (startTagHelper name attrs ++ [pack "/>"]):putSAX elts putSAX (StartElement name attrs:elts) = B.concat (startTagHelper name attrs ++ [B.singleton (c2w '>')]):putSAX elts putSAX (EndElement name:elts) = B.concat [pack "')]:putSAX elts putSAX (CharacterData txt:elts) = B.concat (escapeText (gxToByteString txt)):putSAX elts putSAX (FailDocument _:elts) = putSAX elts putSAX [] = [] pack :: String -> B.ByteString pack = B.pack . map c2w escapees :: [Word8] escapees = map c2w "&<\"'" escapeText :: B.ByteString -> [B.ByteString] escapeText str | B.null str = [] escapeText str = let (good, bad) = B.span (`notElem` escapees) str in if B.null good then case w2c $ B.head str of '&' -> pack "&":escapeText rem '<' -> pack "<":escapeText rem '"' -> pack """:escapeText rem '\'' -> pack "'":escapeText rem _ -> error "hexpat: impossible" else good:escapeText bad where rem = B.tail str