{-# LANGUAGE DeriveDataTypeable #-} -- | DOM-based parsing and rendering. -- -- This module requires that all entities be resolved at parsing. If you need -- to interact with unresolved entities, please use "Text.XML.Unresolved". This -- is the recommended module for most uses cases. -- -- While many of the datatypes in this module are simply re-exported from -- @Data.XML.Types@, 'Document', 'Node' and 'Element' are all redefined here to -- disallow the possibility of unresolved entities. Conversion functions are -- provided to switch between the two sets of datatypes. -- -- For simpler, bidirectional traversal of the DOM tree, see the -- "Text.XML.Cursor" module. module Text.XML ( -- * Data types Document (..) , Prologue (..) , Instruction (..) , Miscellaneous (..) , Node (..) , Element (..) , Name (..) , Doctype (..) , ExternalID (..) -- * Parsing , readFile , readFile_ , parseLBS , parseLBS_ , parseEnum , parseEnum_ , fromEvents , UnresolvedEntityException (..) -- * Rendering , writeFile , renderLBS , renderText , renderBytes -- * Settings , def -- ** Parsing , ParseSettings , psDecodeEntities -- ** Rendering , R.RenderSettings , R.rsPretty -- * Conversion , toXMLDocument , fromXMLDocument , toXMLNode , fromXMLNode , toXMLElement , fromXMLElement ) where import qualified Data.XML.Types as X import Data.XML.Types ( Prologue (..) , Miscellaneous (..) , Instruction (..) , Name (..) , Doctype (..) , ExternalID (..) ) import Data.Typeable (Typeable) import Data.Text (Text) import qualified Text.XML.Stream.Parse as P import qualified Text.XML.Unresolved as D import qualified Text.XML.Stream.Render as R import qualified Data.Text as T import Data.Either (partitionEithers) import Prelude hiding (readFile, writeFile) import Control.Exception (SomeException, Exception) import Data.Enumerator.Binary (enumFile, iterHandle) import Control.Monad.IO.Class (MonadIO) import Text.XML.Stream.Parse (ParseSettings, def, psDecodeEntities) import Data.Enumerator ( Enumerator, Iteratee, throwError, ($$), run, run_, joinI, enumList , joinE ) import Data.ByteString (ByteString) import qualified Data.ByteString.Lazy as L import Data.Functor.Identity (runIdentity) import qualified System.IO as SIO import System.IO.Unsafe (unsafePerformIO) import Text.XML.Unresolved (lazyConsume) import qualified Data.Set as Set import Data.Set (Set) import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TLE data Document = Document { documentPrologue :: Prologue , documentRoot :: Element , documentEpilogue :: [Miscellaneous] } deriving (Show, Eq, Typeable) data Node = NodeElement Element | NodeInstruction Instruction | NodeContent Text | NodeComment Text deriving (Show, Eq, Typeable) data Element = Element { elementName :: Name , elementAttributes :: [(Name, Text)] , elementNodes :: [Node] } deriving (Show, Eq, Typeable) {- readFile :: FilePath -> ParseSettings -> IO (Either SomeException Document) readFile_ :: FIlePath -> ParseSettings -> IO Document -} toXMLDocument :: Document -> X.Document toXMLDocument (Document a b c) = X.Document a (toXMLElement b) c toXMLElement :: Element -> X.Element toXMLElement (Element name as nodes) = X.Element name as' nodes' where as' = map (\(x, y) -> (x, [X.ContentText y])) as nodes' = map toXMLNode nodes toXMLNode :: Node -> X.Node toXMLNode (NodeElement e) = X.NodeElement $ toXMLElement e toXMLNode (NodeContent t) = X.NodeContent $ X.ContentText t toXMLNode (NodeComment c) = X.NodeComment c toXMLNode (NodeInstruction i) = X.NodeInstruction i fromXMLDocument :: X.Document -> Either (Set Text) Document fromXMLDocument (X.Document a b c) = case fromXMLElement b of Left es -> Left es Right b' -> Right $ Document a b' c fromXMLElement :: X.Element -> Either (Set Text) Element fromXMLElement (X.Element name as nodes) = case (lnodes, las) of ([], []) -> Right $ Element name ras rnodes (x, []) -> Left $ Set.unions x ([], y) -> Left $ Set.unions y (x, y) -> Left $ Set.unions x `Set.union` Set.unions y where enodes = map fromXMLNode nodes (lnodes, rnodes) = partitionEithers enodes eas = map go as (las, ras) = partitionEithers eas go (x, y) = case go' [] id y of Left es -> Left es Right y' -> Right (x, y') go' [] front [] = Right $ T.concat $ front [] go' errs _ [] = Left $ Set.fromList errs go' errs front (X.ContentText t:ys) = go' errs (front . (:) t) ys go' errs front (X.ContentEntity t:ys) = go' (t : errs) front ys fromXMLNode :: X.Node -> Either (Set Text) Node fromXMLNode (X.NodeElement e) = either Left (Right . NodeElement) $ fromXMLElement e fromXMLNode (X.NodeContent (X.ContentText t)) = Right $ NodeContent t fromXMLNode (X.NodeContent (X.ContentEntity t)) = Left $ Set.singleton t fromXMLNode (X.NodeComment c) = Right $ NodeComment c fromXMLNode (X.NodeInstruction i) = Right $ NodeInstruction i readFile :: ParseSettings -> FilePath -> IO (Either SomeException Document) readFile ps fn = parseEnum ps $ enumFile fn readFile_ :: ParseSettings -> FilePath -> IO Document readFile_ ps fn = parseEnum_ ps $ enumFile fn lbsEnum :: Monad m => L.ByteString -> Enumerator ByteString m a lbsEnum = enumList 8 . L.toChunks parseLBS :: ParseSettings -> L.ByteString -> Either SomeException Document parseLBS ps = runIdentity . parseEnum ps . lbsEnum parseLBS_ :: ParseSettings -> L.ByteString -> Document parseLBS_ ps = runIdentity . parseEnum_ ps . lbsEnum parseEnum :: Monad m => ParseSettings -> Enumerator ByteString m Document -> m (Either SomeException Document) parseEnum de enum = run $ enum $$ joinI $ P.parseBytes de $$ fromEvents parseEnum_ :: Monad m => ParseSettings -> Enumerator ByteString m Document -> m Document parseEnum_ de enum = run_ $ enum $$ joinI $ P.parseBytes de $$ fromEvents fromEvents :: Monad m => Iteratee X.Event m Document fromEvents = do d <- D.fromEvents either (throwError . UnresolvedEntityException) return $ fromXMLDocument d data UnresolvedEntityException = UnresolvedEntityException (Set Text) deriving (Show, Typeable) instance Exception UnresolvedEntityException renderBytes :: MonadIO m => R.RenderSettings -> Document -> Enumerator ByteString m a renderBytes rs doc = enumList 8 (D.toEvents $ toXMLDocument doc) `joinE` R.renderBytes rs writeFile :: R.RenderSettings -> FilePath -> Document -> IO () writeFile rs fn doc = SIO.withBinaryFile fn SIO.WriteMode $ \h -> run_ $ renderBytes rs doc $$ iterHandle h renderLBS :: R.RenderSettings -> Document -> L.ByteString renderLBS rs doc = L.fromChunks $ unsafePerformIO $ lazyConsume $ renderBytes rs doc renderText :: R.RenderSettings -> Document -> TL.Text renderText rs = TLE.decodeUtf8 . renderLBS rs