module Text.XML.Enumerator.Resolved
(
Document (..)
, Prologue (..)
, Instruction (..)
, Miscellaneous (..)
, Node (..)
, Element (..)
, Name (..)
, Doctype (..)
, ExternalID (..)
, DecodeEntities
, decodeEntities
, readFile
, readFile_
, parseLBS
, parseLBS_
, parseEnum
, parseEnum_
, fromEvents
, UnresolvedEntityException (..)
, writeFile
, writePrettyFile
, renderLBS
, prettyLBS
, renderText
, prettyText
, renderBytes
, prettyBytes
, toXMLDocument
, fromXMLDocument
, toXMLNode
, fromXMLNode
, toXMLElement
, fromXMLElement
) where
import qualified Data.XML.Types as X
import Data.XML.Types
( Prologue (..)
, Miscellaneous (..)
, Instruction (..)
, Name (..)
, Doctype (..)
, ExternalID (..)
)
import Data.Typeable (Typeable)
import Data.Text (Text)
import Text.XML.Enumerator.Parse (DecodeEntities, decodeEntities)
import qualified Text.XML.Enumerator.Parse as P
import qualified Text.XML.Enumerator.Document as D
import qualified Text.XML.Enumerator.Render as R
import qualified Data.Text as T
import Data.Either (partitionEithers)
import Prelude hiding (readFile, writeFile)
import Control.Exception (SomeException, Exception)
import Data.Enumerator.Binary (enumFile, iterHandle)
import Control.Monad.IO.Class (MonadIO)
import Data.Enumerator
( Enumerator, Iteratee, throwError, ($$), run, run_, joinI, enumList
, joinE
)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
import Data.Functor.Identity (runIdentity)
import qualified System.IO as SIO
import System.IO.Unsafe (unsafePerformIO)
import Text.XML.Enumerator.Document (lazyConsume)
import qualified Data.Set as Set
import Data.Set (Set)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
data Document = Document
{ documentPrologue :: Prologue
, documentRoot :: Element
, documentEpilogue :: [Miscellaneous]
}
deriving (Show, Eq, Typeable)
data Node
= NodeElement Element
| NodeInstruction Instruction
| NodeContent Text
| NodeComment Text
deriving (Show, Eq, Typeable)
data Element = Element
{ elementName :: Name
, elementAttributes :: [(Name, Text)]
, elementNodes :: [Node]
}
deriving (Show, Eq, Typeable)
toXMLDocument :: Document -> X.Document
toXMLDocument (Document a b c) = X.Document a (toXMLElement b) c
toXMLElement :: Element -> X.Element
toXMLElement (Element name as nodes) =
X.Element name as' nodes'
where
as' = map (\(x, y) -> (x, [X.ContentText y])) as
nodes' = map toXMLNode nodes
toXMLNode :: Node -> X.Node
toXMLNode (NodeElement e) = X.NodeElement $ toXMLElement e
toXMLNode (NodeContent t) = X.NodeContent $ X.ContentText t
toXMLNode (NodeComment c) = X.NodeComment c
toXMLNode (NodeInstruction i) = X.NodeInstruction i
fromXMLDocument :: X.Document -> Either (Set Text) Document
fromXMLDocument (X.Document a b c) =
case fromXMLElement b of
Left es -> Left es
Right b' -> Right $ Document a b' c
fromXMLElement :: X.Element -> Either (Set Text) Element
fromXMLElement (X.Element name as nodes) =
case (lnodes, las) of
([], []) -> Right $ Element name ras rnodes
(x, []) -> Left $ Set.unions x
([], y) -> Left $ Set.unions y
(x, y) -> Left $ Set.unions x `Set.union` Set.unions y
where
enodes = map fromXMLNode nodes
(lnodes, rnodes) = partitionEithers enodes
eas = map go as
(las, ras) = partitionEithers eas
go (x, y) =
case go' [] id y of
Left es -> Left es
Right y' -> Right (x, y')
go' [] front [] = Right $ T.concat $ front []
go' errs _ [] = Left $ Set.fromList errs
go' errs front (X.ContentText t:ys) = go' errs (front . (:) t) ys
go' errs front (X.ContentEntity t:ys) = go' (t : errs) front ys
fromXMLNode :: X.Node -> Either (Set Text) Node
fromXMLNode (X.NodeElement e) =
either Left (Right . NodeElement) $ fromXMLElement e
fromXMLNode (X.NodeContent (X.ContentText t)) = Right $ NodeContent t
fromXMLNode (X.NodeContent (X.ContentEntity t)) = Left $ Set.singleton t
fromXMLNode (X.NodeComment c) = Right $ NodeComment c
fromXMLNode (X.NodeInstruction i) = Right $ NodeInstruction i
readFile :: FilePath -> DecodeEntities -> IO (Either SomeException Document)
readFile fn = parseEnum $ enumFile fn
readFile_ :: FilePath -> DecodeEntities -> IO Document
readFile_ fn = parseEnum_ $ enumFile fn
lbsEnum :: Monad m => L.ByteString -> Enumerator ByteString m a
lbsEnum = enumList 8 . L.toChunks
parseLBS :: L.ByteString -> DecodeEntities -> Either SomeException Document
parseLBS lbs = runIdentity . parseEnum (lbsEnum lbs)
parseLBS_ :: L.ByteString -> DecodeEntities -> Document
parseLBS_ lbs = runIdentity . parseEnum_ (lbsEnum lbs)
parseEnum :: Monad m
=> Enumerator ByteString m Document
-> DecodeEntities
-> m (Either SomeException Document)
parseEnum enum de = run $ enum $$ joinI $ P.parseBytes de $$ fromEvents
parseEnum_ :: Monad m
=> Enumerator ByteString m Document
-> DecodeEntities
-> m Document
parseEnum_ enum de = run_ $ enum $$ joinI $ P.parseBytes de $$ fromEvents
fromEvents :: Monad m => Iteratee X.Event m Document
fromEvents = do
d <- D.fromEvents
either (throwError . UnresolvedEntityException) return $ fromXMLDocument d
data UnresolvedEntityException = UnresolvedEntityException (Set Text)
deriving (Show, Typeable)
instance Exception UnresolvedEntityException
renderBytes :: MonadIO m => Document -> Enumerator ByteString m a
renderBytes doc = enumList 8 (D.toEvents $ toXMLDocument doc) `joinE` R.renderBytes
prettyBytes :: MonadIO m => Document -> Enumerator ByteString m a
prettyBytes doc = enumList 8 (D.toEvents $ toXMLDocument doc) `joinE` R.prettyBytes
writeFile :: FilePath -> Document -> IO ()
writeFile fn doc = SIO.withBinaryFile fn SIO.WriteMode $ \h ->
run_ $ renderBytes doc $$ iterHandle h
writePrettyFile :: FilePath -> Document -> IO ()
writePrettyFile fn doc = SIO.withBinaryFile fn SIO.WriteMode $ \h ->
run_ $ prettyBytes doc $$ iterHandle h
renderLBS :: Document -> L.ByteString
renderLBS doc =
L.fromChunks $ unsafePerformIO $ lazyConsume $ renderBytes doc
prettyLBS :: Document -> L.ByteString
prettyLBS doc =
L.fromChunks $ unsafePerformIO $ lazyConsume $ prettyBytes doc
renderText :: Document -> TL.Text
renderText = TLE.decodeUtf8 . renderLBS
prettyText :: Document -> TL.Text
prettyText = TLE.decodeUtf8 . prettyLBS