module Text.XML
(
Document (..)
, Prologue (..)
, Instruction (..)
, Miscellaneous (..)
, Node (..)
, Element (..)
, Name (..)
, Doctype (..)
, ExternalID (..)
, readFile
, parseLBS
, parseLBS_
, sinkDoc
, parseText
, parseText_
, sinkTextDoc
, fromEvents
, UnresolvedEntityException (..)
, writeFile
, renderLBS
, renderText
, renderBytes
, def
, ParseSettings
, psDecodeEntities
, R.RenderSettings
, R.rsPretty
, toXMLDocument
, fromXMLDocument
, toXMLNode
, fromXMLNode
, toXMLElement
, fromXMLElement
) where
import qualified Data.XML.Types as X
import Data.XML.Types
( Prologue (..)
, Miscellaneous (..)
, Instruction (..)
, Name (..)
, Doctype (..)
, ExternalID (..)
)
import Data.Typeable (Typeable)
import Data.Text (Text)
import qualified Text.XML.Stream.Parse as P
import qualified Text.XML.Unresolved as D
import qualified Text.XML.Stream.Render as R
import qualified Data.Text as T
import Data.Either (partitionEithers)
import Prelude hiding (readFile, writeFile, FilePath)
import Filesystem.Path.CurrentOS (FilePath, encodeString)
import Control.Exception (SomeException, Exception)
import Text.XML.Stream.Parse (ParseSettings, def, psDecodeEntities)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
import Control.Monad.ST (runST)
import qualified Data.Set as Set
import Data.Set (Set)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Lazy (lazyConsume)
import System.IO.Unsafe (unsafePerformIO)
import Control.Exception (throw)
import Control.Monad.Trans.Resource (ResourceUnsafeIO, runExceptionT)
import Control.Monad.Trans.Class (lift)
data Document = Document
{ documentPrologue :: Prologue
, documentRoot :: Element
, documentEpilogue :: [Miscellaneous]
}
deriving (Show, Eq, Typeable)
data Node
= NodeElement Element
| NodeInstruction Instruction
| NodeContent Text
| NodeComment Text
deriving (Show, Eq, Ord, Typeable)
data Element = Element
{ elementName :: Name
, elementAttributes :: [(Name, Text)]
, elementNodes :: [Node]
}
deriving (Show, Eq, Ord, Typeable)
toXMLDocument :: Document -> X.Document
toXMLDocument (Document a b c) = X.Document a (toXMLElement b) c
toXMLElement :: Element -> X.Element
toXMLElement (Element name as nodes) =
X.Element name as' nodes'
where
as' = map (\(x, y) -> (x, [X.ContentText y])) as
nodes' = map toXMLNode nodes
toXMLNode :: Node -> X.Node
toXMLNode (NodeElement e) = X.NodeElement $ toXMLElement e
toXMLNode (NodeContent t) = X.NodeContent $ X.ContentText t
toXMLNode (NodeComment c) = X.NodeComment c
toXMLNode (NodeInstruction i) = X.NodeInstruction i
fromXMLDocument :: X.Document -> Either (Set Text) Document
fromXMLDocument (X.Document a b c) =
case fromXMLElement b of
Left es -> Left es
Right b' -> Right $ Document a b' c
fromXMLElement :: X.Element -> Either (Set Text) Element
fromXMLElement (X.Element name as nodes) =
case (lnodes, las) of
([], []) -> Right $ Element name ras rnodes
(x, []) -> Left $ Set.unions x
([], y) -> Left $ Set.unions y
(x, y) -> Left $ Set.unions x `Set.union` Set.unions y
where
enodes = map fromXMLNode nodes
(lnodes, rnodes) = partitionEithers enodes
eas = map go as
(las, ras) = partitionEithers eas
go (x, y) =
case go' [] id y of
Left es -> Left es
Right y' -> Right (x, y')
go' [] front [] = Right $ T.concat $ front []
go' errs _ [] = Left $ Set.fromList errs
go' errs front (X.ContentText t:ys) = go' errs (front . (:) t) ys
go' errs front (X.ContentEntity t:ys) = go' (t : errs) front ys
fromXMLNode :: X.Node -> Either (Set Text) Node
fromXMLNode (X.NodeElement e) =
either Left (Right . NodeElement) $ fromXMLElement e
fromXMLNode (X.NodeContent (X.ContentText t)) = Right $ NodeContent t
fromXMLNode (X.NodeContent (X.ContentEntity t)) = Left $ Set.singleton t
fromXMLNode (X.NodeComment c) = Right $ NodeComment c
fromXMLNode (X.NodeInstruction i) = Right $ NodeInstruction i
readFile :: ParseSettings -> FilePath -> IO Document
readFile ps fp = C.runResourceT $ CB.sourceFile (encodeString fp) C.$$ sinkDoc ps
parseLBS :: ParseSettings -> L.ByteString -> Either SomeException Document
parseLBS ps lbs = runST
$ runExceptionT
$ C.runResourceT
$ CL.sourceList (L.toChunks lbs)
C.$$ sinkDoc ps
parseLBS_ :: ParseSettings -> L.ByteString -> Document
parseLBS_ ps = either throw id . parseLBS ps
sinkDoc :: C.ResourceThrow m
=> ParseSettings
-> C.Sink ByteString m Document
sinkDoc ps = P.parseBytes ps C.=$ fromEvents
parseText :: ParseSettings -> TL.Text -> Either SomeException Document
parseText ps tl = runST
$ runExceptionT
$ C.runResourceT
$ CL.sourceList (TL.toChunks tl)
C.$$ sinkTextDoc ps
parseText_ :: ParseSettings -> TL.Text -> Document
parseText_ ps = either throw id . parseText ps
sinkTextDoc :: C.ResourceThrow m
=> ParseSettings
-> C.Sink Text m Document
sinkTextDoc ps = P.parseText ps C.=$ fromEvents
fromEvents :: C.ResourceThrow m => C.Sink X.Event m Document
fromEvents = do
d <- D.fromEvents
either (lift . C.resourceThrow . UnresolvedEntityException) return $ fromXMLDocument d
data UnresolvedEntityException = UnresolvedEntityException (Set Text)
deriving (Show, Typeable)
instance Exception UnresolvedEntityException
renderBytes :: ResourceUnsafeIO m => R.RenderSettings -> Document -> C.Source m ByteString
renderBytes rs doc = D.renderBytes rs $ toXMLDocument doc
writeFile :: R.RenderSettings -> FilePath -> Document -> IO ()
writeFile rs fp doc =
C.runResourceT $ renderBytes rs doc C.$$ CB.sinkFile (encodeString fp)
renderLBS :: R.RenderSettings -> Document -> L.ByteString
renderLBS rs doc =
L.fromChunks $ unsafePerformIO
$ C.runResourceT
$ lazyConsume
$ renderBytes rs doc
renderText :: R.RenderSettings -> Document -> TL.Text
renderText rs = TLE.decodeUtf8 . renderLBS rs