module Text.XML.Enumerator.Document
(
writeFile
, writePrettyFile
, readFile
, readFile_
, renderLBS
, prettyLBS
, parseLBS
, parseLBS_
, toEvents
, fromEvents
, renderBuilder
, renderBytes
, renderText
, prettyBuilder
, prettyBytes
, prettyText
, InvalidEventStream (InvalidEventStream)
) where
import Prelude hiding (writeFile, readFile)
import Data.XML.Types
import Data.Enumerator
( ($$), enumList, joinE, Enumerator, Iteratee, peek, returnI
, throwError, joinI, run, run_, Step (Continue), Stream (Chunks)
)
import Control.Exception (Exception, SomeException)
import Data.Typeable (Typeable)
import qualified Data.Enumerator.List as EL
import Blaze.ByteString.Builder (Builder)
import Control.Monad.IO.Class (MonadIO)
import qualified Text.XML.Enumerator.Render as R
import qualified Text.XML.Enumerator.Parse as P
import Data.ByteString (ByteString)
import Data.Text (Text)
import Control.Applicative ((<$>), (<*>))
import qualified System.IO as SIO
import Data.Enumerator.Binary (enumFile, iterHandle)
import qualified Data.Text as T
import Data.Char (isSpace)
import qualified Data.ByteString.Lazy as L
import System.IO.Unsafe (unsafePerformIO)
import qualified Control.Concurrent.MVar as M
import System.IO.Unsafe (unsafeInterleaveIO)
import Control.Monad.IO.Class (liftIO)
import Control.Concurrent (forkIO)
import Data.Functor.Identity (runIdentity)
readFile :: FilePath -> P.DecodeEntities -> IO (Either SomeException Document)
readFile fn de = run $ enumFile fn $$ joinI $ P.parseBytes de $$ fromEvents
readFile_ :: FilePath -> P.DecodeEntities -> IO Document
readFile_ fn de = run_ $ enumFile fn $$ joinI $ P.parseBytes de $$ fromEvents
writeFile :: FilePath -> Document -> IO ()
writeFile fn doc = SIO.withBinaryFile fn SIO.WriteMode $ \h ->
run_ $ renderBytes doc $$ iterHandle h
writePrettyFile :: FilePath -> Document -> IO ()
writePrettyFile fn doc = SIO.withBinaryFile fn SIO.WriteMode $ \h ->
run_ $ prettyBytes doc $$ iterHandle h
renderLBS :: Document -> L.ByteString
renderLBS doc =
L.fromChunks $ unsafePerformIO $ lazyConsume $ renderBytes doc
prettyLBS :: Document -> L.ByteString
prettyLBS doc =
L.fromChunks $ unsafePerformIO $ lazyConsume $ prettyBytes doc
parseLBS :: L.ByteString -> P.DecodeEntities -> Either SomeException Document
parseLBS lbs de = runIdentity
$ run $ enumSingle (L.toChunks lbs)
$$ joinI $ P.parseBytes de $$ fromEvents
parseLBS_ :: L.ByteString -> P.DecodeEntities -> Document
parseLBS_ lbs de = runIdentity
$ run_ $ enumSingle (L.toChunks lbs)
$$ joinI $ P.parseBytes de $$ fromEvents
enumSingle :: Monad m => [a] -> Enumerator a m b
enumSingle as (Continue k) = k $ Chunks as
enumSingle _ step = returnI step
lazyConsume :: Enumerator a IO () -> IO [a]
lazyConsume enum = do
toGrabber <- M.newEmptyMVar
toFiller <- M.newMVar True
_ <- forkIO $ run_ $ enum $$ filler toGrabber toFiller
grabber toGrabber toFiller
where
grabber toGrabber toFiller = do
x <- M.takeMVar toGrabber
case x of
Nothing -> return []
Just x' -> do
M.putMVar toFiller True
xs <- unsafeInterleaveIO $ grabber toGrabber toFiller
return $ x' : xs
filler toGrabber toFiller = do
cont <- liftIO $ M.takeMVar toFiller
if cont
then do
x <- EL.head
liftIO $ M.putMVar toGrabber x
case x of
Nothing -> return ()
Just _ -> filler toGrabber toFiller
else liftIO $ M.putMVar toGrabber Nothing
data InvalidEventStream = InvalidEventStream String
deriving (Show, Typeable)
instance Exception InvalidEventStream
prettyBuilder :: MonadIO m => Document -> Enumerator Builder m a
prettyBuilder doc = enumList 8 (toEvents doc) `joinE` R.prettyBuilder
prettyBytes :: MonadIO m => Document -> Enumerator ByteString m a
prettyBytes doc = enumList 8 (toEvents doc) `joinE` R.prettyBytes
prettyText :: MonadIO m => Document -> Enumerator Text m a
prettyText doc = enumList 8 (toEvents doc) `joinE` R.prettyText
renderBuilder :: MonadIO m => Document -> Enumerator Builder m a
renderBuilder doc = enumList 8 (toEvents doc) `joinE` R.renderBuilder
renderBytes :: MonadIO m => Document -> Enumerator ByteString m a
renderBytes doc = enumList 8 (toEvents doc) `joinE` R.renderBytes
renderText :: MonadIO m => Document -> Enumerator Text m a
renderText doc = enumList 8 (toEvents doc) `joinE` R.renderText
fromEvents :: Monad m => Iteratee Event m Document
fromEvents = do
skip EventBeginDocument
d <- Document <$> goP <*> require goE <*> goM
skip EventEndDocument
y <- EL.head
if y == Nothing
then return d
else throwError $ InvalidEventStream $ "Trailing matter after epilogue: " ++ show y
where
skip e = do
x <- peek
if x == Just e then EL.drop 1 else return ()
many f =
go id
where
go front = do
x <- f
case x of
Nothing -> return $ front []
Just y -> go (front . (:) y)
dropReturn x = EL.drop 1 >> return x
require f = do
x <- f
case x of
Just y -> return y
Nothing -> do
y <- EL.head
throwError $ InvalidEventStream $ "Document must have a single root element, got: " ++ show y
goP = Prologue <$> goM <*> goD <*> goM
goM = many goM'
goM' = do
x <- peek
case x of
Just (EventInstruction i) -> dropReturn $ Just $ MiscInstruction i
Just (EventComment t) -> dropReturn $ Just $ MiscComment t
Just (EventContent (ContentText t))
| T.all isSpace t -> EL.drop 1 >> goM'
_ -> return Nothing
goD = do
x <- peek
case x of
Just (EventBeginDoctype name meid) -> do
EL.drop 1
dropTillDoctype
return (Just $ Doctype name meid)
_ -> return Nothing
dropTillDoctype = do
x <- EL.head
case x of
Just EventEndDoctype -> return ()
_ -> throwError $ InvalidEventStream $ "Invalid event during doctype, got: " ++ show x
goE = do
x <- peek
case x of
Just (EventBeginElement n as) -> Just <$> goE' n as
_ -> return Nothing
goE' n as = do
EL.drop 1
ns <- many goN
y <- EL.head
if y == Just (EventEndElement n)
then return $ Element n as $ compressNodes ns
else throwError $ InvalidEventStream $ "Missing end element for " ++ show n ++ ", got: " ++ show y
goN = do
x <- peek
case x of
Just (EventBeginElement n as) -> (Just . NodeElement) <$> goE' n as
Just (EventInstruction i) -> dropReturn $ Just $ NodeInstruction i
Just (EventContent c) -> dropReturn $ Just $ NodeContent c
Just (EventComment t) -> dropReturn $ Just $ NodeComment t
Just (EventCDATA t) -> dropReturn $ Just $ NodeContent $ ContentText t
_ -> return Nothing
toEvents :: Document -> [Event]
toEvents (Document prol root epi) =
(EventBeginDocument :)
. goP prol . goE root . goM epi $ [EventEndDocument]
where
goP (Prologue before doctype after) =
goM before . maybe id goD doctype . goM after
goM [] = id
goM [x] = (goM' x :)
goM (x:xs) = (goM' x :) . goM xs
goM' (MiscInstruction i) = EventInstruction i
goM' (MiscComment t) = EventComment t
goD (Doctype name meid) =
(:) (EventBeginDoctype name meid)
. (:) EventEndDoctype
goE (Element name as ns) =
(EventBeginElement name as :)
. goN ns
. (EventEndElement name :)
goN [] = id
goN [x] = goN' x
goN (x:xs) = goN' x . goN xs
goN' (NodeElement e) = goE e
goN' (NodeInstruction i) = (EventInstruction i :)
goN' (NodeContent c) = (EventContent c :)
goN' (NodeComment t) = (EventComment t :)
compressNodes :: [Node] -> [Node]
compressNodes [] = []
compressNodes [x] = [x]
compressNodes (NodeContent (ContentText x) : NodeContent (ContentText y) : z) =
compressNodes $ NodeContent (ContentText $ x `T.append` y) : z
compressNodes (x:xs) = x : compressNodes xs