{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
-- | DOM-based XML parsing and rendering.
--
-- In this module, attribute values and content nodes can contain either raw
-- text or entities. In most cases, these can be fully resolved at parsing. If
-- that is the case for your documents, the "Text.XML" module provides
-- simplified datatypes that only contain raw text.
module Text.XML.Unresolved
    ( -- * Non-streaming functions
      writeFile
    , readFile
      -- * Lazy bytestrings
    , renderLBS
    , parseLBS
    , parseLBS_
      -- * Text
    , parseText
    , parseText_
    , sinkTextDoc
      -- * Byte streams
    , sinkDoc
      -- * Streaming functions
    , toEvents
    , fromEvents
    , renderBuilder
    , renderBytes
    , renderText
      -- * Exceptions
    , InvalidEventStream (InvalidEventStream)
      -- * Settings
    , P.def
      -- ** Parse
    , P.ParseSettings
    , P.psDecodeEntities
      -- ** Render
    , R.RenderSettings
    , R.rsPretty
    ) where

import Prelude hiding (writeFile, readFile, FilePath)
import Filesystem.Path.CurrentOS (FilePath, encodeString)
import Data.XML.Types
import Control.Exception (Exception, SomeException)
import Data.Typeable (Typeable)
import Blaze.ByteString.Builder (Builder)
import qualified Text.XML.Stream.Render as R
import qualified Text.XML.Stream.Parse as P
import Text.XML.Stream.Parse (ParseSettings)
import Data.ByteString (ByteString)
import Data.Text (Text)
import Control.Applicative ((<$>), (<*>))
import Control.Monad       (when)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.Char (isSpace)
import qualified Data.ByteString.Lazy as L
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Binary as CB
import Control.Exception (throw)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Resource (MonadUnsafeIO, runExceptionT)
import Control.Monad.ST (runST)
import Data.Conduit.Lazy (lazyConsume)

readFile :: P.ParseSettings -> FilePath -> IO Document
readFile ps fp = C.runResourceT $ P.parseFile ps fp C.$$ fromEvents

sinkDoc :: C.MonadThrow m
        => P.ParseSettings -> C.Sink ByteString m Document
sinkDoc ps = P.parseBytes ps C.=$ fromEvents

writeFile :: R.RenderSettings -> FilePath -> Document -> IO ()
writeFile rs fp doc =
    C.runResourceT $ renderBytes rs doc C.$$ CB.sinkFile (encodeString fp)

renderLBS :: R.RenderSettings -> Document -> L.ByteString
renderLBS rs doc =
    L.fromChunks $ unsafePerformIO
                 -- not generally safe, but we know that runResourceT
                 -- will not deallocate any of the resources being used
                 -- by the process
                 $ lazyConsume
                 $ renderBytes rs doc

parseLBS :: P.ParseSettings -> L.ByteString -> Either SomeException Document
parseLBS ps lbs =
    runST $ runExceptionT
          $ CL.sourceList (L.toChunks lbs) C.$$ sinkDoc ps

parseLBS_ :: P.ParseSettings -> L.ByteString -> Document
parseLBS_ ps lbs = either throw id $ parseLBS ps lbs

data InvalidEventStream = InvalidEventStream String
    deriving (Show, Typeable)
instance Exception InvalidEventStream

renderBuilder :: Monad m => R.RenderSettings -> Document -> C.Source m Builder
renderBuilder rs doc = CL.sourceList (toEvents doc) C.$= R.renderBuilder rs

renderBytes :: MonadUnsafeIO m => R.RenderSettings -> Document -> C.Source m ByteString
renderBytes rs doc = CL.sourceList (toEvents doc) C.$= R.renderBytes rs

renderText :: (C.MonadThrow m, MonadUnsafeIO m) => R.RenderSettings -> Document -> C.Source m Text
renderText rs doc = CL.sourceList (toEvents doc) C.$= R.renderText rs

fromEvents :: C.MonadThrow m => C.Sink Event m Document
fromEvents = do
    skip EventBeginDocument
    d <- Document <$> goP <*> require goE <*> goM
    skip EventEndDocument
    y <- CL.head
    if y == Nothing
        then return d
        else lift $ C.monadThrow $ InvalidEventStream $ "Trailing matter after epilogue: " ++ show y
  where
    skip e = do
        x <- CL.peek
        when (x == Just e) (CL.drop 1)
    many f =
        go id
      where
        go front = do
            x <- f
            case x of
                Nothing -> return $ front []
                Just y -> go (front . (:) y)
    dropReturn x = CL.drop 1 >> return x
    require f = do
        x <- f
        case x of
            Just y -> return y
            Nothing -> do
                y <- CL.head
                lift $ C.monadThrow $ InvalidEventStream $ "Document must have a single root element, got: " ++ show y
    goP = Prologue <$> goM <*> goD <*> goM
    goM = many goM'
    goM' = do
        x <- CL.peek
        case x of
            Just (EventInstruction i) -> dropReturn $ Just $ MiscInstruction i
            Just (EventComment t) -> dropReturn $ Just $ MiscComment t
            Just (EventContent (ContentText t))
                | T.all isSpace t -> CL.drop 1 >> goM'
            _ -> return Nothing
    goD = do
        x <- CL.peek
        case x of
            Just (EventBeginDoctype name meid) -> do
                CL.drop 1
                dropTillDoctype
                return (Just $ Doctype name meid)
            _ -> return Nothing
    dropTillDoctype = do
        x <- CL.head
        case x of
            -- Leaving the following line commented so that the intention of
            -- this function stays clear. I figure in the future xml-types will
            -- be expanded again to support some form of EventDeclaration
            --
            -- Just (EventDeclaration _) -> dropTillDoctype
            Just EventEndDoctype -> return ()
            _ -> lift $ C.monadThrow $ InvalidEventStream $ "Invalid event during doctype, got: " ++ show x
    goE = do
        x <- CL.peek
        case x of
            Just (EventBeginElement n as) -> Just <$> goE' n as
            _ -> return Nothing
    goE' n as = do
        CL.drop 1
        ns <- many goN
        y <- CL.head
        if y == Just (EventEndElement n)
            then return $ Element n as $ compressNodes ns
            else lift $ C.monadThrow $ InvalidEventStream $ "Missing end element for " ++ show n ++ ", got: " ++ show y
    goN = do
        x <- CL.peek
        case x of
            Just (EventBeginElement n as) -> (Just . NodeElement) <$> goE' n as
            Just (EventInstruction i) -> dropReturn $ Just $ NodeInstruction i
            Just (EventContent c) -> dropReturn $ Just $ NodeContent c
            Just (EventComment t) -> dropReturn $ Just $ NodeComment t
            Just (EventCDATA t) -> dropReturn $ Just $ NodeContent $ ContentText t
            _ -> return Nothing

toEvents :: Document -> [Event]
toEvents (Document prol root epi) =
      (EventBeginDocument :)
    . goP prol . goE root . goM epi $ [EventEndDocument]
  where
    goP (Prologue before doctype after) =
        goM before . maybe id goD doctype . goM after
    goM [] = id
    goM [x] = (goM' x :)
    goM (x:xs) = (goM' x :) . goM xs
    goM' (MiscInstruction i) = EventInstruction i
    goM' (MiscComment t) = EventComment t
    goD (Doctype name meid) =
        (:) (EventBeginDoctype name meid)
      . (:) EventEndDoctype
    goE (Element name as ns) =
          (EventBeginElement name as :)
        . goN ns
        . (EventEndElement name :)
    goN [] = id
    goN [x] = goN' x
    goN (x:xs) = goN' x . goN xs
    goN' (NodeElement e) = goE e
    goN' (NodeInstruction i) = (EventInstruction i :)
    goN' (NodeContent c) = (EventContent c :)
    goN' (NodeComment t) = (EventComment t :)

compressNodes :: [Node] -> [Node]
compressNodes [] = []
compressNodes [x] = [x]
compressNodes (NodeContent (ContentText x) : NodeContent (ContentText y) : z) =
    compressNodes $ NodeContent (ContentText $ x `T.append` y) : z
compressNodes (x:xs) = x : compressNodes xs

parseText :: ParseSettings -> TL.Text -> Either SomeException Document
parseText ps tl = runST
                $ runExceptionT
                $ CL.sourceList (TL.toChunks tl)
           C.$$ sinkTextDoc ps

parseText_ :: ParseSettings -> TL.Text -> Document
parseText_ ps = either throw id . parseText ps

sinkTextDoc :: C.MonadThrow m
            => ParseSettings
            -> C.Sink Text m Document
sinkTextDoc ps = P.parseText ps C.=$ fromEvents