{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
-- | DOM-based XML parsing and rendering.
--
-- In this module, attribute values and content nodes can contain either raw
-- text or entities. In most cases, these can be fully resolved at parsing. If
-- that is the case for your documents, the "Text.XML" module provides
-- simplified datatypes that only contain raw text.
module Text.XML.Unresolved
    ( -- * Non-streaming functions
      writeFile
    , readFile
      -- * Lazy bytestrings
    , renderLBS
    , parseLBS
    , parseLBS_
      -- * Text
    , parseText
    , parseText_
    , sinkTextDoc
      -- * Byte streams
    , sinkDoc
      -- * Streaming functions
    , toEvents
    , fromEvents
    , renderBuilder
    , renderBytes
    , renderText
      -- * Exceptions
    , InvalidEventStream (..)
      -- * Settings
    , P.def
      -- ** Parse
    , P.ParseSettings
    , P.psDecodeEntities
      -- ** Render
    , R.RenderSettings
    , R.rsPretty
    , R.rsNamespaces
    ) where

import Prelude hiding (writeFile, readFile, FilePath)
import Filesystem.Path.CurrentOS (FilePath, encodeString)
import Data.XML.Types
import Control.Exception (Exception, SomeException)
import Data.Typeable (Typeable)
import Blaze.ByteString.Builder (Builder)
import qualified Text.XML.Stream.Render as R
import qualified Text.XML.Stream.Parse as P
import Text.XML.Stream.Parse (ParseSettings)
import Data.ByteString (ByteString)
import Data.Text (Text)
import Control.Applicative ((<$>), (<*>))
import Control.Monad       (when)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.Char (isSpace)
import qualified Data.ByteString.Lazy as L
import System.IO.Unsafe (unsafePerformIO)
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Binary as CB
import Control.Exception (throw)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Resource (MonadThrow, monadThrow, runExceptionT, runResourceT)
import Control.Monad.ST (runST)
import Data.Conduit.Lazy (lazyConsume)

readFile :: P.ParseSettings -> FilePath -> IO Document
readFile ps fp = runResourceT $ CB.sourceFile (encodeString fp) $$ sinkDoc ps

sinkDoc :: MonadThrow m
        => P.ParseSettings
        -> Consumer ByteString m Document
sinkDoc ps = P.parseBytesPos ps =$= fromEvents

writeFile :: R.RenderSettings -> FilePath -> Document -> IO ()
writeFile rs fp doc =
    runResourceT $ renderBytes rs doc $$ CB.sinkFile (encodeString fp)

renderLBS :: R.RenderSettings -> Document -> L.ByteString
renderLBS rs doc =
    L.fromChunks $ unsafePerformIO
                 -- not generally safe, but we know that runResourceT
                 -- will not deallocate any of the resources being used
                 -- by the process
                 $ lazyConsume
                 $ renderBytes rs doc

parseLBS :: P.ParseSettings -> L.ByteString -> Either SomeException Document
parseLBS ps lbs =
    runST $ runExceptionT
          $ CL.sourceList (L.toChunks lbs) $$ sinkDoc ps

parseLBS_ :: P.ParseSettings -> L.ByteString -> Document
parseLBS_ ps lbs = either throw id $ parseLBS ps lbs

data InvalidEventStream = ContentAfterRoot P.EventPos
                        | MissingRootElement
                        | InvalidInlineDoctype P.EventPos
                        | MissingEndElement Name (Maybe P.EventPos)
                        | UnterminatedInlineDoctype
    deriving Typeable
instance Exception InvalidEventStream
instance Show InvalidEventStream where
    show (ContentAfterRoot (pos, e)) = mShowPos pos ++ "Found content after root element: " ++ prettyShowE e
    show MissingRootElement = "Missing root element"
    show (InvalidInlineDoctype (pos, e)) = mShowPos pos ++ "Invalid content inside doctype: " ++ prettyShowE e
    show (MissingEndElement name Nothing) = "Documented ended while expected end element for: " ++ prettyShowName name
    show (MissingEndElement name (Just (pos, e))) = mShowPos pos ++ "Expected end element for: " ++ prettyShowName name ++ ", but received: " ++ prettyShowE e
    show UnterminatedInlineDoctype = "Unterminated doctype declaration"

mShowPos :: Maybe P.PositionRange -> String
mShowPos Nothing = ""
mShowPos (Just pos) = show pos ++ ": "

prettyShowE :: Event -> String
prettyShowE = show -- FIXME

prettyShowName :: Name -> String
prettyShowName = show -- FIXME

renderBuilder :: Monad m => R.RenderSettings -> Document -> Producer m Builder
renderBuilder rs doc = CL.sourceList (toEvents doc) =$= R.renderBuilder rs

--renderBytes :: MonadUnsafeIO m => R.RenderSettings -> Document -> Producer m ByteString
renderBytes rs doc = CL.sourceList (toEvents doc) =$= R.renderBytes rs

--renderText :: (MonadThrow m, MonadUnsafeIO m) => R.RenderSettings -> Document -> Producer m Text
renderText rs doc = CL.sourceList (toEvents doc) =$= R.renderText rs

fromEvents :: MonadThrow m => Consumer P.EventPos m Document
fromEvents = do
    skip EventBeginDocument
    d <- Document <$> goP <*> require goE <*> goM
    skip EventEndDocument
    y <- CL.head
    case y of
        Nothing -> return d
        Just (_, EventEndDocument) -> lift $ monadThrow MissingRootElement
        Just z ->
            lift $ monadThrow $ ContentAfterRoot z
  where
    skip e = do
        x <- CL.peek
        when (fmap snd x == Just e) (CL.drop 1)
    many f =
        go id
      where
        go front = do
            x <- f
            case x of
                Nothing -> return $ front []
                Just y -> go (front . (:) y)
    dropReturn x = CL.drop 1 >> return x
    require f = do
        x <- f
        case x of
            Just y -> return y
            Nothing -> do
                my <- CL.head
                case my of
                    Nothing -> error "Text.XML.Unresolved:impossible"
                    Just (_, EventEndDocument) -> lift $ monadThrow MissingRootElement
                    Just y -> lift $ monadThrow $ ContentAfterRoot y
    goP = Prologue <$> goM <*> goD <*> goM
    goM = many goM'
    goM' = do
        x <- CL.peek
        case x of
            Just (_, EventInstruction i) -> dropReturn $ Just $ MiscInstruction i
            Just (_, EventComment t) -> dropReturn $ Just $ MiscComment t
            Just (_, EventContent (ContentText t))
                | T.all isSpace t -> CL.drop 1 >> goM'
            _ -> return Nothing
    goD = do
        x <- CL.peek
        case x of
            Just (_, EventBeginDoctype name meid) -> do
                CL.drop 1
                dropTillDoctype
                return (Just $ Doctype name meid)
            _ -> return Nothing
    dropTillDoctype = do
        x <- CL.head
        case x of
            -- Leaving the following line commented so that the intention of
            -- this function stays clear. I figure in the future xml-types will
            -- be expanded again to support some form of EventDeclaration
            --
            -- Just (EventDeclaration _) -> dropTillDoctype
            Just (_, EventEndDoctype) -> return ()
            Just epos -> lift $ monadThrow $ InvalidInlineDoctype epos
            Nothing -> lift $ monadThrow UnterminatedInlineDoctype
    goE = do
        x <- CL.peek
        case x of
            Just (_, EventBeginElement n as) -> Just <$> goE' n as
            _ -> return Nothing
    goE' n as = do
        CL.drop 1
        ns <- many goN
        y <- CL.head
        if fmap snd y == Just (EventEndElement n)
            then return $ Element n as $ compressNodes ns
            else lift $ monadThrow $ MissingEndElement n y
    goN = do
        x <- CL.peek
        case x of
            Just (_, EventBeginElement n as) -> (Just . NodeElement) <$> goE' n as
            Just (_, EventInstruction i) -> dropReturn $ Just $ NodeInstruction i
            Just (_, EventContent c) -> dropReturn $ Just $ NodeContent c
            Just (_, EventComment t) -> dropReturn $ Just $ NodeComment t
            Just (_, EventCDATA t) -> dropReturn $ Just $ NodeContent $ ContentText t
            _ -> return Nothing

toEvents :: Document -> [Event]
toEvents (Document prol root epi) =
      (EventBeginDocument :)
    . goP prol . goE root . goM epi $ [EventEndDocument]
  where
    goP (Prologue before doctype after) =
        goM before . maybe id goD doctype . goM after
    goM [] = id
    goM [x] = (goM' x :)
    goM (x:xs) = (goM' x :) . goM xs
    goM' (MiscInstruction i) = EventInstruction i
    goM' (MiscComment t) = EventComment t
    goD (Doctype name meid) =
        (:) (EventBeginDoctype name meid)
      . (:) EventEndDoctype
    goE (Element name as ns) =
          (EventBeginElement name as :)
        . goN ns
        . (EventEndElement name :)
    goN [] = id
    goN [x] = goN' x
    goN (x:xs) = goN' x . goN xs
    goN' (NodeElement e) = goE e
    goN' (NodeInstruction i) = (EventInstruction i :)
    goN' (NodeContent c) = (EventContent c :)
    goN' (NodeComment t) = (EventComment t :)

compressNodes :: [Node] -> [Node]
compressNodes [] = []
compressNodes [x] = [x]
compressNodes (NodeContent (ContentText x) : NodeContent (ContentText y) : z) =
    compressNodes $ NodeContent (ContentText $ x `T.append` y) : z
compressNodes (x:xs) = x : compressNodes xs

parseText :: ParseSettings -> TL.Text -> Either SomeException Document
parseText ps tl = runST
                $ runExceptionT
                $ CL.sourceList (TL.toChunks tl)
           $$ sinkTextDoc ps

parseText_ :: ParseSettings -> TL.Text -> Document
parseText_ ps = either throw id . parseText ps

sinkTextDoc :: MonadThrow m
            => ParseSettings
            -> Consumer Text m Document
sinkTextDoc ps = P.parseText ps =$= fromEvents