{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts   #-}
{-# LANGUAGE RankNTypes         #-}
-- | DOM-based XML parsing and rendering.
--
-- In this module, attribute values and content nodes can contain either raw
-- text or entities. In most cases, these can be fully resolved at parsing. If
-- that is the case for your documents, the "Text.XML" module provides
-- simplified datatypes that only contain raw text.
module Text.XML.Unresolved
    ( -- * Non-streaming functions
      writeFile
    , readFile
      -- * Lazy bytestrings
    , renderLBS
    , parseLBS
    , parseLBS_
      -- * Text
    , parseText
    , parseText_
    , sinkTextDoc
      -- * Byte streams
    , sinkDoc
      -- * Streaming functions
    , toEvents
    , elementToEvents
    , fromEvents
    , elementFromEvents
    , renderBuilder
    , renderBytes
    , renderText
      -- * Exceptions
    , InvalidEventStream (..)
      -- * Settings
    , P.def
      -- ** Parse
    , P.ParseSettings
    , P.psDecodeEntities
    , P.psRetainNamespaces
      -- ** Render
    , R.RenderSettings
    , R.rsPretty
    , R.rsNamespaces
    ) where

import           Blaze.ByteString.Builder     (Builder)
import           Control.Applicative          ((<$>), (<*>))
import           Control.Exception            (Exception, SomeException, throw)
import           Control.Monad                (when)
import           Control.Monad.ST             (runST)
import           Control.Monad.Trans.Class    (lift)
import           Control.Monad.Trans.Resource (MonadThrow, monadThrow,
                                               runExceptionT, runResourceT)
import           Data.ByteString              (ByteString)
import qualified Data.ByteString.Lazy         as L
import           Data.Char                    (isSpace)
import           Data.Conduit
import qualified Data.Conduit.Binary          as CB
import           Data.Conduit.Lazy            (lazyConsume)
import qualified Data.Conduit.List            as CL
import           Data.Maybe                   (isJust, mapMaybe)
import           Data.Monoid                  (mconcat)
import           Data.Text                    (Text)
import qualified Data.Text                    as T
import qualified Data.Text.Lazy               as TL
import           Data.Typeable                (Typeable)
import           Data.XML.Types
import           Prelude                      hiding (readFile, writeFile)
import           System.IO.Unsafe             (unsafePerformIO)
import           Text.XML.Stream.Parse        (ParseSettings)
import qualified Text.XML.Stream.Parse        as P
import qualified Text.XML.Stream.Render       as R

readFile :: P.ParseSettings -> FilePath -> IO Document
readFile ps fp = runResourceT $ CB.sourceFile fp $$ sinkDoc ps

sinkDoc :: MonadThrow m
        => P.ParseSettings
        -> Consumer ByteString m Document
sinkDoc ps = P.parseBytesPos ps =$= fromEvents

writeFile :: R.RenderSettings -> FilePath -> Document -> IO ()
writeFile rs fp doc =
    runResourceT $ renderBytes rs doc $$ CB.sinkFile fp

renderLBS :: R.RenderSettings -> Document -> L.ByteString
renderLBS rs doc =
    L.fromChunks $ unsafePerformIO
                 -- not generally safe, but we know that runResourceT
                 -- will not deallocate any of the resources being used
                 -- by the process
                 $ lazyConsume
                 $ renderBytes rs doc

parseLBS :: P.ParseSettings -> L.ByteString -> Either SomeException Document
parseLBS ps lbs =
    runST $ runExceptionT
          $ CL.sourceList (L.toChunks lbs) $$ sinkDoc ps

parseLBS_ :: P.ParseSettings -> L.ByteString -> Document
parseLBS_ ps lbs = either throw id $ parseLBS ps lbs

data InvalidEventStream = ContentAfterRoot P.EventPos
                        | MissingRootElement
                        | InvalidInlineDoctype P.EventPos
                        | MissingEndElement Name (Maybe P.EventPos)
                        | UnterminatedInlineDoctype
    deriving Typeable
instance Exception InvalidEventStream
instance Show InvalidEventStream where
    show (ContentAfterRoot (pos, e)) = mShowPos pos ++ "Found content after root element: " ++ prettyShowE e
    show MissingRootElement = "Missing root element"
    show (InvalidInlineDoctype (pos, e)) = mShowPos pos ++ "Invalid content inside doctype: " ++ prettyShowE e
    show (MissingEndElement name Nothing) = "Documented ended while expected end element for: " ++ prettyShowName name
    show (MissingEndElement name (Just (pos, e))) = mShowPos pos ++ "Expected end element for: " ++ prettyShowName name ++ ", but received: " ++ prettyShowE e
    show UnterminatedInlineDoctype = "Unterminated doctype declaration"

mShowPos :: Maybe P.PositionRange -> String
mShowPos Nothing    = ""
mShowPos (Just pos) = show pos ++ ": "

prettyShowE :: Event -> String
prettyShowE = show -- FIXME

prettyShowName :: Name -> String
prettyShowName = show -- FIXME

renderBuilder :: Monad m => R.RenderSettings -> Document -> Producer m Builder
renderBuilder rs doc = CL.sourceList (toEvents doc) =$= R.renderBuilder rs

--renderBytes :: MonadUnsafeIO m => R.RenderSettings -> Document -> Producer m ByteString
renderBytes rs doc = CL.sourceList (toEvents doc) =$= R.renderBytes rs

--renderText :: (MonadThrow m, MonadUnsafeIO m) => R.RenderSettings -> Document -> Producer m Text
renderText rs doc = CL.sourceList (toEvents doc) =$= R.renderText rs

manyTries :: Monad m => m (Maybe a) -> m [a]
manyTries f =
    go id
  where
    go front = do
        x <- f
        case x of
            Nothing -> return $ front []
            Just y  -> go (front . (:) y)

dropReturn :: Monad m => a -> ConduitM i o m a
dropReturn x = CL.drop 1 >> return x

-- | Parse a document from a stream of events.
fromEvents :: MonadThrow m => Consumer P.EventPos m Document
fromEvents = do
    skip EventBeginDocument
    d <- Document <$> goP <*> require elementFromEvents <*> goM
    skip EventEndDocument
    y <- CL.head
    case y of
        Nothing -> return d
        Just (_, EventEndDocument) -> lift $ monadThrow MissingRootElement
        Just z ->
            lift $ monadThrow $ ContentAfterRoot z
  where
    skip e = do
        x <- CL.peek
        when (fmap snd x == Just e) (CL.drop 1)
    require f = do
        x <- f
        case x of
            Just y -> return y
            Nothing -> do
                my <- CL.head
                case my of
                    Nothing -> error "Text.XML.Unresolved:impossible"
                    Just (_, EventEndDocument) -> lift $ monadThrow MissingRootElement
                    Just y -> lift $ monadThrow $ ContentAfterRoot y
    goP = Prologue <$> goM <*> goD <*> goM
    goM = manyTries goM'
    goM' = do
        x <- CL.peek
        case x of
            Just (_, EventInstruction i) -> dropReturn $ Just $ MiscInstruction i
            Just (_, EventComment t) -> dropReturn $ Just $ MiscComment t
            Just (_, EventContent (ContentText t))
                | T.all isSpace t -> CL.drop 1 >> goM'
            _ -> return Nothing
    goD = do
        x <- CL.peek
        case x of
            Just (_, EventBeginDoctype name meid) -> do
                CL.drop 1
                dropTillDoctype
                return (Just $ Doctype name meid)
            _ -> return Nothing
    dropTillDoctype = do
        x <- CL.head
        case x of
            -- Leaving the following line commented so that the intention of
            -- this function stays clear. I figure in the future xml-types will
            -- be expanded again to support some form of EventDeclaration
            --
            -- Just (EventDeclaration _) -> dropTillDoctype
            Just (_, EventEndDoctype) -> return ()
            Just epos -> lift $ monadThrow $ InvalidInlineDoctype epos
            Nothing -> lift $ monadThrow UnterminatedInlineDoctype

-- | Try to parse a document element (as defined in XML) from a stream of events.
--
-- @since 1.3.5
elementFromEvents :: MonadThrow m => Consumer P.EventPos m (Maybe Element)
elementFromEvents = goE
  where
    goE = do
        x <- CL.peek
        case x of
            Just (_, EventBeginElement n as) -> Just <$> goE' n as
            _                                -> return Nothing
    goE' n as = do
        CL.drop 1
        ns <- manyTries goN
        y <- CL.head
        if fmap snd y == Just (EventEndElement n)
            then return $ Element n as $ compressNodes ns
            else lift $ monadThrow $ MissingEndElement n y
    goN = do
        x <- CL.peek
        case x of
            Just (_, EventBeginElement n as) -> (Just . NodeElement) <$> goE' n as
            Just (_, EventInstruction i) -> dropReturn $ Just $ NodeInstruction i
            Just (_, EventContent c) -> dropReturn $ Just $ NodeContent c
            Just (_, EventComment t) -> dropReturn $ Just $ NodeComment t
            Just (_, EventCDATA t) -> dropReturn $ Just $ NodeContent $ ContentText t
            _ -> return Nothing

-- | Render a document into events.
toEvents :: Document -> [Event]
toEvents (Document prol root epi) =
      (EventBeginDocument :)
    . goP prol . elementToEvents' root . goM epi $ [EventEndDocument]
  where
    goP (Prologue before doctype after) =
        goM before . maybe id goD doctype . goM after
    goM []     = id
    goM [x]    = (goM' x :)
    goM (x:xs) = (goM' x :) . goM xs
    goM' (MiscInstruction i) = EventInstruction i
    goM' (MiscComment t)     = EventComment t
    goD (Doctype name meid) =
        (:) (EventBeginDoctype name meid)
      . (:) EventEndDoctype

-- | Render a document element into events.
--
-- @since 1.3.5
elementToEvents :: Element -> [Event]
elementToEvents e = elementToEvents' e []

elementToEvents' :: Element -> [Event] -> [Event]
elementToEvents' = goE
  where
    goE (Element name as ns) =
          (EventBeginElement name as :)
        . goN ns
        . (EventEndElement name :)
    goN []     = id
    goN [x]    = goN' x
    goN (x:xs) = goN' x . goN xs
    goN' (NodeElement e)     = goE e
    goN' (NodeInstruction i) = (EventInstruction i :)
    goN' (NodeContent c)     = (EventContent c :)
    goN' (NodeComment t)     = (EventComment t :)

compressNodes :: [Node] -> [Node]
compressNodes []     = []
compressNodes [x]    = [x]
compressNodes (x@(NodeContent (ContentText _)) : y@(NodeContent (ContentText _)) : z) =
    let (textNodes, remainder) = span (isJust . unContent) (x:y:z)
        texts = mapMaybe unContent textNodes
    in
        compressNodes $ NodeContent (ContentText $ mconcat texts) : remainder
    where
        unContent (NodeContent (ContentText text)) = Just text
        unContent _                                = Nothing
compressNodes (x:xs) = x : compressNodes xs

parseText :: ParseSettings -> TL.Text -> Either SomeException Document
parseText ps tl = runST
                $ runExceptionT
                $ CL.sourceList (TL.toChunks tl)
           $$ sinkTextDoc ps

parseText_ :: ParseSettings -> TL.Text -> Document
parseText_ ps = either throw id . parseText ps

sinkTextDoc :: MonadThrow m
            => ParseSettings
            -> Consumer Text m Document
sinkTextDoc ps = P.parseTextPos ps =$= fromEvents