{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts   #-}
{-# LANGUAGE RankNTypes         #-}
-- | DOM-based XML parsing and rendering.
--
-- In this module, attribute values and content nodes can contain either raw
-- text or entities. In most cases, these can be fully resolved at parsing. If
-- that is the case for your documents, the "Text.XML" module provides
-- simplified datatypes that only contain raw text.
module Text.XML.Unresolved
    ( -- * Non-streaming functions
      writeFile
    , readFile
      -- * Lazy bytestrings
    , renderLBS
    , parseLBS
    , parseLBS_
      -- * Text
    , parseText
    , parseText_
    , sinkTextDoc
      -- * Byte streams
    , sinkDoc
      -- * Streaming functions
    , toEvents
    , elementToEvents
    , fromEvents
    , elementFromEvents
    , renderBuilder
    , renderBytes
    , renderText
      -- * Exceptions
    , InvalidEventStream (..)
      -- * Settings
    , P.def
      -- ** Parse
    , P.ParseSettings
    , P.psDecodeEntities
    , P.psRetainNamespaces
      -- ** Render
    , R.RenderSettings
    , R.rsPretty
    , R.rsNamespaces
    ) where

import           Conduit
import           Control.Applicative          ((<$>), (<*>))
import           Control.Exception            (Exception, SomeException, throw)
import           Control.Monad                (when)
import           Control.Monad.Trans.Class    (lift)
import           Data.ByteString              (ByteString)
import           Data.ByteString.Builder      (Builder)
import qualified Data.ByteString.Lazy         as L
import           Data.Char                    (isSpace)
import qualified Data.Conduit.Binary          as CB
import           Data.Conduit.Lazy            (lazyConsume)
import qualified Data.Conduit.List            as CL
import           Data.Maybe                   (isJust, mapMaybe)
import           Data.Monoid                  (mconcat)
import           Data.Text                    (Text)
import qualified Data.Text                    as T
import qualified Data.Text.Lazy               as TL
import           Data.Typeable                (Typeable)
import           Data.XML.Types
import           Prelude                      hiding (readFile, writeFile)
import           System.IO.Unsafe             (unsafePerformIO)
import           Text.XML.Stream.Parse        (ParseSettings)
import qualified Text.XML.Stream.Parse        as P
import qualified Text.XML.Stream.Render       as R

readFile :: P.ParseSettings -> FilePath -> IO Document
readFile ps fp = runConduitRes $ CB.sourceFile fp .| sinkDoc ps

sinkDoc :: MonadThrow m
        => P.ParseSettings
        -> ConduitT ByteString o m Document
sinkDoc ps = P.parseBytesPos ps .| fromEvents

writeFile :: R.RenderSettings -> FilePath -> Document -> IO ()
writeFile rs fp doc =
    runConduitRes $ renderBytes rs doc .| CB.sinkFile fp

renderLBS :: R.RenderSettings -> Document -> L.ByteString
renderLBS rs doc =
    L.fromChunks $ unsafePerformIO
                 -- not generally safe, but we know that runResourceT
                 -- will not deallocate any of the resources being used
                 -- by the process
                 $ lazyConsume
                 $ renderBytes rs doc

parseLBS :: P.ParseSettings -> L.ByteString -> Either SomeException Document
parseLBS ps lbs = runConduit $ CL.sourceList (L.toChunks lbs) .| sinkDoc ps

parseLBS_ :: P.ParseSettings -> L.ByteString -> Document
parseLBS_ ps lbs = either throw id $ parseLBS ps lbs

data InvalidEventStream = ContentAfterRoot P.EventPos
                        | MissingRootElement
                        | InvalidInlineDoctype P.EventPos
                        | MissingEndElement Name (Maybe P.EventPos)
                        | UnterminatedInlineDoctype
    deriving Typeable
instance Exception InvalidEventStream
instance Show InvalidEventStream where
    show (ContentAfterRoot (pos, e)) = mShowPos pos ++ "Found content after root element: " ++ prettyShowE e
    show MissingRootElement = "Missing root element"
    show (InvalidInlineDoctype (pos, e)) = mShowPos pos ++ "Invalid content inside doctype: " ++ prettyShowE e
    show (MissingEndElement name Nothing) = "Documented ended while expected end element for: " ++ prettyShowName name
    show (MissingEndElement name (Just (pos, e))) = mShowPos pos ++ "Expected end element for: " ++ prettyShowName name ++ ", but received: " ++ prettyShowE e
    show UnterminatedInlineDoctype = "Unterminated doctype declaration"

mShowPos :: Maybe P.PositionRange -> String
mShowPos Nothing    = ""
mShowPos (Just pos) = show pos ++ ": "

prettyShowE :: Event -> String
prettyShowE = show -- FIXME

prettyShowName :: Name -> String
prettyShowName = show -- FIXME

renderBuilder :: Monad m => R.RenderSettings -> Document -> ConduitT i Builder m ()
renderBuilder rs doc = CL.sourceList (toEvents doc) .| R.renderBuilder rs

renderBytes :: PrimMonad m => R.RenderSettings -> Document -> ConduitT i ByteString m ()
renderBytes rs doc = CL.sourceList (toEvents doc) .| R.renderBytes rs

renderText :: (MonadThrow m, PrimMonad m) => R.RenderSettings -> Document -> ConduitT i Text m ()
renderText rs doc = CL.sourceList (toEvents doc) .| R.renderText rs

manyTries :: Monad m => m (Maybe a) -> m [a]
manyTries f =
    go id
  where
    go front = do
        x <- f
        case x of
            Nothing -> return $ front []
            Just y  -> go (front . (:) y)

dropReturn :: Monad m => a -> ConduitM i o m a
dropReturn x = CL.drop 1 >> return x

-- | Parse a document from a stream of events.
fromEvents :: MonadThrow m => ConduitT P.EventPos o m Document
fromEvents = do
    skip EventBeginDocument
    d <- Document <$> goP <*> require elementFromEvents <*> goM
    skip EventEndDocument
    y <- CL.head
    case y of
        Nothing -> return d
        Just (_, EventEndDocument) -> lift $ throwM MissingRootElement
        Just z ->
            lift $ throwM $ ContentAfterRoot z
  where
    skip e = do
        x <- CL.peek
        when (fmap snd x == Just e) (CL.drop 1)
    require f = do
        x <- f
        case x of
            Just y -> return y
            Nothing -> do
                my <- CL.head
                case my of
                    Nothing -> error "Text.XML.Unresolved:impossible"
                    Just (_, EventEndDocument) -> lift $ throwM MissingRootElement
                    Just y -> lift $ throwM $ ContentAfterRoot y
    goP = Prologue <$> goM <*> goD <*> goM
    goM = manyTries goM'
    goM' = do
        x <- CL.peek
        case x of
            Just (_, EventInstruction i) -> dropReturn $ Just $ MiscInstruction i
            Just (_, EventComment t) -> dropReturn $ Just $ MiscComment t
            Just (_, EventContent (ContentText t))
                | T.all isSpace t -> CL.drop 1 >> goM'
            _ -> return Nothing
    goD = do
        x <- CL.peek
        case x of
            Just (_, EventBeginDoctype name meid) -> do
                CL.drop 1
                dropTillDoctype
                return (Just $ Doctype name meid)
            _ -> return Nothing
    dropTillDoctype = do
        x <- CL.head
        case x of
            -- Leaving the following line commented so that the intention of
            -- this function stays clear. I figure in the future xml-types will
            -- be expanded again to support some form of EventDeclaration
            --
            -- Just (EventDeclaration _) -> dropTillDoctype
            Just (_, EventEndDoctype) -> return ()
            Just epos -> lift $ throwM $ InvalidInlineDoctype epos
            Nothing -> lift $ throwM UnterminatedInlineDoctype

-- | Try to parse a document element (as defined in XML) from a stream of events.
--
-- @since 1.3.5
elementFromEvents :: MonadThrow m => ConduitT P.EventPos o m (Maybe Element)
elementFromEvents = goE
  where
    goE = do
        x <- CL.peek
        case x of
            Just (_, EventBeginElement n as) -> Just <$> goE' n as
            _                                -> return Nothing
    goE' n as = do
        CL.drop 1
        ns <- manyTries goN
        y <- CL.head
        if fmap snd y == Just (EventEndElement n)
            then return $ Element n as $ compressNodes ns
            else lift $ throwM $ MissingEndElement n y
    goN = do
        x <- CL.peek
        case x of
            Just (_, EventBeginElement n as) -> (Just . NodeElement) <$> goE' n as
            Just (_, EventInstruction i) -> dropReturn $ Just $ NodeInstruction i
            Just (_, EventContent c) -> dropReturn $ Just $ NodeContent c
            Just (_, EventComment t) -> dropReturn $ Just $ NodeComment t
            Just (_, EventCDATA t) -> dropReturn $ Just $ NodeContent $ ContentText t
            _ -> return Nothing

-- | Render a document into events.
toEvents :: Document -> [Event]
toEvents (Document prol root epi) =
      (EventBeginDocument :)
    . goP prol . elementToEvents' root . goM epi $ [EventEndDocument]
  where
    goP (Prologue before doctype after) =
        goM before . maybe id goD doctype . goM after
    goM []     = id
    goM [x]    = (goM' x :)
    goM (x:xs) = (goM' x :) . goM xs
    goM' (MiscInstruction i) = EventInstruction i
    goM' (MiscComment t)     = EventComment t
    goD (Doctype name meid) =
        (:) (EventBeginDoctype name meid)
      . (:) EventEndDoctype

-- | Render a document element into events.
--
-- @since 1.3.5
elementToEvents :: Element -> [Event]
elementToEvents e = elementToEvents' e []

elementToEvents' :: Element -> [Event] -> [Event]
elementToEvents' = goE
  where
    goE (Element name as ns) =
          (EventBeginElement name as :)
        . goN ns
        . (EventEndElement name :)
    goN []     = id
    goN [x]    = goN' x
    goN (x:xs) = goN' x . goN xs
    goN' (NodeElement e)     = goE e
    goN' (NodeInstruction i) = (EventInstruction i :)
    goN' (NodeContent c)     = (EventContent c :)
    goN' (NodeComment t)     = (EventComment t :)

compressNodes :: [Node] -> [Node]
compressNodes []     = []
compressNodes [x]    = [x]
compressNodes (x@(NodeContent (ContentText _)) : y@(NodeContent (ContentText _)) : z) =
    let (textNodes, remainder) = span (isJust . unContent) (x:y:z)
        texts = mapMaybe unContent textNodes
    in
        compressNodes $ NodeContent (ContentText $ mconcat texts) : remainder
    where
        unContent (NodeContent (ContentText text)) = Just text
        unContent _                                = Nothing
compressNodes (x:xs) = x : compressNodes xs

parseText :: ParseSettings -> TL.Text -> Either SomeException Document
parseText ps tl =
    runConduit
  $ CL.sourceList (TL.toChunks tl)
 .| sinkTextDoc ps

parseText_ :: ParseSettings -> TL.Text -> Document
parseText_ ps = either throw id . parseText ps

sinkTextDoc :: MonadThrow m
            => ParseSettings
            -> ConduitT Text o m Document
sinkTextDoc ps = P.parseTextPos ps .| fromEvents