{-# OPTIONS_GHC -ddump-minimal-imports #-}
{-# LANGUAGE DeriveDataTypeable, TypeSynonymInstances, FlexibleInstances, MultiParamTypeClasses #-}
module Text.Roundtrip.Xml.Enumerator.Parser (

    XmlParseIteratee, parseXml, parseXml', parseXml''
  , XmlException(..)
  , parseXml_, XmlParseIteratee'

) where

import Control.Monad.State
import Control.Exception

import qualified Data.Enumerator as E
import qualified Data.Enumerator.List as EL
import qualified Data.Text as T
import Data.XML.Types
import Data.Typeable

import qualified Debug.Trace

import Text.Roundtrip
import Text.Roundtrip.Parser
import Text.Roundtrip.Xml
import Text.Roundtrip.Xml.ParserInternal

import Data.Reference
import Data.IORef

data (Monad m, Reference r m) => Cursor r m a = Cursor (r (NextCursor r m a)) [a]
data (Monad m, Reference r m) => NextCursor r m a = NextCursor (Cursor r m a)
                                                  | None
                                                  | Uneval

liftIteratee :: Monad m => m b -> E.Iteratee a m b
liftIteratee m = E.Iteratee (m >>= E.runIteratee . return)

unconsStream :: (Monad m, Reference r m)
            => Cursor r m a -> E.Iteratee a m (Maybe (a, Cursor r m a))
unconsStream p@(Cursor r c)
    | null c = do x <- liftIteratee (readRef r)
                  unconsCursor r x
    | otherwise = return $! unconsChunk p

unconsCursor :: (Monad m, Reference r m)
             => r (NextCursor r m a)
             -> NextCursor r m a
             -> E.Iteratee a m (Maybe (a, Cursor r m a))
unconsCursor _ (NextCursor c) = return $! unconsChunk c
unconsCursor _ None = return Nothing
unconsCursor r Uneval = E.continue (extendCursor r)

unconsChunk :: (Monad m, Reference r m)
            => Cursor r m a -> Maybe (a, Cursor r m a)
unconsChunk (Cursor r c) = Just (head c, Cursor r (tail c))

extendCursor :: (Monad m, Reference r m)
             => r (NextCursor r m a)
             -> E.Stream a
             -> E.Iteratee a m (Maybe (a, Cursor r m a))
extendCursor r (E.Chunks l)
    | null l = E.continue (extendCursor r)
    | otherwise = do x <- liftIteratee (insertCursor r l)
                     return $ unconsChunk x
extendCursor r E.EOF =
    do liftIteratee (writeRef r None)
       return Nothing

insertCursor :: (Monad m, Reference r m)
             => r (NextCursor r m a)
             -> [a]
             -> m (Cursor r m a)
insertCursor r l =
    do r' <- newRef Uneval
       let c = Cursor r' l
       writeRef r (NextCursor c)
       return c

mkCursor :: (Monad m, Reference r m) => m (Cursor r m a)
mkCursor =
    do r <- newRef Uneval
       return (Cursor r [])

instance (Monad m, Reference r m) => Stream (Cursor r m a) (E.Iteratee a m) a where
  uncons = unconsStream

type XmlParseIteratee' r m a = GenXmlParser (Cursor r m RtEventWithPos) (E.Iteratee RtEventWithPos m) a

concatCursor :: (Monad m, Reference r m) => Cursor r m a -> m [a]
concatCursor (Cursor r l) =
    liftM (l ++) (readRef r >>= concatCursor')

concatCursor' :: (Monad m, Reference r m) => NextCursor r m a -> m [a]
concatCursor' (NextCursor n) = concatCursor n
concatCursor' _              = return []

parseXml_''' :: (Reference r m, Monad m) => SourceName -> XmlParseIteratee' r m a -> E.Iteratee RtEventWithPos m a
parseXml_''' sourceName p =
    let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc
    in do cursor <- liftIteratee mkCursor
          res <- runParserT (liftM2 (,) q getInput) Nothing sourceName cursor
          case res of
            Left err -> E.returnI (E.Error $ toException $ ParseError err)
            Right (x, restCursor) ->
                do rest <- liftIteratee $ concatCursor restCursor
                   E.yield x (E.Chunks rest)

parseXml_ :: (Reference r m, Monad m) => SourceName -> EntityRenderer -> XmlParseIteratee' r m a -> E.Iteratee Event m a
parseXml_ sourceName entityRenderer p =
    E.joinI $ EL.map eventWithoutPos E.$$
              E.joinI $ simplify entityRenderer E.$$
              parseXml_''' sourceName p

type PStreamPos = Int
type PStreamState = (PStreamPos, Maybe RtEventWithPos) -- last event read from stream with position

type XmlParseIteratee m a = GenXmlParser PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) a

data XmlException = ParseError ParseError
                  | InvalidEntity T.Text SourcePos
    deriving (Typeable, Show)

instance Exception XmlException

instance Monad m => Stream PStreamPos (StateT PStreamState (E.Iteratee RtEventWithPos m)) RtEventWithPos where
    uncons pos =
        do (lastPos, mLastTok) <- get
           case () of
             _| pos == lastPos ->
                  do mx <- lift $ EL.head
                     let pos' = pos + 1
                     put (pos', mx)
                     ("returning " ++ show mx ++ " from underlying stream, position " ++ show pos') `debug`
                       case mx of
                         Just x -> return $ Just (x, pos')
                         Nothing -> return Nothing
              | pos == lastPos - 1 ->
                  ("returning " ++ show mLastTok ++ " from lookahead buffer, position " ++ show lastPos) `debug`
                    case mLastTok of
                      Just lastTok -> return $ Just (lastTok, lastPos)
                      Nothing -> return Nothing
              | otherwise ->  ("invalid lookahead, position request: " ++ show (pos + 1) ++
                               ", position of next token: " ++ show (lastPos + 1) ++
                               ", position of buffered token: " ++ show lastPos ++
                               " (" ++ show mLastTok ++ ")") `warn` return Nothing

parseXml :: Monad m => SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee Event m a
parseXml sourceName entityRenderer p =
    E.joinI $ EL.map eventWithoutPos E.$$
              E.joinI $ simplify entityRenderer E.$$
              parseXml''' sourceName p

parseXml' :: Monad m => EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a
parseXml' entityRenderer p =
    do first <- E.peek
       let src = case first of
                   Just (WithPos _ pos) -> sourceName pos
                   Nothing -> ""
       E.joinI $ simplify entityRenderer E.$$ parseXml''' src p

parseXml'' :: Monad m => SourceName -> EntityRenderer -> XmlParseIteratee m a -> E.Iteratee EventWithPos m a
parseXml'' sourceName entityRenderer p =
       E.joinI $ simplify entityRenderer E.$$ parseXml''' sourceName p

parseXml''' :: Monad m => SourceName -> XmlParseIteratee m a -> E.Iteratee RtEventWithPos m a
parseXml''' sourceName p =
    let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc
    in do let startPos = -1
          res <- evalStateT (runParserT q Nothing sourceName startPos) (startPos, Nothing)
          case res of
            Left err -> E.returnI (E.Error $ toException $ ParseError err)
            Right x -> E.yield x E.EOF

simplify :: Monad m => (T.Text -> Maybe T.Text) -> E.Enumeratee EventWithPos RtEventWithPos m b
simplify renderEntity = loop
  where
    loop = E.checkDone go
    go k =
        do x <- EL.head
           case x of
             Nothing -> k (E.Chunks []) E.>>== return
             Just (WithPos EventBeginDocument pos) ->
                 k (E.Chunks [WithPos RtBeginDocument pos]) E.>>== loop
             Just (WithPos EventEndDocument pos) ->
                 k (E.Chunks [WithPos RtEndDocument pos]) E.>>== loop
             Just (WithPos (EventInstruction{}) pos) -> go k
             Just (WithPos (EventBeginDoctype{}) pos) -> go k
             Just (WithPos (EventEndDoctype{}) pos) -> go k
             Just (WithPos (EventBeginElement n as) pos) ->
                 let insertAttr m (k, vs) =
                      do vs' <- mapM (contentToText pos) vs
                         return $ (k, (T.concat vs')) : m
                 in do as' <- foldM insertAttr [] as
                       as' `seq` k (E.Chunks [WithPos (RtBeginElement n (reverse as')) pos])
                                 E.>>== loop
             Just (WithPos (EventEndElement n) pos) ->
                 k (E.Chunks [WithPos (RtEndElement n) pos]) E.>>== loop
             Just (WithPos (EventContent c) pos) -> do
                 t <- contentToText pos c
                 ts <- takeContents $ (:) t
                 let text = T.strip $ T.concat $ ts []
                 if T.null text
                    then go k
                    else k (E.Chunks [WithPos (RtText text) pos]) E.>>== loop
             Just (WithPos (EventComment{}) _) -> go k
    contentToText pos c =
        case c of
          ContentEntity e ->
              case renderEntity e of
                Nothing -> E.throwError $ InvalidEntity e pos
                Just t -> return t
          ContentText t -> return t
    takeContents front = do
      do x <- E.peek
         case x of
           Nothing -> return front
           Just (WithPos EventBeginElement{} pos) -> return front
           Just (WithPos EventEndElement{} pos) -> return front
           Just (WithPos (EventContent c) pos) ->
               do EL.drop 1
                  t <- contentToText pos c
                  takeContents $ front . (:) t
           Just (WithPos EventBeginDocument pos) -> return front
           Just (WithPos EventEndDocument pos) -> return front
           Just (WithPos EventInstruction{} pos) -> helper
           Just (WithPos EventBeginDoctype{} pos) -> helper
           Just (WithPos EventEndDoctype{} pos) -> helper
           Just (WithPos EventComment{} pos) -> helper
      where
        helper = EL.drop 1 >> takeContents front

warn = Debug.Trace.trace

-- debug = Debug.Trace.trace
debug _ x = x