{-# LANGUAGE FlexibleContexts, UndecidableInstances #-}
module Text.Roundtrip.Xml.Parser (

    GenXmlParser, XmlParser, runXmlParser, runXmlParser', runXmlParser''

  , WithPos, EventWithPos, eventWithPos, eventWithoutPos

  , SourceName, Line, Column, ParseError

  , EntityRenderer, defaultEntityRenderer

  , runXmlParserString, runXmlParserText, runXmlParserLazyText
  , runXmlParserByteString, runXmlParserLazyByteString

) where

import Control.Monad (unless, foldM)
import Control.Monad.State
import Control.Monad.Identity (Identity, runIdentity)
import Control.Exception (ErrorCall(..), SomeException, Exception, toException)

import qualified Data.Enumerator as E
import qualified Data.Enumerator.List as EL
import qualified Text.XML.Enumerator.Parse as EP
import qualified Data.Map as Map
import Data.Map (Map)
import qualified Data.List as List
import Data.Typeable (Typeable)
import Data.Either (partitionEithers)

import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL


import qualified Debug.Trace

import qualified Text.PrettyPrint.HughesPJ as Pp

import Data.XML.Types

import Text.Roundtrip
import Text.Roundtrip.Parser
import Text.Roundtrip.Xml.ParserInternal
import Text.Roundtrip.Xml.Pretty

type EntityRenderer = T.Text -> Maybe T.Text

defaultEntityRenderer :: EntityRenderer
defaultEntityRenderer = const Nothing

type XmlParser a = GenXmlParser [RtEventWithPos] Identity a

type EventGen a = EP.DecodeEntities -> E.Enumeratee a Event Identity [Event]

genEvents :: [a] -> EventGen a -> Either SomeException [Event]
genEvents items f =
    runIdentity $ E.run $
                  E.joinI $
                  E.enumList chunkSize items E.$$
                  f EP.decodeEntities E.$$
                  EL.consume
    where
      chunkSize = 1

-- Parsing a string/text/bytestring into a list of events into is done via
-- enumerators. This is not optimal because the resulting list is too strict.
-- However, currently no other functions exists for such a conversion.

runXmlParserGen :: XmlParser a -> SourceName -> EntityRenderer -> [b] -> EventGen b -> (Either ParseError a)
runXmlParserGen p src er items gen =
    case genEvents items gen of
      Left err -> Left $ mkParseError (initialPos src) (show err)
      Right events -> runXmlParser p src er events

runXmlParserString :: XmlParser a -> SourceName -> EntityRenderer -> String -> (Either ParseError a)
runXmlParserString p src e str = runXmlParserGen p src e [T.pack str] EP.parseText

runXmlParserText :: XmlParser a -> SourceName -> EntityRenderer -> T.Text -> (Either ParseError a)
runXmlParserText p src e t = runXmlParserGen p src e [t] EP.parseText

runXmlParserLazyText :: XmlParser a -> SourceName -> EntityRenderer -> TL.Text -> (Either ParseError a)
runXmlParserLazyText p src e t = runXmlParserGen p src e (TL.toChunks t) EP.parseText

runXmlParserByteString :: XmlParser a -> SourceName -> EntityRenderer -> BS.ByteString -> (Either ParseError a)
runXmlParserByteString p src e bs = runXmlParserGen p src e [bs] EP.parseBytes

runXmlParserLazyByteString :: XmlParser a -> SourceName -> EntityRenderer -> BSL.ByteString -> (Either ParseError a)
runXmlParserLazyByteString p src e bs = runXmlParserGen p src e (BSL.toChunks bs) EP.parseBytes

runXmlParser :: XmlParser a -> SourceName -> EntityRenderer -> [Event] -> (Either ParseError a)
runXmlParser p sourceName renderer events =
    runXmlParser'' p sourceName renderer (map eventWithoutPos events)

runXmlParser' :: XmlParser a -> EntityRenderer -> [EventWithPos] -> (Either ParseError a)
runXmlParser' p renderer events = runXmlParser'' p src renderer events
    where
      src = case events of
              [] -> ""
              (e:_) -> sourceName (wp_pos e)

runXmlParser'' :: XmlParser a -> SourceName -> EntityRenderer -> [EventWithPos] -> (Either ParseError a)
runXmlParser'' p sourceName entityRenderer events =
    let GenXmlParser q = xmlBeginDoc *> p <* xmlEndDoc
        rtEvents = List.unfoldr (simplifyEvents entityRenderer) events
    in runParser q Nothing sourceName rtEvents

simplifyEvents :: EntityRenderer -> [EventWithPos] -> Maybe (RtEventWithPos, [EventWithPos])
simplifyEvents renderEntity evs = go evs
    where
      go evs =
        case evs of
          [] -> Nothing
          (WithPos EventBeginDocument pos : rest) -> Just (WithPos RtBeginDocument pos, rest)
          (WithPos EventEndDocument pos : rest) -> Just (WithPos RtEndDocument pos, rest)
          (WithPos (EventInstruction _) _ : rest) -> go rest
          (WithPos (EventBeginDoctype _ _) _ : rest) -> go rest
          (WithPos EventEndDoctype _ : rest) -> go rest
          (WithPos (EventBeginElement n as) pos : rest) ->
              let insertAttr :: Either T.Text AttrMap -> Attribute
                             -> Either T.Text AttrMap
                  insertAttr em (k, vs) =
                      case em of
                        Right m ->
                            case partitionEithers (map contentToText vs) of
                              ((t:_), _) -> Left t
                              ([], vs') -> Right ((k, T.concat vs') : m)
                        Left t -> Left t
              in case Prelude.foldl insertAttr (Right []) as of
                   Right as' ->  as' `seq` Just (WithPos (RtBeginElement n (reverse as')) pos, rest)
                   Left t -> Just (WithPos (RtInvalidEntity t) pos, [])
          (WithPos (EventEndElement n) pos : rest) -> Just (WithPos (RtEndElement n) pos, rest)
          (WithPos (EventContent c) pos : rest) ->
              case contentToText c of
                Left t -> Just (WithPos (RtInvalidEntity t) pos, [])
                Right t ->
                    let (cs, rest') = splitContent rest
                    in case partitionEithers (map contentToText cs) of
                         ((t:_), _) -> Just (WithPos (RtInvalidEntity t) pos, [])
                         ([], ts) -> let text = T.strip $ t `T.append` T.concat ts
                                     in if T.null text
                                           then go rest'
                                           else Just (WithPos (RtText text) pos, rest')
          (WithPos (EventComment _) _ : rest) -> go rest
      splitContent (WithPos (EventContent c) pos : rest) =
          let (cs, rest') = splitContent rest
          in (c:cs, rest')
      splitContent l = ([], l)
      contentToText c =
          case c of
            ContentText t -> Right t
            ContentEntity t ->
                case renderEntity t of
                  Just t' -> Right t'
                  Nothing -> Left t

instance (Monad m, Stream s m RtEventWithPos) => IsoFunctor (GenXmlParser s m) where
    iso <$> (GenXmlParser p) = GenXmlParser $ parsecApply iso p

instance (Monad m, Stream s m RtEventWithPos) => ProductFunctor (GenXmlParser s m) where
    (GenXmlParser p) <*> (GenXmlParser q) = GenXmlParser $ parsecConcat p q

instance (Monad m, Stream s m RtEventWithPos) => Alternative (GenXmlParser s m) where
    GenXmlParser p <|> GenXmlParser q = GenXmlParser $ parsecAlternative1Lookahead p q
    GenXmlParser p <||> GenXmlParser q = GenXmlParser $ parsecAlternativeInfLookahead p q
    empty = GenXmlParser parsecEmpty

instance (Monad m, Stream s m RtEventWithPos) => Syntax (GenXmlParser s m) where
    pure x = GenXmlParser (parsecPure x)

instance (Monad m, Stream s m RtEventWithPos) => XmlSyntax (GenXmlParser s m) where
    xmlBeginDoc = GenXmlParser xmlParserBeginDoc
    xmlEndDoc = GenXmlParser xmlParserEndDoc
    xmlBeginElem = GenXmlParser . xmlParserBeginElem
    xmlAttrValue = GenXmlParser . xmlParserAttrValue
    xmlTextNotEmpty = GenXmlParser xmlParserTextNotEmpty
    xmlEndElem = GenXmlParser . xmlParserEndElem

matchEvent :: (Show a, Monad m, Stream s m RtEventWithPos)
           => (RtEvent -> Maybe a) -> String -> PxParser s m a
matchEvent matcher desc =
    do state <- getState
       case state of
         Just _ -> parserZero
         Nothing -> tokenPrim show (\_ t _ -> wp_pos t) debugMatcher
   where
     debugMatcher ev =
         let res = matcher (wp_data ev)
         in ("matching " ++ show ev ++ " against " ++ desc ++ ", result: " ++ show res) `debug` res

mkPxParser :: Monad m => String -> PxParser s m a -> PxParser s m a
mkPxParser msg p = (p <?> msg)

xmlParserBeginDoc :: (Monad m, Stream s m RtEventWithPos) => PxParser s m ()
xmlParserBeginDoc = mkPxParser "begin-document" $
    let f RtBeginDocument = Just ()
        f _ = Nothing
    in matchEvent f "begin-document"

xmlParserEndDoc :: (Monad m, Stream s m RtEventWithPos) => PxParser s m ()
xmlParserEndDoc = mkPxParser "end-document" $
    let f RtEndDocument = Just ()
        f _ = Nothing
    in matchEvent f "end-document"

xmlParserBeginElem :: (Monad m, Stream s m RtEventWithPos) => Name -> PxParser s m ()
xmlParserBeginElem name = mkPxParser ("<" ++ ppStr name ++ " ...>") $
    do let f (RtBeginElement name' attrs) | name == name' = Just attrs
           f _ = Nothing
       attrs <- matchEvent f ("begin-element " ++ ppStr name)
       unless (null attrs) (putState $ Just attrs)
       return ()

xmlParserAttrValue :: Monad m => Name -> PxParser s m T.Text
xmlParserAttrValue name = mkPxParser ("attribute " ++ ppStr name) $
    do state <- getState
       case state of
         Nothing -> parserZero
         Just m ->
             case List.break (\(x,_) -> x == name) m of
               (prefix, (_, t) : suffix) ->
                   do let m' = prefix ++ suffix
                      if null m'
                         then putState Nothing
                         else putState (Just m')
                      return t
               _ -> parserZero

xmlParserEndElem :: (Monad m, Stream s m RtEventWithPos) => Name -> PxParser s m ()
xmlParserEndElem name = mkPxParser ("</" ++ ppStr name ++ ">") $
    let f (RtEndElement name') | name == name' = Just ()
        f _ = Nothing
    in matchEvent f ("end-element " ++ ppStr name)

xmlParserTextNotEmpty :: (Monad m, Stream s m RtEventWithPos) => PxParser s m T.Text
xmlParserTextNotEmpty = mkPxParser "text node" $
    let f (RtText t) = Just t
        f _ = Nothing
    in matchEvent f "text node"

-- debug = Debug.Trace.trace
debug _ x = x