{-# LANGUAGE DeriveFunctor              #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes                 #-}
{-# LANGUAGE StandaloneDeriving         #-}
{-# LANGUAGE TupleSections              #-}
{-# LANGUAGE TypeFamilies               #-}
module Data.XML.Parser.High
  ( module Data.XML.Parser.High.AttrParser
  , module Data.XML.Parser.High.NameParser
  , Prolog(..)
  , Token(..)
  , TokenParser()
  , ContentParser()
  , noContent
  , withContent
  , anyContent
  , runTokenParser
  , prolog
  , instruction
  , textContent
  , textContent'
  , tag
  , tag'
  , anyTag
  , anyToken
  , anyToken'
  ) where
import           Control.Applicative
import           Control.Arrow
import           Control.Monad.Compat
import           Control.Monad.Fail.Compat
import           Data.Function
import           Data.Map                        (Map)
import qualified Data.Map                        as Map
import           Data.Maybe
import           Data.String
import           Data.Text                       (Text)
import qualified Data.Text                       as Text
import           Data.XML.Parser.High.AttrParser
import           Data.XML.Parser.High.NameParser
import           Data.XML.Parser.Low
import qualified Data.XML.Parser.Mid             as L1
import           Data.XML.Parser.Mid.Attribute
import           Prelude                         ()
import           Prelude.Compat
import           Text.Parser.Char
import           Text.Parser.Combinators
import           Text.ParserCombinators.ReadP    (readP_to_S)
data Prolog = Prolog
  { prologXmlDeclaration :: Maybe L1.XMLDeclaration
  , prologInstructions   :: [L1.Instruction]
  , prologDoctype        :: Maybe L1.Doctype
  } deriving (Eq, Ord, Read, Show)
data Token
  = TokenProlog Prolog
  | TokenInstruction L1.Instruction
  | TokenTag QName (Map QName Text) [Token]
  | TokenTextContent Text
  deriving (Eq, Ord, Read, Show)
newtype TokenParser m a = TokenParser { runTokenParser :: m a }
deriving instance Functor m => Functor (TokenParser m)
deriving instance Applicative m => Applicative (TokenParser m)
deriving instance Alternative m => Alternative (TokenParser m)
deriving instance Monad m => Monad (TokenParser m)
instance (Parsing m, Monad m) => MonadFail (TokenParser m) where
  fail = TokenParser . unexpected
data ContentParser m a
  = NoContent (m a)
  | AnyContent ([Token] -> m a)
  | WithContent (TokenParser m a)
deriving instance Functor m => Functor (ContentParser m)
withContent :: TokenParser m a -> ContentParser m a
withContent = WithContent
noContent :: Applicative m => ContentParser m ()
noContent = NoContent $ pure ()
anyContent :: CharParsing m => Monad m => ContentParser m ()
anyContent = AnyContent $ const $ pure ()
instruction :: CharParsing m => Monad m => TokenParser m L1.Instruction
instruction = TokenParser $ do
  skipCommentsWhitespace
  L1.runTokenParser L1.tokenInstruction
prolog :: CharParsing m => Monad m => TokenParser m Prolog
prolog = TokenParser $ do
  xmlDeclaration <- optional $ L1.runTokenParser L1.tokenXmlDeclaration
  skipCommentsWhitespace
  instructions <- runTokenParser $ many instruction
  doctype <- optional $ do
    skipCommentsWhitespace
    L1.runTokenParser L1.tokenDoctype
  when (isNothing xmlDeclaration && null instructions && isNothing doctype)
    $ unexpected "Expected XML prolog"
  return $ Prolog xmlDeclaration instructions doctype
textContent :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Text
textContent entityDecoder = TokenParser $ mconcat <$> do
  skipComments
  (textualData <|> L1.runTokenParser L1.tokenCdata) `sepBy1` L1.runTokenParser L1.tokenComment
  where textualData = expandContents entityDecoder =<< L1.runTokenParser L1.tokenData
textContent' :: CharParsing m => Monad m => TokenParser m Text
textContent' = textContent decodeStandardEntities
normalizeAttributes :: EntityDecoder -> [Attribute] -> Map QName Text
normalizeAttributes entityDecoder attributes = Map.fromList $ do
  Attribute name contents <- attributes
  value <- maybeToList $ expandContents entityDecoder contents
  return (name, value)
tag :: CharParsing m => Monad m
    => EntityDecoder             
    -> NameParser a              
    -> (a -> AttrParser b)       
    -> (b -> ContentParser m c)  
    -> TokenParser m c
tag entityDecoder parseName parseAttributes parseContent = parseStartToEnd <|> parseEmptyElement where
  parseStartToEnd = TokenParser $ do
    skipCommentsWhitespace
    L1.StartTag name attributes <- L1.runTokenParser L1.tokenStartTag
    a <- processName name
    b <- processAttributes a attributes
    c <- case parseContent b of
      NoContent f        -> f
      AnyContent f       -> f =<< runTokenParser (many $ anyToken entityDecoder)
      WithContent parser -> runTokenParser parser
    skipCommentsWhitespace
    L1.runTokenParser $ do
      name' <- L1.tokenEndTag
      when (name /= name') $ fail "Invalid end tag name"
    return c
  parseEmptyElement = TokenParser $ do
    skipCommentsWhitespace
    L1.EmptyElementTag name attributes <- L1.runTokenParser L1.tokenEmptyElementTag
    a <- processName name
    b <- processAttributes a attributes
    case parseContent b of
      NoContent f        -> f
      AnyContent f       -> f mempty
      WithContent parser -> unexpected "Expected non-empty tag"
  processName name = runNameParser parseName name
    & maybe (unexpected "Unexpected name") return
  processAttributes state attributes = runAttrParser (parseAttributes state) (normalizeAttributes entityDecoder attributes)
    & maybe (unexpected "Unexpected attributes") return
tag' :: CharParsing m => Monad m
     => NameParser a       
     -> AttrParser b       
     -> ContentParser m c  
     -> TokenParser m c
tag' parseName parseAttributes parseBody = tag decodeStandardEntities parseName (const parseAttributes) (const parseBody)
anyTag :: CharParsing m => Monad m => TokenParser m ()
anyTag = tag' anyName anyAttr anyContent
anyToken :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Token
anyToken entityDecoder = (TokenProlog <$> prolog)
  <|> (TokenInstruction <$> instruction)
  <|> tokenTag
  <|> (TokenTextContent <$> textContent entityDecoder)
  where tokenTag = tag entityDecoder anyName (\name -> (name,) <$> forwardAttrs) $ \(name, attributes) ->
          TokenTag name attributes <$> AnyContent pure
        forwardAttrs = AttrParser Just
anyToken' :: CharParsing m => Monad m => TokenParser m Token
anyToken' = anyToken decodeStandardEntities
skipComments :: CharParsing m => Monad m => m ()
skipComments = void $ many $ L1.runTokenParser L1.tokenComment
skipCommentsWhitespace :: CharParsing m => Monad m => m ()
skipCommentsWhitespace = void $ many $ void (L1.runTokenParser L1.tokenComment) <|> void tokenWhitespace
decodeStandardEntities :: EntityDecoder
decodeStandardEntities = decodePredefinedEntities <> decodeHtmlEntities