{-# LANGUAGE DeriveFunctor              #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE RankNTypes                 #-}
{-# LANGUAGE StandaloneDeriving         #-}
module Data.XML.Parser.Mid
  ( module Data.XML.Parser.Mid.Attribute
  , module Data.XML.Parser.Mid.Doctype
  , Instruction(..)
  , XMLDeclaration(..)
  , StartTag(..)
  , EmptyElementTag(..)
  , Token(..)
  , TokenParser()
  , runTokenParser
  , tokenInstruction
  , tokenComment
  , tokenCdata
  , tokenDoctype
  , tokenXmlDeclaration
  , tokenStartTag
  , tokenEndTag
  , tokenEmptyElementTag
  , tokenData
  , anyToken
  ) where
import           Control.Applicative
import           Control.Arrow                 ((>>>))
import           Control.Monad.Compat
import           Control.Monad.Fail.Compat
import           Data.Char
import           Data.Functor
import           Data.Maybe
import           Data.Text                     (Text)
import qualified Data.Text                     as Text
import           Data.XML.Parser.Low
import           Data.XML.Parser.Mid.Attribute
import           Data.XML.Parser.Mid.Doctype
import           Numeric
import           Text.Parser.Char
import           Text.Parser.Combinators
data Token
  = TokenXMLDeclaration XMLDeclaration
  | TokenDoctype Doctype
  | TokenInstruction Instruction
  | TokenStartTag StartTag
  | TokenEndTag QName
  | TokenEmptyElementTag EmptyElementTag
  | TokenData [Content]
  | TokenComment Text
  | TokenCDATA Text
  deriving (Eq, Ord, Show)
data Instruction = Instruction Text Text
  deriving (Eq, Ord, Read, Show)
data XMLDeclaration = XMLDeclaration Text (Maybe Text) (Maybe Bool)
  deriving (Eq, Ord, Read, Show)
data StartTag = StartTag QName [Attribute]
  deriving (Eq, Ord, Read, Show)
data EmptyElementTag = EmptyElementTag QName [Attribute]
  deriving (Eq, Ord, Read, Show)
newtype TokenParser m a = TokenParser { runTokenParser :: m a }
deriving instance Functor m => Functor (TokenParser m)
deriving instance Applicative m => Applicative (TokenParser m)
deriving instance Alternative m => Alternative (TokenParser m)
deriving instance Monad m => Monad (TokenParser m)
instance (Parsing m, Monad m) => MonadFail (TokenParser m) where
  fail = TokenParser . unexpected
tokenDoctype :: CharParsing m => Monad m => TokenParser m Doctype
tokenDoctype = TokenParser doctype
tokenInstruction :: CharParsing m => Monad m => TokenParser m Instruction
tokenInstruction = TokenParser $ do
  name <- tokenInstructionOpen
  tokenWhitespace
  content <- manyTill anyChar $ try tokenInstructionClose
  return $ Instruction name $ Text.pack content
tokenComment :: CharParsing m => Monad m => TokenParser m Text
tokenComment = TokenParser $ do
  tokenCommentOpen
  content <- manyTill anyChar $ try tokenCommentClose
  return $ Text.pack content
tokenCdata :: CharParsing m => Monad m => TokenParser m Text
tokenCdata = TokenParser $ do
  tokenCdataOpen
  content <- manyTill anyChar $ try tokenCdataClose
  return $ Text.pack content
tokenXmlDeclaration :: CharParsing m => Monad m => TokenParser m XMLDeclaration
tokenXmlDeclaration = TokenParser $ do
  tokenXmlDeclarationOpen
  tokenWhitespace
  Attribute key value <- attribute
  guard $ key == QName "" "version"
  version <- expandContents decodePredefinedEntities value
  encoding <- optional $ do
    tokenWhitespace
    Attribute key value <- attribute
    guard $ key == QName "" "encoding"
    expandContents decodePredefinedEntities value
  standalone <- optional $ do
    tokenWhitespace
    Attribute key value <- attribute
    guard $ key == QName "" "standalone"
    boolean <- expandContents decodePredefinedEntities value
    case boolean of
      "yes" -> return True
      "no"  -> return False
      _     -> empty
  optional tokenWhitespace
  tokenXmlDeclarationClose
  return $ XMLDeclaration version encoding standalone
tokenStartTag :: CharParsing m => Monad m => TokenParser m StartTag
tokenStartTag = TokenParser $ do
  name <- tokenStartTagOpen
  attributes <- many (tokenWhitespace >> attribute)
  optional tokenWhitespace
  tokenElementClose
  return $ StartTag name attributes
tokenEndTag :: CharParsing m => Monad m => TokenParser m QName
tokenEndTag = TokenParser $ do
  name <- tokenEndTagOpen
  optional tokenWhitespace
  tokenElementClose
  return name
tokenEmptyElementTag :: CharParsing m => Monad m => TokenParser m EmptyElementTag
tokenEmptyElementTag = TokenParser $ do
  name <- tokenStartTagOpen
  attributes <- optional $ do
    tokenWhitespace
    attribute `sepBy` tokenWhitespace
  optional tokenWhitespace
  tokenEmptyElementTagClose
  return $ EmptyElementTag name $ fromMaybe mempty attributes
tokenData :: CharParsing m => Monad m => TokenParser m [Content]
tokenData = TokenParser $ some (tokenContent "<")
anyToken :: CharParsing m => Monad m => TokenParser m Token
anyToken = TokenDoctype <$> tokenDoctype
  <|> TokenInstruction <$> tokenInstruction
  <|> TokenComment <$> tokenComment
  <|> TokenCDATA <$> tokenCdata
  <|> TokenXMLDeclaration <$> tokenXmlDeclaration
  <|> TokenStartTag <$> tokenStartTag
  <|> TokenEndTag <$> tokenEndTag
  <|> TokenEmptyElementTag <$> tokenEmptyElementTag
  <|> TokenData <$> tokenData