{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE TypeFamilies #-}
module Symantic.XML.Read.Parser where

import Control.Applicative (Applicative(..), Alternative(..))
import Control.Monad (Monad(..))
import Data.Bool
import Data.Char (Char)
import Data.Default.Class (Default(..))
import Data.Eq (Eq(..))
import Data.Function (($), (.))
import Data.Functor ((<$>))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..))
import Data.Ord (Ord(..))
import Data.String (IsString)
import Prelude (Integer)
import Text.Show (Show(..))
import qualified Control.Monad.Trans.Reader as R
import qualified Data.HashMap.Strict as HM
import qualified Data.Set as Set
import qualified Data.Text.Lazy as TL
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P

import Symantic.XML.Document hiding (XML, XMLs)
import qualified Symantic.XML.Document as XML

-- | Specify |XML.XML|'s 'src' type parameter for parsing.
type XML = XML.XML (FileSource Offset)
-- | Specify |XML.XMLs|'s 'src' type parameter for parsing.
type XMLs = XML.XMLs (FileSource Offset)

-- * Type 'Parser'
-- | Convenient alias.
type Parser   e s a =
     Parsable e s a =>
     R.ReaderT Reader (P.Parsec e s) a

-- ** Type 'Parsable'
type Parsable e s a =
 ( P.Stream s
 , P.Token s ~ Char
 , Ord e
 , IsString (P.Tokens s)
 , P.ShowErrorComponent e
 )

-- ** Type 'Reader'
data Reader = Reader
 { reader_source     :: FileSource Offset
 , reader_ns_scope   :: HM.HashMap NCName Namespace
 , reader_ns_default :: Namespace
 } deriving (Show)
instance Default Reader where
        def = Reader
         { reader_source     = pure def
         , reader_ns_scope   = HM.fromList
                 [ ("xml"  , xmlns_xml)
                 , ("xmlns", xmlns_xmlns)
                 ]
         , reader_ns_default = ""
         }

p_Offset :: Parser e s Offset
p_Offset = Offset <$> P.getOffset
{-# INLINE p_Offset #-}

p_Sourced :: Parser e s a -> Parser e s (Sourced (FileSource Offset) a)
p_Sourced pa = do
        Reader{reader_source} <- R.ask
        b <- P.getParserState
        let fileRange_file = P.sourceName $ P.pstateSourcePos $ P.statePosState b
        let fileRange_begin = Offset $ P.stateOffset b
        a <- pa
        e <- P.getParserState
        let fileRange_end = Offset $ P.stateOffset e
        return $ Sourced (setSource FileRange{..} reader_source) a

setSource :: FileRange pos -> FileSource pos -> FileSource pos
setSource fileRange (_curr:|next) = fileRange :| next

-- | Like 'p_Sourced' but uncoupled (through the use of 'p_SourcedEnd') for more flexibility.
p_SourcedBegin :: Parser e s a -> Parser e s a
p_SourcedBegin pa = do
        b <- P.getParserState
        let fileRange_file  = P.sourceName $ P.pstateSourcePos $ P.statePosState b
        let fileRange_begin = Offset $ P.stateOffset b
        let fileRange_end   = fileRange_begin
        (`R.local` pa) $ \ro@Reader{..} ->
                ro{ reader_source = setSource FileRange{..} reader_source }

-- | WARNING: Only to be used within a 'p_SourcedBegin'.
p_SourcedEnd :: Parser e s (a -> Sourced (FileSource Offset) a)
p_SourcedEnd = do
        Reader{..} <- R.ask
        e <- P.getParserState
        let fileRange_end = Offset $ P.stateOffset e
        return $ Sourced $
                 (\(curr:|path) -> curr{fileRange_end}:|path)
                 reader_source

{-
-- ** Type 'StreamSourced'
-- | Wrap 'TL.Text' to have a 'P.Stream' instance
-- whose 'P.advance1' method abuses the tab width state
-- to instead pass the line indent.
-- This in order to report correct 'P.SourcePos'
-- when parsing a 'Sourced' containing newlines.
newtype StreamSourced = StreamSourced { unStreamSourced :: TL.Text }
 deriving (IsString,Eq,Ord)
instance P.Stream StreamSourced where
	type Token  StreamSourced = Char
	type Tokens StreamSourced = TL.Text
	take1_       (StreamSourced t) = (StreamSourced <$>) <$> P.take1_ t
	takeN_     n (StreamSourced t) = (StreamSourced <$>) <$> P.takeN_ n t
	takeWhile_ f (StreamSourced t) = StreamSourced <$> P.takeWhile_ f t
	tokensToChunk _s = P.tokensToChunk (Proxy::Proxy TL.Text)
	chunkToTokens _s = P.chunkToTokens (Proxy::Proxy TL.Text)
	chunkLength   _s = P.chunkLength   (Proxy::Proxy TL.Text)
	{-
	advance1 _s indent (P.SourcePos n line col) c =
		case c of
		 '\n' -> P.SourcePos n (line <> P.pos1) indent
		 _    -> P.SourcePos n line (col <> P.pos1)
	advanceN s indent = TL.foldl' (P.advance1 s indent)
	-}

-- | Wrapper around |P.runParser'|
-- to use given 'Sourced' as starting position.
runParserOnSourced ::
 Parsable e StreamSourced a =>
 Parser e StreamSourced a ->
 Sourced FileSource TL.Text ->
 Either (P.ParseError (P.Token StreamSourced) e) a
runParserOnSourced p (Sourced (FileRange inp bp _ep :| path) s) =
	snd $
	P.runParser' (R.runReaderT p ro <* P.eof)
	 P.State
	 { P.stateInput    = StreamSourced s
	 , P.statePos      = pure $ P.SourcePos inp (P.mkPos $ filePos_line bp) indent
	 , P.stateTabWidth = indent
	 , P.stateTokensProcessed = 0
	 }
	where
	indent = P.mkPos $ filePos_column bp
	ro     = def{ reader_source = fromMaybe (pure def) $ nonEmpty path }
-}

-- * Type 'Error'
data Error
 =   Error_CharRef_invalid Integer
     -- ^ Well-formedness constraint: Legal Character.
     --
     -- Characters referred to using character references MUST match the production for Char.
 |   Error_EntityRef_unknown NCName
     -- ^ Well-formedness constraint: Entity Declared
     --
     -- In a document without any DTD, a document with only an internal DTD
     -- subset which contains no parameter entity references, or a document
     -- with " standalone='yes' ", for an entity reference that does not occur
     -- within the external subset or a parameter entity, the Name given in the
     -- entity reference MUST match that in an entity declaration that does not
     -- occur within the external subset or a parameter entity, except that
     -- well-formed documents need not declare any of the following entities:
     -- amp, lt, gt, apos, quot. The declaration of a general entity MUST
     -- precede any reference to it which appears in a default value in an
     -- attribute-list declaration.
     --
     -- Note that non-validating processors are not obligated to read and
     -- process entity declarations occurring in parameter entities or in the
     -- external subset; for such documents, the rule that an entity must be
     -- declared is a well-formedness constraint only if standalone='yes'.
 |   Error_Closing_tag_unexpected QName QName
     -- ^ Well-formedness constraint: Element Type Match.
     --
     -- The Name in an element's end-tag MUST match the element type in the start-tag.
 |   Error_Attribute_collision QName
     -- ^ Well-formedness constraint: Unique Att Spec.
     --
     -- An attribute name MUST NOT appear more than once in the same start-tag or empty-element tag.
 |   Error_PI_reserved PName
     -- ^ The target names " XML ", " xml ", and so on are reserved for standardization.
 |   Error_Namespace_prefix_unknown NCName
     -- ^ Namespace constraint: Prefix Declared
     --
     -- The namespace prefix, unless it is xml or xmlns, MUST have been declared in a namespace declaration attribute in either the start-tag of the element where the prefix is used or in an ancestor element (i.e., an element in whose content the prefixed markup occurs). 
 |   Error_Namespace_empty NCName
     -- ^ Namespace constraint: No Prefix Undeclaring
     --
     -- In a namespace declaration for a prefix (i.e., where the NSAttName is a PrefixedAttName), the attribute value MUST NOT be empty.
 |   Error_Namespace_reserved Namespace
 |   Error_Namespace_reserved_prefix NCName
     -- ^ Namespace constraint: Reserved Prefixes and Namespace Names
     --
     -- The prefix xml is by definition bound to the namespace name
     -- http://www.w3.org/XML/1998/namespace. It MAY, but need not, be
     -- declared, and MUST NOT be bound to any other namespace name. Other
     -- prefixes MUST NOT be bound to this namespace name, and it MUST NOT be
     -- declared as the default namespace.
     --
     -- The prefix xmlns is used only to declare namespace bindings and is by
     -- definition bound to the namespace name http://www.w3.org/2000/xmlns/.
     -- It MUST NOT be declared . Other prefixes MUST NOT be bound to this
     -- namespace name, and it MUST NOT be declared as the default namespace.
     -- Element names MUST NOT have the prefix xmlns.
     --
     -- All other prefixes beginning with the three-letter sequence x, m, l, in
     -- any case combination, are reserved. This means that:
     --
     -- - users SHOULD NOT use them except as defined by later specifications
     -- - processors MUST NOT treat them as fatal errors.
 deriving (Eq,Ord,Show)
instance P.ShowErrorComponent Error where
        showErrorComponent = show

-- * Helpers
p_error :: e -> Parser e s a
p_error = P.fancyFailure . Set.singleton . P.ErrorCustom

p_quoted :: P.Tokens s ~ TL.Text => (Char -> Parser e s a) -> Parser e s a
p_quoted p =
        P.between (P.char '"') (P.char '"') (p '"') <|>
        P.between (P.char '\'') (P.char '\'') (p '\'')

p_until :: P.Tokens s ~ TL.Text => (Char -> Bool) -> (Char, TL.Text) -> Parser e s TL.Text
p_until content (end, end_) =
        (TL.concat <$>) $ P.many $
                P.takeWhile1P Nothing (\c -> content c && c /= end) <|>
                P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))

p_until1 :: P.Tokens s ~ TL.Text => (Char -> Bool) -> (Char, TL.Text) -> Parser e s TL.Text
p_until1 content (end, end_) =
        (TL.concat <$>) $ P.some $
                P.takeWhile1P Nothing (\c -> content c && c /= end) <|>
                P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))