{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Rank2Types #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Symantic.XML.Tree.Read where

import Control.Arrow (left)
import Control.Applicative (Applicative(..), Alternative(..))
import Control.Monad (Monad(..), void, unless, forM)
import Data.Bool
import Data.Char (Char)
import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..))
import Data.Function (($), (.), const)
import Data.Functor ((<$>), (<$))
import Data.Maybe (Maybe(..), maybe, catMaybes)
import Data.Monoid (Monoid(..))
import Data.Ord (Ord(..))
import Data.Semigroup (Semigroup(..))
import Data.List.NonEmpty (NonEmpty(..))
import Data.String (String, IsString(..))
import Prelude (Num(..), Enum(..), Bounded(..), Integer, toInteger)
import System.IO (FilePath, IO)
import Text.Show (Show(..))
import qualified Control.Exception as Exn
import qualified Control.Monad.Trans.Reader as R
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Char as Char
import qualified Data.Char.Properties.XMLCharProps as XC
import qualified Data.HashMap.Strict as HM
import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Sequence as Seq
import qualified Data.Text.Encoding.Error as TL
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified Data.TreeSeq.Strict as TS
import qualified System.IO.Error as IO
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P

import Symantic.Base ()
import Symantic.XML.Language hiding (void)
import Symantic.XML.Tree.Source
import Symantic.XML.Tree.Data

readTree :: FilePath -> IO (Either String FileSourcedTrees)
readTree path =
  readUtf8 path >>= \case
   Left err -> return $ Left $ show err
   Right txt -> return $
    case runReadTree path txt of
     Right a -> Right a
     Left err -> Left $ P.errorBundlePretty err

runReadTree ::
 FilePath -> TL.Text ->
 Either (P.ParseErrorBundle TL.Text Error)
        FileSourcedTrees
runReadTree = P.runParser $ R.runReaderT p_document defaultReadTreeInh

-- * Type 'ErrorRead'
data ErrorRead
 =   ErrorRead_IO IO.IOError
 |   ErrorRead_Unicode TL.UnicodeException
 deriving (Show)
readUtf8 :: FilePath -> IO (Either ErrorRead TL.Text)
readUtf8 path =
  (left ErrorRead_Unicode . TL.decodeUtf8' <$> BSL.readFile path)
  `Exn.catch` \e ->
    if IO.isAlreadyInUseError e
    || IO.isDoesNotExistError e
    || IO.isPermissionError   e
    then return $ Left $ ErrorRead_IO e
    else IO.ioError e

-- * Type 'ReadTree'
-- | Convenient alias.
type ReadTree e s a =
 ReadTreeConstraints e s a =>
 R.ReaderT ReadTreeInh (P.Parsec e s) a

-- ** Type 'ReadTreeConstraints'
type ReadTreeConstraints e s a =
 ( P.Stream s
 , P.Token s ~ Char
 , Ord e
 , IsString (P.Tokens s)
 , P.ShowErrorComponent e
 )

-- ** Type 'ReadTreeInh'
data ReadTreeInh
 =   ReadTreeInh
 {   readTreeInh_source     :: FileSource Offset
 ,   readTreeInh_ns_scope   :: HM.HashMap NCName Namespace
 ,   readTreeInh_ns_default :: Namespace
 } deriving (Show)

defaultReadTreeInh :: ReadTreeInh
defaultReadTreeInh = ReadTreeInh
 { readTreeInh_source = FileSource $ pure $
  FileRange mempty mempty mempty
 , readTreeInh_ns_scope = HM.fromList
   [ ("xml"  , xmlns_xml)
   , ("xmlns", xmlns_xmlns)
   ]
 , readTreeInh_ns_default = ""
 }

p_Offset :: ReadTree e s Offset
p_Offset = Offset <$> P.getOffset
{-# INLINE p_Offset #-}

p_Sourced :: ReadTree e s a -> ReadTree e s (Sourced (FileSource Offset) a)
p_Sourced pa = do
  ReadTreeInh{readTreeInh_source} <- R.ask
  b <- P.getParserState
  let fileRange_path = P.sourceName $ P.pstateSourcePos $ P.statePosState b
  let fileRange_begin = Offset $ P.stateOffset b
  a <- pa
  e <- P.getParserState
  let fileRange_end = Offset $ P.stateOffset e
  return $ Sourced (setSource FileRange{..} readTreeInh_source) a

setSource :: FileRange pos -> FileSource pos -> FileSource pos
setSource fileRange (FileSource (_curr:|next)) = FileSource (fileRange:|next)

-- | Like 'p_Sourced' but uncoupled (through the use of 'p_SourcedEnd') for more flexibility.
p_SourcedBegin :: ReadTree e s a -> ReadTree e s a
p_SourcedBegin pa = do
  b <- P.getParserState
  let fileRange_path  = P.sourceName $ P.pstateSourcePos $ P.statePosState b
  let fileRange_begin = Offset $ P.stateOffset b
  let fileRange_end   = fileRange_begin
  (`R.local` pa) $ \inh@ReadTreeInh{..} ->
    inh{ readTreeInh_source = setSource FileRange{..} readTreeInh_source }

-- | WARNING: only to be used within a 'p_SourcedBegin'.
p_SourcedEnd :: ReadTree e s (a -> Sourced (FileSource Offset) a)
p_SourcedEnd = do
  ReadTreeInh{..} <- R.ask
  e <- P.getParserState
  let fileRange_end = Offset $ P.stateOffset e
  return $ Sourced $
     (\(FileSource (curr:|path)) -> FileSource (curr{fileRange_end}:|path))
     readTreeInh_source

-- * Type 'Error'
data Error
 =   Error_CharRef_invalid Integer
     -- ^ Well-formedness constraint: Legal Character.
     --
     -- Characters referred to using character references MUST match the production for Char.
 |   Error_EntityRef_unknown NCName
     -- ^ Well-formedness constraint: Entity Declared
     --
     -- In a document without any DTD, a document with only an internal DTD
     -- subset which contains no parameter entity references, or a document
     -- with " standalone='yes' ", for an entity reference that does not occur
     -- within the external subset or a parameter entity, the Name given in the
     -- entity reference MUST match that in an entity declaration that does not
     -- occur within the external subset or a parameter entity, except that
     -- well-formed documents need not declare any of the following entities:
     -- amp, lt, gt, apos, quot. The declaration of a general entity MUST
     -- precede any reference to it which appears in a default value in an
     -- attribute-list declaration.
     --
     -- Note that non-validating processors are not obligated to read and
     -- process entity declarations occurring in parameter entities or in the
     -- external subset; for such documents, the define that an entity must be
     -- declared is a well-formedness constraint only if standalone='yes'.
 |   Error_Closing_tag_unexpected QName QName
     -- ^ Well-formedness constraint: Element Type Match.
     --
     -- The Name in an element's end-tag MUST match the element type in the start-tag.
 |   Error_Attribute_collision QName
     -- ^ Well-formedness constraint: Unique Att Spec.
     --
     -- An attribute name MUST NOT appear more than once in the same start-tag or empty-element tag.
 |   Error_PI_reserved PName
     -- ^ The target names " XML ", " xml ", and so on are reserved for standardization.
 |   Error_Namespace_prefix_unknown NCName
     -- ^ Namespace constraint: Prefix Declared
     --
     -- The namespace prefix, unless it is xml or xmlns, MUST have been declared in a namespace declaration attribute in either the start-tag of the element where the prefix is used or in an ancestor element (i.e., an element in whose content the prefixed markup occurs). 
 |   Error_Namespace_empty NCName
     -- ^ Namespace constraint: No Prefix Undeclaring
     --
     -- In a namespace declaration for a prefix (i.e., where the NSAttName is a PrefixedAttName), the attribute value MUST NOT be empty.
 |   Error_Namespace_reserved Namespace
 |   Error_Namespace_reserved_prefix NCName
     -- ^ Namespace constraint: Reserved Prefixes and Namespace Names
     --
     -- The prefix xml is by definition bound to the namespace name
     -- http://www.w3.org/XML/1998/namespace. It MAY, but need not, be
     -- declared, and MUST NOT be bound to any other namespace name. Other
     -- prefixes MUST NOT be bound to this namespace name, and it MUST NOT be
     -- declared as the default namespace.
     --
     -- The prefix xmlns is used only to declare namespace bindings and is by
     -- definition bound to the namespace name http://www.w3.org/2000/xmlns/.
     -- It MUST NOT be declared . Other prefixes MUST NOT be bound to this
     -- namespace name, and it MUST NOT be declared as the default namespace.
     -- Element names MUST NOT have the prefix xmlns.
     --
     -- All other prefixes beginning with the three-letter sequence x, m, l, in
     -- any case combination, are reserved. This means that:
     --
     -- - users SHOULD NOT use them except as defined by later specifications
     -- - processors MUST NOT treat them as fatal errors.
 deriving (Eq,Ord,Show)
instance P.ShowErrorComponent Error where
  showErrorComponent = show

-- * Helpers
p_error :: e -> ReadTree e s a
p_error = P.fancyFailure . Set.singleton . P.ErrorCustom

p_quoted :: P.Tokens s ~ TL.Text => (Char -> ReadTree e s a) -> ReadTree e s a
p_quoted p =
  P.between (P.char '"') (P.char '"') (p '"') <|>
  P.between (P.char '\'') (P.char '\'') (p '\'')

p_until ::
 P.Tokens s ~ TL.Text =>
 (Char -> Bool) -> (Char, TL.Text) -> ReadTree e s TL.Text
p_until content (end, end_) =
  (TL.concat <$>) $ P.many $
    P.takeWhile1P Nothing (\c -> content c && c /= end) <|>
    P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))

p_until1 ::
 P.Tokens s ~ TL.Text =>
 (Char -> Bool) -> (Char, TL.Text) -> ReadTree e s TL.Text
p_until1 content (end, end_) =
  (TL.concat <$>) $ P.some $
    P.takeWhile1P Nothing (\c -> content c && c /= end) <|>
    P.try (TL.singleton <$> P.char end <* P.notFollowedBy (P.string end_))

-- * Document
p_document :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTrees
p_document = do
  ps <- p_prolog
  e  <- p_Element
  m  <- p_Miscs
  P.eof
  return (ps <> pure e <> m)

-- ** Prolog
p_prolog :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTrees
p_prolog = (<>)
 <$> P.option Seq.empty (pure <$> p_XMLDecl)
 <*> p_Miscs

-- ** Misc
p_Miscs :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTrees
p_Miscs = (Seq.fromList . catMaybes <$>) $ P.many $
  Just <$> p_Comment <|>
  Just <$> p_PI <|>
  Nothing <$ p_Spaces1

-- ** XMLDecl
p_XMLDecl :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_XMLDecl = do
  Sourced src as <- p_Sourced $ P.between (P.string "<?xml") (P.string "?>") $ do
    vi <- pure <$> p_VersionInfo
    ed <- P.option Seq.empty $ pure <$> p_EncodingDecl
    sd <- P.option Seq.empty $ pure <$> p_SDDecl
    p_Spaces
    return $ vi <> ed <> sd
  return $ TS.Tree (Sourced src $ NodePI "xml" "") as

p_VersionInfo :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_VersionInfo = do
  Sourced src v <- p_Sourced $ do
    P.try $ p_Spaces1 <* P.string "version"
    p_Eq
    p_quoted $ const $
      (<>)
       <$> P.string "1."
       <*> P.takeWhile1P Nothing Char.isDigit
  return $ TS.tree0 $ Sourced src $ NodePI "version" v

p_EncodingDecl :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_EncodingDecl = do
  Sourced src v <- p_Sourced $ do
    P.try $ p_Spaces1 <* P.string "encoding"
    p_Eq
    p_quoted $ const p_EncName
  return $ TS.tree0 $ Sourced src $ NodePI "encoding" v

p_EncName :: P.Tokens s ~ TL.Text => ReadTree Error s TL.Text
p_EncName = P.label "EncName" $ do
  P.notFollowedBy (P.satisfy $ not . isAlpha)
  P.takeWhile1P Nothing $ \c ->
    isAlpha c || Char.isDigit c ||
    c=='.' || c=='_' || c=='-'
  where isAlpha c = Char.isAsciiLower c || Char.isAsciiUpper c

-- *** SDDecl
p_SDDecl :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_SDDecl = do
  Sourced src v <- p_Sourced $ do
    P.try $ p_Spaces1 <* P.string "standalone"
    p_Eq
    p_quoted $ const $ P.string "yes" <|> P.string "no"
  return $ TS.tree0 $ Sourced src $ NodePI "standalone" v

-- ** CharData
p_CharData :: P.Tokens s ~ TL.Text => ReadTree e s EscapedText
p_CharData = P.label "[^<&]" $ escapeText <$>
  p_until1 (\c -> XC.isXmlChar c && c/='<' && c/='&') (']',"]>")

-- ** Comment
p_Comment :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_Comment = p_SourcedBegin $ P.string "<!--" *> p_Comment__
p_Comment_ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_Comment_ = P.string "--" *> p_Comment__
p_Comment__:: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_Comment__ = do
  c <- p_until XC.isXmlChar ('-', "-")
  void $ P.string "-->"
  src <- p_SourcedEnd
  return $ TS.tree0 $ src $ NodeComment c

-- ** CDATA
p_CDSect :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_CDSect = p_SourcedBegin $ P.string "<![CDATA[" *> p_CDSect__
p_CDSect_ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_CDSect_ = P.string "[CDATA[" *> p_CDSect__
p_CDSect__ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_CDSect__ = do
  c <- p_until XC.isXmlChar (']', "]>")
  void $ P.string "]]>"
  src <- p_SourcedEnd
  return $ TS.tree0 $ src $ NodeCDATA c

-- ** PI
p_PI :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_PI = p_SourcedBegin $ P.string "<?" *> p_PI__
p_PI_ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_PI_ = P.char '?' *> p_PI__
p_PI__ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_PI__ = do
  n <- p_PITarget
  v <- P.option "" $ p_Spaces1 *> p_until XC.isXmlChar ('?', ">")
  void $ P.string "?>"
  src <- p_SourcedEnd
  return $ TS.tree0 $ src $ NodePI n v
p_PITarget :: P.Tokens s ~ TL.Text => ReadTree Error s PName
p_PITarget = do
  n <- p_PName
  case n of
   PName{pNameSpace=Nothing, pNameLocal=NCName l}
    | "xml" == TL.toLower l -> p_error $ Error_PI_reserved n
   _ -> return n

-- ** Element
p_Element :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_Element = p_SourcedBegin $ (P.char '<' *> p_Element_)
p_Element_ :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_Element_ = p_STag

-- *** STag
p_STag :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTree
p_STag = do
  n  <- p_PName
  attrs <- P.many $ p_Attribute
  p_Spaces
  ro <- R.ask
  elemNS :: HM.HashMap NCName Namespace <-
    (HM.fromList . List.concat <$>) $ forM attrs $ \case
     (PName{..}, Sourced _ av)
      | ns <- Namespace $ unescapeAttr av
      , Nothing        <- pNameSpace
      , NCName "xmlns" <- pNameLocal ->
      -- Default namespace declaration
      case ns of
       _ |  ns == xmlns_xml   -- DOC: it MUST NOT be declared as the default namespace
         || ns == xmlns_xmlns -- DOC: it MUST NOT be declared as the default namespace
         -> p_error $ Error_Namespace_reserved ns
       _ -> return [(NCName "" , ns)]
      | ns <- Namespace $ unescapeAttr av
      , Just (NCName "xmlns") <- pNameSpace ->
      -- Namespace prefix declaration
      case unNCName pNameLocal of
       "xml" -- DOC: It MAY, but need not, be declared,
             -- and MUST NOT be bound to any other namespace name.
             | ns == xmlns_xml -> return []
             | otherwise -> p_error $ Error_Namespace_reserved_prefix pNameLocal
       "xmlns" -- DOC: It MUST NOT be declared
               -> p_error $ Error_Namespace_reserved_prefix pNameLocal
       local | "xml" <- TL.toLower $ TL.take 3 local -> return []
             -- DOC: All other prefixes beginning with the three-letter
             -- sequence x, m, l, in any case combination, are reserved.
             -- This means that: processors MUST NOT treat them as fatal errors.
       _ |  ns == xmlns_xml   -- DOC: Other prefixes MUST NOT be bound to this namespace name.
         || ns == xmlns_xmlns -- DOC: Other prefixes MUST NOT be bound to this namespace name.
         -> p_error $ Error_Namespace_reserved ns
       _ -> return [(pNameLocal, ns)]
      | otherwise -> return []
  let scopeNS = elemNS <> readTreeInh_ns_scope ro
  let defaultNS = HM.lookupDefault (readTreeInh_ns_default ro) (NCName "") scopeNS
  let
   lookupNamePrefix prefix =
    maybe (p_error $ Error_Namespace_prefix_unknown prefix) return $
    HM.lookup prefix scopeNS
  elemName :: QName <-
    -- Expand element's QName
    case pNameSpace n of
     Nothing -> return QName{qNameSpace=defaultNS, qNameLocal=pNameLocal n}
      -- DOC: If there is a default namespace declaration in scope,
      -- the expanded name corresponding to an unprefixed element name
      -- has the URI of the default namespace as its namespace name.
     Just prefix
      | NCName "xmlns" <- prefix ->
      -- DOC: Element names MUST NOT have the prefix xmlns.
      p_error $ Error_Namespace_reserved_prefix prefix
      | otherwise -> do
      ns <- lookupNamePrefix prefix
      return QName{qNameSpace=ns, qNameLocal=pNameLocal n}
  elemAttrs :: [(QName, FileSourced EscapedAttr)] <-
    -- Expand attributes' PName into QName
    forM attrs $ \(an, av) -> do
      ns <- maybe (return "") lookupNamePrefix $ pNameSpace an
      let qn = QName{qNameSpace=ns, qNameLocal=pNameLocal an}
      return (qn, av)
  -- Check for attribute collision
  let
   attrsByQName :: HM.HashMap QName [FileSourced EscapedAttr] =
    HM.fromListWith (<>) $ (<$> elemAttrs) $
     \(an, av) -> (an, [av])
  case HM.toList $ HM.filter (\x -> length x > 1) attrsByQName of
   (an, _):_ -> p_error $ Error_Attribute_collision an
   _ -> return ()
  content :: FileSourcedTrees <-
    mempty <$ P.string "/>" <|>
    R.local
     (const ro
       { readTreeInh_ns_scope   = scopeNS
       , readTreeInh_ns_default = defaultNS
       })
     (P.char '>' *> p_content <* p_ETag elemName)
  src <- p_SourcedEnd
  return $ TS.Tree (src $ NodeElem elemName (List.head <$> attrsByQName)) content

-- *** Attribute
-- | Note: despite the type, the returned 'FileSource'
-- encompasses also the attribute 'PName'.
-- It is pushed in the attribute value to fit the insertion
-- of the attribute into a 'HM.HashMap'.
p_Attribute :: P.Tokens s ~ TL.Text => ReadTree Error s (PName, FileSourced EscapedAttr)
p_Attribute =
  p_SourcedBegin $ do
    an <- P.try $ p_Spaces1 *> p_PName
    void p_Eq
    av <- p_AttrValue
    src <- p_SourcedEnd
    return (an, src av)

p_AttrValue :: P.Tokens s ~ TL.Text => ReadTree Error s EscapedAttr
p_AttrValue = p_quoted p_AttrValueText

p_AttrValueText :: P.Tokens s ~ TL.Text => Char -> ReadTree Error s EscapedAttr
p_AttrValueText q =
  EscapedAttr . Seq.fromList <$> P.many (
    p_Reference <|>
    -- Supplementary alternative to always escape the quote
    -- as expected by 'EscapedAttr'.
    (if q /= '\"' then EscapedEntityRef entityRef_quot <$ P.char '"' else P.empty) <|>
    EscapedPlain <$> P.label ("[^<&"<>[q]<>"]")
      (P.takeWhile1P Nothing $ \c ->
        XC.isXmlChar c &&
        c `List.notElem` (q:"<&")
      )
  )

-- * content
p_content :: P.Tokens s ~ TL.Text => ReadTree Error s FileSourcedTrees
p_content =
  (Seq.fromList <$>) $ P.many $
    (p_SourcedBegin $ do
      P.try $ P.char '<' *> P.notFollowedBy (P.char '/')
      p_Element_ <|> p_PI_ <|> (P.char '!' *> (p_Comment_ <|> p_CDSect_))
    )
    <|> (
    (TS.tree0 <$>) $
      p_Sourced $ NodeText . EscapedText . foldMap unEscapedText
       <$> P.some (
        p_CharData <|>
        EscapedText . pure <$> p_Reference
      )
    )

-- *** ETag
p_ETag :: P.Tokens s ~ TL.Text => QName -> ReadTree Error s ()
p_ETag expected = do
  got <- P.string "</" *> p_QName <* p_Spaces <* P.char '>'
  unless (got == expected) $
    p_error $ Error_Closing_tag_unexpected got expected

-- * PName
p_PName :: P.Tokens s ~ TL.Text => ReadTree e s PName
p_PName = do
  n <- p_NCName
  s <- P.optional $ P.try $ P.char ':' *> p_NCName
  return $ case s of
   Nothing -> PName{pNameSpace=Nothing, pNameLocal=n}
   Just l  -> PName{pNameSpace=Just n , pNameLocal=l}

-- * QName
p_QName :: P.Tokens s ~ TL.Text => ReadTree Error s QName
p_QName = do
  n <- p_NCName
  s <- P.optional $ P.try $ P.char ':' *> p_NCName
  ReadTreeInh{..} <- R.ask
  case s of
   Nothing -> return QName{qNameSpace=readTreeInh_ns_default, qNameLocal=n}
   Just l ->
    case HM.lookup n readTreeInh_ns_scope of
     Nothing -> p_error $ Error_Namespace_prefix_unknown n
     Just ns -> return QName{qNameSpace=ns, qNameLocal=l}

-- ** NCName
p_NCName :: P.Tokens s ~ TL.Text => ReadTree e s NCName
p_NCName = P.label "NCName" $ NCName
 <$  P.notFollowedBy (P.satisfy (not . XC.isXmlNCNameStartChar))
 <*> P.takeWhile1P Nothing XC.isXmlNCNameChar

-- * Reference
p_Reference :: P.Tokens s ~ TL.Text => ReadTree Error s Escaped
p_Reference =
  EscapedCharRef   <$> p_CharRef <|>
  EscapedEntityRef <$> p_EntityRef

-- ** EntityRef
p_EntityRef :: P.Tokens s ~ TL.Text => ReadTree Error s EntityRef
p_EntityRef = do
  ref <- P.char '&' *> p_NCName <* P.char ';'
  EntityRef ref <$> lookupEntityRef ref
  where
  -- Because entities are declared in the (unimplemented) DTD,
  -- only builtins entities are supported for now.
  lookupEntityRef (NCName "lt"  ) = pure "<"
  lookupEntityRef (NCName "gt"  ) = pure ">"
  lookupEntityRef (NCName "amp" ) = pure "&"
  lookupEntityRef (NCName "apos") = pure "'"
  lookupEntityRef (NCName "quot") = pure "\""
  lookupEntityRef n = p_error $ Error_EntityRef_unknown n

-- ** CharRef
p_CharRef :: P.Tokens s ~ TL.Text => ReadTree Error s CharRef
p_CharRef =
  do
    ref <- readHexadecimal
     <$  P.string "&#x"
     <*> P.some P.hexDigitChar
     <*  P.char ';'
    check ref
  <|> do
    ref <- readDecimal
     <$  P.string "&#"
     <*> P.some P.digitChar
     <*  P.char ';'
    check ref
  where
  check i =
    let c = toEnum (fromInteger i) in
    if i <= toInteger (fromEnum (maxBound::Char))
    && XC.isXmlChar c
    then pure $ CharRef c
    else p_error $ Error_CharRef_invalid i

readInt :: Integer -> String -> Integer
readInt base digits =
  sign * List.foldl' acc 0 (List.concatMap digToInt digits1)
  where
  acc q r = q*base + r
  (sign, digits1) =
    case digits of
     [] -> (1, digits)
     c:ds | c == '-'  -> (-1, ds)
          | c == '+'  -> ( 1, ds)
          | otherwise -> ( 1, digits)
  ord = toInteger . Char.ord
  digToInt c
   | Char.isDigit c      = [ord c - ord '0']
   | Char.isAsciiLower c = [ord c - ord 'a' + 10]
   | Char.isAsciiUpper c = [ord c - ord 'A' + 10]
   | otherwise           = []

readDecimal :: String -> Integer
readDecimal = readInt 10

readHexadecimal :: String -> Integer
readHexadecimal = readInt 16

-- * Char
p_Char :: P.Tokens s ~ TL.Text => ReadTree e s Char
p_Char = P.label "XmlChar" $ P.satisfy XC.isXmlCharCR <|> p_CRLF
{-# INLINE p_Char #-}

-- ** Space
-- | Map '\r' and '\r\n' to '\n'.
-- See: https://www.w3.org/TR/xml/#sec-line-ends
p_CRLF :: P.Tokens s ~ TL.Text => ReadTree e s Char
p_CRLF = P.char '\r' *> P.option '\n' (P.char '\n')

p_Space :: P.Tokens s ~ TL.Text => ReadTree e s Char
p_Space = P.label "space" $ P.satisfy XC.isXmlSpaceCharCR <|> p_CRLF
{-# INLINE p_Space #-}

p_Spaces :: P.Tokens s ~ TL.Text => ReadTree e s ()
p_Spaces = P.label "spaces" $ void $ P.takeWhileP Nothing XC.isXmlSpaceChar
{-# INLINE p_Spaces #-}

p_Spaces1 :: P.Tokens s ~ TL.Text => ReadTree e s ()
p_Spaces1 = P.label "spaces" $ void $ P.takeWhile1P Nothing XC.isXmlSpaceChar
{-# INLINE p_Spaces1 #-}

-- * Eq
p_separator :: P.Tokens s ~ TL.Text => Char -> ReadTree e s ()
p_separator c = P.try (() <$ p_Spaces <* P.char c) <* p_Spaces

p_Eq :: P.Tokens s ~ TL.Text => ReadTree e s ()
p_Eq = p_separator '='