{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Symantic.XML.Read where

import Control.Applicative as Alternative (Applicative(..), Alternative(..), optional)
import Control.Monad (Monad(..))
import Control.Monad.Trans.Class (MonadTrans(..))
import Data.Bool
import Data.Either (Either(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), all)
import Data.Function (($), (.), const, id, flip)
import Data.Functor (Functor(..), (<$>))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..), maybe, isNothing, maybeToList)
import Data.Monoid (Monoid(..))
import Data.Ord (Ord(..))
import Data.Proxy (Proxy(..))
import Data.Semigroup (Semigroup(..))
import Data.String (String, IsString(..))
import Data.Tuple (fst)
import Data.Void (Void)
import Numeric.Natural (Natural)
import Prelude ((+), Integer, undefined)
import System.IO (IO, FilePath)
import Text.Show (Show(..))
import qualified Data.Char as Char
import qualified Data.HashMap.Strict as HM
import qualified Data.List as List
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.TreeSeq.Strict as TS
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P
import qualified Text.Megaparsec.Char.Lexer as P
import qualified Text.Megaparsec.Internal as P

import Symantic.Base
import Symantic.XML.Language
import Symantic.XML.RelaxNG.Language
import Symantic.XML.Tree

-- | Main reading function.
read ::
 Read FileSourced Void (x->x) a ->
 FilePath ->
 IO (Either String a)
read rng path =
  readTree path >>= \case
   Left  err -> return $ Left err
   Right xml -> return $ runRead rng xml

-- | Like 'readWithRelaxNG' but on a 'FileSourcedTrees'.
runRead ::
 Read FileSourced Void (x->x) a ->
 FileSourcedTrees ->
 Either String a
runRead rng xml =
  case P.runParser (unRead rng) "" (mempty, xml) of
   Left err -> Left $ foldMap parseErrorTextPretty $ P.bundleErrors err
   Right a -> Right $ a id

-- * Type 'ReadStream'
type ReadStream src =
 ( HM.HashMap QName (src EscapedAttr)
 , Trees src
 )

-- | Take one 'Node' from the 'ReadStream',
-- or fallback to an attribute, or 'Nothing'.
-- 
-- Use 'pTokenAttr' to take only attributes.
take1_ ::
 UnSource src =>
 (Node (src EscapedAttr) -> Bool) ->
 ReadStream src ->
 Maybe ( P.Token (ReadStream src)
       , ReadStream src )
take1_ isIgnoredNode s@(attrs, trees) =
  go trees
  where
  go trs =
    case Seq.viewl trs of
     Seq.EmptyL
      | null attrs -> Nothing
      | otherwise -> Just (Left attrs, s)
     t Seq.:< ts ->
      case unSource (TS.unTree t) of
       n | isIgnoredNode n -> go ts
         | otherwise -> Just (Right t, (attrs, ts))
        -- Note that having an ignored node
        -- can split a text into two 'NodeText's.
        -- Not sure if it would be better to unify them.

-- ** Type 'ReadConstraints'
-- | Convenient alias to be less verbose.
type ReadConstraints src =
 ( Ord (src (Node (src EscapedAttr)))
 , Ord (src EscapedAttr)
 , UnSource src
 , NoSource src
 , SourceOf src
 , Show (Source src)
 , Show (src String)
 , Functor src
 )

instance ReadConstraints src => P.Stream (ReadStream src) where
  type Token (ReadStream src) = Either
   (HM.HashMap QName (src EscapedAttr))
   (Tree src)
  type Tokens (ReadStream src) = ReadStream src
  take1_ = take1_ isIgnoredNode
    where
    isIgnoredNode = \case
     NodeComment{} -> True
     NodePI{}      -> True
     _ -> False
  showTokens _s toks =
    orList $
    mconcat $
    toList $ showTree <$> toks
    where
    showSource :: src String -> String
    showSource sa =
      let src = sourceOf sa in
      if nullSource @src src
      then unSource sa
      else unSource sa<>" at "<>show src
    showTree = \case
     Left as ->
      (\(an, av) -> showSource $ ("(attribute "<>show an<>")") <$ av)
       <$> List.sortOn fst (HM.toList as)
     Right (TS.Tree nod ts) ->
      pure $
      showSource . (<$ nod) $
      case unSource nod of
       NodeElem n _as -> "(element "<>show n<>")"
       NodeText{} ->
        case Seq.viewl ts of
         TS.Tree tn _ Seq.:< _
          | NodeText lit <- unSource tn ->
          -- Abuse the encoding to detect expected 'literal'
          -- using nested 'NodeText'
          "\""<>TL.unpack (unescapeText lit)<>"\""
         _ -> "text"
       NodeComment _c -> "comment"
       NodePI n _t    -> "(processing-instruction "<>show n<>")"
       NodeCDATA _t   -> "cdata"
  -- Useless methods for validating an XML AST
  takeN_ = undefined
  tokensToChunk = undefined
  chunkToTokens = undefined
  chunkLength = undefined
  takeWhile_ = undefined
  reachOffset = undefined
  reachOffsetNoLine = undefined

-- * Type 'Read'
newtype Read src e f k
 =      Read
 {    unRead :: P.Parsec e (ReadStream src) (f->k) }

instance
 ( Ord err
 , ReadConstraints src
 ) => Emptyable (Read src err) where
  empty = Read $ id <$ P.eof
instance
 ( Ord err
 , ReadConstraints src
 ) => Unitable (Read src err) where
  unit = Read $ return ($ ())
instance
 ( Ord err
 , ReadConstraints src
 ) => Voidable (Read src err) where
  void _a (Read x) = Read $
    (\a2b2k b -> a2b2k (\_a -> b)) <$> x
instance
 ( Ord err
 , ReadConstraints src
 ) => Constant (Read src err) where
  constant a = Read $ return ($ a)
instance
 ( Ord err
 , ReadConstraints src
 ) => Permutable (Read src err) where
  type Permutation (Read src err) =
    ReadPerm src err
  permutable (ReadPerm ma p) = Read $ do
    r <- Alternative.optional p
    unRead $
      case r of
       Just perms -> permutable perms
       Nothing ->
        Read $ maybe
         -- Not 'empty' here so that 'P.TrivialError'
         -- has the unexpected token.
         (P.token (const Nothing) Set.empty)
         return ma
  noPerm = ReadPerm Nothing Alternative.empty
  perm (Read x) =
    ReadPerm Nothing $ (<$> x) $ \a ->
      ReadPerm (Just a) Alternative.empty
  permWithDefault d (Read x) =
    ReadPerm (Just ($ d)) $ (<$> x) $ \a ->
      ReadPerm (Just a) Alternative.empty
instance
 ( Ord err
 , ReadConstraints src
 ) => Composable (Read src err) where
  Read x <.> Read y = Read $
    x >>= \a2b -> (. a2b) <$> y
instance
 ( Ord err
 , ReadConstraints src
 ) => Tupable (Read src err) where
  Read x <:> Read y = Read $
    consCont (,) <$> x <*> y
instance
 ( Ord err
 , ReadConstraints src
 ) => Eitherable (Read src err) where
  Read x <+> Read y = Read $
    mapCont Left <$> P.try x <|>
    mapCont Right <$> y
{-
instance
 ( Ord err
 , ReadConstraints src
 ) => Routable (Read src err) where
  Read x <!> Read y = Read $
    (\a2k (a:!:_b) -> a2k a) <$> P.try x <|>
    (\b2k (_a:!:b) -> b2k b) <$> y
-}
instance
 ( Ord err
 , ReadConstraints src
 ) => Optionable (Read src err) where
  option (Read x) = Read $
    P.try x <|> return id
  optional (Read x) = Read $
    mapCont Just <$> P.try x <|>
    return ($ Nothing)
instance
 ( Ord err
 , ReadConstraints src
 ) => Repeatable (Read src err) where
  many0 (Read x) = Read $ concatCont <$> many x
  many1 (Read x) = Read $ concatCont <$> some x
instance
 ( Ord err
 , ReadConstraints src
 ) => Dimapable (Read src err) where
  dimap a2b _b2a (Read r) =
    Read $ (\k b2k -> k (b2k . a2b)) <$> r
instance
 ( Ord err
 , ReadConstraints src
 ) => Dicurryable (Read src err) where
  dicurry (_::proxy args) constr _destr (Read x) = Read $ do
    f <- x
    return $ \r2k ->
      f (mapresultN @args r2k constr)
instance
 ( Ord err
 , ReadConstraints src
 , Textable (Read src err)
 ) => XML (Read src err) where
  namespace _nm _ns = Read (return id)
  element n p = Read $ do
    s <- P.token check $ Set.singleton $
      P.Tokens $ pure expected
    unRead $ readNested p s
    where
    expected = Right $ TS.tree0 $ noSource $ NodeElem n mempty
    check = \case
     Right (TS.Tree nod ts)
      | NodeElem e as <- unSource nod
      , e == n
      -> Just (removeXMLNS as, removeSpaces ts)
     _ -> Nothing
  attribute n p = Read $ do
    v <- pTokenAttr n $ Set.singleton $
      P.Tokens $ pure expected
    unRead $ readNested p
     (mempty, pure (TS.tree0 (NodeText . EscapedText . unEscapedAttr <$> v)))
    -- Cast 'EscapedAttr' into 'EscapedText'
    -- because it will be read, not written,
    -- hence only given to 'unescapeText'
    -- which is the same than 'unescapeAttr'.
    where
    expected = Left $ HM.singleton n $ noSource ""
  literal lit = Read $ do
    P.token check $ Set.singleton $ P.Tokens $ pure expected
    where
    expected = Right $
      TS.Tree (noSource $ NodeText "")
       (pure $ TS.tree0 (noSource $ NodeText $ escapeText lit))
    check = \case
     Right (Tree0 nod)
      | NodeText t <- unSource nod
      , unescapeText t == lit
      -> Just id
     _ -> Nothing
  pi n = Read $ do
    v <- pTokenPI n $ Set.singleton $
      P.Tokens $ pure expected
    return ($ v)
    where
    expected = Right $ TS.tree0 $ noSource $ NodePI n mempty
  cdata = Read $
    P.token check $ Set.singleton $
      P.Tokens $ pure expected
    where
    expected = Right $ TS.tree0 $ noSource $ NodeCDATA mempty
    check = \case
     Right (Tree0 nod)
      | NodeCDATA v <- unSource nod
      -> Just ($ v)
     _ -> Nothing
  comment = Read $
    P.token check $ Set.singleton $
      P.Tokens $ pure expected
    where
    expected = Right $ TS.tree0 $ noSource $ NodeComment mempty
    check = \case
     Right (Tree0 nod)
      | NodeComment v <- unSource nod
      -> Just ($ v)
     _ -> Nothing
instance Ord err => Textable (Read FileSourced err) where
  type TextConstraint (Read FileSourced err) a =
    DecodeText a
  text :: forall a k repr.
   repr ~ Read FileSourced err =>
   TextConstraint repr a => repr (a->k) k
  text = Read $ do
    Sourced (FileSource (src :| _)) txt <-
      P.token check $ Set.singleton $ P.Tokens $ pure expected
    case P.runParser @Void (decodeText @a <* P.eof) "" (unescapeText txt) of
     Right a -> return ($ a)
     Left errs -> P.fancyFailure $ Set.singleton $ P.ErrorFail $
      (`foldMap` P.bundleErrors errs) $ \err ->
        fileRange_path src <> ":" <>
        show (fileRange_begin src <> Offset (P.errorOffset err)) <> "\n" <>
        P.parseErrorTextPretty err
    where
    expected = Right $ TS.tree0 $ noSource $ NodeText $ EscapedText mempty
    check = \case
     Right (Tree0 nod)
      | NodeText t <- unSource nod
      -> Just (t <$ nod)
     _ -> Nothing
instance
 ( Ord err
 , ReadConstraints src
 , Textable (Read src err)
 , Definable (Read src err)
 ) => RelaxNG (Read src err) where
  elementMatch nc p = Read $ do
    (n,s) <- P.token check $ Set.singleton $
      P.Tokens $ pure expected
    ((\a2k n2a -> a2k (n2a n)) <$>) $
      unRead (readNested p s)
    where
    expected = Right $ TS.tree0 $ noSource $
      NodeElem (qName (NCName (TLB.toLazyText
       (textify (mempty::Namespaces NCName,(infixN0,SideL),nc)))))
       mempty
    check = \case
     Right (TS.Tree nod ts)
      | NodeElem n as <- unSource nod
      , matchNameClass nc n
      -> Just (n, (removeXMLNS as, removeSpaces ts))
     _ -> Nothing
  attributeMatch nc p = Read $ do
    (an,av) <- pTokenAttrNameClass nc $ Set.singleton $
      P.Tokens $ pure expected
    ((\a2k n2a -> a2k (n2a an)) <$>) $
      unRead $ readNested p
       (mempty, pure (TS.tree0 (NodeText . EscapedText . unEscapedAttr <$> av)))
    -- See comment in 'attribute' about the cast to 'EscapedText' here.
    where
    expected = Left $ HM.singleton (qName (NCName n)) $ noSource ""
      where n = TLB.toLazyText $ textify (mempty::Namespaces NCName,(infixN0,SideL),nc)
instance Ord err => Definable (Read FileSourced err) where
  define n = Read . P.label n . unRead

-- ** Type 'ReadPerm'
data ReadPerm (src :: * -> *) err a k
 =   ReadPerm
 {   readPerm_result :: !(Maybe (a->k))
 ,   readPerm_parser :: P.Parsec err (ReadStream src) (ReadPerm src err a k)
 }

instance
 (Ord err, ReadConstraints src) =>
 Dimapable (ReadPerm src err) where
  dimap a2b b2a (ReadPerm a ma) =
    ReadPerm (merge <$> a)
    (dimap a2b b2a `fmap` ma)
    where merge = \a2k2k b2k -> a2k2k $ b2k . a2b
instance
 (Ord err, ReadConstraints src) =>
 Dicurryable (ReadPerm src err) where
  dicurry ::
   forall args r k proxy.
   CurryN args =>
   proxy args ->
   (args-..->r) -> -- construction
   (r->Tuples args) -> -- destruction
   ReadPerm src err (args-..->k) k ->
   ReadPerm src err (r->k) k
  dicurry px constr destr (ReadPerm a ma) =
    ReadPerm (merge <$> a)
     (dicurry px constr destr `fmap` ma)
    where
    merge args2k2k = \r2k ->
      args2k2k $ mapresultN @args r2k constr
instance
 (Ord err, ReadConstraints src) =>
 Composable (ReadPerm src err) where
  lhs@(ReadPerm da pa) <.> rhs@(ReadPerm db pb) =
    ReadPerm a $
      lhsAlt <|> rhsAlt
    where
    lhsAlt = (<.> rhs) <$> pa
    rhsAlt = (lhs <.>) <$> pb
    a = flip (.) <$> da <*> db
instance
 (Ord err, ReadConstraints src) =>
 Tupable (ReadPerm src err) where
  lhs@(ReadPerm da pa) <:> rhs@(ReadPerm db pb) =
    ReadPerm a (lhsAlt <|> rhsAlt)
    where
    lhsAlt = (<:> rhs) <$> pa
    rhsAlt = (lhs <:>) <$> pb
    a = consCont (,) <$> da <*> db
instance Definable (ReadPerm src err) where
  define _n = id

-- * Utils

concatCont :: [(a->k)->k] -> ([a]->k)->k
concatCont = List.foldr (consCont (:)) ($ [])
{-# INLINE concatCont #-}

consCont :: (a->b->c) -> ((a->k)->k) -> ((b->k)->k) -> (c->k)->k
consCont ab2c a2k2k b2k2k = \c2k -> a2k2k $ \a -> b2k2k $ \b -> c2k (ab2c a b)
{-# INLINE consCont #-}

mapCont :: (a->b) -> ((a->k)->k) -> ((b->k)->k)
mapCont a2b a2k2k = \b2k -> a2k2k (b2k . a2b)
{-# INLINE mapCont #-}

-- | An adaptation of megaparsec's 'pToken',
-- to handle 'attribute' properly.
pTokenAttr ::
 forall e m src.
 ReadConstraints src =>
 QName ->
 Set.Set (P.ErrorItem (P.Token (ReadStream src))) ->
 P.ParsecT e (ReadStream src) m (src EscapedAttr)
pTokenAttr an ps = P.ParsecT $ \st@(P.State s@(attrs,trees) o pst de) cok _ _ eerr ->
  case HM.lookup an attrs of
   Just av -> cok av (P.State (HM.delete an attrs, trees) (o+1) pst de) mempty
   Nothing -> eerr (P.TrivialError o us ps) st
    where
    us = case P.take1_ s of
     Nothing -> pure P.EndOfInput
     Just (t,_ts) -> (Just . P.Tokens . pure) t
{-# INLINE pTokenAttr #-}

-- | An adaptation of megaparsec's 'pToken',
-- to handle 'attributeMatch' properly.
pTokenAttrNameClass ::
 forall e m src.
 ReadConstraints src =>
 NameClass ->
 Set.Set (P.ErrorItem (P.Token (ReadStream src))) ->
 P.ParsecT e (ReadStream src) m (QName, src EscapedAttr)
pTokenAttrNameClass nc ps = P.ParsecT $ \st@(P.State s@(attrs,trees) o pst de) cok _ _ eerr ->
  case HM.toList attrs of
   a@(an,_av):_ | matchNameClass nc an -> cok a (P.State (HM.delete an attrs, trees) (o+1) pst de) mempty
   _ -> eerr (P.TrivialError o us ps) st
    where
    us = case P.take1_ s of
     Nothing -> pure P.EndOfInput
     Just (t,_ts) -> (Just . P.Tokens . pure) t
{-# INLINE pTokenAttrNameClass #-}

-- | An adaptation of megaparsec's 'pToken',
-- to handle 'pi' since 'NodePI' is ignored by 'P.take1_'.
pTokenPI ::
 forall e m src.
 UnSource src =>
 PName ->
 Set.Set (P.ErrorItem (P.Token (ReadStream src))) ->
 P.ParsecT e (ReadStream src) m TL.Text
pTokenPI n ps = P.ParsecT $ \st@(P.State s o pst de) cok _ _ eerr ->
  case take1 s of
   Nothing -> eerr (P.TrivialError o us ps) st
    where us = pure P.EndOfInput
   Just (c, cs)
    | Right (TS.Tree nod _) <- c
    , NodePI pn pv <- unSource nod
    , pn == n -> cok pv (P.State cs (o+1) pst de) mempty
    | otherwise -> eerr (P.TrivialError o us ps) st
      where
      us = case take1 s of
       Nothing -> pure P.EndOfInput
       Just (t,_ts) -> (Just . P.Tokens . pure) t
  where
  take1 = take1_ isIgnoredNode
    where
    isIgnoredNode = \case
     NodeComment{} -> True
     _ -> False

removeXMLNS ::
 HM.HashMap QName (src EscapedAttr) ->
 HM.HashMap QName (src EscapedAttr)
removeXMLNS =
  HM.filterWithKey $ \an _av ->
    case an of
     QName "" "xmlns" -> False
     QName ns _l -> ns /= xmlns_xmlns

removeSpaces :: UnSource src => Trees src -> Trees src
removeSpaces xs =
  if (`all` xs) $ \case
   TS.Tree nod _ts
    | NodeText (EscapedText et) <- unSource nod ->
    all (\case
     EscapedPlain t -> TL.all Char.isSpace t
     _ -> False) et
   _ -> True
  then (`Seq.filter` xs) $ \case
   TS.Tree nod _ts
    | NodeText EscapedText{} <- unSource nod -> False
   _ -> True
  else xs

-- | @readNested v xs@ returns a 'Read' parsing @xs@ entirely with @v@,
-- updating 'P.stateOffset' and re-raising any exception.
readNested ::
 Ord err =>
 ReadConstraints src =>
 Read src err f a ->
 ReadStream src ->
 Read src err f a
readNested (Read p) stateInput = Read $ do
  st <- P.getParserState
  (st', res) <- lift $ P.runParserT' (p <* P.eof) st
   { P.stateInput
   , P.stateOffset = P.stateOffset st
   }
  P.updateParserState (\s -> s{P.stateOffset = P.stateOffset st'})
  case res of
   Right a -> return a
   Left (P.ParseErrorBundle errs _) ->
    case NonEmpty.head errs of
     P.TrivialError _o us es -> P.failure us es
     P.FancyError _o es -> P.fancyFailure es

-- * Class 'DecodeText'
class DecodeText a where
  decodeText :: P.Parsec Void TL.Text a
instance DecodeText String where
  decodeText = TL.unpack . fst <$>
    P.match (P.skipMany P.anySingle)
instance DecodeText Text.Text where
  decodeText = TL.toStrict . fst <$>
    P.match (P.skipMany P.anySingle)
instance DecodeText TL.Text where
  decodeText = fst <$>
    P.match (P.skipMany P.anySingle)
instance DecodeText Bool where
  decodeText =
    False <$ (P.string "false" <|> P.string "0") <|>
    True  <$ (P.string "true"  <|> P.string "1")
instance DecodeText Integer where
  decodeText = P.signed (return ()) P.decimal
instance DecodeText Natural where
  decodeText = P.optional (P.char '+') *> P.decimal

-- * Megaparsec adaptations
-- | Rewrite 'P.parseErrorTextPretty' to keep 'Ord' of 'node's.
parseErrorTextPretty ::
 P.ShowErrorComponent err =>
 P.ParseError (ReadStream FileSourced) err -> String
parseErrorTextPretty (P.TrivialError o us ps) =
  if isNothing us && Set.null ps
  then "unknown parse error\n"
  else
    (case us of
     Just P.Tokens{} -> ""
     _ ->
      -- FIXME: this is not informative enough,
      -- but P.EndOfInput can not carry a source location,
      -- and retraversing the XML tree cannot be done
      -- exactly as the parser did only knowing the Offset,
      -- because of attributes being permutable.
      "node #"<>show o<>"\n"
    ) <>
    messageItemsPretty "unexpected "
     (showErrorItem px <$> maybeToList us) <>
    messageItemsPretty "expecting "
     (showErrorItem px <$> Set.toAscList ps)
  where px = Proxy :: Proxy s
parseErrorTextPretty err = P.parseErrorTextPretty err

messageItemsPretty :: String -> [String] -> String
messageItemsPretty prefix ts
 | null ts = ""
 | otherwise = prefix <> orList ts <> "\n"

orList :: IsString s => Monoid s => [s] -> s
orList [] = mempty
orList [x] = x
orList [x,y] = x <> " or " <> y
orList xs = mconcat (List.intersperse ", " (List.init xs)) <> ", or " <> List.last xs

showErrorItem ::
 (s ~ ReadStream (Sourced (FileSource Offset))) =>
 Proxy s -> P.ErrorItem (P.Token s) -> String
showErrorItem px = \case
 P.Tokens ts   -> P.showTokens px ts
 P.Label label -> NonEmpty.toList label
 P.EndOfInput  -> "end-of-node"