{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Symantic.XML.Document
 ( module Symantic.XML.Document
 , TS.Tree(..)
 , TS.Trees
 , TS.tree0
 ) where

import Control.Applicative (Alternative(..))
import Data.Bool
import Data.Char (Char)
import Data.Default.Class (Default(..))
import Data.Eq (Eq(..))
import Data.Foldable (Foldable(..), all)
import Data.Function (($), (.), id)
import Data.Functor (Functor(..), (<$>))
import Data.Hashable (Hashable(..))
import Data.Int (Int)
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..), fromMaybe)
import Data.Monoid (Monoid(..))
import Data.Ord (Ord(..))
import Data.Semigroup (Semigroup(..))
import Data.Sequence (Seq)
import Data.String (String, IsString(..))
import GHC.Generics (Generic)
import Prelude ((+), error)
import System.IO (FilePath)
import Text.Show (Show(..), showsPrec, showChar, showParen, showString)
import qualified Data.Char.Properties.XMLCharProps as XC
import qualified Data.HashMap.Strict as HM
import qualified Data.HashSet as HS
import qualified Data.List as List
import qualified Data.Sequence as Seq
import qualified Data.Text.Lazy as TL
import qualified Data.TreeSeq.Strict as TS

-- * Type 'XML'
type XML src = TS.Tree (Sourced src Node)
type XMLs src = Seq (XML src)

-- | Unify two 'XMLs', merging border 'NodeText's if any.
union :: Semigroup (Sourced src EscapedText) => XMLs src -> XMLs src -> XMLs src
union x y =
        case (Seq.viewr x, Seq.viewl y) of
         (xs Seq.:> x0, y0 Seq.:< ys) ->
                case (x0,y0) of
                 (  Tree0 (Sourced sx (NodeText tx))
                  , Tree0 (Sourced sy (NodeText ty)) ) ->
                        xs `union`
                        Seq.singleton (Tree0 $ (NodeText <$>) $ Sourced sx tx <> Sourced sy ty) `union`
                        ys
                 _ -> x <> y
         (Seq.EmptyR, _) -> y
         (_, Seq.EmptyL) -> x

unions ::
 Semigroup (Sourced src EscapedText) =>
 Foldable f => f (XMLs src) -> XMLs src
unions = foldl' union mempty

pattern Tree0 :: a -> TS.Tree a
pattern Tree0 a <- TS.Tree a (null -> True)
        where Tree0 a = TS.Tree a Seq.empty

-- ** Type 'Node'
data Node
 =   NodeElem    QName         -- ^ Node with some 'NodeAttr' and then other 'Node's as children.
 |   NodeAttr    QName         -- ^ Node with a 'NodeText' child.
 |   NodePI      PName TL.Text -- ^ Leaf (except for @<?xml?>@ which has 'NodeAttr's.
 |   NodeText    EscapedText   -- ^ Leaf.
 |   NodeComment TL.Text       -- ^ Leaf.
 |   NodeCDATA   TL.Text       -- ^ Leaf.
 deriving (Eq, Ord, Show)

-- ** Type 'EscapedText'
newtype EscapedText = EscapedText (Seq Escaped)
 deriving (Eq, Ord, Show)

escapeText :: TL.Text -> EscapedText
escapeText s =
        EscapedText $
        case TL.span (`List.notElem` ("<>&'\""::String)) s of
         (t, r) | TL.null t -> escape r
                | otherwise -> EscapedPlain t Seq.<| escape r
        where
        escape t = case TL.uncons t of
         Nothing -> mempty
         Just (c, cs) -> escapeChar c Seq.<| et where EscapedText et = escapeText cs

escapeChar :: Char -> Escaped
escapeChar c =
        case c of
         '<'  -> EscapedEntityRef entityRef_lt
         '>'  -> EscapedEntityRef entityRef_gt
         '&'  -> EscapedEntityRef entityRef_amp
         '\'' -> EscapedEntityRef entityRef_apos
         '"'  -> EscapedEntityRef entityRef_quot
         _    -> EscapedPlain $ TL.singleton c

unescapeText :: EscapedText -> TL.Text
unescapeText (EscapedText et) = (`foldMap` et) $ \case
 EscapedPlain t -> t
 EscapedEntityRef EntityRef{..} -> entityRef_value
 EscapedCharRef (CharRef c) -> TL.singleton c

instance Semigroup EscapedText where
 EscapedText x <> EscapedText y =
                case (x,y) of
                 (xl Seq.:|> EscapedPlain xr, EscapedPlain yl Seq.:<|yr) ->
                        (EscapedText $ xl Seq.|> EscapedPlain (xr<>yl)) <> EscapedText yr
                 _ -> EscapedText $ x <> y
instance Monoid EscapedText where
        mempty  = EscapedText mempty
        mappend = (<>)

-- *** Type 'Escaped'
-- | 'EscapedText' lexemes.
data Escaped
  =  EscapedPlain     TL.Text
  |  EscapedEntityRef EntityRef
  |  EscapedCharRef   CharRef
  deriving (Eq, Ord, Show)

-- *** Type 'EntityRef'
data EntityRef = EntityRef
 { entityRef_name  :: NCName
 , entityRef_value :: TL.Text
 } deriving (Eq, Ord, Show)

entityRef_lt, entityRef_gt, entityRef_amp, entityRef_quot, entityRef_apos :: EntityRef
entityRef_lt   = EntityRef (NCName "lt") "<"
entityRef_gt   = EntityRef (NCName "gt") ">"
entityRef_amp  = EntityRef (NCName "amp") "&"
entityRef_quot = EntityRef (NCName "quot") "\""
entityRef_apos = EntityRef (NCName "apos") "'"

-- *** Type 'CharRef'
newtype CharRef = CharRef Char
 deriving (Eq, Ord, Show)

-- ** Type 'Name'
newtype Name = Name { unName :: TL.Text }
 deriving (Eq, Ord, Hashable)
instance Show Name where
        showsPrec _p = showString . TL.unpack . unName
instance IsString Name where
        fromString s
         | c:cs <- s
         , XC.isXmlNameStartChar c
         && all XC.isXmlNameChar cs
         = Name (TL.pack s)
         | otherwise = error $ "Invalid XML Name: "<>show s

-- ** Type 'Namespace'
newtype Namespace = Namespace { unNamespace :: TL.Text }
 deriving (Eq, Ord, Show, Hashable)
instance IsString Namespace where
        fromString s =
                if all (\c -> XC.isXmlChar c && c `List.notElem` ("'\"<&"::String)) s
                then Namespace (fromString s)
                else error $ "Invalid XML Namespace: "<>show s

xmlns_xml, xmlns_xmlns, xmlns_empty :: Namespace
xmlns_xml   = Namespace "http://www.w3.org/XML/1998/namespace"
xmlns_xmlns = Namespace "http://www.w3.org/2000/xmlns/"
xmlns_empty = Namespace ""

-- * Type 'Namespaces'
data Namespaces prefix = Namespaces
 { namespaces_prefixes :: (HM.HashMap Namespace prefix)
 , namespaces_default  :: Namespace
 } deriving (Show)
instance Default (Namespaces NCName) where
        def = Namespaces
         { namespaces_prefixes = HM.fromList
                 [ (xmlns_xml  , "xml")
                 , (xmlns_xmlns, "xmlns")
                 ]
         , namespaces_default  = ""
         }
instance Default (Namespaces (Maybe NCName)) where
        def = Namespaces
         { namespaces_prefixes = HM.fromList
                 [ (xmlns_xml  , Just "xml")
                 , (xmlns_xmlns, Just "xmlns")
                 ]
         , namespaces_default  = ""
         }
instance Semigroup (Namespaces NCName) where
        x <> y = Namespaces
         { namespaces_prefixes = namespaces_prefixes x <> namespaces_prefixes y
         , namespaces_default  = namespaces_default x
         }
instance Semigroup (Namespaces (Maybe NCName)) where
        x <> y = Namespaces
         { namespaces_prefixes = HM.unionWith (<|>) (namespaces_prefixes x) (namespaces_prefixes y)
         , namespaces_default  = namespaces_default x
         }
instance Monoid (Namespaces NCName) where
        mempty  = def
        mappend = (<>)
instance Monoid (Namespaces (Maybe NCName)) where
        mempty  = def
        mappend = (<>)

prefixifyQName :: Namespaces NCName -> QName -> PName
prefixifyQName Namespaces{..} QName{..} =
        PName
         { pNameSpace =
                if qNameSpace == namespaces_default
                then Nothing
                else HM.lookup qNameSpace namespaces_prefixes
         , pNameLocal = qNameLocal
         }

-- ** Type 'NCName'
-- | Non-colonized name.
newtype NCName = NCName { unNCName :: TL.Text }
 deriving (Eq, Ord, Hashable)
instance Show NCName where
        showsPrec _p = showString . TL.unpack . unNCName
instance IsString NCName where
        fromString s =
                fromMaybe (error $ "Invalid XML NCName: "<>show s) $
                ncName (TL.pack s)

ncName :: TL.Text -> Maybe NCName
ncName t =
        case TL.uncons t of
         Just (c, cs)
          | XC.isXmlNCNameStartChar c
          , TL.all XC.isXmlNCNameChar cs
          -> Just (NCName t)
         _ -> Nothing

poolNCNames :: [NCName]
poolNCNames =
        [ NCName $ TL.pack ("ns"<>show i)
        | i <- [1 :: Int ..]
        ]

freshNCName :: HS.HashSet NCName -> NCName
freshNCName ps = List.head $ poolNCNames List.\\ HS.toList ps

freshifyNCName :: HS.HashSet NCName -> NCName -> NCName
freshifyNCName ns (NCName n) =
        let ints = [1..] :: [Int] in
        List.head
         [ fresh
         | suffix <- mempty : (show <$> ints)
         , fresh <- [ NCName $ n <> TL.pack suffix]
         , not $ fresh `HS.member` ns
         ]

-- ** Type 'PName'
-- | Prefixed name.
data PName = PName
 { pNameSpace :: (Maybe NCName) -- ^ eg. Just "xml"
 , pNameLocal :: NCName         -- ^ eg. "stylesheet"
 } deriving (Eq, Ord, Generic)
instance Show PName where
        showsPrec p PName{pNameSpace=Nothing, ..} =
                showsPrec p pNameLocal
        showsPrec _p PName{pNameSpace=Just p, ..} =
                showsPrec 10 p .
                showChar ':' .
                showsPrec 10 pNameLocal
instance IsString PName where
        fromString "" = PName Nothing "" -- NOTE: NCName's fromString will raise an error.
        fromString s =
                case List.break (== ':') s of
                 (_, "")    -> PName Nothing $ fromString s
                 (p, local) -> PName (Just $ fromString p) (fromString $ List.drop 1 local)
instance Hashable PName

pName :: NCName -> PName
pName = PName Nothing
{-# INLINE pName #-}

-- ** Type 'QName'
-- | Qualified name.
data QName = QName
 { qNameSpace :: Namespace -- ^ eg. "http://www.w3.org/1999/XSL/Transform"
 , qNameLocal :: NCName    -- ^ eg. "stylesheet"
 } deriving (Eq, Ord, Generic)
instance Show QName where
        showsPrec _p QName{..} =
                (if TL.null $ unNamespace qNameSpace then id
                else showChar '{' . showString (TL.unpack $ unNamespace qNameSpace) . showChar '}'
                ) . showsPrec 10 qNameLocal
instance IsString QName where
        fromString "" = QName "" "" -- NOTE: NCName's fromString will raise an error.
        fromString full@('{':rest) =
                case List.break (== '}') rest of
                 (_, "")     -> error $ "Invalid XML Clark notation: "<>show full
                 (ns, local) -> QName (fromString ns) $ fromString $ List.drop 1 local
        fromString local = QName "" $ fromString local
instance Hashable QName

qName :: NCName -> QName
qName = QName (Namespace "")
{-# INLINE qName #-}

-- * Type 'Sourced'
data Sourced src a
 =   Sourced
 {   source  :: src
 , unSourced :: a
 } deriving (Eq, Ord, Functor)
instance (Show src, Show a) => Show (Sourced src a) where
        showsPrec p Sourced{..} =
                showParen (p > 10) $
                showsPrec 11 unSourced .
                showString " @" . showsPrec 10 source
instance Semigroup a => Semigroup (Sourced (FileSource Offset) a) where
        (<>)
         (Sourced rx@(FileRange xf xb xe :|  xs) x)
         (Sourced    (FileRange yf yb ye :| _ys) y)
         | xf == yf && xe == yb = Sourced (FileRange xf xb ye :| xs) $ x<>y
         | otherwise = Sourced rx (x<>y)
{-
instance (FromPad a, Semigroup a) => Semigroup (Sourced (FileSource LineCol) a) where
	(<>)
	 (Sourced rx@(FileRange xf xb xe :|  xs) x)
	 (Sourced    (FileRange yf yb ye :| _ys) y)
	 | xf == yf  = Sourced (FileRange xf xb ye :| xs) $ x<>fromPad (LineColumn l c)<>y
	 | otherwise = Sourced rx (x<>y)
	where
	l = lineNum yb - lineNum xe
	c = colNum  yb - colNum (if l <= 0 then xe else xb)

-- ** Class 'FromPad'
class FromPad a where
	fromPad :: LineColumn -> a
instance FromPad T.Text where
	fromPad LineColumn{..} =
		T.replicate lineNum   "\n" <>
		T.replicate colNum " "
instance FromPad TL.Text where
	fromPad LineColumn{..} =
		TL.replicate (fromIntegral lineNum)   "\n" <>
		TL.replicate (fromIntegral colNum) " "
instance FromPad EscapedText where
	fromPad = EscapedText . pure . fromPad
instance FromPad Escaped where
	fromPad = EscapedPlain . fromPad
-}

-- ** Class 'NoSource'
class NoSource src where
        noSource :: src
instance Default pos => NoSource (FileSource pos) where
        noSource = noSource :| []
instance Default pos => NoSource (FileRange pos) where
        noSource = FileRange "" def def
instance NoSource Offset where
        noSource = Offset def
{-
instance (FromPad a, Semigroup a, Monoid a) => Monoid (Sourced a) where
	mempty  = sourced0 mempty
	mappend = (<>)
-}

notSourced :: NoSource src => a -> Sourced src a
notSourced = Sourced noSource

-- * Type 'FileSource'
type FileSource pos = NonEmpty (FileRange pos)

-- ** Type 'FileSourced'
type FileSourced = Sourced (FileSource Offset)

-- ** Type 'FileRange'
data FileRange pos
 =   FileRange
 {   fileRange_file  :: FilePath
 ,   fileRange_begin :: pos
 ,   fileRange_end   :: pos
 } deriving (Eq, Ord)
instance Default (FileRange Offset) where
        def = FileRange "" def def
instance Default (FileRange LineColumn) where
        def = FileRange "" def def
instance Show (FileRange Offset) where
        showsPrec _p FileRange{..} =
                showString fileRange_file .
                showChar '@' . showsPrec 10 fileRange_begin .
                showChar '-' . showsPrec 10 fileRange_end
instance Show (FileRange LineColumn) where
        showsPrec _p FileRange{..} =
                showString fileRange_file .
                showChar '#' . showsPrec 10 fileRange_begin .
                showChar '-' . showsPrec 10 fileRange_end

-- *** Type 'Offset'
newtype Offset = Offset Int
 deriving (Eq, Ord)
instance Show Offset where
        showsPrec p (Offset o) = showsPrec p o
instance Default Offset where
        def = Offset 0
instance Semigroup Offset where
        Offset x <> Offset y = Offset (x+y)
instance Monoid Offset where
        mempty  = def
        mappend = (<>)

-- *** Type 'LineColumn'
-- | Absolute text file position.
data LineColumn = LineColumn
 { lineNum :: {-# UNPACK #-} Offset
 , colNum  :: {-# UNPACK #-} Offset
 } deriving (Eq, Ord)
instance Default LineColumn where
        def = LineColumn def def
instance Show LineColumn where
        showsPrec _p LineColumn{..} =
                showsPrec 11 lineNum .
                showChar ':' .
                showsPrec 11 colNum

filePos1 :: LineColumn
filePos1 = def