module Text.XML.WraXML.Tree where

import qualified Text.XML.WraXML.Element as Elem

import qualified Data.Tree.BranchLeafLabel as Tree
import qualified Text.XML.WraXML.String  as XmlString
import qualified Text.HTML.WraXML.String as HtmlString

import qualified Text.XML.Basic.Tag as Tag
import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.Basic.ProcessingInstruction as PI

import qualified Text.XML.HXT.DOM.Unicode as Unicode
import qualified Data.Char               as Char

import Control.Monad.Trans.Writer (Writer, writer, runWriter, censor, )

import qualified Control.Monad.Exception.Synchronous as Exc

import           Data.Foldable as Foldable(Foldable(foldMap))
import           Data.Traversable as Traversable(Traversable(traverse))
import           Control.Applicative (Applicative, )
import qualified Control.Applicative as App
import           Data.List.HT (unzipEithers, )
import           Data.Tuple.HT (mapFst, mapSnd, mapPair, swap, )
import           Data.Maybe (mapMaybe, fromMaybe, )
import           Data.Monoid (Monoid, mempty, mappend, mconcat, )

import qualified Text.XML.Basic.Format as Format


{- * data structures -}

newtype T i name str =
   Cons {unwrap :: Tree.T i (Branch name str) (Leaf name str)}
   deriving (Show)

data Branch name str =
     Tag    {getElement :: Elem.T name str}
   deriving (Show)

{-
It is disputable whether comments (and warnings)
shall have type String or 'str'.
Can a comment contain non-ASCII characters and XML entities?
This is important for finding the comment ending "-->" properly.
What about scripts enclosed in comment delimiters?
Mozilla and Firefox don't do any encoding in the SCRIPT tag at all.
This seems to be wrong, since scripts frequently contain strings
with tag descriptions, like '<BR>'.
-}
data Leaf name str =
     Text    Bool {- is whitespace significant -} str
   | Comment String  -- better 'str'?
   | CData   String
   | PI      (Tag.Name name) (PI.T name str)
   | Warning String  -- better 'str'?


type AttributePlain = (String, String)


instance (Name.Attribute name, Show name, Show str) =>
      Show (Leaf name str) where
   showsPrec prec x =
      showParen (prec>=10) $
      case x of
         Text   _ str -> showString "Text "    . showsPrec 11 str
         Comment  str -> showString "Comment " . showsPrec 11 str
         CData    str -> showString "CData "   . showsPrec 11 str
         PI target p  ->
--            showString "ProcessingInstruction " .
            showString "PI " .
            showsPrec 11 target . showString " " .
            showsPrec 11 p
         {- Maybe it is better to attach warnings to malicious tags
            instead of throwing them into the tag soup.
            But how to tell the position of the error then? -}
         Warning  str -> showString "Warning " . showsPrec 11 str


{-
-- * Name data type

A name can only consist of (7-bit) ASCII characters.
It is used both for tag names and for attribute names.
newtype Name = Name String
   deriving (Show, Eq, Ord)

nameFromString :: String -> Name
nameFromString x =
   if all Char.isAscii x
     then Name $ map Char.toLower x
     else error ("XMLTree.nameFromString: '" ++ x ++
                 "' contains non-ASCII characters.")

nameToString :: Name -> String
nameToString (Name x) = x
-}



{- * generators -}

wrap :: Tree.T i (Branch name str) (Leaf name str) -> T i name str
wrap = Cons

wrap2 :: i -> Tree.Elem i (Branch name str) (Leaf name str) -> T i name str
wrap2 = curry wrap

lift ::
   (Tree.T i (Branch name str0) (Leaf name str0) ->
    Tree.T j (Branch name str1) (Leaf name str1)) ->
   (T i name str0 -> T j name str1)
lift f = wrap . f . unwrap

liftA :: Applicative m =>
   (Tree.T i (Branch name str0) (Leaf name str0) ->
    m (Tree.T i (Branch name str1) (Leaf name str1))) ->
   (T i name str0 -> m (T i name str1))
liftA f = App.liftA wrap . f . unwrap


-- | Build some textual content.
literal :: str -> T i name str
literal = literalIndex (error "literal: no index given")

literalIndex :: i -> str -> T i name str
literalIndex i =
   wrap2 i .
   Tree.Leaf . Text False


comment :: String -> T i name str
comment = commentIndex (error "comment: no index given")

commentIndex :: i -> String -> T i name str
commentIndex i =
   wrap2 i .
   Tree.Leaf . Comment

warning :: String -> T i name str
warning = warningIndex (error "warning: no index given")

warningIndex :: i -> String -> T i name str
warningIndex i =
   wrap2 i .
   Tree.Leaf . Warning

cdata :: String -> T i name str
cdata = cdataIndex (error "cdata: no index given")

cdataIndex :: i -> String -> T i name str
cdataIndex i =
   wrap2 i .
   Tree.Leaf . CData

processing :: Tag.Name name -> PI.T name str -> T i name str
processing = processingIndex (error "processing: no index given")

processingIndex :: i -> Tag.Name name -> PI.T name str -> T i name str
processingIndex i target =
   wrap2 i .
   Tree.Leaf . PI target

tag :: Tag.Name name -> [T i name str] -> T i name str
tag name = tagAttr name []

tagAttr :: Tag.Name name -> [Attr.T name str] -> [T i name str] -> T i name str
tagAttr = tagIndexAttr (error "tagAttr: no index given")

tagIndexAttr :: i -> Tag.Name name -> [Attr.T name str] -> [T i name str] -> T i name str
tagIndexAttr index name attrs =
   wrap2 index .
   Tree.Branch (Tag (Elem.Cons name attrs)) .
   map unwrap


{- * Conversions -}

liftTrans :: (a -> b) -> (a -> [b])
liftTrans f = (:[]) . f

liftText :: (String -> String) -> (Leaf name String -> Leaf name String)
liftText f leaf =
   case leaf of
      Text b s -> Text b (f s)
      CData s  -> CData (f s)
      _ -> leaf

liftTextA :: Applicative m => (String -> m String) -> (Leaf name String -> m (Leaf name String))
liftTextA f leaf =
   case leaf of
      Text b s -> App.liftA (Text b) (f s)
      CData s  -> App.liftA CData $ f s
      _ -> App.pure leaf



instance Functor (Leaf name) where
   fmap f leaf =
      case leaf of
         Text b s  -> Text b (f s)
         Comment s -> Comment s
         Warning s -> Warning s
         CData s   -> CData s
         PI t p    -> PI t $ fmap f p

{- this instance is quite useless but required by Traversable -}
instance Foldable (Leaf name) where
   foldMap f leaf =
      case leaf of
         Text _b s -> f s
         PI _t p   -> foldMap f p
         _ -> mempty

instance Traversable (Leaf name) where
   traverse f leaf =
      case leaf of
         Text b s  -> App.liftA (Text b) (f s)
         Comment s -> App.pure $ Comment s
         Warning s -> App.pure $ Warning s
         CData s   -> App.pure $ CData s
         PI t p    -> App.liftA (PI t) $ traverse f p



liftElement :: (Elem.T name str0 -> Elem.T name str1) -> (Branch name str0 -> Branch name str1)
liftElement f (Tag elm) = Tag (f elm)

liftElementA :: Applicative m =>
   (Elem.T name str0 -> m (Elem.T name str1)) -> (Branch name str0 -> m (Branch name str1))
liftElementA f (Tag elm) = App.liftA Tag (f elm)


{- * Tests -}

{- |
If the Tree is a Leaf, then return False.
Otherwise return the result of the predicate.
-}
checkTag :: (Elem.T name str -> Bool) -> (T i name str -> Bool)
checkTag p =
   Tree.switch (flip const) (const . p . getElement) (const False)
    . unwrap

maybeTag :: T i name str -> Maybe (Elem.T name str, [T i name str])
maybeTag (Cons (_,t)) =
   case t of
      Tree.Branch (Tag elm) subTrees ->
           Just (elm, map wrap subTrees)
      _ -> Nothing

maybeText :: T i name str -> Maybe str
maybeText (Cons (_,t)) =
   case t of
      Tree.Leaf l -> maybeTextLeaf l
      _           -> Nothing

maybeTextLeaf :: Leaf name str -> Maybe str
maybeTextLeaf t =
   case t of
      Text _ s -> Just s
      _        -> Nothing



{- * types of processors -}

type Filter i name str = T i name str -> T i name str

type FilterA m i name str = T i name str -> m (T i name str)


{- * tree processors -}


instance Functor (T i name) where
   fmap f =
      lift $
      Tree.map
         (liftElement $ fmap f)
         (fmap f)


mapText ::
   (String -> String) ->
   (T i name String -> T i name String)
mapText f =
   lift $
   Tree.map
      (liftElement $ fmap f)
      (liftText f)


mapIndex :: (i -> j) -> T i name str -> T j name str
mapIndex f =
   lift $ Tree.mapLabel f


mapTag ::
   (Elem.Filter name str) ->
   (Filter i name str)
mapTag f =
   lift $
   Tree.map (liftElement f) id

{- |
You can e.g. filter @text1 <b> text2 </b> text3@
to @text1  text2  text3@ by
@filterTag (checkTagName ("b"\/=))@.
-}
filterTag ::
   (Elem.T name str -> Bool) ->
   (T i name str -> [T i name str])
filterTag p =
   map wrap .
   Tree.filterBranch (p . getElement) .
   unwrap

mapCond ::
   (Elem.T name str -> Bool) ->
   (Elem.Filter name str) ->
   (Leaf name str -> Leaf name str) ->
   (Filter i name str)
mapCond descend elemF txtF =
   lift $
   Tree.mapCond (descend . getElement) (liftElement elemF) txtF

{-
mapTextCond ::
   (Elem.T name String -> Bool) ->
   (Elem.T name String -> Elem.T name String) ->
   (String -> String) ->
   (Filter i name String)
mapTextCond descend elemF txtF =
   lift $
   Tree.mapCond (descend . getElement) (liftElement elemF) (liftText txtF)
-}


{- |
Find all branches where the predicate applies and
return a list of matching sub-trees in depth-first order.

Example: @filterTagsFlatten (checkTagName ("meta"==))@
-}
filterTagsFlatten ::
   (Elem.T name str -> Bool) -> T i name str -> [(Elem.T name str, [T i name str])]
filterTagsFlatten p =
   filter (p . fst) .
   mapMaybe maybeTag .
   allSubTrees

filterElementsFlatten :: (Elem.T name str -> Bool) -> T i name str -> [Elem.T name str]
filterElementsFlatten p =
   Tree.fold
      (flip const)
      (\branch xs -> filter p [getElement branch] ++ concat xs)
      (const []) .
   unwrap

allSubTrees :: T i name str -> [T i name str]
allSubTrees =
   map wrap . Tree.allSubTrees . unwrap

{- | merge subsequent string leafs -}
mergeStrings :: (Monoid str) => Filter i name str
mergeStrings =
   processAllSubTrees mergeTopStrings


mergeTopStrings :: (Monoid str) => [T i name str] -> [T i name str]
mergeTopStrings =
   let prepend (i, Tree.Leaf (Text w0 t0)) rest =
          mapFst (\ ~(w1,t1) -> (i, Tree.Leaf $ Text (w0||w1) (mappend t0 t1))) $
          case rest of
             (_, Tree.Leaf (Text w1 t1)) : ss -> ((w1,t1), ss)
             _ -> ((False,mempty), rest)
       prepend x rest = (x, rest)
   in  map wrap.
       foldr (\x -> uncurry (:) . prepend x) [] .
       map unwrap


{- |
Process all sub-tree lists in bottom-up order.
-}
processAllSubTrees ::
   ([T i name str] -> [T i name str]) ->
   Filter i name str
processAllSubTrees f =
   lift $
   Tree.fold
      (,)
      (\branch -> Tree.Branch branch . map unwrap . f . map wrap)
      Tree.Leaf

processSubTrees :: (Tag.Name name -> Bool) ->
   ([T i name str] -> [T i name str]) -> Filter i name str
processSubTrees p f =
   lift $
   Tree.mapSubTrees
      (\(Tag (Elem.Cons name _)) -> p name)
      (mapSnd (map unwrap . f . map wrap))

processSubTreesAttrs :: (Tag.Name name -> Bool) ->
   (([Attr.T name str], [T i name str]) ->
    ([Attr.T name str], [T i name str])) ->
   Filter i name str
processSubTreesAttrs p f =
   lift $
   Tree.mapSubTrees
      (\(Tag (Elem.Cons name _)) -> p name)
      (\(Tag (Elem.Cons name attrs), subTrees) ->
          mapPair (Tag . Elem.Cons name, map unwrap) $
          f (attrs, map wrap subTrees))



{- * applicative functor tree processors -}

instance Foldable (T i name) where
   foldMap f =
      Tree.fold
         (const id) (const mconcat) (maybe mempty f . maybeTextLeaf) .
      unwrap


instance Traversable (T i name) where
   traverse f =
      liftA $
      Tree.mapA
         (liftElementA $ traverse  f)
         (traverse f)


mapTextA :: Applicative m =>
   (String -> m String) ->
   (T i name String -> m (T i name String))
mapTextA f =
   liftA $
   Tree.mapA
      (liftElementA $ traverse  f)
      (liftTextA f)


mapCondA :: Applicative m =>
   (Elem.T name str -> Bool) ->
   (Elem.T name str -> m (Elem.T name str)) ->
   (Leaf name str -> m (Leaf name str)) ->
   (FilterA m i name str)
mapCondA descend elemF txtF =
   liftA $
   Tree.mapCondA (descend . getElement) (liftElementA elemF) txtF

{-
mapTextCondA :: Applicative m =>
   (Elem.T name String -> Bool) ->
   (Elem.T name String -> m (Elem.T name String)) ->
   (String -> m String) ->
   (FilterA m i name String)
mapTextCondA descend elemF txtF =
   liftA $
   Tree.mapCondA (descend . getElement) (liftElementA elemF) (liftTextA txtF)
-}


{- * Character decoding -}

unescape :: T i name XmlString.T -> T i name String
unescape = fmap XmlString.toUnicodeString

{- |
Use ASCII characters, XML entity references and character references
for representing strings.
That's not human readable, but portable.
-}
escape :: T i name String -> T i name XmlString.T
escape = fmap XmlString.fromUnicodeString

{-# DEPRECATED decodeSpecialChars, maybeDecodeSpecialChars, decodeSpecialCharsDecoder, decodeAttrs, decodeAttr, maybeDecodeUTF8Chars "XMLChar.Unicode constructors must contain unicode characters and not encoded ones. Decode characters before parsing!" #-}

{- |
Decode characters like those from UTF-8 scheme.
-}
decodeSpecialChars ::
   (Name.Tag name, Name.Attribute name) =>
   String -> T i name XmlString.T -> [T i name String]
decodeSpecialChars enc tree =
   fromMaybe
      [unescape tree]
      (maybeDecodeSpecialChars enc tree)

maybeDecodeSpecialChars ::
   (Name.Tag name, Name.Attribute name) =>
   String -> T i name XmlString.T -> Maybe [T i name String]
maybeDecodeSpecialChars enc tree =
   fmap (flip decodeSpecialCharsDecoder tree) $
   Unicode.getDecodingFctEmbedErrors enc

{- test:
-- decodeSpecialChars "utf-8" $ literalIndex 0 (XmlString.fromEntString "tr&am;e")

traverse (putStrLn . showHTML . escape) $ decodeSpecialChars "utf-8" $ mapText XmlString.fromEntString $ tagIndexAttr 0 "br" [("href","urlö"), ("target","_blank")] [literalIndex 0 "\195tr&am;eü"]
-}

{- |
Conversion errors are appended as warnings to the tree.
-}
decodeSpecialCharsDecoder ::
   (Name.Tag name, Name.Attribute name) =>
   Unicode.DecodingFctEmbedErrors -> T i name XmlString.T -> [T i name String]
decodeSpecialCharsDecoder decode =
   let xmlDecode =
          HtmlString.toUnicodeStringDecodingEmbedError decode
       mergeDecode =
          XmlString.uStringWithErrorsMergePlainChars . xmlDecode
   in  Tree.foldLabel
          (\i branch subTrees ->
              case branch of
                 Tag (Elem.Cons name attrs) ->
                    let (newAttrs,warnings) =
                           runWriter $
                           decodeAttrs xmlDecode attrs
                    in  [tagIndexAttr i name newAttrs
                           (map (warningIndex i) warnings ++ concat subTrees)])
          (\i leaf -> map (wrap2 i . Tree.Leaf) $
              case leaf of
                 Text  b str ->
                    map
                      (Exc.switch Warning (Text b))
                      (mergeDecode str)
                 Comment   cmt   -> [Comment cmt]
                 Warning   str   -> [Warning str]
                 CData     str   -> [CData str]
                 PI target instr0 ->
                    let (instr1,warnings) =
                           runWriter $
                           PI.mapAttributesA (decodeAttrs xmlDecode) instr0
                    in  PI target instr1 : map Warning warnings) .
       unwrap

decodeAttrs ::
   (Name.Tag name, Name.Attribute name) =>
   (XmlString.T -> XmlString.EmbeddedExceptions) ->
   [Attr.T name XmlString.T] -> Writer [String] [Attr.T name String]
decodeAttrs xmlDecode =
   traverse
      (\attr ->
         traverse  (decodeAttr xmlDecode (Attr.name_ attr)) attr)

decodeAttr ::
   (Name.Tag name, Name.Attribute name) =>
   (XmlString.T -> XmlString.EmbeddedExceptions) ->
   Attr.Name name -> XmlString.T -> Writer [String] String
decodeAttr decode name =
   censor (map (showString $ "in attribute \"" ++ Name.toString name ++ "\": ")) .
   writer . swap . unzipEithers . map Exc.toEither . decode


maybeDecodeUTF8Chars :: String -> T i name XmlString.T -> Maybe (T i name String)
maybeDecodeUTF8Chars enc tree =
   case map Char.toLower enc of
      "utf-8" -> Just (fmap XmlString.utf8ToUnicodeString tree)
      _ -> Nothing


{- * Formatting -}

{-
show ::
   (Name.Tag name, Name.Attribute name) =>
   T i name XmlString.T -> String
show leaf = shows leaf ""
-}

formatMany ::
   (Name.Tag name, Name.Attribute name, Format.C string) =>
   [T i name string] -> ShowS
formatMany = Format.many format

-- cf. src/Text/XML/HXT/DOM/XmlTreeFunctions.hs
format ::
   (Name.Tag name, Name.Attribute name, Format.C string) =>
   T i name string -> ShowS
format =
   Tree.fold (flip const) formatBranch formatLeaf . unwrap

formatBranch ::
   (Name.Tag name, Name.Attribute name, Format.C string) =>
   Branch name string -> [ShowS] -> ShowS
formatBranch branch formatSubTrees =
   case branch of
      Tag elm ->
         Elem.format
            (\_tagName -> null formatSubTrees)
            Format.slash
            elm formatSubTrees

formatLeaf ::
   (Name.Tag name, Name.Attribute name, Format.C string) =>
   Leaf name string -> ShowS
formatLeaf leaf =
   case leaf of
      Text _ str -> Format.run str
      Comment c ->
         showString "<!--" . showString c . showString "-->"
      Warning e ->
         showString "<!-- Warning: " . showString e . showString " -->"
      CData str ->
         showString "<![CDATA[" . showString str . showString "]]>"
      PI target p ->
         Format.angle $
         Format.quest .
         Format.name target .
         Format.run p .
         Format.quest

instance
   (Name.Tag name, Name.Attribute name, Format.C string) =>
      Format.C (T i name string) where
   run = format

instance
   (Name.Tag name, Name.Attribute name, Format.C string) =>
      Format.C (Leaf name string) where
   run = formatLeaf