module Text.XML.WraXML.Tree.HXT
   (fromXmlTree, toXmlTree, lift, liftFilterToDocument,
    checkTagName, ) where

import qualified Text.XML.HXT.DOM.TypeDefs as HXT
-- import qualified Text.XML.HXT.DOM.XmlTreeFunctions as HXTTreeFunc
import qualified Data.Tree.NTree.TypeDefs  as HXTTree

import Text.XML.WraXML.Tree
   (Branch(Tag), Leaf(Text, PI, Comment, Warning, CData), )
import qualified Text.XML.WraXML.Element as Elem

import qualified Text.XML.WraXML.Tree    as XmlTree
import qualified Data.Tree.BranchLeafLabel as Tree

import qualified Text.XML.WraXML.String     as XmlString
import qualified Text.XML.WraXML.String.HXT as HXTString

import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.Basic.ProcessingInstruction as PI

import qualified Text.XML.Basic.Character as XmlChar

-- needed for code copies from HXT at the of the file
import Text.XML.HXT.DOM.XmlTree (QName, NTree, XNode, )
import Text.XML.HXT.DOM.QualifiedName (qualifiedName)

{- * conversion from our XML tree to HXT tree -}

fromXmlTree ::
   (Name.Tag name, Name.Attribute name) =>
   XmlTree.T i name XmlString.T -> HXT.XmlTree
fromXmlTree x =
   case multiFromXmlTree x of
      [y] -> y
      _   -> error "top branch can't be a string"

mkHXTName :: (Name.C name) => name -> QName
mkHXTName = HXT.mkName . Name.toString

mkHXTAttrs :: (Name.Attribute name) =>
   [Attr.T name XmlString.T] -> [NTree XNode]
mkHXTAttrs =
   map (\a ->
               (HXT.XAttr (mkHXTName $ Attr.name_ a))
               (HXTString.fromXmlString $ Attr.value_ a))

multiFromXmlTree ::
   (Name.Tag name, Name.Attribute name) =>
   XmlTree.T i name XmlString.T -> [HXT.XmlTree]
multiFromXmlTree =
      (\i f -> f i)
      (\x subs _ ->
          case x of
             Tag (Elem.Cons name attrs) ->
                    (HXT.XTag (mkHXTName name) (mkHXTAttrs attrs))
                    (concat subs)])
      (\x _ ->
          case x of
             Text _ {- whitespace -} str -> HXTString.fromXmlString str
             Comment str ->
                [HXTTree.mkTree (HXT.XCmt str) []]
             CData str ->
                [HXTTree.mkTree (HXT.XCdata str) []]
             PI target proc ->
                let (attrTree,subTrees) =
                       case proc of
                          PI.Known attrs -> (mkHXTAttrs attrs, [])
                          PI.Unknown str -> ([], [HXTTree.mkTree (HXT.XText str) []])
                in  [HXTTree.mkTree
                        (HXT.XPi (mkHXTName target) attrTree)
             Warning str ->
                [HXTTree.mkTree (HXT.XError 0 str) []])
    . XmlTree.unwrap

{- * conversion from HXT tree to our XML tree -}

toXmlTree, toXmlTree' ::
   (Name.Tag name, Name.Attribute name) =>
   HXT.XmlTree -> XmlTree.T () name XmlString.T
toXmlTree = XmlTree.mergeStrings . toXmlTree'

toXmlTree' (HXTTree.NTree label subTrees) = XmlTree.wrap $ (,) () $
   let leaf x =
          if null subTrees
            then Tree.Leaf x
            else error "HXT to WraXML: Leaf must not contain sub trees."

       fromHXTName = Name.fromString . HXT.qualifiedName

       convAttr (HXTTree.NTree x value) =
          case x of
             HXT.XAttr name ->
       (HXT.qualifiedName name) (HXTString.toXmlString value)
             _ -> error "HXT.XAttr expected"

   in  case label of
          HXT.XTag name attrs ->
                (Tag (Elem.Cons
                          (fromHXTName name)
                          (map convAttr attrs)))
                (map (XmlTree.unwrap . toXmlTree') subTrees)
          HXT.XText str ->
             leaf (Text True (XmlString.fromString str))
          HXT.XCharRef ref ->
             leaf (Text False [XmlChar.fromCharRef ref])
          HXT.XEntityRef ref ->
             leaf (Text False [XmlChar.fromEntityRef ref])
          HXT.XCmt cmt ->
             leaf (Comment cmt)
          HXT.XPi target proc ->
             Tree.Leaf $ PI (fromHXTName target) $
             case subTrees of
                [] -> PI.Known $ map convAttr proc
                [HXTTree.NTree (HXT.XText str) []] ->
                      PI.Unknown str
                _ ->  error "from HXT: processing instruction - there must be no children or a single text child"
          HXT.XAttr _    -> error "from HXT: attribute not allowed in normal text"
          HXT.XDTD _ _   -> error "from HXT: document type descriptor not allowed in normal text"
          HXT.XCdata x   -> leaf (CData x)
          HXT.XError lev x -> leaf (Warning ("Level: " ++ show lev ++ ": " ++ x))

{- * lift our XML filters to HXT filters -}

lift ::
   (Name.Tag name, Name.Attribute name) =>
   XmlTree.Filter () name XmlString.T -> (HXT.XmlTree -> HXT.XmlTree)
lift f = fromXmlTree . f . toXmlTree

{- |
Lift our XML filters to HXT document processors.
liftFilterToDocument ::
   (Name.Tag name, Name.Attribute name) =>
      String  {- ^ Name of root tag for processing, e.g. "html".
                   That tag must be in the first level.
                   It is an unchecked run-time error
                   if it is missing or occurs more than once. -}
   -> XmlTree.Filter () name XmlString.T
   -> (HXT.XmlTree -> HXT.XmlTree)
liftFilterToDocument tagName f =
   HXTTree.changeChildren $ \subTrees ->
      let (pre,x:post) =
              break (checkTagName tagName . HXTTree.getNode) subTrees
      in  pre ++ lift f x : post

checkTagName :: String -> HXT.XNode -> Bool
checkTagName tagName tree =
   case tree of
      (HXT.XTag name _)  ->  HXT.qualifiedName name == tagName
      _  ->  False

The HTML parser of the Haskell XML toolbox HXT
is great in parsing lousy HTML.
However its API is extremely weakly typed.
Everything is a monadic function, even simple XML trees.
This design was done in order to reduce the combinators essentially to .>>

That's why I decided to use HXT as a parser and pretty printer,
but not using its XML tree structure for manipulations.
This way I hope I can keep in sync with the main development path
while not interfering with HXT's typing style.