module Text.XML.WraXML.Document.TagSoup where

import qualified Text.XML.WraXML.Tree.TagSoup as TreeTagSoup
import qualified Text.XML.WraXML.Document as XmlDoc
-- import qualified Text.XML.WraXML.Tree     as XmlTree

import Text.XML.WraXML.Tree.TagSoup (PosTag, )

import qualified Text.HTML.TagSoup as Tag

import Text.HTML.TagSoup (Tag(..), )

import qualified Text.XML.Basic.Position as Position

import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name.MixedCase as NameMC
import qualified Text.XML.Basic.Name.LowerCase as NameLC
import qualified Text.XML.Basic.Name as Name

import Control.Monad.Trans.State (State, state, evalState, modify, gets, )

import Data.Char (isSpace, )



dropSpace ::[PosTag] -> [PosTag]
dropSpace =
   dropWhile
      (\tag ->
          case snd tag of
             Tag.TagText text -> all isSpace text
             _ -> False)

withoutLeadingSpace ::
   ([PosTag] -> (a, [PosTag])) ->
   State [PosTag] a
withoutLeadingSpace f =
   modify dropSpace >> state f

toXmlDocument ::
   (Name.Tag name, Name.Attribute name) =>
   [Tag] -> XmlDoc.T Position.T name String
toXmlDocument ts =
   flip evalState
      (TreeTagSoup.removeMetaPos
          (TreeTagSoup.attachPos
              (Tag.canonicalizeTags ts))) $
   do xml <- withoutLeadingSpace $ \ts0 ->
         case ts0 of
            (_, Tag.TagOpen "?xml" attrs):ts1 ->
                 (Just (map (uncurry Attr.new) attrs), ts1)
            _ -> (Nothing, ts0)
      docType <- withoutLeadingSpace $ \ts0 ->
         case ts0 of
            (_, Tag.TagOpen "!DOCTYPE" dtd):ts1 ->
                 (Just (Attr.formatListBlankHead
                         (map (Attr.fromPair :: (String,String) -> Attr.T NameMC.T String) dtd) ""), ts1)
            _ -> (Nothing, ts0)
      gets (XmlDoc.Cons xml docType . TreeTagSoup.toXmlTreesAux)

{-
toXmlDocument =
   Tag.canonicalizeTags .
   XmlDoc.Cons Nothing .
   TreeTagSoup.toXmlTreesAux
-}

toXmlDocumentString ::
   (Name.Tag name, Name.Attribute name) =>
   [Tag] -> XmlDoc.T Position.T name String
toXmlDocumentString =
   toXmlDocument
{- this would only work for String, because of isSpace
   let cts = Tag.canonicalizePosTags ts
   in  case dropWhile (Match.tagText (all isSpace) . snd) cts of
          (_, Tag.TagSpecial "DOCTYPE" dtd):rest ->
               XmlDoc.Cons (Just dtd) (TreeTagSoup.toXmlTreesAux rest)
          _ -> XmlDoc.Cons Nothing    (TreeTagSoup.toXmlTreesAux cts)
-}


example :: IO ()
example =
  print .
  (toXmlDocumentString :: [Tag] -> XmlDoc.T Position.T NameLC.T String) .
  Tag.parseTagsOptions TreeTagSoup.parseOptions
    =<< readFile "/home/thielema/public_html/index.html"