module Text.XML.WraXML.Document.Tagchup where

import qualified Text.XML.WraXML.Tree.Tagchup as TreeTagchup
import qualified Text.XML.WraXML.Document as XmlDoc
-- import qualified Text.XML.WraXML.Tree     as XmlTree

import qualified Text.HTML.Tagchup.Parser      as TagParser
import qualified Text.HTML.Tagchup.Tag         as Tag
import qualified Text.HTML.Tagchup.PositionTag as PosTag
import qualified Text.XML.Basic.Position    as Position
import qualified Text.HTML.Basic.Tag as TagH
import qualified Text.HTML.Basic.Character    as HTMLChar

import qualified Text.XML.Basic.Name.LowerCase as NameLC
import qualified Text.XML.Basic.Name           as Name
import qualified Text.XML.Basic.ProcessingInstruction as PI

import Data.List.HT (viewL, )
import Data.Maybe (fromMaybe, )
import Control.Monad (guard, )

import Control.Monad.Trans.State (State, state, evalState, modify, gets, )

import qualified Data.Char as Char



type XmlDoc = XmlDoc.T Position.T


class TagParser.CharType char => CharSpace char where
   isSpace :: char -> Bool

instance CharSpace Char where
   isSpace = Char.isSpace

instance CharSpace HTMLChar.T where
   isSpace c =
      case c of
         HTMLChar.Unicode chr -> Char.isSpace chr
         HTMLChar.EntityRef "nbsp" -> True
         _ -> False

class StringSpace string where
   isAllSpace :: string -> Bool

instance CharSpace char => StringSpace [char] where
   isAllSpace = all isSpace


dropSpace :: StringSpace string =>
   [PosTag.T name string] -> [PosTag.T name string]
dropSpace =
   dropWhile
      (\tag ->
          case PosTag.tag_ tag of
             Tag.Text text -> isAllSpace text
             _ -> False)

withoutLeadingSpace :: (StringSpace string) =>
   ([PosTag.T name string] -> (a, [PosTag.T name string])) ->
   State [PosTag.T name string] a
withoutLeadingSpace f =
   modify dropSpace >> state f

toXmlDocument ::
   (Name.Tag name, Name.Attribute name, StringSpace string) =>
   [PosTag.T name string] -> XmlDoc name string
toXmlDocument ts =
   flip evalState ts $
   do xml <- withoutLeadingSpace $ \ts0 ->
         fromMaybe (Nothing, ts0) $
         do (t,ts1) <- viewL ts0
            (name, PI.Known attrs) <- Tag.maybeProcessing (PosTag.tag_ t)
            guard (Name.match "xml" name)
            return (Just attrs, ts1)
      docType <- withoutLeadingSpace $ \ts0 ->
         fromMaybe (Nothing, ts0) $
         do (t,ts1) <- viewL ts0
            (name, dtd) <- Tag.maybeSpecial (PosTag.tag_ t)
            guard (Name.match TagH.doctypeString name)
            return (Just dtd, ts1)
      gets (XmlDoc.Cons xml docType . TreeTagchup.toXmlTrees)


example :: IO ()
example =
  print .
  (toXmlDocument :: [PosTag.T NameLC.T String] -> XmlDoc NameLC.T String) .
  TagParser.runSoupWithPositions
    =<< readFile "/home/thielema/public_html/index.html"