module ListT.HTMLParser.XML where

import ListT.HTMLParser.Prelude
import qualified Data.XML.Types as XML
import qualified HTMLTokenizer as Tokenizer


-- |
-- A parser whose state is the list of tokens that remain to be consumed
-- and which signals failure with 'Nothing'.
type Parser =
  StateT [Tokenizer.Token] Maybe

-- |
-- Execute a parser on the given tokens,
-- producing its result or 'Nothing' if the parser fails.
-- Only the result is returned; the final parser state is dropped.
run :: Parser a -> [Tokenizer.Token] -> Maybe a
run parser tokens =
  evalStateT parser tokens

-- |
-- Consume and return the next token.
-- Fails if there are no tokens left.
fetchToken :: Parser Tokenizer.Token
fetchToken =
  StateT $ \case
    token : remainder -> Just (token, remainder)
    [] -> Nothing

-- |
-- Check whether the end of input is reached, without consuming anything.
tryEOI :: Parser Bool
tryEOI = do
  remainder <- get
  return (null remainder)

-- |
-- Consume the next token,
-- succeeding only if it is a closing tag with exactly the given identifier.
-- Any other token, as well as the end of input, is a failure.
closingTag :: Tokenizer.Identifier -> Parser ()
closingTag ident = do
  token <- fetchToken
  case token of
    Tokenizer.Token_ClosingTag found
      | found == ident -> return ()
    _ -> mzero

-- |
-- Parse a single XML node from the tokens:
-- a text node, a comment, or an element together with all of its subnodes.
--
-- Expects the input tokens to be proper:
-- an opening tag that is not self-closed must be followed by its subnodes
-- and then by a closing tag with the same identifier.
-- Fails on any other token (e.g., a stray closing tag) and on the end of input.
--
-- Text and attribute values are emitted as 'XML.ContentText'.
-- 'XML.ContentEntity' is reserved by @xml-types@ for the names of
-- unresolved entity references, so wrapping ordinary text in it makes
-- consumers treat the whole text as an entity name.
-- NOTE(review): this assumes the tokenizer yields character data
-- (not entity names) in text tokens and attribute values — confirm.
node :: Parser XML.Node
node =
  fetchToken >>= \case
    Tokenizer.Token_Text textData ->
      return (XML.NodeContent (XML.ContentText textData))
    Tokenizer.Token_Comment commentData ->
      return (XML.NodeComment commentData)
    Tokenizer.Token_OpeningTag (ident, attrs, closed) -> do
      -- A self-closed tag has neither subnodes nor a closing tag to consume.
      subnodes <- if closed
        then return []
        else many node <* closingTag ident
      return (XML.NodeElement (XML.Element (convertIdent ident) (fmap convertAttribute attrs) subnodes))
    _ -> mzero
  where
    -- The tokenizer's identifier maps to a name with an optional namespace and no prefix.
    convertIdent (Tokenizer.Identifier namespace name) =
      XML.Name (convert name) (fmap convert namespace) Nothing
    -- An attribute without a value ends up with an empty content list.
    convertAttribute (attrIdent, content) =
      (convertIdent attrIdent, convert (fmap XML.ContentText content))