module ListT.HTMLParser.XML where

import ListT.HTMLParser.Prelude
import qualified Data.XML.Types as XML
import qualified HTMLTokenizer as Tokenizer

-- |
-- A parser over a list of tokenizer tokens.
-- The state is the list of tokens that have not been consumed yet,
-- and a failed parse is represented by 'Nothing'.
type Parser a = StateT [Tokenizer.Token] Maybe a

-- |
-- Runs a parser on the given tokens and returns its result,
-- or 'Nothing' if the parser failed.
-- Tokens left unconsumed by the parser are discarded.
run :: Parser a -> [Tokenizer.Token] -> Maybe a
run parser tokens = evalStateT parser tokens

-- |
-- Consumes and returns the next token.
-- Fails when there are no tokens left.
fetchToken :: Parser Tokenizer.Token
fetchToken =
  StateT $ \tokens ->
    case tokens of
      [] -> Nothing
      token : rest -> Just (token, rest)

-- |
-- Checks whether the end of input has been reached.
-- Never fails and does not consume any tokens.
tryEOI :: Parser Bool
tryEOI = do
  remaining <- get
  return (null remaining)

-- |
-- Consumes the next token, succeeding only if it is a closing tag
-- whose identifier equals the given one.
closingTag :: Tokenizer.Identifier -> Parser ()
closingTag ident = do
  token <- fetchToken
  case token of
    Tokenizer.Token_ClosingTag actual
      | actual == ident -> return ()
    _ -> mzero

-- |
-- Parses a single XML node from the token stream.
--
-- * A text token becomes a content node.
-- * A comment token becomes a comment node.
-- * An opening tag becomes an element. When the tag's @closed@ flag is set
--   the element has no children; otherwise zero or more child nodes are
--   parsed, followed by the closing tag with the same identifier.
-- * Any other token makes the parser fail.
--
-- Expects the input tokens to be proper.
node :: Parser XML.Node
node = do
  token <- fetchToken
  case token of
    -- NOTE(review): text is wrapped in 'XML.ContentEntity', not
    -- 'XML.ContentText'. Confirm that this is intended.
    Tokenizer.Token_Text text ->
      return (XML.NodeContent (XML.ContentEntity text))
    Tokenizer.Token_Comment comment ->
      return (XML.NodeComment comment)
    Tokenizer.Token_OpeningTag (ident, attrs, closed) ->
      fmap (elementNode ident attrs) (childrenOf ident closed)
    _ ->
      mzero
  where
    -- Parses the children of an element opened by the tag @ident@.
    childrenOf ident closed =
      if closed
        then return []
        else many node <* closingTag ident
    -- Assembles an element node out of the tag parts and the parsed children.
    elementNode ident attrs children =
      XML.NodeElement
        (XML.Element (toName ident) (fmap toAttribute attrs) children)
    -- Converts a tokenizer identifier into an XML name.
    -- The last field of 'XML.Name' is always set to 'Nothing'.
    toName (Tokenizer.Identifier namespace name) =
      XML.Name (convert name) (fmap convert namespace) Nothing
    -- Converts a tokenizer attribute into an XML attribute.
    toAttribute (ident, content) =
      (toName ident, convert (fmap XML.ContentEntity content))