-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A high-performance HTML tokenizer -- -- This package provides a fast and reasonably robust HTML5 tokenizer -- built upon the attoparsec library. The parsing strategy is -- based upon the HTML5 parsing specification with few deviations. -- -- For instance, -- --
--   >>> parseTokens "<div><h1 class=widget>Hello World</h1><br/>"
--   [TagOpen "div" [],
--   TagOpen "h1" [Attr "class" "widget"],
--   ContentText "Hello World",
--   TagClose "h1",
--   TagSelfClose "br" []]
--   
-- -- The package targets similar use-cases to the venerable -- tagsoup library, but is significantly more efficient, -- achieving parsing speeds of over 80 megabytes per second on modern -- hardware and typical web documents. Here are some typical performance -- numbers taken from parsing a Wikipedia article of moderate length: -- --
--   benchmarking Forced/tagsoup fast Text
--   time                 186.1 ms   (175.3 ms .. 194.6 ms)
--   0.999 R²   (0.995 R² .. 1.000 R²)
--   mean                 191.7 ms   (188.9 ms .. 198.3 ms)
--   std dev              5.053 ms   (1.092 ms .. 6.809 ms)
--   variance introduced by outliers: 14% (moderately inflated)
--   
--   benchmarking Forced/tagsoup normal Text
--   time                 189.7 ms   (182.8 ms .. 197.7 ms)
--   0.999 R²   (0.998 R² .. 1.000 R²)
--   mean                 196.5 ms   (193.1 ms .. 202.1 ms)
--   std dev              5.481 ms   (2.141 ms .. 7.383 ms)
--   variance introduced by outliers: 14% (moderately inflated)
--   
--   benchmarking Forced/html-parser
--   time                 15.81 ms   (15.75 ms .. 15.89 ms)
--   1.000 R²   (1.000 R² .. 1.000 R²)
--   mean                 15.72 ms   (15.66 ms .. 15.77 ms)
--   std dev              140.9 μs   (113.6 μs .. 174.5 μs)
--   
@package html-parse @version 0.2.0.2 -- | This is a performance-oriented HTML tokenizer aimed at web-crawling -- applications. It follows the HTML5 parsing specification quite -- closely, so it behaves reasonably well on ill-formed documents from -- the open Web. module Text.HTML.Parser -- | Parse a lazy list of tokens from strict Text. parseTokens :: Text -> [Token] -- | Parse a lazy list of tokens from lazy Text. parseTokensLazy :: Text -> [Token] -- | Parse a single Token. token :: Parser Token -- | An HTML token data Token -- | An opening tag. Attribute ordering is arbitrary. TagOpen :: !TagName -> [Attr] -> Token -- | A self-closing tag. TagSelfClose :: !TagName -> [Attr] -> Token -- | A closing tag. TagClose :: !TagName -> Token -- | The content between tags. ContentText :: !Text -> Token -- | A single character of content ContentChar :: !Char -> Token -- | Contents of a comment. Comment :: !Builder -> Token -- | Doctype Doctype :: !Text -> Token -- | A tag name (e.g. body) type TagName = Text -- | An attribute name (e.g. href) type AttrName = Text -- | The value of an attribute type AttrValue = Text -- | An attribute of a tag data Attr Attr :: !AttrName -> !AttrValue -> Attr -- | See renderToken. renderTokens :: [Token] -> Text -- | (Somewhat) canonical string representation of Token. renderToken :: Token -> Text -- | See renderAttr. renderAttrs :: [Attr] -> Text -- | Does not escape quotation in attribute values! renderAttr :: Attr -> Text -- | Melds neighboring ContentChar and ContentText constructors -- together and drops empty text elements. 
canonicalizeTokens :: [Token] -> [Token] instance GHC.Generics.Generic Text.HTML.Parser.Token instance GHC.Classes.Eq Text.HTML.Parser.Token instance GHC.Classes.Ord Text.HTML.Parser.Token instance GHC.Show.Show Text.HTML.Parser.Token instance GHC.Classes.Ord Text.HTML.Parser.Attr instance GHC.Classes.Eq Text.HTML.Parser.Attr instance GHC.Show.Show Text.HTML.Parser.Attr instance Control.DeepSeq.NFData Text.HTML.Parser.Token module Text.HTML.Tree tokensToForest :: [Token] -> Either ParseTokenForestError (Forest Token) data ParseTokenForestError ParseTokenForestErrorBracketMismatch :: PStack -> (Maybe Token) -> ParseTokenForestError data PStack PStack :: Forest Token -> [(Token, Forest Token)] -> PStack [_pstackToplevelSiblings] :: PStack -> Forest Token [_pstackParents] :: PStack -> [(Token, Forest Token)] nonClosing :: [Text] tokensFromForest :: Forest Token -> [Token] tokensFromTree :: Tree Token -> [Token] instance GHC.Show.Show Text.HTML.Tree.ParseTokenForestError instance GHC.Classes.Eq Text.HTML.Tree.ParseTokenForestError instance GHC.Show.Show Text.HTML.Tree.PStack instance GHC.Classes.Eq Text.HTML.Tree.PStack