-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | XML parser and renderer with HTML 5 quirks mode -- -- Contains renderers and parsers for both XML and HTML 5 document -- fragments, which share data structures so that it's easy to work with -- both. Document fragments are bits of documents, which are not -- constrained by some of the high-level structure rules (in particular, -- they may contain more than one root element). -- -- Note that this is not a compliant HTML 5 parser. Rather, it is a -- parser for HTML 5 compliant documents. It does not implement the HTML -- 5 parsing algorithm, and should generally be expected to perform -- correctly only on documents that you trust to conform to HTML 5. This -- is not a suitable library for implementing web crawlers or other -- software that will be exposed to documents from outside sources. The -- result is also not the HTML 5 node structure, but rather something -- closer to the physical structure. For example, omitted start tags are -- not inserted (and so, their corresponding end tags must also be -- omitted). @package xmlhtml @version 0.2.5.2 module Text.XmlHtml.HTML.Meta -- | Void elements as defined by the HTML5 spec. voidTags :: HashSet Text -- | Elements that XmlHtml treats as raw text by default. Raw text elements -- are not allowed to have any other tags in them. This is necessary to -- support the Javascript less than operator inside a script tag, for -- example. -- -- The library uses the isRawText function everywhere instead of -- checking this set directly because that gives us an escape hatch to -- avoid the default behavior if necessary. rawTextTags :: HashSet Text -- | Determine whether a tag should be treated as raw text. Raw text -- elements are not allowed to have any other tags in them. This is -- necessary to support the Javascript less than operator inside a script -- tag, for example. 
-- -- If a tag is in the rawTextTags set, this function allows you to -- override that behavior by adding the xmlhtmlNotRaw attribute. -- Conversely, if a tag is not in the rawTextTags set, this -- function allows you to override that by adding the xmlhtmlRaw -- attribute to the tag. -- -- This is the function that is actually used in the parser and renderer. -- rawTextTags is not used any more, but is still provided for -- backwards compatibility and to let you see which tags are treated as -- raw by default. isRawText :: Text -> [(Text, Text)] -> Bool -- | List of elements with omittable end tags. endOmittableLast :: HashSet Text -- | Tags which should be considered automatically ended in case one of a -- certain set of tags pops up. endOmittableNext :: HashMap Text (HashSet Text) -- | Tags and attributes which should always be rendered with an explicit -- value, even when the value is empty. This is required by some web -- browsers for tags that are typically non-empty. explicitAttributes :: HashMap Text (HashSet Text) -- | Predefined character entity references as defined by the HTML5 spec. predefinedRefs :: Map Text Text reversePredefinedRefs :: Map Text Text -- | Parsers and renderers for XML and HTML 5. Although the formats are -- treated differently, the data types used by each are the same, which -- makes it easy to write code that works with the element structure of -- either XML or HTML 5 documents. -- -- Limitations: -- -- module Text.XmlHtml -- | Represents a document fragment, including the format, encoding, and -- document type declaration as well as its content. 
data Document XmlDocument :: !Encoding -> !(Maybe DocType) -> ![Node] -> Document [docEncoding] :: Document -> !Encoding [docType] :: Document -> !(Maybe DocType) [docContent] :: Document -> ![Node] HtmlDocument :: !Encoding -> !(Maybe DocType) -> ![Node] -> Document [docEncoding] :: Document -> !Encoding [docType] :: Document -> !(Maybe DocType) [docContent] :: Document -> ![Node] -- | A node of a document structure. A node can be text, a comment, or an -- element. XML processing instructions are intentionally omitted as a -- simplification, and CDATA and plain text are both text nodes, since -- they ought to be semantically interchangeable. data Node TextNode :: !Text -> Node Comment :: !Text -> Node Element :: !Text -> ![(Text, Text)] -> ![Node] -> Node [elementTag] :: Node -> !Text [elementAttrs] :: Node -> ![(Text, Text)] [elementChildren] :: Node -> ![Node] -- | A document type declaration. Note that DTD internal subsets are -- currently unimplemented. data DocType DocType :: !Text -> !ExternalID -> !InternalSubset -> DocType -- | An external ID, as in a document type declaration. This can be a -- SYSTEM identifier, or a PUBLIC identifier, or can be omitted. data ExternalID Public :: !Text -> !Text -> ExternalID System :: !Text -> ExternalID NoExternalID :: ExternalID -- | The internal subset is unparsed, but preserved in case it's actually -- wanted. data InternalSubset InternalText :: !Text -> InternalSubset NoInternalSubset :: InternalSubset -- | The character encoding of a document. Currently only the required -- character encodings are implemented. 
data Encoding UTF8 :: Encoding UTF16BE :: Encoding UTF16LE :: Encoding ISO_8859_1 :: Encoding -- | Rendering options. data RenderOptions RenderOptions :: AttrSurround -> AttrResolveInternalQuotes -> Maybe (HashMap Text (HashSet Text)) -> RenderOptions -- | Single or double quotes used around attribute values. [roAttributeSurround] :: RenderOptions -> AttrSurround -- | Quotes inside attribute values that conflict with the surround are -- escaped, or the outer quotes are changed to avoid conflicting with the -- internal ones. [roAttributeResolveInternal] :: RenderOptions -> AttrResolveInternalQuotes -- | Attributes in the whitelist with empty values are rendered as -- example="". Nothing applies this rule to all attributes -- with empty values. [roExplicitEmptyAttrs] :: RenderOptions -> Maybe (HashMap Text (HashSet Text)) data AttrSurround SurroundDoubleQuote :: AttrSurround SurroundSingleQuote :: AttrSurround data AttrResolveInternalQuotes AttrResolveByEscape :: AttrResolveInternalQuotes AttrResolveAvoidEscape :: AttrResolveInternalQuotes -- | Determines whether the node is text or not. isTextNode :: Node -> Bool -- | Determines whether the node is a comment or not. isComment :: Node -> Bool -- | Determines whether the node is an element or not. isElement :: Node -> Bool -- | Gives the tag name of an element, or Nothing if the node isn't -- an element. tagName :: Node -> Maybe Text -- | Retrieves the attribute with the given name. If the Node is not -- an element, the result is always Nothing. getAttribute :: Text -> Node -> Maybe Text -- | Checks if a given attribute exists in a Node. hasAttribute :: Text -> Node -> Bool -- | Sets the attribute name to the given value. If the Node is not -- an element, this is the identity. setAttribute :: Text -> Text -> Node -> Node -- | Gives the entire text content of a node, ignoring markup. nodeText :: Node -> Text -- | Gives the child nodes of the given node. Only elements have child -- nodes. 
childNodes :: Node -> [Node] -- | Gives the child elements of the given node. childElements :: Node -> [Node] -- | Gives all of the child elements of the node with the given tag name. childElementsTag :: Text -> Node -> [Node] -- | Gives the first child element of the node with the given tag name, or -- Nothing if there is no such child element. childElementTag :: Text -> Node -> Maybe Node -- | Gives the descendants of the given node in the order that they begin -- in the document. descendantNodes :: Node -> [Node] -- | Gives the descendant elements of the given node, in the order that -- their start tags appear in the document. descendantElements :: Node -> [Node] -- | Gives the descendant elements with a given tag name. descendantElementsTag :: Text -> Node -> [Node] -- | Gives the first descendant element of the node with the given tag -- name, or Nothing if there is no such element. descendantElementTag :: Text -> Node -> Maybe Node -- | Parses the given XML fragment. parseXML :: String -> ByteString -> Either String Document -- | Parses the given HTML fragment. This enables HTML quirks mode, which -- changes the parsing algorithm to parse valid HTML 5 documents -- correctly. parseHTML :: String -> ByteString -> Either String Document render :: Document -> Builder -- | Renders a Document. renderWithOptions :: RenderOptions -> Document -> Builder defaultRenderOptions :: RenderOptions renderXmlFragment :: Encoding -> [Node] -> Builder -- | Function for rendering XML nodes without the overhead of creating a -- Document structure. renderXmlFragmentWithOptions :: RenderOptions -> Encoding -> [Node] -> Builder -- | Function for rendering HTML nodes without the overhead of creating a -- Document structure, using default rendering options renderHtmlFragment :: Encoding -> [Node] -> Builder -- | Function for rendering HTML nodes without the overhead of creating a -- Document structure. 
renderHtmlFragmentWithOptions :: RenderOptions -> Encoding -> [Node] -> Builder renderDocType :: Encoding -> Maybe DocType -> Builder -- | A zipper for navigating and modifying XML trees. This is nearly the -- same exposed interface as the xml package in -- Text.XML.Light.Cursor, with modifications as needed to adapt -- to different types. module Text.XmlHtml.Cursor -- | A zipper for XML document forests. data Cursor -- | Builds a Cursor for navigating a tree. That is, a forest with a -- single root Node. fromNode :: Node -> Cursor -- | Builds a Cursor for navigating a forest with the given list of -- roots. The cursor is initially positioned at the left-most node. Gives -- Nothing if the list is empty. fromNodes :: [Node] -> Maybe Cursor -- | Retrieves the root node containing the current cursor position. topNode :: Cursor -> Node -- | Retrieves the entire forest of Nodes corresponding to a -- Cursor. topNodes :: Cursor -> [Node] -- | Retrieves the current node of a Cursor. current :: Cursor -> Node -- | Retrieves a list of the Nodes at the same level as the current -- position of a cursor, including the current node. siblings :: Cursor -> [Node] -- | Navigates a Cursor to its parent in the document. parent :: Cursor -> Maybe Cursor -- | Navigates a Cursor up through parents to reach the root level. root :: Cursor -> Cursor -- | Navigates a Cursor down to the indicated child index. getChild :: Int -> Cursor -> Maybe Cursor -- | Navigates a Cursor down to its first child. firstChild :: Cursor -> Maybe Cursor -- | Navigates a Cursor down to its last child. lastChild :: Cursor -> Maybe Cursor -- | Moves a Cursor to its left sibling. left :: Cursor -> Maybe Cursor -- | Moves a Cursor to its right sibling. right :: Cursor -> Maybe Cursor -- | Moves a Cursor to the next node encountered in a depth-first -- search. If it has children, this is equivalent to firstChild. -- Otherwise, if it has a right sibling, then this is equivalent to -- right. 
Otherwise, the cursor moves to the first right sibling -- of one of its parents. nextDF :: Cursor -> Maybe Cursor -- | Navigates a Cursor to the first child that matches the -- predicate. findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -- | Navigates a Cursor to the nearest left sibling that matches a -- predicate. findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -- | Navigates a Cursor to the nearest right sibling that matches a -- predicate. findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -- | Does a depth-first search for a descendant matching the predicate. -- This can match the current cursor position. findRec :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -- | Determines if the Cursor is at a root node. isRoot :: Cursor -> Bool -- | Determines if the Cursor is at a first child. isFirst :: Cursor -> Bool -- | Determines if the Cursor is at a last child. isLast :: Cursor -> Bool -- | Determines if the Cursor is at a leaf node. isLeaf :: Cursor -> Bool -- | Determines if the Cursor is at a child node (i.e., if it has a -- parent). isChild :: Cursor -> Bool -- | Determines if the Cursor is at a non-leaf node (i.e., if it has -- children). hasChildren :: Cursor -> Bool -- | Gets the index of the Cursor among its siblings. getNodeIndex :: Cursor -> Int -- | Replaces the current node. setNode :: Node -> Cursor -> Cursor -- | Modifies the current node by applying a function. modifyNode :: (Node -> Node) -> Cursor -> Cursor -- | Modifies the current node by applying an action in some functor. modifyNodeM :: Functor m => (Node -> m Node) -> Cursor -> m Cursor -- | Inserts a new Node to the left of the current position. insertLeft :: Node -> Cursor -> Cursor -- | Inserts a new Node to the right of the current position. insertRight :: Node -> Cursor -> Cursor -- | Inserts a list of new Nodes to the left of the current -- position. insertManyLeft :: [Node] -> Cursor -> Cursor -- | Inserts a list of new Nodes to the right of the current -- position. 
insertManyRight :: [Node] -> Cursor -> Cursor -- | Inserts a Node as the first child of the current element. insertFirstChild :: Node -> Cursor -> Maybe Cursor -- | Inserts a Node as the last child of the current element. insertLastChild :: Node -> Cursor -> Maybe Cursor -- | Inserts a list of Nodes as the first children of the current -- element. insertManyFirstChild :: [Node] -> Cursor -> Maybe Cursor -- | Inserts a list of Nodes as the last children of the current -- element. insertManyLastChild :: [Node] -> Cursor -> Maybe Cursor -- | Inserts a new Node to the left of the current position, and -- moves left to the new node. insertGoLeft :: Node -> Cursor -> Cursor -- | Inserts a new Node to the right of the current position, and -- moves right to the new node. insertGoRight :: Node -> Cursor -> Cursor -- | Removes the Node to the left of the current position, if any. removeLeft :: Cursor -> Maybe (Node, Cursor) -- | Removes the Node to the right of the current position, if any. removeRight :: Cursor -> Maybe (Node, Cursor) -- | Removes the current Node, and moves the Cursor to its left -- sibling, if any. removeGoLeft :: Cursor -> Maybe Cursor -- | Removes the current Node, and moves the Cursor to its right -- sibling, if any. removeGoRight :: Cursor -> Maybe Cursor -- | Removes the current Node, and moves the Cursor to its parent, -- if any. removeGoUp :: Cursor -> Maybe Cursor instance GHC.Classes.Eq Text.XmlHtml.Cursor.Cursor -- | Renderer that supports rendering to xmlhtml forests. This is a port of -- the Hexpat renderer. -- -- Warning: because this renderer doesn't directly create the output, but -- rather an XML tree representation, it is impossible to render -- pre-escaped text. module Text.Blaze.Renderer.XmlHtml -- | Render HTML to an xmlhtml Document renderHtml :: Html -> Document -- | Render HTML to a list of xmlhtml nodes renderHtmlNodes :: Html -> [Node]