-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | XML parser and renderer with HTML 5 quirks mode
--
-- Contains renderers and parsers for both XML and HTML 5 document
-- fragments, which share data structures so that it's easy to work with
-- both. Document fragments are bits of documents, which are not
-- constrained by some of the high-level structure rules (in particular,
-- they may contain more than one root element).
--
-- Note that this is not a compliant HTML 5 parser. Rather, it is a
-- parser for HTML 5 compliant documents. It does not implement the HTML
-- 5 parsing algorithm, and should generally be expected to perform
-- correctly only on documents that you trust to conform to HTML 5. This
-- is not a suitable library for implementing web crawlers or other
-- software that will be exposed to documents from outside sources. The
-- result is also not the HTML 5 node structure, but rather something
-- closer to the physical structure. For example, omitted start tags are
-- not inserted (and so, their corresponding end tags must also be
-- omitted).
@package xmlhtml
@version 0.2.4
module Text.XmlHtml.HTML.Meta
-- | Void elements as defined by the HTML5 spec.
voidTags :: HashSet Text
-- | Elements that XmlHtml treats as raw text by default. Raw text elements
-- are not allowed to have any other tags in them. This is necessary to
-- support the JavaScript less-than operator (<) inside a script tag, for
-- example.
--
-- The library uses the isRawText function everywhere instead of
-- checking this set directly because that gives us an escape hatch to
-- avoid the default behavior if necessary.
rawTextTags :: HashSet Text
-- | Determine whether a tag should be treated as raw text. Raw text
-- elements are not allowed to have any other tags in them. This is
-- necessary to support the JavaScript less-than operator (<) inside a
-- script tag, for example.
--
-- If a tag is in the rawTextTags set, this function allows you to
-- override that behavior by adding the xmlhtmlNotRaw attribute.
-- Conversely, if a tag is not in the rawTextTags set, this
-- function allows you to override that by adding the xmlhtmlRaw
-- attribute to the tag.
--
-- This is the function that is actually used in the parser and renderer.
-- rawTextTags is not used any more, but is still provided for
-- backwards compatibility and to let you see which tags are treated as
-- raw by default.
isRawText :: Text -> [(Text, Text)] -> Bool
-- | List of elements with omittable end tags.
endOmittableLast :: HashSet Text
-- | Tags which should be considered automatically ended in case one of a
-- certain set of tags pops up.
endOmittableNext :: HashMap Text (HashSet Text)
-- | Tags and attributes which should always be rendered with an explicit
-- value, even when the value is empty. This is required by some web
-- browsers for tags that are typically non-empty.
explicitAttributes :: HashMap Text (HashSet Text)
-- | Predefined character entity references as defined by the HTML5 spec.
predefinedRefs :: HashMap Text Text
reversePredefinedRefs :: HashMap Text Text
-- | Parsers and renderers for XML and HTML 5. Although the formats are
-- treated differently, the data types used by each are the same, which
-- makes it easy to write code that works with the element structure of
-- either XML or HTML 5 documents.
--
-- Limitations:
--
--
-- - The XML parser does not parse internal DOCTYPE subsets. They are
-- just stored as blocks of text, with minimal scanning done to match
-- quotes and brackets to determine the end.
-- - Since DTDs are not parsed, the XML parser fails on entity
-- references, except for those defined internally. You cannot use this
-- library for parsing XML documents with entity references outside the
-- predefined set.
-- - The HTML 5 parser is not a compliant HTML parser. Instead, it is a
-- parser for valid HTML 5 content. It should only be used on content
-- that you have reason to believe is probably correct, since the
-- compatibility features of HTML 5 are missing. This is the wrong
-- library on which to build a web spider.
-- - Both parsers accept fragments of documents, by which is meant that
-- they do not enforce the top-level structure of the document. Files may
-- contain more than one root element, for example.
--
module Text.XmlHtml
-- | Represents a document fragment, including the format, encoding, and
-- document type declaration as well as its content.
data Document
XmlDocument :: !Encoding -> !(Maybe DocType) -> ![Node] -> Document
[docEncoding] :: Document -> !Encoding
[docType] :: Document -> !(Maybe DocType)
[docContent] :: Document -> ![Node]
HtmlDocument :: !Encoding -> !(Maybe DocType) -> ![Node] -> Document
[docEncoding] :: Document -> !Encoding
[docType] :: Document -> !(Maybe DocType)
[docContent] :: Document -> ![Node]
-- | A node of a document structure. A node can be text, a comment, or an
-- element. XML processing instructions are intentionally omitted as a
-- simplification, and CDATA and plain text are both text nodes, since
-- they ought to be semantically interchangeable.
data Node
TextNode :: !Text -> Node
Comment :: !Text -> Node
Element :: !Text -> ![(Text, Text)] -> ![Node] -> Node
[elementTag] :: Node -> !Text
[elementAttrs] :: Node -> ![(Text, Text)]
[elementChildren] :: Node -> ![Node]
-- | A document type declaration. Note that DTD internal subsets are
-- currently unimplemented.
data DocType
DocType :: !Text -> !ExternalID -> !InternalSubset -> DocType
-- | An external ID, as in a document type declaration. This can be a
-- SYSTEM identifier, or a PUBLIC identifier, or can be omitted.
data ExternalID
Public :: !Text -> !Text -> ExternalID
System :: !Text -> ExternalID
NoExternalID :: ExternalID
-- | The internal subset is unparsed, but preserved in case it's actually
-- wanted.
data InternalSubset
InternalText :: !Text -> InternalSubset
NoInternalSubset :: InternalSubset
-- | The character encoding of a document. Currently only the required
-- character encodings are implemented.
data Encoding
UTF8 :: Encoding
UTF16BE :: Encoding
UTF16LE :: Encoding
ISO_8859_1 :: Encoding
-- | Determines whether the node is text or not.
isTextNode :: Node -> Bool
-- | Determines whether the node is a comment or not.
isComment :: Node -> Bool
-- | Determines whether the node is an element or not.
isElement :: Node -> Bool
-- | Gives the tag name of an element, or Nothing if the node isn't
-- an element.
tagName :: Node -> Maybe Text
-- | Retrieves the attribute with the given name. If the Node is not
-- an element, the result is always Nothing.
getAttribute :: Text -> Node -> Maybe Text
-- | Checks if a given attribute exists in a Node.
hasAttribute :: Text -> Node -> Bool
-- | Sets the attribute with the given name to the given value. If the
-- Node is not an element, this is the identity.
setAttribute :: Text -> Text -> Node -> Node
-- | Gives the entire text content of a node, ignoring markup.
nodeText :: Node -> Text
-- | Gives the child nodes of the given node. Only elements have child
-- nodes.
childNodes :: Node -> [Node]
-- | Gives the child elements of the given node.
childElements :: Node -> [Node]
-- | Gives all of the child elements of the node with the given tag name.
childElementsTag :: Text -> Node -> [Node]
-- | Gives the first child element of the node with the given tag name, or
-- Nothing if there is no such child element.
childElementTag :: Text -> Node -> Maybe Node
-- | Gives the descendants of the given node in the order that they begin
-- in the document.
descendantNodes :: Node -> [Node]
-- | Gives the descendant elements of the given node, in the order that
-- their start tags appear in the document.
descendantElements :: Node -> [Node]
-- | Gives the descendant elements with a given tag name.
descendantElementsTag :: Text -> Node -> [Node]
-- | Gives the first descendant element of the node with the given tag
-- name, or Nothing if there is no such element.
descendantElementTag :: Text -> Node -> Maybe Node
-- | Parses the given XML fragment.
parseXML :: String -> ByteString -> Either String Document
-- | Parses the given HTML fragment. This enables HTML quirks mode, which
-- changes the parsing algorithm to parse valid HTML 5 documents
-- correctly.
parseHTML :: String -> ByteString -> Either String Document
-- | Renders a Document.
render :: Document -> Builder
-- | Function for rendering XML nodes without the overhead of creating a
-- Document structure.
renderXmlFragment :: Encoding -> [Node] -> Builder
-- | Function for rendering HTML nodes without the overhead of creating a
-- Document structure.
renderHtmlFragment :: Encoding -> [Node] -> Builder
renderDocType :: Encoding -> Maybe DocType -> Builder
-- | A zipper for navigating and modifying XML trees. This is nearly the
-- same exposed interface as the xml package in
-- Text.XML.Light.Cursor, with modifications as needed to adapt
-- to different types.
module Text.XmlHtml.Cursor
-- | A zipper for XML document forests.
data Cursor
-- | Builds a Cursor for navigating a tree, that is, a forest with a
-- single root Node.
fromNode :: Node -> Cursor
-- | Builds a Cursor for navigating a forest with the given list of
-- roots. The cursor is initially positioned at the left-most node. Gives
-- Nothing if the list is empty.
fromNodes :: [Node] -> Maybe Cursor
-- | Retrieves the root node containing the current cursor position.
topNode :: Cursor -> Node
-- | Retrieves the entire forest of Nodes corresponding to a
-- Cursor.
topNodes :: Cursor -> [Node]
-- | Retrieves the current node of a Cursor.
current :: Cursor -> Node
-- | Retrieves a list of the Nodes at the same level as the current
-- position of a cursor, including the current node.
siblings :: Cursor -> [Node]
-- | Navigates a Cursor to its parent in the document.
parent :: Cursor -> Maybe Cursor
-- | Navigates a Cursor up through parents to reach the root level.
root :: Cursor -> Cursor
-- | Navigates a Cursor down to the indicated child index.
getChild :: Int -> Cursor -> Maybe Cursor
-- | Navigates a Cursor down to its first child.
firstChild :: Cursor -> Maybe Cursor
-- | Navigates a Cursor down to its last child.
lastChild :: Cursor -> Maybe Cursor
-- | Moves a Cursor to its left sibling.
left :: Cursor -> Maybe Cursor
-- | Moves a Cursor to its right sibling.
right :: Cursor -> Maybe Cursor
-- | Moves a Cursor to the next node encountered in a depth-first
-- search. If it has children, this is equivalent to firstChild.
-- Otherwise, if it has a right sibling, then this is equivalent to
-- right. Otherwise, the cursor moves to the first right sibling
-- of one of its ancestors.
nextDF :: Cursor -> Maybe Cursor
-- | Navigates a Cursor to the first child that matches the
-- predicate.
findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-- | Navigates a Cursor to the nearest left sibling that matches a
-- predicate.
findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-- | Navigates a Cursor to the nearest right sibling that matches a
-- predicate.
findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-- | Does a depth-first search for a descendant matching the predicate.
-- This can match the current cursor position.
findRec :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-- | Determines if the Cursor is at a root node.
isRoot :: Cursor -> Bool
-- | Determines if the Cursor is at a first child.
isFirst :: Cursor -> Bool
-- | Determines if the Cursor is at a last child.
isLast :: Cursor -> Bool
-- | Determines if the Cursor is at a leaf node.
isLeaf :: Cursor -> Bool
-- | Determines if the Cursor is at a child node (i.e., if it has a
-- parent).
isChild :: Cursor -> Bool
-- | Determines if the Cursor is at a non-leaf node (i.e., if it has
-- children).
hasChildren :: Cursor -> Bool
-- | Gets the index of the Cursor among its siblings.
getNodeIndex :: Cursor -> Int
-- | Replaces the current node.
setNode :: Node -> Cursor -> Cursor
-- | Modifies the current node by applying a function.
modifyNode :: (Node -> Node) -> Cursor -> Cursor
-- | Modifies the current node by applying an action in some functor.
modifyNodeM :: Functor m => (Node -> m Node) -> Cursor -> m Cursor
-- | Inserts a new Node to the left of the current position.
insertLeft :: Node -> Cursor -> Cursor
-- | Inserts a new Node to the right of the current position.
insertRight :: Node -> Cursor -> Cursor
-- | Inserts a list of new Nodes to the left of the current
-- position.
insertManyLeft :: [Node] -> Cursor -> Cursor
-- | Inserts a list of new Nodes to the right of the current
-- position.
insertManyRight :: [Node] -> Cursor -> Cursor
-- | Inserts a Node as the first child of the current element.
insertFirstChild :: Node -> Cursor -> Maybe Cursor
-- | Inserts a Node as the last child of the current element.
insertLastChild :: Node -> Cursor -> Maybe Cursor
-- | Inserts a list of Nodes as the first children of the current
-- element.
insertManyFirstChild :: [Node] -> Cursor -> Maybe Cursor
-- | Inserts a list of Nodes as the last children of the current
-- element.
insertManyLastChild :: [Node] -> Cursor -> Maybe Cursor
-- | Inserts a new Node to the left of the current position, and
-- moves left to the new node.
insertGoLeft :: Node -> Cursor -> Cursor
-- | Inserts a new Node to the right of the current position, and
-- moves right to the new node.
insertGoRight :: Node -> Cursor -> Cursor
-- | Removes the Node to the left of the current position, if any.
removeLeft :: Cursor -> Maybe (Node, Cursor)
-- | Removes the Node to the right of the current position, if any.
removeRight :: Cursor -> Maybe (Node, Cursor)
-- | Removes the current Node, and moves the Cursor to its left
-- sibling, if any.
removeGoLeft :: Cursor -> Maybe Cursor
-- | Removes the current Node, and moves the Cursor to its right
-- sibling, if any.
removeGoRight :: Cursor -> Maybe Cursor
-- | Removes the current Node, and moves the Cursor to its parent,
-- if any.
removeGoUp :: Cursor -> Maybe Cursor
instance GHC.Classes.Eq Text.XmlHtml.Cursor.Cursor
-- | Renderer that supports rendering to xmlhtml forests. This is a port of
-- the Hexpat renderer.
--
-- Warning: because this renderer doesn't directly create the output, but
-- rather an XML tree representation, it is impossible to render
-- pre-escaped text.
module Text.Blaze.Renderer.XmlHtml
-- | Render HTML to an xmlhtml Document.
renderHtml :: Html -> Document
-- | Render HTML to a list of xmlhtml nodes.
renderHtmlNodes :: Html -> [Node]