-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Pure-Haskell utilities for dealing with XML with the conduit package. -- @package xml-conduit @version 1.2.3 -- | Generalized cursors to be applied to different nodes. module Text.XML.Cursor.Generic -- | A cursor: contains an XML Node and pointers to its children, -- ancestors and siblings. data Cursor node type Axis node = Cursor node -> [Cursor node] toCursor :: (node -> [node]) -> node -> Cursor node -- | The current node. node :: Cursor node -> node -- | The child axis. XPath: the child axis contains the children of the -- context node. child :: Cursor node -> [Cursor node] -- | The parent axis. As described in XPath: the parent axis contains -- the parent of the context node, if there is one. -- -- Every node but the root element of the document has a parent. Parent -- nodes will always be NodeElements. parent :: Axis node -- | The preceding-sibling axis. XPath: the preceding-sibling axis -- contains all the preceding siblings of the context node [...]. precedingSibling :: Axis node -- | The following-sibling axis. XPath: the following-sibling axis -- contains all the following siblings of the context node [...]. followingSibling :: Axis node -- | The ancestor axis. XPath: the ancestor axis contains the ancestors -- of the context node; the ancestors of the context node consist of the -- parent of context node and the parent's parent and so on; thus, the -- ancestor axis will always include the root node, unless the context -- node is the root node. ancestor :: Axis node -- | The descendant axis. XPath: the descendant axis contains the -- descendants of the context node; a descendant is a child or a child of -- a child and so on; thus the descendant axis never contains attribute -- or namespace nodes. descendant :: Axis node -- | Modify an axis by adding the context node itself as the first element -- of the result list. 
orSelf :: Axis node -> Axis node -- | The preceding axis. XPath: the preceding axis contains all nodes in -- the same document as the context node that are before the context node -- in document order, excluding any ancestors and excluding attribute -- nodes and namespace nodes. preceding :: Axis node -- | The following axis. XPath: the following axis contains all nodes in -- the same document as the context node that are after the context node -- in document order, excluding any descendants and excluding attribute -- nodes and namespace nodes. following :: Axis node -- | Apply a function to the result of an axis. (&|) :: (Cursor node -> [a]) -> (a -> b) -> (Cursor node -> [b]) -- | Combine two axes so that the second works on the children of the -- results of the first. (&/) :: Axis node -> (Cursor node -> [a]) -> (Cursor node -> [a]) -- | Combine two axes so that the second works on the descendants of the -- results of the first. (&//) :: Axis node -> (Cursor node -> [a]) -> (Cursor node -> [a]) -- | Combine two axes so that the second works on both the result nodes, -- and their descendants. (&.//) :: Axis node -> (Cursor node -> [a]) -> (Cursor node -> [a]) -- | Apply an axis to a 'Cursor node'. ($|) :: Cursor node -> (Cursor node -> a) -> a -- | Apply an axis to the children of a 'Cursor node'. ($/) :: Cursor node -> (Cursor node -> [a]) -> [a] -- | Apply an axis to the descendants of a 'Cursor node'. ($//) :: Cursor node -> (Cursor node -> [a]) -> [a] -- | Apply an axis to a 'Cursor node' as well as its descendants. ($.//) :: Cursor node -> (Cursor node -> [a]) -> [a] -- | Left-to-right Kleisli composition of monads. (>=>) :: Monad m => (a -> m b) -> (b -> m c) -> a -> m c instance Show node => Show (Cursor node) -- | Enumeratees to render XML Events. Unlike -- libxml-enumerator and expat-enumerator, this module does not provide -- IO and ST variants, since the underlying rendering operations are pure -- functions. 
module Text.XML.Stream.Render -- | Render a stream of Events into a stream of Builders. -- Builders are from the blaze-builder package, and allow the create of -- optimally sized ByteStrings with minimal buffer copying. renderBuilder :: Monad m => RenderSettings -> Conduit Event m Builder -- | Render a stream of Events into a stream of ByteStrings. -- This function wraps around renderBuilder and -- builderToByteString, so it produces optimally sized -- ByteStrings with minimal buffer copying. -- -- The output is UTF8 encoded. renderBytes :: Monad m => -- RenderSettings -> Conduit Event m ByteString renderBytes :: (PrimMonad base, MonadBase base m) => RenderSettings -> ConduitM Event ByteString m () -- | Render a stream of Events into a stream of ByteStrings. -- This function wraps around renderBuilder, -- builderToByteString and renderBytes, so it produces -- optimally sized ByteStrings with minimal buffer copying. renderText :: (PrimMonad base, MonadBase base m, MonadThrow m) => RenderSettings -> ConduitM Event Text m () data RenderSettings -- | The default value for this type. def :: Default a => a rsPretty :: RenderSettings -> Bool -- | Defines some top level namespace definitions to be used, in the form -- of (prefix, namespace). This has absolutely no impact on the meaning -- of your documents, but can increase readability by moving commonly -- used namespace declarations to the top level. rsNamespaces :: RenderSettings -> [(Text, Text)] -- | Specify how to turn the unordered attributes used by the -- Text.XML module into an ordered list. rsAttrOrder :: RenderSettings -> Name -> Map Name Text -> [(Name, Text)] -- | Convenience function to create an ordering function suitable for use -- as the value of rsAttrOrder. The ordering function is created -- from an explicit ordering of the attributes, specified as a list of -- tuples, as follows: In each tuple, the first component is the -- Name of an element, and the second component is a list of -- attributes names. 
When the given element is rendered, the attributes -- listed, when present, appear first in the given order, followed by any -- other attributes in arbitrary order. If an element does not appear, -- all of its attributes are rendered in arbitrary order. orderAttrs :: [(Name, [Name])] -> Name -> Map Name Text -> [(Name, Text)] -- | Convert a stream of Events into a prettified one, adding extra -- whitespace. Note that this can change the meaning of your XML. prettify :: Monad m => Conduit Event m Event instance Default RenderSettings -- | This module provides both a native Haskell solution for parsing XML -- documents into a stream of events, and a set of parser combinators for -- dealing with a stream of events. -- -- As a simple example, if you have the following XML file: -- --
-- <?xml version="1.0" encoding="utf-8"?>
-- <people>
--     <person age="25">Michael</person>
--     <person age="2">Eliezer</person>
-- </people>
--
-- Then this code:
--
-- {-# LANGUAGE OverloadedStrings #-}
-- import Control.Monad.Trans.Resource
-- import Data.Conduit (($$))
-- import Data.Text (Text, unpack)
-- import Text.XML.Stream.Parse
--
-- data Person = Person Int Text
--     deriving Show
--
-- parsePerson = tagName "person" (requireAttr "age") $ \age -> do
--     name <- content
--     return $ Person (read $ unpack age) name
--
-- parsePeople = tagNoAttr "people" $ many parsePerson
--
-- main = do
--     people <- runResourceT $
--         parseFile def "people.xml" $$ force "people required" parsePeople
--     print people
--
--
-- will produce:
--
--
-- [Person 25 "Michael",Person 2 "Eliezer"]
--
--
-- Previous versions of this module contained a number of more
-- sophisticated functions written by Aristid Breitkreuz and Dmitry
-- Olshansky. To keep this package simpler, those functions are being
-- moved to a separate package. This note will be updated with the name
-- of the package(s) when available.
module Text.XML.Stream.Parse
-- | Parses a byte stream into Events. This function is implemented
-- fully in Haskell using attoparsec-text for parsing. The produced error
-- messages do not give line/column information, so you may prefer to
-- stick with the parser provided by libxml-enumerator. However, this has
-- the advantage of not relying on any C libraries.
--
-- This relies on detectUtf to determine character encoding, and
-- parseText to do the actual parsing.
parseBytes :: MonadThrow m => ParseSettings -> Conduit ByteString m Event
parseBytesPos :: MonadThrow m => ParseSettings -> Conduit ByteString m EventPos
-- | Parses a character stream into Events. This function is
-- implemented fully in Haskell using attoparsec-text for parsing. The
-- produced error messages do not give line/column information, so you
-- may prefer to stick with the parser provided by libxml-enumerator.
-- However, this has the advantage of not relying on any C libraries.
parseText :: MonadThrow m => ParseSettings -> Conduit Text m EventPos
-- | Automatically determine which UTF variant is being used. This function
-- first checks for BOMs, removing them as necessary, and then checks for
-- the equivalent of <?xml for each of UTF-8, UTF-16LE/BE, and
-- UTF-32LE/BE. It defaults to assuming UTF-8.
detectUtf :: MonadThrow m => Conduit ByteString m Text
-- | A helper function which reads a file from disk using
-- enumFile, detects character encoding using detectUtf,
-- parses the XML using parseBytes, and then hands off control to
-- your supplied parser.
parseFile :: MonadResource m => ParseSettings -> FilePath -> Producer m Event
-- | Parse an event stream from a lazy ByteString.
parseLBS :: MonadThrow m => ParseSettings -> ByteString -> Producer m Event
data ParseSettings
-- | The default value for this type.
def :: Default a => a
type DecodeEntities = Text -> Content
psDecodeEntities :: ParseSettings -> DecodeEntities
-- | Whether the original xmlns attributes should be retained in the parsed
-- values. For more information on motivation, see:
--
-- https://github.com/snoyberg/xml/issues/38
--
-- Default: False
--
-- Since 1.2.1
psRetainNamespaces :: ParseSettings -> Bool
-- | Default implementation of DecodeEntities: handles numeric
-- entities and the five standard character entities (lt, gt, amp, quot,
-- apos).
decodeXmlEntities :: DecodeEntities
-- | HTML4-compliant entity decoder. Handles numerics, the five standard
-- character entities, and the additional 248 entities defined by HTML 4
-- and XHTML 1.
--
-- Note that HTML 5 introduces a drastically larger number of entities,
-- and this code does not recognize most of them.
decodeHtmlEntities :: DecodeEntities
-- | The most generic way to parse a tag. It takes a predicate for checking
-- if this is the correct tag name, an AttrParser for handling
-- attributes, and then a parser for dealing with content.
--
-- This function automatically absorbs its balancing closing tag, and
-- will throw an exception if not all of the attributes or child elements
-- are consumed. If you want to allow extra attributes, see
-- ignoreAttrs.
--
-- This function automatically ignores comments, instructions and
-- whitespace.
tag :: MonadThrow m => (Name -> Maybe a) -> (a -> AttrParser b) -> (b -> ConduitM Event o m c) -> ConduitM Event o m (Maybe c)
-- | A simplified version of tag which matches against boolean
-- predicates.
tagPredicate :: MonadThrow m => (Name -> Bool) -> AttrParser a -> (a -> ConduitM Event o m b) -> ConduitM Event o m (Maybe b)
-- | A simplified version of tag which matches against a specific tag
-- name instead of taking a predicate function. This is often
-- sufficient, and when combined with OverloadedStrings and the IsString
-- instance of Name, can prove to be very concise.
--
-- Note that Name is namespace sensitive. When using the IsString
-- instance of Name, use "{http://a/b}c" :: Name to match the
-- tag c in the XML namespace http://a/b
tagName :: MonadThrow m => Name -> AttrParser a -> (a -> ConduitM Event o m b) -> ConduitM Event o m (Maybe b)
-- | A further simplified tag parser, which requires that no attributes
-- exist.
tagNoAttr :: MonadThrow m => Name -> ConduitM Event o m a -> ConduitM Event o m (Maybe a)
-- | Grabs the next piece of content. If none is available, returns
-- empty. This is simply a wrapper around contentMaybe.
content :: MonadThrow m => Consumer Event m Text
-- | Grabs the next piece of content if available. This function skips over
-- any comments and instructions and concatenates all content until the
-- next start or end tag.
contentMaybe :: MonadThrow m => Consumer Event m (Maybe Text)
-- | A monad for parsing attributes. By default, it requires you to deal
-- with all attributes present on an element, and will throw an exception
-- if there are unhandled attributes. Use the requireAttr,
-- optionalAttr et al functions for handling an attribute, and
-- ignoreAttrs if you would like to skip the rest of the
-- attributes on an element.
--
-- The Alternative instance behaves like the First monoid: it
-- chooses the first parser which doesn't fail.
data AttrParser a
-- | Require that a certain attribute be present and return its value.
requireAttr :: Name -> AttrParser Text
-- | Return the value for an attribute if present.
optionalAttr :: Name -> AttrParser (Maybe Text)
requireAttrRaw :: String -> ((Name, [Content]) -> Maybe b) -> AttrParser b
optionalAttrRaw :: ((Name, [Content]) -> Maybe b) -> AttrParser (Maybe b)
-- | Skip the remaining attributes on an element. Since this will clear the
-- list of attributes, you must call this after any calls to
-- requireAttr, optionalAttr, etc.
ignoreAttrs :: AttrParser ()
-- | Get the value of the first parser which returns Just. If no
-- parsers succeed (i.e., return Just), this function returns
-- Nothing.
--
-- -- orE a b = choose [a, b] --orE :: Monad m => Consumer Event m (Maybe a) -> Consumer Event m (Maybe a) -> Consumer Event m (Maybe a) -- | Get the value of the first parser which returns Just. If no -- parsers succeed (i.e., return Just), this function returns -- Nothing. choose :: Monad m => [Consumer Event m (Maybe a)] -> Consumer Event m (Maybe a) -- | Keep parsing elements as long as the parser returns Just. many :: Monad m => Consumer Event m (Maybe a) -> Consumer Event m [a] -- | Force an optional parser into a required parser. All of the tag -- functions, choose and many deal with Maybe -- parsers. Use this when you want to finally force something to happen. force :: MonadThrow m => String -> ConduitM Event o m (Maybe a) -> ConduitM Event o m a data XmlException XmlException :: String -> Maybe Event -> XmlException xmlErrorMessage :: XmlException -> String xmlBadInput :: XmlException -> Maybe Event InvalidEndElement :: Name -> XmlException InvalidEntity :: Text -> XmlException UnparsedAttributes :: [(Name, [Content])] -> XmlException data PositionRange :: * type EventPos = (Maybe PositionRange, Event) instance Typeable XmlException instance Show XmlException instance Alternative AttrParser instance Applicative AttrParser instance Functor AttrParser instance Monad AttrParser instance Exception XmlException instance Default ParseSettings -- | DOM-based XML parsing and rendering. -- -- In this module, attribute values and content nodes can contain either -- raw text or entities. In most cases, these can be fully resolved at -- parsing. If that is the case for your documents, the Text.XML -- module provides simplified datatypes that only contain raw text. 
module Text.XML.Unresolved writeFile :: RenderSettings -> FilePath -> Document -> IO () readFile :: ParseSettings -> FilePath -> IO Document renderLBS :: RenderSettings -> Document -> ByteString parseLBS :: ParseSettings -> ByteString -> Either SomeException Document parseLBS_ :: ParseSettings -> ByteString -> Document parseText :: ParseSettings -> Text -> Either SomeException Document parseText_ :: ParseSettings -> Text -> Document sinkTextDoc :: MonadThrow m => ParseSettings -> Consumer Text m Document sinkDoc :: MonadThrow m => ParseSettings -> Consumer ByteString m Document toEvents :: Document -> [Event] fromEvents :: MonadThrow m => Consumer EventPos m Document renderBuilder :: Monad m => RenderSettings -> Document -> Producer m Builder renderBytes :: (PrimMonad base, MonadBase base m) => RenderSettings -> Document -> ConduitM a ByteString m () renderText :: (PrimMonad base, MonadBase base m, MonadThrow m) => RenderSettings -> Document -> ConduitM a Text m () data InvalidEventStream ContentAfterRoot :: EventPos -> InvalidEventStream MissingRootElement :: InvalidEventStream InvalidInlineDoctype :: EventPos -> InvalidEventStream MissingEndElement :: Name -> (Maybe EventPos) -> InvalidEventStream UnterminatedInlineDoctype :: InvalidEventStream -- | The default value for this type. def :: Default a => a data ParseSettings psDecodeEntities :: ParseSettings -> DecodeEntities -- | Whether the original xmlns attributes should be retained in the parsed -- values. For more information on motivation, see: -- -- https://github.com/snoyberg/xml/issues/38 -- -- Default: False -- -- Since 1.2.1 psRetainNamespaces :: ParseSettings -> Bool data RenderSettings rsPretty :: RenderSettings -> Bool -- | Defines some top level namespace definitions to be used, in the form -- of (prefix, namespace). This has absolutely no impact on the meaning -- of your documents, but can increase readability by moving commonly -- used namespace declarations to the top level. 
rsNamespaces :: RenderSettings -> [(Text, Text)] instance Typeable InvalidEventStream instance Show InvalidEventStream instance Exception InvalidEventStream -- | DOM-based parsing and rendering. -- -- This module requires that all entities be resolved at parsing. If you -- need to interact with unresolved entities, please use -- Text.XML.Unresolved. This is the recommended module for most -- uses cases. -- -- While many of the datatypes in this module are simply re-exported from -- Data.XML.Types, Document, Node and -- Element are all redefined here to disallow the possibility of -- unresolved entities. Conversion functions are provided to switch -- between the two sets of datatypes. -- -- For simpler, bidirectional traversal of the DOM tree, see the -- Text.XML.Cursor module. module Text.XML data Document Document :: Prologue -> Element -> [Miscellaneous] -> Document documentPrologue :: Document -> Prologue documentRoot :: Document -> Element documentEpilogue :: Document -> [Miscellaneous] data Prologue :: * Prologue :: [Miscellaneous] -> Maybe Doctype -> [Miscellaneous] -> Prologue prologueBefore :: Prologue -> [Miscellaneous] prologueDoctype :: Prologue -> Maybe Doctype prologueAfter :: Prologue -> [Miscellaneous] data Instruction :: * Instruction :: Text -> Text -> Instruction instructionTarget :: Instruction -> Text instructionData :: Instruction -> Text data Miscellaneous :: * MiscInstruction :: Instruction -> Miscellaneous MiscComment :: Text -> Miscellaneous data Node NodeElement :: Element -> Node NodeInstruction :: Instruction -> Node NodeContent :: Text -> Node NodeComment :: Text -> Node data Element Element :: Name -> Map Name Text -> [Node] -> Element elementName :: Element -> Name elementAttributes :: Element -> Map Name Text elementNodes :: Element -> [Node] -- | A fully qualified name. -- -- Prefixes are not semantically important; they are included only to -- simplify pass-through parsing. 
When comparing names with Eq or -- Ord methods, prefixes are ignored. -- -- The IsString instance supports Clark notation; see -- http://www.jclark.com/xml/xmlns.htm and -- http://infohost.nmt.edu/tcc/help/pubs/pylxml/etree-QName.html. -- Use the OverloadedStrings language extension for very simple -- Name construction: -- --
-- myname :: Name
-- myname = "{http://example.com/ns/my-namespace}my-name"
--
data Name :: *
Name :: Text -> Maybe Text -> Maybe Text -> Name
nameLocalName :: Name -> Text
nameNamespace :: Name -> Maybe Text
namePrefix :: Name -> Maybe Text
-- | Note: due to the incredible complexity of DTDs, this type only
-- supports external subsets. I've tried adding internal subset types,
-- but they quickly gain more code than the rest of this module put
-- together.
--
-- It is possible that some future version of this library might support
-- internal subsets, but I am no longer actively working on adding them.
data Doctype :: *
Doctype :: Text -> Maybe ExternalID -> Doctype
doctypeName :: Doctype -> Text
doctypeID :: Doctype -> Maybe ExternalID
data ExternalID :: *
SystemID :: Text -> ExternalID
PublicID :: Text -> Text -> ExternalID
readFile :: ParseSettings -> FilePath -> IO Document
parseLBS :: ParseSettings -> ByteString -> Either SomeException Document
parseLBS_ :: ParseSettings -> ByteString -> Document
sinkDoc :: MonadThrow m => ParseSettings -> Consumer ByteString m Document
parseText :: ParseSettings -> Text -> Either SomeException Document
parseText_ :: ParseSettings -> Text -> Document
sinkTextDoc :: MonadThrow m => ParseSettings -> Consumer Text m Document
fromEvents :: MonadThrow m => Consumer EventPos m Document
data UnresolvedEntityException
UnresolvedEntityException :: (Set Text) -> UnresolvedEntityException
data XMLException
InvalidXMLFile :: FilePath -> SomeException -> XMLException
writeFile :: RenderSettings -> FilePath -> Document -> IO ()
renderLBS :: RenderSettings -> Document -> ByteString
renderText :: RenderSettings -> Document -> Text
renderBytes :: (PrimMonad base, MonadBase base m) => RenderSettings -> Document -> ConduitM a ByteString m ()
-- | The default value for this type.
def :: Default a => a
data ParseSettings
psDecodeEntities :: ParseSettings -> DecodeEntities
-- | Whether the original xmlns attributes should be retained in the parsed
-- values. For more information on motivation, see:
--
-- https://github.com/snoyberg/xml/issues/38
--
-- Default: False
--
-- Since 1.2.1
psRetainNamespaces :: ParseSettings -> Bool
-- | Default implementation of DecodeEntities: handles numeric
-- entities and the five standard character entities (lt, gt, amp, quot,
-- apos).
decodeXmlEntities :: DecodeEntities
-- | HTML4-compliant entity decoder. Handles numerics, the five standard
-- character entities, and the additional 248 entities defined by HTML 4
-- and XHTML 1.
--
-- Note that HTML 5 introduces a drastically larger number of entities,
-- and this code does not recognize most of them.
decodeHtmlEntities :: DecodeEntities
data RenderSettings
rsPretty :: RenderSettings -> Bool
-- | Defines some top level namespace definitions to be used, in the form
-- of (prefix, namespace). This has absolutely no impact on the meaning
-- of your documents, but can increase readability by moving commonly
-- used namespace declarations to the top level.
rsNamespaces :: RenderSettings -> [(Text, Text)]
-- | Specify how to turn the unordered attributes used by the
-- Text.XML module into an ordered list.
rsAttrOrder :: RenderSettings -> Name -> Map Name Text -> [(Name, Text)]
-- | Convenience function to create an ordering function suitable for use
-- as the value of rsAttrOrder. The ordering function is created
-- from an explicit ordering of the attributes, specified as a list of
-- tuples, as follows: In each tuple, the first component is the
-- Name of an element, and the second component is a list of
-- attributes names. When the given element is rendered, the attributes
-- listed, when present, appear first in the given order, followed by any
-- other attributes in arbitrary order. If an element does not appear,
-- all of its attributes are rendered in arbitrary order.
orderAttrs :: [(Name, [Name])] -> Name -> Map Name Text -> [(Name, Text)]
toXMLDocument :: Document -> Document
fromXMLDocument :: Document -> Either (Set Text) Document
toXMLNode :: Node -> Node
fromXMLNode :: Node -> Either (Set Text) Node
toXMLElement :: Element -> Element
fromXMLElement :: Element -> Either (Set Text) Element
instance Typeable Element
instance Typeable Node
instance Typeable Document
instance Typeable XMLException
instance Typeable UnresolvedEntityException
instance Show Element
instance Eq Element
instance Ord Element
instance Data Element
instance Show Node
instance Eq Node
instance Ord Node
instance Data Node
instance Show Document
instance Eq Document
instance Data Document
instance Show UnresolvedEntityException
instance ToMarkup Node
instance ToMarkup Element
instance ToMarkup Document
instance Exception UnresolvedEntityException
instance Exception XMLException
instance Show XMLException
instance NFData Element
instance NFData Node
instance NFData Document
-- | This module provides for simple DOM traversal. It is inspired by
-- XPath. There are two central concepts here:
--
--