-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Pure-Haskell utilities for dealing with XML with the enumerator package. -- -- Provides the ability to parse and render XML in a streaming manner -- using the enumerator package. @package xml-enumerator @version 0.3.2 -- | Enumeratees to render XML Events. Unlike -- libxml-enumerator and expat-enumerator, this module does not provide -- IO and ST variants, since the underlying rendering operations are pure -- functions. module Text.XML.Enumerator.Render -- | Render a stream of Events into a stream of Builders. -- Builders are from the blaze-builder package, and allow the creation of -- optimally sized ByteStrings with minimal buffer copying. renderBuilder :: Monad m => Enumeratee Event Builder m b -- | Render a stream of Events into a stream of ByteStrings. -- This function wraps around renderBuilder and -- builderToByteString, so it produces optimally sized -- ByteStrings with minimal buffer copying. -- -- The output is UTF8 encoded. renderBytes :: MonadIO m => Enumeratee Event ByteString m b -- | Render a stream of Events into a stream of Texts. -- This function wraps around renderBuilder, -- builderToByteString and renderBytes, so it produces -- optimally sized ByteStrings with minimal buffer copying. renderText :: MonadIO m => Enumeratee Event Text m b -- | Pretty prints a stream of Events into a stream of -- Builders. This changes the meaning of some documents, by -- inserting/modifying whitespace. prettyBuilder :: Monad m => Enumeratee Event Builder m b -- | Same as prettyBuilder, but produces a stream of -- ByteStrings. prettyBytes :: MonadIO m => Enumeratee Event ByteString m b -- | Same as prettyBuilder, but produces a stream of Texts. 
prettyText :: MonadIO m => Enumeratee Event Text m b -- | This module provides both a native Haskell solution for parsing XML -- documents into a stream of events, and a set of parser combinators for -- dealing with a stream of events. -- -- As a simple example, if you have the following XML file: -- --
-- <?xml version="1.0" encoding="utf-8"?> -- <people> -- <person age="25">Michael</person> -- <person age="2">Eliezer</person> -- </people> ---- -- Then this code: -- --
-- {-# LANGUAGE OverloadedStrings #-}
-- import Text.XML.Enumerator.Parse
-- import Data.Text.Lazy (Text, unpack)
--
-- data Person = Person { age :: Int, name :: Text }
-- deriving Show
--
-- parsePerson = tagName "person" (requireAttr "age") $ \age -> do
-- name <- content
-- return $ Person (read $ unpack age) name
--
-- parsePeople = tagNoAttr "people" $ many parsePerson
--
-- main = parseFile_ "people.xml" decodeEntities $ force "people required" parsePeople
--
--
-- will produce:
--
--
-- [Person {age = 25, name = "Michael"},Person {age = 2, name = "Eliezer"}]
--
--
-- Previous versions of this module contained a number of more
-- sophisticated functions written by Aristid Breitkreuz and Dmitry
-- Olshansky. To keep this package simpler, those functions are being
-- moved to a separate package. This note will be updated with the name
-- of the package(s) when available.
module Text.XML.Enumerator.Parse
-- | Parses a byte stream into Events. This function is implemented
-- fully in Haskell using attoparsec-text for parsing. The produced error
-- messages do not give line/column information, so you may prefer to
-- stick with the parser provided by libxml-enumerator. However, this has
-- the advantage of not relying on any C libraries.
--
-- This relies on detectUtf to determine character encoding, and
-- parseText to do the actual parsing.
parseBytes :: Monad m => DecodeEntities -> Enumeratee ByteString Event m a
-- | Parses a character stream into Events. This function is
-- implemented fully in Haskell using attoparsec-text for parsing. The
-- produced error messages do not give line/column information, so you
-- may prefer to stick with the parser provided by libxml-enumerator.
-- However, this has the advantage of not relying on any C libraries.
parseText :: Monad m => DecodeEntities -> Enumeratee Text Event m a
-- | Automatically determine which UTF variant is being used. This function
-- first checks for BOMs, removing them as necessary, and then checks for
-- the equivalent of <?xml for each of UTF-8, UTF-16LE/BE, and
-- UTF-32LE/BE. It defaults to assuming UTF-8.
detectUtf :: Monad m => Enumeratee ByteString Text m a
-- | A helper function which reads a file from disk using enumFile,
-- detects character encoding using detectUtf, parses the XML
-- using parseBytes, and then hands off control to your supplied
-- parser.
parseFile :: FilePath -> DecodeEntities -> Iteratee Event IO a -> IO (Either SomeException a)
-- | The same as parseFile, but throws any exceptions.
parseFile_ :: FilePath -> DecodeEntities -> Iteratee Event IO a -> IO a
-- | Parse an event stream from a lazy ByteString.
parseLBS :: ByteString -> DecodeEntities -> Iteratee Event IO a -> IO (Either SomeException a)
-- | Same as parseLBS, but throws exceptions.
parseLBS_ :: ByteString -> DecodeEntities -> Iteratee Event IO a -> IO a
type DecodeEntities = Text -> Content
-- | Default implementation of DecodeEntities: handles numeric
-- entities and the five standard character entities (lt, gt, amp, quot,
-- apos).
decodeEntities :: DecodeEntities
-- | The most generic way to parse a tag. It takes a predicate for checking
-- if this is the correct tag name, an AttrParser for handling
-- attributes, and then a parser for dealing with content.
--
-- This function automatically absorbs its balancing closing tag, and
-- will throw an exception if not all of the attributes or child elements
-- are consumed. If you want to allow extra attributes, see
-- ignoreAttrs.
--
-- This function automatically ignores comments, instructions and
-- whitespace.
tag :: Monad m => (Name -> Maybe a) -> (a -> AttrParser b) -> (b -> Iteratee Event m c) -> Iteratee Event m (Maybe c)
-- | A simplified version of tag which matches against boolean
-- predicates.
tagPredicate :: Monad m => (Name -> Bool) -> AttrParser a -> (a -> Iteratee Event m b) -> Iteratee Event m (Maybe b)
-- | A simplified version of tag which matches for specific tag
-- names instead of taking a predicate function. This is often
-- sufficient, and when combined with OverloadedStrings and the IsString
-- instance of Name, can prove to be very concise.
tagName :: Monad m => Name -> AttrParser a -> (a -> Iteratee Event m b) -> Iteratee Event m (Maybe b)
-- | A further simplified tag parser, which requires that no attributes
-- exist.
tagNoAttr :: Monad m => Name -> Iteratee Event m a -> Iteratee Event m (Maybe a)
-- | Grabs the next piece of content. If none is available, returns
-- empty. This is simply a wrapper around contentMaybe.
content :: Monad m => Iteratee Event m Text
-- | Grabs the next piece of content if available. This function skips over
-- any comments and instructions and concatenates all content until the
-- next start or end tag.
contentMaybe :: Monad m => Iteratee Event m (Maybe Text)
-- | A monad for parsing attributes. By default, it requires you to deal
-- with all attributes present on an element, and will throw an exception
-- if there are unhandled attributes. Use the requireAttr,
-- optionalAttr et al functions for handling an attribute, and
-- ignoreAttrs if you would like to skip the rest of the
-- attributes on an element.
data AttrParser a
-- | Require that a certain attribute be present and return its value.
requireAttr :: Name -> AttrParser Text
-- | Return the value for an attribute if present.
optionalAttr :: Name -> AttrParser (Maybe Text)
requireAttrRaw :: String -> ((Name, [Content]) -> Maybe b) -> AttrParser b
optionalAttrRaw :: ((Name, [Content]) -> Maybe b) -> AttrParser (Maybe b)
-- | Skip the remaining attributes on an element. Since this will clear the
-- list of attributes, you must call this after any calls to
-- requireAttr, optionalAttr, etc.
ignoreAttrs :: AttrParser ()
-- | Get the value of the first parser which returns Just. If no
-- parsers succeed (i.e., return Just), this function returns
-- Nothing.
--
-- -- orE a b = choose [a, b] --orE :: Monad m => Iteratee Event m (Maybe a) -> Iteratee Event m (Maybe a) -> Iteratee Event m (Maybe a) -- | Get the value of the first parser which returns Just. If no -- parsers succeed (i.e., return Just), this function returns -- Nothing. choose :: Monad m => [Iteratee Event m (Maybe a)] -> Iteratee Event m (Maybe a) -- | Keep parsing elements as long as the parser returns Just. many :: Monad m => Iteratee Event m (Maybe a) -> Iteratee Event m [a] -- | Force an optional parser into a required parser. All of the tag -- functions, choose and many deal with Maybe -- parsers. Use this when you want to finally force something to happen. force :: Monad m => String -> Iteratee Event m (Maybe a) -> Iteratee Event m a data XmlException XmlException :: String -> Maybe Event -> XmlException xmlErrorMessage :: XmlException -> String xmlBadInput :: XmlException -> Maybe Event InvalidEndElement :: Name -> XmlException InvalidEntity :: Text -> XmlException UnparsedAttributes :: [(Name, [Content])] -> XmlException instance Typeable XmlException instance Show XmlException instance Applicative AttrParser instance Functor AttrParser instance Monad AttrParser instance Exception XmlException module Text.XML.Enumerator.Document writeFile :: FilePath -> Document -> IO () -- | Pretty prints via prettyBytes. writePrettyFile :: FilePath -> Document -> IO () readFile :: FilePath -> DecodeEntities -> IO (Either SomeException Document) readFile_ :: FilePath -> DecodeEntities -> IO Document renderLBS :: Document -> ByteString -- | Pretty prints via prettyBytes. 
prettyLBS :: Document -> ByteString parseLBS :: ByteString -> DecodeEntities -> Either SomeException Document parseLBS_ :: ByteString -> DecodeEntities -> Document toEvents :: Document -> [Event] fromEvents :: Monad m => Iteratee Event m Document renderBuilder :: MonadIO m => Document -> Enumerator Builder m a renderBytes :: MonadIO m => Document -> Enumerator ByteString m a renderText :: MonadIO m => Document -> Enumerator Text m a prettyBuilder :: MonadIO m => Document -> Enumerator Builder m a prettyBytes :: MonadIO m => Document -> Enumerator ByteString m a prettyText :: MonadIO m => Document -> Enumerator Text m a data InvalidEventStream InvalidEventStream :: String -> InvalidEventStream lazyConsume :: Enumerator a IO () -> IO [a] instance Typeable InvalidEventStream instance Show InvalidEventStream instance Exception InvalidEventStream module Text.XML.Enumerator.Resolved data Document Document :: Prologue -> Element -> [Miscellaneous] -> Document documentPrologue :: Document -> Prologue documentRoot :: Document -> Element documentEpilogue :: Document -> [Miscellaneous] data Prologue :: * Prologue :: [Miscellaneous] -> Maybe Doctype -> [Miscellaneous] -> Prologue prologueBefore :: Prologue -> [Miscellaneous] prologueDoctype :: Prologue -> Maybe Doctype prologueAfter :: Prologue -> [Miscellaneous] data Instruction :: * Instruction :: Text -> Text -> Instruction instructionTarget :: Instruction -> Text instructionData :: Instruction -> Text data Miscellaneous :: * MiscInstruction :: Instruction -> Miscellaneous MiscComment :: Text -> Miscellaneous data Node NodeElement :: Element -> Node NodeInstruction :: Instruction -> Node NodeContent :: Text -> Node NodeComment :: Text -> Node data Element Element :: Name -> [(Name, Text)] -> [Node] -> Element elementName :: Element -> Name elementAttributes :: Element -> [(Name, Text)] elementNodes :: Element -> [Node] -- | A fully qualified name. 
-- -- Prefixes are not semantically important; they are included only to -- simplify pass-through parsing. When comparing names with Eq or -- Ord methods, prefixes are ignored. -- -- The IsString instance supports Clark notation; see -- http://www.jclark.com/xml/xmlns.htm and -- http://infohost.nmt.edu/tcc/help/pubs/pylxml/etree-QName.html. -- Use the OverloadedStrings language extension for very simple -- Name construction: -- --
-- myname :: Name
-- myname = "{http://example.com/ns/my-namespace}my-name"
--
data Name :: *
Name :: Text -> Maybe Text -> Maybe Text -> Name
nameLocalName :: Name -> Text
nameNamespace :: Name -> Maybe Text
namePrefix :: Name -> Maybe Text
-- | Note: due to the incredible complexity of DTDs, this type only
-- supports external subsets. I've tried adding internal subset types,
-- but they quickly gain more code than the rest of this module put
-- together.
--
-- It is possible that some future version of this library might support
-- internal subsets, but I am no longer actively working on adding them.
data Doctype :: *
Doctype :: Text -> Maybe ExternalID -> Doctype
doctypeName :: Doctype -> Text
doctypeID :: Doctype -> Maybe ExternalID
data ExternalID :: *
SystemID :: Text -> ExternalID
PublicID :: Text -> Text -> ExternalID
type DecodeEntities = Text -> Content
-- | Default implementation of DecodeEntities: handles numeric
-- entities and the five standard character entities (lt, gt, amp, quot,
-- apos).
decodeEntities :: DecodeEntities
readFile :: FilePath -> DecodeEntities -> IO (Either SomeException Document)
readFile_ :: FilePath -> DecodeEntities -> IO Document
parseLBS :: ByteString -> DecodeEntities -> Either SomeException Document
parseLBS_ :: ByteString -> DecodeEntities -> Document
parseEnum :: Monad m => Enumerator ByteString m Document -> DecodeEntities -> m (Either SomeException Document)
parseEnum_ :: Monad m => Enumerator ByteString m Document -> DecodeEntities -> m Document
fromEvents :: Monad m => Iteratee Event m Document
data UnresolvedEntityException
UnresolvedEntityException :: (Set Text) -> UnresolvedEntityException
writeFile :: FilePath -> Document -> IO ()
-- | Pretty prints via prettyBytes.
writePrettyFile :: FilePath -> Document -> IO ()
renderLBS :: Document -> ByteString
-- | Pretty prints via prettyBytes.
prettyLBS :: Document -> ByteString
renderBytes :: MonadIO m => Document -> Enumerator ByteString m a
prettyBytes :: MonadIO m => Document -> Enumerator ByteString m a
toXMLDocument :: Document -> Document
fromXMLDocument :: Document -> Either (Set Text) Document
toXMLNode :: Node -> Node
fromXMLNode :: Node -> Either (Set Text) Node
toXMLElement :: Element -> Element
fromXMLElement :: Element -> Either (Set Text) Element
instance Typeable Element
instance Typeable Node
instance Typeable Document
instance Typeable UnresolvedEntityException
instance Show Element
instance Eq Element
instance Show Node
instance Eq Node
instance Show Document
instance Eq Document
instance Show UnresolvedEntityException
instance Exception UnresolvedEntityException
module Text.XML.Enumerator.Cursor
-- | Something that can be used in a predicate check as a boolean.
class Boolean a
bool :: Boolean a => a -> Bool
-- | The type of an Axis that returns a list of Cursors. They are roughly
-- modeled after http://www.w3.org/TR/xpath/#axes.
--
-- Axes can be composed with >=>, where e.g. f >=>
-- g means that on all results of the f axis, the
-- g axis will be applied, and all results joined together.
-- Because Axis is just a type synonym for Cursor ->
-- [Cursor], it is possible to use other standard functions like
-- >>= or concatMap similarly.
--
-- The operators &/, &// and &.// can be
-- used to combine axes so that the second axis works on the children,
-- descendants, respectively the context node as well as its descendants
-- of the results of the first axis.
--
-- The operators $|, $/, $// and $.// can be
-- used to apply an axis (right-hand side) to a cursor so that it is
-- applied on the cursor itself, its children, its descendants,
-- respectively itself and its descendants.
--
-- Note that many of these operators also work on generalised Axes
-- that can return lists of something other than Cursors, for example
-- Content elements.
type Axis = Cursor -> [Cursor]
-- | A cursor: contains an XML Node and pointers to its children,
-- ancestors and siblings.
data Cursor
-- | Convert a Document to a Cursor. It will point to the
-- document root.
fromDocument :: Document -> Cursor
-- | Convert a Node to a Cursor (without parents).
fromNode :: Node -> Cursor
-- | Cut a cursor off from its parent. The idea is to allow restricting the
-- scope of queries on it.
cut :: Cursor -> Cursor
-- | The parent axis. As described in XPath: the parent axis contains
-- the parent of the context node, if there is one.
parent :: Axis
-- | The preceding-sibling axis. XPath: the preceding-sibling axis
-- contains all the preceding siblings of the context node [...].
precedingSibling :: Axis
-- | The following-sibling axis. XPath: the following-sibling axis
-- contains all the following siblings of the context node [...].
followingSibling :: Axis
-- | The child axis. XPath: the child axis contains the children of the
-- context node.
child :: Cursor -> [Cursor]
-- | The current node.
node :: Cursor -> Node
-- | The preceding axis. XPath: the preceding axis contains all nodes in
-- the same document as the context node that are before the context node
-- in document order, excluding any ancestors and excluding attribute
-- nodes and namespace nodes.
preceding :: Axis
-- | The following axis. XPath: the following axis contains all nodes in
-- the same document as the context node that are after the context node
-- in document order, excluding any descendants and excluding attribute
-- nodes and namespace nodes.
following :: Axis
-- | The ancestor axis. XPath: the ancestor axis contains the ancestors
-- of the context node; the ancestors of the context node consist of the
-- parent of context node and the parent's parent and so on; thus, the
-- ancestor axis will always include the root node, unless the context
-- node is the root node.
ancestor :: Axis
-- | The descendant axis. XPath: the descendant axis contains the
-- descendants of the context node; a descendant is a child or a child of
-- a child and so on; thus the descendant axis never contains attribute
-- or namespace nodes.
descendant :: Axis
-- | Modify an axis by adding the context node itself as the first element
-- of the result list.
orSelf :: Axis -> Axis
-- | Combine two axes so that the second works on the children of the
-- results of the first.
(&/) :: Axis -> (Cursor -> [a]) -> (Cursor -> [a])
-- | Combine two axes so that the second works on the descendants of the
-- results of the first.
(&//) :: Axis -> (Cursor -> [a]) -> (Cursor -> [a])
-- | Combine two axes so that the second works on both the result nodes,
-- and their descendants.
(&.//) :: Axis -> (Cursor -> [a]) -> (Cursor -> [a])
-- | Apply an axis to a Cursor.
($|) :: Cursor -> (Cursor -> [a]) -> [a]
-- | Apply an axis to the children of a Cursor.
($/) :: Cursor -> (Cursor -> [a]) -> [a]
-- | Apply an axis to the descendants of a Cursor.
($//) :: Cursor -> (Cursor -> [a]) -> [a]
-- | Apply an axis to a Cursor as well as its descendants.
($.//) :: Cursor -> (Cursor -> [a]) -> [a]
-- | Filter cursors that don't pass a check.
check :: Boolean b => (Cursor -> b) -> Axis
-- | Filter nodes that don't pass a check.
checkNode :: Boolean b => (Node -> b) -> Axis
-- | Filter elements that don't pass a check, and remove all non-elements.
checkElement :: Boolean b => (Element -> b) -> Axis
-- | Filter elements that don't pass a name check, and remove all
-- non-elements.
checkName :: Boolean b => (Name -> b) -> Axis
-- | Remove all non-elements. Compare roughly to XPath: A node test * is
-- true for any node of the principal node type. For example, child::*
-- will select all element children of the context node [...].
anyElement :: Axis
-- | Select only those elements with a matching tag name. XPath: A node
-- test that is a QName is true if and only if the type of the node (see
-- [5 Data Model]) is the principal node type and has an expanded-name
-- equal to the expanded-name specified by the QName.
element :: Name -> Axis
-- | Select only text nodes, and directly give the Text values.
-- XPath: The node test text() is true for any text node.
--
-- Note that this is not strictly an Axis, but will work with most
-- combinators.
content :: Cursor -> [Text]
-- | Select attributes on the current element (or nothing if it is not an
-- element). XPath: the attribute axis contains the attributes of the
-- context node; the axis will be empty unless the context node is an
-- element
--
-- Note that this is not strictly an Axis, but will work with most
-- combinators.
--
-- The return list of the generalised axis contains Text elements,
-- each one representing an attribute
-- value.
attribute :: Name -> Cursor -> [Text]
-- | Left-to-right Kleisli composition of monads.
(>=>) :: Monad m => (a -> m b) -> (b -> m c) -> a -> m c
instance Show Cursor
instance Boolean (Either a b)
instance Boolean (Maybe a)
instance Boolean [a]
instance Boolean Bool