-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Pure-Haskell utilities for dealing with XML with the enumerator package. (deprecated) -- -- This package is deprecated in favor of xml-conduit. -- -- This package provides parsing and rendering functions for XML. It is -- based on the datatypes found in the xml-types package. This package is -- broken up into the following modules: -- --
-- <?xml version="1.0" encoding="utf-8"?> -- <people> -- <person age="25">Michael</person> -- <person age="2">Eliezer</person> -- </people> ---- -- Then this code: -- --
-- {-# LANGUAGE OverloadedStrings #-}
-- import Text.XML.Stream.Parse
-- import Data.Text (Text, unpack)
--
-- data Person = Person { age :: Int, name :: Text }
-- deriving Show
--
-- parsePerson = tagName "person" (requireAttr "age") $ \age -> do
-- name <- content
-- return $ Person (read $ unpack age) name
--
-- parsePeople = tagNoAttr "people" $ many parsePerson
--
-- main = parseFile_ def "people.xml" $ force "people required" parsePeople
--
--
-- will produce:
--
--
-- [Person {age = 25, name = "Michael"},Person {age = 2, name = "Eliezer"}]
--
--
-- Previous versions of this module contained a number of more
-- sophisticated functions written by Aristid Breitkreuz and Dmitry
-- Olshansky. To keep this package simpler, those functions are being
-- moved to a separate package. This note will be updated with the name
-- of the package(s) when available.
module Text.XML.Stream.Parse
-- | Parses a byte stream into Events. This function is implemented
-- fully in Haskell using attoparsec-text for parsing. The produced error
-- messages do not give line/column information, so you may prefer to
-- stick with the parser provided by libxml-enumerator. However, this has
-- the advantage of not relying on any C libraries.
--
-- This relies on detectUtf to determine character encoding, and
-- parseText to do the actual parsing.
parseBytes :: Monad m => ParseSettings -> Enumeratee ByteString Event m a
-- | Parses a character stream into Events. This function is
-- implemented fully in Haskell using attoparsec-text for parsing. The
-- produced error messages do not give line/column information, so you
-- may prefer to stick with the parser provided by libxml-enumerator.
-- However, this has the advantage of not relying on any C libraries.
parseText :: Monad m => ParseSettings -> Enumeratee Text Event m a
-- | Automatically determine which UTF variant is being used. This function
-- first checks for BOMs, removing them as necessary, and then checks for
-- the equivalent of <?xml for each of UTF-8, UTF-16LE/BE, and
-- UTF-32LE/BE. It defaults to assuming UTF-8.
detectUtf :: Monad m => Enumeratee ByteString Text m a
-- | A helper function which reads a file from disk using enumFile,
-- detects character encoding using detectUtf, parses the XML
-- using parseBytes, and then hands off control to your supplied
-- parser.
parseFile :: ParseSettings -> FilePath -> Iteratee Event IO a -> IO (Either SomeException a)
-- | The same as parseFile, but throws any exceptions.
parseFile_ :: ParseSettings -> FilePath -> Iteratee Event IO a -> IO a
-- | Parse an event stream from a lazy ByteString.
parseLBS :: ParseSettings -> ByteString -> Iteratee Event IO a -> IO (Either SomeException a)
-- | Same as parseLBS, but throws exceptions.
parseLBS_ :: ParseSettings -> ByteString -> Iteratee Event IO a -> IO a
data ParseSettings
-- | The default value for this type.
def :: Default a => a
type DecodeEntities = Text -> Content
psDecodeEntities :: ParseSettings -> DecodeEntities
-- | The most generic way to parse a tag. It takes a predicate for checking
-- if this is the correct tag name, an AttrParser for handling
-- attributes, and then a parser for dealing with content.
--
-- This function automatically absorbs its balancing closing tag, and
-- will throw an exception if not all of the attributes or child elements
-- are consumed. If you want to allow extra attributes, see
-- ignoreAttrs.
--
-- This function automatically ignores comments, instructions and
-- whitespace.
tag :: Monad m => (Name -> Maybe a) -> (a -> AttrParser b) -> (b -> Iteratee Event m c) -> Iteratee Event m (Maybe c)
-- | A simplified version of tag which matches against boolean
-- predicates.
tagPredicate :: Monad m => (Name -> Bool) -> AttrParser a -> (a -> Iteratee Event m b) -> Iteratee Event m (Maybe b)
-- | A simplified version of tag which matches for specific tag
-- names instead of taking a predicate function. This is often
-- sufficient, and when combined with OverloadedStrings and the IsString
-- instance of Name, can prove to be very concise.
tagName :: Monad m => Name -> AttrParser a -> (a -> Iteratee Event m b) -> Iteratee Event m (Maybe b)
-- | A further simplified tag parser, which requires that no attributes
-- exist.
tagNoAttr :: Monad m => Name -> Iteratee Event m a -> Iteratee Event m (Maybe a)
-- | Grabs the next piece of content. If none is available, returns
-- empty. This is simply a wrapper around contentMaybe.
content :: Monad m => Iteratee Event m Text
-- | Grabs the next piece of content if available. This function skips over
-- any comments and instructions and concatenates all content until the
-- next start or end tag.
contentMaybe :: Monad m => Iteratee Event m (Maybe Text)
-- | A monad for parsing attributes. By default, it requires you to deal
-- with all attributes present on an element, and will throw an exception
-- if there are unhandled attributes. Use the requireAttr,
-- optionalAttr et al functions for handling an attribute, and
-- ignoreAttrs if you would like to skip the rest of the
-- attributes on an element.
--
-- The Alternative instance behaves like the First monoid: it
-- chooses the first parser which doesn't fail.
data AttrParser a
-- | Require that a certain attribute be present and return its value.
requireAttr :: Name -> AttrParser Text
-- | Return the value for an attribute if present.
optionalAttr :: Name -> AttrParser (Maybe Text)
requireAttrRaw :: String -> ((Name, [Content]) -> Maybe b) -> AttrParser b
optionalAttrRaw :: ((Name, [Content]) -> Maybe b) -> AttrParser (Maybe b)
-- | Skip the remaining attributes on an element. Since this will clear the
-- list of attributes, you must call this after any calls to
-- requireAttr, optionalAttr, etc.
ignoreAttrs :: AttrParser ()
-- | Get the value of the first parser which returns Just. If no
-- parsers succeed (i.e., return Just), this function returns
-- Nothing.
--
-- -- orE a b = choose [a, b] --orE :: Monad m => Iteratee Event m (Maybe a) -> Iteratee Event m (Maybe a) -> Iteratee Event m (Maybe a) -- | Get the value of the first parser which returns Just. If no -- parsers succeed (i.e., return Just), this function returns -- Nothing. choose :: Monad m => [Iteratee Event m (Maybe a)] -> Iteratee Event m (Maybe a) -- | Keep parsing elements as long as the parser returns Just. many :: Monad m => Iteratee Event m (Maybe a) -> Iteratee Event m [a] -- | Force an optional parser into a required parser. All of the tag -- functions, choose and many deal with Maybe -- parsers. Use this when you want to finally force something to happen. force :: Monad m => String -> Iteratee Event m (Maybe a) -> Iteratee Event m a data XmlException XmlException :: String -> Maybe Event -> XmlException xmlErrorMessage :: XmlException -> String xmlBadInput :: XmlException -> Maybe Event InvalidEndElement :: Name -> XmlException InvalidEntity :: Text -> XmlException UnparsedAttributes :: [(Name, [Content])] -> XmlException instance Typeable XmlException instance Show XmlException instance Alternative AttrParser instance Applicative AttrParser instance Functor AttrParser instance Monad AttrParser instance Exception XmlException instance Default ParseSettings -- | DOM-based XML parsing and rendering. -- -- In this module, attribute values and content nodes can contain either -- raw text or entities. In most cases, these can be fully resolved at -- parsing. If that is the case for your documents, the Text.XML -- module provides simplified datatypes that only contain raw text. 
module Text.XML.Unresolved writeFile :: RenderSettings -> FilePath -> Document -> IO () readFile :: ParseSettings -> FilePath -> IO (Either SomeException Document) readFile_ :: ParseSettings -> FilePath -> IO Document renderLBS :: RenderSettings -> Document -> ByteString parseLBS :: ParseSettings -> ByteString -> Either SomeException Document parseLBS_ :: ParseSettings -> ByteString -> Document parseEnum :: Monad m => ParseSettings -> Enumerator ByteString m Document -> m (Either SomeException Document) parseEnum_ :: Monad m => ParseSettings -> Enumerator ByteString m Document -> m Document toEvents :: Document -> [Event] fromEvents :: Monad m => Iteratee Event m Document renderBuilder :: MonadIO m => RenderSettings -> Document -> Enumerator Builder m a renderBytes :: MonadIO m => RenderSettings -> Document -> Enumerator ByteString m a renderText :: MonadIO m => RenderSettings -> Document -> Enumerator Text m a data InvalidEventStream InvalidEventStream :: String -> InvalidEventStream lazyConsume :: Enumerator a IO () -> IO [a] -- | The default value for this type. def :: Default a => a data ParseSettings psDecodeEntities :: ParseSettings -> DecodeEntities data RenderSettings rsPretty :: RenderSettings -> Bool instance Typeable InvalidEventStream instance Show InvalidEventStream instance Exception InvalidEventStream -- | DOM-based parsing and rendering. -- -- This module requires that all entities be resolved at parsing. If you -- need to interact with unresolved entities, please use -- Text.XML.Unresolved. This is the recommended module for most -- use cases. -- -- While many of the datatypes in this module are simply re-exported from -- Data.XML.Types, Document, Node and -- Element are all redefined here to disallow the possibility of -- unresolved entities. Conversion functions are provided to switch -- between the two sets of datatypes. -- -- For simpler, bidirectional traversal of the DOM tree, see the -- Text.XML.Cursor module. 
module Text.XML data Document Document :: Prologue -> Element -> [Miscellaneous] -> Document documentPrologue :: Document -> Prologue documentRoot :: Document -> Element documentEpilogue :: Document -> [Miscellaneous] data Prologue :: * Prologue :: [Miscellaneous] -> Maybe Doctype -> [Miscellaneous] -> Prologue prologueBefore :: Prologue -> [Miscellaneous] prologueDoctype :: Prologue -> Maybe Doctype prologueAfter :: Prologue -> [Miscellaneous] data Instruction :: * Instruction :: Text -> Text -> Instruction instructionTarget :: Instruction -> Text instructionData :: Instruction -> Text data Miscellaneous :: * MiscInstruction :: Instruction -> Miscellaneous MiscComment :: Text -> Miscellaneous data Node NodeElement :: Element -> Node NodeInstruction :: Instruction -> Node NodeContent :: Text -> Node NodeComment :: Text -> Node data Element Element :: Name -> [(Name, Text)] -> [Node] -> Element elementName :: Element -> Name elementAttributes :: Element -> [(Name, Text)] elementNodes :: Element -> [Node] -- | A fully qualified name. -- -- Prefixes are not semantically important; they are included only to -- simplify pass-through parsing. When comparing names with Eq or -- Ord methods, prefixes are ignored. -- -- The IsString instance supports Clark notation; see -- http://www.jclark.com/xml/xmlns.htm and -- http://infohost.nmt.edu/tcc/help/pubs/pylxml/etree-QName.html. -- Use the OverloadedStrings language extension for very simple -- Name construction: -- --
-- myname :: Name
-- myname = "{http://example.com/ns/my-namespace}my-name"
--
data Name :: *
Name :: Text -> Maybe Text -> Maybe Text -> Name
nameLocalName :: Name -> Text
nameNamespace :: Name -> Maybe Text
namePrefix :: Name -> Maybe Text
-- | Note: due to the incredible complexity of DTDs, this type only
-- supports external subsets. I've tried adding internal subset types,
-- but they quickly gain more code than the rest of this module put
-- together.
--
-- It is possible that some future version of this library might support
-- internal subsets, but I am no longer actively working on adding them.
data Doctype :: *
Doctype :: Text -> Maybe ExternalID -> Doctype
doctypeName :: Doctype -> Text
doctypeID :: Doctype -> Maybe ExternalID
data ExternalID :: *
SystemID :: Text -> ExternalID
PublicID :: Text -> Text -> ExternalID
readFile :: ParseSettings -> FilePath -> IO (Either SomeException Document)
readFile_ :: ParseSettings -> FilePath -> IO Document
parseLBS :: ParseSettings -> ByteString -> Either SomeException Document
parseLBS_ :: ParseSettings -> ByteString -> Document
parseEnum :: Monad m => ParseSettings -> Enumerator ByteString m Document -> m (Either SomeException Document)
parseEnum_ :: Monad m => ParseSettings -> Enumerator ByteString m Document -> m Document
parseText :: ParseSettings -> Text -> Either SomeException Document
parseText_ :: ParseSettings -> Text -> Document
parseTextEnum :: Monad m => ParseSettings -> Enumerator Text m Document -> m (Either SomeException Document)
parseTextEnum_ :: Monad m => ParseSettings -> Enumerator Text m Document -> m Document
fromEvents :: Monad m => Iteratee Event m Document
data UnresolvedEntityException
UnresolvedEntityException :: (Set Text) -> UnresolvedEntityException
writeFile :: RenderSettings -> FilePath -> Document -> IO ()
renderLBS :: RenderSettings -> Document -> ByteString
renderText :: RenderSettings -> Document -> Text
renderBytes :: MonadIO m => RenderSettings -> Document -> Enumerator ByteString m a
-- | The default value for this type.
def :: Default a => a
data ParseSettings
psDecodeEntities :: ParseSettings -> DecodeEntities
data RenderSettings
rsPretty :: RenderSettings -> Bool
toXMLDocument :: Document -> Document
fromXMLDocument :: Document -> Either (Set Text) Document
toXMLNode :: Node -> Node
fromXMLNode :: Node -> Either (Set Text) Node
toXMLElement :: Element -> Element
fromXMLElement :: Element -> Either (Set Text) Element
instance Typeable Element
instance Typeable Node
instance Typeable Document
instance Typeable UnresolvedEntityException
instance Show Element
instance Eq Element
instance Show Node
instance Eq Node
instance Show Document
instance Eq Document
instance Show UnresolvedEntityException
instance Exception UnresolvedEntityException
-- | This module provides for simple DOM traversal. It is inspired by
-- XPath. There are two central concepts here:
--
--