-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | wrapper for expat, the fast XML parser -- -- Expat (http://expat.sourceforge.net/) is a stream-oriented XML -- parser written in C. -- -- This package provides a Haskell binding for Expat, with a choice of -- tree or SAX-style representation, and it includes an -- XML formatter. It is extensible to any string type, with -- String, ByteString and Text provided out of -- the box. -- -- The emphasis is on speed and simplicity. If you want more complete and -- powerful XML libraries, consider using HaXml or HXT -- instead. -- -- Examples and benchmarks: http://haskell.org/haskellwiki/Hexpat/ -- -- DARCS repository: http://code.haskell.org/hexpat/ @package hexpat @version 0.8 -- | Low-level interface to Expat. Unless speed is paramount, this should -- normally be avoided in favour of the interface provided by -- Text-XML-Expat-Tree. Basic usage is: -- --
--   1. Make a new parser: newParser.
--   2. Set up callbacks on the parser: setStartElementHandler, etc.
--   3. Feed data into the parser: parse, parse' or parseChunk.
module Text.XML.Expat.IO data Parser -- | Create a Parser. The encoding parameter, if provided, overrides -- the document's encoding declaration. newParser :: Maybe Encoding -> IO Parser -- | parse data feeds lazy ByteString data into a -- Parser. It returns Nothing on success, or Just the parse error. parse :: Parser -> ByteString -> IO (Maybe XMLParseError) -- | parse data feeds strict ByteString data into a -- Parser. It returns Nothing on success, or Just the parse error. parse' :: Parser -> ByteString -> IO (Maybe XMLParseError) -- | parseChunk data False feeds strict ByteString data -- into a Parser. The end of the data is indicated by passing -- True for the final parameter. It returns Nothing on success, -- or Just the parse error. parseChunk :: Parser -> ByteString -> Bool -> IO (Maybe XMLParseError) -- | Encoding types available for the document encoding. data Encoding ASCII :: Encoding UTF8 :: Encoding UTF16 :: Encoding ISO88591 :: Encoding -- | Parse error, consisting of message text and error location data XMLParseError XMLParseError :: String -> XMLParseLocation -> XMLParseError -- | Specifies a location of an event within the input text data XMLParseLocation XMLParseLocation :: Int64 -> Int64 -> Int64 -> Int64 -> XMLParseLocation -- | Line number of the event xmlLineNumber :: XMLParseLocation -> Int64 -- | Column number of the event xmlColumnNumber :: XMLParseLocation -> Int64 -- | Byte index of event from start of document xmlByteIndex :: XMLParseLocation -> Int64 -- | The number of bytes in the event xmlByteCount :: XMLParseLocation -> Int64 -- | The type of the "element started" callback. The first parameter is the -- element name; the second are the (attribute, value) pairs. Return True -- to continue parsing as normal, or False to terminate the parse. type StartElementHandler = CString -> [(CString, CString)] -> IO Bool -- | The type of the "element ended" callback. The parameter is the element -- name. 
Return True to continue parsing as normal, or False to terminate -- the parse. type EndElementHandler = CString -> IO Bool -- | The type of the "character data" callback. The parameter is the -- character data processed. This callback may be called more than once -- while processing a single conceptual block of text. Return True to -- continue parsing as normal, or False to terminate the parse. type CharacterDataHandler = CStringLen -> IO Bool -- | Attach a StartElementHandler to a Parser. setStartElementHandler :: Parser -> StartElementHandler -> IO () -- | Attach an EndElementHandler to a Parser. setEndElementHandler :: Parser -> EndElementHandler -> IO () -- | Attach a CharacterDataHandler to a Parser. setCharacterDataHandler :: Parser -> CharacterDataHandler -> IO () -- | This variant of parseChunk must either be called inside -- withHandlers (safest), or between unsafeSetHandlers and -- unsafeReleaseHandlers, and this will give you better -- performance than parseChunk if you process multiple chunks -- inside. unsafeParseChunk :: Parser -> ByteString -> Bool -> IO (Maybe XMLParseError) -- | unsafeParseChunk is required to be called inside -- withHandlers. Safer than using unsafeSetHandlers / -- unsafeReleaseHandlers. withHandlers :: Parser -> IO a -> IO a unsafeSetHandlers :: Parser -> IO ExpatHandlers unsafeReleaseHandlers :: ExpatHandlers -> IO () data ExpatHandlers encodingToString :: Encoding -> String instance Eq XMLParseLocation instance Show XMLParseLocation instance Eq XMLParseError instance Show XMLParseError instance NFData XMLParseLocation instance NFData XMLParseError instance Show Parser -- | This module provides functions to parse an XML document to a tree -- structure, either strictly or lazily, as well as a lazy SAX-style -- interface. -- -- The GenericXMLString type class allows you to use any string type. -- Three string types are provided for here: String, -- ByteString and Text. -- -- Here is a complete example to get you started: -- --
--   -- | A "hello world" example of hexpat that lazily parses a document, printing
--   -- it to standard out.
--   
--   import Text.XML.Expat.Tree
--   import Text.XML.Expat.Format
--   import System.Environment
--   import System.Exit
--   import System.IO
--   import qualified Data.ByteString.Lazy as L
--   
--   main = do
--       args <- getArgs
--       case args of
--           [filename] -> process filename
--           otherwise  -> do
--               hPutStrLn stderr "Usage: helloworld <file.xml>"
--               exitWith $ ExitFailure 1
--   
--   process :: String -> IO ()
--   process filename = do
--       inputText <- L.readFile filename
--       -- Note: Because we're not using the tree, Haskell can't infer the type of
--       -- strings we're using so we need to tell it explicitly with a type signature.
--       let (xml, mErr) = parseTree Nothing inputText :: (UNode String, Maybe XMLParseError)
--       -- Process document before handling error, so we get lazy processing.
--       L.hPutStr stdout $ formatTree xml
--       putStrLn ""
--       case mErr of
--           Nothing -> return ()
--           Just err -> do
--               hPutStrLn stderr $ "XML parse failed: "++show err
--               exitWith $ ExitFailure 2
--   
-- -- Error handling in strict parses is very straightforward - just check -- the Either return value. Lazy parses are not so simple. Here -- are two working examples that illustrate the ways to handle errors. -- Here they are: -- -- Way no. 1 - Using a Maybe value -- --
--   import Text.XML.Expat.Tree
--   import qualified Data.ByteString.Lazy as L
--   import Data.ByteString.Internal (c2w)
--   
--   -- This is the recommended way to handle errors in lazy parses
--   main = do
--       let (tree, mError) = parseTree Nothing (L.pack $ map c2w $ "<top><banana></apple></top>")
--       print (tree :: UNode String)
--       -- Note: We check the error _after_ we have finished our processing on the tree.
--       case mError of
--           Just err -> putStrLn $ "It failed : "++show err
--           Nothing -> putStrLn "Success!"
--   
-- -- Way no. 2 - Using exceptions -- -- Unless exceptions fit in with the design of your program, this way is -- less preferred. -- --
--   ...
--   import Control.Exception.Extensible as E
--   
--   -- This is not the recommended way to handle errors.
--   main = do
--       do
--           let tree = parseTreeThrowing Nothing (L.pack $ map c2w $ "<top><banana></apple></top>")
--           print (tree :: UNode String)
--           -- Because of lazy evaluation, you should not process the tree outside the 'do' block,
--           -- or exceptions could be thrown that won't get caught.
--       `E.catch` (\exc ->
--           case E.fromException exc of
--               Just (XMLParseException err) -> putStrLn $ "It failed : "++show err
--               Nothing -> E.throwIO exc)
--   
module Text.XML.Expat.Tree -- | The tree representation of the XML document. data Node tag text Element :: !tag -> ![(tag, text)] -> [Node tag text] -> Node tag text eName :: Node tag text -> !tag eAttrs :: Node tag text -> ![(tag, text)] eChildren :: Node tag text -> [Node tag text] Text :: !text -> Node tag text -- | Type shortcut for nodes type Nodes tag text = [Node tag text] -- | Type shortcut for attributes type Attributes tag text = [(tag, text)] -- | Type shortcut for a single node with unqualified tag names where tag -- and text are the same string type. type UNode text = Node text text -- | Type shortcut for nodes with unqualified tag names where tag and text -- are the same string type. type UNodes text = Nodes text text -- | Type shortcut for attributes with unqualified names where tag and text -- are the same string type. type UAttributes text = Attributes text text -- | Extract all text content from inside a tag into a single string, -- including any text contained in children. extractText :: (Monoid text) => Node tag text -> text -- | Lazily parse XML to tree. Note that forcing the XMLParseError return -- value will force the entire parse. Therefore, to ensure lazy -- operation, don't check the error status until you have processed the -- tree. parseTree :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> (Node tag text, Maybe XMLParseError) -- | Strictly parse XML to tree. Returns error message or valid parsed -- tree. parseTree' :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> Either XMLParseError (Node tag text) -- | Encoding types available for the document encoding. 
data Encoding ASCII :: Encoding UTF8 :: Encoding UTF16 :: Encoding ISO88591 :: Encoding -- | Parse error, consisting of message text and error location data XMLParseError XMLParseError :: String -> XMLParseLocation -> XMLParseError -- | Specifies a location of an event within the input text data XMLParseLocation XMLParseLocation :: Int64 -> Int64 -> Int64 -> Int64 -> XMLParseLocation -- | Line number of the event xmlLineNumber :: XMLParseLocation -> Int64 -- | Column number of the event xmlColumnNumber :: XMLParseLocation -> Int64 -- | Byte index of event from start of document xmlByteIndex :: XMLParseLocation -> Int64 -- | The number of bytes in the event xmlByteCount :: XMLParseLocation -> Int64 -- | Lazily parse XML to SAX events. In the event of an error, FailDocument -- is the last element of the output list. parseSAX :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [SAXEvent tag text] data SAXEvent tag text StartElement :: tag -> [(tag, text)] -> SAXEvent tag text EndElement :: tag -> SAXEvent tag text CharacterData :: text -> SAXEvent tag text FailDocument :: XMLParseError -> SAXEvent tag text -- | A lower level function that lazily converts a SAX stream into a tree -- structure. saxToTree :: (GenericXMLString tag) => [SAXEvent tag text] -> (Node tag text, Maybe XMLParseError) -- | A variant of parseSAX that gives a document location with each SAX -- event. parseSAXLocations :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [(SAXEvent tag text, XMLParseLocation)] -- | An exception indicating an XML parse error, used by the -- ..Throwing variants. data XMLParseException XMLParseException :: XMLParseError -> XMLParseException -- | Lazily parse XML to SAX events. In the event of an error, throw -- XMLParseException. parseSAXThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [SAXEvent tag text] -- | Lazily parse XML to tree. 
In the event of an error, throw -- XMLParseException. parseTreeThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> Node tag text -- | An abstraction for any string type you want to use as xml text (that -- is, attribute values or element text content). If you want to use a -- new string type with hexpat, you must make it an instance of -- GenericXMLString. class (Monoid s, Eq s) => GenericXMLString s gxNullString :: (GenericXMLString s) => s -> Bool gxToString :: (GenericXMLString s) => s -> String gxFromString :: (GenericXMLString s) => String -> s gxFromChar :: (GenericXMLString s) => Char -> s gxHead :: (GenericXMLString s) => s -> Char gxTail :: (GenericXMLString s) => s -> s gxBreakOn :: (GenericXMLString s) => Char -> s -> (s, s) gxFromCStringLen :: (GenericXMLString s) => CStringLen -> IO s gxToByteString :: (GenericXMLString s) => s -> ByteString instance Typeable XMLParseException instance Eq XMLParseException instance Show XMLParseException instance (Eq tag, Eq text) => Eq (SAXEvent tag text) instance (Show tag, Show text) => Show (SAXEvent tag text) instance (Eq tag, Eq text) => Eq (Node tag text) instance (Show tag, Show text) => Show (Node tag text) instance Exception XMLParseException instance (NFData tag, NFData text) => NFData (SAXEvent tag text) instance (NFData tag, NFData text) => NFData (Node tag text) instance GenericXMLString Text instance GenericXMLString ByteString instance GenericXMLString String -- | This module provides functions to format a tree structure as UTF-8 -- encoded XML. module Text.XML.Expat.Format -- | Format document with <?xml.. header - lazy variant that returns -- lazy ByteString. formatTree :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> ByteString -- | Format document with <?xml.. header - strict variant that returns -- strict ByteString. 
formatTree' :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> ByteString -- | Format XML node with no header - lazy variant that returns lazy -- ByteString. formatNode :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> ByteString -- | Format XML node with no header - strict variant that returns strict -- ByteString. formatNode' :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> ByteString -- | Put interface for formatting a tree with <?xml.. header. putTree :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> Put -- | Put interface for formatting a node with no header. putNode :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> Put -- | In the default representation, qualified tag and attribute names such -- as <abc:hello> are represented just as a string containing a -- colon, e.g. "abc:hello". -- -- This module provides functionality to handle these more intelligently, -- splitting all tag and attribute names into their Prefix and LocalPart -- components. module Text.XML.Expat.Qualified -- | A qualified name. -- -- Qualified names have two parts, a prefix and a local part. The local -- part is the name of the tag. The prefix scopes that name to a -- particular group of legal tags. -- -- The prefix will usually be associated with a namespace URI. This is -- usually achieved by using xmlns attributes to bind prefixes to URIs. data QName text QName :: Maybe text -> !text -> QName text qnPrefix :: QName text -> Maybe text qnLocalPart :: QName text -> !text -- | Type shortcut for a single node where qualified names are used for -- tags type QNode text = Node (QName text) text -- | Type shortcut for nodes where qualified names are used for tags type QNodes text = Nodes (QName text) text -- | Type shortcut for attributes with qualified names type QAttributes text = Attributes (QName text) text -- | Make a new QName from a prefix and localPart. 
mkQName :: text -> text -> QName text -- | Make a new QName with no prefix. mkAnQName :: text -> QName text toQualified :: (GenericXMLString text) => UNode text -> QNode text fromQualified :: (GenericXMLString text) => QNode text -> UNode text instance (Eq text) => Eq (QName text) instance (Show text) => Show (QName text) instance (NFData text) => NFData (QName text) module Text.XML.Expat.Namespaced -- | A namespace-qualified tag. -- -- NName has two components, a local part and an optional namespace. The -- local part is the name of the tag. The namespace is the URI -- identifying collections of declared tags. Tags with the same local -- part but from different namespaces are distinct. Unqualified tags are -- those with no namespace. They are in the default namespace, and all -- uses of an unqualified tag are equivalent. data NName text NName :: Maybe text -> !text -> NName text nnNamespace :: NName text -> Maybe text nnLocalPart :: NName text -> !text -- | Type shortcut for a single node where namespaced names are used for -- tags type NNode text = Node (NName text) text -- | Type shortcut for nodes where namespaced names are used for tags type NNodes text = Nodes (NName text) text -- | Type shortcut for attributes with namespaced names type NAttributes text = Attributes (NName text) text -- | Make a new NName from a namespace and localPart. mkNName :: text -> text -> NName text -- | Make a new NName with no namespace. mkAnNName :: text -> NName text toNamespaced :: (GenericXMLString text, Ord text, Show text) => QNode text -> NNode text fromNamespaced :: (GenericXMLString text, Ord text) => NNode text -> QNode text xmlnsUri :: (GenericXMLString text) => text xmlns :: (GenericXMLString text) => text instance (Eq text) => Eq (NName text) instance (Show text) => Show (NName text) instance (NFData text) => NFData (NName text)