-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | wrapper for expat, the fast XML parser -- -- Expat (http://expat.sourceforge.net/) is a stream-oriented XML -- parser written in C. -- -- This package provides a Haskell binding for Expat, with a choice of -- tree or SAX-style representation, and it includes an -- XML formatter. It is extensible to any string type, with -- String, ByteString and Text provided out of -- the box. -- -- The emphasis is on speed and simplicity. If you want more complete and -- powerful XML libraries, consider using HaXml or HXT -- instead. -- -- Examples and benchmarks: http://haskell.org/haskellwiki/Hexpat/ -- -- DARCS repository: http://code.haskell.org/hexpat/ @package hexpat @version 0.10 -- | Low-level interface to Expat. Unless speed is paramount, this should -- normally be avoided in favour of the interface provided by -- Text-XML-Expat-Tree. Basic usage is: -- --
-- -- | A "hello world" example of hexpat that lazily parses a document, printing -- -- it to standard out. -- -- import Text.XML.Expat.Tree -- import Text.XML.Expat.Format -- import System.Environment -- import System.Exit -- import System.IO -- import qualified Data.ByteString.Lazy as L -- -- main = do -- args <- getArgs -- case args of -- [filename] -> process filename -- otherwise -> do -- hPutStrLn stderr "Usage: helloworld <file.xml>" -- exitWith $ ExitFailure 1 -- -- process :: String -> IO () -- process filename = do -- inputText <- L.readFile filename -- -- Note: Because we're not using the tree, Haskell can't infer the type of -- -- strings we're using so we need to tell it explicitly with a type signature. -- let (xml, mErr) = parseTree Nothing inputText :: (UNode String, Maybe XMLParseError) -- -- Process document before handling error, so we get lazy processing. -- L.hPutStr stdout $ formatTree xml -- putStrLn "" -- case mErr of -- Nothing -> return () -- Just err -> do -- hPutStrLn stderr $ "XML parse failed: "++show err -- exitWith $ ExitFailure 2 ---- -- Error handling in strict parses is very straightforward - just check -- the Either return value. Lazy parses are not so simple. Here -- are two working examples that illustrate the ways to handle errors: -- -- Way no. 1 - Using a Maybe value -- --
-- import Text.XML.Expat.Tree -- import qualified Data.ByteString.Lazy as L -- import Data.ByteString.Internal (c2w) -- -- -- This is the recommended way to handle errors in lazy parses -- main = do -- let (tree, mError) = parseTree Nothing (L.pack $ map c2w $ "<top><banana></apple></top>") -- print (tree :: UNode String) -- -- Note: We check the error _after_ we have finished our processing on the tree. -- case mError of -- Just err -> putStrLn $ "It failed : "++show err -- Nothing -> putStrLn "Success!" ---- -- Way no. 2 - Using exceptions -- -- Unless exceptions fit in with the design of your program, this way is -- less preferred. -- --
-- ... -- import Control.Exception.Extensible as E -- -- -- This is not the recommended way to handle errors. -- main = do -- do -- let tree = parseTreeThrowing Nothing (L.pack $ map c2w $ "<top><banana></apple></top>") -- print (tree :: UNode String) -- -- Because of lazy evaluation, you should not process the tree outside the 'do' block, -- -- or exceptions could be thrown that won't get caught. -- `E.catch` (\exc -> -- case E.fromException exc of -- Just (XMLParseException err) -> putStrLn $ "It failed : "++show err -- Nothing -> E.throwIO exc) --module Text.XML.Expat.Tree -- | The tree representation of the XML document. data Node tag text Element :: !tag -> ![(tag, text)] -> [Node tag text] -> Node tag text eName :: Node tag text -> !tag eAttrs :: Node tag text -> ![(tag, text)] eChildren :: Node tag text -> [Node tag text] Text :: !text -> Node tag text -- | Type shortcut for nodes type Nodes tag text = [Node tag text] -- | Type shortcut for attributes type Attributes tag text = [(tag, text)] -- | Type shortcut for a single node with unqualified tag names where tag -- and text are the same string type. type UNode text = Node text text -- | Type shortcut for nodes with unqualified tag names where tag and text -- are the same string type. type UNodes text = Nodes text text -- | Type shortcut for attributes with unqualified names where tag and text -- are the same string type. type UAttributes text = Attributes text text -- | Extract all text content from inside a tag into a single string, -- including any text contained in children. textContent :: (Monoid text) => Node tag text -> text -- | Deprecated - renamed to textContent. extractText :: (Monoid text) => Node tag text -> text -- | Lazily parse XML to tree. Note that forcing the XMLParseError return -- value will force the entire parse. Therefore, to ensure lazy -- operation, don't check the error status until you have processed the -- tree. 
parseTree :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> (Node tag text, Maybe XMLParseError) -- | Strictly parse XML to tree. Returns error message or valid parsed -- tree. parseTree' :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> Either XMLParseError (Node tag text) -- | Encoding types available for the document encoding. data Encoding ASCII :: Encoding UTF8 :: Encoding UTF16 :: Encoding ISO88591 :: Encoding -- | Parse error, consisting of message text and error location data XMLParseError XMLParseError :: String -> XMLParseLocation -> XMLParseError -- | Specifies a location of an event within the input text data XMLParseLocation XMLParseLocation :: Int64 -> Int64 -> Int64 -> Int64 -> XMLParseLocation -- | Line number of the event xmlLineNumber :: XMLParseLocation -> Int64 -- | Column number of the event xmlColumnNumber :: XMLParseLocation -> Int64 -- | Byte index of event from start of document xmlByteIndex :: XMLParseLocation -> Int64 -- | The number of bytes in the event xmlByteCount :: XMLParseLocation -> Int64 -- | Lazily parse XML to SAX events. In the event of an error, FailDocument -- is the last element of the output list. parseSAX :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [SAXEvent tag text] data SAXEvent tag text StartElement :: tag -> [(tag, text)] -> SAXEvent tag text EndElement :: tag -> SAXEvent tag text CharacterData :: text -> SAXEvent tag text FailDocument :: XMLParseError -> SAXEvent tag text -- | A lower level function that lazily converts a SAX stream into a tree -- structure. saxToTree :: (GenericXMLString tag) => [SAXEvent tag text] -> (Node tag text, Maybe XMLParseError) -- | A variant of parseSAX that gives a document location with each SAX -- event. 
parseSAXLocations :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [(SAXEvent tag text, XMLParseLocation)] -- | An exception indicating an XML parse error, used by the -- ..Throwing variants. data XMLParseException XMLParseException :: XMLParseError -> XMLParseException -- | Lazily parse XML to tree. In the event of an error, throw -- XMLParseException. parseTreeThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> Node tag text -- | Lazily parse XML to SAX events. In the event of an error, throw -- XMLParseException. parseSAXThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [SAXEvent tag text] -- | A variant of parseSAX that gives a document location with each SAX -- event. In the event of an error, throw XMLParseException. parseSAXLocationsThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [(SAXEvent tag text, XMLParseLocation)] -- | An abstraction for any string type you want to use as xml text (that -- is, attribute values or element text content). If you want to use a -- new string type with hexpat, you must make it an instance of -- GenericXMLString. 
class (Monoid s, Eq s) => GenericXMLString s gxNullString :: (GenericXMLString s) => s -> Bool gxToString :: (GenericXMLString s) => s -> String gxFromString :: (GenericXMLString s) => String -> s gxFromChar :: (GenericXMLString s) => Char -> s gxHead :: (GenericXMLString s) => s -> Char gxTail :: (GenericXMLString s) => s -> s gxBreakOn :: (GenericXMLString s) => Char -> s -> (s, s) gxFromCStringLen :: (GenericXMLString s) => CStringLen -> IO s gxToByteString :: (GenericXMLString s) => s -> ByteString instance Typeable XMLParseException instance Eq XMLParseException instance Show XMLParseException instance (Eq tag, Eq text) => Eq (SAXEvent tag text) instance (Show tag, Show text) => Show (SAXEvent tag text) instance (Eq tag, Eq text) => Eq (Node tag text) instance (Show tag, Show text) => Show (Node tag text) instance Exception XMLParseException instance (NFData tag, NFData text) => NFData (SAXEvent tag text) instance (NFData tag, NFData text) => NFData (Node tag text) instance GenericXMLString Text instance GenericXMLString ByteString instance GenericXMLString String -- | This module provides functions to format a tree structure or SAX -- stream as UTF-8 encoded XML. module Text.XML.Expat.Format -- | Format document with <?xml.. header - lazy variant that returns -- lazy ByteString. formatTree :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> ByteString -- | Format document with <?xml.. header - strict variant that returns -- strict ByteString. formatTree' :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> ByteString -- | Format XML node with no header - lazy variant that returns lazy -- ByteString. formatNode :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> ByteString -- | Format XML node with no header - strict variant that returns strict -- ByteString. 
formatNode' :: (GenericXMLString tag, GenericXMLString text) => Node tag text -> ByteString xmlHeader :: ByteString -- | Flatten a tree structure into SAX events. treeToSAX :: Node tag text -> [SAXEvent tag text] -- | Format SAX events with no header - lazy variant that returns lazy -- ByteString. formatSAX :: (GenericXMLString tag, GenericXMLString text) => [SAXEvent tag text] -> ByteString -- | Format SAX events with no header - strict variant that returns strict -- ByteString. formatSAX' :: (GenericXMLString tag, GenericXMLString text) => [SAXEvent tag text] -> ByteString -- | In the default representation, qualified tag and attribute names such -- as <abc:hello> are represented just as a string containing a -- colon, e.g. "abc:hello". -- -- This module provides functionality to handle these more intelligently, -- splitting all tag and attribute names into their Prefix and LocalPart -- components. module Text.XML.Expat.Qualified -- | A qualified name. -- -- Qualified names have two parts, a prefix and a local part. The local -- part is the name of the tag. The prefix scopes that name to a -- particular group of legal tags. -- -- The prefix will usually be associated with a namespace URI. This is -- usually achieved by using xmlns attributes to bind prefixes to URIs. data QName text QName :: Maybe text -> !text -> QName text qnPrefix :: QName text -> Maybe text qnLocalPart :: QName text -> !text -- | Type shortcut for a single node where qualified names are used for -- tags type QNode text = Node (QName text) text -- | Type shortcut for nodes where qualified names are used for tags type QNodes text = Nodes (QName text) text -- | Type shortcut for attributes with qualified names type QAttributes text = Attributes (QName text) text -- | Make a new QName from a prefix and localPart. mkQName :: text -> text -> QName text -- | Make a new QName with no prefix. 
mkAnQName :: text -> QName text toQualified :: (GenericXMLString text) => UNode text -> QNode text fromQualified :: (GenericXMLString text) => QNode text -> UNode text instance (Eq text) => Eq (QName text) instance (Show text) => Show (QName text) instance (NFData text) => NFData (QName text) module Text.XML.Expat.Namespaced -- | A namespace-qualified tag. -- -- NName has two components, a local part and an optional namespace. The -- local part is the name of the tag. The namespace is the URI -- identifying collections of declared tags. Tags with the same local -- part but from different namespaces are distinct. Unqualified tags are -- those with no namespace. They are in the default namespace, and all -- uses of an unqualified tag are equivalent. data NName text NName :: Maybe text -> !text -> NName text nnNamespace :: NName text -> Maybe text nnLocalPart :: NName text -> !text -- | Type shortcut for a single node where namespaced names are used for -- tags type NNode text = Node (NName text) text -- | Type shortcut for nodes where namespaced names are used for tags type NNodes text = Nodes (NName text) text -- | Type shortcut for attributes with namespaced names type NAttributes text = Attributes (NName text) text -- | Make a new NName from a namespace and localPart. mkNName :: text -> text -> NName text -- | Make a new NName with no namespace. mkAnNName :: text -> NName text toNamespaced :: (GenericXMLString text, Ord text, Show text) => QNode text -> NNode text fromNamespaced :: (GenericXMLString text, Ord text) => NNode text -> QNode text xmlnsUri :: (GenericXMLString text) => text xmlns :: (GenericXMLString text) => text instance (Eq text) => Eq (NName text) instance (Show text) => Show (NName text) instance (NFData text) => NFData (NName text) -- | A variant of Node in which Element nodes have an annotation of -- any type, and some concrete functions that annotate with the XML parse -- location. 
It is assumed you will usually want Tree or -- Annotated, not both, so many of the names conflict. -- -- Support for qualified and namespaced trees annotated with location -- information is not complete. module Text.XML.Expat.Annotated -- | Annotated variant of the tree representation of the XML document. data Node tag text a Element :: !tag -> ![(tag, text)] -> [Node tag text a] -> a -> Node tag text a eName :: Node tag text a -> !tag eAttrs :: Node tag text a -> ![(tag, text)] eChildren :: Node tag text a -> [Node tag text a] eAnn :: Node tag text a -> a Text :: !text -> Node tag text a -- | Type shortcut for attributes type Attributes tag text = [(tag, text)] -- | Type shortcut for annotated nodes type Nodes tag text a = [Node tag text a] -- | Type shortcut for a single annotated node with unqualified tag names -- where tag and text are the same string type type UNode text a = Node text text a -- | Type shortcut for annotated nodes with unqualified tag names where tag -- and text are the same string type type UNodes text a = Nodes text text a -- | Type shortcut for attributes with unqualified names where tag and text -- are the same string type. type UAttributes text = Attributes text text -- | Type shortcut for a single annotated node, annotated with parse -- location type LNode tag text = Node tag text XMLParseLocation -- | Type shortcut for annotated nodes with location information. type LNodes tag text = [Node tag text XMLParseLocation] -- | Type shortcut for a single node with unqualified tag names where tag -- and text are the same string type, annotated with parse location type ULNode text = LNode text text -- | Type shortcut for nodes with unqualified tag names where tag and text -- are the same string type, annotated with parse location type ULNodes text = LNodes text text -- | Extract all text content from inside a tag into a single string, -- including any text contained in children. 
textContent :: (Monoid text) => Node tag text a -> text unannotate :: Node tag text a -> Node tag text -- | A qualified name. -- -- Qualified names have two parts, a prefix and a local part. The local -- part is the name of the tag. The prefix scopes that name to a -- particular group of legal tags. -- -- The prefix will usually be associated with a namespace URI. This is -- usually achieved by using xmlns attributes to bind prefixes to URIs. data QName text QName :: Maybe text -> !text -> QName text qnPrefix :: QName text -> Maybe text qnLocalPart :: QName text -> !text -- | Type shortcut for a single annotated node where qualified names are -- used for tags type QNode text a = Node (QName text) text a -- | Type shortcut for annotated nodes where qualified names are used for -- tags type QNodes text a = Nodes (QName text) text a -- | Type shortcut for attributes with qualified names type QAttributes text = Attributes (QName text) text -- | Type shortcut for a single node where qualified names are used for -- tags, annotated with parse location type QLNode text = LNode (QName text) text -- | Type shortcut for nodes where qualified names are used for tags, -- annotated with parse location type QLNodes text = LNodes (QName text) text -- | A namespace-qualified tag. -- -- NName has two components, a local part and an optional namespace. The -- local part is the name of the tag. The namespace is the URI -- identifying collections of declared tags. Tags with the same local -- part but from different namespaces are distinct. Unqualified tags are -- those with no namespace. They are in the default namespace, and all -- uses of an unqualified tag are equivalent. 
data NName text NName :: Maybe text -> !text -> NName text nnNamespace :: NName text -> Maybe text nnLocalPart :: NName text -> !text -- | Type shortcut for a single annotated node where namespaced names are -- used for tags type NNode text a = Node (NName text) text a -- | Type shortcut for annotated nodes where namespaced names are used for -- tags type NNodes text a = Nodes (NName text) text a -- | Type shortcut for attributes with namespaced names type NAttributes text = Attributes (NName text) text -- | Type shortcut for a single node where namespaced names are used for -- tags, annotated with parse location type NLNode text = LNode (NName text) text -- | Type shortcut for nodes where namespaced names are used for tags, -- annotated with parse location type NLNodes text = LNodes (NName text) text -- | Make a new NName from a namespace and localPart. mkNName :: text -> text -> NName text -- | Make a new NName with no namespace. mkAnNName :: text -> NName text xmlnsUri :: (GenericXMLString text) => text xmlns :: (GenericXMLString text) => text -- | Lazily parse XML to tree. Note that forcing the XMLParseError return -- value will force the entire parse. Therefore, to ensure lazy -- operation, don't check the error status until you have processed the -- tree. parseTree :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> (LNode tag text, Maybe XMLParseError) -- | Strictly parse XML to tree. Returns error message or valid parsed -- tree. parseTree' :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> Either XMLParseError (LNode tag text) -- | Encoding types available for the document encoding. 
data Encoding ASCII :: Encoding UTF8 :: Encoding UTF16 :: Encoding ISO88591 :: Encoding -- | Parse error, consisting of message text and error location data XMLParseError XMLParseError :: String -> XMLParseLocation -> XMLParseError -- | Specifies a location of an event within the input text data XMLParseLocation XMLParseLocation :: Int64 -> Int64 -> Int64 -> Int64 -> XMLParseLocation -- | Line number of the event xmlLineNumber :: XMLParseLocation -> Int64 -- | Column number of the event xmlColumnNumber :: XMLParseLocation -> Int64 -- | Byte index of event from start of document xmlByteIndex :: XMLParseLocation -> Int64 -- | The number of bytes in the event xmlByteCount :: XMLParseLocation -> Int64 -- | Lazily parse XML to SAX events. In the event of an error, FailDocument -- is the last element of the output list. parseSAX :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [SAXEvent tag text] data SAXEvent tag text StartElement :: tag -> [(tag, text)] -> SAXEvent tag text EndElement :: tag -> SAXEvent tag text CharacterData :: text -> SAXEvent tag text FailDocument :: XMLParseError -> SAXEvent tag text -- | A lower level function that lazily converts a SAX stream into a tree -- structure. Variant that takes annotations for start tags. saxToTree :: (GenericXMLString tag) => [(SAXEvent tag text, a)] -> (Node tag text a, Maybe XMLParseError) -- | A variant of parseSAX that gives a document location with each SAX -- event. parseSAXLocations :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [(SAXEvent tag text, XMLParseLocation)] -- | An exception indicating an XML parse error, used by the -- ..Throwing variants. data XMLParseException XMLParseException :: XMLParseError -> XMLParseException -- | Lazily parse XML to tree. In the event of an error, throw -- XMLParseException. 
parseTreeThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> LNode tag text -- | Lazily parse XML to SAX events. In the event of an error, throw -- XMLParseException. parseSAXThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [SAXEvent tag text] -- | A variant of parseSAX that gives a document location with each SAX -- event. In the event of an error, throw XMLParseException. parseSAXLocationsThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -> ByteString -> [(SAXEvent tag text, XMLParseLocation)] -- | An abstraction for any string type you want to use as xml text (that -- is, attribute values or element text content). If you want to use a -- new string type with hexpat, you must make it an instance of -- GenericXMLString. class (Monoid s, Eq s) => GenericXMLString s gxNullString :: (GenericXMLString s) => s -> Bool gxToString :: (GenericXMLString s) => s -> String gxFromString :: (GenericXMLString s) => String -> s gxFromChar :: (GenericXMLString s) => Char -> s gxHead :: (GenericXMLString s) => s -> Char gxTail :: (GenericXMLString s) => s -> s gxBreakOn :: (GenericXMLString s) => Char -> s -> (s, s) gxFromCStringLen :: (GenericXMLString s) => CStringLen -> IO s gxToByteString :: (GenericXMLString s) => s -> ByteString instance (Eq tag, Eq text, Eq a) => Eq (Node tag text a) instance (Show tag, Show text, Show a) => Show (Node tag text a) instance Functor (Node tag text)