-- | A variant of /Node/ in which Element nodes have an annotation of any type, -- and some concrete functions that annotate with the XML parse location. -- It is assumed you will usually want /Tree/ or /Annotated/, not both, so many -- of the names conflict. -- -- Support for qualified and namespaced trees annotated with location information -- is not complete. module Text.XML.Expat.Annotated ( -- * Tree structure Node(..), Attributes, -- re-export from Tree Nodes, UNode, UNodes, UAttributes, LNode, LNodes, ULNode, ULNodes, textContent, unannotate, -- * Qualified nodes QName(..), QNode, QNodes, QAttributes, QLNode, QLNodes, -- * Namespaced nodes NName (..), NNode, NNodes, NAttributes, NLNode, NLNodes, mkNName, mkAnNName, xmlnsUri, xmlns, -- * Parse to tree parseTree, parseTree', Encoding(..), XMLParseError(..), XMLParseLocation(..), -- * SAX-style parse parseSAX, SAXEvent(..), saxToTree, parseSAXLocations, -- * Variants that throw exceptions XMLParseException(..), parseTreeThrowing, parseSAXThrowing, parseSAXLocationsThrowing, -- * Abstraction of string types GenericXMLString(..) ) where import Text.XML.Expat.Tree hiding (Node(..), Nodes, UNode, UNodes, saxToTree, parseTree, parseTree', parseTreeThrowing, textContent) import qualified Text.XML.Expat.Tree as Tree (Node(..)) import Text.XML.Expat.Qualified hiding (QNode, QNodes) import Text.XML.Expat.Namespaced hiding (NNode, NNodes) import Control.Monad (mplus) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L import Data.Monoid -- | Annotated variant of the tree representation of the XML document. data Node tag text a = Element { eName :: !tag, eAttrs :: ![(tag,text)], eChildren :: [Node tag text a], eAnn :: a } | Text !text deriving (Eq, Show) unannotate :: Node tag text a -> Tree.Node tag text unannotate (Element na at ch _) = (Tree.Element na at (map unannotate ch)) unannotate (Text t) = Tree.Text t -- | Extract all text content from inside a tag into a single string, including -- any text contained in children. textContent :: Monoid text => Node tag text a -> text textContent (Element _ _ children _) = mconcat $ map textContent children textContent (Text txt) = txt -- | Type shortcut for annotated nodes type Nodes tag text a = [Node tag text a] -- | Type shortcut for annotated nodes with unqualified tag names where tag and -- text are the same string type type UNodes text a = Nodes text text a -- | Type shortcut for a single annotated node with unqualified tag names where -- tag and text are the same string type type UNode text a = Node text text a -- | Type shortcut for a single annotated node, annotated with parse location type LNode tag text = Node tag text XMLParseLocation -- | Type shortcut for annotated nodes with location information. type LNodes tag text = [Node tag text XMLParseLocation] -- | Type shortcut for a single node with unqualified tag names where -- tag and text are the same string type, annotated with parse location type ULNode text = LNode text text -- | Type shortcut for nodes with unqualified tag names where -- tag and text are the same string type, annotated with parse location type ULNodes text = LNodes text text -- | Type shortcut for annotated nodes where qualified names are used for tags type QNodes text a = Nodes (QName text) text a -- | Type shortcut for nodes where qualified names are used for tags, annotated with parse location type QLNodes text = LNodes (QName text) text -- | Type shortcut for a single annotated node where qualified names are used for tags type QNode text a = Node (QName text) text a -- | Type shortcut for a single node where qualified names are used for tags, annotated with parse location type QLNode text = LNode (QName text) text -- | Type shortcut for annotated nodes where namespaced names are used for tags type NNodes text a = Nodes (NName text) text a -- | Type shortcut for nodes where namespaced names are used for tags, annotated with parse location type NLNodes text = LNodes (NName text) text -- | Type shortcut for a single annotated node where namespaced names are used for tags type NNode text a = Node (NName text) text a -- | Type shortcut for a single node where namespaced names are used for tags, annotated with parse location type NLNode text = LNode (NName text) text instance Functor (Node tag text) where f `fmap` Element na at ch an = Element na at (map (f `fmap`) ch) (f an) f `fmap` Text t = Text t -- | A lower level function that lazily converts a SAX stream into a tree structure. -- Variant that takes annotations for start tags. saxToTree :: GenericXMLString tag => [(SAXEvent tag text, a)] -> (Node tag text a, Maybe XMLParseError) saxToTree events = let (nodes, mError, _) = ptl events in (safeHead nodes, mError) where safeHead (a:_) = a safeHead [] = Element (gxFromString "") [] [] (error "saxToTree null annotation") ptl ((StartElement name attrs, ann):rem) = let (children, err1, rem') = ptl rem elt = Element name attrs children ann (out, err2, rem'') = ptl rem' in (elt:out, err1 `mplus` err2, rem'') ptl ((EndElement name, _):rem) = ([], Nothing, rem) ptl ((CharacterData txt, _):rem) = let (out, err, rem') = ptl rem in (Text txt:out, err, rem') ptl ((FailDocument err, _):_) = ([], Just err, []) ptl [] = ([], Nothing, []) -- | Lazily parse XML to tree. Note that forcing the XMLParseError return value -- will force the entire parse. Therefore, to ensure lazy operation, don't -- check the error status until you have processed the tree. parseTree :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -- ^ Optional encoding override -> L.ByteString -- ^ Input text (a lazy ByteString) -> (LNode tag text, Maybe XMLParseError) parseTree mEnc bs = saxToTree $ parseSAXLocations mEnc bs -- | Lazily parse XML to tree. In the event of an error, throw 'XMLParseException'. parseTreeThrowing :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -- ^ Optional encoding override -> L.ByteString -- ^ Input text (a lazy ByteString) -> LNode tag text parseTreeThrowing mEnc bs = fst $ saxToTree $ parseSAXLocationsThrowing mEnc bs -- | Strictly parse XML to tree. Returns error message or valid parsed tree. parseTree' :: (GenericXMLString tag, GenericXMLString text) => Maybe Encoding -- ^ Optional encoding override -> B.ByteString -- ^ Input text (a strict ByteString) -> Either XMLParseError (LNode tag text) parseTree' mEnc bs = case parseTree mEnc (L.fromChunks [bs]) of (_, Just err) -> Left err (root, Nothing) -> Right root