module Text.XML.Expat.Tree (
Node,
NodeG(..),
UNode,
module Text.XML.Expat.Internal.NodeClass,
QNode,
module Text.XML.Expat.Internal.Qualified,
NNode,
module Text.XML.Expat.Internal.Namespaced,
ParseOptions(..),
defaultParseOptions,
Encoding(..),
parse,
parse',
parseG,
XMLParseError(..),
XMLParseLocation(..),
parseThrowing,
XMLParseException(..),
saxToTree,
saxToTreeG,
GenericXMLString(..),
eAttrs,
Nodes,
UNodes,
QNodes,
NNodes,
parseTree,
parseTree',
parseSAX,
parseSAXLocations,
parseTreeThrowing,
parseSAXThrowing,
parseSAXLocationsThrowing,
ParserOptions,
defaultParserOptions
) where
import Text.XML.Expat.Internal.IO hiding (parse,parse')
import qualified Text.XML.Expat.Internal.IO as IO
import Text.XML.Expat.SAX ( ParseOptions(..)
, XMLParseException(..)
, SAXEvent(..)
, defaultParseOptions
, textFromCString
, parseSAX
, parseSAXLocations
, parseSAXLocationsThrowing
, parseSAXThrowing
, GenericXMLString(..)
, setEntityDecoder
, ParserOptions
, defaultParserOptions )
import qualified Text.XML.Expat.SAX as SAX
import Text.XML.Expat.Internal.Namespaced
import Text.XML.Expat.Internal.NodeClass
import Text.XML.Expat.Internal.Qualified
import Control.Arrow
import Control.Monad (forM, mplus, mzero)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
import Data.IORef
import Data.List.Class
import Data.Monoid (Monoid,mempty,mappend)
import Control.DeepSeq
import System.IO.Unsafe
data NodeG c tag text =
Element {
eName :: !tag,
eAttributes :: ![(tag,text)],
eChildren :: c (NodeG c tag text)
} |
Text !text
type instance ListOf (NodeG c tag text) = c (NodeG c tag text)
instance (Show tag, Show text) => Show (NodeG [] tag text) where
showsPrec d (Element na at ch) = showParen (d > 10) $
("Element "++) . showsPrec 11 na . (" "++) .
showsPrec 11 at . (" "++) .
showsPrec 11 ch
showsPrec d (Text t) = showParen (d > 10) $ ("Text "++) . showsPrec 11 t
instance (Eq tag, Eq text) => Eq (NodeG [] tag text) where
Element na1 at1 ch1 == Element na2 at2 ch2 =
na1 == na2 &&
at1 == at2 &&
ch1 == ch2
Text t1 == Text t2 = t1 == t2
_ == _ = False
type Node tag text = NodeG [] tag text
eAttrs :: Node tag text -> [(tag, text)]
eAttrs = eAttributes
instance (NFData tag, NFData text) => NFData (NodeG [] tag text) where
rnf (Element nam att chi) = rnf (nam, att, chi)
rnf (Text txt) = rnf txt
type Nodes tag text = [Node tag text]
type UNodes text = Nodes text text
type UNode text = Node text text
type QNodes text = [Node (QName text) text]
type QNode text = Node (QName text) text
type NNodes text = [Node (NName text) text]
type NNode text = Node (NName text) text
instance (Functor c, List c) => NodeClass NodeG c where
textContentM (Element _ _ children) = foldlL mappend mempty $ joinM $ fmap textContentM children
textContentM (Text txt) = return txt
isElement (Element _ _ _) = True
isElement _ = False
isText (Text _) = True
isText _ = False
isCData _ = False
isProcessingInstruction _ = False
isComment _ = False
isNamed _ (Text _) = False
isNamed nm (Element nm' _ _) = nm == nm'
getName (Text _) = mempty
getName (Element name _ _) = name
hasTarget _ _ = False
getTarget _ = mempty
getAttributes (Text _) = []
getAttributes (Element _ attrs _) = attrs
getChildren (Text _) = mzero
getChildren (Element _ _ ch) = ch
getText (Text txt) = txt
getText (Element _ _ _) = mempty
modifyName _ node@(Text _) = node
modifyName f (Element n a c) = Element (f n) a c
modifyAttributes _ node@(Text _) = node
modifyAttributes f (Element n a c) = Element n (f a) c
modifyChildren _ node@(Text _) = node
modifyChildren f (Element n a c) = Element n a (f c)
mapAllTags _ (Text t) = Text t
mapAllTags f (Element n a c) = Element (f n) (map (first f) a) (fmap (mapAllTags f) c)
modifyElement _ (Text t) = Text t
modifyElement f (Element n a c) =
let (n', a', c') = f (n, a, c)
in Element n' a' c'
mapNodeContainer f (Element n a ch) = do
ch' <- mapNodeListContainer f ch
return $ Element n a ch'
mapNodeContainer _ (Text t) = return $ Text t
mkText = Text
instance (Functor c, List c) => MkElementClass NodeG c where
mkElement name attrs children = Element name attrs children
parse' :: (GenericXMLString tag, GenericXMLString text) =>
ParseOptions tag text
-> ByteString
-> Either XMLParseError (Node tag text)
parse' opts doc = unsafePerformIO $ runParse where
runParse = do
let enc = overrideEncoding opts
let mEntityDecoder = entityDecoder opts
parser <- newParser enc
let emptyString = gxFromString ""
stack <- newIORef [Element emptyString [] []]
case mEntityDecoder of
Just deco -> setEntityDecoder parser deco $ \_ txt -> do
modifyIORef stack (text txt)
Nothing -> return ()
setStartElementHandler parser $ \_ cName cAttrs -> do
name <- textFromCString cName
attrs <- forM cAttrs $ \(cAttrName,cAttrValue) -> do
attrName <- textFromCString cAttrName
attrValue <- textFromCString cAttrValue
return (attrName, attrValue)
modifyIORef stack (start name attrs)
return True
setEndElementHandler parser $ \_ _ -> do
modifyIORef stack end
return True
setCharacterDataHandler parser $ \_ cText -> do
txt <- gxFromCStringLen cText
modifyIORef stack (text txt)
return True
mError <- IO.parse' parser doc
case mError of
Just err -> return $ Left err
Nothing -> do
[Element _ _ [root]] <- readIORef stack
return $ Right root
start name attrs stack = Element name attrs [] : stack
text str (cur:rest) = modifyChildren (Text str:) cur : rest
text _ [] = impossible
end (cur:parent:rest) =
let node = modifyChildren reverse cur in
modifyChildren (node:) parent : rest
end _ = impossible
impossible = error "parse' impossible"
parseTree' :: (GenericXMLString tag, GenericXMLString text) =>
Maybe Encoding
-> ByteString
-> Either XMLParseError (Node tag text)
parseTree' enc = parse' (ParseOptions enc Nothing)
saxToTree :: GenericXMLString tag =>
[SAXEvent tag text]
-> (Node tag text, Maybe XMLParseError)
saxToTree events =
let (nodes, mError, _) = ptl events
in (findRoot nodes, mError)
where
findRoot (elt@(Element _ _ _):_) = elt
findRoot (_:nodes) = findRoot nodes
findRoot [] = Element (gxFromString "") [] []
ptl (StartElement name attrs:rema) =
let (children, err1, rema') = ptl rema
elt = Element name attrs children
(out, err2, rema'') = ptl rema'
in (elt:out, err1 `mplus` err2, rema'')
ptl (EndElement _:rema) = ([], Nothing, rema)
ptl (CharacterData txt:rema) =
let (out, err, rema') = ptl rema
in (Text txt:out, err, rema')
ptl (FailDocument err:_) = ([], Just err, [])
ptl (_:rema) = ptl rema
ptl [] = ([], Nothing, [])
saxToTreeG :: (GenericXMLString tag, List l) =>
l (SAXEvent tag text)
-> ItemM l (NodeG l tag text)
saxToTreeG events = do
(elts, _) <- process events
findRoot elts
where
findRoot elts = do
li <- runList elts
case li of
Cons elt@(Element _ _ _ ) _ -> return elt
Cons _ rema -> findRoot rema
Nil -> return $ Element (gxFromString "") mzero mzero
process events = do
li <- runList events
case li of
Nil -> return (mzero, mzero)
Cons (StartElement name attrs) rema -> do
(children, rema') <- process rema
(out, rema'') <- process rema'
return (Element name attrs children `cons` out, rema'')
Cons (EndElement _) rema -> return (mzero, rema)
Cons (CharacterData txt) rema -> do
(out, rema') <- process rema
return (Text txt `cons` out, rema')
Cons _ rema -> process rema
parse :: (GenericXMLString tag, GenericXMLString text) =>
ParseOptions tag text
-> L.ByteString
-> (Node tag text, Maybe XMLParseError)
parse opts bs = saxToTree $ SAX.parse opts bs
parseG :: (GenericXMLString tag, GenericXMLString text, List l) =>
ParseOptions tag text
-> l ByteString
-> ItemM l (NodeG l tag text)
parseG opts = saxToTreeG . SAX.parseG opts
parseTree :: (GenericXMLString tag, GenericXMLString text) =>
Maybe Encoding
-> L.ByteString
-> (Node tag text, Maybe XMLParseError)
parseTree mEnc = parse (ParseOptions mEnc Nothing)
parseThrowing :: (GenericXMLString tag, GenericXMLString text) =>
ParseOptions tag text
-> L.ByteString
-> Node tag text
parseThrowing opts bs = fst $ saxToTree $ SAX.parseThrowing opts bs
parseTreeThrowing :: (GenericXMLString tag, GenericXMLString text) =>
Maybe Encoding
-> L.ByteString
-> Node tag text
parseTreeThrowing mEnc = parseThrowing (ParseOptions mEnc Nothing)