module Text.XML.Expat.Tree (
Node,
NodeG(..),
UNode,
module Text.XML.Expat.NodeClass,
ParserOptions(..),
defaultParserOptions,
Encoding(..),
parse,
parse',
XMLParseError(..),
XMLParseLocation(..),
parseThrowing,
XMLParseException(..),
SAXEvent(..),
saxToTree,
GenericXMLString(..),
eAttrs,
Nodes,
UNodes,
parseTree,
parseTree',
parseSAX,
parseSAXLocations,
parseTreeThrowing,
parseSAXThrowing,
parseSAXLocationsThrowing
) where
import Text.XML.Expat.IO hiding (parse,parse')
import qualified Text.XML.Expat.IO as IO
import Text.XML.Expat.SAX ( ParserOptions(..)
, XMLParseException(..)
, SAXEvent(..)
, defaultParserOptions
, textFromCString
, parseSAX
, parseSAXLocations
, parseSAXLocationsThrowing
, parseSAXThrowing
, GenericXMLString(..) )
import qualified Text.XML.Expat.SAX as SAX
import Text.XML.Expat.NodeClass
import Control.Arrow
import Control.Monad (forM, mplus, mzero)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
import Data.IORef
import Data.List.Class
import Data.Monoid (Monoid,mempty,mappend)
import Control.Parallel.Strategies
import System.IO.Unsafe
import Foreign.C.String
import Foreign.Ptr
data NodeG c tag text =
Element {
eName :: !tag,
eAttributes :: ![(tag,text)],
eChildren :: c (NodeG c tag text)
} |
Text !text
instance (Show tag, Show text) => Show (NodeG [] tag text) where
show (Element na at ch) = "Element "++show na++" "++show at++" "++show ch
show (Text t) = "Text "++show t
instance (Eq tag, Eq text) => Eq (NodeG [] tag text) where
Element na1 at1 ch1 == Element na2 at2 ch2 =
na1 == na2 &&
at1 == at2 &&
ch1 == ch2
Text t1 == Text t2 = t1 == t2
_ == _ = False
type Node = NodeG []
eAttrs :: Node tag text -> [(tag, text)]
eAttrs = eAttributes
instance (NFData tag, NFData text) => NFData (NodeG [] tag text) where
rnf (Element nam att chi) = rnf (nam, att, chi)
rnf (Text txt) = rnf txt
type Nodes tag text = [Node tag text]
type UNodes text = Nodes text text
type UNode text = Node text text
instance (Functor c, List c) => NodeClass NodeG c where
textContentM (Element _ _ children) = foldlL mappend mempty $ joinM $ fmap textContentM children
textContentM (Text txt) = return txt
isElement (Element _ _ _) = True
isElement _ = False
isText (Text _) = True
isText _ = False
isNamed _ (Text _) = False
isNamed nm (Element nm' _ _) = nm == nm'
getName (Text _) = mempty
getName (Element name _ _) = name
getAttributes (Text _) = []
getAttributes (Element _ attrs _) = attrs
getChildren (Text _) = mzero
getChildren (Element _ _ ch) = ch
getText (Text txt) = txt
getText (Element _ _ _) = mempty
modifyName _ node@(Text _) = node
modifyName f (Element n a c) = Element (f n) a c
modifyAttributes _ node@(Text _) = node
modifyAttributes f (Element n a c) = Element n (f a) c
modifyChildren _ node@(Text _) = node
modifyChildren f (Element n a c) = Element n a (f c)
mapAllTags _ (Text t) = Text t
mapAllTags f (Element n a c) = Element (f n) (map (first f) a) (fmap (mapAllTags f) c)
mapElement _ (Text t) = Text t
mapElement f (Element n a c) =
let (n', a', c') = f (n, a, c)
in Element n' a' c'
mapNodeContainer f (Element n a ch) = do
ch' <- f ch
return $ Element n a ch'
mapNodeContainer _ (Text t) = return $ Text t
mkText = Text
instance (Functor c, List c) => MkElementClass NodeG c where
mkElement name attrs children = Element name attrs children
setEntityDecoder :: (GenericXMLString tag, GenericXMLString text)
=> Parser
-> IORef [Node tag text]
-> (tag -> Maybe text)
-> IO ()
setEntityDecoder parser queueRef decoder = do
setUseForeignDTD parser True
setExternalEntityRefHandler parser eh
setSkippedEntityHandler parser skip
where
text str (cur:rest) = modifyChildren (Text str:) cur : rest
text _ [] = undefined
skip _ _ 1 = return False
skip _ entityName 0 = do
en <- textFromCString entityName
let mbt = decoder en
maybe (return False)
(\t -> do
modifyIORef queueRef $ text t
return True)
mbt
skip _ _ _ = undefined
eh p ctx _ systemID publicID =
if systemID == nullPtr && publicID == nullPtr
then withCStringLen "" $ \c -> do
parseExternalEntityReference p ctx Nothing c
else return False
parse' :: (GenericXMLString tag, GenericXMLString text) =>
ParserOptions tag text
-> ByteString
-> Either XMLParseError (Node tag text)
parse' opts doc = unsafePerformIO $ runParse where
runParse = do
let enc = parserEncoding opts
let mEntityDecoder = entityDecoder opts
parser <- newParser enc
let emptyString = gxFromString ""
stack <- newIORef [Element emptyString [] []]
maybe (return ())
(setEntityDecoder parser stack)
mEntityDecoder
setStartElementHandler parser $ \_ cName cAttrs -> do
name <- textFromCString cName
attrs <- forM cAttrs $ \(cAttrName,cAttrValue) -> do
attrName <- textFromCString cAttrName
attrValue <- textFromCString cAttrValue
return (attrName, attrValue)
modifyIORef stack (start name attrs)
return True
setEndElementHandler parser $ \_ _ -> do
modifyIORef stack end
return True
setCharacterDataHandler parser $ \_ cText -> do
txt <- gxFromCStringLen cText
modifyIORef stack (text txt)
return True
mError <- IO.parse' parser doc
case mError of
Just err -> return $ Left err
Nothing -> do
[Element _ _ [root]] <- readIORef stack
return $ Right root
start name attrs stack = Element name attrs [] : stack
text str (cur:rest) = modifyChildren (Text str:) cur : rest
text _ [] = impossible
end (cur:parent:rest) =
let node = modifyChildren reverse cur in
modifyChildren (node:) parent : rest
end _ = impossible
impossible = error "parse' impossible"
parseTree' :: (GenericXMLString tag, GenericXMLString text) =>
Maybe Encoding
-> ByteString
-> Either XMLParseError (Node tag text)
parseTree' enc = parse' (ParserOptions enc Nothing)
saxToTree :: GenericXMLString tag =>
[SAXEvent tag text]
-> (Node tag text, Maybe XMLParseError)
saxToTree events =
let (nodes, mError, _) = ptl events
in (safeHead nodes, mError)
where
safeHead (a:_) = a
safeHead [] = Element (gxFromString "") [] []
ptl (StartElement name attrs:rema) =
let (children, err1, rema') = ptl rema
elt = Element name attrs children
(out, err2, rema'') = ptl rema'
in (elt:out, err1 `mplus` err2, rema'')
ptl (EndElement _:rema) = ([], Nothing, rema)
ptl (CharacterData txt:rema) =
let (out, err, rema') = ptl rema
in (Text txt:out, err, rema')
ptl (FailDocument err:_) = ([], Just err, [])
ptl [] = ([], Nothing, [])
parse :: (GenericXMLString tag, GenericXMLString text) =>
ParserOptions tag text
-> L.ByteString
-> (Node tag text, Maybe XMLParseError)
parse opts bs = saxToTree $ SAX.parse opts bs
parseTree :: (GenericXMLString tag, GenericXMLString text) =>
Maybe Encoding
-> L.ByteString
-> (Node tag text, Maybe XMLParseError)
parseTree mEnc = parse (ParserOptions mEnc Nothing)
parseThrowing :: (GenericXMLString tag, GenericXMLString text) =>
ParserOptions tag text
-> L.ByteString
-> Node tag text
parseThrowing opts bs = fst $ saxToTree $ SAX.parseThrowing opts bs
parseTreeThrowing :: (GenericXMLString tag, GenericXMLString text) =>
Maybe Encoding
-> L.ByteString
-> Node tag text
parseTreeThrowing mEnc = parseThrowing (ParserOptions mEnc Nothing)