module Text.XML.Expat.IO (
Parser, newParser,
parse, parse', parseChunk, Encoding(..), XMLParseError(..),
getParseLocation,
XMLParseLocation(..),
StartElementHandler,
EndElementHandler,
CharacterDataHandler,
ExternalEntityRefHandler,
SkippedEntityHandler,
setStartElementHandler,
setEndElementHandler,
setCharacterDataHandler,
setExternalEntityRefHandler,
setSkippedEntityHandler,
setUseForeignDTD,
parseExternalEntityReference,
unsafeParseChunk,
withHandlers,
unsafeSetHandlers,
unsafeReleaseHandlers,
ExpatHandlers,
encodingToString
) where
import Control.Exception (bracket)
import Control.Parallel.Strategies
import Control.Monad
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Internal as BSI
import Data.Maybe (maybe)
import Data.IORef
import Foreign
import CForeign
type ParserPtr = Ptr ()
data Parser = Parser
{ _parserObj :: ForeignPtr ()
, _startElementHandler :: IORef CStartElementHandler
, _endElementHandler :: IORef CEndElementHandler
, _cdataHandler :: IORef CCharacterDataHandler
, _externalEntityRefHandler :: IORef (Maybe CExternalEntityRefHandler)
, _skippedEntityHandler :: IORef (Maybe CSkippedEntityHandler)
}
instance Show Parser where
showsPrec _ (Parser fp _ _ _ _ _) = showsPrec 0 fp
withParser :: Parser -> (ParserPtr -> IO a) -> IO a
withParser (Parser fp _ _ _ _ _) = withForeignPtr fp
data Encoding = ASCII | UTF8 | UTF16 | ISO88591
encodingToString :: Encoding -> String
encodingToString ASCII = "US-ASCII"
encodingToString UTF8 = "UTF-8"
encodingToString UTF16 = "UTF-16"
encodingToString ISO88591 = "ISO-8859-1"
withOptEncoding :: Maybe Encoding -> (CString -> IO a) -> IO a
withOptEncoding Nothing f = f nullPtr
withOptEncoding (Just enc) f = withCString (encodingToString enc) f
parserCreate :: Maybe Encoding -> IO (ParserPtr)
parserCreate a1 =
withOptEncoding a1 $ \a1' ->
parserCreate'_ a1' >>= \res ->
let {res' = id res} in
return (res')
newParser :: Maybe Encoding -> IO Parser
newParser enc = do
ptr <- parserCreate enc
fptr <- newForeignPtr parserFree ptr
nullStartH <- newIORef nullCStartElementHandler
nullEndH <- newIORef nullCEndElementHandler
nullCharH <- newIORef nullCCharacterDataHandler
extH <- newIORef Nothing
skipH <- newIORef Nothing
return $ Parser fptr nullStartH nullEndH nullCharH extH skipH
setUseForeignDTD :: Parser -> Bool -> IO ()
setUseForeignDTD p b = withParser p $ \p' -> xmlUseForeignDTD p' b'
where
b' = if b then 1 else 0
withBStringLen :: BS.ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen bs f = do
BS.useAsCStringLen bs $ \(str, len) -> f (str, fromIntegral len)
unStatus :: CInt -> Bool
unStatus 0 = False
unStatus _ = True
parse :: Parser -> BSL.ByteString -> IO (Maybe XMLParseError)
parse parser bs = withHandlers parser $ do
ok <- doParseChunks (BSL.toChunks bs)
if ok
then return Nothing
else Just `fmap` getError parser
where
doParseChunks [] = doParseChunk parser BS.empty True
doParseChunks (c:cs) = do
ok <- doParseChunk parser c False
if ok
then doParseChunks cs
else return False
parse' :: Parser -> BS.ByteString -> IO (Maybe XMLParseError)
parse' parser bs = withHandlers parser $ do
ok <- doParseChunk parser bs True
if ok
then return Nothing
else Just `fmap` getError parser
parseChunk :: Parser
-> BS.ByteString
-> Bool
-> IO (Maybe XMLParseError)
parseChunk parser xml final = withHandlers parser $ unsafeParseChunk parser xml final
parseExternalEntityReference :: Parser
-> CString
-> Maybe Encoding
-> CStringLen
-> IO Bool
parseExternalEntityReference parser context encoding (text,sz) =
withParser parser $ \p -> do
extp <- withOptEncoding encoding $
xmlExternalEntityParserCreate p context
e <- doParseChunk'_ extp text (fromIntegral sz) 1
parserFree' extp
return $ e == 1
unsafeParseChunk :: Parser
-> BS.ByteString
-> Bool
-> IO (Maybe XMLParseError)
unsafeParseChunk parser xml final = do
ok <- doParseChunk parser xml final
if ok
then return Nothing
else Just `fmap` getError parser
getError :: Parser -> IO XMLParseError
getError parser = withParser parser $ \p -> do
code <- xmlGetErrorCode p
cerr <- xmlErrorString code
err <- peekCString cerr
loc <- getParseLocation parser
return $ XMLParseError err loc
data ExpatHandlers = ExpatHandlers
(FunPtr CStartElementHandler)
(FunPtr CEndElementHandler)
(FunPtr CCharacterDataHandler)
(Maybe (FunPtr CExternalEntityRefHandler))
(Maybe (FunPtr CSkippedEntityHandler))
unsafeSetHandlers :: Parser -> IO ExpatHandlers
unsafeSetHandlers parser@(Parser _ startRef endRef charRef extRef skipRef) =
do
cStartH <- mkCStartElementHandler =<< readIORef startRef
cEndH <- mkCEndElementHandler =<< readIORef endRef
cCharH <- mkCCharacterDataHandler =<< readIORef charRef
mExtH <- readIORef extRef >>=
maybe (return Nothing)
(\h -> liftM Just $ mkCExternalEntityRefHandler h)
mSkipH <- readIORef skipRef >>=
maybe (return Nothing)
(\h -> liftM Just $ mkCSkippedEntityHandler h)
withParser parser $ \p -> do
xmlSetstartelementhandler p cStartH
xmlSetendelementhandler p cEndH
xmlSetcharacterdatahandler p cCharH
maybe (return ())
(xmlSetExternalEntityRefHandler p)
mExtH
maybe (return ())
(xmlSetSkippedEntityHandler p)
mSkipH
return $ ExpatHandlers cStartH cEndH cCharH mExtH mSkipH
unsafeReleaseHandlers :: ExpatHandlers -> IO ()
unsafeReleaseHandlers (ExpatHandlers cStartH cEndH cCharH mcExtH mcSkipH) = do
freeHaskellFunPtr cStartH
freeHaskellFunPtr cEndH
freeHaskellFunPtr cCharH
maybe (return ()) freeHaskellFunPtr mcExtH
maybe (return ()) freeHaskellFunPtr mcSkipH
withHandlers :: Parser
-> IO a
-> IO a
withHandlers parser code = do
bracket
(unsafeSetHandlers parser)
unsafeReleaseHandlers
(\_ -> code)
cFromBool :: Num a => Bool -> a
cFromBool = fromBool
doParseChunk :: Parser -> BS.ByteString -> Bool -> IO (Bool)
doParseChunk a1 a2 a3 =
withParser a1 $ \a1' ->
withBStringLen a2 $ \(a2'1, a2'2) ->
let {a3' = cFromBool a3} in
doParseChunk'_ a1' a2'1 a2'2 a3' >>= \res ->
let {res' = unStatus res} in
return (res')
data XMLParseError = XMLParseError String XMLParseLocation deriving (Eq, Show)
instance NFData XMLParseError where
rnf (XMLParseError msg loc) = rnf (msg, loc)
data XMLParseLocation = XMLParseLocation {
xmlLineNumber :: Int64,
xmlColumnNumber :: Int64,
xmlByteIndex :: Int64,
xmlByteCount :: Int64
}
deriving (Eq, Show)
instance NFData XMLParseLocation where
rnf (XMLParseLocation lin col ind cou) = rnf (lin, col, ind, cou)
getParseLocation :: Parser -> IO XMLParseLocation
getParseLocation parser = withParser parser $ \p -> do
line <- xmlGetCurrentLineNumber p
col <- xmlGetCurrentColumnNumber p
index <- xmlGetCurrentByteIndex p
count <- xmlGetCurrentByteCount p
return $ XMLParseLocation {
xmlLineNumber = fromIntegral line,
xmlColumnNumber = fromIntegral col,
xmlByteIndex = fromIntegral index,
xmlByteCount = fromIntegral count
}
type StartElementHandler = CString -> [(CString, CString)] -> IO Bool
type EndElementHandler = CString -> IO Bool
type CharacterDataHandler = CStringLen -> IO Bool
type ExternalEntityRefHandler = Parser
-> CString
-> CString
-> CString
-> CString
-> IO Bool
type SkippedEntityHandler = CString
-> Int
-> IO Bool
type CStartElementHandler = Ptr () -> CString -> Ptr CString -> IO ()
nullCStartElementHandler :: CStartElementHandler
nullCStartElementHandler _ _ _ = return ()
foreign import ccall safe "wrapper"
mkCStartElementHandler :: CStartElementHandler
-> IO (FunPtr CStartElementHandler)
wrapStartElementHandler :: Parser -> StartElementHandler -> CStartElementHandler
wrapStartElementHandler parser handler = h
where
h _ cname cattrs = do
cattrlist <- peekArray0 nullPtr cattrs
stillRunning <- handler cname (pairwise cattrlist)
unless stillRunning $ stopParser parser
setStartElementHandler :: Parser -> StartElementHandler -> IO ()
setStartElementHandler parser@(Parser _ startRef _ _ _ _) handler =
writeIORef startRef $ wrapStartElementHandler parser handler
type CEndElementHandler = Ptr () -> CString -> IO ()
nullCEndElementHandler :: CEndElementHandler
nullCEndElementHandler _ _ = return ()
foreign import ccall safe "wrapper"
mkCEndElementHandler :: CEndElementHandler
-> IO (FunPtr CEndElementHandler)
wrapEndElementHandler :: Parser -> EndElementHandler -> CEndElementHandler
wrapEndElementHandler parser handler = h
where
h _ cname = do
stillRunning <- handler cname
unless stillRunning $ stopParser parser
setEndElementHandler :: Parser -> EndElementHandler -> IO ()
setEndElementHandler parser@(Parser _ _ endRef _ _ _) handler =
writeIORef endRef $ wrapEndElementHandler parser handler
type CCharacterDataHandler = Ptr () -> CString -> CInt -> IO ()
nullCCharacterDataHandler :: CCharacterDataHandler
nullCCharacterDataHandler _ _ _ = return ()
foreign import ccall safe "wrapper"
mkCCharacterDataHandler :: CCharacterDataHandler
-> IO (FunPtr CCharacterDataHandler)
wrapCharacterDataHandler :: Parser -> CharacterDataHandler -> CCharacterDataHandler
wrapCharacterDataHandler parser handler = h
where
h _ cdata len = do
stillRunning <- handler (cdata, fromIntegral len)
unless stillRunning $ stopParser parser
setCharacterDataHandler :: Parser -> CharacterDataHandler -> IO ()
setCharacterDataHandler parser@(Parser _ _ _ charRef _ _) handler =
writeIORef charRef $ wrapCharacterDataHandler parser handler
pairwise :: [a] -> [(a,a)]
pairwise (x1:x2:xs) = (x1,x2) : pairwise xs
pairwise _ = []
stopParser :: Parser -> IO ()
stopParser parser = withParser parser $ \p -> xmlStopParser p 0
foreign import ccall unsafe "XML_ParserCreate"
parserCreate'_ :: ((Ptr CChar) -> (IO (Ptr ())))
foreign import ccall unsafe "XML_SetStartElementHandler"
xmlSetstartelementhandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> ((Ptr (Ptr CChar)) -> (IO ()))))) -> (IO ())))
foreign import ccall unsafe "XML_SetEndElementHandler"
xmlSetendelementhandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> (IO ())))) -> (IO ())))
foreign import ccall unsafe "XML_SetCharacterDataHandler"
xmlSetcharacterdatahandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> (CInt -> (IO ()))))) -> (IO ())))
foreign import ccall safe "XML_Parse"
doParseChunk'_ :: ((Ptr ()) -> ((Ptr CChar) -> (CInt -> (CInt -> (IO CInt)))))
foreign import ccall unsafe "XML_UseForeignDTD"
xmlUseForeignDTD :: ParserPtr
-> CChar
-> IO ()
foreign import ccall "&XML_ParserFree" parserFree :: FunPtr (ParserPtr -> IO ())
foreign import ccall "XML_ParserFree" parserFree' :: ParserPtr -> IO ()
type CExternalEntityRefHandler = ParserPtr
-> Ptr CChar
-> Ptr CChar
-> Ptr CChar
-> Ptr CChar
-> IO ()
foreign import ccall safe "wrapper"
mkCExternalEntityRefHandler :: CExternalEntityRefHandler
-> IO (FunPtr CExternalEntityRefHandler)
foreign import ccall unsafe "XML_SetExternalEntityRefHandler"
xmlSetExternalEntityRefHandler :: ParserPtr
-> FunPtr CExternalEntityRefHandler
-> IO ()
foreign import ccall unsafe "XML_SetSkippedEntityHandler"
xmlSetSkippedEntityHandler :: ParserPtr
-> FunPtr CSkippedEntityHandler
-> IO ()
foreign import ccall unsafe "XML_ExternalEntityParserCreate"
xmlExternalEntityParserCreate :: ParserPtr
-> CString
-> CString
-> IO ParserPtr
type CSkippedEntityHandler = Ptr ()
-> CString
-> CInt
-> IO ()
foreign import ccall safe "wrapper"
mkCSkippedEntityHandler :: CSkippedEntityHandler
-> IO (FunPtr CSkippedEntityHandler)
wrapExternalEntityRefHandler :: Parser
-> ExternalEntityRefHandler
-> CExternalEntityRefHandler
wrapExternalEntityRefHandler parser handler = h
where
h _ context base systemID publicID = do
stillRunning <- handler parser context base systemID publicID
unless stillRunning $ stopParser parser
wrapSkippedEntityHandler :: Parser
-> SkippedEntityHandler
-> CSkippedEntityHandler
wrapSkippedEntityHandler parser handler = h
where
h _ entityName i = do
stillRunning <- handler entityName (fromIntegral i)
unless stillRunning $ stopParser parser
setExternalEntityRefHandler :: Parser -> ExternalEntityRefHandler -> IO ()
setExternalEntityRefHandler parser h =
writeIORef ref $ Just $ wrapExternalEntityRefHandler parser h
where
ref = _externalEntityRefHandler parser
setSkippedEntityHandler :: Parser -> SkippedEntityHandler -> IO ()
setSkippedEntityHandler parser h =
writeIORef ref $ Just $ wrapSkippedEntityHandler parser h
where
ref = _skippedEntityHandler parser
foreign import ccall unsafe "expat.h XML_GetErrorCode" xmlGetErrorCode
:: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_GetCurrentLineNumber" xmlGetCurrentLineNumber
:: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentColumnNumber" xmlGetCurrentColumnNumber
:: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentByteIndex" xmlGetCurrentByteIndex
:: ParserPtr -> IO CLong
foreign import ccall unsafe "expat.h XML_GetCurrentByteCount" xmlGetCurrentByteCount
:: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_ErrorString" xmlErrorString
:: CInt -> IO CString
foreign import ccall unsafe "expat.h XML_StopParser" xmlStopParser
:: ParserPtr -> CInt -> IO ()