module Text.XML.Expat.IO (
Parser, newParser,
parse, parse', parseChunk, Encoding(..), XMLParseError(..),
getParseLocation,
XMLParseLocation(..),
StartElementHandler,
EndElementHandler,
CharacterDataHandler,
ExternalEntityRefHandler,
SkippedEntityHandler,
setStartElementHandler,
setEndElementHandler,
setCharacterDataHandler,
setExternalEntityRefHandler,
setSkippedEntityHandler,
setUseForeignDTD,
parseExternalEntityReference,
unsafeParseChunk,
withHandlers,
unsafeSetHandlers,
unsafeReleaseHandlers,
ExpatHandlers,
encodingToString
) where
import Control.Exception (bracket)
import Control.Parallel.Strategies
import Control.Monad
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import Data.IORef
import Foreign
import Foreign.C
type ParserPtr = Ptr ()
data Parser = Parser
{ _parserObj :: ForeignPtr ()
, _startElementHandler :: IORef CStartElementHandler
, _endElementHandler :: IORef CEndElementHandler
, _cdataHandler :: IORef CCharacterDataHandler
, _externalEntityRefHandler :: IORef (Maybe CExternalEntityRefHandler)
, _skippedEntityHandler :: IORef (Maybe CSkippedEntityHandler)
}
instance Show Parser where
showsPrec _ (Parser fp _ _ _ _ _) = showsPrec 0 fp
withParser :: Parser -> (ParserPtr -> IO a) -> IO a
withParser (Parser fp _ _ _ _ _) = withForeignPtr fp
data Encoding = ASCII | UTF8 | UTF16 | ISO88591
encodingToString :: Encoding -> String
encodingToString ASCII = "US-ASCII"
encodingToString UTF8 = "UTF-8"
encodingToString UTF16 = "UTF-16"
encodingToString ISO88591 = "ISO-8859-1"
withOptEncoding :: Maybe Encoding -> (CString -> IO a) -> IO a
withOptEncoding Nothing f = f nullPtr
withOptEncoding (Just enc) f = withCString (encodingToString enc) f
parserCreate :: Maybe Encoding -> IO (ParserPtr)
parserCreate a1 =
withOptEncoding a1 $ \a1' ->
parserCreate'_ a1' >>= \res ->
let {res' = id res} in
return (res')
newParser :: Maybe Encoding -> IO Parser
newParser enc = do
ptr <- parserCreate enc
fptr <- newForeignPtr parserFree ptr
nullStartH <- newIORef nullCStartElementHandler
nullEndH <- newIORef nullCEndElementHandler
nullCharH <- newIORef nullCCharacterDataHandler
extH <- newIORef Nothing
skipH <- newIORef Nothing
return $ Parser fptr nullStartH nullEndH nullCharH extH skipH
setUseForeignDTD :: Parser -> Bool -> IO ()
setUseForeignDTD p b = withParser p $ \p' -> xmlUseForeignDTD p' b'
where
b' = if b then 1 else 0
withBStringLen :: BS.ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen bs f = do
BS.useAsCStringLen bs $ \(str, len) -> f (str, fromIntegral len)
unStatus :: CInt -> Bool
unStatus 0 = False
unStatus _ = True
parse :: Parser -> BSL.ByteString -> IO (Maybe XMLParseError)
parse parser bs = withHandlers parser $ do
ok <- doParseChunks (BSL.toChunks bs)
if ok
then return Nothing
else Just `fmap` getError parser
where
doParseChunks [] = doParseChunk parser BS.empty True
doParseChunks (c:cs) = do
ok <- doParseChunk parser c False
if ok
then doParseChunks cs
else return False
parse' :: Parser -> BS.ByteString -> IO (Maybe XMLParseError)
parse' parser bs = withHandlers parser $ do
ok <- doParseChunk parser bs True
if ok
then return Nothing
else Just `fmap` getError parser
parseChunk :: Parser
-> BS.ByteString
-> Bool
-> IO (Maybe XMLParseError)
parseChunk parser xml final = withHandlers parser $ unsafeParseChunk parser xml final
parseExternalEntityReference :: Parser
-> CString
-> Maybe Encoding
-> CStringLen
-> IO Bool
parseExternalEntityReference parser context encoding (text,sz) =
withParser parser $ \p -> do
extp <- withOptEncoding encoding $
xmlExternalEntityParserCreate p context
e <- doParseChunk'_ extp text (fromIntegral sz) 1
parserFree' extp
return $ e == 1
unsafeParseChunk :: Parser
-> BS.ByteString
-> Bool
-> IO (Maybe XMLParseError)
unsafeParseChunk parser xml final = do
ok <- doParseChunk parser xml final
if ok
then return Nothing
else Just `fmap` getError parser
getError :: Parser -> IO XMLParseError
getError parser = withParser parser $ \p -> do
code <- xmlGetErrorCode p
cerr <- xmlErrorString code
err <- peekCString cerr
loc <- getParseLocation parser
return $ XMLParseError err loc
data ExpatHandlers = ExpatHandlers
(FunPtr CStartElementHandler)
(FunPtr CEndElementHandler)
(FunPtr CCharacterDataHandler)
(Maybe (FunPtr CExternalEntityRefHandler))
(Maybe (FunPtr CSkippedEntityHandler))
unsafeSetHandlers :: Parser -> IO ExpatHandlers
unsafeSetHandlers parser@(Parser _ startRef endRef charRef extRef skipRef) =
do
cStartH <- mkCStartElementHandler =<< readIORef startRef
cEndH <- mkCEndElementHandler =<< readIORef endRef
cCharH <- mkCCharacterDataHandler =<< readIORef charRef
mExtH <- readIORef extRef >>=
maybe (return Nothing)
(\h -> liftM Just $ mkCExternalEntityRefHandler h)
mSkipH <- readIORef skipRef >>=
maybe (return Nothing)
(\h -> liftM Just $ mkCSkippedEntityHandler h)
withParser parser $ \p -> do
xmlSetstartelementhandler p cStartH
xmlSetendelementhandler p cEndH
xmlSetcharacterdatahandler p cCharH
maybe (return ())
(xmlSetExternalEntityRefHandler p)
mExtH
maybe (return ())
(xmlSetSkippedEntityHandler p)
mSkipH
return $ ExpatHandlers cStartH cEndH cCharH mExtH mSkipH
unsafeReleaseHandlers :: ExpatHandlers -> IO ()
unsafeReleaseHandlers (ExpatHandlers cStartH cEndH cCharH mcExtH mcSkipH) = do
freeHaskellFunPtr cStartH
freeHaskellFunPtr cEndH
freeHaskellFunPtr cCharH
maybe (return ()) freeHaskellFunPtr mcExtH
maybe (return ()) freeHaskellFunPtr mcSkipH
withHandlers :: Parser
-> IO a
-> IO a
withHandlers parser code = do
bracket
(unsafeSetHandlers parser)
unsafeReleaseHandlers
(\_ -> code)
cFromBool :: Num a => Bool -> a
cFromBool = fromBool
doParseChunk :: Parser -> BS.ByteString -> Bool -> IO (Bool)
doParseChunk a1 a2 a3 =
withParser a1 $ \a1' ->
withBStringLen a2 $ \(a2'1, a2'2) ->
let {a3' = cFromBool a3} in
doParseChunk'_ a1' a2'1 a2'2 a3' >>= \res ->
let {res' = unStatus res} in
return (res')
data XMLParseError = XMLParseError String XMLParseLocation deriving (Eq, Show)
instance NFData XMLParseError where
rnf (XMLParseError msg loc) = rnf (msg, loc)
data XMLParseLocation = XMLParseLocation {
xmlLineNumber :: Int64,
xmlColumnNumber :: Int64,
xmlByteIndex :: Int64,
xmlByteCount :: Int64
}
deriving (Eq, Show)
instance NFData XMLParseLocation where
rnf (XMLParseLocation lin col ind cou) = rnf (lin, col, ind, cou)
getParseLocation :: Parser -> IO XMLParseLocation
getParseLocation parser = withParser parser $ \p -> do
line <- xmlGetCurrentLineNumber p
col <- xmlGetCurrentColumnNumber p
index <- xmlGetCurrentByteIndex p
count <- xmlGetCurrentByteCount p
return $ XMLParseLocation {
xmlLineNumber = fromIntegral line,
xmlColumnNumber = fromIntegral col,
xmlByteIndex = fromIntegral index,
xmlByteCount = fromIntegral count
}
type StartElementHandler = CString -> [(CString, CString)] -> IO Bool
type EndElementHandler = CString -> IO Bool
type CharacterDataHandler = CStringLen -> IO Bool
type ExternalEntityRefHandler = Parser
-> CString
-> CString
-> CString
-> CString
-> IO Bool
type SkippedEntityHandler = CString
-> Int
-> IO Bool
type CStartElementHandler = Ptr () -> CString -> Ptr CString -> IO ()
nullCStartElementHandler :: CStartElementHandler
nullCStartElementHandler _ _ _ = return ()
foreign import ccall safe "wrapper"
mkCStartElementHandler :: CStartElementHandler
-> IO (FunPtr CStartElementHandler)
wrapStartElementHandler :: Parser -> StartElementHandler -> CStartElementHandler
wrapStartElementHandler parser handler = h
where
h _ cname cattrs = do
cattrlist <- peekArray0 nullPtr cattrs
stillRunning <- handler cname (pairwise cattrlist)
unless stillRunning $ stopParser parser
setStartElementHandler :: Parser -> StartElementHandler -> IO ()
setStartElementHandler parser@(Parser _ startRef _ _ _ _) handler =
writeIORef startRef $ wrapStartElementHandler parser handler
type CEndElementHandler = Ptr () -> CString -> IO ()
nullCEndElementHandler :: CEndElementHandler
nullCEndElementHandler _ _ = return ()
foreign import ccall safe "wrapper"
mkCEndElementHandler :: CEndElementHandler
-> IO (FunPtr CEndElementHandler)
wrapEndElementHandler :: Parser -> EndElementHandler -> CEndElementHandler
wrapEndElementHandler parser handler = h
where
h _ cname = do
stillRunning <- handler cname
unless stillRunning $ stopParser parser
setEndElementHandler :: Parser -> EndElementHandler -> IO ()
setEndElementHandler parser@(Parser _ _ endRef _ _ _) handler =
writeIORef endRef $ wrapEndElementHandler parser handler
type CCharacterDataHandler = Ptr () -> CString -> CInt -> IO ()
nullCCharacterDataHandler :: CCharacterDataHandler
nullCCharacterDataHandler _ _ _ = return ()
foreign import ccall safe "wrapper"
mkCCharacterDataHandler :: CCharacterDataHandler
-> IO (FunPtr CCharacterDataHandler)
wrapCharacterDataHandler :: Parser -> CharacterDataHandler -> CCharacterDataHandler
wrapCharacterDataHandler parser handler = h
where
h _ cdata len = do
stillRunning <- handler (cdata, fromIntegral len)
unless stillRunning $ stopParser parser
setCharacterDataHandler :: Parser -> CharacterDataHandler -> IO ()
setCharacterDataHandler parser@(Parser _ _ _ charRef _ _) handler =
writeIORef charRef $ wrapCharacterDataHandler parser handler
pairwise :: [a] -> [(a,a)]
pairwise (x1:x2:xs) = (x1,x2) : pairwise xs
pairwise _ = []
stopParser :: Parser -> IO ()
stopParser parser = withParser parser $ \p -> xmlStopParser p 0
foreign import ccall unsafe "XML_ParserCreate"
parserCreate'_ :: ((Ptr CChar) -> (IO (Ptr ())))
foreign import ccall unsafe "XML_SetStartElementHandler"
xmlSetstartelementhandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> ((Ptr (Ptr CChar)) -> (IO ()))))) -> (IO ())))
foreign import ccall unsafe "XML_SetEndElementHandler"
xmlSetendelementhandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> (IO ())))) -> (IO ())))
foreign import ccall unsafe "XML_SetCharacterDataHandler"
xmlSetcharacterdatahandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> (CInt -> (IO ()))))) -> (IO ())))
foreign import ccall safe "XML_Parse"
doParseChunk'_ :: ((Ptr ()) -> ((Ptr CChar) -> (CInt -> (CInt -> (IO CInt)))))
foreign import ccall unsafe "XML_UseForeignDTD"
xmlUseForeignDTD :: ParserPtr
-> CChar
-> IO ()
foreign import ccall "&XML_ParserFree" parserFree :: FunPtr (ParserPtr -> IO ())
foreign import ccall "XML_ParserFree" parserFree' :: ParserPtr -> IO ()
type CExternalEntityRefHandler = ParserPtr
-> Ptr CChar
-> Ptr CChar
-> Ptr CChar
-> Ptr CChar
-> IO ()
foreign import ccall safe "wrapper"
mkCExternalEntityRefHandler :: CExternalEntityRefHandler
-> IO (FunPtr CExternalEntityRefHandler)
foreign import ccall unsafe "XML_SetExternalEntityRefHandler"
xmlSetExternalEntityRefHandler :: ParserPtr
-> FunPtr CExternalEntityRefHandler
-> IO ()
foreign import ccall unsafe "XML_SetSkippedEntityHandler"
xmlSetSkippedEntityHandler :: ParserPtr
-> FunPtr CSkippedEntityHandler
-> IO ()
foreign import ccall unsafe "XML_ExternalEntityParserCreate"
xmlExternalEntityParserCreate :: ParserPtr
-> CString
-> CString
-> IO ParserPtr
type CSkippedEntityHandler = Ptr ()
-> CString
-> CInt
-> IO ()
foreign import ccall safe "wrapper"
mkCSkippedEntityHandler :: CSkippedEntityHandler
-> IO (FunPtr CSkippedEntityHandler)
wrapExternalEntityRefHandler :: Parser
-> ExternalEntityRefHandler
-> CExternalEntityRefHandler
wrapExternalEntityRefHandler parser handler = h
where
h _ context base systemID publicID = do
stillRunning <- handler parser context base systemID publicID
unless stillRunning $ stopParser parser
wrapSkippedEntityHandler :: Parser
-> SkippedEntityHandler
-> CSkippedEntityHandler
wrapSkippedEntityHandler parser handler = h
where
h _ entityName i = do
stillRunning <- handler entityName (fromIntegral i)
unless stillRunning $ stopParser parser
setExternalEntityRefHandler :: Parser -> ExternalEntityRefHandler -> IO ()
setExternalEntityRefHandler parser h =
writeIORef ref $ Just $ wrapExternalEntityRefHandler parser h
where
ref = _externalEntityRefHandler parser
setSkippedEntityHandler :: Parser -> SkippedEntityHandler -> IO ()
setSkippedEntityHandler parser h =
writeIORef ref $ Just $ wrapSkippedEntityHandler parser h
where
ref = _skippedEntityHandler parser
foreign import ccall unsafe "expat.h XML_GetErrorCode" xmlGetErrorCode
:: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_GetCurrentLineNumber" xmlGetCurrentLineNumber
:: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentColumnNumber" xmlGetCurrentColumnNumber
:: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentByteIndex" xmlGetCurrentByteIndex
:: ParserPtr -> IO CLong
foreign import ccall unsafe "expat.h XML_GetCurrentByteCount" xmlGetCurrentByteCount
:: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_ErrorString" xmlErrorString
:: CInt -> IO CString
foreign import ccall unsafe "expat.h XML_StopParser" xmlStopParser
:: ParserPtr -> CInt -> IO ()