module Text.XML.Expat.IO (
Parser, newParser,
parse, parse', parseChunk, Encoding(..), XMLParseError(..),
getParseLocation,
XMLParseLocation(..),
StartElementHandler, EndElementHandler, CharacterDataHandler,
setStartElementHandler, setEndElementHandler, setCharacterDataHandler,
unsafeParseChunk,
withHandlers,
unsafeSetHandlers,
unsafeReleaseHandlers,
ExpatHandlers,
encodingToString
) where
import Control.Exception (bracket)
import Control.Parallel.Strategies
import Control.Monad
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Internal as BSI
import Data.IORef
import Data.Int
import Foreign
import CForeign
type ParserPtr = Ptr ()
data Parser = Parser
(ForeignPtr ())
(IORef CStartElementHandler)
(IORef CEndElementHandler)
(IORef CCharacterDataHandler)
instance Show Parser where
showsPrec _ (Parser fp _ _ _) = showsPrec 0 fp
withParser :: Parser -> (ParserPtr -> IO a) -> IO a
withParser (Parser fp _ _ _) = withForeignPtr fp
data Encoding = ASCII | UTF8 | UTF16 | ISO88591
encodingToString :: Encoding -> String
encodingToString ASCII = "US-ASCII"
encodingToString UTF8 = "UTF-8"
encodingToString UTF16 = "UTF-16"
encodingToString ISO88591 = "ISO-8859-1"
withOptEncoding :: Maybe Encoding -> (CString -> IO a) -> IO a
withOptEncoding Nothing f = f nullPtr
withOptEncoding (Just enc) f = withCString (encodingToString enc) f
parserCreate :: Maybe Encoding -> IO (ParserPtr)
parserCreate a1 =
withOptEncoding a1 $ \a1' ->
parserCreate'_ a1' >>= \res ->
let {res' = id res} in
return (res')
foreign import ccall "&XML_ParserFree" parserFree :: FunPtr (ParserPtr -> IO ())
newParser :: Maybe Encoding -> IO Parser
newParser enc = do
ptr <- parserCreate enc
fptr <- newForeignPtr parserFree ptr
nullStartH <- newIORef nullCStartElementHandler
nullEndH <- newIORef nullCEndElementHandler
nullCharH <- newIORef nullCCharacterDataHandler
return $ Parser fptr nullStartH nullEndH nullCharH
withBStringLen :: BS.ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen bs f = do
BS.useAsCStringLen bs $ \(str, len) -> f (str, fromIntegral len)
unStatus :: CInt -> Bool
unStatus 0 = False
unStatus _ = True
parse :: Parser -> BSL.ByteString -> IO (Maybe XMLParseError)
parse parser@(Parser _ _ _ _) bs = withHandlers parser $ do
ok <- doParseChunks (BSL.toChunks bs)
if ok
then return Nothing
else Just `fmap` getError parser
where
doParseChunks [] = doParseChunk parser BS.empty True
doParseChunks (c:cs) = do
ok <- doParseChunk parser c False
if ok
then doParseChunks cs
else return False
parse' :: Parser -> BS.ByteString -> IO (Maybe XMLParseError)
parse' parser@(Parser _ _ _ _) bs = withHandlers parser $ do
ok <- doParseChunk parser bs True
if ok
then return Nothing
else Just `fmap` getError parser
parseChunk :: Parser
-> BS.ByteString
-> Bool
-> IO (Maybe XMLParseError)
parseChunk parser xml final = withHandlers parser $ unsafeParseChunk parser xml final
unsafeParseChunk :: Parser
-> BS.ByteString
-> Bool
-> IO (Maybe XMLParseError)
unsafeParseChunk parser xml final = do
ok <- doParseChunk parser xml final
if ok
then return Nothing
else Just `fmap` getError parser
getError parser = withParser parser $ \p -> do
code <- xmlGetErrorCode p
cerr <- xmlErrorString code
err <- peekCString cerr
loc <- getParseLocation parser
return $ XMLParseError err loc
data ExpatHandlers = ExpatHandlers
(FunPtr CStartElementHandler)
(FunPtr CEndElementHandler)
(FunPtr CCharacterDataHandler)
unsafeSetHandlers :: Parser -> IO ExpatHandlers
unsafeSetHandlers parser@(Parser fp startRef endRef charRef) = do
cStartH <- mkCStartElementHandler =<< readIORef startRef
cEndH <- mkCEndElementHandler =<< readIORef endRef
cCharH <- mkCCharacterDataHandler =<< readIORef charRef
withParser parser $ \p -> do
xmlSetstartelementhandler p cStartH
xmlSetendelementhandler p cEndH
xmlSetcharacterdatahandler p cCharH
return $ ExpatHandlers cStartH cEndH cCharH
unsafeReleaseHandlers :: ExpatHandlers -> IO ()
unsafeReleaseHandlers (ExpatHandlers cStartH cEndH cCharH) = do
freeHaskellFunPtr cStartH
freeHaskellFunPtr cEndH
freeHaskellFunPtr cCharH
withHandlers :: Parser
-> IO a
-> IO a
withHandlers parser code = do
bracket
(unsafeSetHandlers parser)
unsafeReleaseHandlers
(\_ -> code)
cFromBool :: Num a => Bool -> a
cFromBool = fromBool
doParseChunk :: Parser -> BS.ByteString -> Bool -> IO (Bool)
doParseChunk a1 a2 a3 =
withParser a1 $ \a1' ->
withBStringLen a2 $ \(a2'1, a2'2) ->
let {a3' = cFromBool a3} in
doParseChunk'_ a1' a2'1 a2'2 a3' >>= \res ->
let {res' = unStatus res} in
return (res')
data XMLParseError = XMLParseError String XMLParseLocation deriving (Eq, Show)
instance NFData XMLParseError where
rnf (XMLParseError msg loc) = rnf (msg, loc)
data XMLParseLocation = XMLParseLocation {
xmlLineNumber :: Int64,
xmlColumnNumber :: Int64,
xmlByteIndex :: Int64,
xmlByteCount :: Int64
}
deriving (Eq, Show)
instance NFData XMLParseLocation where
rnf (XMLParseLocation lin col ind cou) = rnf (lin, col, ind, cou)
getParseLocation parser = withParser parser $ \p -> do
line <- xmlGetCurrentLineNumber p
col <- xmlGetCurrentColumnNumber p
index <- xmlGetCurrentByteIndex p
count <- xmlGetCurrentByteCount p
return $ XMLParseLocation {
xmlLineNumber = fromIntegral line,
xmlColumnNumber = fromIntegral col,
xmlByteIndex = fromIntegral index,
xmlByteCount = fromIntegral count
}
type StartElementHandler = CString -> [(CString, CString)] -> IO Bool
type EndElementHandler = CString -> IO Bool
type CharacterDataHandler = CStringLen -> IO Bool
type CStartElementHandler = Ptr () -> CString -> Ptr CString -> IO ()
nullCStartElementHandler _ _ _ = return ()
foreign import ccall unsafe "expat.h XML_GetErrorCode" xmlGetErrorCode
:: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_GetCurrentLineNumber" xmlGetCurrentLineNumber
:: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentColumnNumber" xmlGetCurrentColumnNumber
:: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentByteIndex" xmlGetCurrentByteIndex
:: ParserPtr -> IO CLong
foreign import ccall unsafe "expat.h XML_GetCurrentByteCount" xmlGetCurrentByteCount
:: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_ErrorString" xmlErrorString
:: CInt -> IO CString
foreign import ccall unsafe "expat.h XML_StopParser" xmlStopParser
:: ParserPtr -> CInt -> IO ()
foreign import ccall safe "wrapper"
mkCStartElementHandler :: CStartElementHandler
-> IO (FunPtr CStartElementHandler)
wrapStartElementHandler :: Parser -> StartElementHandler -> CStartElementHandler
wrapStartElementHandler parser@(Parser _ _ _ _) handler = h
where
h ptr cname cattrs = do
cattrlist <- peekArray0 nullPtr cattrs
stillRunning <- handler cname (pairwise cattrlist)
unless stillRunning $
withParser parser $ \p -> xmlStopParser p 0
setStartElementHandler :: Parser -> StartElementHandler -> IO ()
setStartElementHandler parser@(Parser _ startRef _ _) handler =
withParser parser $ \p -> do
writeIORef startRef $ wrapStartElementHandler parser handler
type CEndElementHandler = Ptr () -> CString -> IO ()
nullCEndElementHandler _ _ = return ()
foreign import ccall safe "wrapper"
mkCEndElementHandler :: CEndElementHandler
-> IO (FunPtr CEndElementHandler)
wrapEndElementHandler :: Parser -> EndElementHandler -> CEndElementHandler
wrapEndElementHandler parser@(Parser _ _ _ _) handler = h
where
h ptr cname = do
stillRunning <- handler cname
unless stillRunning $
withParser parser $ \p -> xmlStopParser p 0
setEndElementHandler :: Parser -> EndElementHandler -> IO ()
setEndElementHandler parser@(Parser _ _ endRef _) handler =
withParser parser $ \p -> do
writeIORef endRef $ wrapEndElementHandler parser handler
type CCharacterDataHandler = Ptr () -> CString -> CInt -> IO ()
nullCCharacterDataHandler _ _ _ = return ()
foreign import ccall safe "wrapper"
mkCCharacterDataHandler :: CCharacterDataHandler
-> IO (FunPtr CCharacterDataHandler)
wrapCharacterDataHandler :: Parser -> CharacterDataHandler -> CCharacterDataHandler
wrapCharacterDataHandler parser@(Parser _ _ _ _) handler = h
where
h ptr cdata len = do
stillRunning <- handler (cdata, fromIntegral len)
unless stillRunning $
withParser parser $ \p -> xmlStopParser p 0
setCharacterDataHandler :: Parser -> CharacterDataHandler -> IO ()
setCharacterDataHandler parser@(Parser _ _ _ charRef) handler =
withParser parser $ \p -> do
writeIORef charRef $ wrapCharacterDataHandler parser handler
pairwise (x1:x2:xs) = (x1,x2) : pairwise xs
pairwise _ = []
foreign import ccall unsafe "XML_ParserCreate"
parserCreate'_ :: ((Ptr CChar) -> (IO (Ptr ())))
foreign import ccall unsafe "XML_SetStartElementHandler"
xmlSetstartelementhandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> ((Ptr (Ptr CChar)) -> (IO ()))))) -> (IO ())))
foreign import ccall unsafe "XML_SetEndElementHandler"
xmlSetendelementhandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> (IO ())))) -> (IO ())))
foreign import ccall unsafe "XML_SetCharacterDataHandler"
xmlSetcharacterdatahandler :: ((Ptr ()) -> ((FunPtr ((Ptr ()) -> ((Ptr CChar) -> (CInt -> (IO ()))))) -> (IO ())))
foreign import ccall safe "XML_Parse"
doParseChunk'_ :: ((Ptr ()) -> ((Ptr CChar) -> (CInt -> (CInt -> (IO CInt)))))