module Text.Xml.Tiny.Internal.Monad where
import qualified Control.Exception as CE
import Control.Monad
import Control.Monad.Primitive
import Control.Monad.ST
import Control.Monad.ST.Unsafe
import GHC.ST (ST(..))
import Control.Monad.State.Class
import Data.ByteString.Internal (ByteString(..), c2w, w2c, memchr, memcmp)
import qualified Data.ByteString.Char8 as BS
import Data.Int
import Data.Word
import Foreign.ForeignPtr
import Foreign.ForeignPtr.Unsafe
import Foreign.Storable hiding (peek)
import qualified Foreign.Storable as Foreign
import Foreign.Ptr
import GHC.Int
import Data.VectorBuilder.Storable (VectorBuilder)
import qualified Data.VectorBuilder.Storable as VectorBuilder
import Text.Xml.Tiny.Internal as Slice
import Text.Xml.Tiny.Internal.Checks
import Config
import qualified GHC.Stack
import Text.Printf
type MonadParse m = MonadState ParseState m
type ParseState = Slice
data Env s =
Env { source :: !ByteString
, ptr :: !(Ptr Word8)
, attributeBuffer :: !(VectorBuilder s AttributeParseDetails)
, nodeBuffer :: !(VectorBuilder s ParseDetails)
}
newtype ParseMonad s a = PM {unPM :: Env s -> Slice -> ST s (a,Slice)}
liftST :: Config => ST s a -> ParseMonad s a
liftST action = PM $ \_ slice -> (,slice) <$> action
unsafeLiftIO :: Config => IO a -> ParseMonad s a
unsafeLiftIO action = liftST (unsafeIOToST action)
runPM :: Config => ByteString -> (forall s. ParseMonad s a) -> a
runPM bs@(PS fptr (fromIntegral -> o) (fromIntegral -> l)) pm = runST $ do
attributes <- VectorBuilder.new 1000
nodes <- VectorBuilder.new 500
let ptr = unsafeForeignPtrToPtr fptr
(res,_) <- unPM pm (Env bs ptr attributes nodes) (Slice o l)
return res
readStr :: Config => Slice -> ParseMonad s ByteString
readStr str = do
Env{source = PS fptr _ _} <- getEnv
return $! fromIndexPtr str fptr
getBS :: Config => ParseMonad s ByteString
getBS = get >>= readStr
unsafeIO :: Config => String -> IO a -> ParseMonad s a
unsafeIO msg action = unsafeLiftIO $ do
#ifdef TRACE_UNSAFE
res <- trace ("About to start unsafe IO: " ++ msg) $ action
trace ("Completed unsafe IO: " ++ msg) $ return res
#else
action
#endif
withCursor k = do
checkCursor
Env{ptr=p} <- getEnv
Slice o l <- get
k p o l
peek :: Config => (Char -> a) -> ParseMonad s a
peek = peekAt 0
peekAt :: Config => Int32 -> (Char -> a) -> ParseMonad s a
peekAt i pred = withCursor $ \p o l ->
if (l < i) then throw UnexpectedEndOfStream else peekAtUnsafePO i pred p o
peekAtUnsafePO i pred p o = (pred.w2c) <$> unsafeIO "BS" (peekByteOff p (fromIntegral $ i + o))
bsIndex2 :: Config => Int32 -> Int32 -> (Char -> Char -> a) -> ParseMonad s a
bsIndex2 i j pred = withCursor $ \p o l ->
if (l<i || l<j) then throw UnexpectedEndOfStream else do
checkCursor
byteI <- peekAtUnsafePO i id p o
byteJ <- peekAtUnsafePO j id p o
return $ pred byteI byteJ
bsIsPrefix :: Config => ByteString -> ParseMonad s Bool
bsIsPrefix (PS r o' l') = withCursor $ \p o _ -> do
resp <-
unsafeIO "memcmp" $ withForeignPtr r $ \p' ->
memcmp (p' `plusPtr` o') (p `plusPtr` fromIntegral o) (fromIntegral l')
let res = resp == 0
return res
bsElemIndex :: Char -> ParseMonad s (Maybe Int)
bsElemIndex c = withCursor $ \p o l -> do
let !p' = p `plusPtr` fromIntegral o
!q <- unsafeIO "memchr" $ memchr p' (c2w c) (fromIntegral l)
let !res = if q == nullPtr then Nothing else Just $! q `minusPtr` p'
return res
bsDropWhile :: (Char -> Bool) -> ParseMonad s ()
bsDropWhile f = withCursor $ \p o l -> do
!n <- unsafeIO "BS" $ findIndexOrEnd (not.f.w2c) (p `plusPtr` fromIntegral o) l
put $ Slice (o+n) (ln)
bsSpan :: (Char -> Bool) -> ParseMonad s Slice
bsSpan f = withCursor $ \ p o l -> do
!n <- unsafeIO "BS" $ findIndexOrEnd (not.f.w2c) (p `plusPtr` fromIntegral o) l
put $ Slice (o+n) (ln)
return $! Slice o n
findIndexOrEnd k p l = go p 0 where
go !ptr !n | n >= l = return l
| otherwise = do !w <- Foreign.peek ptr
if k w
then return n
else go (ptr `plusPtr` 1) (n+1)
loc :: ParseMonad s SrcLoc
loc = get >>= \ (Slice o _) -> return $ SrcLoc $ fromIntegral o
throw :: HasCallStack => ErrorType -> a
#if __GLASGOW_HASKELL__ < 800
throw e = CE.throw $ Error e ?callStack
#else
throw e = CE.throw $ Error e GHC.Stack.callStack
#endif
throwLoc :: Config => (SrcLoc -> ErrorType) -> ParseMonad s a
throwLoc e = loc >>= throw . e
getEnv :: ParseMonad s (Env s)
getEnv = PM $ \e s -> return (e,s)
pushNode :: Config => ParseDetails -> ParseMonad s Int32
pushNode node = do
Env{nodeBuffer} <- getEnv
VectorBuilder.push nodeBuffer node
popNodes :: Config => Int32 -> ParseMonad s ()
popNodes n = do
Env{nodeBuffer} <- getEnv
VectorBuilder.pop nodeBuffer n
peekNode :: Config => Int32 -> ParseMonad s ParseDetails
peekNode n = do
Env{nodeBuffer} <- getEnv
VectorBuilder.peek nodeBuffer n
insertNode :: Config => ParseDetails -> ParseMonad s ()
insertNode node = do
Env{nodeBuffer} <- getEnv
VectorBuilder.insert nodeBuffer node
insertAttribute :: AttributeParseDetails -> ParseMonad s ()
insertAttribute att = do
Env{attributeBuffer} <- getEnv
VectorBuilder.insert attributeBuffer att
getNodeBufferCount, getAttributeBufferCount, getNodeStackCount :: Config => ParseMonad s Int32
getNodeBufferCount = do
Env{nodeBuffer} <- getEnv
VectorBuilder.getCount nodeBuffer
getAttributeBufferCount = do
Env{attributeBuffer} <- getEnv
VectorBuilder.getCount attributeBuffer
getNodeStackCount = do
Env{nodeBuffer} <- getEnv
VectorBuilder.getStackCount nodeBuffer
updateNode :: Int32 -> (ParseDetails -> ParseDetails) -> ParseMonad s ()
updateNode i f = do
Env{nodeBuffer} <- getEnv
VectorBuilder.updateStackElt nodeBuffer i f
instance Functor (ParseMonad s) where
fmap = liftM
instance Applicative (ParseMonad s) where
pure x = PM (\_ s -> return (x,s))
(<*>) = ap
instance Monad (ParseMonad s) where
PM m >>= k = PM (\e s -> m e s >>= \(a,s') -> unPM (k a) e s')
instance MonadState ParseState (ParseMonad s) where
get = PM $ \_ s -> return (s,s)
put s = PM $ \_ _ -> return ((), s)
instance PrimMonad (ParseMonad s) where
type PrimState(ParseMonad s) = s
primitive act = PM (\_ s -> (,s) <$> ST act )
checkCursor
| doCursorChecks = do
Env{source = PS _ o0 l0} <- getEnv
Slice o l <- get
return $ checkBSaccess o l o0 l0
| otherwise = return ()
checkConsistency (fromIntegral -> outerStart) (fromIntegral -> innerClose) origStr seenStr
| doStackChecks = do
nameBS_original <- readStr origStr
seenBS <- readStr seenStr
unless (seenBS == nameBS_original) $ do
xml <- readStr(Slice outerStart (min 100 $ innerClose + fromIntegral( Slice.length origStr) + 4 outerStart))
total <- getNodeStackCount
let go n
| n >= total = return []
| otherwise = do
node <- peekNode n
nBS <- readStr (name node)
(BS.unpack nBS :) <$> go (n+1)
stackNames <- go 0
error $ printf "Inconsistency detected in the node stack while parsing %s...\n Expected %s(%s), but obtained %s(%s) (and the stack contains %s)"
(BS.unpack xml) (BS.unpack nameBS_original) (show origStr) (BS.unpack seenBS) (show seenStr)(show stackNames)
| otherwise =
return ()