{-# LANGUAGE BangPatterns #-} module Database.VCache.VGet ( VGet -- * Prim Readers , getVRef, getPVar , getWord8 , getWord16le, getWord16be , getWord32le, getWord32be , getWord64le, getWord64be , getStorable , getVarNat, getVarInt , getByteString, getByteStringLazy , getc -- * Parser Combinators , isolate , label , lookAhead, lookAheadM, lookAheadE , isEmpty ) where import Control.Applicative import Data.Bits import Data.Char import Data.Word import Foreign.Ptr import Foreign.Storable (Storable(..)) import Foreign.Marshal.Alloc (mallocBytes,finalizerFree) import Foreign.Marshal.Utils (copyBytes) import Foreign.ForeignPtr (newForeignPtr) import qualified Data.List as L import qualified Data.ByteString as BS import qualified Data.ByteString.Internal as BSI import qualified Data.ByteString.Lazy as LBS import Database.VCache.Types import Database.VCache.Aligned import Database.VCache.Alloc import Database.VCache.VGetAux -- | isolate a parser to a subset of bytes and value references. The -- child parser must process its entire input (all bytes and values) -- or will fail. If there is not enough available input to isolate, -- this operation will fail. -- -- isolate nBytes nVRefs operation -- isolate :: Int -> Int -> VGet a -> VGet a isolate nBytes nRefs op = VGet $ \ s -> let pF = vget_target s `plusPtr` nBytes in if (pF > vget_limit s) then return (VGetE "isolate: not enough data") else case takeExact nRefs (vget_children s) of Nothing -> return (VGetE "isolate: not enough children") Just (cs,cs') -> let s_isolated = s { vget_children = cs , vget_limit = pF } in let s_postIsolate = s { vget_children = cs' , vget_target = pF } in _vget op s_isolated >>= \ r_isolated -> case r_isolated of VGetE emsg -> return (VGetE emsg) VGetR r s' -> let bDone = vgetStateEmpty s' in if bDone then return (VGetR r s_postIsolate) else return (VGetE "isolate: did not parse all input") -- take exactly the requested amount from a list, or return Nothing. takeExact :: Int -> [a] -> Maybe ([a],[a]) takeExact = takeExact' [] {-# INLINE takeExact #-} takeExact' :: [a] -> Int -> [a] -> Maybe ([a],[a]) takeExact' l 0 r = Just (L.reverse l, r) takeExact' l n (r:rs) = takeExact' (r:l) (n-1) rs takeExact' _ _ _ = Nothing -- | Load a VRef, just the reference rather than the content. User must -- know the type of the value, since getVRef is essentially a typecast. -- VRef content is not read until deref. -- -- All instances of a VRef with the same type and address will share the -- same cache. getVRef :: (VCacheable a) => VGet (VRef a) getVRef = VGet $ \ s -> case (vget_children s) of (c:cs) | isVRefAddr c -> do let s' = s { vget_children = cs } vref <- addr2vref (vget_space s) c return (VGetR vref s') _ -> return (VGetE "getVRef") {-# INLINABLE getVRef #-} -- | Load a PVar, just the variable. Content is loaded lazily on first -- read, then kept in memory until the PVar is GC'd. Unlike other Haskell -- variables, PVars can be serialized to the VCache address space. All -- PVars for a specific address are collapsed, using the same TVar. -- -- Developers must know the type of the PVar, since getPVar will cast to -- any cacheable type. A runtime error is raised only if you attempt to -- load the same PVar address with two different types. -- getPVar :: (VCacheable a) => VGet (PVar a) getPVar = VGet $ \ s -> case (vget_children s) of (c:cs) | isPVarAddr c -> do let s' = s { vget_children = cs } pvar <- addr2pvar (vget_space s) c return (VGetR pvar s') _ -> return (VGetE "getPVar") {-# INLINABLE getPVar #-} -- | Read words of size 16, 32, or 64 in little-endian or big-endian. getWord16le, getWord16be :: VGet Word16 getWord32le, getWord32be :: VGet Word32 getWord64le, getWord64be :: VGet Word64 getWord16le = consuming 2 $ VGet $ \ s -> do let p = vget_target s b0 <- peekByte p b1 <- peekByte (p `plusPtr` 1) let r = (fromIntegral b1 `shiftL` 8) .|. (fromIntegral b0 ) let s' = s { vget_target = p `plusPtr` 2 } return (VGetR r s') {-# INLINE getWord16le #-} getWord32le = consuming 4 $ VGet $ \ s -> do let p = vget_target s b0 <- peekByte p b1 <- peekByte (p `plusPtr` 1) b2 <- peekByte (p `plusPtr` 2) b3 <- peekByte (p `plusPtr` 3) let r = (fromIntegral b3 `shiftL` 24) .|. (fromIntegral b2 `shiftL` 16) .|. (fromIntegral b1 `shiftL` 8) .|. (fromIntegral b0 ) let s' = s { vget_target = p `plusPtr` 4 } return (VGetR r s') {-# INLINE getWord32le #-} getWord64le = consuming 8 $ VGet $ \ s -> do let p = vget_target s b0 <- peekByte p b1 <- peekByte (p `plusPtr` 1) b2 <- peekByte (p `plusPtr` 2) b3 <- peekByte (p `plusPtr` 3) b4 <- peekByte (p `plusPtr` 4) b5 <- peekByte (p `plusPtr` 5) b6 <- peekByte (p `plusPtr` 6) b7 <- peekByte (p `plusPtr` 7) let r = (fromIntegral b7 `shiftL` 56) .|. (fromIntegral b6 `shiftL` 48) .|. (fromIntegral b5 `shiftL` 40) .|. (fromIntegral b4 `shiftL` 32) .|. (fromIntegral b3 `shiftL` 24) .|. (fromIntegral b2 `shiftL` 16) .|. (fromIntegral b1 `shiftL` 8) .|. (fromIntegral b0 ) let s' = s { vget_target = p `plusPtr` 8 } return (VGetR r s') {-# INLINE getWord64le #-} getWord16be = consuming 2 $ VGet $ \ s -> do let p = vget_target s b0 <- peekByte p b1 <- peekByte (p `plusPtr` 1) let r = (fromIntegral b0 `shiftL` 8) .|. (fromIntegral b1 ) let s' = s { vget_target = p `plusPtr` 2 } return (VGetR r s') {-# INLINE getWord16be #-} getWord32be = consuming 4 $ VGet $ \ s -> do let p = vget_target s b0 <- peekByte p b1 <- peekByte (p `plusPtr` 1) b2 <- peekByte (p `plusPtr` 2) b3 <- peekByte (p `plusPtr` 3) let r = (fromIntegral b0 `shiftL` 24) .|. (fromIntegral b1 `shiftL` 16) .|. (fromIntegral b2 `shiftL` 8) .|. (fromIntegral b3 ) let s' = s { vget_target = p `plusPtr` 4 } return (VGetR r s') {-# INLINE getWord32be #-} getWord64be = consuming 8 $ VGet $ \ s -> do let p = vget_target s b0 <- peekByte p b1 <- peekByte (p `plusPtr` 1) b2 <- peekByte (p `plusPtr` 2) b3 <- peekByte (p `plusPtr` 3) b4 <- peekByte (p `plusPtr` 4) b5 <- peekByte (p `plusPtr` 5) b6 <- peekByte (p `plusPtr` 6) b7 <- peekByte (p `plusPtr` 7) let r = (fromIntegral b0 `shiftL` 56) .|. (fromIntegral b1 `shiftL` 48) .|. (fromIntegral b2 `shiftL` 40) .|. (fromIntegral b3 `shiftL` 32) .|. (fromIntegral b4 `shiftL` 24) .|. (fromIntegral b5 `shiftL` 16) .|. (fromIntegral b6 `shiftL` 8) .|. (fromIntegral b7 ) let s' = s { vget_target = p `plusPtr` 8 } return (VGetR r s') {-# INLINE getWord64be #-} -- | Read a Storable value. In this case, the content should be -- bytes only, since pointers aren't really meaningful when persisted. -- Data is copied to an intermediate structure via alloca to avoid -- alignment issues. getStorable :: (Storable a) => VGet a getStorable = _getStorable undefined {-# INLINE getStorable #-} _getStorable :: (Storable a) => a -> VGet a _getStorable _dummy = let n = sizeOf _dummy in consuming n $ VGet $ \ s -> do let pTgt = vget_target s let s' = s { vget_target = pTgt `plusPtr` n } a <- peekAligned (castPtr pTgt) return (VGetR a s') {-# INLINE _getStorable #-} -- | Load a number of bytes from the underlying object. A copy is -- performed in this case (typically no copy is performed by VGet, -- but the underlying pointer is ephemeral, becoming invalid after -- the current read transaction). Fails if not enough data. O(N) getByteString :: Int -> VGet BS.ByteString getByteString n | (n > 0) = _getByteString n | otherwise = return (BS.empty) {-# INLINE getByteString #-} _getByteString :: Int -> VGet BS.ByteString _getByteString n = consuming n $ VGet $ \ s -> do let pSrc = vget_target s pDst <- mallocBytes n copyBytes pDst pSrc n fp <- newForeignPtr finalizerFree pDst let r = BSI.fromForeignPtr fp 0 n let s' = s { vget_target = (pSrc `plusPtr` n) } return (VGetR r s') -- | Get a lazy bytestring. (Simple wrapper on strict bytestring.) getByteStringLazy :: Int -> VGet LBS.ByteString getByteStringLazy n = LBS.fromStrict <$> getByteString n {-# INLINE getByteStringLazy #-} -- | Get a character from UTF-8 format. Assumes a valid encoding. -- (In case of invalid encoding, arbitrary characters may be returned.) getc :: VGet Char getc = _c0 >>= \ b0 -> if (b0 < 0x80) then return $! chr b0 else if (b0 < 0xe0) then _getc2 (b0 `xor` 0xc0) else if (b0 < 0xf0) then _getc3 (b0 `xor` 0xe0) else _getc4 (b0 `xor` 0xf0) -- get UTF-8 of size 2,3,4 bytes _getc2, _getc3, _getc4 :: Int -> VGet Char _getc2 b0 = _cc >>= \ b1 -> return $! chr ((b0 `shiftL` 6) .|. b1) _getc3 b0 = _cc >>= \ b1 -> _cc >>= \ b2 -> return $! chr ((b0 `shiftL` 12) .|. (b1 `shiftL` 6) .|. b2) _getc4 b0 = _cc >>= \ b1 -> _cc >>= \ b2 -> _cc >>= \ b3 -> return $! chr ((b0 `shiftL` 18) .|. (b1 `shiftL` 12) .|. (b2 `shiftL` 6) .|. b3) _c0,_cc :: VGet Int _c0 = fromIntegral <$> getWord8 _cc = (fromIntegral . xor 0x80) <$> getWord8 {-# INLINE _c0 #-} {-# INLINE _cc #-} -- | label will modify the error message returned from the -- argument operation; it can help contextualize parse errors. label :: ShowS -> VGet a -> VGet a label sf op = VGet $ \ s -> _vget op s >>= \ r -> return $ case r of VGetE emsg -> VGetE (sf emsg) ok@(VGetR _ _) -> ok -- | lookAhead will parse a value, but not consume any input. lookAhead :: VGet a -> VGet a lookAhead op = VGet $ \ s -> _vget op s >>= \ result -> return $ case result of VGetR r _ -> VGetR r s other -> other -- | lookAheadM will consume input only if it returns `Just a`. lookAheadM :: VGet (Maybe a) -> VGet (Maybe a) lookAheadM op = VGet $ \ s -> _vget op s >>= \ result -> return $ case result of VGetR Nothing _ -> VGetR Nothing s other -> other -- | lookAheadE will consume input only if it returns `Right b`. lookAheadE :: VGet (Either a b) -> VGet (Either a b) lookAheadE op = VGet $ \ s -> _vget op s >>= \ result -> return $ case result of VGetR l@(Left _) _ -> VGetR l s other -> other