{-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fglasgow-exts #-} -- for unboxed shifts ----------------------------------------------------------------------------- -- | -- Module : Data.Binary.Strict.IncrementalGet -- Copyright : Lennart Kolmodin -- License : BSD3-style (see LICENSE) -- -- Maintainer : Adam Langley -- Stability : experimental -- Portability : portable to Hugs and GHC. -- -- This is a version of the Get monad for incremental parsing. The parser is -- written as if a single, huge, strict ByteString was to be parsed. It -- produces results as it parses by calling yield. -- -- However, if the parser runs out of data, rather than failing the caller sees -- a Partial result, which includes the list of yielded values so far and a -- continuation. By calling the continuation with more data, the parser -- continues, none the wiser. -- -- Take the following example -- -- > testParse = do -- > getWord16be >>= yield -- > testParse -- > -- > test = runGet testParse $ B.pack [1,0,0] -- -- Here, @testParse@ never completes, but yields Word16 values forever. It's -- started with a 3 byte ByteString and will yield a single value before -- running out of data. Thus, @test = Partial cont [256]@. Calling @cont@ -- with a single extra byte will yield another Word16 value etc. -- -- The lookahead functions have been removed from this parser because of their -- incompatibility with the incremental monad at the moment. ----------------------------------------------------------------------------- #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) #include "MachDeps.h" #endif module Data.Binary.Strict.IncrementalGet ( -- * The Get type Get , Result(..) , runGet -- * Utility , skip , yield , bytesRead , remaining , isEmpty -- * Parsing particular types , getWord8 -- ** ByteStrings , getByteString -- ** Big-endian reads , getWord16be , getWord32be , getWord64be -- ** Little-endian reads , getWord16le , getWord32le , getWord64le -- ** Host-endian, unaligned reads , getWordhost , getWord16host , getWord32host , getWord64host ) where import qualified Data.ByteString as B import qualified Data.ByteString.Internal as B import Foreign #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) import GHC.Base import GHC.Word #endif #ifndef __HADDOCK__ -- | The parse state data S r = S {-# UNPACK #-} !B.ByteString -- ^ input {-# UNPACK #-} !Int -- ^ bytes read [r] -- ^ results so far, in reverse order #endif -- | The result of a partial parse data Result a = Failed String -- ^ the parse failed with the given error message | Finished B.ByteString [a] -- ^ the parse finished and produced the given list of -- results doing so. Any unparsed data is returned. | Partial (B.ByteString -> Result a) [a] -- ^ the parse ran out of data before finishing, but produced -- the given list of results before doing so. To continue the -- parse pass more data to the given continuation instance (Show a) => Show (Result a) where show (Failed err) = "Failed " ++ err show (Finished _ rs) = "Finished " ++ show rs show (Partial _ rs) = "Partial " ++ show rs newtype Get r a = Get { unGet :: S r -> (a -> S r -> Result r) -> Result r } instance Functor (Get r) where fmap f m = Get (\s -> \cont -> unGet m s (cont . f)) instance Monad (Get r) where return a = Get (\s -> \k -> k a s) m >>= k = Get (\s -> \cont -> unGet m s (\a -> \s' -> unGet (k a) s' cont)) fail err = Get (const $ const $ Failed err) get :: Get r (S r) get = Get (\s -> \k -> k s s) -- | Return a value from the parse yield :: r -> Get r () yield v = Get (\(S a b c) -> \cont -> cont () (S a b (v : c))) initState :: B.ByteString -> S r initState input = S input 0 [] {-# INLINE initState #-} -- | Start a parser and return the first Result. runGet :: Get r a -> B.ByteString -> Result r runGet m input = unGet m (initState input) (const $ \(S s _ rs) -> Finished s $ reverse rs) -- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available. skip :: Int -> Get r () skip n = readN (fromIntegral n) (const ()) -- | Get the total number of bytes read to this point. bytesRead :: Get r Int bytesRead = do S _ b _ <- get return b -- | Get the number of remaining unparsed bytes. -- Useful for checking whether all input has been consumed. remaining :: Get r Int remaining = do S s _ _ <- get return (fromIntegral (B.length s)) -- | Test whether all input has been consumed, -- i.e. there are no remaining unparsed bytes. isEmpty :: Get r Bool isEmpty = do S s _ _ <- get return $ B.null s ------------------------------------------------------------------------ -- Utility with ByteStrings -- | An efficient 'get' method for strict ByteStrings. Fails if fewer -- than @n@ bytes are left in the input. getByteString :: Int -> Get r B.ByteString getByteString n = readN n id {-# INLINE getByteString #-} -- | Pull @n@ bytes from the input, as a strict ByteString. getBytes :: Int -> Get r B.ByteString getBytes n = Get $ \(S s offset values) -> \cont -> if n <= B.length s then let (consume, rest) = B.splitAt n s in cont consume $ S rest (offset + fromIntegral n) values else Partial (\s' -> unGet (getBytes n) (S (B.append s s') offset []) cont) $ reverse values {-# INLINE getBytes #-} -- Pull n bytes from the input, and apply a parser to those bytes, -- yielding a value. If less than @n@ bytes are available, fail with an -- error. This wraps @getBytes@. readN :: Int -> (B.ByteString -> a) -> Get r a readN n f = fmap f $ getBytes n {-# INLINE readN #-} getPtr :: Storable a => Int -> Get r a getPtr n = do (fp, o, _) <- readN n B.toForeignPtr return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o) {-# INLINE getPtr #-} getWord8 :: Get r Word8 getWord8 = getPtr (sizeOf (undefined :: Word8)) {-# INLINE getWord8 #-} -- | Read a Word16 in big endian format getWord16be :: Get r Word16 getWord16be = do s <- readN 2 id return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|. (fromIntegral (s `B.index` 1)) -- | Read a Word16 in little endian format getWord16le :: Get r Word16 getWord16le = do s <- readN 2 id return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|. (fromIntegral (s `B.index` 0) ) {-# INLINE getWord16le #-} -- | Read a Word32 in big endian format getWord32be :: Get r Word32 getWord32be = do s <- readN 4 id return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|. (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|. (fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|. (fromIntegral (s `B.index` 3) ) {-# INLINE getWord32be #-} -- | Read a Word32 in little endian format getWord32le :: Get r Word32 getWord32le = do s <- readN 4 id return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|. (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|. (fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|. (fromIntegral (s `B.index` 0) ) {-# INLINE getWord32le #-} -- | Read a Word64 in big endian format getWord64be :: Get r Word64 getWord64be = do s <- readN 8 id return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|. (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|. (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|. (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|. (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|. (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|. (fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|. (fromIntegral (s `B.index` 7) ) {-# INLINE getWord64be #-} -- | Read a Word64 in little endian format getWord64le :: Get r Word64 getWord64le = do s <- readN 8 id return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|. (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|. (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|. (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|. (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|. (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|. (fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|. (fromIntegral (s `B.index` 0) ) ------------------------------------------------------------------------ -- Host-endian reads -- | /O(1)./ Read a single native machine word. The word is read in -- host order, host endian form, for the machine you're on. On a 64 bit -- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes. getWordhost :: Get r Word getWordhost = getPtr (sizeOf (undefined :: Word)) {-# INLINE getWordhost #-} -- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness. getWord16host :: Get r Word16 getWord16host = getPtr (sizeOf (undefined :: Word16)) {-# INLINE getWord16host #-} -- | /O(1)./ Read a Word32 in native host order and host endianness. getWord32host :: Get r Word32 getWord32host = getPtr (sizeOf (undefined :: Word32)) {-# INLINE getWord32host #-} -- | /O(1)./ Read a Word64 in native host order and host endianess. getWord64host :: Get r Word64 getWord64host = getPtr (sizeOf (undefined :: Word64)) {-# INLINE getWord64host #-} {-# INLINE getWord64le #-} shiftl_w16 :: Word16 -> Int -> Word16 shiftl_w32 :: Word32 -> Int -> Word32 shiftl_w64 :: Word64 -> Int -> Word64 #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i) shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i) #if WORD_SIZE_IN_BITS < 64 shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i) #else shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i) #endif #else shiftl_w16 = shiftL shiftl_w32 = shiftL shiftl_w64 = shiftL #endif