{-# LANGUAGE RankNTypes, MagicHash, BangPatterns, CPP #-} ----------------------------------------------------------------------------- -- | -- Module : Data.Binary.Bits.Get -- Copyright : (c) Lennart Kolmodin 2010-2011 -- License : BSD3-style (see LICENSE) -- -- Maintainer : kolmodin@gmail.com -- Stability : experimental -- Portability : portable (should run where the package binary runs) -- -- Parse bits easily. Parsing can be done either in a monadic style, or more -- efficiently, using the 'Applicative' style. -- -- For the monadic style, write your parser as a 'BitGet' monad using the -- -- * 'getBool' -- -- * 'getWord8' -- -- * 'getWord16be' -- -- * 'getWord32be' -- -- * 'getWord64be' -- -- * 'getByteString' -- -- functions and run it with 'runBitGet'. -- -- For the applicative style, compose the fuctions -- -- * 'bool' -- -- * 'word8' -- -- * 'word16be' -- -- * 'word32be' -- -- * 'word64be' -- -- * 'byteString' -- -- to make a 'Block'. -- Use 'block' to turn it into the 'BitGet' monad to be able to run it with -- 'runBitGet'. ----------------------------------------------------------------------------- module Data.Binary.Bits.Get ( -- * BitGet monad -- $bitget BitGet , runBitGet -- ** Get bytes , getBool , getWord8 , getWord16be , getWord32be , getWord64be -- * Blocks -- $blocks , Block , block -- ** Read in Blocks , bool , word8 , word16be , word32be , word64be , byteString , Data.Binary.Bits.Get.getByteString , Data.Binary.Bits.Get.getLazyByteString , Data.Binary.Bits.Get.isEmpty ) where import Data.Binary.Get as B ( runGet, Get, getByteString, getLazyByteString, isEmpty ) import Data.Binary.Get.Internal as B ( get, put, ensureN ) import Data.ByteString as B import qualified Data.ByteString.Lazy as L import Data.ByteString.Unsafe import Data.Bits import Data.Word import Control.Applicative import Prelude as P #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) import GHC.Base import GHC.Word #endif -- $bitget -- Parse bits using a monad. -- -- @ --myBitParser :: 'Get' ('Word8', 'Word8') --myBitParser = 'runGetBit' parse4by4 -- --parse4by4 :: 'BitGet' ('Word8', 'Word8') --parse4by4 = do -- bits <- 'getWord8' 4 -- more <- 'getWord8' 4 -- return (bits,more) -- @ -- $blocks -- Parse more efficiently in blocks. Each block is read with only one boundry -- check (checking that there is enough input) as the size of the block can be -- calculated statically. This is somewhat limiting as you cannot make the -- parsing depend on the input being parsed. -- -- @ --data IPV6Header = IPV6Header { -- ipv6Version :: 'Word8' -- , ipv6TrafficClass :: 'Word8' -- , ipv6FlowLabel :: 'Word32 -- , ipv6PayloadLength :: 'Word16' -- , ipv6NextHeader :: 'Word8' -- , ipv6HopLimit :: 'Word8' -- , ipv6SourceAddress :: 'ByteString' -- , ipv6DestinationAddress :: 'ByteString' -- } -- -- ipv6headerblock = -- IPV6Header '<$>' 'word8' 4 -- '<*>' 'word8' 8 -- '<*>' 'word32be' 24 -- '<*>' 'word16be' 16 -- '<*>' 'word8' 8 -- '<*>' 'word8' 8 -- '<*>' 'byteString' 16 -- '<*>' 'byteString' 16 -- --ipv6Header :: 'Get' IPV6Header --ipv6Header = 'runBitGet' ('block' ipv6headerblock) -- @ data S = S {-# UNPACK #-} !ByteString -- Input {-# UNPACK #-} !Int -- Bit offset (0-7) deriving (Show) -- | A block that will be read with only one boundry check. Needs to know the -- number of bits in advance. data Block a = Block Int (S -> a) instance Functor Block where fmap f (Block i p) = Block i (\s -> f (p s)) instance Applicative Block where pure a = Block 0 (\_ -> a) (Block i p) <*> (Block j q) = Block (i+j) (\s -> p s $ q (incS i s)) (Block i _) *> (Block j q) = Block (i+j) (q . incS i) (Block i p) <* (Block j _) = Block (i+j) p -- | Get a block. Will be read with one single boundry check, and -- therefore requires a statically known number of bits. -- Build blocks using 'bool', 'word8', 'word16be', 'word32be', 'word64be', -- 'byteString' and 'Applicative'. block :: Block a -> BitGet a block (Block i p) = do ensureBits i s <- getState putState $! (incS i s) return $! p s incS :: Int -> S -> S incS o (S bs n) = let !o' = (n+o) !d = o' `shiftR` 3 !n' = o' .&. make_mask 3 in S (unsafeDrop d bs) n' -- | make_mask 3 = 00000111 make_mask :: (Bits a, Num a) => Int -> a make_mask n = (1 `shiftL` fromIntegral n) - 1 {-# SPECIALIZE make_mask :: Int -> Int #-} {-# SPECIALIZE make_mask :: Int -> Word #-} {-# SPECIALIZE make_mask :: Int -> Word8 #-} {-# SPECIALIZE make_mask :: Int -> Word16 #-} {-# SPECIALIZE make_mask :: Int -> Word32 #-} {-# SPECIALIZE make_mask :: Int -> Word64 #-} bit_offset :: Int -> Int bit_offset n = make_mask 3 .&. n byte_offset :: Int -> Int byte_offset n = n `shiftR` 3 readBool :: S -> Bool readBool (S bs n) = testBit (unsafeHead bs) (7-n) {-# INLINE readWord8 #-} readWord8 :: Int -> S -> Word8 readWord8 n (S bs o) -- no bits at all, return 0 | n == 0 = 0 -- all bits are in the same byte -- we just need to shift and mask them right | n <= 8 - o = let w = unsafeHead bs m = make_mask n w' = (w `shiftr_w8` (8 - o - n)) .&. m in w' -- the bits are in two different bytes -- make a word16 using both bytes, and then shift and mask | n <= 8 = let w = (fromIntegral (unsafeHead bs) `shiftl_w16` 8) .|. (fromIntegral (unsafeIndex bs 1)) m = make_mask n w' = (w `shiftr_w16` (16 - o - n)) .&. m in fromIntegral w' {-# INLINE readWord16be #-} readWord16be :: Int -> S -> Word16 readWord16be n s@(S bs o) -- 8 or fewer bits, use readWord8 | n <= 8 = fromIntegral (readWord8 n s) -- handle 9 or more bits, stored in two bytes -- no offset, plain and simple 16 bytes | o == 0 && n == 16 = let msb = fromIntegral (unsafeHead bs) lsb = fromIntegral (unsafeIndex bs 1) w = (msb `shiftl_w16` 8) .|. lsb in w -- no offset, but not full 16 bytes | o == 0 = let msb = fromIntegral (unsafeHead bs) lsb = fromIntegral (unsafeIndex bs 1) w = (msb `shiftl_w16` (n-8)) .|. (lsb `shiftr_w16` (16-n)) in w -- with offset, and n=9-16 | n <= 16 = readWithOffset s shiftl_w16 shiftr_w16 n | otherwise = error "readWord16be: tried to read more than 16 bits" {-# INLINE readWord32be #-} readWord32be :: Int -> S -> Word32 readWord32be n s@(S _ o) -- 8 or fewer bits, use readWord8 | n <= 8 = fromIntegral (readWord8 n s) -- 16 or fewer bits, use readWord16be | n <= 16 = fromIntegral (readWord16be n s) | o == 0 = readWithoutOffset s shiftl_w32 shiftr_w32 n | n <= 32 = readWithOffset s shiftl_w32 shiftr_w32 n | otherwise = error "readWord32be: tried to read more than 32 bits" {-# INLINE readWord64be #-} readWord64be :: Int -> S -> Word64 readWord64be n s@(S _ o) -- 8 or fewer bits, use readWord8 | n <= 8 = fromIntegral (readWord8 n s) -- 16 or fewer bits, use readWord16be | n <= 16 = fromIntegral (readWord16be n s) | o == 0 = readWithoutOffset s shiftl_w64 shiftr_w64 n | n <= 64 = readWithOffset s shiftl_w64 shiftr_w64 n | otherwise = error "readWord64be: tried to read more than 64 bits" readByteString :: Int -> S -> ByteString readByteString n s@(S bs o) -- no offset, easy. | o == 0 = unsafeTake n bs -- offset. ugg. this is really naive and slow. but also pretty easy :) | otherwise = B.pack (P.map (readWord8 8) (P.take n (iterate (incS 8) s))) readWithoutOffset :: (Bits a, Num a) => S -> (a -> Int -> a) -> (a -> Int -> a) -> Int -> a readWithoutOffset (S bs o) shifterL shifterR n | o /= 0 = error "readWithoutOffset: there is an offset" | bit_offset n == 0 && byte_offset n <= 4 = let segs = byte_offset n bn 0 = fromIntegral (unsafeHead bs) bn n = (bn (n-1) `shifterL` 8) .|. fromIntegral (unsafeIndex bs n) in bn (segs-1) | n <= 64 = let segs = byte_offset n o' = bit_offset (n - 8 + o) bn 0 = fromIntegral (unsafeHead bs) bn n = (bn (n-1) `shifterL` 8) .|. fromIntegral (unsafeIndex bs n) msegs = bn (segs-1) `shifterL` o' last = (fromIntegral (unsafeIndex bs segs)) `shifterR` (8 - o') w = msegs .|. last in w readWithOffset :: (Bits a, Num a) => S -> (a -> Int -> a) -> (a -> Int -> a) -> Int -> a readWithOffset (S bs o) shifterL shifterR n | n <= 64 = let bits_in_msb = 8 - o (n',top) = (n - bits_in_msb , (fromIntegral (unsafeHead bs) .&. make_mask bits_in_msb) `shifterL` n') segs = byte_offset n' bn 0 = 0 bn n = (bn (n-1) `shifterL` 8) .|. fromIntegral (unsafeIndex bs n) o' = bit_offset n' mseg = bn segs `shifterL` o' last | o' > 0 = (fromIntegral (unsafeIndex bs (segs + 1))) `shifterR` (8 - o') | otherwise = 0 w = top .|. mseg .|. last in w ------------------------------------------------------------------------ -- | 'BitGet' is a monad, applicative and a functor. See 'runBitGet' -- for how to run it. newtype BitGet a = B { runState :: S -> Get (S,a) } instance Monad BitGet where return x = B $ \s -> return (s,x) fail str = B $ \(S inp n) -> putBackState inp n >> fail str (B f) >>= g = B $ \s -> do (s',a) <- f s runState (g a) s' instance Functor BitGet where fmap f m = m >>= \a -> return (f a) instance Applicative BitGet where pure x = return x fm <*> m = fm >>= \f -> m >>= \v -> return (f v) -- | Run a 'BitGet' within the Binary packages 'Get' monad. If a byte has -- been partially consumed it will be discarded once 'runBitGet' is finished. runBitGet :: BitGet a -> Get a runBitGet bg = do s <- mkInitState ((S str' n),a) <- runState bg s putBackState str' n return a mkInitState :: Get S mkInitState = do str <- get put B.empty return (S str 0) putBackState :: B.ByteString -> Int -> Get () putBackState bs n = do remaining <- get put (B.drop (if n==0 then 0 else 1) bs `B.append` remaining) getState :: BitGet S getState = B $ \s -> return (s,s) putState :: S -> BitGet () putState s = B $ \_ -> return (s,()) -- | Make sure there are at least @n@ bits. ensureBits :: Int -> BitGet () ensureBits n = do (S bs o) <- getState if n <= (B.length bs * 8 - o) then return () else do let currentBits = B.length bs * 8 - o let byteCount = (n - currentBits + 7) `div` 8 B $ \_ -> do B.ensureN byteCount bs' <- B.get put B.empty return (S (bs`append`bs') o, ()) -- | Get 1 bit as a 'Bool'. getBool :: BitGet Bool getBool = block bool -- | Get @n@ bits as a 'Word8'. @n@ must be within @[0..8]@. getWord8 :: Int -> BitGet Word8 getWord8 n = block (word8 n) -- | Get @n@ bits as a 'Word16'. @n@ must be within @[0..16]@. getWord16be :: Int -> BitGet Word16 getWord16be n = block (word16be n) -- | Get @n@ bits as a 'Word32'. @n@ must be within @[0..32]@. getWord32be :: Int -> BitGet Word32 getWord32be n = block (word32be n) -- | Get @n@ bits as a 'Word64'. @n@ must be within @[0..64]@. getWord64be :: Int -> BitGet Word64 getWord64be n = block (word64be n) -- | Get @n@ bytes as a 'ByteString'. getByteString :: Int -> BitGet ByteString getByteString n = block (byteString n) -- | Get @n@ bytes as a lazy ByteString. getLazyByteString :: Int -> BitGet L.ByteString getLazyByteString n = do (S _ o) <- getState case o of 0 -> B $ \ (S bs o') -> do putBackState bs o' lbs <- B.getLazyByteString (fromIntegral n) return (S B.empty 0, lbs) _ -> L.fromChunks . (:[]) <$> Data.Binary.Bits.Get.getByteString n -- | Test whether all input has been consumed, i.e. there are no remaining -- undecoded bytes. isEmpty :: BitGet Bool isEmpty = B $ \ (S bs o) -> if B.null bs then B.isEmpty >>= \e -> return (S bs o, e) else return (S bs o, False) -- | Read a 1 bit 'Bool'. bool :: Block Bool bool = Block 1 readBool -- | Read @n@ bits as a 'Word8'. @n@ must be within @[0..8]@. word8 :: Int -> Block Word8 word8 n = Block n (readWord8 n) -- | Read @n@ bits as a 'Word16'. @n@ must be within @[0..16]@. word16be :: Int -> Block Word16 word16be n = Block n (readWord16be n) -- | Read @n@ bits as a 'Word32'. @n@ must be within @[0..32]@. word32be :: Int -> Block Word32 word32be n = Block n (readWord32be n) -- | Read @n@ bits as a 'Word64'. @n@ must be within @[0..64]@. word64be :: Int -> Block Word64 word64be n = Block n (readWord64be n) -- | Read @n@ bytes as a 'ByteString'. byteString :: Int -> Block ByteString byteString n | n > 0 = Block (n*8) (readByteString n) | otherwise = Block 0 (\_ -> B.empty) ------------------------------------------------------------------------ -- Unchecked shifts, from the package binary shiftl_w16 :: Word16 -> Int -> Word16 shiftl_w32 :: Word32 -> Int -> Word32 shiftl_w64 :: Word64 -> Int -> Word64 #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) shiftl_w8 (W8# w) (I# i) = W8# (w `uncheckedShiftL#` i) shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i) shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i) shiftr_w8 (W8# w) (I# i) = W8# (w `uncheckedShiftRL#` i) shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i) shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i) #if WORD_SIZE_IN_BITS < 64 shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i) shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i) #if __GLASGOW_HASKELL__ <= 606 -- Exported by GHC.Word in GHC 6.8 and higher foreign import ccall unsafe "stg_uncheckedShiftL64" uncheckedShiftL64# :: Word64# -> Int# -> Word64# foreign import ccall unsafe "stg_uncheckedShiftRL64" uncheckedShiftRL64# :: Word64# -> Int# -> Word64# #endif #else shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i) shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i) #endif #else shiftl_w8 = shiftL shiftl_w16 = shiftL shiftl_w32 = shiftL shiftl_w64 = shiftL shiftr_w8 = shiftR shiftr_w16 = shiftR shiftr_w32 = shiftR shiftr_w64 = shiftR #endif