{-# LANGUAGE OverloadedStrings #-}
module Codec.Compression.Snappy.Framing
(
Checksum
, Chunk (..)
, DecodeError
, encode
, encode'
, decode
, decode'
, decodeVerify
, decodeVerify'
, decodeM
, decodeVerifyM
, checksum
, streamIdentifier
, verify
)
where
import Data.ByteString (ByteString)
import Data.Binary (Binary(..))
import Data.Binary.Get
import Data.Binary.Put
import Data.Bits
import Data.Digest.CRC32C
import Data.Word
import qualified Data.Binary as Binary
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Codec.Compression.Snappy as Snappy
type Checksum = Word32
type DecodeError = (ByteOffset, String)
data Chunk = StreamIdentifier
| Compressed !Checksum !ByteString
| Uncompressed !Checksum !ByteString
| Skippable !Word8
| Unskippable !Word8
deriving (Eq, Show)
streamStart :: [Word8]
streamStart = [0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59]
maxUncompressed :: Int
maxUncompressed = 65536
minCompressible :: Int
minCompressible = 18
instance Binary Chunk where
put StreamIdentifier = mapM_ put streamStart
put (Compressed chk dat) = putWord8 0x00 >> putData chk dat
put (Uncompressed chk dat) = putWord8 0x01 >> putData chk dat
put (Skippable x) = put x
put (Unskippable x) = put x
get = do
chunktype <- getWord8
case chunktype of
0xff -> skip (length streamStart - 1) >> return StreamIdentifier
0x00 -> uncurry Compressed <$> getData
0x01 -> uncurry Uncompressed <$> getData
x | x >= 0x02 && x <= 0x7f -> return $ Unskippable x
| x >= 0x80 && x <= 0xfe -> return $ Skippable x
| otherwise -> error "junk chunk type"
getData :: Get (Checksum, ByteString)
getData = do
len <- getWord24le
chk <- getWord32le
dat <- getByteString . fromIntegral $ len - 4
return (chk, dat)
putData :: Checksum -> ByteString -> Put
putData chk dat = do
putWord24le (B.length dat + 4)
putWord32le chk
putByteString dat
getWord24le :: Get Word32
getWord24le = do
(a,b,c) <- (,,) <$> getWord8 <*> getWord8 <*> getWord8
return $ (fromIntegral a :: Word32)
.|. (fromIntegral b :: Word32) `shiftL` 8
.|. (fromIntegral c :: Word32) `shiftL` 16
putWord24le :: Int -> Put
putWord24le x = mapM_ putWord8 bytes
where
bytes = [ fromIntegral ((fromIntegral x :: Word32) `shiftR` 0) :: Word8
, fromIntegral ((fromIntegral x :: Word32) `shiftR` 8) :: Word8
, fromIntegral ((fromIntegral x :: Word32) `shiftR` 16) :: Word8
]
checksum :: ByteString -> Checksum
checksum a =
let chksum = crc32c a
masked = ((chksum `shiftR` 15) .|. (chksum `shiftL` 17)) + 0xa282ead8
in masked
verify :: Chunk -> Maybe Chunk
verify u@(Uncompressed chk d) = if chk == checksum d then Just u else Nothing
verify (Compressed chk d) = if ok then Just (Uncompressed chk d') else Nothing
where
d' = Snappy.decompress d
ok = chk == checksum d'
verify (Unskippable _) = Nothing
verify c = Just c
streamIdentifier :: BL.ByteString
streamIdentifier = Binary.encode StreamIdentifier
encode :: BL.ByteString -> (Chunk, Maybe BL.ByteString)
encode = go . split
where
go (x,xs) = (chunk $ BL.toStrict x, leftover' xs)
chunk c
| shouldCompress c = Compressed (checksum c) (Snappy.compress c)
| otherwise = Uncompressed (checksum c) c
split = BL.splitAt (fromIntegral maxUncompressed)
leftover' x
| BL.null x = Nothing
| otherwise = Just x
encode' :: ByteString -> (Chunk, Maybe ByteString)
encode' = go . split
where
go (x,xs) = (chunk x, leftover xs)
chunk c
| shouldCompress c = Compressed (checksum c) (Snappy.compress c)
| otherwise = Uncompressed (checksum c) c
split = B.splitAt maxUncompressed
decode :: BL.ByteString -> (Either DecodeError Chunk, Maybe ByteString)
decode = dec . feed
decodeVerify :: BL.ByteString -> (Either DecodeError Chunk, Maybe ByteString)
decodeVerify = decV . feed
decode' :: ByteString -> (Either DecodeError Chunk, Maybe ByteString)
decode' = dec . feed'
decodeVerify' :: ByteString -> (Either DecodeError Chunk, Maybe ByteString)
decodeVerify' = decV . feed'
decodeM :: Monad m
=> m (Maybe ByteString)
-> m (Either DecodeError Chunk, Maybe ByteString)
decodeM pull = go (runGetIncremental (get :: Get Chunk))
where
go (Partial k) = go . k =<< pull
go (Fail r n m) = return (Left (n, m), leftover r)
go (Done r _ c) = return (Right c, leftover r)
decodeVerifyM :: Monad m
=> m (Maybe ByteString)
-> m (Either DecodeError Chunk, Maybe ByteString)
decodeVerifyM pull = go (runGetIncremental (get :: Get Chunk))
where
go (Partial k) = go . k =<< pull
go (Fail r n m) = return (Left (n, m), leftover r)
go (Done r n c) = case verify c of
Just c' -> return (Right c', leftover r)
Nothing -> go (Fail r n "verification failure")
shouldCompress :: ByteString -> Bool
shouldCompress x = B.length x >= minCompressible
{-# INLINEABLE shouldCompress #-}
feed :: BL.ByteString -> Decoder Chunk
feed = pushChunks $ runGetIncremental get
{-# INLINEABLE feed #-}
feed' :: ByteString -> Decoder Chunk
feed' = pushChunk $ runGetIncremental get
{-# INLINEABLE feed' #-}
dec :: Decoder Chunk -> (Either DecodeError Chunk, Maybe ByteString)
dec (Partial k) = dec (k Nothing)
dec (Fail r n m) = (Left (n, m), leftover r)
dec (Done r _ c) = (Right c, leftover r)
{-# INLINEABLE dec #-}
decV :: Decoder Chunk -> (Either DecodeError Chunk, Maybe ByteString)
decV (Partial k) = decV (k Nothing)
decV (Fail r n m) = (Left (n, m), leftover r)
decV (Done r n c) = case verify c of
Just c' -> (Right c', leftover r)
Nothing -> decV (Fail r n "verification failure")
{-# INLINEABLE decV #-}
leftover :: ByteString -> Maybe ByteString
leftover x
| B.null x = Nothing
| otherwise = Just x
{-# INLINEABLE leftover #-}