module Codec.Compression.GZip.GUnZip (gunzip) where import Codec.Compression.Deflate.Inflate import Codec.Compression.LazyStateT import Codec.Compression.UnsafeInterleave import Codec.Compression.Utils import Data.Char import Data.IORef import System.IO import System.IO.Unsafe import Control.Monad.State import Control.Monad.Trans import Data.Bits import Data.Word import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as BS type GUnZipM a = LazyStateT ByteString IO a readBytes :: Integral a => a -> GUnZipM ByteString readBytes n = do xs <- get case genericSplitAtExactlyBS n xs of Just (ys, zs) -> do put zs return ys Nothing -> failWith $ InsufficientBytes "readBytes" readNulTerminatedLatin1 :: GUnZipM ByteString readNulTerminatedLatin1 = do xs <- get case BS.break (0 ==) xs of (ys, zs) | not (BS.null zs) -> do put (BS.tail zs) return ys _ -> failWith $ InsufficientBytes "readNulTerminatedLatin1" readWord8 :: GUnZipM Word8 readWord8 = do xs <- get case headTail xs of Just (x, xs') -> do put xs' return x Nothing -> failWith $ InsufficientBytes "readWord8" readWord16 :: GUnZipM Word16 readWord16 = do xs0 <- get case do (x1, xs1) <- headTail xs0 (x2, xs2) <- headTail xs1 return (x1, x2, xs2) of Just (x1, x2, xs') -> do put xs' return ( (fromIntegral x1) + shiftL (fromIntegral x2) 8) Nothing -> failWith $ InsufficientBytes "readWord16" readWord32 :: GUnZipM Word32 readWord32 = do xs0 <- get case do (x1, xs1) <- headTail xs0 (x2, xs2) <- headTail xs1 (x3, xs3) <- headTail xs2 (x4, xs4) <- headTail xs3 return (x1, x2, x3, x4, xs4) of Just (x1, x2, x3, x4, xs') -> do put xs' return ( (fromIntegral x1) + shiftL (fromIntegral x2) 8 + shiftL (fromIntegral x3) 16 + shiftL (fromIntegral x4) 24) Nothing -> failWith $ InsufficientBytes "readWord32" ----- data Error = InsufficientBytes String | BadID1 Word8 | BadID2 Word8 | UnknownCompressionMethod Word8 | ReservedFlagBitSet Int deriving Show failWith :: Error -> GUnZipM a failWith e = error ("Failure: Codec.Compression.GZip.GUnZip: " ++ show e) readHeader :: GUnZipM () -- XXX Should return Header? readHeader = do id1 <- readWord8 unless (id1 == 31) $ failWith (BadID1 id1) id2 <- readWord8 unless (id2 == 139) $ failWith (BadID2 id2) cm <- readWord8 unless (cm == 8) $ failWith (UnknownCompressionMethod cm) flg <- readWord8 -- We currently ignore flg bit 0 (FTEXT) when (testBit flg 5) $ failWith (ReservedFlagBitSet 5) when (testBit flg 6) $ failWith (ReservedFlagBitSet 6) when (testBit flg 7) $ failWith (ReservedFlagBitSet 7) _mtime <- readWord32 -- We ignore this for now _xfl <- readWord8 -- We ignore this for now _os <- readWord8 -- We ignore this for now -- We ignore this for now _m_fextra <- if testBit flg 2 then do xlen <- readWord16 extra <- readBytes xlen return (Just extra) else return Nothing -- We ignore this for now _m_fname <- if testBit flg 3 then liftM Just readNulTerminatedLatin1 else return Nothing -- We ignore this for now _m_fcomment <- if testBit flg 4 then liftM Just readNulTerminatedLatin1 else return Nothing _m_hcrc <- if testBit flg 1 then liftM Just readWord16 else return Nothing return () readFooter :: GUnZipM () -- XXX Should return Footer? readFooter = do _crc32 <- readWord32 -- We ignore this for now _isize <- readWord32 -- We ignore this for now return () readChunks :: GUnZipM ByteString readChunks = do bs <- get if BS.null bs then return BS.empty else do readHeader xs <- get put BS.empty -- We don't want the monad to hold on to the -- whole input var <- liftIO $ newIORef (error "IORef used before initialised") ws <- liftIO $ inflate var xs ws' <- unsafeInterleave $ do rest <- liftIO $ readIORef var put rest readFooter readChunks return (ws `myAppend` ws') gunzip :: ByteString -> ByteString gunzip xs = unsafePerformIO $ evalLazyStateT readChunks xs {- ... (the compressed data) CRC32 (4 bytes) CRC32 of the uncompressed data ISIZE (4 bytes) uncompressed data size mod 2^32 -}