-- |Stream the creation of a zip file, e.g., as it's being uploaded. {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ViewPatterns #-} module Codec.Archive.Zip.Conduit.Zip ( zipStream , ZipOptions(..) , ZipInfo(..) , defaultZipOptions , ZipEntry(..) , ZipData(..) , zipFileData ) where import qualified Codec.Compression.Zlib.Raw as Z import Control.Arrow ((&&&), (+++), left) import Control.Monad (when) import Control.Monad.Base (MonadBase) import Control.Monad.Catch (MonadThrow) import Control.Monad.Primitive (PrimMonad) import Control.Monad.State.Strict (StateT, get) import Control.Monad.Trans.Resource (MonadResource) import qualified Data.Binary.Put as P import Data.Bits (bit, shiftL, shiftR, (.|.)) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Lazy as BSL import qualified Data.Conduit as C import qualified Data.Conduit.Binary as CB import Data.Conduit.Lift (stateC, execStateC) import Data.Conduit.Serialization.Binary (sourcePut) import qualified Data.Conduit.Zlib as CZ import Data.Digest.CRC32 (crc32) import Data.Either (isLeft) import Data.Maybe (fromMaybe, fromJust) import Data.Monoid ((<>)) import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian) import Data.Word (Word16, Word64) import Codec.Archive.Zip.Conduit.Types import Codec.Archive.Zip.Conduit.Internal -- |Options controlling zip file parameters and features data ZipOptions = ZipOptions { zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases); this is automatically enabled for any files of known size (e.g., 'zipEntrySize') , zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package) , zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file } defaultZipOptions :: ZipOptions defaultZipOptions = ZipOptions { zipOpt64 = False , zipOptCompressLevel = -1 , zipOptInfo = ZipInfo { zipComment = BS.empty } } infixr 7 ?* (?*) :: Num a => Bool -> a -> a True ?* x = x False ?* _ = 0 -- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'CB.sourceFile'@). zipFileData :: MonadResource m => FilePath -> ZipData m zipFileData = ZipDataSource . CB.sourceFile zipData :: Monad m => ZipData m -> Either (C.Source m BS.ByteString) BSL.ByteString zipData (ZipDataByteString b) = Right b zipData (ZipDataSource s) = Left s dataSize :: Either a BSL.ByteString -> Maybe Word64 dataSize (Left _) = Nothing dataSize (Right b) = Just $ fromIntegral $ BSL.length b toDOSTime :: LocalTime -> (Word16, Word16) toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins secs)) = ( fromIntegral hour `shiftL` 11 .|. fromIntegral mins `shiftL` 5 .|. truncate secs `shiftR` 1 , fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day ) countOutput :: Monad m => C.Conduit i m BS.ByteString -> C.Conduit i (StateT Word64 m) BS.ByteString countOutput c = stateC $ \s -> (,) () . (s +) <$> outputSize c output :: MonadThrow m => P.Put -> C.ConduitM i BS.ByteString (StateT Word64 m) () output = countOutput . sourcePut maxBound16 :: Integral n => n maxBound16 = fromIntegral (maxBound :: Word16) -- |Stream produce a zip file, reading a sequence of entries with data. -- Although file data is never kept in memory (beyond a single 'ZipDataByteString'), the format of zip files requires producing a final directory of entries at the end of the file, consuming an additional ~100 bytes of state per entry during streaming. -- The final result is the total size of the zip file. -- -- Depending on options, the resulting zip file should be compatible with most unzipping applications. -- Any errors are thrown in the underlying monad (as 'ZipError's). zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64 zipStream ZipOptions{..} = execStateC 0 $ do (cnt, cdir) <- next 0 (mempty :: P.Put) cdoff <- get output cdir eoff <- get endDirectory cdoff (eoff - cdoff) cnt where next cnt dir = C.await >>= maybe (return (cnt, dir)) (\e -> do d <- entry e next (succ cnt) $ dir <> d) entry (ZipEntry{..}, zipData -> dat) = do let usiz = dataSize dat sdat = left ((C..| sizeCRC) . C.toProducer) dat comp = zipOptCompressLevel /= 0 && all (0 /=) usiz (cdat, csiz) | comp = ( ((`C.fuseBoth` (outputSize $ CZ.compress zipOptCompressLevel deflateWindowBits)) +++ Z.compress) sdat -- level for Z.compress? , dataSize cdat) | otherwise = (left (fmap (id &&& fst)) sdat, usiz) z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize) (maxBound32 <) (max <$> usiz <*> csiz) namelen = BS.length zipEntryName (time, date) = toDOSTime zipEntryTime mcrc = either (const Nothing) (Just . crc32) dat when (namelen > maxBound16) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long" let common = do P.putWord16le $ isLeft dat ?* bit 3 P.putWord16le $ comp ?* 8 P.putWord16le $ time P.putWord16le $ date off <- get output $ do P.putWord32le 0x04034b50 P.putWord16le $ if z64 then 45 else 20 common P.putWord32le $ fromMaybe 0 mcrc P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral csiz P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz P.putWord16le $ fromIntegral namelen P.putWord16le $ z64 ?* 20 P.putByteString zipEntryName when z64 $ do P.putWord16le 0x0001 P.putWord16le 16 P.putWord64le $ fromMaybe 0 usiz P.putWord64le $ fromMaybe 0 csiz let outsz c = stateC $ \o -> (id &&& (o +) . snd) <$> c ((usz, crc), csz) <- either (\cd -> do r@((usz, crc), csz) <- outsz cd -- write compressed data when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled" output $ do P.putWord32le 0x08074b50 P.putWord32le crc let putsz | z64 = P.putWord64le | otherwise = P.putWord32le . fromIntegral putsz csz putsz usz return r) (\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b) cdat when (any (usz /=) zipEntrySize) $ zipError $ BSC.unpack zipEntryName ++ ": incorrect zipEntrySize" return $ do -- central directory let o64 = off >= maxBound32 l64 = z64 ?* 16 + o64 ?* 8 a64 = z64 || o64 P.putWord32le 0x02014b50 P.putWord16le zipVersion P.putWord16le $ if a64 then 45 else 20 common P.putWord32le crc P.putWord32le $ if z64 then maxBound32 else fromIntegral csz P.putWord32le $ if z64 then maxBound32 else fromIntegral usz P.putWord16le $ fromIntegral namelen P.putWord16le $ a64 ?* (4 + l64) P.putWord16le 0 -- comment length P.putWord16le 0 -- disk number P.putWord16le 0 -- internal file attributes P.putWord32le 0 -- external file attributes P.putWord32le $ if o64 then maxBound32 else fromIntegral off P.putByteString zipEntryName when a64 $ do P.putWord16le 0x0001 P.putWord16le l64 when z64 $ do P.putWord64le usz P.putWord64le csz when o64 $ P.putWord64le off endDirectory cdoff cdlen cnt = do let z64 = zipOpt64 || cdoff > maxBound32 || cnt > maxBound16 when z64 $ output $ do P.putWord32le 0x06064b50 -- zip64 end P.putWord64le 44 -- length of this record P.putWord16le zipVersion P.putWord16le 45 P.putWord32le 0 -- disk P.putWord32le 0 -- central disk P.putWord64le cnt P.putWord64le cnt P.putWord64le cdlen P.putWord64le cdoff P.putWord32le 0x07064b50 -- locator: P.putWord32le 0 -- central disk P.putWord64le $ cdoff + cdlen P.putWord32le 1 -- total disks let comment = zipComment zipOptInfo commlen = BS.length comment when (commlen > maxBound16) $ zipError "comment too long" output $ do P.putWord32le 0x06054b50 -- end P.putWord16le 0 -- disk P.putWord16le 0 -- central disk P.putWord16le $ fromIntegral $ min maxBound16 cnt P.putWord16le $ fromIntegral $ min maxBound16 cnt P.putWord32le $ fromIntegral $ min maxBound32 cdlen P.putWord32le $ fromIntegral $ min maxBound32 cdoff P.putWord16le $ fromIntegral commlen P.putByteString comment