module Data.Conduit.Tar
(
tar
, tarEntries
, untar
, untarWithFinalizers
, restoreFile
, restoreFileInto
, withEntry
, withEntries
, headerFileType
, headerFilePath
, tarFilePath
, filePathConduit
, createTarball
, writeTarball
, extractTarball
, module Data.Conduit.Tar.Types
) where
import Conduit as C
import Control.Exception (assert)
import Control.Monad (unless, void)
import Data.ByteString (ByteString)
import qualified Data.ByteString as S
import Data.ByteString.Builder
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as SL
import Data.ByteString.Short (ShortByteString, fromShort, toShort)
import qualified Data.ByteString.Short as SS
import qualified Data.ByteString.Unsafe as BU
import Data.Foldable (foldr')
import Data.Monoid ((<>), mempty)
import Foreign.C.Types (CTime (..))
import System.Directory (createDirectoryIfMissing,
getCurrentDirectory)
import System.FilePath
import System.IO
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<*))
#endif
import Data.Conduit.Tar.Types
#ifdef WINDOWS
import Data.Conduit.Tar.Windows
#else
import Data.Conduit.Tar.Unix
#endif
headerFilePathBS :: Header -> S.ByteString
headerFilePathBS Header {..} =
if SS.length headerFileNamePrefix > 0
then S.concat
[fromShort headerFileNamePrefix, pathSeparatorS, fromShort headerFileNameSuffix]
else fromShort headerFileNameSuffix
headerFilePath :: Header -> FilePath
headerFilePath = S8.unpack . headerFilePathBS
headerFileType :: Header -> FileType
headerFileType h =
case headerLinkIndicator h of
0 -> FTNormal
48 -> FTNormal
49 -> FTHardLink
50 -> FTSymbolicLink (fromShort (headerLinkName h))
51 -> FTCharacterSpecial
52 -> FTBlockSpecial
53 -> FTDirectory
54 -> FTFifo
x -> FTOther x
parseHeader :: FileOffset -> ByteString -> Either TarException Header
parseHeader offset bs = assert (S.length bs == 512) $ do
let checksumBytes = S.take 8 $ S.drop 148 bs
expectedChecksum = parseOctal checksumBytes
actualChecksum = bsum bs bsum checksumBytes + 8 * space
unless (actualChecksum == expectedChecksum) (Left (BadChecksum offset))
return Header
{ headerOffset = offset
, headerPayloadOffset = offset + 512
, headerFileNameSuffix = getShort 0 100
, headerFileMode = getOctal 100 8
, headerOwnerId = getOctal 108 8
, headerGroupId = getOctal 116 8
, headerPayloadSize = getOctal 124 12
, headerTime = CTime $ getOctal 136 12
, headerLinkIndicator = BU.unsafeIndex bs 156
, headerLinkName = getShort 157 100
, headerMagicVersion = toShort $ S.take 8 $ S.drop 257 bs
, headerOwnerName = getShort 265 32
, headerGroupName = getShort 297 32
, headerDeviceMajor = getOctal 329 8
, headerDeviceMinor = getOctal 337 8
, headerFileNamePrefix = getShort 345 155
}
where
bsum :: ByteString -> Int
bsum = S.foldl' (\c n -> c + fromIntegral n) 0
getShort off len = toShort $ S.takeWhile (/= 0) $ S.take len $ S.drop off bs
getOctal off len = parseOctal $ S.take len $ S.drop off bs
parseOctal :: Integral i => ByteString -> i
parseOctal = S.foldl' (\t c -> t * 8 + fromIntegral (c zero)) 0
. S.takeWhile (\c -> zero <= c && c <= seven)
. S.dropWhile (== space)
space :: Integral i => i
space = 0x20
zero = 48
seven = 55
untarChunks :: Monad m => ConduitM ByteString TarChunk m ()
untarChunks =
loop 0
where
loop !offset = assert (offset `mod` 512 == 0) $ do
bs <- takeCE 512 .| foldC
case S.length bs of
0 -> return ()
512 | S.all (== 0) bs -> do
let offset' = offset + 512
bs' <- takeCE 512 .| foldC
case () of
()
| S.length bs' /= 512 -> do
leftover bs'
yield $ ChunkException $ ShortTrailer offset'
| S.all (== 0) bs' -> return ()
| otherwise -> do
leftover bs'
yield $ ChunkException $ BadTrailer offset'
512 ->
case parseHeader offset bs of
Left e -> do
leftover bs
yield $ ChunkException e
Right h -> do
yield $ ChunkHeader h
offset' <- payloads (offset + 512) $ headerPayloadSize h
let expectedOffset = offset + 512 + headerPayloadSize h +
(case 512 (headerPayloadSize h `mod` 512) of
512 -> 0
x -> x)
assert (offset' == expectedOffset) (loop offset')
_ -> do
leftover bs
yield $ ChunkException $ IncompleteHeader offset
payloads !offset 0 = do
let padding =
case offset `mod` 512 of
0 -> 0
x -> 512 fromIntegral x
takeCE padding .| sinkNull
return $! offset + fromIntegral padding
payloads !offset !size = do
mbs <- await
case mbs of
Nothing -> do
yield $ ChunkException $ IncompletePayload offset $ fromIntegral size
return offset
Just bs -> do
let (x, y) = S.splitAt (fromIntegral (min size (fromIntegral (maxBound :: Int)))) bs
yield $ ChunkPayload offset x
let size' = size fromIntegral (S.length x)
offset' = offset + fromIntegral (S.length x)
unless (S.null y) (leftover y)
payloads offset' size'
withEntry :: MonadThrow m
=> (Header -> ConduitM ByteString o m r)
-> ConduitM TarChunk o m r
withEntry inner = do
mc <- await
case mc of
Nothing -> throwM NoMoreHeaders
Just (ChunkHeader h) -> payloadsConduit .| (inner h <* sinkNull)
Just x@(ChunkPayload offset _bs) -> do
leftover x
throwM $ UnexpectedPayload offset
Just (ChunkException e) -> throwM e
payloadsConduit :: MonadThrow m
=> ConduitM TarChunk ByteString m ()
payloadsConduit = do
mx <- await
case mx of
Just (ChunkPayload _ bs) -> yield bs >> payloadsConduit
Just x@ChunkHeader {} -> leftover x
Just (ChunkException e) -> throwM e
Nothing -> return ()
withEntries :: MonadThrow m
=> (Header -> ConduitM ByteString o m ())
-> ConduitM TarChunk o m ()
withEntries = peekForever . withEntry
withFileInfo :: MonadThrow m
=> (FileInfo -> ConduitM ByteString o m ())
-> ConduitM TarChunk o m ()
withFileInfo inner = go
where
go = do
mc <- await
case mc of
Nothing -> return ()
Just (ChunkHeader h)
| headerLinkIndicator h >= 55 -> do
if (headerMagicVersion h == gnuTarMagicVersion)
then handleGnuTarHeader h .| go
else go
Just (ChunkHeader h) -> do
payloadsConduit .| (inner (fileInfoFromHeader h) <* sinkNull)
go
Just x@(ChunkPayload offset _bs) -> do
leftover x
throwM $ UnexpectedPayload offset
Just (ChunkException e) -> throwM e
handleGnuTarHeader :: MonadThrow m
=> Header
-> ConduitM TarChunk TarChunk m ()
handleGnuTarHeader h = do
case headerLinkIndicator h of
76 -> do
let pSize = headerPayloadSize h
unless (0 < pSize && pSize <= 4096) $
throwM $
FileTypeError (headerPayloadOffset h) 'L' $ "Filepath is too long: " ++ show pSize
longFileNameBuilder <- payloadsConduit .| sinkBuilder
let longFileName = SL.toStrict . SL.init . toLazyByteString $ longFileNameBuilder
mcNext <- await
case mcNext of
Just (ChunkHeader nh) -> do
unless (S.isPrefixOf (fromShort (headerFileNameSuffix nh)) longFileName) $
throwM $
FileTypeError (headerPayloadOffset nh) 'L' $
"Long filename doesn't match the original."
yield
(ChunkHeader $
nh
{ headerFileNameSuffix = toShort longFileName
, headerFileNamePrefix = SS.empty
})
Just c@(ChunkPayload offset _) -> do
leftover c
throwM $ InvalidHeader offset
Just (ChunkException exc) -> throwM exc
Nothing -> throwM NoMoreHeaders
83 -> do
payloadsConduit .| sinkNull
_ -> return ()
untar :: MonadThrow m
=> (FileInfo -> ConduitM ByteString o m ())
-> ConduitM ByteString o m ()
untar inner = untarChunks .| withFileInfo inner
untarWithFinalizers ::
(MonadThrow m, MonadIO m)
=> (FileInfo -> ConduitM ByteString (IO ()) m ())
-> ConduitM ByteString c m ()
untarWithFinalizers inner = do
finilizers <- untar inner .| foldlC (>>) (return ())
liftIO finilizers
gnuTarMagicVersion :: ShortByteString
gnuTarMagicVersion = toShort (S8.pack "ustar \NUL")
ustarMagicVersion :: ShortByteString
ustarMagicVersion = toShort (S8.pack "ustar\NUL00")
blockSize :: FileOffset
blockSize = 512
terminatorBlock :: ByteString
terminatorBlock = S.replicate (fromIntegral (2 * blockSize)) 0
defHeader :: FileOffset -> Header
defHeader offset = Header
{ headerOffset = offset
, headerPayloadOffset = offset + 512
, headerFileNameSuffix = SS.empty
, headerFileMode = 0o644
, headerOwnerId = 0
, headerGroupId = 0
, headerPayloadSize = 0
, headerTime = 0
, headerLinkIndicator = 0
, headerLinkName = SS.empty
, headerMagicVersion = ustarMagicVersion
, headerOwnerName = "root"
, headerGroupName = "root"
, headerDeviceMajor = 0
, headerDeviceMinor = 0
, headerFileNamePrefix = SS.empty
}
headerFromFileInfo :: MonadThrow m =>
FileOffset
-> FileInfo
-> m (Either TarCreateException Header)
headerFromFileInfo offset fi = do
unless (offset `mod` 512 == 0) $
throwM $ TarCreationError $ "Offset must always be a multiple of 512"
let (prefix, suffix) = splitPathAt 100 $ filePath fi
if (SS.length prefix > 155)
then return $ Left $ FileNameTooLong fi
else do
(payloadSize, linkName, linkIndicator) <-
case fileType fi of
FTNormal -> return (fileSize fi, SS.empty, 48)
FTSymbolicLink ln -> return (0, toShort ln, 50)
FTDirectory -> return (0, SS.empty, 53)
fty -> throwM $ TarCreationError $ "Unsupported file type: " ++ show fty
return $
Right
Header
{ headerOffset = offset
, headerPayloadOffset = offset + 512
, headerFileNameSuffix = suffix
, headerFileMode = fileMode fi
, headerOwnerId = fileUserId fi
, headerGroupId = fileGroupId fi
, headerPayloadSize = payloadSize
, headerTime = fileModTime fi
, headerLinkIndicator = linkIndicator
, headerLinkName = linkName
, headerMagicVersion = ustarMagicVersion
, headerOwnerName = toShort $ fileUserName fi
, headerGroupName = toShort $ fileGroupName fi
, headerDeviceMajor = 0
, headerDeviceMinor = 0
, headerFileNamePrefix = prefix
}
splitPathAt :: Int -> ByteString -> (ShortByteString, ShortByteString)
splitPathAt n fp
| S.length fp <= n = (SS.empty, toShort fp)
| otherwise =
let sfp = S8.splitWith isPathSeparator fp
sepWith p (tlen, prefix', suffix') =
case S.length p + 1 + tlen of
tlen'
| tlen' <= n -> (tlen', prefix', p : suffix')
tlen' -> (tlen', p : prefix', suffix')
(_, prefix, suffix) = foldr' sepWith (0, [], []) sfp
toShortPath = toShort . S8.intercalate pathSeparatorS
in (toShortPath prefix, toShortPath suffix)
packHeader :: MonadThrow m => Header -> m S.ByteString
packHeader header = do
(left, right) <- packHeaderNoChecksum header
let sumsl :: SL.ByteString -> Int
sumsl = SL.foldl' (\ !acc !v -> acc + fromIntegral v) 0
encChecksum <- encodeOctal 7 $ sumsl left + 32 * 8 + sumsl right
return $
SL.toStrict $
toLazyByteString $ lazyByteString left <> encChecksum <> word8 0 <> lazyByteString right
packHeaderNoChecksum :: MonadThrow m => Header -> m (SL.ByteString, SL.ByteString)
packHeaderNoChecksum Header {..} = do
let CTime headerTime' = headerTime
hNameSuffix <- encodeShort 100 headerFileNameSuffix
hFileMode <- encodeOctal 7 headerFileMode
hOwnerId <- encodeOctal 7 headerOwnerId
hGroupId <- encodeOctal 7 headerGroupId
hPayloadSize <- encodeOctal 11 headerPayloadSize
hTime <- encodeOctal 11 headerTime'
hLinkName <- encodeShort 100 headerLinkName
hMagicVersion <- encodeShort 8 headerMagicVersion
hOwnerName <- encodeShort 32 headerOwnerName
hGroupName <- encodeShort 32 headerGroupName
hDevMajor <- encodeDevice headerDeviceMajor
hDevMinor <- encodeDevice headerDeviceMinor
hNamePrefix <- encodeShort 155 headerFileNamePrefix
return
( toLazyByteString $
hNameSuffix <>
hFileMode <> word8 0 <>
hOwnerId <> word8 0 <>
hGroupId <> word8 0 <>
hPayloadSize <> word8 0 <>
hTime <> word8 0
, toLazyByteString $
word8 headerLinkIndicator <>
hLinkName <>
hMagicVersion <>
hOwnerName <>
hGroupName <>
hDevMajor <> word8 0 <>
hDevMinor <> word8 0 <>
hNamePrefix <>
byteString (S.replicate 12 0)
)
where
encodeDevice 0 = return $ byteString $ S.replicate 7 0
encodeDevice devid = encodeOctal 7 devid
encodeShort :: MonadThrow m => Int -> ShortByteString -> m Builder
encodeShort !len !sbs
| lenShort <= len = return $ shortByteString sbs <> byteString (S.replicate (len lenShort) 0)
| otherwise =
throwM $
TarCreationError $ "Can't fit '" ++ S8.unpack (fromShort sbs) ++ "' into the tar header"
where
lenShort = SS.length sbs
encodeOctal :: (Show a, Integral a, MonadThrow m) => Int -> a -> m Builder
encodeOctal !len !val = go 0 val mempty
where
go !n !cur !acc
| cur == 0 =
if n < len
then return $ byteString (S.replicate (len n) 48) <> acc
else return acc
| n < len =
let !(q, r) = cur `quotRem` 8
in go (n + 1) q (word8 (fromIntegral r + 48) <> acc)
| otherwise =
throwM $
TarCreationError $
"<encodeOctal>: Tar value overflow (for maxLen " ++ show len ++ "): " ++ show val
yieldNulPadding :: Monad m => FileOffset -> ConduitM i ByteString m FileOffset
yieldNulPadding n = do
let pad = blockSize (n `mod` blockSize)
if pad /= blockSize
then yield (S.replicate (fromIntegral pad) 0) >> return (n + pad)
else return n
tarPayload :: MonadThrow m =>
FileOffset
-> Header
-> (FileOffset -> ConduitM (Either a ByteString) ByteString m FileOffset)
-> ConduitM (Either a ByteString) ByteString m FileOffset
tarPayload size header cont
| size == headerPayloadSize header = cont (headerOffset header + blockSize)
| otherwise = go size
where
go prevSize = do
eContent <- await
case eContent of
Just h@(Left _) -> do
leftover h
throwM $ TarCreationError "Not enough payload."
Just (Right content) -> do
let nextSize = prevSize + fromIntegral (S.length content)
unless (nextSize <= headerPayloadSize header) $
throwM $ TarCreationError "Too much payload"
yield content
if nextSize == headerPayloadSize header
then do
paddedSize <- yieldNulPadding nextSize
cont (headerPayloadOffset header + paddedSize)
else go nextSize
Nothing -> throwM $ TarCreationError "Stream finished abruptly. Not enough payload."
tarHeader :: MonadThrow m =>
FileOffset -> ConduitM (Either Header ByteString) ByteString m FileOffset
tarHeader offset = do
eContent <- await
case eContent of
Just c@(Right _) -> do
leftover c
throwM $ TarCreationError "Received payload without a corresponding Header."
Just (Left header) -> do
packHeader header >>= yield
tarPayload 0 header tarHeader
Nothing -> do
yield terminatorBlock
return $ offset + fromIntegral (S.length terminatorBlock)
tarFileInfo :: MonadThrow m =>
FileOffset -> ConduitM (Either FileInfo ByteString) ByteString m FileOffset
tarFileInfo offset = do
eContent <- await
case eContent of
Just (Right _) ->
throwM $ TarCreationError "Received payload without a corresponding FileInfo."
Just (Left fi) -> do
eHeader <- headerFromFileInfo offset fi
case eHeader of
Left (FileNameTooLong _) -> do
let fPath = filePath fi
fPathLen = fromIntegral (S.length fPath + 1)
pad =
case fPathLen `mod` blockSize of
0 -> 0
x -> blockSize x
eHeader' <-
headerFromFileInfo
(offset + blockSize + fPathLen + pad)
(fi {filePath = S.take 100 fPath})
header <- either throwM return eHeader'
pHeader <- packHeader header
pFileNameHeader <-
packHeader $
(defHeader offset)
{ headerFileNameSuffix = "././@LongLink"
, headerPayloadSize = fPathLen
, headerLinkIndicator = 76
, headerMagicVersion = gnuTarMagicVersion
}
yield pFileNameHeader
yield fPath
yield $ S.replicate (fromIntegral pad + 1) 0
yield pHeader
tarPayload 0 header tarFileInfo
Left exc -> throwM exc
Right header -> do
packHeader header >>= yield
tarPayload 0 header tarFileInfo
Nothing -> return offset
tar :: MonadResource m =>
ConduitM (Either FileInfo ByteString) ByteString m FileOffset
tar = do
offset <- tarFileInfo 0
yield terminatorBlock
return $ offset + fromIntegral (S.length terminatorBlock)
tarEntries :: MonadResource m =>
ConduitM (Either Header ByteString) ByteString m FileOffset
tarEntries = do
offset <- tarHeader 0
yield terminatorBlock
return $ offset + fromIntegral (S.length terminatorBlock)
filePathConduit :: MonadResource m =>
ConduitM FilePath (Either FileInfo ByteString) m ()
filePathConduit = do
mfp <- await
case mfp of
Just fp -> do
fi <- liftIO $ getFileInfo $ S8.pack fp
case fileType fi of
FTNormal -> do
yield (Left fi)
sourceFile (S8.unpack (filePath fi)) .| mapC Right
FTSymbolicLink _ -> yield (Left fi)
FTDirectory -> do
yield (Left fi)
sourceDirectory (S8.unpack (filePath fi)) .| filePathConduit
fty -> do
leftover fp
throwM $ TarCreationError $ "Unsupported file type: " ++ show fty
filePathConduit
Nothing -> return ()
tarFilePath :: MonadResource m => ConduitM FilePath ByteString m FileOffset
tarFilePath = filePathConduit .| tar
createTarball :: FilePath
-> [FilePath]
-> IO ()
createTarball tarfp dirs = do
runConduitRes $ yieldMany dirs .| void tarFilePath .| sinkFile tarfp
writeTarball :: Handle
-> [FilePath]
-> IO ()
writeTarball tarHandle dirs = do
runConduitRes $ yieldMany dirs .| void tarFilePath .| sinkHandle tarHandle
pathSeparatorS :: ByteString
pathSeparatorS = S8.singleton pathSeparator
fileInfoFromHeader :: Header -> FileInfo
fileInfoFromHeader header@(Header {..}) =
FileInfo
{ filePath = headerFilePathBS header
, fileUserId = headerOwnerId
, fileUserName = fromShort headerOwnerName
, fileGroupId = headerGroupId
, fileGroupName = fromShort headerGroupName
, fileMode = headerFileMode
, fileSize = headerPayloadSize
, fileType = headerFileType header
, fileModTime = headerTime
}
extractTarball :: FilePath
-> Maybe FilePath
-> IO ()
extractTarball tarfp mcd = do
cd <- maybe getCurrentDirectory return mcd
createDirectoryIfMissing True cd
runConduitRes $ sourceFileBS tarfp .| untarWithFinalizers (restoreFileInto cd)
restoreFileInto :: MonadResource m =>
FilePath -> FileInfo -> ConduitM ByteString (IO ()) m ()
restoreFileInto cd fi =
restoreFile fi {filePath = S8.pack (cd </> makeRelative "/" (S8.unpack (filePath fi)))}