{-# OPTIONS_GHC -fno-warn-unused-imports #-} ----------------------------------------------------------------------------- -- | -- Module : Distribution.Client.Tar -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2009 Duncan Coutts -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- -- Reading, writing and manipulating \"@.tar@\" archive files. -- ----------------------------------------------------------------------------- module Distribution.Client.Tar ( -- * High level \"all in one\" operations createTarGzFile, extractTarGzFile, -- * Converting between internal and external representation read, write, -- * Packing and unpacking files to\/from internal representation pack, unpack, -- * Tar entry and associated types Entry(..), entryPath, EntryContent(..), Ownership(..), FileSize, Permissions, EpochTime, DevMajor, DevMinor, TypeCode, Format(..), -- * Constructing simple entry values simpleEntry, fileEntry, directoryEntry, -- * TarPath type TarPath, toTarPath, fromTarPath, -- ** Sequences of tar entries Entries(..), foldrEntries, foldlEntries, unfoldrEntries, mapEntries, filterEntries, entriesIndex, ) where import Data.Char (ord) import Data.Int (Int64) import Data.Bits (Bits, shiftL, testBit) import Data.List (foldl') import Numeric (readOct, showOct) import Control.Monad (MonadPlus(mplus), when) import qualified Data.Map as Map import qualified Data.ByteString.Lazy as BS import qualified Data.ByteString.Lazy.Char8 as BS.Char8 import Data.ByteString.Lazy (ByteString) import qualified Codec.Compression.GZip as GZip import qualified Distribution.Client.GZipUtils as GZipUtils import System.FilePath ( () ) import qualified System.FilePath as FilePath.Native import qualified System.FilePath.Windows as FilePath.Windows import qualified System.FilePath.Posix as FilePath.Posix import System.Directory ( getDirectoryContents, doesDirectoryExist, getModificationTime , getPermissions, createDirectoryIfMissing, copyFile ) import qualified System.Directory as Permissions ( Permissions(executable) ) import Distribution.Compat.FilePerms ( setFileExecutable ) import System.Posix.Types ( FileMode ) import System.Time ( ClockTime(..) ) import System.IO ( IOMode(ReadMode), openBinaryFile, hFileSize ) import System.IO.Unsafe (unsafeInterleaveIO) import Prelude hiding (read) -- -- * High level operations -- createTarGzFile :: FilePath -- ^ Full Tarball path -> FilePath -- ^ Base directory -> FilePath -- ^ Directory to archive, relative to base dir -> IO () createTarGzFile tar base dir = BS.writeFile tar . GZip.compress . write =<< pack base [dir] extractTarGzFile :: FilePath -- ^ Destination directory -> FilePath -- ^ Expected subdir (to check for tarbombs) -> FilePath -- ^ Tarball -> IO () extractTarGzFile dir expected tar = do unpack dir . checkTarbomb expected . read . GZipUtils.maybeDecompress =<< BS.readFile tar -- -- * Entry type -- type FileSize = Int64 -- | The number of seconds since the UNIX epoch type EpochTime = Int64 type DevMajor = Int type DevMinor = Int type TypeCode = Char type Permissions = FileMode -- | Tar archive entry. -- data Entry = Entry { -- | The path of the file or directory within the archive. This is in a -- tar-specific form. Use 'entryPath' to get a native 'FilePath'. entryTarPath :: !TarPath, -- | The real content of the entry. For 'NormalFile' this includes the -- file data. An entry usually contains a 'NormalFile' or a 'Directory'. entryContent :: !EntryContent, -- | File permissions (Unix style file mode). entryPermissions :: !Permissions, -- | The user and group to which this file belongs. entryOwnership :: !Ownership, -- | The time the file was last modified. entryTime :: !EpochTime, -- | The tar format the archive is using. entryFormat :: !Format } -- | Native 'FilePath' of the file or directory within the archive. -- entryPath :: Entry -> FilePath entryPath = fromTarPath . entryTarPath -- | The content of a tar archive entry, which depends on the type of entry. -- -- Portable archives should contain only 'NormalFile' and 'Directory'. -- data EntryContent = NormalFile ByteString !FileSize | Directory | SymbolicLink !LinkTarget | HardLink !LinkTarget | CharacterDevice !DevMajor !DevMinor | BlockDevice !DevMajor !DevMinor | NamedPipe | OtherEntryType !TypeCode ByteString !FileSize data Ownership = Ownership { -- | The owner user name. Should be set to @\"\"@ if unknown. ownerName :: String, -- | The owner group name. Should be set to @\"\"@ if unknown. groupName :: String, -- | Numeric owner user id. Should be set to @0@ if unknown. ownerId :: !Int, -- | Numeric owner group id. Should be set to @0@ if unknown. groupId :: !Int } -- | There have been a number of extensions to the tar file format over the -- years. They all share the basic entry fields and put more meta-data in -- different extended headers. -- data Format = -- | This is the classic Unix V7 tar format. It does not support owner and -- group names, just numeric Ids. It also does not support device numbers. V7Format -- | The \"USTAR\" format is an extension of the classic V7 format. It was -- later standardised by POSIX. It has some restructions but is the most -- portable format. -- | UstarFormat -- | The GNU tar implementation also extends the classic V7 format, though -- in a slightly different way from the USTAR format. In general for new -- archives the standard USTAR/POSIX should be used. -- | GnuFormat deriving Eq -- | @rw-r--r--@ for normal files ordinaryFilePermissions :: Permissions ordinaryFilePermissions = 0o0644 -- | @rwxr-xr-x@ for executable files executableFilePermissions :: Permissions executableFilePermissions = 0o0755 -- | @rwxr-xr-x@ for directories directoryPermissions :: Permissions directoryPermissions = 0o0755 isExecutable :: Permissions -> Bool isExecutable p = testBit p 0 || testBit p 6 -- user or other exectuable -- | An 'Entry' with all default values except for the file name and type. It -- uses the portable USTAR/POSIX format (see 'UstarHeader'). -- -- You can use this as a basis and override specific fields, eg: -- -- > (emptyEntry name HardLink) { linkTarget = target } -- simpleEntry :: TarPath -> EntryContent -> Entry simpleEntry tarpath content = Entry { entryTarPath = tarpath, entryContent = content, entryPermissions = case content of Directory -> directoryPermissions _ -> ordinaryFilePermissions, entryOwnership = Ownership "" "" 0 0, entryTime = 0, entryFormat = UstarFormat } -- | A tar 'Entry' for a file. -- -- Entry fields such as file permissions and ownership have default values. -- -- You can use this as a basis and override specific fields. For example if you -- need an executable file you could use: -- -- > (fileEntry name content) { fileMode = executableFileMode } -- fileEntry :: TarPath -> ByteString -> Entry fileEntry name fileContent = simpleEntry name (NormalFile fileContent (BS.length fileContent)) -- | A tar 'Entry' for a directory. -- -- Entry fields such as file permissions and ownership have default values. -- directoryEntry :: TarPath -> Entry directoryEntry name = simpleEntry name Directory -- -- * Tar paths -- -- | The classic tar format allowed just 100 charcters for the file name. The -- USTAR format extended this with an extra 155 characters, however it uses a -- complex method of splitting the name between the two sections. -- -- Instead of just putting any overflow into the extended area, it uses the -- extended area as a prefix. The agrevating insane bit however is that the -- prefix (if any) must only contain a directory prefix. That is the split -- between the two areas must be on a directory separator boundary. So there is -- no simple calculation to work out if a file name is too long. Instead we -- have to try to find a valid split that makes the name fit in the two areas. -- -- The rationale presumably was to make it a bit more compatible with old tar -- programs that only understand the classic format. A classic tar would be -- able to extract the file name and possibly some dir prefix, but not the -- full dir prefix. So the files would end up in the wrong place, but that's -- probably better than ending up with the wrong names too. -- -- So it's understandable but rather annoying. -- -- * Tar paths use posix format (ie @\'/\'@ directory separators), irrespective -- of the local path conventions. -- -- * The directory separator between the prefix and name is /not/ stored. -- data TarPath = TarPath FilePath -- path name, 100 characters max. FilePath -- path prefix, 155 characters max. deriving (Eq, Ord) -- | Convert a 'TarPath' to a native 'FilePath'. -- -- The native 'FilePath' will use the native directory separator but it is not -- otherwise checked for validity or sanity. In particular: -- -- * The tar path may be invalid as a native path, eg the filename @\"nul\"@ is -- not valid on Windows. -- -- * The tar path may be an absolute path or may contain @\"..\"@ components. -- For security reasons this should not usually be allowed, but it is your -- responsibility to check for these conditions (eg using 'checkSecurity'). -- fromTarPath :: TarPath -> FilePath fromTarPath (TarPath name prefix) = adjustDirectory $ FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix ++ FilePath.Posix.splitDirectories name where adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name = FilePath.Native.addTrailingPathSeparator | otherwise = id -- | Convert a native 'FilePath' to a 'TarPath'. -- -- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a -- description of the problem with splitting long 'FilePath's. -- toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for -- directories a 'TarPath' must always use a trailing @\/@. -> FilePath -> Either String TarPath toTarPath isDir = splitLongPath . addTrailingSep . FilePath.Posix.joinPath . FilePath.Native.splitDirectories where addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator | otherwise = id -- | Take a sanitized path, split on directory separators and try to pack it -- into the 155 + 100 tar file name format. -- -- The stragey is this: take the name-directory components in reverse order -- and try to fit as many components into the 100 long name area as possible. -- If all the remaining components fit in the 155 name area then we win. -- splitLongPath :: FilePath -> Either String TarPath splitLongPath path = case packName nameMax (reverse (FilePath.Posix.splitPath path)) of Left err -> Left err Right (name, []) -> Right (TarPath name "") Right (name, first:rest) -> case packName prefixMax remainder of Left err -> Left err Right (_ , (_:_)) -> Left "File name too long (cannot split)" Right (prefix, []) -> Right (TarPath name prefix) where -- drop the '/' between the name and prefix: remainder = init first : rest where nameMax, prefixMax :: Int nameMax = 100 prefixMax = 155 packName _ [] = Left "File name empty" packName maxLen (c:cs) | n > maxLen = Left "File name too long" | otherwise = Right (packName' maxLen n [c] cs) where n = length c packName' maxLen n ok (c:cs) | n' <= maxLen = packName' maxLen n' (c:ok) cs where n' = n + length c packName' _ _ ok cs = (FilePath.Posix.joinPath ok, cs) -- | The tar format allows just 100 ASCII charcters for the 'SymbolicLink' and -- 'HardLink' entry types. -- newtype LinkTarget = LinkTarget FilePath deriving (Eq, Ord) -- | Convert a tar 'LinkTarget' to a native 'FilePath'. -- fromLinkTarget :: LinkTarget -> FilePath fromLinkTarget (LinkTarget path) = adjustDirectory $ FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path where adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path = FilePath.Native.addTrailingPathSeparator | otherwise = id -- -- * Entries type -- -- | A tar archive is a sequence of entries. data Entries = Next Entry Entries | Done | Fail String unfoldrEntries :: (a -> Either String (Maybe (Entry, a))) -> a -> Entries unfoldrEntries f = unfold where unfold x = case f x of Left err -> Fail err Right Nothing -> Done Right (Just (e, x')) -> Next e (unfold x') foldrEntries :: (Entry -> a -> a) -> a -> (String -> a) -> Entries -> a foldrEntries next done fail' = fold where fold (Next e es) = next e (fold es) fold Done = done fold (Fail err) = fail' err foldlEntries :: (a -> Entry -> a) -> a -> Entries -> Either String a foldlEntries f = fold where fold a (Next e es) = (fold $! f a e) es fold a Done = Right a fold _ (Fail err) = Left err mapEntries :: (Entry -> Entry) -> Entries -> Entries mapEntries f = foldrEntries (Next . f) Done Fail filterEntries :: (Entry -> Bool) -> Entries -> Entries filterEntries p = foldrEntries (\entry rest -> if p entry then Next entry rest else rest) Done Fail checkEntries :: (Entry -> Maybe String) -> Entries -> Entries checkEntries checkEntry = foldrEntries (\entry rest -> case checkEntry entry of Nothing -> Next entry rest Just err -> Fail err) Done Fail entriesIndex :: Entries -> Either String (Map.Map TarPath Entry) entriesIndex = foldlEntries (\m e -> Map.insert (entryTarPath e) e m) Map.empty -- -- * Checking -- -- | This function checks a sequence of tar entries for file name security -- problems. It checks that: -- -- * file paths are not absolute -- -- * file paths do not contain any path components that are \"@..@\" -- -- * file names are valid -- -- These checks are from the perspective of the current OS. That means we check -- for \"@C:\blah@\" files on Windows and \"\/blah\" files on unix. For archive -- entry types 'HardLink' and 'SymbolicLink' the same checks are done for the -- link target. A failure in any entry terminates the sequence of entries with -- an error. -- checkSecurity :: Entries -> Entries checkSecurity = checkEntries checkEntrySecurity checkTarbomb :: FilePath -> Entries -> Entries checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir) checkEntrySecurity :: Entry -> Maybe String checkEntrySecurity entry = case entryContent entry of HardLink link -> check (entryPath entry) `mplus` check (fromLinkTarget link) SymbolicLink link -> check (entryPath entry) `mplus` check (fromLinkTarget link) _ -> check (entryPath entry) where check name | not (FilePath.Native.isRelative name) = Just $ "Absolute file name in tar archive: " ++ show name | not (FilePath.Native.isValid name) = Just $ "Invalid file name in tar archive: " ++ show name | any (=="..") (FilePath.Native.splitDirectories name) = Just $ "Invalid file name in tar archive: " ++ show name | otherwise = Nothing checkEntryTarbomb :: FilePath -> Entry -> Maybe String checkEntryTarbomb _ entry | nonFilesystemEntry = Nothing where -- Ignore some special entries we will not unpack anyway nonFilesystemEntry = case entryContent entry of OtherEntryType 'g' _ _ -> True --PAX global header OtherEntryType 'x' _ _ -> True --PAX individual header _ -> False checkEntryTarbomb expectedTopDir entry = case FilePath.Native.splitDirectories (entryPath entry) of (topDir:_) | topDir == expectedTopDir -> Nothing _ -> Just $ "File in tar archive is not in the expected directory " ++ show expectedTopDir -- -- * Reading -- read :: ByteString -> Entries read = unfoldrEntries getEntry getEntry :: ByteString -> Either String (Maybe (Entry, ByteString)) getEntry bs | BS.length header < 512 = Left "truncated tar archive" -- Tar files end with at least two blocks of all '0'. Checking this serves -- two purposes. It checks the format but also forces the tail of the data -- which is necessary to close the file if it came from a lazily read file. | BS.head bs == 0 = case BS.splitAt 1024 bs of (end, trailing) | BS.length end /= 1024 -> Left "short tar trailer" | not (BS.all (== 0) end) -> Left "bad tar trailer" | not (BS.all (== 0) trailing) -> Left "tar file has trailing junk" | otherwise -> Right Nothing | otherwise = partial $ do case (chksum_, format_) of (Ok chksum, _ ) | correctChecksum header chksum -> return () (Ok _, Ok _) -> fail "tar checksum error" _ -> fail "data is not in tar format" -- These fields are partial, have to check them format <- format_; mode <- mode_; uid <- uid_; gid <- gid_; size <- size_; mtime <- mtime_; devmajor <- devmajor_; devminor <- devminor_; let content = BS.take size (BS.drop 512 bs) padding = (512 - size) `mod` 512 bs' = BS.drop (512 + size + padding) bs entry = Entry { entryTarPath = TarPath name prefix, entryContent = case typecode of '\0' -> NormalFile content size '0' -> NormalFile content size '1' -> HardLink (LinkTarget linkname) '2' -> SymbolicLink (LinkTarget linkname) '3' -> CharacterDevice devmajor devminor '4' -> BlockDevice devmajor devminor '5' -> Directory '6' -> NamedPipe '7' -> NormalFile content size _ -> OtherEntryType typecode content size, entryPermissions = mode, entryOwnership = Ownership uname gname uid gid, entryTime = mtime, entryFormat = format } return (Just (entry, bs')) where header = BS.take 512 bs name = getString 0 100 header mode_ = getOct 100 8 header uid_ = getOct 108 8 header gid_ = getOct 116 8 header size_ = getOct 124 12 header mtime_ = getOct 136 12 header chksum_ = getOct 148 8 header typecode = getByte 156 header linkname = getString 157 100 header magic = getChars 257 8 header uname = getString 265 32 header gname = getString 297 32 header devmajor_ = getOct 329 8 header devminor_ = getOct 337 8 header prefix = getString 345 155 header -- trailing = getBytes 500 12 header format_ = case magic of "\0\0\0\0\0\0\0\0" -> return V7Format "ustar\NUL00" -> return UstarFormat "ustar \NUL" -> return GnuFormat _ -> fail "tar entry not in a recognised format" correctChecksum :: ByteString -> Int -> Bool correctChecksum header checksum = checksum == checksum' where -- sum of all 512 bytes in the header block, -- treating each byte as an 8-bit unsigned value checksum' = BS.Char8.foldl' (\x y -> x + ord y) 0 header' -- treating the 8 bytes of chksum as blank characters. header' = BS.concat [BS.take 148 header, BS.Char8.replicate 8 ' ', BS.drop 156 header] -- * TAR format primitive input getOct :: (Integral a, Bits a) => Int64 -> Int64 -> ByteString -> Partial a getOct off len header | BS.head bytes == 128 = parseBinInt (BS.unpack (BS.tail bytes)) | null octstr = return 0 | otherwise = case readOct octstr of [(x,[])] -> return x _ -> fail "tar header is malformed (bad numeric encoding)" where bytes = getBytes off len header octstr = BS.Char8.unpack . BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ') . BS.Char8.dropWhile (== ' ') $ bytes -- Some tar programs switch into a binary format when they try to represent -- field values that will not fit in the required width when using the text -- octal format. In particular, the UID/GID fields can only hold up to 2^21 -- while in the binary format can hold up to 2^32. The binary format uses -- '\128' as the header which leaves 7 bytes. Only the last 4 are used. parseBinInt [0, 0, 0, byte3, byte2, byte1, byte0] = return $! shiftL (fromIntegral byte3) 24 + shiftL (fromIntegral byte2) 16 + shiftL (fromIntegral byte1) 8 + shiftL (fromIntegral byte0) 0 parseBinInt _ = fail "tar header uses non-standard number encoding" getBytes :: Int64 -> Int64 -> ByteString -> ByteString getBytes off len = BS.take len . BS.drop off getByte :: Int64 -> ByteString -> Char getByte off bs = BS.Char8.index bs off getChars :: Int64 -> Int64 -> ByteString -> String getChars off len = BS.Char8.unpack . getBytes off len getString :: Int64 -> Int64 -> ByteString -> String getString off len = BS.Char8.unpack . BS.Char8.takeWhile (/='\0') . getBytes off len data Partial a = Error String | Ok a partial :: Partial a -> Either String a partial (Error msg) = Left msg partial (Ok x) = Right x instance Monad Partial where return = Ok Error m >>= _ = Error m Ok x >>= k = k x fail = Error -- -- * Writing -- -- | Create the external representation of a tar archive by serialising a list -- of tar entries. -- -- * The conversion is done lazily. -- write :: [Entry] -> ByteString write es = BS.concat $ map putEntry es ++ [BS.replicate (512*2) 0] putEntry :: Entry -> ByteString putEntry entry = case entryContent entry of NormalFile content size -> BS.concat [ header, content, padding size ] OtherEntryType _ content size -> BS.concat [ header, content, padding size ] _ -> header where header = putHeader entry padding size = BS.replicate paddingSize 0 where paddingSize = fromIntegral (negate size `mod` 512) putHeader :: Entry -> ByteString putHeader entry = BS.Char8.pack $ take 148 block ++ putOct 7 checksum ++ ' ' : drop 156 block where block = putHeaderNoChkSum entry checksum = foldl' (\x y -> x + ord y) 0 block putHeaderNoChkSum :: Entry -> String putHeaderNoChkSum Entry { entryTarPath = TarPath name prefix, entryContent = content, entryPermissions = permissions, entryOwnership = ownership, entryTime = modTime, entryFormat = format } = concat [ putString 100 $ name , putOct 8 $ permissions , putOct 8 $ ownerId ownership , putOct 8 $ groupId ownership , putOct 12 $ contentSize , putOct 12 $ modTime , fill 8 $ ' ' -- dummy checksum , putChar8 $ typeCode , putString 100 $ linkTarget ] ++ case format of V7Format -> fill 255 '\NUL' UstarFormat -> concat [ putString 8 $ "ustar\NUL00" , putString 32 $ ownerName ownership , putString 32 $ groupName ownership , putOct 8 $ deviceMajor , putOct 8 $ deviceMinor , putString 155 $ prefix , fill 12 $ '\NUL' ] GnuFormat -> concat [ putString 8 $ "ustar \NUL" , putString 32 $ ownerName ownership , putString 32 $ groupName ownership , putGnuDev 8 $ deviceMajor , putGnuDev 8 $ deviceMinor , putString 155 $ prefix , fill 12 $ '\NUL' ] where (typeCode, contentSize, linkTarget, deviceMajor, deviceMinor) = case content of NormalFile _ size -> ('0' , size, [], 0, 0) Directory -> ('5' , 0, [], 0, 0) SymbolicLink (LinkTarget link) -> ('2' , 0, link, 0, 0) HardLink (LinkTarget link) -> ('1' , 0, link, 0, 0) CharacterDevice major minor -> ('3' , 0, [], major, minor) BlockDevice major minor -> ('4' , 0, [], major, minor) NamedPipe -> ('6' , 0, [], 0, 0) OtherEntryType code _ size -> (code, size, [], 0, 0) putGnuDev w n = case content of CharacterDevice _ _ -> putOct w n BlockDevice _ _ -> putOct w n _ -> replicate w '\NUL' -- * TAR format primitive output type FieldWidth = Int putString :: FieldWidth -> String -> String putString n s = take n s ++ fill (n - length s) '\NUL' --TODO: check integer widths, eg for large file sizes putOct :: (Show a, Integral a) => FieldWidth -> a -> String putOct n x = let octStr = take (n-1) $ showOct x "" in fill (n - length octStr - 1) '0' ++ octStr ++ putChar8 '\NUL' putChar8 :: Char -> String putChar8 c = [c] fill :: FieldWidth -> Char -> String fill n c = replicate n c -- -- * Unpacking -- unpack :: FilePath -> Entries -> IO () unpack baseDir entries = unpackEntries [] (checkSecurity entries) >>= emulateLinks where -- We're relying here on 'checkSecurity' to make sure we're not scribbling -- files all over the place. unpackEntries _ (Fail err) = fail err unpackEntries links Done = return links unpackEntries links (Next entry es) = case entryContent entry of NormalFile file _ -> extractFile entry path file >> unpackEntries links es Directory -> extractDir path >> unpackEntries links es HardLink link -> (unpackEntries $! saveLink path link links) es SymbolicLink link -> (unpackEntries $! saveLink path link links) es _ -> unpackEntries links es --ignore other file types where path = entryPath entry extractFile entry path content = do -- Note that tar archives do not make sure each directory is created -- before files they contain, indeed we may have to create several -- levels of directory. createDirectoryIfMissing True absDir BS.writeFile absPath content when (isExecutable (entryPermissions entry)) (setFileExecutable absPath) where absDir = baseDir FilePath.Native.takeDirectory path absPath = baseDir path extractDir path = createDirectoryIfMissing True (baseDir path) saveLink path link links = seq (length path) $ seq (length link') $ (path, link'):links where link' = fromLinkTarget link emulateLinks = mapM_ $ \(relPath, relLinkTarget) -> let absPath = baseDir relPath absTarget = FilePath.Native.takeDirectory absPath relLinkTarget in copyFile absTarget absPath -- -- * Packing -- pack :: FilePath -- ^ Base directory -> [FilePath] -- ^ Files and directories to pack, relative to the base dir -> IO [Entry] pack baseDir paths0 = preparePaths baseDir paths0 >>= packPaths baseDir preparePaths :: FilePath -> [FilePath] -> IO [FilePath] preparePaths baseDir paths = fmap concat $ interleave [ do isDir <- doesDirectoryExist (baseDir path) if isDir then do entries <- getDirectoryContentsRecursive (baseDir path) return (FilePath.Native.addTrailingPathSeparator path : map (path ) entries) else return [path] | path <- paths ] packPaths :: FilePath -> [FilePath] -> IO [Entry] packPaths baseDir paths = interleave [ do tarpath <- either fail return (toTarPath isDir relpath) if isDir then packDirectoryEntry filepath tarpath else packFileEntry filepath tarpath | relpath <- paths , let isDir = FilePath.Native.hasTrailingPathSeparator filepath filepath = baseDir relpath ] interleave :: [IO a] -> IO [a] interleave = unsafeInterleaveIO . go where go [] = return [] go (x:xs) = do x' <- x xs' <- interleave xs return (x':xs') packFileEntry :: FilePath -- ^ Full path to find the file on the local disk -> TarPath -- ^ Path to use for the tar Entry in the archive -> IO Entry packFileEntry filepath tarpath = do mtime <- getModTime filepath perms <- getPermissions filepath file <- openBinaryFile filepath ReadMode size <- hFileSize file content <- BS.hGetContents file return (simpleEntry tarpath (NormalFile content (fromIntegral size))) { entryPermissions = if Permissions.executable perms then executableFilePermissions else ordinaryFilePermissions, entryTime = mtime } packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk -> TarPath -- ^ Path to use for the tar Entry in the archive -> IO Entry packDirectoryEntry filepath tarpath = do mtime <- getModTime filepath return (directoryEntry tarpath) { entryTime = mtime } getDirectoryContentsRecursive :: FilePath -> IO [FilePath] getDirectoryContentsRecursive dir0 = fmap tail (recurseDirectories dir0 [""]) recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath] recurseDirectories _ [] = return [] recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do (files, dirs') <- collect [] [] =<< getDirectoryContents (base dir) files' <- recurseDirectories base (dirs' ++ dirs) return (dir : files ++ files') where collect files dirs' [] = return (reverse files, reverse dirs') collect files dirs' (entry:entries) | ignore entry = collect files dirs' entries collect files dirs' (entry:entries) = do let dirEntry = dir entry dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry isDirectory <- doesDirectoryExist (base dirEntry) if isDirectory then collect files (dirEntry':dirs') entries else collect (dirEntry:files) dirs' entries ignore ['.'] = True ignore ['.', '.'] = True ignore _ = False getModTime :: FilePath -> IO EpochTime getModTime path = do (TOD s _) <- getModificationTime path return $! fromIntegral s