{-# LANGUAGE CPP, GeneralizedNewtypeDeriving, BangPatterns #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar.Types -- Copyright : (c) 2007 Bjorn Bringert, -- 2008 Andrea Vezzosi, -- 2008-2009 Duncan Coutts -- 2011 Max Bolingbroke -- License : BSD3 -- -- Maintainer : duncan@community.haskell.org -- Portability : portable -- -- Types to represent the content of @.tar@ archives. -- ----------------------------------------------------------------------------- module Codec.Archive.Tar.Types ( Entry(..), entryPath, EntryContent(..), FileSize, Permissions, Ownership(..), EpochTime, TypeCode, DevMajor, DevMinor, Format(..), simpleEntry, fileEntry, directoryEntry, ordinaryFilePermissions, executableFilePermissions, directoryPermissions, TarPath(..), toTarPath, fromTarPath, fromTarPathToPosixPath, fromTarPathToWindowsPath, LinkTarget(..), toLinkTarget, fromLinkTarget, fromLinkTargetToPosixPath, fromLinkTargetToWindowsPath, Entries(..), mapEntries, mapEntriesNoFail, foldEntries, foldlEntries, unfoldEntries, #ifdef TESTS limitToV7FormatCompat #endif ) where import Data.Int (Int64) import Data.Monoid (Monoid(..)) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS.Char8 import qualified Data.ByteString.Lazy as LBS import Control.DeepSeq import qualified System.FilePath as FilePath.Native ( joinPath, splitDirectories, addTrailingPathSeparator ) import qualified System.FilePath.Posix as FilePath.Posix ( joinPath, splitPath, splitDirectories, hasTrailingPathSeparator , addTrailingPathSeparator ) import qualified System.FilePath.Windows as FilePath.Windows ( joinPath, addTrailingPathSeparator ) import System.Posix.Types ( FileMode ) #ifdef TESTS import Test.QuickCheck import Control.Applicative ((<$>), pure, (<*>)) #endif type FileSize = Int64 -- | The number of seconds since the UNIX epoch type EpochTime = Int64 type DevMajor = Int type DevMinor = Int type TypeCode = Char type Permissions = FileMode -- | Tar archive entry. -- data Entry = Entry { -- | The path of the file or directory within the archive. This is in a -- tar-specific form. Use 'entryPath' to get a native 'FilePath'. entryTarPath :: {-# UNPACK #-} !TarPath, -- | The real content of the entry. For 'NormalFile' this includes the -- file data. An entry usually contains a 'NormalFile' or a 'Directory'. entryContent :: !EntryContent, -- | File permissions (Unix style file mode). entryPermissions :: {-# UNPACK #-} !Permissions, -- | The user and group to which this file belongs. entryOwnership :: {-# UNPACK #-} !Ownership, -- | The time the file was last modified. entryTime :: {-# UNPACK #-} !EpochTime, -- | The tar format the archive is using. entryFormat :: !Format } deriving (Eq, Show) -- | Native 'FilePath' of the file or directory within the archive. -- entryPath :: Entry -> FilePath entryPath = fromTarPath . entryTarPath -- | The content of a tar archive entry, which depends on the type of entry. -- -- Portable archives should contain only 'NormalFile' and 'Directory'. -- data EntryContent = NormalFile LBS.ByteString {-# UNPACK #-} !FileSize | Directory | SymbolicLink !LinkTarget | HardLink !LinkTarget | CharacterDevice {-# UNPACK #-} !DevMajor {-# UNPACK #-} !DevMinor | BlockDevice {-# UNPACK #-} !DevMajor {-# UNPACK #-} !DevMinor | NamedPipe | OtherEntryType {-# UNPACK #-} !TypeCode LBS.ByteString {-# UNPACK #-} !FileSize deriving (Eq, Ord, Show) data Ownership = Ownership { -- | The owner user name. Should be set to @\"\"@ if unknown. ownerName :: String, -- | The owner group name. Should be set to @\"\"@ if unknown. groupName :: String, -- | Numeric owner user id. Should be set to @0@ if unknown. ownerId :: {-# UNPACK #-} !Int, -- | Numeric owner group id. Should be set to @0@ if unknown. groupId :: {-# UNPACK #-} !Int } deriving (Eq, Ord, Show) -- | There have been a number of extensions to the tar file format over the -- years. They all share the basic entry fields and put more meta-data in -- different extended headers. -- data Format = -- | This is the classic Unix V7 tar format. It does not support owner and -- group names, just numeric Ids. It also does not support device numbers. V7Format -- | The \"USTAR\" format is an extension of the classic V7 format. It was -- later standardised by POSIX. It has some restrictions but is the most -- portable format. -- | UstarFormat -- | The GNU tar implementation also extends the classic V7 format, though -- in a slightly different way from the USTAR format. In general for new -- archives the standard USTAR/POSIX should be used. -- | GnuFormat deriving (Eq, Ord, Show) instance NFData Entry where rnf (Entry _ c _ _ _ _) = rnf c instance NFData EntryContent where rnf x = case x of NormalFile c _ -> rnflbs c OtherEntryType _ c _ -> rnflbs c _ -> seq x () where #if MIN_VERSION_bytestring(0,10,0) rnflbs = rnf #else rnflbs = foldr (\ !_bs r -> r) () . LBS.toChunks #endif instance NFData Ownership where rnf (Ownership o g _ _) = rnf o `seq` rnf g -- | @rw-r--r--@ for normal files ordinaryFilePermissions :: Permissions ordinaryFilePermissions = 0o0644 -- | @rwxr-xr-x@ for executable files executableFilePermissions :: Permissions executableFilePermissions = 0o0755 -- | @rwxr-xr-x@ for directories directoryPermissions :: Permissions directoryPermissions = 0o0755 -- | An 'Entry' with all default values except for the file name and type. It -- uses the portable USTAR/POSIX format (see 'UstarHeader'). -- -- You can use this as a basis and override specific fields, eg: -- -- > (emptyEntry name HardLink) { linkTarget = target } -- simpleEntry :: TarPath -> EntryContent -> Entry simpleEntry tarpath content = Entry { entryTarPath = tarpath, entryContent = content, entryPermissions = case content of Directory -> directoryPermissions _ -> ordinaryFilePermissions, entryOwnership = Ownership "" "" 0 0, entryTime = 0, entryFormat = UstarFormat } -- | A tar 'Entry' for a file. -- -- Entry fields such as file permissions and ownership have default values. -- -- You can use this as a basis and override specific fields. For example if you -- need an executable file you could use: -- -- > (fileEntry name content) { fileMode = executableFileMode } -- fileEntry :: TarPath -> LBS.ByteString -> Entry fileEntry name fileContent = simpleEntry name (NormalFile fileContent (LBS.length fileContent)) -- | A tar 'Entry' for a directory. -- -- Entry fields such as file permissions and ownership have default values. -- directoryEntry :: TarPath -> Entry directoryEntry name = simpleEntry name Directory -- -- * Tar paths -- -- | The classic tar format allowed just 100 characters for the file name. The -- USTAR format extended this with an extra 155 characters, however it uses a -- complex method of splitting the name between the two sections. -- -- Instead of just putting any overflow into the extended area, it uses the -- extended area as a prefix. The aggravating insane bit however is that the -- prefix (if any) must only contain a directory prefix. That is the split -- between the two areas must be on a directory separator boundary. So there is -- no simple calculation to work out if a file name is too long. Instead we -- have to try to find a valid split that makes the name fit in the two areas. -- -- The rationale presumably was to make it a bit more compatible with old tar -- programs that only understand the classic format. A classic tar would be -- able to extract the file name and possibly some dir prefix, but not the -- full dir prefix. So the files would end up in the wrong place, but that's -- probably better than ending up with the wrong names too. -- -- So it's understandable but rather annoying. -- -- * Tar paths use Posix format (ie @\'/\'@ directory separators), irrespective -- of the local path conventions. -- -- * The directory separator between the prefix and name is /not/ stored. -- data TarPath = TarPath {-# UNPACK #-} !BS.ByteString -- path name, 100 characters max. {-# UNPACK #-} !BS.ByteString -- path prefix, 155 characters max. deriving (Eq, Ord) instance NFData TarPath where rnf (TarPath _ _) = () -- fully strict by construction instance Show TarPath where show = show . fromTarPath -- | Convert a 'TarPath' to a native 'FilePath'. -- -- The native 'FilePath' will use the native directory separator but it is not -- otherwise checked for validity or sanity. In particular: -- -- * The tar path may be invalid as a native path, eg the file name @\"nul\"@ -- is not valid on Windows. -- -- * The tar path may be an absolute path or may contain @\"..\"@ components. -- For security reasons this should not usually be allowed, but it is your -- responsibility to check for these conditions (eg using 'checkSecurity'). -- fromTarPath :: TarPath -> FilePath fromTarPath (TarPath namebs prefixbs) = adjustDirectory $ FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix ++ FilePath.Posix.splitDirectories name where name = BS.Char8.unpack namebs prefix = BS.Char8.unpack prefixbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name = FilePath.Native.addTrailingPathSeparator | otherwise = id -- | Convert a 'TarPath' to a Unix\/Posix 'FilePath'. -- -- The difference compared to 'fromTarPath' is that it always returns a Unix -- style path irrespective of the current operating system. -- -- This is useful to check how a 'TarPath' would be interpreted on a specific -- operating system, eg to perform portability checks. -- fromTarPathToPosixPath :: TarPath -> FilePath fromTarPathToPosixPath (TarPath namebs prefixbs) = adjustDirectory $ FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories prefix ++ FilePath.Posix.splitDirectories name where name = BS.Char8.unpack namebs prefix = BS.Char8.unpack prefixbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name = FilePath.Posix.addTrailingPathSeparator | otherwise = id -- | Convert a 'TarPath' to a Windows 'FilePath'. -- -- The only difference compared to 'fromTarPath' is that it always returns a -- Windows style path irrespective of the current operating system. -- -- This is useful to check how a 'TarPath' would be interpreted on a specific -- operating system, eg to perform portability checks. -- fromTarPathToWindowsPath :: TarPath -> FilePath fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $ FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories prefix ++ FilePath.Posix.splitDirectories name where name = BS.Char8.unpack namebs prefix = BS.Char8.unpack prefixbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name = FilePath.Windows.addTrailingPathSeparator | otherwise = id -- | Convert a native 'FilePath' to a 'TarPath'. -- -- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a -- description of the problem with splitting long 'FilePath's. -- toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for -- directories a 'TarPath' must always use a trailing @\/@. -> FilePath -> Either String TarPath toTarPath isDir = splitLongPath . addTrailingSep . FilePath.Posix.joinPath . FilePath.Native.splitDirectories where addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator | otherwise = id -- | Take a sanitised path, split on directory separators and try to pack it -- into the 155 + 100 tar file name format. -- -- The strategy is this: take the name-directory components in reverse order -- and try to fit as many components into the 100 long name area as possible. -- If all the remaining components fit in the 155 name area then we win. -- splitLongPath :: FilePath -> Either String TarPath splitLongPath path = case packName nameMax (reverse (FilePath.Posix.splitPath path)) of Left err -> Left err Right (name, []) -> Right $! TarPath (BS.Char8.pack name) BS.empty Right (name, first:rest) -> case packName prefixMax remainder of Left err -> Left err Right (_ , (_:_)) -> Left "File name too long (cannot split)" Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name) (BS.Char8.pack prefix) where -- drop the '/' between the name and prefix: remainder = init first : rest where nameMax, prefixMax :: Int nameMax = 100 prefixMax = 155 packName _ [] = Left "File name empty" packName maxLen (c:cs) | n > maxLen = Left "File name too long" | otherwise = Right (packName' maxLen n [c] cs) where n = length c packName' maxLen n ok (c:cs) | n' <= maxLen = packName' maxLen n' (c:ok) cs where n' = n + length c packName' _ _ ok cs = (FilePath.Posix.joinPath ok, cs) -- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and -- 'HardLink' entry types. -- newtype LinkTarget = LinkTarget BS.ByteString deriving (Eq, Ord, Show) instance NFData LinkTarget where #if MIN_VERSION_bytestring(0,10,0) rnf (LinkTarget bs) = rnf bs #else rnf (LinkTarget !_bs) = () #endif -- | Convert a native 'FilePath' to a tar 'LinkTarget'. This may fail if the -- string is longer than 100 characters or if it contains non-portable -- characters. -- toLinkTarget :: FilePath -> Maybe LinkTarget toLinkTarget path | length path <= 100 = Just $! LinkTarget (BS.Char8.pack path) | otherwise = Nothing -- | Convert a tar 'LinkTarget' to a native 'FilePath'. -- fromLinkTarget :: LinkTarget -> FilePath fromLinkTarget (LinkTarget pathbs) = adjustDirectory $ FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path where path = BS.Char8.unpack pathbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path = FilePath.Native.addTrailingPathSeparator | otherwise = id -- | Convert a tar 'LinkTarget' to a Unix/Posix 'FilePath'. -- fromLinkTargetToPosixPath :: LinkTarget -> FilePath fromLinkTargetToPosixPath (LinkTarget pathbs) = adjustDirectory $ FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories path where path = BS.Char8.unpack pathbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path = FilePath.Native.addTrailingPathSeparator | otherwise = id -- | Convert a tar 'LinkTarget' to a Windows 'FilePath'. -- fromLinkTargetToWindowsPath :: LinkTarget -> FilePath fromLinkTargetToWindowsPath (LinkTarget pathbs) = adjustDirectory $ FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories path where path = BS.Char8.unpack pathbs adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path = FilePath.Windows.addTrailingPathSeparator | otherwise = id -- -- * Entries type -- -- | A tar archive is a sequence of entries. -- -- The point of this type as opposed to just using a list is that it makes the -- failure case explicit. We need this because the sequence of entries we get -- from reading a tarball can include errors. -- -- It is a concrete data type so you can manipulate it directly but it is often -- clearer to use the provided functions for mapping, folding and unfolding. -- -- Converting from a list can be done with just @foldr Next Done@. Converting -- back into a list can be done with 'foldEntries' however in that case you -- must be prepared to handle the 'Fail' case inherent in the 'Entries' type. -- -- The 'Monoid' instance lets you concatenate archives or append entries to an -- archive. -- data Entries e = Next Entry (Entries e) | Done | Fail e deriving (Eq, Show) infixr 5 `Next` -- | This is like the standard 'unfoldr' function on lists, but for 'Entries'. -- It includes failure as an extra possibility that the stepper function may -- return. -- -- It can be used to generate 'Entries' from some other type. For example it is -- used internally to lazily unfold entries from a 'LBS.ByteString'. -- unfoldEntries :: (a -> Either e (Maybe (Entry, a))) -> a -> Entries e unfoldEntries f = unfold where unfold x = case f x of Left err -> Fail err Right Nothing -> Done Right (Just (e, x')) -> Next e (unfold x') -- | This is like the standard 'foldr' function on lists, but for 'Entries'. -- Compared to 'foldr' it takes an extra function to account for the -- possibility of failure. -- -- This is used to consume a sequence of entries. For example it could be used -- to scan a tarball for problems or to collect an index of the contents. -- foldEntries :: (Entry -> a -> a) -> a -> (e -> a) -> Entries e -> a foldEntries next done fail' = fold where fold (Next e es) = next e (fold es) fold Done = done fold (Fail err) = fail' err -- | A 'foldl'-like function on Entries. It either returns the final -- accumulator result, or the failure along with the intermediate accumulator -- value. -- foldlEntries :: (a -> Entry -> a) -> a -> Entries e -> Either (e, a) a foldlEntries f z = go z where go !acc (Next e es) = go (f acc e) es go !acc Done = Right acc go !acc (Fail err) = Left (err, acc) -- | This is like the standard 'map' function on lists, but for 'Entries'. It -- includes failure as a extra possible outcome of the mapping function. -- -- If your mapping function cannot fail it may be more convenient to use -- 'mapEntriesNoFail' mapEntries :: (Entry -> Either e' Entry) -> Entries e -> Entries (Either e e') mapEntries f = foldEntries (\entry rest -> either (Fail . Right) (flip Next rest) (f entry)) Done (Fail . Left) -- | Like 'mapEntries' but the mapping function itself cannot fail. -- mapEntriesNoFail :: (Entry -> Entry) -> Entries e -> Entries e mapEntriesNoFail f = foldEntries (\entry -> Next (f entry)) Done Fail instance Monoid (Entries e) where mempty = Done mappend a b = foldEntries Next b Fail a instance Functor Entries where fmap f = foldEntries Next Done (Fail . f) instance NFData e => NFData (Entries e) where rnf (Next e es) = rnf e `seq` rnf es rnf Done = () rnf (Fail e) = rnf e ------------------------- -- QuickCheck instances -- #ifdef TESTS instance Arbitrary Entry where arbitrary = Entry <$> arbitrary <*> arbitrary <*> arbitraryPermissions <*> arbitrary <*> arbitraryEpochTime <*> arbitrary where arbitraryPermissions :: Gen Permissions arbitraryPermissions = fromIntegral <$> (arbitraryOctal 7 :: Gen Int) arbitraryEpochTime :: Gen EpochTime arbitraryEpochTime = fromIntegral <$> (arbitraryOctal 11 :: Gen Int) shrink (Entry path content perms author time format) = [ Entry path' content' perms author' time' format | (path', content', author', time') <- shrink (path, content, author, time) ] ++ [ Entry path content perms' author time format | perms' <- shrinkIntegral perms ] instance Arbitrary TarPath where arbitrary = either error id . toTarPath False . FilePath.Posix.joinPath <$> listOf1ToN (255 `div` 5) (elements (map (replicate 4) "abcd")) shrink = map (either error id . toTarPath False) . map FilePath.Posix.joinPath . filter (not . null) . shrinkList shrinkNothing . FilePath.Posix.splitPath . fromTarPathToPosixPath instance Arbitrary LinkTarget where arbitrary = maybe (error "link target too large") id . toLinkTarget . FilePath.Native.joinPath <$> listOf1ToN (100 `div` 5) (elements (map (replicate 4) "abcd")) shrink = map (maybe (error "link target too large") id . toLinkTarget) . map FilePath.Posix.joinPath . filter (not . null) . shrinkList shrinkNothing . FilePath.Posix.splitPath . fromLinkTargetToPosixPath listOf1ToN :: Int -> Gen a -> Gen [a] listOf1ToN n g = sized $ \sz -> do n <- choose (1, min n (max 1 sz)) vectorOf n g listOf0ToN :: Int -> Gen a -> Gen [a] listOf0ToN n g = sized $ \sz -> do n <- choose (0, min n sz) vectorOf n g instance Arbitrary EntryContent where arbitrary = frequency [ (16, do bs <- arbitrary; return (NormalFile bs (LBS.length bs))) , (2, pure Directory) , (1, SymbolicLink <$> arbitrary) , (1, HardLink <$> arbitrary) , (1, CharacterDevice <$> arbitraryOctal 7 <*> arbitraryOctal 7) , (1, BlockDevice <$> arbitraryOctal 7 <*> arbitraryOctal 7) , (1, pure NamedPipe) , (1, do c <- elements (['A'..'Z']++['a'..'z']) bs <- arbitrary; return (OtherEntryType c bs (LBS.length bs))) ] shrink (NormalFile bs _) = [ NormalFile bs' (LBS.length bs') | bs' <- shrink bs ] shrink Directory = [] shrink (SymbolicLink link) = [ SymbolicLink link' | link' <- shrink link ] shrink (HardLink link) = [ HardLink link' | link' <- shrink link ] shrink (CharacterDevice ma mi) = [ CharacterDevice ma' mi' | (ma', mi') <- shrink (ma, mi) ] shrink (BlockDevice ma mi) = [ BlockDevice ma' mi' | (ma', mi') <- shrink (ma, mi) ] shrink NamedPipe = [] shrink (OtherEntryType c bs _) = [ OtherEntryType c bs' (LBS.length bs') | bs' <- shrink bs ] instance Arbitrary LBS.ByteString where arbitrary = fmap LBS.pack arbitrary shrink = map LBS.pack . shrink . LBS.unpack instance Arbitrary BS.ByteString where arbitrary = fmap BS.pack arbitrary shrink = map BS.pack . shrink . BS.unpack instance Arbitrary Ownership where arbitrary = Ownership <$> name <*> name <*> idno <*> idno where name = listOf0ToN 32 (arbitrary `suchThat` (/= '\0')) idno = arbitraryOctal 7 shrink (Ownership oname gname oid gid) = [ Ownership oname' gname' oid' gid' | (oname', gname', oid', gid') <- shrink (oname, gname, oid, gid) ] instance Arbitrary Format where arbitrary = elements [V7Format, UstarFormat, GnuFormat] --arbitraryOctal :: (Integral n, Random n) => Int -> Gen n arbitraryOctal n = oneof [ pure 0 , choose (0, upperBound) , pure upperBound ] where upperBound = 8^n-1 -- For QC tests it's useful to have a way to limit the info to that which can -- be expressed in the old V7 format limitToV7FormatCompat :: Entry -> Entry limitToV7FormatCompat entry@Entry { entryFormat = V7Format } = entry { entryContent = case entryContent entry of CharacterDevice _ _ -> OtherEntryType '3' LBS.empty 0 BlockDevice _ _ -> OtherEntryType '4' LBS.empty 0 Directory -> OtherEntryType '5' LBS.empty 0 NamedPipe -> OtherEntryType '6' LBS.empty 0 other -> other, entryOwnership = (entryOwnership entry) { groupName = "", ownerName = "" }, entryTarPath = let TarPath name _prefix = entryTarPath entry in TarPath name BS.empty } limitToV7FormatCompat entry = entry #endif