-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Reading, writing and manipulating ".tar" archive files. -- -- This library is for working with ".tar" archive files. It can -- read and write a range of common variations of archive format -- including V7, USTAR, POSIX and GNU formats. It provides support for -- packing and unpacking portable archives. This makes it suitable for -- distribution but not backup because details like file ownership and -- exact permissions are not preserved. @package tar @version 0.4.2.2 -- | Random access to the content of a .tar archive. -- -- This module uses common names and so is designed to be imported -- qualified: -- --
--   import qualified Codec.Archive.Tar.Index as Tar
--   
module Codec.Archive.Tar.Index -- | An index of the entries in a tar file. -- -- This index type is designed to be quite compact and suitable to store -- either on disk or in memory. data TarIndex -- | Look up a given filepath in the TarIndex. It may return a -- TarFileEntry containing the TarEntryOffset of the file -- within the tar file, or if the filepath identifies a directory then it -- returns a TarDir containing the list of files within that -- directory. -- -- Given the TarEntryOffset you can then use one of the I/O -- operations: -- -- lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry -- | The result of lookup in a TarIndex. It can either be a -- file directly, or a directory entry containing further entries (and -- all subdirectories recursively). Note that the subtrees are -- constructed lazily, so it's cheaper if you don't look at them. data TarIndexEntry TarFileEntry :: {-# UNPACK #-} !TarEntryOffset -> TarIndexEntry TarDir :: [(FilePath, TarIndexEntry)] -> TarIndexEntry -- | An offset within a tar file. Use hReadEntry, -- hReadEntryHeader or hSeekEntryOffset. -- -- This is actually a tar "record" number, not a byte offset. type TarEntryOffset = Word32 -- | Reads an entire Entry at the given TarEntryOffset in the -- tar file. The Handle must be open for reading and be seekable. -- -- This reads the whole entry into memory strictly, not incrementally. -- For more control, use hReadEntryHeader and then read the entry -- content manually. hReadEntry :: Handle -> TarEntryOffset -> IO Entry -- | Read the header for a Entry at the given TarEntryOffset -- in the tar file. The entryContent will contain the correct -- metadata but an empty file content. The Handle must be open for -- reading and be seekable. -- -- The Handle position is advanced to the beginning of the entry -- content (if any). You must check the entryContent to see if the -- entry is of type NormalFile. 
If it is, the NormalFile -- gives the content length and you are free to read this much data from -- the Handle. -- --
--   entry <- Tar.hReadEntryHeader hnd offset
--   case Tar.entryContent entry of
--     Tar.NormalFile _ size -> do content <- BS.hGet hnd (fromIntegral size)
--                                 ...
--   
-- -- Of course you don't have to read it all in one go (as -- hReadEntry does), you can use any appropriate method to read it -- incrementally. -- -- In addition to I/O errors, this can throw a FormatError if the -- offset is wrong, or if the file is not valid tar format. -- -- There is also the lower level operation hSeekEntryOffset. hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry -- | Build a TarIndex from a sequence of tar Entries. The -- Entries are assumed to start at offset 0 within a -- file. build :: Entries e -> Either e TarIndex -- | The intermediate type used for incremental construction of a -- TarIndex. data IndexBuilder -- | The initial empty IndexBuilder. emptyIndex :: IndexBuilder -- | Add the next Entry into the IndexBuilder. addNextEntry :: Entry -> IndexBuilder -> IndexBuilder -- | Use this function if you want to skip some entries and not add them to -- the final TarIndex. skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder -- | Finish accumulating Entry information and build the compact -- TarIndex lookup structure. finaliseIndex :: IndexBuilder -> TarIndex -- | The TarIndex is compact in memory, and it has a similarly -- compact external representation. serialise :: TarIndex -> Builder -- | Read the external representation back into a TarIndex. deserialise :: ByteString -> Maybe (TarIndex, ByteString) -- | This is a low level variant on hReadEntryHeader, that can be -- used to iterate through a tar file, entry by entry. -- -- It has a few differences compared to hReadEntryHeader: -- -- -- -- After this action, the Handle position is not in any useful -- place. If you want to skip to the next entry, take the -- TarEntryOffset returned and use hReadEntryHeaderOrEof -- again. Or if having inspected the Entry header you want to read -- the entry content (if it has one) then use -- hSeekEntryContentOffset on the original input -- TarEntryOffset. 
hReadEntryHeaderOrEof :: Handle -> TarEntryOffset -> IO (Maybe (Entry, TarEntryOffset)) -- | Set the Handle position to the position corresponding to the -- given TarEntryOffset. -- -- This position is where the entry metadata can be read. If you already -- know the entry has a body (and perhaps know its length), you may wish -- to seek to the body content directly using -- hSeekEntryContentOffset. hSeekEntryOffset :: Handle -> TarEntryOffset -> IO () -- | Set the Handle position to the entry content position -- corresponding to the given TarEntryOffset. -- -- This position is where the entry content can be read using ordinary -- I/O operations (though you have to know in advance how big the entry -- content is). This is only valid if you already know the -- entry has a body (i.e. is a normal file). hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO () -- | Seek to the end of a tar file, to the position where new entries can -- be appended, and return that TarEntryOffset. -- -- If you have a valid TarIndex for this tar file then you should -- supply it because it allows seeking directly to the correct location. -- -- If you do not have an index, then this becomes an expensive linear -- operation because we have to read each tar entry header from the -- beginning to find the location immediately after the last entry (this -- is because tar files have a variable length trailer and we cannot -- reliably find that by starting at the end). In this mode, it will fail -- with an exception if the file is not in fact in the tar format. hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset -- | Calculate the TarEntryOffset of the next entry, given the size -- and offset of the current entry. -- -- This is much like using skipNextEntry and -- indexNextEntryOffset, but without using an IndexBuilder. nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset -- | This is the offset immediately following the last entry in the tar -- file. 
This can be useful to append further entries into the tar file. -- Use with hSeekEntryOffset, or just use -- hSeekEndEntryOffset directly. indexEndEntryOffset :: TarIndex -> TarEntryOffset -- | This is the offset immediately following the entry most recently added -- to the IndexBuilder. You might use this if you need to know the -- offsets but don't want to use the TarIndex lookup structure. -- Use with hSeekEntryOffset. See also nextEntryOffset. indexNextEntryOffset :: IndexBuilder -> TarEntryOffset instance GHC.Show.Show Codec.Archive.Tar.Index.TarIndex instance GHC.Classes.Eq Codec.Archive.Tar.Index.TarIndex instance GHC.Show.Show Codec.Archive.Tar.Index.TarIndexEntry instance GHC.Show.Show Codec.Archive.Tar.Index.PathComponentId instance GHC.Enum.Enum Codec.Archive.Tar.Index.PathComponentId instance GHC.Classes.Ord Codec.Archive.Tar.Index.PathComponentId instance GHC.Classes.Eq Codec.Archive.Tar.Index.PathComponentId -- | Perform various checks on tar file entries. module Codec.Archive.Tar.Check -- | This function checks a sequence of tar entries for file name security -- problems. It checks that: -- -- -- -- These checks are from the perspective of the current OS. That means we -- check for "C:blah" files on Windows and "/blah" files on -- Unix. For archive entry types HardLink and SymbolicLink -- the same checks are done for the link target. A failure in any entry -- terminates the sequence of entries with an error. checkSecurity :: Entries e -> Entries (Either e FileNameError) -- | Errors arising from tar file names being in some way invalid or -- dangerous data FileNameError InvalidFileName :: FilePath -> FileNameError AbsoluteFileName :: FilePath -> FileNameError -- | This function checks a sequence of tar entries for being a "tar bomb". -- This means that the tar file does not follow the standard convention -- that all entries are within a single subdirectory, e.g. a file -- "foo.tar" would usually have all entries within the "foo/" -- subdirectory. 
-- -- Given the expected subdirectory, this function checks all entries are -- within that subdirectory. -- -- Note: This check must be used in conjunction with checkSecurity -- (or checkPortability). checkTarbomb :: FilePath -> Entries e -> Entries (Either e TarBombError) -- | An error that occurs if a tar file is a "tar bomb" that would extract -- files outside of the intended directory. data TarBombError TarBombError :: FilePath -> TarBombError -- | This function checks a sequence of tar entries for a number of -- portability issues. It will complain if: -- -- checkPortability :: Entries e -> Entries (Either e PortabilityError) -- | Portability problems in a tar archive data PortabilityError NonPortableFormat :: Format -> PortabilityError NonPortableFileType :: PortabilityError NonPortableEntryNameChar :: FilePath -> PortabilityError NonPortableFileName :: PortabilityPlatform -> FileNameError -> PortabilityError -- | The name of a platform that portability issues arise from type PortabilityPlatform = String instance GHC.Show.Show Codec.Archive.Tar.Check.FileNameError instance GHC.Exception.Exception Codec.Archive.Tar.Check.FileNameError instance GHC.Exception.Exception Codec.Archive.Tar.Check.TarBombError instance GHC.Show.Show Codec.Archive.Tar.Check.TarBombError instance GHC.Exception.Exception Codec.Archive.Tar.Check.PortabilityError instance GHC.Show.Show Codec.Archive.Tar.Check.PortabilityError -- | Types and functions to manipulate tar entries. -- -- While the Codec.Archive.Tar module provides only the simple -- high level API, this module provides full access to the details of tar -- entries. This lets you inspect all the meta-data, construct entries -- and handle error cases more precisely. -- -- This module uses common names and so is designed to be imported -- qualified: -- --
--   import qualified Codec.Archive.Tar       as Tar
--   import qualified Codec.Archive.Tar.Entry as Tar
--   
module Codec.Archive.Tar.Entry -- | Tar archive entry. data Entry Entry :: !TarPath -> !EntryContent -> !Permissions -> !Ownership -> !EpochTime -> !Format -> Entry -- | The path of the file or directory within the archive. This is in a -- tar-specific form. Use entryPath to get a native -- FilePath. [entryTarPath] :: Entry -> !TarPath -- | The real content of the entry. For NormalFile this includes the -- file data. An entry usually contains a NormalFile or a -- Directory. [entryContent] :: Entry -> !EntryContent -- | File permissions (Unix style file mode). [entryPermissions] :: Entry -> !Permissions -- | The user and group to which this file belongs. [entryOwnership] :: Entry -> !Ownership -- | The time the file was last modified. [entryTime] :: Entry -> !EpochTime -- | The tar format the archive is using. [entryFormat] :: Entry -> !Format -- | Native FilePath of the file or directory within the archive. entryPath :: Entry -> FilePath -- | The content of a tar archive entry, which depends on the type of -- entry. -- -- Portable archives should contain only NormalFile and -- Directory. data EntryContent NormalFile :: ByteString -> !FileSize -> EntryContent Directory :: EntryContent SymbolicLink :: !LinkTarget -> EntryContent HardLink :: !LinkTarget -> EntryContent CharacterDevice :: !DevMajor -> !DevMinor -> EntryContent BlockDevice :: !DevMajor -> !DevMinor -> EntryContent NamedPipe :: EntryContent OtherEntryType :: !TypeCode -> ByteString -> !FileSize -> EntryContent data Ownership Ownership :: String -> String -> !Int -> !Int -> Ownership -- | The owner user name. Should be set to "" if unknown. [ownerName] :: Ownership -> String -- | The owner group name. Should be set to "" if unknown. [groupName] :: Ownership -> String -- | Numeric owner user id. Should be set to 0 if unknown. [ownerId] :: Ownership -> !Int -- | Numeric owner group id. Should be set to 0 if unknown. 
[groupId] :: Ownership -> !Int type FileSize = Int64 type Permissions = FileMode -- | The number of seconds since the UNIX epoch type EpochTime = Int64 type DevMajor = Int type DevMinor = Int type TypeCode = Char -- | There have been a number of extensions to the tar file format over the -- years. They all share the basic entry fields and put more meta-data in -- different extended headers. data Format -- | This is the classic Unix V7 tar format. It does not support owner and -- group names, just numeric Ids. It also does not support device -- numbers. V7Format :: Format -- | The "USTAR" format is an extension of the classic V7 format. It was -- later standardised by POSIX. It has some restrictions but is the most -- portable format. UstarFormat :: Format -- | The GNU tar implementation also extends the classic V7 format, though -- in a slightly different way from the USTAR format. In general for new -- archives the standard USTAR/POSIX should be used. GnuFormat :: Format -- | An Entry with all default values except for the file name and -- type. It uses the portable USTAR/POSIX format (see -- UstarHeader). -- -- You can use this as a basis and override specific fields, eg: -- --
--   (simpleEntry name (HardLink target)) { entryTime = mtime }
--   
simpleEntry :: TarPath -> EntryContent -> Entry -- | A tar Entry for a file. -- -- Entry fields such as file permissions and ownership have default -- values. -- -- You can use this as a basis and override specific fields. For example -- if you need an executable file you could use: -- --
--   (fileEntry name content) { entryPermissions = executableFilePermissions }
--   
fileEntry :: TarPath -> ByteString -> Entry -- | A tar Entry for a directory. -- -- Entry fields such as file permissions and ownership have default -- values. directoryEntry :: TarPath -> Entry -- | rw-r--r-- for normal files ordinaryFilePermissions :: Permissions -- | rwxr-xr-x for executable files executableFilePermissions :: Permissions -- | rwxr-xr-x for directories directoryPermissions :: Permissions -- | Construct a tar Entry based on a local file. -- -- This sets the entry size, the data contained in the file and the -- file's modification time. If the file is executable then that -- information is also preserved. File ownership and detailed permissions -- are not preserved. -- -- packFileEntry :: FilePath -> TarPath -> IO Entry -- | Construct a tar Entry based on a local directory (but not its -- contents). -- -- The only attribute of the directory that is used is its modification -- time. Directory ownership and detailed permissions are not preserved. packDirectoryEntry :: FilePath -> TarPath -> IO Entry -- | This is a utility function, much like getDirectoryContents. The -- difference is that it includes the contents of subdirectories. -- -- The paths returned are all relative to the top directory. Directory -- paths are distinguishable by having a trailing path separator (see -- hasTrailingPathSeparator). -- -- All directories are listed before the files that they contain. Amongst -- the contents of a directory, subdirectories are listed after normal -- files. The overall result is that files within a directory will be -- together in a single contiguous group. This tends to improve file -- layout and IO performance when creating or extracting tar archives. -- -- getDirectoryContentsRecursive :: FilePath -> IO [FilePath] -- | The classic tar format allowed just 100 characters for the file name. -- The USTAR format extended this with an extra 155 characters, however -- it uses a complex method of splitting the name between the two -- sections. 
-- -- Instead of just putting any overflow into the extended area, it uses -- the extended area as a prefix. The aggravating insane bit however is -- that the prefix (if any) must only contain a directory prefix. That is -- the split between the two areas must be on a directory separator -- boundary. So there is no simple calculation to work out if a file name -- is too long. Instead we have to try to find a valid split that makes -- the name fit in the two areas. -- -- The rationale presumably was to make it a bit more compatible with old -- tar programs that only understand the classic format. A classic tar -- would be able to extract the file name and possibly some dir prefix, -- but not the full dir prefix. So the files would end up in the wrong -- place, but that's probably better than ending up with the wrong names -- too. -- -- So it's understandable but rather annoying. -- -- data TarPath -- | Convert a native FilePath to a TarPath. -- -- The conversion may fail if the FilePath is too long. See -- TarPath for a description of the problem with splitting long -- FilePaths. toTarPath :: Bool -> FilePath -> Either String TarPath -- | Convert a TarPath to a native FilePath. -- -- The native FilePath will use the native directory separator but -- it is not otherwise checked for validity or sanity. In particular: -- -- fromTarPath :: TarPath -> FilePath -- | Convert a TarPath to a Unix/Posix FilePath. -- -- The difference compared to fromTarPath is that it always -- returns a Unix style path irrespective of the current operating -- system. -- -- This is useful to check how a TarPath would be interpreted on a -- specific operating system, eg to perform portability checks. fromTarPathToPosixPath :: TarPath -> FilePath -- | Convert a TarPath to a Windows FilePath. -- -- The only difference compared to fromTarPath is that it always -- returns a Windows style path irrespective of the current operating -- system. 
-- -- This is useful to check how a TarPath would be interpreted on a -- specific operating system, eg to perform portability checks. fromTarPathToWindowsPath :: TarPath -> FilePath -- | The tar format allows just 100 ASCII characters for the -- SymbolicLink and HardLink entry types. data LinkTarget -- | Convert a native FilePath to a tar LinkTarget. This may -- fail if the string is longer than 100 characters or if it contains -- non-portable characters. toLinkTarget :: FilePath -> Maybe LinkTarget -- | Convert a tar LinkTarget to a native FilePath. fromLinkTarget :: LinkTarget -> FilePath -- | Convert a tar LinkTarget to a Unix/Posix FilePath. fromLinkTargetToPosixPath :: LinkTarget -> FilePath -- | Convert a tar LinkTarget to a Windows FilePath. fromLinkTargetToWindowsPath :: LinkTarget -> FilePath -- | Reading, writing and manipulating ".tar" archive files. -- -- This module uses common names and so is designed to be imported -- qualified: -- --
--   import qualified Codec.Archive.Tar as Tar
--   
module Codec.Archive.Tar -- | Create a new ".tar" file from a directory of files. -- -- It is equivalent to calling the standard tar program like so: -- --
--   $ tar -f tarball.tar -C base -c dir
--   
-- -- This assumes a directory ./base/dir with files inside, eg -- ./base/dir/foo.txt. The file names inside the resulting tar -- file will be relative to base, eg dir/foo.txt. -- -- This is a high level "all in one" operation. Since you may need -- variations on this function it is instructive to see how it is -- written. It is just: -- --
--   BS.writeFile tar . Tar.write =<< Tar.pack base paths
--   
-- -- Notes: -- -- The files and directories must not change during this operation or the -- result is not well defined. -- -- The intention of this function is to create tarballs that are portable -- between systems. It is not suitable for doing file system -- backups because file ownership and permissions are not fully -- preserved. File ownership is not preserved at all. File permissions -- are set to simple portable values: -- -- create :: FilePath -> FilePath -> [FilePath] -> IO () -- | Extract all the files contained in a ".tar" file. -- -- It is equivalent to calling the standard tar program like so: -- --
--   $ tar -x -f tarball.tar -C dir
--   
-- -- So for example if the tarball.tar file contains -- foo/bar.txt then this will extract it to -- dir/foo/bar.txt. -- -- This is a high level "all in one" operation. Since you may need -- variations on this function it is instructive to see how it is -- written. It is just: -- --
--   Tar.unpack dir . Tar.read =<< BS.readFile tar
--   
-- -- Notes: -- -- Extracting can fail for a number of reasons. The tarball may be -- incorrectly formatted. There may be IO or permission errors. In such -- cases an exception will be thrown and extraction will not continue. -- -- Since the extraction may fail part way through it is not atomic. For -- this reason you may want to extract into an empty directory and, if -- the extraction fails, recursively delete the directory. -- -- Security: only files inside the target directory will be written. -- Tarballs containing entries that point outside of the tarball (either -- absolute paths or relative paths) will be caught and an exception will -- be thrown. extract :: FilePath -> FilePath -> IO () -- | Append new entries to a ".tar" file from a directory of -- files. -- -- This is much like create, except that all the entries are added -- to the end of an existing tar file. Or if the file does not already -- exists then it behaves the same as create. append :: FilePath -> FilePath -> [FilePath] -> IO () -- | Convert a data stream in the tar file format into an internal data -- structure. Decoding errors are reported by the Fail constructor -- of the Entries type. -- -- read :: ByteString -> Entries FormatError -- | Create the external representation of a tar archive by serialising a -- list of tar entries. -- -- write :: [Entry] -> ByteString -- | Creates a tar archive from a list of directory or files. Any -- directories specified will have their contents included recursively. -- Paths in the archive will be relative to the given base directory. -- -- This is a portable implementation of packing suitable for portable -- archives. In particular it only constructs NormalFile and -- Directory entries. Hard links and symbolic links are treated -- like ordinary files. It cannot be used to pack directories containing -- recursive symbolic links. Special files like FIFOs (named pipes), -- sockets or device files will also cause problems. 
-- -- An exception will be thrown for any file names that are too long to -- represent as a TarPath. -- -- pack :: FilePath -> [FilePath] -> IO [Entry] -- | Create local files and directories based on the entries of a tar -- archive. -- -- This is a portable implementation of unpacking suitable for portable -- archives. It handles NormalFile and Directory entries -- and has simulated support for SymbolicLink and HardLink -- entries. Links are implemented by copying the target file. This -- therefore works on Windows as well as Unix. All other entry types are -- ignored, that is they are not unpacked and no exception is raised. -- -- If the Entries ends in an error then it is raised as an -- exception. Any files or directories that have been unpacked before the -- error was encountered will not be deleted. For this reason you may -- want to unpack into an empty directory so that you can easily clean up -- if unpacking fails part-way. -- -- On its own, this function only checks for security (using -- checkSecurity). You can do other checks by applying checking -- functions to the Entries that you pass to this function. For -- example: -- --
--   unpack dir (checkTarbomb expectedDir entries)
--   
-- -- If you care about the priority of the reported errors then you may -- want to use checkSecurity before checkTarbomb or other -- checks. unpack :: Exception e => FilePath -> Entries e -> IO () -- | Tar archive entry. data Entry -- | Native FilePath of the file or directory within the archive. entryPath :: Entry -> FilePath -- | The real content of the entry. For NormalFile this includes the -- file data. An entry usually contains a NormalFile or a -- Directory. entryContent :: Entry -> EntryContent -- | The content of a tar archive entry, which depends on the type of -- entry. -- -- Portable archives should contain only NormalFile and -- Directory. data EntryContent NormalFile :: ByteString -> !FileSize -> EntryContent Directory :: EntryContent SymbolicLink :: !LinkTarget -> EntryContent HardLink :: !LinkTarget -> EntryContent CharacterDevice :: !DevMajor -> !DevMinor -> EntryContent BlockDevice :: !DevMajor -> !DevMinor -> EntryContent NamedPipe :: EntryContent OtherEntryType :: !TypeCode -> ByteString -> !FileSize -> EntryContent -- | A tar archive is a sequence of entries. -- -- The point of this type as opposed to just using a list is that it -- makes the failure case explicit. We need this because the sequence of -- entries we get from reading a tarball can include errors. -- -- It is a concrete data type so you can manipulate it directly but it is -- often clearer to use the provided functions for mapping, folding and -- unfolding. -- -- Converting from a list can be done with just foldr Next Done. -- Converting back into a list can be done with foldEntries -- however in that case you must be prepared to handle the Fail -- case inherent in the Entries type. -- -- The Monoid instance lets you concatenate archives or append -- entries to an archive. data Entries e Next :: Entry -> (Entries e) -> Entries e Done :: Entries e Fail :: e -> Entries e -- | This is like the standard map function on lists, but for -- Entries. 
It includes failure as a extra possible outcome of the -- mapping function. -- -- If your mapping function cannot fail it may be more convenient to use -- mapEntriesNoFail mapEntries :: (Entry -> Either e' Entry) -> Entries e -> Entries (Either e e') -- | Like mapEntries but the mapping function itself cannot fail. mapEntriesNoFail :: (Entry -> Entry) -> Entries e -> Entries e -- | This is like the standard foldr function on lists, but for -- Entries. Compared to foldr it takes an extra function to -- account for the possibility of failure. -- -- This is used to consume a sequence of entries. For example it could be -- used to scan a tarball for problems or to collect an index of the -- contents. foldEntries :: (Entry -> a -> a) -> a -> (e -> a) -> Entries e -> a -- | This is like the standard unfoldr function on lists, but for -- Entries. It includes failure as an extra possibility that the -- stepper function may return. -- -- It can be used to generate Entries from some other type. For -- example it is used internally to lazily unfold entries from a -- ByteString. unfoldEntries :: (a -> Either e (Maybe (Entry, a))) -> a -> Entries e -- | Errors that can be encountered when parsing a Tar archive. data FormatError TruncatedArchive :: FormatError ShortTrailer :: FormatError BadTrailer :: FormatError TrailingJunk :: FormatError ChecksumIncorrect :: FormatError NotTarFormat :: FormatError UnrecognisedTarFormat :: FormatError HeaderBadNumericEncoding :: FormatError