Copyright	(c) 2010-2015 Duncan Coutts
License	BSD3
Maintainer	duncan@community.haskell.org
Portability	portable
Safe Haskell	Safe-Inferred
Language	Haskell2010

Codec.Archive.Tar.Index.Internal

Contents

Index type
Index lookup
- I/O operations
Index construction
- Incremental construction
Serialising indexes
Lower level operations with offsets and I/O on tar files

Description

Synopsis

data TarIndex = TarIndex !(StringTable PathComponentId) !IntTrie !TarEntryOffset
lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry
data TarIndexEntry
- = TarFileEntry !TarEntryOffset
- | TarDir [(FilePath, TarIndexEntry)]
toList :: TarIndex -> [(FilePath, TarEntryOffset)]
newtype PathComponentId = PathComponentId Int
type TarEntryOffset = Word32
hReadEntry :: Handle -> TarEntryOffset -> IO Entry
hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry
build :: Entries e -> Either e TarIndex
data IndexBuilder
empty :: IndexBuilder
addNextEntry :: Entry -> IndexBuilder -> IndexBuilder
skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder
finalise :: IndexBuilder -> TarIndex
unfinalise :: TarIndex -> IndexBuilder
serialise :: TarIndex -> ByteString
deserialise :: ByteString -> Maybe (TarIndex, ByteString)
hReadEntryHeaderOrEof :: Handle -> TarEntryOffset -> IO (Maybe (Entry, TarEntryOffset))
hSeekEntryOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset
nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset
indexEndEntryOffset :: TarIndex -> TarEntryOffset
indexNextEntryOffset :: IndexBuilder -> TarEntryOffset
toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId]
serialiseLBS :: TarIndex -> ByteString
serialiseSize :: TarIndex -> Int

Index type

data TarIndex Source #

An index of the entries in a tar file.

This index type is designed to be quite compact and suitable to store either on disk or in memory.

Constructors

TarIndex !(StringTable PathComponentId) !IntTrie !TarEntryOffset

Instances

Instances details

Show TarIndex Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods showsPrec :: Int -> TarIndex -> ShowS # show :: TarIndex -> String # showList :: [TarIndex] -> ShowS #
NFData TarIndex Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods rnf :: TarIndex -> () #
Eq TarIndex Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods (==) :: TarIndex -> TarIndex -> Bool # (/=) :: TarIndex -> TarIndex -> Bool #

Index lookup

lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry Source #

Look up a given filepath in the TarIndex. It may return a TarFileEntry containing the TarEntryOffset of the file within the tar file, or if the filepath identifies a directory then it returns a TarDir containing the list of files within that directory.

Given the TarEntryOffset you can then use one of the I/O operations:

hReadEntry to read the whole entry;
hReadEntryHeader to read just the file metadata (e.g. its length).

data TarIndexEntry Source #

The result of lookup in a TarIndex. It can either be a file directly, or a directory entry containing further entries (and all subdirectories recursively). Note that the subtrees are constructed lazily, so it's cheaper if you don't look at them.

Constructors

TarFileEntry !TarEntryOffset
TarDir [(FilePath, TarIndexEntry)]

Instances

Instances details

Show TarIndexEntry Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods showsPrec :: Int -> TarIndexEntry -> ShowS # show :: TarIndexEntry -> String # showList :: [TarIndexEntry] -> ShowS #

toList :: TarIndex -> [(FilePath, TarEntryOffset)] Source #

All the files in the index with their corresponding TarEntryOffsets.

Note that the files are in no special order. If you intend to read all or most files then it is recommended to sort by the TarEntryOffset.

newtype PathComponentId Source #

Constructors

PathComponentId Int

Instances

Instances details

Enum PathComponentId Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods succ :: PathComponentId -> PathComponentId # pred :: PathComponentId -> PathComponentId # toEnum :: Int -> PathComponentId # fromEnum :: PathComponentId -> Int # enumFrom :: PathComponentId -> [PathComponentId] # enumFromThen :: PathComponentId -> PathComponentId -> [PathComponentId] # enumFromTo :: PathComponentId -> PathComponentId -> [PathComponentId] # enumFromThenTo :: PathComponentId -> PathComponentId -> PathComponentId -> [PathComponentId] #
Show PathComponentId Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods showsPrec :: Int -> PathComponentId -> ShowS # show :: PathComponentId -> String # showList :: [PathComponentId] -> ShowS #
Eq PathComponentId Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods (==) :: PathComponentId -> PathComponentId -> Bool # (/=) :: PathComponentId -> PathComponentId -> Bool #
Ord PathComponentId Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods compare :: PathComponentId -> PathComponentId -> Ordering # (<) :: PathComponentId -> PathComponentId -> Bool # (<=) :: PathComponentId -> PathComponentId -> Bool # (>) :: PathComponentId -> PathComponentId -> Bool # (>=) :: PathComponentId -> PathComponentId -> Bool # max :: PathComponentId -> PathComponentId -> PathComponentId # min :: PathComponentId -> PathComponentId -> PathComponentId #

I/O operations

type TarEntryOffset = Word32 Source #

An offset within a tar file. Use hReadEntry, hReadEntryHeader or hSeekEntryOffset.

This is actually a tar "record" number, not a byte offset.

hReadEntry :: Handle -> TarEntryOffset -> IO Entry Source #

Reads an entire GenEntry at the given TarEntryOffset in the tar file. The Handle must be open for reading and be seekable.

This reads the whole entry into memory strictly, not incrementally. For more control, use hReadEntryHeader and then read the entry content manually.

hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry Source #

Read the header for a GenEntry at the given TarEntryOffset in the tar file. The entryContent will contain the correct metadata but an empty file content. The Handle must be open for reading and be seekable.

The Handle position is advanced to the beginning of the entry content (if any). You must check the entryContent to see if the entry is of type NormalFile. If it is, the NormalFile gives the content length and you are free to read this much data from the Handle.

entry <- Tar.hReadEntryHeader hnd offset
case Tar.entryContent entry of
  Tar.NormalFile _ size -> do content <- BS.hGet hnd size
                              ...

Of course you don't have to read it all in one go (as hReadEntry does), you can use any appropriate method to read it incrementally.

In addition to I/O errors, this can throw a FormatError if the offset is wrong, or if the file is not valid tar format.

There is also the lower level operation hSeekEntryOffset.

Index construction

build :: Entries e -> Either e TarIndex Source #

Build a TarIndex from a sequence of tar Entries. The Entries are assumed to start at offset 0 within a file.

Incremental construction

data IndexBuilder Source #

The intermediate type used for incremental construction of a TarIndex.

Instances

Instances details

Show IndexBuilder Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods showsPrec :: Int -> IndexBuilder -> ShowS # show :: IndexBuilder -> String # showList :: [IndexBuilder] -> ShowS #
NFData IndexBuilder Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods rnf :: IndexBuilder -> () #
Eq IndexBuilder Source #
Instance details Defined in Codec.Archive.Tar.Index.Internal Methods (==) :: IndexBuilder -> IndexBuilder -> Bool # (/=) :: IndexBuilder -> IndexBuilder -> Bool #

empty :: IndexBuilder Source #

The initial empty IndexBuilder.

addNextEntry :: Entry -> IndexBuilder -> IndexBuilder Source #

Add the next GenEntry into the IndexBuilder.

skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder Source #

Use this function if you want to skip some entries and not add them to the final TarIndex.

finalise :: IndexBuilder -> TarIndex Source #

Finish accumulating GenEntry information and build the compact TarIndex lookup structure.

unfinalise :: TarIndex -> IndexBuilder Source #

Resume building an existing index.

A TarIndex is optimized for a highly compact and efficient in-memory representation. This, however, makes it read-only. If you have an existing TarIndex for a large file, and want to add to it, you can translate the TarIndex back to an IndexBuilder. Be aware that this is a relatively costly operation (linear in the size of the TarIndex), though still faster than starting again from scratch.

This is the left inverse to finalise (modulo ordering).

Serialising indexes

serialise :: TarIndex -> ByteString Source #

The TarIndex is compact in memory, and it has a similarly compact external representation.

deserialise :: ByteString -> Maybe (TarIndex, ByteString) Source #

Read the external representation back into a TarIndex.

Lower level operations with offsets and I/O on tar files

hReadEntryHeaderOrEof :: Handle -> TarEntryOffset -> IO (Maybe (Entry, TarEntryOffset)) Source #

This is a low level variant on hReadEntryHeader, that can be used to iterate through a tar file, entry by entry.

It has a few differences compared to hReadEntryHeader:

It returns an indication when the end of the tar file is reached.
It does not move the Handle position to the beginning of the entry content.
It returns the TarEntryOffset of the next entry.

After this action, the Handle position is not in any useful place. If you want to skip to the next entry, take the TarEntryOffset returned and use hReadEntryHeaderOrEof again. Or if having inspected the GenEntry header you want to read the entry content (if it has one) then use hSeekEntryContentOffset on the original input TarEntryOffset.

hSeekEntryOffset :: Handle -> TarEntryOffset -> IO () Source #

Set the Handle position to the position corresponding to the given TarEntryOffset.

This position is where the entry metadata can be read. If you already know the entry has a body (and perhaps know its length), you may wish to seek to the body content directly using hSeekEntryContentOffset.

hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO () Source #

Set the Handle position to the entry content position corresponding to the given TarEntryOffset.

This position is where the entry content can be read using ordinary I/O operations (though you have to know in advance how big the entry content is). This is only valid if you already know the entry has a body (i.e. is a normal file).

hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset Source #

Seek to the end of a tar file, to the position where new entries can be appended, and return that TarEntryOffset.

If you have a valid TarIndex for this tar file then you should supply it because it allows seeking directly to the correct location.

If you do not have an index, then this becomes an expensive linear operation because we have to read each tar entry header from the beginning to find the location immediately after the last entry (this is because tar files have a variable length trailer and we cannot reliably find that by starting at the end). In this mode, it will fail with an exception if the file is not in fact in the tar format.

nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset Source #

Calculate the TarEntryOffset of the next entry, given the size and offset of the current entry.

This is much like using skipNextEntry and indexNextEntryOffset, but without using an IndexBuilder.

indexEndEntryOffset :: TarIndex -> TarEntryOffset Source #

This is the offset immediately following the last entry in the tar file. This can be useful to append further entries into the tar file. Use with hSeekEntryOffset, or just use hSeekEndEntryOffset directly.

indexNextEntryOffset :: IndexBuilder -> TarEntryOffset Source #

This is the offset immediately following the entry most recently added to the IndexBuilder. You might use this if you need to know the offsets but don't want to use the TarIndex lookup structure. Use with hSeekEntryOffset. See also nextEntryOffset.

toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId] Source #

serialiseLBS :: TarIndex -> ByteString Source #

serialiseSize :: TarIndex -> Int Source #