module Codec.Archive.Tar.Index (
TarIndex,
lookup,
TarIndexEntry(..),
TarEntryOffset,
hReadEntry,
hReadEntryHeader,
build,
IndexBuilder,
emptyIndex,
addNextEntry,
skipNextEntry,
finaliseIndex,
serialise,
deserialise,
hReadEntryHeaderOrEof,
hSeekEntryOffset,
hSeekEntryContentOffset,
hSeekEndEntryOffset,
nextEntryOffset,
indexEndEntryOffset,
indexNextEntryOffset,
#ifdef TESTS
prop_lookup,
prop_valid,
#endif
) where
import Data.Typeable (Typeable)
import Codec.Archive.Tar.Types as Tar
import Codec.Archive.Tar.Read as Tar
import qualified Codec.Archive.Tar.Index.StringTable as StringTable
import Codec.Archive.Tar.Index.StringTable (StringTable(..))
import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie
import Codec.Archive.Tar.Index.IntTrie (IntTrie(..))
import qualified System.FilePath.Posix as FilePath
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import Data.Word
import Data.Int
import Data.Bits
import qualified Data.Array.Unboxed as A
import Prelude hiding (lookup)
import System.IO
import Control.Exception (throwIO)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
#if MIN_VERSION_bytestring(0,10,2)
import Data.ByteString.Builder as BS
#else
import Data.ByteString.Lazy.Builder as BS
#endif
#ifdef TESTS
import qualified Prelude
import Test.QuickCheck
import Control.Applicative ((<$>), (<*>))
#endif
data TarIndex = TarIndex
!(StringTable PathComponentId)
!(IntTrie PathComponentId TarEntryOffset)
!TarEntryOffset
deriving (Eq, Show, Typeable)
data TarIndexEntry = TarFileEntry !TarEntryOffset
| TarDir [(FilePath, TarIndexEntry)]
deriving (Show, Typeable)
newtype PathComponentId = PathComponentId Int
deriving (Eq, Ord, Enum, Show, Typeable)
type TarEntryOffset = Word32
lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry
lookup (TarIndex pathTable pathTrie _) path = do
fpath <- toComponentIds pathTable path
tentry <- IntTrie.lookup pathTrie fpath
return (mkIndexEntry tentry)
where
mkIndexEntry (IntTrie.Entry offset) = TarFileEntry offset
mkIndexEntry (IntTrie.Completions entries) =
TarDir [ (fromComponentId pathTable key, mkIndexEntry entry)
| (key, entry) <- entries ]
toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId]
toComponentIds table =
lookupComponents []
. filter (/= ".")
. FilePath.splitDirectories
where
lookupComponents cs' [] = Just (reverse cs')
lookupComponents cs' (c:cs) = case StringTable.lookup table c of
Nothing -> Nothing
Just cid -> lookupComponents (cid:cs') cs
fromComponentId :: StringTable PathComponentId -> PathComponentId -> FilePath
fromComponentId table = StringTable.index table
build :: Entries e -> Either e TarIndex
build = go emptyIndex
where
go !builder (Next e es) = go (addNextEntry e builder) es
go !builder Done = Right $! finaliseIndex builder
go !_ (Fail err) = Left err
data IndexBuilder = IndexBuilder [(FilePath, TarEntryOffset)]
!TarEntryOffset
emptyIndex :: IndexBuilder
emptyIndex = IndexBuilder [] 0
addNextEntry :: Entry -> IndexBuilder -> IndexBuilder
addNextEntry entry (IndexBuilder acc nextOffset) =
IndexBuilder ((entrypath, nextOffset):acc)
(nextEntryOffset entry nextOffset)
where
!entrypath = entryPath entry
skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder
skipNextEntry entry (IndexBuilder acc nextOffset) =
IndexBuilder acc (nextEntryOffset entry nextOffset)
finaliseIndex :: IndexBuilder -> TarIndex
finaliseIndex (IndexBuilder pathsOffsets finalOffset) =
TarIndex pathTable pathTrie finalOffset
where
pathComponents = concatMap (FilePath.splitDirectories . fst) pathsOffsets
pathTable = StringTable.construct pathComponents
pathTrie = IntTrie.construct
[ (cids, offset)
| (path, offset) <- pathsOffsets
, let Just cids = toComponentIds pathTable path ]
indexNextEntryOffset :: IndexBuilder -> TarEntryOffset
indexNextEntryOffset (IndexBuilder _ off) = off
indexEndEntryOffset :: TarIndex -> TarEntryOffset
indexEndEntryOffset (TarIndex _ _ off) = off
nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset
nextEntryOffset entry offset =
offset
+ 1
+ case entryContent entry of
NormalFile _ size -> blocks size
OtherEntryType _ _ size -> blocks size
_ -> 0
where
blocks size = 1 + ((fromIntegral size 1) `div` 512)
hReadEntry :: Handle -> TarEntryOffset -> IO Entry
hReadEntry hnd off = do
entry <- hReadEntryHeader hnd off
case entryContent entry of
NormalFile _ size -> do body <- LBS.hGet hnd (fromIntegral size)
return entry {
entryContent = NormalFile body size
}
OtherEntryType c _ size -> do body <- LBS.hGet hnd (fromIntegral size)
return entry {
entryContent = OtherEntryType c body size
}
_ -> return entry
hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry
hReadEntryHeader hnd blockOff = do
hSeekEntryOffset hnd blockOff
header <- LBS.hGet hnd 512
case Tar.read header of
Tar.Next entry _ -> return entry
Tar.Fail e -> throwIO e
Tar.Done -> fail "hReadEntryHeader: impossible"
hSeekEntryOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryOffset hnd blockOff =
hSeek hnd AbsoluteSeek (fromIntegral blockOff * 512)
hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryContentOffset hnd blockOff =
hSeekEntryOffset hnd (blockOff + 1)
hReadEntryHeaderOrEof :: Handle -> TarEntryOffset
-> IO (Maybe (Entry, TarEntryOffset))
hReadEntryHeaderOrEof hnd blockOff = do
hSeekEntryOffset hnd blockOff
header <- LBS.hGet hnd 1024
case Tar.read header of
Tar.Next entry _ -> let !blockOff' = nextEntryOffset entry blockOff
in return (Just (entry, blockOff'))
Tar.Done -> return Nothing
Tar.Fail e -> throwIO e
hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset
hSeekEndEntryOffset hnd (Just index) = do
let offset = indexEndEntryOffset index
hSeekEntryOffset hnd offset
return offset
hSeekEndEntryOffset hnd Nothing = do
size <- hFileSize hnd
if size == 0
then return 0
else seekToEnd 0
where
seekToEnd offset = do
mbe <- hReadEntryHeaderOrEof hnd offset
case mbe of
Nothing -> do hSeekEntryOffset hnd offset
return offset
Just (_, offset') -> seekToEnd offset'
serialise :: TarIndex -> BS.Builder
serialise (TarIndex stringTable intTrie finalOffset) =
BS.word32BE 1
<> BS.word32BE finalOffset
<> serialiseStringTable stringTable
<> serialiseIntTrie intTrie
deserialise :: BS.ByteString -> Maybe (TarIndex, BS.ByteString)
deserialise bs
| BS.length bs >= 8
, let ver = readWord32BE bs 0
, ver == 1
= do let !finalOffset = readWord32BE bs 4
(stringTable, bs') <- deserialiseStringTable (BS.drop 8 bs)
(intTrie, bs'') <- deserialiseIntTrie bs'
return (TarIndex stringTable intTrie finalOffset, bs'')
| otherwise = Nothing
serialiseIntTrie :: IntTrie k v -> BS.Builder
serialiseIntTrie (IntTrie arr) =
let (_, !ixEnd) = A.bounds arr in
BS.word32BE (ixEnd+1)
<> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems arr)
deserialiseIntTrie :: BS.ByteString -> Maybe (IntTrie k v, BS.ByteString)
deserialiseIntTrie bs
| BS.length bs >= 4
, let lenArr = readWord32BE bs 0
lenTotal = 4 + 4 * fromIntegral lenArr
, BS.length bs >= 4 + 4 * fromIntegral lenArr
, let !arr = A.array (0, lenArr1)
[ (i, readWord32BE bs off)
| (i, off) <- zip [0..lenArr1] [4,8 .. lenTotal 4] ]
!bs' = BS.drop lenTotal bs
= Just (IntTrie arr, bs')
| otherwise
= Nothing
serialiseStringTable :: StringTable id -> BS.Builder
serialiseStringTable (StringTable strs arr) =
let (_, !ixEnd) = A.bounds arr in
BS.word32BE (fromIntegral (BS.length strs))
<> BS.word32BE (fromIntegral ixEnd + 1)
<> BS.byteString strs
<> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems arr)
deserialiseStringTable :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
deserialiseStringTable bs
| BS.length bs >= 8
, let lenStrs = fromIntegral (readWord32BE bs 0)
lenArr = fromIntegral (readWord32BE bs 4)
lenTotal= 8 + lenStrs + 4 * lenArr
, BS.length bs >= lenTotal
, let strs = BS.take lenStrs (BS.drop 8 bs)
arr = A.array (0, lenArr1)
[ (i, readWord32BE bs off)
| (i, off) <- zip [0..lenArr1]
[offArrS,offArrS+4 .. offArrE]
]
offArrS = 8 + lenStrs
offArrE = offArrS + 4 * lenArr 1
!stringTable = StringTable strs arr
!bs' = BS.drop lenTotal bs
= Just (stringTable, bs')
| otherwise
= Nothing
readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE bs i =
fromIntegral (BS.index bs (i + 0)) `shiftL` 24
+ fromIntegral (BS.index bs (i + 1)) `shiftL` 16
+ fromIntegral (BS.index bs (i + 2)) `shiftL` 8
+ fromIntegral (BS.index bs (i + 3))
#ifdef TESTS
prop_lookup :: [(NonEmptyFilePath, TarEntryOffset)] -> NonEmptyFilePath -> Bool
prop_lookup paths (NonEmptyFilePath p) =
case (lookup index p, Prelude.lookup p paths') of
(Nothing, Nothing) -> True
(Just (TarFileEntry offset), Just offset') -> offset == offset'
_ -> False
where
paths' = [ (p, off) | (NonEmptyFilePath p, off) <- paths ]
index@(TarIndex pathTable _ _) =
finaliseIndex (IndexBuilder paths' 0)
prop_valid :: [(NonEmptyFilePath, TarEntryOffset)] -> Bool
prop_valid paths
| not $ StringTable.prop_valid pathbits = error "TarIndex: bad string table"
| not $ IntTrie.prop_lookup intpaths = error "TarIndex: bad int trie"
| not $ IntTrie.prop_completions intpaths = error "TarIndex: bad int trie"
| not $ prop' = error "TarIndex: bad prop"
| otherwise = True
where
paths' = [ (p, off) | (NonEmptyFilePath p, off) <- paths ]
index@(TarIndex pathTable _ _) =
finaliseIndex (IndexBuilder paths' 0)
pathbits = concatMap (FilePath.splitDirectories . fst) paths'
intpaths = [ (cids, offset)
| (path, offset) <- paths'
, let Just cids = toComponentIds pathTable path ]
prop' = flip all paths' $ \(file, offset) ->
case lookup index file of
Just (TarFileEntry offset') -> offset' == offset
_ -> False
newtype NonEmptyFilePath = NonEmptyFilePath FilePath deriving Show
instance Arbitrary NonEmptyFilePath where
arbitrary = NonEmptyFilePath . FilePath.joinPath
<$> listOf1 (elements ["a", "b", "c", "d"])
example0 :: Entries ()
example0 =
testEntry "foo-1.0/foo-1.0.cabal" 1500
`Next` testEntry "foo-1.0/LICENSE" 2000
`Next` testEntry "foo-1.0/Data/Foo.hs" 1000
`Next` Done
example1 :: Entries ()
example1 =
Next (testEntry "./" 1500) Done <> example0
testEntry :: FilePath -> Int64 -> Entry
testEntry name size = simpleEntry path (NormalFile mempty size)
where
Right path = toTarPath False name
#endif
#if !(MIN_VERSION_base(4,5,0))
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
#endif