{-# LANGUAGE CPP #-} {-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable, TemplateHaskell #-} module Data.TarIndex ( TarIndex, TarIndexEntry(..), TarEntryOffset, lookup, construct, #ifdef TESTS prop_lookup, prop, #endif ) where import Data.SafeCopy (base, deriveSafeCopy) import Data.Typeable (Typeable) import qualified Data.StringTable as StringTable import Data.StringTable (StringTable) import qualified Data.IntTrie as IntTrie import Data.IntTrie (IntTrie) import qualified System.FilePath as FilePath import Prelude hiding (lookup) #ifdef TESTS import qualified Prelude #endif import Distribution.Server.Framework.MemSize -- | An index of the entries in a tar file. This lets us look up a filename -- within the tar file and find out where in the tar file (ie the file offset) -- that entry occurs. -- data TarIndex = TarIndex -- As an example of how the mapping works, consider these example files: -- "foo/bar.hs" at offset 0 -- "foo/baz.hs" at offset 1024 -- -- We split the paths into components and enumerate them. -- { "foo" -> TokenId 0, "bar.hs" -> TokenId 1, "baz.hs" -> TokenId 2 } -- -- We convert paths into sequences of 'TokenId's, i.e. -- "foo/bar.hs" becomes [PathComponentId 0, PathComponentId 1] -- "foo/baz.hs" becomes [PathComponentId 0, PathComponentId 2] -- -- We use a trie mapping sequences of 'PathComponentId's to the entry offset: -- { [PathComponentId 0, PathComponentId 1] -> offset 0 -- , [PathComponentId 0, PathComponentId 2] -> offset 1024 } -- | The mapping of filepath components as strings to ids. !(StringTable PathComponentId) -- Mapping of sequences of filepath component ids to tar entry offsets. !(IntTrie PathComponentId TarEntryOffset) deriving (Show, Typeable) data TarIndexEntry = TarFileEntry !TarEntryOffset | TarDir [FilePath] deriving (Show, Typeable) newtype PathComponentId = PathComponentId Int deriving (Eq, Ord, Enum, Show, Typeable) type TarEntryOffset = Int $(deriveSafeCopy 0 'base ''TarIndex) $(deriveSafeCopy 0 'base ''PathComponentId) $(deriveSafeCopy 0 'base ''TarIndexEntry) instance MemSize TarIndex where memSize (TarIndex a b) = memSize2 a b -- | Look up a given filepath in the index. It may return a 'TarFileEntry' -- containing the offset and length of the file within the tar file, or if -- the filepath identifies a directory then it returns a 'TarDir' containing -- the list of files within that directory. -- lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry lookup (TarIndex pathTable pathTrie) path = case toComponentIds pathTable path of Nothing -> Nothing Just fpath -> fmap (mkIndexEntry fpath) (IntTrie.lookup pathTrie fpath) where mkIndexEntry _ (IntTrie.Entry offset) = TarFileEntry offset mkIndexEntry _ (IntTrie.Completions entries) = TarDir [ fromComponentIds pathTable [entry] | entry <- entries ] -- | Construct a 'TarIndex' from a list of filepaths and their corresponding -- construct :: [(FilePath, TarEntryOffset)] -> TarIndex construct pathsOffsets = TarIndex pathTable pathTrie where pathComponents = concatMap (FilePath.splitDirectories . fst) pathsOffsets pathTable = StringTable.construct pathComponents pathTrie = IntTrie.construct [ (cids, offset) | (path, offset) <- pathsOffsets , let Just cids = toComponentIds pathTable path ] toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId] toComponentIds table = lookupComponents [] . FilePath.splitDirectories where lookupComponents cs' [] = Just (reverse cs') lookupComponents cs' (c:cs) = case StringTable.lookup table c of Nothing -> Nothing Just cid -> lookupComponents (cid:cs') cs fromComponentIds :: StringTable PathComponentId -> [PathComponentId] -> FilePath fromComponentIds table = FilePath.joinPath . map (StringTable.index table) #ifdef TESTS -- properties of a finite mapping... prop_lookup :: [(FilePath, TarEntryOffset)] -> FilePath -> Bool prop_lookup xs x = case (lookup (construct xs) x, Prelude.lookup x xs) of (Nothing, Nothing) -> True (Just (TarFileEntry offset), Just offset') -> offset == offset' _ -> False prop :: [(FilePath, TarEntryOffset)] -> Bool prop paths | not $ StringTable.prop pathbits = error "TarIndex: bad string table" | not $ IntTrie.prop intpaths = error "TarIndex: bad int trie" | not $ prop' = error "TarIndex: bad prop" | otherwise = True where index@(TarIndex pathTable _) = construct paths pathbits = concatMap (FilePath.splitDirectories . fst) paths intpaths = [ (cids, offset) | (path, offset) <- paths , let Just cids = toComponentIds pathTable path ] prop' = flip all paths $ \(file, offset) -> case lookup index file of Just (TarFileEntry offset') -> offset' == offset _ -> False example0 :: [(FilePath, Int)] example0 = [("foo-1.0/foo-1.0.cabal", 512) -- tar block 1 ,("foo-1.0/LICENSE", 2048) -- tar block 4 ,("foo-1.0/Data/Foo.hs", 4096)] -- tar block 8 #endif