-- Copyright (C) 2009-2011 Petr Rockai
-- (C) 2013 Jose Neder
-- BSD3

{-# LANGUAGE MultiParamTypeClasses #-}

-- | This module contains plain tree indexing code. The index itself is a
-- CACHE: you should only ever use it as an optimisation and never as primary
-- storage. In practice, this means that when we change the index format, the
-- application is expected to throw the old index away and build a fresh one.
-- Please note that tracking index validity is out of scope for this module:
-- that is the responsibility of your application. It is advisable that your
-- validity tracking code also checks for format validity (see
-- 'indexFormatValid') and scraps and re-creates the index when needed.
--
-- The index is a binary file that overlays a hashed tree over the working
-- copy. This means that every working file and directory has an entry in the
-- index that contains its path, hash and validity data. The validity data is
-- a timestamp plus the file size. The file hashes are sha256 hashes of the
-- file's content. Each entry also contains a fileid, used to track moved
-- files.
--
-- There are two entry types, a file entry and a directory entry. Both have a
-- common binary format (see 'Item'). The on-disk format is described by the
-- section /Index format/ below.
--
-- For each file, the index stores a copy of the file's last modification
-- timestamp, taken at the instant when the hash was computed. This means that
-- when the size and timestamp of a file in the working tree match those in
-- the index, we assume that the hash stored in the index for the given file
-- is valid. These hashes are then exposed in the resulting 'Tree' object, and
-- can be leveraged by e.g. 'diffTrees' to compare many files quickly.
--
-- You may have noticed that we also keep hashes of directories. These are
-- assumed to be valid whenever the complete subtree has been valid. At any
-- point, as soon as a size or timestamp mismatch is found, the working file
-- in question is opened, its hash (and timestamp and size) is recomputed and
-- updated in-place in the index file (everything lives at a fixed offset and
-- is of fixed size, so this isn't an issue). This is also true of
-- directories: when a file in a directory changes hash, this triggers
-- recomputation of all of its parent directory hashes; moreover, this is done
-- efficiently -- each directory is updated at most once during an update run.
--
-- /Endianness/
--
-- Since version 6 (magic == "HSI6"), the file format depends on the
-- endianness of the architecture. To account for the (rare) case where darcs
-- executables from different architectures operate on the same repo, we make
-- an additional check in 'indexFormatValid' to detect whether the file's
-- endianness differs from what we expect. If this is detected, the file is
-- considered invalid and will be re-created.
--
-- /Index format/
--
-- The index starts with a header consisting of a 4 byte magic word, followed
-- by a 4 byte word that indicates the endianness of the encoding. This word
-- should, when read directly from the mmapped file, be equal to 1.
--
-- After the header comes the actual content of the index, which is a
-- sequence of 'Item's.
--
-- An 'Item' consists of:
--
-- * size: item size, 8 bytes
-- * aux: timestamp (for file) or offset to sibling (for dir), 8 bytes
-- * fileid: inode or fhandle of the item, 8 bytes
-- * hash: sha256 of content, 32 bytes
-- * descriptor length: >= 2 due to type and null, 4 bytes
-- * descriptor:
--
--     * type: 'D' or 'F', one byte
--     * path: flattened path, variable >= 0
--     * null: terminating null byte
--
-- * alignment padding: 0 to 3 bytes
--
-- Each 'Item' is 4 byte aligned. Thus the descriptor length must be rounded
-- up with 'align' to get the position of the next item. The same applies
-- when determining the aux (offset to sibling) for dir items.
--
-- With directories, the aux holds the offset of the next sibling item in the
-- index, so we can efficiently skip reading the whole subtree starting at a
-- given directory (by just seeking aux bytes forward). The items are
-- pre-ordered with respect to directory structure -- the directory comes
-- first and after it come all its items. Cf. 'openIndex'.
--
-- For files, the aux field holds a timestamp.
--
-- Internally, an item is stored as a pointer to its first field (iBase),
-- from which we directly read off the first three fields (size, aux, fileid),
-- and a ByteString for the rest (iHashAndDescriptor), up to but not including
-- the terminating null byte.
--
-- TODO
--
-- The null byte terminator seems useless.
--
-- We could as well use a single plain pointer for the item. The 'dumpIndex'
-- function demonstrates how this could be done.
--
-- Another possible improvement is to store only the 'Name' of an item, not
-- the full path. We need to keep track of the current path anyway when
-- traversing the index.
module Darcs.Util.Index
    ( openIndex
    , updateIndexFrom
    , indexFormatValid
    , treeFromIndex
    , listFileIDs
    , Index
    , filter
    , getFileID
    , IndexEntry(..)
    , dumpIndex -- for testing
    , align
    ) where

import Darcs.Prelude hiding ( readFile, writeFile, filter )

import Darcs.Util.ByteString ( readSegment, decodeLocale )
import qualified Darcs.Util.File ( getFileStatus )
import Darcs.Util.Global ( debugMessage )
import Darcs.Util.Hash ( Hash(..), mkHash, rawHash, sha256 )
import Darcs.Util.Tree
import Darcs.Util.Tree.Hashed ( darcsTreeHash )
import Darcs.Util.Path
    ( AnchoredPath(..)
    , realPath
    , anchoredRoot
    , Name
    , rawMakeName
    , appendPath
    , flatten
    )
import Darcs.Util.Progress ( beginTedious, endTedious, finishedOneIO )

import Control.Monad ( when )
import Control.Exception ( catch, throw, SomeException, Exception )

import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import Data.ByteString.Unsafe ( unsafeHead, unsafeDrop )
import Data.ByteString.Internal
    ( c2w
    , fromForeignPtr
    , nullForeignPtr
    , toForeignPtr
    )
import qualified Data.ByteString.Short.Internal as BS

import Data.Int ( Int64, Int32 )
import Data.Word ( Word8 )
import Data.IORef ( )
import Data.Maybe ( fromJust, isJust, isNothing )
import Data.Typeable ( Typeable )

import Foreign.Marshal.Utils ( copyBytes )
import Foreign.Storable
import Foreign.ForeignPtr ( ForeignPtr, withForeignPtr, castForeignPtr )
import Foreign.Ptr ( Ptr, plusPtr )

import System.IO ( hPutStrLn, stderr )
import System.IO.MMap ( mmapFileForeignPtr, mmapWithFilePtr, Mode(..) )
import System.Directory ( doesFileExist, getCurrentDirectory )
import System.Directory ( renameFile )
import System.FilePath ( (<.>) )
import qualified System.Posix.Files as F ( fileID )
import System.FilePath ( (</>) )
import qualified System.Posix.Files as F
    ( modificationTimeHiRes, fileSize, isDirectory, isSymbolicLink
    , FileStatus
    )
import System.Posix.Types ( FileID, FileOffset )

--------------------------
-- Indexed trees
--

-- | Description of a single indexed item. The structure itself does not
-- contain any data, just pointers into the underlying mmap (a bytestring is
-- a pointer + offset + length).
--
-- The structure is recursive-ish (as opposed to the flat-ish structure used
-- by git...). It turns out that it's hard to efficiently read a flat index
-- with our internal data structures -- we need to turn the flat index into a
-- recursive Tree object, which is rather expensive... As a bonus, we can also
-- efficiently implement subtree queries this way (cf. 'openIndex').
data Item = Item
  { iBase :: !(Ptr ())
  , iHashAndDescriptor :: !B.ByteString
  } deriving Show

index_version :: B.ByteString
index_version = BC.pack "HSI7"

-- | Stored in the index so that, when reading it back, we can verify that we
-- are on the same endianness. If the check fails, we treat the index as
-- invalid so user code will regenerate it.
index_endianness_indicator :: Int32
index_endianness_indicator = 1

size_header, size_magic, size_endianness_indicator :: Int
size_magic = 4 -- the magic word, first 4 bytes of the index
size_endianness_indicator = 4 -- second 4 bytes of the index
size_header = size_magic + size_endianness_indicator

size_dsclen, size_hash, size_size, size_aux, size_fileid :: Int
size_size = 8 -- file/directory size (Int64)
size_aux = 8 -- aux (Int64)
size_fileid = 8 -- fileid (inode or fhandle FileID)
size_dsclen = 4 -- this many bytes store the length of the descriptor
size_hash = 32 -- hash representation

size_type, size_null :: Int
size_type = 1 -- ItemType: 'D' for directory, 'F' for file
size_null = 1 -- null byte at the end of path

off_size, off_aux, off_hash, off_dsc, off_dsclen, off_fileid :: Int
off_size = 0
off_aux = off_size + size_size
off_fileid = off_aux + size_aux
off_dsclen = off_fileid + size_fileid
off_hash = off_dsclen + size_dsclen
off_dsc = off_hash + size_hash

itemAllocSize :: AnchoredPath -> Int
itemAllocSize apath =
  align 4 $
    size_size + size_aux + size_fileid + size_dsclen + size_hash +
    size_type + B.length (flatten apath) + size_null

itemSize :: Item -> Int
itemSize i =
  size_size + size_aux + size_fileid + size_dsclen +
  (B.length $ iHashAndDescriptor i) + size_null

itemNext :: Item -> Int
itemNext i = align 4 (itemSize i)

-- iDescriptor is:
-- * one byte for type of item ('D' or 'F')
-- * flattened path (w/o terminating null byte)
iHash, iDescriptor :: Item -> B.ByteString
iDescriptor = unsafeDrop size_hash . iHashAndDescriptor
iHash = B.take size_hash . iHashAndDescriptor

-- The "drop 1" here gets rid of the item type.
iPath :: Item -> FilePath
iPath = decodeLocale . unsafeDrop 1 . iDescriptor

iSize, iAux :: Item -> Ptr Int64
iSize i = plusPtr (iBase i) off_size
iAux i = plusPtr (iBase i) off_aux

iFileID :: Item -> Ptr FileID
iFileID i = plusPtr (iBase i) off_fileid

itemIsDir :: Item -> Bool
itemIsDir i = unsafeHead (iDescriptor i) == c2w 'D'
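
-- The following is only an illustrative sketch and is not used anywhere in
-- this module (the name and the sample 3-byte path length are made up): for
-- a path that flattens to 3 bytes, the fixed fields take 8+8+8+4+32 = 60
-- bytes, the descriptor takes 1 (type) + 3 (path) + 1 (null) = 5 bytes, and
-- 'align' pads the total of 65 up to the next multiple of 4, i.e. 68. This
-- mirrors what 'itemAllocSize' computes for such a path.
_exampleItemAllocSize :: Int
_exampleItemAllocSize =
  align 4 $
    size_size + size_aux + size_fileid + size_dsclen + size_hash +
    size_type + 3 + size_null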

type FileStatus = Maybe F.FileStatus

-- We deal with hi res timestamps by noting that the actual resolution is in
-- nanoseconds. If we count the nanoseconds since the epoch, we will overflow
-- at (1<<63)/(1e9*60*60*24*366) =~ 290 years after the epoch. Comfortable.
modificationTime :: FileStatus -> Int64
modificationTime = maybe 0 (truncate . (*1e9) . F.modificationTimeHiRes)

fileSize :: FileStatus -> FileOffset
fileSize = maybe 0 F.fileSize

fileExists :: FileStatus -> Bool
fileExists = maybe False (const True)

isDirectory :: FileStatus -> Bool
isDirectory = maybe False F.isDirectory

fileID :: FileStatus -> FileID
fileID = maybe 0 F.fileID

-- | Lay out the basic index item structure in memory. The memory location is
-- given by a @ForeignPtr ()@ and an offset. The path and type given are
-- written out, and a corresponding Item is given back. The remaining bits of
-- the item can be filled out using 'updateItem'.
createItem :: ItemType -> AnchoredPath -> ForeignPtr () -> Int -> IO Item
createItem typ apath fp off = do
  let dsc = B.concat
        [ BC.singleton $ if typ == TreeType then 'D' else 'F'
        , flatten apath -- this (currently) gives "." for anchoredRoot
        , B.singleton 0
        ]
      (dsc_fp, dsc_start, dsc_len) = toForeignPtr dsc
  withForeignPtr fp $ \p ->
    withForeignPtr dsc_fp $ \dsc_p -> do
      pokeByteOff p (off + off_dsclen) (fromIntegral dsc_len :: Int32)
      copyBytes (plusPtr p $ off + off_dsc) (plusPtr dsc_p dsc_start)
                (fromIntegral dsc_len)
      peekItem fp off

-- | Read the on-disk representation into the internal data structure.
--
-- See the module-level section /Index format/ for details on how the index
-- is structured.
peekItem :: ForeignPtr () -> Int -> IO Item
peekItem fp off =
  withForeignPtr fp $ \p -> do
    nl' :: Int32 <- peekByteOff p (off + off_dsclen)
    when (nl' <= 2) $ fail "Descriptor too short in peekItem!"
    let nl = fromIntegral nl'
        dsc = fromForeignPtr (castForeignPtr fp) (off + off_hash)
                -- Note that iHashAndDescriptor does not include the
                -- terminating null byte, so we have to subtract its size here.
                (size_hash + nl - size_null)
    return $! Item {iBase = plusPtr p off, iHashAndDescriptor = dsc}

-- | Update an existing 'Item' with a new size and hash. The hash must not
-- be 'Nothing'.
updateItem :: Item -> Int64 -> Hash -> IO ()
updateItem item size hash = do
  poke (iSize item) size
  unsafePokeBS (iHash item) (rawHash hash)

updateFileID :: Item -> FileID -> IO ()
updateFileID item fileid = poke (iFileID item) fileid

updateAux :: Item -> Int64 -> IO ()
updateAux item aux = poke (iAux item) aux

updateTime :: Item -> Int64 -> IO ()
updateTime item mtime = updateAux item mtime

iHash' :: Item -> Maybe Hash
iHash' i =
  let ih = iHash i in if ih == nullHash then Nothing else Just (mkHash ih)

nullHash :: B.ByteString
nullHash = B.replicate size_hash 0

-- | Gives a ForeignPtr to the mmapped index, which can be used for reading
-- and updates. The req_size parameter, if non-0, is the requested size of
-- the index file. mmapIndex will grow the index if it is smaller than this.
mmapIndex :: forall a. FilePath -> Int -> IO (ForeignPtr a, Int)
mmapIndex indexpath req_size = do
  act_size <-
    fromIntegral . fileSize <$> Darcs.Util.File.getFileStatus indexpath
  let size = case req_size > 0 of
        True -> req_size
        False | act_size >= size_header -> act_size - size_header
              | otherwise -> 0
  case size of
    0 -> return (castForeignPtr nullForeignPtr, size)
    _ -> do
      (x, _, _) <-
        mmapFileForeignPtr indexpath ReadWriteEx (Just (0, size + size_header))
      return (x, size)

data IndexM m = Index { mmap :: (ForeignPtr ())
                      , basedir :: FilePath
                      , predicate :: AnchoredPath -> TreeItem m -> Bool
                      }
              | EmptyIndex

type Index = IndexM IO

-- FIXME This is not really a state: we modify it only when we recurse down
-- into a dir item, so it is rather more like an environment. Instead of
-- passing it explicitly we could use ReaderT.
-- | When we traverse the index, we keep track of some data about the
-- current parent directory.
data State = State
  { dirlength :: !Int -- ^ length in bytes of the current path prefix,
                      -- includes the trailing path separator
  , path :: !AnchoredPath -- ^ path of the current directory
  , start :: !Int -- ^ offset of the current directory in the index
  }

-- * Reading items from the index

data Result = Result
  { changed :: !Bool
    -- ^ Whether the item has changed since the last update to the index.
  , next :: !Int
    -- ^ Position of the next item, in bytes.
  , treeitem :: !(Maybe (TreeItem IO))
    -- ^ 'Nothing' in case the item doesn't exist in the tree or is filtered
    -- by a FilterTree; otherwise the corresponding 'TreeItem'.
  , resitem :: !Item
    -- ^ The item extracted.
  }

readItem :: String -> Index -> State -> IO Result
readItem progressKey index state = do
    item <- peekItem (mmap index) (start state)
    res' <- if itemIsDir item then readDir item else readFile item
    finishedOneIO progressKey (iPath item)
    return res'
  where
    readDir item = do
      following <- fromIntegral <$> peek (iAux item)
      st <- getFileStatus (iPath item)
      let exists = fileExists st && isDirectory st
      fileid <- peek $ iFileID item
      when (fileid == 0) $ updateFileID item (fileID st)
      let substate = substateof item state
          want = exists && (predicate index) (path substate) (Stub undefined Nothing)
          oldhash = iHash' item
          subs off = case compare off following of
            LT -> do
              result <- readItem progressKey index $ substate { start = off }
              rest <- subs $ next result
              return $! (nameof (resitem result) substate, result) : rest
            EQ -> return []
            GT -> fail $ "Offset mismatch at " ++ show off ++
                         " (ends at " ++ show following ++ ")"
      inferiors <- if want then subs $ start substate else return []
      let we_changed = or [ changed x | (_, x) <- inferiors ] || nullleaf
          nullleaf = null inferiors && isNothing oldhash
          tree' =
            -- Note the partial pattern match on 'Just n' below is justified
            -- as we are traversing sub items here, which means 'Nothing' is
            -- impossible, see 'substateof' for details.
            makeTree [ (n, fromJust $ treeitem s)
                     | (Just n, s) <- inferiors, isJust $ treeitem s ]
          treehash = if we_changed then Just (darcsTreeHash tree') else oldhash
          tree = tree' { treeHash = treehash }
      when (exists && we_changed) $
        -- fromJust is justified because we_changed implies (isJust treehash)
        updateItem item 0 (fromJust treehash)
      return $ Result { changed = not exists || we_changed
                      , next = following
                      , treeitem = if want then Just $ SubTree tree else Nothing
                      , resitem = item }
    readFile item = do
      st <- getFileStatus (iPath item)
      mtime <- fromIntegral <$> (peek $ iAux item)
      size <- peek $ iSize item
      fileid <- peek $ iFileID item
      let mtime' = modificationTime st
          size' = fromIntegral $ fileSize st
          readblob = readSegment (basedir index </> iPath item, Nothing)
          exists = fileExists st && not (isDirectory st)
          we_changed = mtime /= mtime' || size /= size'
          hash = iHash' item
      when (exists && we_changed) $ do
        hash' <- sha256 `fmap` readblob
        updateItem item size' hash'
        updateTime item mtime'
      when (fileid == 0) $ updateFileID item (fileID st)
      return $ Result { changed = not exists || we_changed
                      , next = start state + itemNext item
                      , treeitem = if exists
                                     then Just $ File $ Blob readblob hash
                                     else Nothing
                      , resitem = item }

data CorruptIndex = CorruptIndex String deriving (Eq, Typeable)

instance Exception CorruptIndex

instance Show CorruptIndex where
  show (CorruptIndex s) = s

-- | Get the 'Name' of an 'Item' in the given 'State'. This fails for the
-- root 'Item' because it has no 'Name', so we return 'Nothing'.
nameof :: Item -> State -> Maybe Name
nameof item state
  | iDescriptor item == BC.pack "D." = Nothing
  | otherwise =
      case rawMakeName $ B.drop (dirlength state + 1) $ iDescriptor item of
        Left msg -> throw (CorruptIndex msg)
        Right name -> Just name

-- | 'Maybe' append a 'Name' to an 'AnchoredPath'.
maybeAppendName :: AnchoredPath -> Maybe Name -> AnchoredPath
maybeAppendName parent = maybe parent (parent `appendPath`)

-- | Calculate the next 'State' when entering an 'Item'. This also works for
-- the top-level 'Item', i.e. the root directory, but only because we handle
-- that case specially.
substateof :: Item -> State -> State
substateof item state =
    state { start = start state + itemNext item
          , path = path state `maybeAppendName` myname
          , dirlength = case myname of
              Nothing ->
                -- We are entering the root item. The current path prefix
                -- remains empty, so its length (which must be 0) doesn't
                -- change.
                dirlength state
              Just _ ->
                -- This works because the 'iDescriptor' is always one byte
                -- larger than the actual name. So @dirlength state@ will also
                -- be greater by 1, which accounts for the path separator when
                -- we strip the directory prefix from the full path.
                B.length (iDescriptor item)
          }
  where
    myname = nameof item state
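
-- An illustrative sketch only (the helper and its string arguments are made
-- up and nothing else uses it): it redoes the arithmetic of 'nameof' on plain
-- descriptor bytes. For a parent directory whose descriptor is "Ddir" (length
-- 4) and a child whose descriptor is "Ddir/name", dropping dirlength + 1 = 5
-- bytes leaves "name", which 'rawMakeName' then parses as a 'Name'.
_exampleChildName :: String -> String -> Either String Name
_exampleChildName parentDsc childDsc =
  rawMakeName $ B.drop (length parentDsc + 1) $ BC.pack childDsc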

-- * Reading (only) file IDs from the index

-- FIXME this seems copy-pasted from the code above and then adapted to the
-- purpose. We should factor out the traversal of the index as a higher order
-- function.
data ResultF = ResultF
  { nextF :: !Int
    -- ^ Position of the next item, in bytes.
  , resitemF :: !Item
    -- ^ The item extracted.
  , _fileIDs :: [((AnchoredPath, ItemType), FileID)]
    -- ^ The fileids of the files and folders inside (for a folder item),
    -- or its own fileid (for a file item).
  }

-- | Return a list containing all the file/folder names in an index, with
-- their respective ItemType and FileID.
listFileIDs :: Index -> IO ([((AnchoredPath, ItemType), FileID)])
listFileIDs EmptyIndex = return []
listFileIDs index = do
  let initial = State { start = size_header, dirlength = 0, path = anchoredRoot }
  res <- readItemFileIDs index initial
  return $ _fileIDs res

readItemFileIDs :: Index -> State -> IO ResultF
readItemFileIDs index state = do
  item <- peekItem (mmap index) (start state)
  res' <- if itemIsDir item
            then readDirFileIDs index state item
            else readFileFileID index state item
  return res'

readDirFileIDs :: Index -> State -> Item -> IO ResultF
readDirFileIDs index state item = do
  fileid <- peek $ iFileID item
  following <- fromIntegral <$> peek (iAux item)
  let substate = substateof item state
      subs off = case compare off following of
        LT -> do
          result <- readItemFileIDs index $ substate {start = off}
          rest <- subs $ nextF result
          return $! (nameof (resitemF result) substate, result) : rest
        EQ -> return []
        GT -> fail $ "Offset mismatch at " ++ show off ++
                     " (ends at " ++ show following ++ ")"
  inferiors <- subs $ start substate
  return $ ResultF
    { nextF = following
    , resitemF = item
    , _fileIDs =
        ((path substate, TreeType), fileid) :
        concatMap (_fileIDs . snd) inferiors
    }

readFileFileID :: Index -> State -> Item -> IO ResultF
readFileFileID _ state item = do
  fileid' <- peek $ iFileID item
  let myname = nameof item state
  return $ ResultF
    { nextF = start state + itemNext item
    , resitemF = item
    , _fileIDs = [((path state `maybeAppendName` myname, BlobType), fileid')]
    }

-- * Reading and writing 'Tree's from/to the index

-- | Initialize an 'Index' from the given index file.
openIndex :: FilePath -> IO Index
openIndex indexpath = do
  (mmap_ptr, mmap_size) <- mmapIndex indexpath 0
  base <- getCurrentDirectory
  return $ if mmap_size == 0
             then EmptyIndex
             else Index { mmap = mmap_ptr
                        , basedir = base
                        , predicate = \_ _ -> True }

formatIndex :: ForeignPtr () -> Tree IO -> Tree IO -> IO ()
formatIndex mmap_ptr old reference = do
    _ <- create (SubTree reference) (anchoredRoot) size_header
    unsafePokeBS magic index_version
    withForeignPtr mmap_ptr $ \ptr ->
      pokeByteOff ptr size_magic index_endianness_indicator
  where
    magic = fromForeignPtr (castForeignPtr mmap_ptr) 0 4
    create (File _) path' off = do
      i <- createItem BlobType path' mmap_ptr off
      -- TODO calling getFileStatus here is both slightly
      -- inefficient and slightly race-prone
      st <- getFileStatus (iPath i)
      updateFileID i (fileID st)
      case find old path' of
        Nothing -> return ()
        Just ti -> do
          let hash = itemHash ti
              mtime = modificationTime st
              size = fileSize st
          -- TODO prove that isNothing hash is impossible
          updateItem i (fromIntegral size) (fromJust hash)
          updateTime i mtime
      return $ off + itemNext i
    create (SubTree s) path' off = do
      i <- createItem TreeType path' mmap_ptr off
      st <- getFileStatus (iPath i)
      updateFileID i (fileID st)
      case find old path' of
        Nothing -> return ()
        Just ti -> case itemHash ti of
          Nothing -> return ()
          Just h -> updateItem i 0 h
      let subs [] = return $ off + itemNext i
          subs ((name, x):xs) = do
            let path'' = path' `appendPath` name
            noff <- subs xs
            create x path'' noff
      lastOff <- subs (listImmediate s)
      poke (iAux i) (fromIntegral lastOff)
      return lastOff
    create (Stub _ _) path' _ =
      fail $ "Cannot create index from stubbed Tree at " ++ show path'
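
-- A minimal usage sketch, not part of this module's API (the function name
-- is made up; the index path and reference 'Tree' are parameters because
-- tracking them is the application's job, as the module header explains). It
-- shows one way calling code might combine the exported entry points:
-- re-create the index when its format is invalid, otherwise just open it,
-- then read the 'Tree' from it.
_exampleRefreshIndex :: FilePath -> Tree IO -> IO (Tree IO)
_exampleRefreshIndex indexpath reference = do
  exists <- doesFileExist indexpath
  valid <- if exists then indexFormatValid indexpath else return False
  index <- if valid
             then openIndex indexpath
             else updateIndexFrom indexpath reference
  treeFromIndex index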

-- | Add and remove entries in the given 'Index' to make it match the given
-- 'Tree'. If an object in the 'Tree' does not exist in the current working
-- directory, its index entry will have zero hash, size, aux, and fileID. For
-- the hash this translates to 'Nothing', see 'iHash''.
updateIndexFrom :: FilePath -> Tree IO -> IO Index
updateIndexFrom indexpath ref = do
  debugMessage "Updating the index ..."
  old_tree <- treeFromIndex =<< openIndex indexpath
  reference <- expand ref
  let len_root = itemAllocSize anchoredRoot
      len = len_root + sum [ itemAllocSize p | (p, _) <- list reference ]
  exist <- doesFileExist indexpath
  -- Note that the file is still open via the mmapped pointer in the open
  -- index, and we /are/ going to write the index using that pointer. If we
  -- could rely on posix semantics, we would just delete the file. However,
  -- on windows this would fail, so instead we rename it.
  when exist $ renameFile indexpath (indexpath <.> "old")
  (mmap_ptr, _) <- mmapIndex indexpath len
  formatIndex mmap_ptr old_tree reference
  debugMessage "Done updating the index, reopening it ..."
  openIndex indexpath

-- | Read an 'Index', starting with the root, to create a 'Tree'.
treeFromIndex :: Index -> IO (Tree IO)
treeFromIndex EmptyIndex = return emptyTree
treeFromIndex index = do
  let initial = State { start = size_header, dirlength = 0, path = anchoredRoot }
      -- This is not a typo! As a side-effect of reading a tree from the
      -- index, the index also gets updated, and this is what can take a long
      -- time, since it may involve reading all files in the working tree that
      -- are also in pristine+pending (to compute their hashes).
      progressKey = "Updating the index"
  beginTedious progressKey
  res <- readItem progressKey index initial
  endTedious progressKey
  case treeitem res of
    Just (SubTree tree) -> return $ filter (predicate index) tree
    _ -> fail "Unexpected failure in treeFromIndex!"

-- | Check that a given file is an index file with a format we can handle.
-- You should remove and re-create the index whenever this is not true.
indexFormatValid :: FilePath -> IO Bool
indexFormatValid path' =
  do
    (start, _, _) <- mmapFileForeignPtr path' ReadOnly (Just (0, size_header))
    let magic = fromForeignPtr (castForeignPtr start) 0 4
    endianness_indicator <- withForeignPtr start $ \ptr -> peekByteOff ptr 4
    return $ index_version == magic
          && index_endianness_indicator == endianness_indicator
  `catch` \(_ :: SomeException) -> return False

instance FilterTree IndexM IO where
  filter _ EmptyIndex = EmptyIndex
  filter p index = index { predicate = \a b -> predicate index a b && p a b }

-- * Getting the file ID from a path

-- | For a given path, get the corresponding fileID from the filesystem.
getFileID :: AnchoredPath -> IO (Maybe FileID)
getFileID p = fmap F.fileID <$> getFileStatus (realPath p)
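
-- An illustrative sketch only (the name is made up and nothing else uses
-- it): the module header says fileids exist to track moved files, and this
-- is the kind of lookup that enables. Given a working-copy path, we ask the
-- filesystem for its 'FileID' and then look for an index entry with the same
-- 'FileID'; a hit under a different path suggests the file was moved.
_exampleFindByFileID :: Index -> AnchoredPath -> IO (Maybe (AnchoredPath, ItemType))
_exampleFindByFileID index p = do
  mfid <- getFileID p
  ids <- listFileIDs index
  return $ do
    fid <- mfid
    lookup fid [ (fid', entry) | (entry, fid') <- ids ]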

-- * Low-level utilities

-- Wow, unsafe.
unsafePokeBS :: BC.ByteString -> BC.ByteString -> IO ()
unsafePokeBS to from = do
  let (fp_to, off_to, len_to) = toForeignPtr to
      (fp_from, off_from, len_from) = toForeignPtr from
  when (len_to /= len_from) $
    fail $ "Length mismatch in unsafePokeBS: from = " ++ show len_from
             ++ " /= to = " ++ show len_to
  withForeignPtr fp_from $ \p_from ->
    withForeignPtr fp_to $ \p_to ->
      copyBytes (plusPtr p_to off_to) (plusPtr p_from off_from)
                (fromIntegral len_to)

-- | Round @i@ up to the next multiple of @boundary@.
align :: Integral a => a -> a -> a
align boundary i =
  case i `rem` boundary of
    0 -> i
    x -> i + boundary - x
{-# INLINE align #-}

getFileStatus :: FilePath -> IO FileStatus
getFileStatus path = do
  mst <- Darcs.Util.File.getFileStatus path
  case mst of
    Just st | F.isSymbolicLink st -> do
      hPutStrLn stderr $ "Warning: ignoring symbolic link " ++ path
      return Nothing
    _ -> return mst

data IndexEntry = IndexEntry
  { ieSize :: Int64
  , ieAux :: Int64
  , ieFileID :: FileID
  , ieHash :: Maybe Hash
  , ieType :: Char
  , iePath :: AnchoredPath
  }

dumpIndex :: FilePath -> IO [IndexEntry]
dumpIndex indexpath =
    mmapWithFilePtr indexpath ReadOnly Nothing $ \(ptr, size) -> do
      magic <- BS.createFromPtr ptr 4
      when (magic /= BS.toShort index_version) $ fail "index format is invalid"
      readEntries (size - size_header) (ptr `plusPtr` size_header)
  where
    readEntries s _ | s < (next 0) = return []
    readEntries s p = do
      (entry, fwd) <- readEntry p
      entries <- readEntries (s - fwd) (p `plusPtr` fwd)
      return (entry : entries)
    readEntry p = do
      ieSize <- peekByteOff p off_size
      ieAux <- peekByteOff p off_aux
      ieFileID <- peekByteOff p off_fileid
      ieHash <- do
        h <- BS.createFromPtr (p `plusPtr` off_hash) size_hash
        return $ if h == shortNullHash then Nothing else Just (SHA256 h)
      dsclen :: Int32 <- peekByteOff p off_dsclen
      ieType <- b2c <$> peekByteOff p off_dsc
      path <- BS.fromShort <$>
        BS.createFromPtr (p `plusPtr` off_path)
          (fromIntegral dsclen - size_type - size_null)
      iePath <- either fail return $
        AnchoredPath <$> mapM rawMakeName (BC.split '/' (fixRoot path))
      return (IndexEntry {..}, next (B.length path))
    b2c :: Word8 -> Char
    b2c = toEnum . fromIntegral
    off_path = off_dsc + size_type
    next pathlen = align 4 $ size_size + size_aux + size_fileid + size_hash
                           + size_dsclen + size_type + pathlen + size_null
    fixRoot s | s == BC.pack "." = BC.empty
    fixRoot s = s
    shortNullHash = BS.toShort nullHash
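
-- An illustrative sketch only (the name is made up and nothing else uses
-- it): print the type byte and path of every entry of an index file, e.g.
-- when debugging. 'dumpIndex' is exported for testing, and this is one
-- possible consumer of it.
_exampleDumpIndex :: FilePath -> IO ()
_exampleDumpIndex indexpath = do
  entries <- dumpIndex indexpath
  mapM_ (\e -> putStrLn (ieType e : ' ' : realPath (iePath e))) entries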