{-# LANGUAGE PatternSignatures, ScopedTypeVariables #-} module Storage.Hashed.Index where import Prelude hiding ( lookup, readFile, writeFile, catch ) import Storage.Hashed.Utils import Storage.Hashed.Tree import Storage.Hashed.AnchoredPath import Data.Int( Int64, Int32 ) import qualified Data.Set as S import qualified Data.Map as M import Bundled.Posix( getFileStatusBS, modificationTime, getFileStatus, fileSize, fileExists, EpochTime ) import System.IO.MMap( mmapFileForeignPtr, Mode(..) ) import System.Directory( removeFile, doesFileExist ) import Control.Monad( when ) import qualified Data.ByteString.Char8 as BS import Data.ByteString.Internal( toForeignPtr, fromForeignPtr, memcpy , nullForeignPtr ) import Data.IORef( newIORef, readIORef, modifyIORef, IORef ) import Data.Maybe( fromJust, isJust ) import Foreign.Storable import Foreign.ForeignPtr import Foreign.Ptr hashToString :: Hash -> String hashToString (Hash (_,s)) = BS.unpack s -------------------------- -- Indexed trees -- -- |A recursive-ish index structure (as opposed to flat-ish structure, which is -- used by git... It turns out that it's hard to efficiently read a flat index -- with our internal data structures -- we need to turn the flat index into a -- recursive Tree object, which is rather expensive...). As a bonus, we can -- also efficiently implement subtree queries this way (cf. readIndex). data Item = Item { iPath :: BS.ByteString , iName :: BS.ByteString , iHash :: BS.ByteString , iSize :: Ptr Int64 , iAux :: Ptr Int64 -- end-offset for dirs, mtime for files } deriving Show itemSize :: Item -> Int itemSize i = 4 + (BS.length $ iPath i) + 1 + 64 + 16 itemSizeI :: (Num a) => Item -> a itemSizeI = fromIntegral . itemSize itemIsDir :: Item -> Bool itemIsDir i = BS.last (iPath i) == '/' createItem :: ItemType -> AnchoredPath -> ForeignPtr () -> Int -> IO Item createItem typ path fp off = do let name = BS.concat [ anchorBS path, (typ == TreeType) ? (BS.singleton '/', BS.empty), BS.singleton '\0' ] (namefp, nameoff, namel) = toForeignPtr name withForeignPtr fp $ \p -> withForeignPtr namefp $ \namep -> do pokeByteOff p off namel memcpy (plusPtr p $ off + 4) (plusPtr namep nameoff) (fromIntegral namel) peekItem fp off Nothing peekItem :: ForeignPtr () -> Int -> Maybe Int -> IO Item peekItem fp off dirlen = withForeignPtr fp $ \p -> do nl' :: Int32 <- peekByteOff p off let nl = fromIntegral nl' path = fromForeignPtr (castForeignPtr fp) (off + 4) (nl - 1) hash = fromForeignPtr (castForeignPtr fp) (off + 4 + nl) 64 name' = snd $ BS.splitAt (fromJust dirlen) path name = (BS.last name' == '/') ? (BS.init name', name') return $! Item { iName = isJust dirlen ? (name, undefined) , iPath = path , iHash = hash , iSize = plusPtr p (off + 4 + nl + 64) , iAux = plusPtr p (off + 4 + nl + 64 + 8) } -- | Update an existing item with new hash and optionally mtime (give Nothing -- when updating directory entries). update :: Item -> Maybe EpochTime -> Hash -> IO () update item mtime (Hash (Just size,hash)) = do poke (iSize item) size pokeBS (iHash item) hash when (isJust mtime) $ poke (iAux item) (fromIntegral $ fromEnum $ fromJust mtime) update _ _ _ = fail "Index.update requires a hash with size included." iHash' :: Item -> IO Hash iHash' i = do size <- peek $ iSize i return $ hashSetSize (Hash (undefined, iHash i)) size mmapIndex :: forall a. Int -> IO (ForeignPtr a, Int) mmapIndex req_size = do exist <- doesFileExist "_darcs/index" act_size <- if exist then fileSize `fmap` getFileStatus "_darcs/index" else return 0 let size = fromIntegral $ if req_size > 0 then fromIntegral req_size else act_size case size of 0 -> return (castForeignPtr $ nullForeignPtr, size) _ -> do (x, _) <- mmapFileForeignPtr "_darcs/index" ReadWrite (Just (0, size)) return (x, size) -- |See "readIndex". This version also gives a map from paths to items, so the -- extra per-item data can be used (hash and mtime) directly. The map is in a -- form of IORef, since the data is not available until the tree is unfolded. readIndex' :: IO (Tree, IORef (M.Map AnchoredPath Item)) readIndex' = do (mmap, mmap_size) <- mmapIndex 0 dirs_changed <- newIORef S.empty item_map <- newIORef M.empty let readItem :: AnchoredPath -> Int -> Int -> IO (Item, Maybe TreeItem) readItem parent_path off dl = do item <- peekItem mmap off (Just dl) x <- if itemIsDir item then readDir parent_path item off dl else readFile parent_path item when (isJust x) $ modifyIORef item_map $ \m -> M.insert (parent_path `appendPath` (Name $ iName item)) item m return (item, x) readDir :: AnchoredPath -> Item -> Int -> Int -> IO (Maybe TreeItem) readDir parent_path item off dl = do dirend <- peek $ iAux item st <- getFileStatusBS (iPath item) let this_path = parent_path `appendPath` (Name $ iName item) nl = BS.length (iName item) dl' = dl + (nl == 0) ? (0, 1 + nl) subs coff | coff < dirend = do (idx_item, tree_item) <- readItem this_path (fromIntegral coff) dl' next <- if itemIsDir idx_item then peek $ iAux idx_item else return $ coff + itemSizeI idx_item rest <- subs next case tree_item of Nothing -> return $! rest Just ti -> return $! (Name $ iName idx_item, ti) : rest subs coff | coff == dirend = return [] | otherwise = fail "Offset mismatch." updateHash path tree = do changed <- S.member path `fmap` readIORef dirs_changed let hash = darcsTreeHash tree tree' = tree { treeHash = Just hash } if changed then do update item Nothing hash return tree' else return tree treehash <- iHash' item let rt = Stub (do s <- subs $ fromIntegral (off + itemSize item) return $ (makeTree s) { finish = updateHash this_path , treeHash = Just treehash }) (Just treehash) return $ if fileExists st then Just rt else Nothing readFile parent_path item = do st <- getFileStatusBS (iPath item) mtime <- fromIntegral `fmap` (peek $ iAux item) size <- peek $ iSize item let mtime' = modificationTime st size' = fileSize st readblob = readSegment (BS.unpack $ iPath item, Nothing) when ( mtime /= mtime' || size /= fromIntegral size' ) $ do hash_' <- sha256 `fmap` readblob let hash' = hashSetSize hash_' (fromIntegral size') update item (Just mtime') hash' modifyIORef dirs_changed $ \s -> S.union (S.fromList $ parent_path : parents parent_path) s hash <- iHash' item if fileExists st then return $ Just $ File (Blob readblob $ Just hash) else return Nothing if (mmap_size > 0) then do (_, Just (Stub root h)) <- readItem (AnchoredPath []) 0 0 tree <- root return (tree { treeHash = h }, item_map) else return (emptyTree, item_map) -- |Read an index and build up a Tree object from it, referring to current -- working directory. Any parts of the index that are out of date are updated -- in-place. The result is always an up-to-date index. Also, the Tree is stubby -- and only the pieces of the index that are unfolded will be actually updated! -- To implement a subtree query, you can use 'Tree.filter' and then unfold the -- result. Otherwise just unfold the whole tree to avoid unexpected problems. readIndex :: IO Tree readIndex = fst `fmap` readIndex' -- |Will add and remove files in index to make it match the Tree object given -- (it is an error for the Tree to contain a file or directory that does not -- exist in a plain form under FilePath). updateIndexFrom :: Tree -> IO Tree updateIndexFrom ref = do (oldidx', item_map') <- readIndex' unfold oldidx' item_map <- readIORef item_map' reference <- unfold ref let typeLen TreeType = 1 typeLen BlobType = 0 paths = [ (p, itemType i) | (p, i) <- list reference ] len = 87 + sum [ typeLen typ + 84 + 1 + length (anchorPath "" p) | (p, typ) <- paths ] exist <- doesFileExist "_darcs/index" when exist $ removeFile "_darcs/index" -- to avoid clobbering oldidx (mmap, _) <- mmapIndex len let create (File _) path off = do i <- createItem BlobType path mmap off case M.lookup path item_map of Nothing -> return () Just item -> do mtime <- peek $ iAux item hash <- iHash' item update i (Just $ fromIntegral mtime) hash return $ off + itemSize i create (SubTree s) path off = do i <- createItem TreeType path mmap off case M.lookup path item_map of Nothing -> return () Just item -> do hash <- iHash' item update i Nothing hash let subs [] = return $ off + itemSize i subs ((name,x):xs) = do let path' = path `appendPath` name noff <- subs xs create x path' noff lastOff <- subs (listImmediate s) poke (iAux i) (fromIntegral lastOff) return lastOff create (Stub _ _) _ _ = fail "Cannot create index from stubbed Tree." create (SubTree reference) (AnchoredPath []) 0 readIndex