{-# LANGUAGE ParallelListComp #-} module Storage.Hashed.Packed where import Prelude hiding ( lookup, read ) import Storage.Hashed.AnchoredPath import Storage.Hashed.Tree hiding ( lookup ) import Storage.Hashed.Utils import Storage.Hashed.Darcs import Control.Monad( forM, forM_, unless ) import Control.Applicative( (<$>) ) import System.FilePath( (), (<.>) ) import System.Directory( createDirectoryIfMissing, removeFile , getDirectoryContents ) import Bundled.Posix( fileExists, isDirectory, getFileStatus ) import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Char8 as BS import Data.Maybe( listToMaybe, catMaybes, fromJust, isNothing ) import qualified Data.Set as S import qualified Data.Map as M import Data.List( sort ) -- | On-disk format for object storage: we implement a completely loose format -- (one file per object), a compact format stored in a single append-only file -- and an immutable "pack" format. data Format = Loose | Compact | Pack deriving (Show, Eq) is_loose os = format (hatchery os) == Loose is_compact os = format (hatchery os) == Compact loose_dirs = let chars = ['0'..'9'] ++ ['a'..'f'] in [ [a,b] | a <- chars, b <- chars ] loosePath :: OS -> Hash -> FilePath loosePath os (Hash (_,hash)) = let hash' = BS.unpack hash in rootdir os "hatchery" take 2 hash' drop 2 hash' looseLookup :: OS -> Hash -> IO (Maybe FileSegment) looseLookup os hash = do let path = loosePath os hash exist <- fileExists <$> getFileStatus path return $ if exist then Just (path, Nothing) else Nothing -- | Object storage block. When used as a hatchery, the loose or compact format -- are preferable, while for mature space, the pack format is more useful. data Block = Block { blockLookup :: Hash -> IO (Maybe FileSegment) , size :: Int , format :: Format } -- | Object storage. Contains a single "hatchery" and possibly a number of -- mature space blocks, usually in form of packs. It also keeps a list of root -- pointers and has a way to extract pointers from objects (externally -- supplied). These last two things are used to implement a simple GC. data OS = OS { hatchery :: Block , mature :: [Block] , roots :: [Hash] , references :: FileSegment -> IO [Hash] , rootdir :: FilePath } -- | Reduce number of packs in the object storage. This may both recombine -- packs to eliminate dead objects and join some packs to form bigger -- packs. The set of hashes given is used as roots for GC marking. repack :: OS -> S.Set Hash -> IO OS repack os roots = error "repack undefined" -- | Add new objects to the object storage (i.e. put them into hatchery). It is -- safe to call this even on objects that are already present in the storage: -- such objects will be skipped. hatch :: OS -> [BL.ByteString] -> IO OS hatch os blobs = do processed <- mapM sieve blobs write [ (h, b) | (True, h, b) <- processed ] where write bits | is_loose os = do forM bits $ \(hash, blob) -> do BL.writeFile (loosePath os hash) blob return os | is_compact os = error "hatch/compact undefined" | otherwise = fail "Hatchery must be either Loose or Compact." sieve blob = do let hash = sha256 blob absent <- isNothing <$> lookup os hash return (absent, hash, blob) -- | Reduce hatchery size by moving things into packs. compact :: OS -> IO OS compact os = do objects <- live os [hatchery os] block <- createPack os (M.toList objects) cleanup return $ os { mature = block:mature os } where cleanup | is_loose os = forM_ loose_dirs $ nuke . ((rootdir os "hatchery") ) | is_compact os = removeFile (rootdir os "hatchery") >> return () nuke dir = mapM (removeFile . (dir )) =<< (Prelude.filter (`notElem` [".", ".."]) `fmap` getDirectoryContents dir) blocksLookup :: [Block] -> Hash -> IO (Maybe (Hash, FileSegment)) blocksLookup blocks hash = do segment <- cat `fmap` mapM (flip blockLookup hash) blocks return $ case segment of Nothing -> Nothing Just seg -> Just (hash, seg) where cat = listToMaybe . catMaybes lookup :: OS -> Hash -> IO (Maybe FileSegment) lookup os hash = do res <- blocksLookup (hatchery os : mature os) hash return $ case res of Nothing -> Nothing Just (_, seg) -> Just seg -- | Create an empty object storage in given directory, with a hatchery of -- given format. The directory is created if needed, but is assumed to be -- empty. create :: FilePath -> Format -> IO OS create path fmt = do createDirectoryIfMissing True path initHatchery readOS path where initHatchery | fmt == Loose = do mkdir hatchpath forM loose_dirs $ mkdir . (hatchpath ) | fmt == Compact = error "create/mkHatchery Compact undefined" mkdir = createDirectoryIfMissing False hatchpath = path "hatchery" readOS :: FilePath -> IO OS readOS path = do hatch_stat <- getFileStatus $ path "hatchery" let is_os = fileExists hatch_stat is_loose = isDirectory hatch_stat unless is_os $ fail $ path ++ " is not an object storage!" let _hatchery = Block { blockLookup = look os , format = if is_loose then Loose else Compact } os = OS { hatchery = _hatchery , rootdir = path , mature = packs , roots = _roots } look | format _hatchery == Loose = looseLookup | otherwise = undefined packs = [] -- FIXME read packs _roots = [] -- FIXME read packs return os readPack = undefined createPack :: OS -> [(Hash, FileSegment)] -> IO Block createPack os bits = do contents <- mapM readSegment (map snd bits) let offsets = scanl (+) 0 $ map BL.length contents headerbits = [ BL.fromChunks [ hash , BS.pack ": (" , BS.pack (show $ offset) , BS.pack ", " , BS.pack (show $ BL.length string) , BS.pack ")\n" ] | (Hash (_, hash), _) <- bits | string <- contents | offset <- offsets ] header = BL.concat $ sort headerbits blob = BL.concat $ header:contents hash@(Hash (_, hashhash)) = sha256 blob path = rootdir os BS.unpack hashhash <.> "bin" BL.writeFile path blob return $ readPack path -- | Build a map of live objects (i.e. those reachable from the given roots) in -- a given list of Blocks. live :: OS -> [Block] -> IO (M.Map Hash FileSegment) live os blocks = reachable (references os) (blocksLookup blocks) (S.fromList $ roots os) -------------------------------------------------------------- -- Reading and writing darcs-style pristine ---- -- | Read a Tree in the darcs hashed format from an object storage. This is -- basically the same as readDarcsHashed from Storage.Hashed, but uses an -- object storage instead of traditional darcs filesystem layout. Requires the -- tree root hash as a starting point. readPackedDarcsPristine :: OS -> Hash -> IO Tree readPackedDarcsPristine os root = do seg <- fromJust <$> lookup os root items <- darcsParseDir <$> readSegment seg subs <- sequence [ case tp of BlobType -> return (d, File $ file h) TreeType -> let t = readPackedDarcsPristine os h in return (d, Stub t $ Just h) | (tp, d, h) <- items ] return $ makeTreeWithHash subs root where file h = Blob ((fromJust <$> lookup os h) >>= readSegment) (Just h) -- | Write a Tree into an object storage, using the darcs-style directory -- formatting (and therefore darcs-style hashes). Gives back the object storage -- and the root hash of the stored Tree. NB. The function expects that the Tree -- comes equipped with darcs-style hashes already! writePackedDarcsPristine :: Tree -> OS -> IO (OS, Hash) writePackedDarcsPristine tree os = do t <- expand tree files <- sequence [ read b | (_, File b) <- list t ] let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ] os' <- hatch os $ files ++ dirs return (os', darcsTreeHash t) storePackedDarcsPristine :: Tree -> OS -> IO (OS, Hash) storePackedDarcsPristine tree os = do (os', root) <- writePackedDarcsPristine tree os return $ (os' { roots = root : roots os' -- FIXME we probably don't want to override the references -- thing completely here... , references = darcsPristineRefs }, root) darcsPristineRefs :: FileSegment -> IO [Hash] darcsPristineRefs fs = do con <- (darcsParseDir <$> readSegment fs) `catch` \_ -> return [] return $! [ hash | (_, _, hash) <- con, valid hash ] where valid (Hash (_, hash)) = BS.length hash == 64 && all (`elem` "0123456789abcdef") (BS.unpack hash)