module Darcs.Repository.Traverse ( cleanInventories , cleanPatches , cleanPristine , cleanRepository , diffHashLists , listInventories , listInventoriesLocal , listInventoriesRepoDir , listPatchesLocalBucketed , specialPatches ) where import Darcs.Prelude import Data.Maybe ( fromJust ) import qualified Data.ByteString.Char8 as BC ( unpack, pack ) import qualified Data.Set as Set import System.Directory ( listDirectory ) import System.FilePath.Posix( () ) import Darcs.Repository.Cache ( HashedDir(..), bucketFolder ) import Darcs.Repository.HashedIO ( cleanHashdir ) import Darcs.Repository.Inventory ( Inventory(..) , emptyInventory , getValidHash , inventoryPatchNames , parseInventory , peekPristineHash , skipPristineHash ) import Darcs.Repository.InternalTypes ( Repository , repoCache , withRepoLocation ) import Darcs.Repository.Paths ( hashedInventory , hashedInventoryPath , inventoriesDir , inventoriesDirPath , patchesDirPath ) import Darcs.Repository.Prefs ( globalCacheDir ) import Darcs.Util.ByteString ( gzReadFilePS ) import Darcs.Util.Exception ( ifDoesNotExistError ) import Darcs.Util.Global ( darcsdir, debugMessage ) import Darcs.Util.Lock ( removeFileMayNotExist ) cleanRepository :: Repository rt p wR wU wT -> IO () cleanRepository r = cleanPristine r >> cleanInventories r >> cleanPatches r -- | The way patchfiles, inventories, and pristine trees are stored. -- 'PlainLayout' means all files are in the same directory. 'BucketedLayout' -- means we create a second level of subdirectories, such that all files whose -- hash starts with the same two letters are in the same directory. -- Currently, only the global cache uses 'BucketedLayout' while repositories -- use the 'PlainLayout'. data DirLayout = PlainLayout | BucketedLayout -- | Remove unreferenced entries in the pristine cache. cleanPristine :: Repository rt p wR wU wT -> IO () cleanPristine r = withRepoLocation r $ do debugMessage "Cleaning out the pristine cache..." i <- gzReadFilePS hashedInventoryPath cleanHashdir (repoCache r) HashedPristineDir [peekPristineHash i] -- | Set difference between two lists of hashes. diffHashLists :: [String] -> [String] -> [String] diffHashLists xs ys = from_set $ (to_set xs) `Set.difference` (to_set ys) where to_set = Set.fromList . map BC.pack from_set = map BC.unpack . Set.toList -- | Remove unreferenced files in the inventories directory. cleanInventories :: Repository rt p wR wU wT -> IO () cleanInventories _ = do debugMessage "Cleaning out inventories..." hs <- listInventoriesLocal fs <- ifDoesNotExistError [] $ listDirectory inventoriesDirPath mapM_ (removeFileMayNotExist . (inventoriesDirPath )) (diffHashLists fs hs) -- FIXME this is ugly, these files should be directly under _darcs -- since they are not hashed. And 'unrevert' isn't even a real patch but -- a patch bundle. -- | List of special patch files that may exist in the directory -- _darcs/patches/. We must not clean those. specialPatches :: [FilePath] specialPatches = ["unrevert", "pending", "pending.tentative"] -- | Remove unreferenced files in the patches directory. cleanPatches :: Repository rt p wR wU wT -> IO () cleanPatches _ = do debugMessage "Cleaning out patches..." hs <- (specialPatches ++) <$> listPatchesLocal PlainLayout darcsdir darcsdir fs <- ifDoesNotExistError [] (listDirectory patchesDirPath) mapM_ (removeFileMayNotExist . (patchesDirPath )) (diffHashLists fs hs) -- | Return a list of the inventories hashes. -- The first argument can be readInventory or readInventoryLocal. -- The second argument specifies whether the files are expected -- to be stored in plain or in bucketed format. -- The third argument is the directory of the parent inventory files. -- The fourth argument is the directory of the head inventory file. listInventoriesWith :: (FilePath -> IO Inventory) -> DirLayout -> String -> String -> IO [String] listInventoriesWith readInv dirformat baseDir startDir = do mbStartingWithInv <- getStartingWithHash startDir hashedInventory followStartingWiths mbStartingWithInv where getStartingWithHash dir file = inventoryParent <$> readInv (dir file) invDir = baseDir inventoriesDir nextDir dir = case dirformat of BucketedLayout -> invDir bucketFolder dir PlainLayout -> invDir followStartingWiths Nothing = return [] followStartingWiths (Just hash) = do let startingWith = getValidHash hash mbNextInv <- getStartingWithHash (nextDir startingWith) startingWith (startingWith :) <$> followStartingWiths mbNextInv -- | Return a list of the inventories hashes. -- This function attempts to retrieve missing inventory files from the cache. listInventories :: IO [String] listInventories = listInventoriesWith readInventory PlainLayout darcsdir darcsdir -- | Return inventories hashes by following the head inventory. -- This function does not attempt to retrieve missing inventory files. listInventoriesLocal :: IO [String] listInventoriesLocal = listInventoriesWith readInventoryLocal PlainLayout darcsdir darcsdir -- | Return a list of the inventories hashes. -- The argument @repoDir@ is the directory of the repository from which -- we are going to read the head inventory file. -- The rest of hashed files are read from the global cache. listInventoriesRepoDir :: String -> IO [String] listInventoriesRepoDir repoDir = do gCacheDir' <- globalCacheDir let gCacheInvDir = fromJust gCacheDir' listInventoriesWith readInventoryLocal BucketedLayout gCacheInvDir (repoDir darcsdir) -- | Return a list of the patch filenames, extracted from inventory -- files, by starting with the head inventory and then following the -- chain of parent inventories. -- -- This function does not attempt to download missing inventory files. -- -- * The first argument specifies whether the files are expected -- to be stored in plain or in bucketed format. -- * The second argument is the directory of the parent inventory. -- * The third argument is the directory of the head inventory. listPatchesLocal :: DirLayout -> String -> String -> IO [String] listPatchesLocal dirformat baseDir startDir = do inventory <- readInventory (startDir hashedInventory) followStartingWiths (inventoryParent inventory) (inventoryPatchNames inventory) where invDir = baseDir inventoriesDir nextDir dir = case dirformat of BucketedLayout -> invDir bucketFolder dir PlainLayout -> invDir followStartingWiths Nothing patches = return patches followStartingWiths (Just hash) patches = do let startingWith = getValidHash hash inv <- readInventoryLocal (nextDir startingWith startingWith) (patches ++) <$> followStartingWiths (inventoryParent inv) (inventoryPatchNames inv) -- |listPatchesLocalBucketed is similar to listPatchesLocal, but -- it read the inventory directory under @darcsDir@ in bucketed format. listPatchesLocalBucketed :: String -> String -> IO [String] listPatchesLocalBucketed = listPatchesLocal BucketedLayout -- | Read the given inventory file if it exist, otherwise return an empty -- inventory. Used when we expect that some inventory files may be missing. -- Still fails with an error message if file cannot be parsed. readInventoryLocal :: FilePath -> IO Inventory readInventoryLocal path = ifDoesNotExistError emptyInventory $ readInventory path -- | Read an inventory from a file. Fails with an error message if -- file is not there or cannot be parsed. readInventory :: FilePath -> IO Inventory readInventory path = do -- FIXME we should check the hash (if this is a hashed file) inv <- skipPristineHash <$> gzReadFilePS path case parseInventory inv of Right r -> return r Left e -> fail $ unlines [unwords ["parse error in file", path], e]