-- Copyright (C) 2007 David Roundy -- -- This program is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2, or (at your option) -- any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program; if not, write to the Free Software Foundation, -- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. {-# OPTIONS_GHC -fno-warn-missing-methods #-} {-# LANGUAGE MultiParamTypeClasses #-} module Darcs.Repository.HashedIO ( copyHashed, copyPartialsHashed, cleanHashdir, getHashedFiles, pathsAndContents ) where import Darcs.Prelude import Darcs.Util.Global ( darcsdir ) import qualified Data.Set as Set import System.Directory ( getDirectoryContents, createDirectoryIfMissing ) import Control.Monad.State ( StateT, runStateT, modify, get, put, gets, lift, evalStateT ) import Control.Monad ( when, void, unless, guard ) import Data.Maybe ( isJust ) import System.IO.Unsafe ( unsafeInterleaveIO ) import Darcs.Repository.Cache ( Cache, fetchFileUsingCache, writeFileUsingCache, peekInCache, speculateFileUsingCache, okayHash, cleanCachesWithHint, HashedDir(..), hashedDir ) import Darcs.Patch.ApplyMonad ( ApplyMonad(..), ApplyMonadTree(..) ) import Darcs.Repository.Flags ( Compression( .. ), WithWorkingDir (..) ) import Darcs.Repository.Inventory ( PristineHash, getValidHash, mkValidHash ) import Darcs.Util.Lock ( writeAtomicFilePS, removeFileMayNotExist ) import Darcs.Util.File ( withCurrentDirectory ) import Darcs.Util.Progress ( debugMessage, tediousSize, finishedOneIO ) import Darcs.Util.Path ( AnchoredPath , anchorPath , anchoredRoot , parent , breakOnDir , Name , name2fp , decodeWhiteName , encodeWhiteName , isMaliciousSubPath ) import Darcs.Util.ByteString ( linesPS, unlinesPS ) import qualified Data.ByteString as B (ByteString, length, empty) import qualified Data.ByteString.Char8 as BC (unpack, pack) import Darcs.Util.Tree.Hashed( readDarcsHashedDir, darcsLocation, decodeDarcsHash, decodeDarcsSize ) import Darcs.Util.Tree( ItemType(..), Tree ) ap2fp :: AnchoredPath -> FilePath ap2fp = anchorPath "" -- | @readHashFile c subdir hash@ reads the file with hash @hash@ in dir subdir, -- fetching it from 'Cache' @c@ if needed. The return value is a pair of the -- absolute file path and the content. readHashFile :: Cache -> HashedDir -> PristineHash -> IO (FilePath,B.ByteString) readHashFile c subdir hash = do debugMessage $ "Reading hash file "++getValidHash hash++" from "++hashedDir subdir++"/" r <- fetchFileUsingCache c subdir (getValidHash hash) debugMessage $ "Result of reading hash file: " ++ show r return r -- TODO an obvious optimization would be to remember -- the current path and a stack of directories we opened. -- Then we could batch operations in the same directory and write the -- result back only when we pop a dir off teh stack. data HashDir = HashDir { cache :: !Cache, cwdHash :: !PristineHash } type HashedIO = StateT HashDir IO mWithSubDirectory :: Name -> HashedIO a -> HashedIO a mWithSubDirectory dir j = do cwd <- readcwd case geta D dir cwd of Nothing -> fail "dir doesn't exist in mWithSubDirectory..." Just h -> do (h', x) <- withh h j -- update the parent object with new entry writecwd $ seta D dir h' cwd return x -- | This is withCurrentDirectory for read-only actions. mInSubDirectory :: Name -> HashedIO a -> HashedIO a mInSubDirectory dir j = do cwd <- readcwd case geta D dir cwd of Nothing -> fail "dir doesn't exist..." Just h -> inh h j instance ApplyMonad Tree HashedIO where type ApplyMonadBase HashedIO = IO instance ApplyMonadTree HashedIO where mDoesDirectoryExist path = do thing <- identifyThing path case thing of Just (D, _) -> return True _ -> return False mReadFilePS = readFileObject mCreateDirectory path = do h <- writeHashFile B.empty exists <- isJust `fmap` identifyThing path when exists $ fail "can't mCreateDirectory over an existing object." addThing path (D, h) mRename o n = do nexists <- isJust `fmap` identifyThing n when nexists $ fail "mRename failed..." mx <- identifyThing o -- for backwards compatibility accept rename of nonexistent files. case mx of Nothing -> return () Just x -> do rmThing o addThing n x mRemoveDirectory = rmThing mRemoveFile f = do x <- mReadFilePS f when (B.length x /= 0) $ fail $ "Cannot remove non-empty file " ++ ap2fp f rmThing f readFileObject :: AnchoredPath -> HashedIO B.ByteString readFileObject path | path == anchoredRoot = fail "root dir is not a file..." | otherwise = case breakOnDir path of Left file -> do cwd <- readcwd case geta F file cwd of Nothing -> fail $ "file doesn't exist..." ++ ap2fp path Just h -> readhash h Right (name, path') -> do mInSubDirectory name $ readFileObject path' identifyThing :: AnchoredPath -> HashedIO (Maybe (ObjType,PristineHash)) identifyThing path | path == anchoredRoot = do h <- gets cwdHash return $ Just (D, h) | otherwise = case breakOnDir path of Left name -> getany name `fmap` readcwd Right (dir, path') -> do cwd <- readcwd case geta D dir cwd of Nothing -> return Nothing Just h -> inh h $ identifyThing path' addThing :: AnchoredPath -> (ObjType,PristineHash) -> HashedIO () addThing path (o, h) = case breakOnDir path of Left name -> seta o name h `fmap` readcwd >>= writecwd Right (name,path') -> mWithSubDirectory name $ addThing path' (o,h) rmThing :: AnchoredPath -> HashedIO () rmThing path = case breakOnDir path of Left name -> do cwd <- readcwd let cwd' = filter (\(_,x,_)->x/= name) cwd if length cwd' == length cwd - 1 then writecwd cwd' else fail "obj doesn't exist in rmThing" Right (name,path') -> mWithSubDirectory name $ rmThing path' readhash :: PristineHash -> HashedIO B.ByteString readhash h = do c <- gets cache z <- lift $ unsafeInterleaveIO $ readHashFile c HashedPristineDir h let (_,out) = z return out withh :: PristineHash -> HashedIO a -> HashedIO (PristineHash,a) withh h j = do hd <- get put $ hd { cwdHash = h } x <- j h' <- gets cwdHash put hd return (h',x) inh :: PristineHash -> HashedIO a -> HashedIO a inh h j = snd `fmap` withh h j type DirEntry = (ObjType, Name, PristineHash) readcwd :: HashedIO [DirEntry] readcwd = do haveitalready <- peekroot cwd <- gets cwdHash >>= readdir unless haveitalready $ speculate cwd return cwd where speculate :: [(a,b,PristineHash)] -> HashedIO () speculate c = do cac <- gets cache mapM_ (\(_,_,z) -> lift $ speculateFileUsingCache cac HashedPristineDir (getValidHash z)) c peekroot :: HashedIO Bool peekroot = do HashDir c h <- get lift $ peekInCache c HashedPristineDir (getValidHash h) writecwd :: [DirEntry] -> HashedIO () writecwd c = do h <- writedir c modify $ \hd -> hd { cwdHash = h } data ObjType = F | D deriving Eq -- | @geta objtype name direntries@ tries to find an object of type @objtype@ named @name@ -- in @direntries@. geta :: ObjType -> Name -> [DirEntry] -> Maybe PristineHash geta o f c = do (o', h) <- getany f c guard (o == o') return h getany :: Name -> [DirEntry] -> Maybe (ObjType,PristineHash) getany _ [] = Nothing getany f ((o,f',h):_) | f == f' = Just (o,h) getany f (_:r) = getany f r seta :: ObjType -> Name -> PristineHash -> [DirEntry] -> [DirEntry] seta o f h [] = [(o,f,h)] seta o f h ((_,f',_):r) | f == f' = (o,f,h):r seta o f h (x:xs) = x : seta o f h xs readdir :: PristineHash -> HashedIO [DirEntry] readdir hash = do content <- readhash hash -- lift $ debugMessage $ show x let r = (parseLines . linesPS) content --lift $ debugMessage $ unlines $ map (\(_,path,_) -> "DEBUG readdir " ++ -- hash ++ " entry: " ++ show path) r return r where parseLines (t:n:h:rest) | t == dirType = (D, decodeWhiteName n, mkValidHash $ BC.unpack h) : parseLines rest | t == fileType = (F, decodeWhiteName n, mkValidHash $ BC.unpack h) : parseLines rest parseLines _ = [] dirType :: B.ByteString dirType = BC.pack "directory:" fileType :: B.ByteString fileType = BC.pack "file:" writedir :: [DirEntry] -> HashedIO PristineHash writedir c = do --lift $ debugMessage $ unlines $ map (\(_,path,_) -> "DEBUG writedir entry: " ++ show path) c writeHashFile cps where cps = unlinesPS $ concatMap wr c ++ [B.empty] wr (o,d,h) = [showO o, encodeWhiteName d, BC.pack (getValidHash h)] showO D = dirType showO F = fileType writeHashFile :: B.ByteString -> HashedIO PristineHash writeHashFile ps = do c <- gets cache -- pristine files are always compressed lift $ mkValidHash <$> writeFileUsingCache c GzipCompression HashedPristineDir ps type ProgressKey = String -- | Grab a whole pristine tree from a hash, and, if asked, -- write files in the working tree. copyHashed :: ProgressKey -> Cache -> WithWorkingDir -> PristineHash -> IO () copyHashed k c wwd z = void . runStateT cph $ HashDir { cache = c, cwdHash = z } where cph = do cwd <- readcwd lift $ tediousSize k (length cwd) mapM_ cp cwd cp (F,n,h) = do ps <- readhash h lift $ finishedOneIO k $ name2fp n --lift $ debugMessage $ "DEBUG copyHashed " ++ show n case wwd of WithWorkingDir -> lift $ writeAtomicFilePS (name2fp n) ps NoWorkingDir -> ps `seq` return () -- force evaluation of ps to actually copy hashed file cp (D,n,h) = if isMaliciousSubPath (name2fp n) then fail ("Caught malicious path: " ++ name2fp n) else do lift $ finishedOneIO k (name2fp n) case wwd of WithWorkingDir -> do lift $ createDirectoryIfMissing False (name2fp n) lift $ withCurrentDirectory (name2fp n) $ copyHashed k c WithWorkingDir h NoWorkingDir -> lift $ copyHashed k c NoWorkingDir h -- | Returns a list of pairs (FilePath, (strict) ByteString) of -- the pristine tree starting with the hash @root@. -- @path@ should be either "." or end with "/" -- Separator "/" is used since this function is used to generate -- zip archives from pristine trees. pathsAndContents :: FilePath -> Cache -> PristineHash -> IO [(FilePath,B.ByteString)] pathsAndContents path c root = evalStateT cph HashDir { cache = c, cwdHash = root } where cph = do cwd <- readcwd pacs <- concat <$> mapM cp cwd let current = if path == "." then [] else [(path ++ "/" , B.empty)] return $ current ++ pacs cp (F,n,h) = do ps <- readhash h let p = (if path == "." then "" else path ++ "/") ++ name2fp n return [(p,ps)] cp (D,n,h) = do let p = (if path == "." then "" else path) ++ name2fp n ++ "/" lift $ pathsAndContents p c h copyPartialsHashed :: Cache -> PristineHash -> [AnchoredPath] -> IO () copyPartialsHashed c root = mapM_ (copyPartialHashed c root) copyPartialHashed :: Cache -> PristineHash -> AnchoredPath -> IO () copyPartialHashed c root path = do case parent path of Nothing -> return () Just super -> createDirectoryIfMissing True (ap2fp super) void $ runStateT copy HashDir {cache = c, cwdHash = root} where copy = do mt <- identifyThing path case mt of Just (D, h) -> do lift $ createDirectoryIfMissing True (ap2fp path) lift $ withCurrentDirectory (ap2fp path) $ copyHashed "" c WithWorkingDir h Just (F, h) -> do ps <- readhash h lift $ writeAtomicFilePS (ap2fp path) ps Nothing -> return () -- hmm, ignore unknown paths, maybe better fail? cleanHashdir :: Cache -> HashedDir -> [PristineHash] -> IO () cleanHashdir c dir hashroots = do -- we'll remove obsolete bits of "dir" debugMessage $ "Cleaning out " ++ hashedDir dir ++ "..." let hashdir = darcsdir ++ "/" ++ hashedDir dir ++ "/" hs <- set <$> getHashedFiles hashdir (map getValidHash hashroots) fs <- set . filter okayHash <$> getDirectoryContents hashdir mapM_ (removeFileMayNotExist . (hashdir++)) (unset $ fs `Set.difference` hs) -- and also clean out any global caches. debugMessage "Cleaning out any global caches..." cleanCachesWithHint c dir (unset $ fs `Set.difference` hs) where set = Set.fromList . map BC.pack unset = map BC.unpack . Set.toList -- | getHashedFiles returns all hash files targeted by files in hashroots in -- the hashdir directory. getHashedFiles :: FilePath -> [String] -> IO [String] getHashedFiles hashdir hashroots = do let listone h = do let size = decodeDarcsSize $ BC.pack h hash = decodeDarcsHash $ BC.pack h x <- readDarcsHashedDir hashdir (size, hash) let subs = [fst $ darcsLocation "" (s, h') | (TreeType, _, s, h') <- x] hashes = h : [fst $ darcsLocation "" (s, h') | (_, _, s, h') <- x] (hashes ++) . concat <$> mapM listone subs concat <$> mapM listone hashroots