% Copyright (C) 2003-2005 David Roundy % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2, or (at your option) % any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; see the file COPYING. If not, write to % the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, % Boston, MA 02110-1301, USA. \darcsCommand{optimize} \begin{code} {-# OPTIONS_GHC -cpp #-} {-# LANGUAGE CPP #-} module Darcs.Commands.Optimize ( optimize ) where import Control.Applicative ( (<$>) ) import Control.Monad ( when, unless ) import Data.Maybe ( isJust ) import System.Directory ( getDirectoryContents, doesDirectoryExist, doesFileExist, renameFile ) import System.IO.Unsafe ( unsafeInterleaveIO ) import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy as BL import Storage.Hashed.Darcs( decodeDarcsSize ) import Darcs.Hopefully ( info ) import Darcs.Commands ( DarcsCommand(..), nodefaults ) import Darcs.Arguments ( DarcsFlag( UpgradeFormat, UseHashedInventory, Compress, UnCompress, NoCompress, Reorder, Relink, RelinkPristine, OptimizePristine, OptimizeHTTP ), reorderPatches, uncompressNocompress, relink, relinkPristine, sibling, flagsToSiblings, upgradeFormat, workingRepoDir, umaskOption, optimizePristine, -- optimizeHTTP -- disabled for darcs-2.5 ) import Darcs.Repository.Prefs ( getPreflist ) import Darcs.Repository ( Repository, withRepoLock, ($-), withGutsOf, readRepo, optimizeInventory, tentativelyReplacePatches, cleanRepository, amInRepository, finalizeRepositoryChanges, replacePristine ) import Darcs.Witnesses.Ordered ( (+<+), reverseRL, mapRL, (:>>)(..) , mapFL, bunchFL, lengthRL ) import Darcs.Patch.Info ( isTag ) import Darcs.Patch ( RepoPatch ) import Darcs.Patch.Set ( PatchSet(..), newset2RL, newset2FL, progressPatchSet ) import ByteStringUtils ( gzReadFilePS ) import Darcs.Patch.Depends ( splitOnTag ) import Darcs.Lock ( maybeRelink, gzWriteAtomicFilePS, writeAtomicFilePS ) import Darcs.RepoPath ( toFilePath ) import Darcs.Utils ( withCurrentDirectory ) import Progress ( debugMessage ) import Darcs.Repository.Pristine ( identifyPristine, pristineDirectory ) import Darcs.Global ( darcsdir ) -- imports for optimize --upgrade; to be tidied import System.Directory ( createDirectoryIfMissing, removeFile ) import System.FilePath.Posix ( takeExtension, (), (<.>) ) import Progress ( beginTedious, endTedious, tediousSize ) import Darcs.Flags ( compression ) import Darcs.Lock ( rmRecursive ) import Darcs.ProgressPatches ( progressFL ) import Darcs.Repository.Cache ( hashedDir, HashedDir(HashedPristineDir) ) import Darcs.Repository.Format ( identifyRepoFormat, createRepoFormat, writeRepoFormat, formatHas, RepoProperty ( HashedInventory ) ) import qualified Darcs.Repository.HashedRepo as HashedRepo import Darcs.Repository.Prefs ( getCaches ) import Darcs.Repository.Repair ( replayRepository, RepositoryConsistency(..) ) import Darcs.Repository.State ( readRecorded ) import Darcs.Utils ( catchall ) import Storage.Hashed.Tree( TreeItem(..), list, expand, emptyTree ) import Storage.Hashed.AnchoredPath( anchorPath ) import Storage.Hashed.Plain( readPlainTree ) import Storage.Hashed.Darcs( writeDarcsHashed ) import Codec.Archive.Tar ( write ) import Codec.Archive.Tar.Entry ( fileEntry, toTarPath ) import Codec.Compression.GZip ( compress ) #include "gadts.h" optimizeDescription :: String optimizeDescription = "Optimize the repository." optimizeHelp :: String optimizeHelp = "The `darcs optimize' command modifies the current repository in an\n" ++ "attempt to reduce its resource requirements. By default a single\n" ++ "fast, safe optimization is performed; additional optimization\n" ++ "techniques can be enabled by passing options to `darcs optimize'.\n" ++ "\n" ++ optimizeHelpInventory ++ -- "\n" ++ optimize_help_reorder ++ "\n" ++ optimizeHelpRelink ++ -- uncompression is least useful, so it is last. "\n" ++ optimizeHelpCompression ++ "\n" ++ "There is one more optimization which CAN NOT be performed by this\n" ++ "command. Every time your record a patch, a new inventory file is\n" ++ "written to _darcs/inventories/, and old inventories are never reaped.\n" ++ "\n" ++ "If _darcs/inventories/ is consuming a relatively large amount of\n" ++ "space, you can safely reclaim it by using `darcs get' to make a\n" ++ "complete copy of the repo. When doing so, don't forget to copy over\n" ++ "any unsaved changes you have made to the working tree or to\n" ++ "unversioned files in _darcs/prefs/ (such as _darcs/prefs/author).\n" optimize :: DarcsCommand optimize = DarcsCommand {commandName = "optimize", commandHelp = optimizeHelp, commandDescription = optimizeDescription, commandExtraArgs = 0, commandExtraArgHelp = [], commandCommand = optimizeCmd, commandPrereq = amInRepository, commandGetArgPossibilities = return [], commandArgdefaults = nodefaults, commandAdvancedOptions = [uncompressNocompress, umaskOption], commandBasicOptions = [workingRepoDir, reorderPatches, sibling, relink, relinkPristine, upgradeFormat, optimizePristine ]} --optimizeHTTP]} -- disabled for 2.5 optimizeCmd :: [DarcsFlag] -> [String] -> IO () optimizeCmd origopts _ = do when (UpgradeFormat `elem` origopts) optimizeUpgradeFormat withRepoLock opts $- \repository -> do when (OptimizeHTTP `elem` origopts) doOptimizeHTTP if (OptimizePristine `elem` opts) then doOptimizePristine repository else do cleanRepository repository doReorder opts repository doOptimizeInventory repository when (Compress `elem` opts || UnCompress `elem` opts) $ optimizeCompression opts when (Relink `elem` opts || (RelinkPristine `elem` opts)) $ doRelink opts repository putStrLn "Done optimizing!" where opts = if UnCompress `elem` origopts then NoCompress:origopts else origopts optimizeHelpInventory :: String optimizeHelpInventory = "The default optimization moves recent patches (those not included in\n" ++ "the latest tag) to the `front', reducing the amount that a typical\n" ++ "remote command needs to download. It should also reduce the CPU time\n" ++ "needed for some operations.\n" doOptimizeInventory :: RepoPatch p => Repository p C(r u t) -> IO () doOptimizeInventory repository = do debugMessage "Writing out a nice copy of the inventory." optimizeInventory repository debugMessage "Done writing out a nice copy of the inventory." optimizeHelpCompression :: String optimizeHelpCompression = "By default patches are compressed with zlib (RFC 1951) to reduce\n" ++ "storage (and download) size. In exceptional circumstances, it may be\n" ++ "preferable to avoid compression. In this case the `--dont-compress'\n" ++ "option can be used (e.g. with `darcs record') to avoid compression.\n" ++ "\n" ++ "The `darcs optimize --uncompress' and `darcs optimize --compress'\n" ++ "commands can be used to ensure existing patches in the current\n" ++ "repository are respectively uncompressed or compressed. Note that\n" ++ "repositories in the legacy `old-fashioned-inventory' format have a .gz\n" ++ "extension on patch files even when uncompressed.\n" optimizeCompression :: [DarcsFlag] -> IO () optimizeCompression opts = do putStrLn "Optimizing (un)compression of patches..." do_compress (darcsdir++"/patches") putStrLn "Optimizing (un)compression of inventories..." do_compress (darcsdir++"/inventories") where do_compress f = do isd <- doesDirectoryExist f if isd then withCurrentDirectory f $ do fs <- filter notdot `fmap` getDirectoryContents "." mapM_ do_compress fs else if Compress `elem` opts then gzReadFilePS f >>= gzWriteAtomicFilePS f else gzReadFilePS f >>= writeAtomicFilePS f notdot ('.':_) = False notdot _ = True optimizeHelpRelink :: String optimizeHelpRelink = "The `darcs optimize --relink' command hard-links patches that the\n" ++ "current repository has in common with its peers. Peers are those\n" ++ "repositories listed in _darcs/prefs/sources, or defined with the\n" ++ "`--sibling' option (which can be used multiple times).\n" ++ "\n" ++ "Darcs uses hard-links automatically, so this command is rarely needed.\n" ++ "It is most useful if you used `cp -r' instead of `darcs get' to copy a\n" ++ "repository, or if you pulled the same patch from a remote repository\n" ++ "into multiple local repositories.\n" ++ "\n" ++ "A `darcs optimize --relink-pristine' command is also available, but\n" ++ "generally SHOULD NOT be used. It results in a relatively small space\n" ++ "saving at the cost of making many Darcs commands MUCH slower.\n" doOptimizePristine :: RepoPatch p => Repository p C(r u t) -> IO () doOptimizePristine repo = do hashed <- doesFileExist $ darcsdir "hashed_inventory" when hashed $ do inv <- BS.readFile (darcsdir "hashed_inventory") let linesInv = BS.split '\n' inv case linesInv of [] -> return () (pris_line:_) -> let size = decodeDarcsSize $ BS.drop 9 pris_line in when (isJust size) $ do putStrLn "Optimizing hashed pristine..." readRecorded repo >>= replacePristine repo cleanRepository repo doRelink :: RepoPatch p => [DarcsFlag] -> Repository p C(r u t) -> IO () doRelink opts repository = do some_siblings <- return (flagsToSiblings opts) defrepolist <- getPreflist "defaultrepo" siblings <- return (map toFilePath some_siblings ++ defrepolist) if (siblings == []) then putStrLn "No siblings -- no relinking done." else do when (Relink `elem` opts) $ do debugMessage "Relinking patches..." patch_tree <- expand =<< readPlainTree "_darcs/patches" let patches = [ anchorPath "" p | (p, File _) <- list patch_tree ] maybeRelinkFiles siblings patches "_darcs/patches" when (RelinkPristine `elem` opts) $ do pristine <- identifyPristine case (pristineDirectory pristine) of (Just d) -> do debugMessage "Relinking pristine tree..." c <- readRecorded repository maybeRelinkFiles siblings [ anchorPath "" p | (p, File _) <- list c ] d Nothing -> return () debugMessage "Done relinking." return () return () maybeRelinkFiles :: [String] -> [String] -> String -> IO () maybeRelinkFiles src dst dir = mapM_ (maybeRelinkFile src) (map ((dir ++ "/") ++) dst) maybeRelinkFile :: [String] -> String -> IO () maybeRelinkFile [] _ = return () maybeRelinkFile (h:t) f = do done <- maybeRelink (h ++ "/" ++ f) f unless done $ maybeRelinkFile t f return () \end{code} \begin{options} --reorder-patches \end{options} The \verb|--reorder-patches| option causes Darcs to create an optimal ordering of its internal patch inventory. This may help to produce shorter `context' lists when sending patches, and may improve performance for some other operations as well. You should not run \verb!--reorder-patches! on a repository from which someone may be simultaneously pulling or getting, as this could lead to repository corruption. \begin{code} -- FIXME: someone needs to grovel through the source and determine -- just how optimizeInventory differs from doReorder. The following -- is purely speculation. --twb, 2009-04 -- optimize_help_reorder :: String -- optimize_help_reorder = -- "The `darcs optimize --reorder' command is a more comprehensive version\n" ++ -- "of the default optimization. It reorders patches with respect to ALL\n" ++ -- "tags, rather than just the latest tag.\n" doReorder :: RepoPatch p => [DarcsFlag] -> Repository p C(r u r) -> IO () doReorder opts _ | not (Reorder `elem` opts) = return () doReorder opts repository = do debugMessage "Reordering the inventory." PatchSet ps _ <- chooseOrder `fmap` readRepo repository withGutsOf repository $ do tentativelyReplacePatches repository opts $ reverseRL ps finalizeRepositoryChanges repository debugMessage "Done reordering the inventory." chooseOrder :: forall p C(s x) . RepoPatch p => PatchSet p C(s x) -> PatchSet p C(s x) chooseOrder ps = case filter isTag $ mapRL info $ newset2RL ps of [] -> ps (lt:_) -> case splitOnTag lt ps of PatchSet xs ts :>> r -> PatchSet (r+<+xs) ts \end{code} The \verb|--upgrade| option for \verb!darcs optimize! performs an inplace upgrade of your repository to the latest \emph{compatible} format. Right now means that darcs 1 old-fashioned repositories will be upgraded to darcs-1 hashed repositories (and notably, not to darcs 2 repositories as that would not be compatible; see \verb!darcs convert!). \begin{code} optimizeUpgradeFormat :: IO () optimizeUpgradeFormat = do debugMessage $ "Upgrading to hashed..." rf <- either fail return =<< identifyRepoFormat "." debugMessage $ "Found our format" if formatHas HashedInventory rf then putStrLn "No action taken because this repository already is hashed." else do putStrLn "Checking repository in case of corruption..." withRepoLock [] $- \repository -> do state <- replayRepository repository [] return case state of RepositoryConsistent -> do putStrLn "The repository is consistent." actuallyUpgradeFormat repository _repoIsBroken -> putStrLn "Corruption detected! Please run darcs repair first." actuallyUpgradeFormat :: RepoPatch p => Repository p C(r u t) -> IO () actuallyUpgradeFormat repository = do -- convert patches/inventory patches <- readRepo repository let k = "Hashing patch" beginTedious k tediousSize k (lengthRL $ newset2RL patches) let patches' = progressPatchSet k patches cache <- getCaches [] "." let compr = compression [] -- default compression HashedRepo.writeTentativeInventory cache compr patches' endTedious k -- convert pristine by applying patches -- the faster alternative would be to copy pristine, but the apply method is more reliable let patchesToApply = progressFL "Applying patch" $ newset2FL $ patches' createDirectoryIfMissing False $ darcsdir hashedDir HashedPristineDir writeDarcsHashed emptyTree "_darcs/pristine.hashed" sequence_ $ mapFL (HashedRepo.applyToTentativePristine []) $ bunchFL 100 patchesToApply -- now make it official HashedRepo.finalizeTentativeChanges repository compr writeRepoFormat (createRepoFormat [UseHashedInventory]) (darcsdir "format") -- clean out old-fashioned junk debugMessage "Cleaning out old-fashioned repository files..." removeFile $ darcsdir "inventory" removeFile $ darcsdir "tentative_inventory" rmRecursive (darcsdir "pristine") `catchall` rmRecursive (darcsdir "current") rmGzsIn (darcsdir "patches") rmGzsIn (darcsdir "inventories") let checkpointDir = darcsdir "checkpoints" hasCheckPoints <- doesDirectoryExist checkpointDir when hasCheckPoints $ rmRecursive checkpointDir putStrLn "Done upgrading!" where rmGzsIn dir = withCurrentDirectory dir $ do gzs <- filter ((== ".gz") . takeExtension) `fmap` getDirectoryContents "." mapM_ removeFile gzs doOptimizeHTTP :: IO () doOptimizeHTTP = do rf <- either fail return =<< identifyRepoFormat "." unless (formatHas HashedInventory rf) . fail $ "Unsupported repository format:\n" ++ " only hashed repositories can be optimized for HTTP" createDirectoryIfMissing False packsDir ps <- dirContents' "patches" $ \x -> all (x /=) ["unrevert", "pending", "pending.tentative"] BL.writeFile (patchesTar <.> "part") . compress . write =<< mapM fileEntry' ps renameFile (patchesTar <.> "part") patchesTar let i = darcsdir "hashed_inventory" is <- dirContents "inventories" pr <- dirContents "pristine.hashed" BL.writeFile (basicTar <.> "part") . compress . write =<< mapM fileEntry' (i : (is ++ pr)) renameFile (basicTar <.> "part") basicTar where packsDir = darcsdir "packs" basicTar = packsDir "basic.tar.gz" patchesTar = packsDir "patches.tar.gz" fileEntry' x = unsafeInterleaveIO $ do content <- BL.fromChunks . return <$> gzReadFilePS x tp <- either fail return $ toTarPath False x return $ fileEntry tp content dirContents d = dirContents' d $ const True dirContents' d f = map ((darcsdir d) ) . filter (\x -> head x /= '.' && f x) <$> getDirectoryContents (darcsdir d) \end{code}