% Copyright (C) 2003-2005 David Roundy % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2, or (at your option) % any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; see the file COPYING. If not, write to % the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, % Boston, MA 02110-1301, USA. \subsection{darcs optimize} \begin{code} {-# OPTIONS_GHC -cpp #-} module Darcs.Commands.Optimize ( optimize ) where import Control.Monad ( when, unless, liftM ) import Data.Maybe ( isJust ) import Text.Regex ( mkRegex, matchRegex ) import System.Directory ( getDirectoryContents, doesDirectoryExist ) import Darcs.Hopefully ( hopefully, info ) import Darcs.Commands ( DarcsCommand(..), nodefaults ) import Darcs.Arguments ( DarcsFlag( Compress, UnCompress, NoCompress, Reorder, TagName, CheckPoint, Relink, RelinkPristine ), tagname, checkpoint, reorder_patches, uncompress_nocompress, relink, relink_pristine, sibling, flagsToSiblings, working_repo_dir, umask_option, ) import Darcs.Repository.Prefs ( defaultrepo ) import Darcs.Repository ( Repository, PatchSet, withRepoLock, ($-), withGutsOf, read_repo, optimizeInventory, slurp_recorded, tentativelyReplacePatches, cleanRepository, amInRepository, finalizeRepositoryChanges ) import Darcs.Repository.Checkpoint ( write_checkpoint ) import Darcs.Patch.Ordered ( RL(..), unsafeUnRL, (+<+), mapFL_FL, reverseRL, mapRL, concatRL ) import Darcs.Patch.Info ( PatchInfo, just_name, human_friendly ) import Darcs.Patch ( RepoPatch ) import FastPackedString ( gzReadFilePS ) import Darcs.Patch.Depends ( deep_optimize_patchset, slightly_optimize_patchset, get_patches_beyond_tag, get_patches_in_tag, ) import Darcs.Lock ( maybeRelink, gzWriteAtomicFilePS, writeAtomicFilePS ) import Darcs.Utils ( withCurrentDirectory ) import Darcs.Progress ( debugMessage ) import Printer ( putDocLn, text, ($$) ) import Darcs.SlurpDirectory ( slurp, list_slurpy_files ) import Darcs.Repository.Pristine ( identifyPristine, pristineDirectory ) import Darcs.Sealed ( Sealed(..), FlippedSeal(..), unsafeUnseal, liftSM ) import Darcs.Global ( darcsdir ) #include "impossible.h" \end{code} \begin{code} optimize_description :: String optimize_description = "Optimize the repository." \end{code} \options{optimize} \haskell{optimize_help} \begin{code} optimize_help :: String optimize_help = "Optimize can help to improve the performance of your repository in a number of cases.\n" \end{code} \begin{code} optimize :: DarcsCommand optimize = DarcsCommand {command_name = "optimize", command_help = optimize_help, command_description = optimize_description, command_extra_args = 0, command_extra_arg_help = [], command_command = optimize_cmd, command_prereq = amInRepository, command_get_arg_possibilities = return [], command_argdefaults = nodefaults, command_advanced_options = [uncompress_nocompress, umask_option], command_basic_options = [checkpoint, tagname, working_repo_dir, reorder_patches, sibling, relink, relink_pristine]} \end{code} \begin{code} optimize_cmd :: [DarcsFlag] -> [String] -> IO () optimize_cmd origopts _ = withRepoLock opts $- \repository -> do cleanRepository repository do_reorder opts repository do_optimize_inventory repository when (CheckPoint `elem` opts) $ do_checkpoint opts repository when (Compress `elem` opts || UnCompress `elem` opts) $ optimize_compression opts when (Relink `elem` opts || (RelinkPristine `elem` opts)) $ do_relink opts repository putStrLn "Done optimizing!" where opts = if UnCompress `elem` origopts then NoCompress:origopts else origopts is_tag :: PatchInfo -> Bool is_tag pinfo = take 4 (just_name pinfo) == "TAG " \end{code} Optimize always writes out a fresh copy of the inventory that minimizes the amount of inventory that need be downloaded when people pull from the repository. Specifically, it breaks up the inventory on the most recent tag. This speeds up most commands when run remotely, both because a smaller file needs to be transfered (only the most recent inventory). It also gives a guarantee that all the patches prior to a given tag are included in that tag, so less commutation and history traversal is needed. This latter issue can become very important in large repositories. \begin{code} do_optimize_inventory :: RepoPatch p => Repository p -> IO () do_optimize_inventory repository = do debugMessage "Writing out a nice copy of the inventory." optimizeInventory repository debugMessage "Done writing out a nice copy of the inventory." \end{code} \begin{options} --checkpoint, --tag \end{options} If you use the \verb!--checkpoint! option, optimize creates a checkpoint patch for a tag. You can specify the tag with the \verb!--tag! option, or just let darcs choose the most recent tag. Note that optimize \verb!--checkpoint! will fail when used on a ``partial'' repository. Also, the tag that is to be checkpointed must not be preceded by any patches that are not included in that tag. If that is the case, no checkpointing is done. The created checkpoint is used by the \verb!--partial! flag to \verb!get! and \verb!check!. This allows for users to retrieve a working repository with limited history with a savings of disk space and bandwidth. \begin{code} do_checkpoint :: RepoPatch p => [DarcsFlag] -> Repository p -> IO () do_checkpoint opts repository = do mpi <- get_tag opts repository case mpi of Nothing -> return () Just pinfo -> do putDocLn $ text "Checkpointing tag:" $$ human_friendly pinfo write_checkpoint repository pinfo get_tag :: RepoPatch p => [DarcsFlag] -> Repository p -> IO (Maybe PatchInfo) get_tag [] r = do Sealed ps <- read_repo r case filter is_tag $ lasts $ mapRL (mapRL info) ps of [] -> do putStrLn "There is no tag to checkpoint!" return Nothing (pinfo:_) -> return $ Just pinfo get_tag (TagName t:_) r = do Sealed ps <- read_repo r case filter (match_tag t) $ lasts $ mapRL (mapRL info) ps of (pinfo:_) -> return $ Just pinfo _ -> case filter (match_tag t) $ lasts $ mapRL (mapRL info) $ deep_optimize_patchset ps of (pinfo:_) -> return $ Just pinfo _ -> do putStr "Cannot checkpoint any tag " putStr $ "matching '"++t++"'\n" return Nothing get_tag (_:fs) r = get_tag fs r lasts :: [[a]] -> [a] lasts [] = [] lasts (x@(_:_):ls) = last x : lasts ls lasts ([]:ls) = lasts ls \end{code} \begin{code} mymatch :: String -> PatchInfo -> Bool mymatch r = match_name $ matchRegex (mkRegex r) match_name :: (String -> Maybe a) -> PatchInfo -> Bool match_name ch pinfo = isJust $ ch (just_name pinfo) match_tag :: String -> PatchInfo -> Bool match_tag ('^':n) = mymatch $ "^TAG "++n match_tag n = mymatch $ "^TAG .*"++n \end{code} \begin{options} --compress, --dont-compress, --uncompress \end{options} Some compression options are available, and are independent of the \verb!--checkpoint! option. By default the patches in the repository are compressed. These use less disk space, which translates into less bandwidth if the repository is accessed remotely. Note that in the darcs-1.0 (also known as ``old fashioned inventory'') repository format, patches will always have the ``.gz'' extension whether they are compressed or not. You may want to uncompress the patches when you've got enough disk space but are running out of physical memory. If you give the \verb!--compress! option, optimize will compress all the patches in the repository. Similarly, if you give the \verb!--uncompress!, optimize will decompress all the patches in the repository. \verb!--dont-compress! means ``don't compress, but don't uncompress either''. It would be useful if one of the compression options was provided as a default and you wanted to override it. \begin{code} optimize_compression :: [DarcsFlag] -> IO () optimize_compression opts = do putStrLn "Optimizing (un)compression of patches..." do_compress (darcsdir++"/patches") putStrLn "Optimizing (un)compression of inventories..." do_compress (darcsdir++"/inventories") where do_compress f = do isd <- doesDirectoryExist f if isd then withCurrentDirectory f $ do fs <- filter notdot `fmap` getDirectoryContents "." mapM_ do_compress fs else if Compress `elem` opts then gzReadFilePS f >>= gzWriteAtomicFilePS f else gzReadFilePS f >>= writeAtomicFilePS f notdot ('.':_) = False notdot _ = True \end{code} \begin{options} --relink \end{options} The \verb|--relink| and \verb|--relink-pristine| options cause Darcs to relink files from a sibling. See Section \ref{disk-usage}. \begin{code} do_relink :: RepoPatch p => [DarcsFlag] -> Repository p -> IO () do_relink opts repository = do some_siblings <- return (flagsToSiblings opts) defrepo <- defaultrepo [] "" [] siblings <- return (some_siblings ++ defrepo) if (siblings == []) then putStrLn "No siblings -- no relinking done." else do when (Relink `elem` opts) $ do debugMessage "Relinking patches..." patches <- (liftM list_slurpy_files) (slurp $ darcsdir++"/patches") maybeRelinkFiles siblings patches (darcsdir++"/patches") when (RelinkPristine `elem` opts) $ do pristine <- identifyPristine case (pristineDirectory pristine) of (Just d) -> do debugMessage "Relinking pristine tree..." c <- slurp_recorded repository maybeRelinkFiles siblings (list_slurpy_files c) d Nothing -> return () debugMessage "Done relinking." return () return () maybeRelinkFiles :: [String] -> [String] -> String -> IO () maybeRelinkFiles src dst dir = mapM_ (maybeRelinkFile src) (map ((dir ++ "/") ++) dst) maybeRelinkFile :: [String] -> String -> IO () maybeRelinkFile [] _ = return () maybeRelinkFile (h:t) f = do done <- maybeRelink (h ++ "/" ++ f) f unless done $ maybeRelinkFile t f return () \end{code} \begin{options} --reorder-patches \end{options} The \verb|--reorder-patches| option causes Darcs to create an optimal ordering of its internal patch inventory. This may help to produce shorter `context' lists when sending patches, and may improve performance for some other operations as well. You should not run \verb!--reorder-patches! on a repository from which someone may be simultaneously pulling or getting, as this could lead to repository corruption. \begin{code} do_reorder :: RepoPatch p => [DarcsFlag] -> Repository p -> IO () do_reorder opts _ | not (Reorder `elem` opts) = return () do_reorder opts repository = do debugMessage "Reordering the inventory." psnew <- choose_order `liftSM` read_repo repository let ps = mapFL_FL hopefully $ reverseRL $ head $ unsafeUnRL psnew withGutsOf repository $ do tentativelyReplacePatches repository opts ps finalizeRepositoryChanges repository debugMessage "Done reordering the inventory." choose_order :: RepoPatch p => PatchSet p -> PatchSet p choose_order ps | isJust last_tag = case slightly_optimize_patchset $ unsafeUnseal $ get_patches_in_tag lt ps of ((t:<:NilRL):<:pps) -> case get_patches_beyond_tag lt ps of FlippedSeal (p :<: NilRL) -> (p+<+(t:<:NilRL)) :<: pps _ -> impossible _ -> impossible where last_tag = case filter is_tag $ mapRL info $ concatRL ps of (t:_) -> Just t _ -> Nothing lt = fromJust last_tag choose_order ps = ps \end{code}