% Copyright (C) 2003-2005 David Roundy
%
% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 2, or (at your option)
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; see the file COPYING. If not, write to
% the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
% Boston, MA 02110-1301, USA.
\darcsCommand{optimize}
\begin{code}
module Darcs.Commands.Optimize ( optimize ) where
import Control.Monad ( when, unless )
import Data.Maybe ( isJust )
import System.Directory ( getDirectoryContents, doesDirectoryExist, doesFileExist )
import qualified Data.ByteString.Char8 as BS
import Storage.Hashed.Darcs( decodeDarcsSize )
import Darcs.Hopefully ( hopefully, info )
import Darcs.Commands ( DarcsCommand(..), nodefaults )
import Darcs.Arguments ( DarcsFlag( UpgradeFormat, UseHashedInventory,
Compress, UnCompress,
NoCompress, Reorder,
Relink, RelinkPristine, OptimizePristine ),
reorderPatches,
uncompressNocompress,
relink, relinkPristine, sibling,
flagsToSiblings,
upgradeFormat,
workingRepoDir, umaskOption, optimizePristine
)
import Darcs.Repository.Prefs ( getPreflist )
import Darcs.Repository ( Repository, PatchSet, withRepoLock, ($-), withGutsOf,
read_repo, optimizeInventory, slurp_recorded,
tentativelyReplacePatches, cleanRepository,
amInRepository, finalizeRepositoryChanges, replacePristine )
import Darcs.Witnesses.Ordered ( RL(..), unsafeUnRL, (+<+), mapFL_FL, reverseRL, mapRL, concatRL )
import Darcs.Patch.Info ( PatchInfo, just_name )
import Darcs.Patch ( RepoPatch )
import ByteStringUtils ( gzReadFilePS )
import Darcs.Patch.Depends ( slightly_optimize_patchset,
get_patches_beyond_tag, get_patches_in_tag,
)
import Darcs.Lock ( maybeRelink, gzWriteAtomicFilePS, writeAtomicFilePS )
import Darcs.RepoPath ( toFilePath )
import Darcs.Utils ( withCurrentDirectory )
import Progress ( debugMessage )
import Darcs.SlurpDirectory ( slurp, list_slurpy_files )
import Darcs.Repository.Pristine ( identifyPristine, pristineDirectory )
import Darcs.Witnesses.Sealed ( FlippedSeal(..), unsafeUnseal )
import Darcs.Global ( darcsdir )
#include "impossible.h"
import qualified Data.ByteString as B (empty)
import System.Directory ( createDirectoryIfMissing, removeFile )
import System.FilePath.Posix ( takeExtension, (</>) )
import Progress ( beginTedious, endTedious, tediousSize, progress )
import SHA1 ( sha1PS )
import Darcs.Flags ( compression )
import Darcs.Lock ( rm_recursive )
import Darcs.Witnesses.Ordered ( mapFL, mapRL_RL, bunchFL, lengthRL )
import Darcs.ProgressPatches ( progressFL )
import Darcs.Repository.Cache ( hashedDir, HashedDir(HashedPristineDir) )
import Darcs.Repository.Format ( identifyRepoFormat,
createRepoFormat, writeRepoFormat, formatHas,
RepoProperty ( HashedInventory ) )
import qualified Darcs.Repository.HashedRepo as HashedRepo
import Darcs.Repository.Prefs ( getCaches )
import Darcs.Repository.Repair ( replayRepository, RepositoryConsistency(..) )
import Darcs.Repository.State ( readRecorded )
import Darcs.Utils ( catchall )
#include "gadts.h"
optimizeDescription :: String
optimizeDescription = "Optimize the repository."
optimizeHelp :: String
optimizeHelp =
"The `darcs optimize' command modifies the current repository in an\n" ++
"attempt to reduce its resource requirements. By default a single\n" ++
"fast, safe optimization is performed; additional optimization\n" ++
"techniques can be enabled by passing options to `darcs optimize'.\n" ++
"\n" ++ optimizeHelpInventory ++
"\n" ++ optimizeHelpRelink ++
"\n" ++ optimizeHelpCompression ++
"\n" ++
"There is one more optimization which CAN NOT be performed by this\n" ++
"command. Every time your record a patch, a new inventory file is\n" ++
"written to _darcs/inventories/, and old inventories are never reaped.\n" ++
"\n" ++
"If _darcs/inventories/ is consuming a relatively large amount of\n" ++
"space, you can safely reclaim it by using `darcs get' to make a\n" ++
"complete copy of the repo. When doing so, don't forget to copy over\n" ++
"any unsaved changes you have made to the working tree or to\n" ++
"unversioned files in _darcs/prefs/ (such as _darcs/prefs/author).\n"
optimize :: DarcsCommand
optimize = DarcsCommand {commandName = "optimize",
commandHelp = optimizeHelp,
commandDescription = optimizeDescription,
commandExtraArgs = 0,
commandExtraArgHelp = [],
commandCommand = optimizeCmd,
commandPrereq = amInRepository,
commandGetArgPossibilities = return [],
commandArgdefaults = nodefaults,
commandAdvancedOptions = [uncompressNocompress, umaskOption],
commandBasicOptions = [workingRepoDir,
reorderPatches,
sibling, relink,
relinkPristine,
upgradeFormat,
optimizePristine]}
optimizeCmd :: [DarcsFlag] -> [String] -> IO ()
optimizeCmd origopts _ = do
when (UpgradeFormat `elem` origopts) optimizeUpgradeFormat
withRepoLock opts $- \repository -> do
if (OptimizePristine `elem` opts)
then doOptimizePristine repository
else do cleanRepository repository
doReorder opts repository
doOptimizeInventory repository
when (Compress `elem` opts || UnCompress `elem` opts) $
optimizeCompression opts
when (Relink `elem` opts || (RelinkPristine `elem` opts)) $
doRelink opts repository
putStrLn "Done optimizing!"
where opts = if UnCompress `elem` origopts then NoCompress:origopts else origopts
isTag :: PatchInfo -> Bool
isTag pinfo = take 4 (just_name pinfo) == "TAG "
optimizeHelpInventory :: String
optimizeHelpInventory =
"The default optimization moves recent patches (those not included in\n" ++
"the latest tag) to the `front', reducing the amount that a typical\n" ++
"remote command needs to download. It should also reduce the CPU time\n" ++
"needed for some operations.\n"
doOptimizeInventory :: RepoPatch p => Repository p -> IO ()
doOptimizeInventory repository = do
debugMessage "Writing out a nice copy of the inventory."
optimizeInventory repository
debugMessage "Done writing out a nice copy of the inventory."
optimizeHelpCompression :: String
optimizeHelpCompression =
"By default patches are compressed with zlib (RFC 1951) to reduce\n" ++
"storage (and download) size. In exceptional circumstances, it may be\n" ++
"preferable to avoid compression. In this case the `--dont-compress'\n" ++
"option can be used (e.g. with `darcs record') to avoid compression.\n" ++
"\n" ++
"The `darcs optimize --uncompress' and `darcs optimize --compress'\n" ++
"commands can be used to ensure existing patches in the current\n" ++
"repository are respectively uncompressed or compressed. Note that\n" ++
"repositories in the legacy `old-fashioned-inventory' format have a .gz\n" ++
"extension on patch files even when uncompressed.\n"
optimizeCompression :: [DarcsFlag] -> IO ()
optimizeCompression opts = do
putStrLn "Optimizing (un)compression of patches..."
do_compress (darcsdir++"/patches")
putStrLn "Optimizing (un)compression of inventories..."
do_compress (darcsdir++"/inventories")
where do_compress f =
do isd <- doesDirectoryExist f
if isd then withCurrentDirectory f $
do fs <- filter notdot `fmap` getDirectoryContents "."
mapM_ do_compress fs
else if Compress `elem` opts
then gzReadFilePS f >>= gzWriteAtomicFilePS f
else gzReadFilePS f >>= writeAtomicFilePS f
notdot ('.':_) = False
notdot _ = True
optimizeHelpRelink :: String
optimizeHelpRelink =
"The `darcs optimize --relink' command hard-links patches that the\n" ++
"current repository has in common with its peers. Peers are those\n" ++
"repositories listed in _darcs/prefs/sources, or defined with the\n" ++
"`--sibling' option (which can be used multiple times).\n" ++
"\n" ++
"Darcs uses hard-links automatically, so this command is rarely needed.\n" ++
"It is most useful if you used `cp -r' instead of `darcs get' to copy a\n" ++
"repository, or if you pulled the same patch from a remote repository\n" ++
"into multiple local repositories.\n" ++
"\n" ++
"A `darcs optimize --relink-pristine' command is also available, but\n" ++
"generally SHOULD NOT be used. It results in a relatively small space\n" ++
"saving at the cost of making many Darcs commands MUCH slower.\n"
doOptimizePristine :: RepoPatch p => Repository p -> IO ()
doOptimizePristine repo = do
hashed <- doesFileExist $ "_darcs" </> "hashed_inventory"
when hashed $ do
inv <- BS.readFile ("_darcs" </> "hashed_inventory")
let linesInv = BS.split '\n' inv
case linesInv of
[] -> return ()
(pris_line:_) ->
let size = decodeDarcsSize $ BS.drop 9 pris_line
in when (isJust size) $ do putStrLn "Optimizing hashed pristine..."
readRecorded repo >>= replacePristine repo
cleanRepository repo
doRelink :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
doRelink opts repository =
do some_siblings <- return (flagsToSiblings opts)
defrepolist <- getPreflist "defaultrepo"
siblings <- return (map toFilePath some_siblings ++ defrepolist)
if (siblings == [])
then putStrLn "No siblings -- no relinking done."
else do when (Relink `elem` opts) $
do debugMessage "Relinking patches..."
patches <-
(fmap list_slurpy_files) (slurp $ darcsdir++"/patches")
maybeRelinkFiles siblings patches (darcsdir++"/patches")
when (RelinkPristine `elem` opts) $
do pristine <- identifyPristine
case (pristineDirectory pristine) of
(Just d) -> do
debugMessage "Relinking pristine tree..."
c <- slurp_recorded repository
maybeRelinkFiles
siblings (list_slurpy_files c) d
Nothing -> return ()
debugMessage "Done relinking."
return ()
return ()
maybeRelinkFiles :: [String] -> [String] -> String -> IO ()
maybeRelinkFiles src dst dir =
mapM_ (maybeRelinkFile src) (map ((dir ++ "/") ++) dst)
maybeRelinkFile :: [String] -> String -> IO ()
maybeRelinkFile [] _ = return ()
maybeRelinkFile (h:t) f =
do done <- maybeRelink (h ++ "/" ++ f) f
unless done $
maybeRelinkFile t f
return ()
\end{code}
\begin{options}
--reorder-patches
\end{options}
The \verb|--reorder-patches| option causes Darcs to create an optimal
ordering of its internal patch inventory. This may help to produce shorter
`context' lists when sending patches, and may improve performance for some
other operations as well. You should not run \verb!--reorder-patches! on a
repository from which someone may be simultaneously pulling or getting, as
this could lead to repository corruption.
\begin{code}
doReorder :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
doReorder opts _ | not (Reorder `elem` opts) = return ()
doReorder opts repository = do
debugMessage "Reordering the inventory."
psnew <- chooseOrder `fmap` read_repo repository
let ps = mapFL_FL hopefully $ reverseRL $ head $ unsafeUnRL psnew
withGutsOf repository $ do tentativelyReplacePatches repository opts ps
finalizeRepositoryChanges repository
debugMessage "Done reordering the inventory."
chooseOrder :: RepoPatch p => PatchSet p -> PatchSet p
chooseOrder ps | isJust last_tag =
case slightly_optimize_patchset $ unsafeUnseal $ get_patches_in_tag lt ps of
((t:<:NilRL):<:pps) -> case get_patches_beyond_tag lt ps of
FlippedSeal p -> (p+<+(t:<:NilRL)) :<: pps
_ -> impossible
where last_tag = case filter isTag $ mapRL info $ concatRL ps of
(t:_) -> Just t
_ -> Nothing
lt = fromJust last_tag
chooseOrder ps = ps
\end{code}
The \verb|--upgrade| option for \verb!darcs optimize! performs an inplace
upgrade of your repository to the lastest \emph{compatible} format. Right now
means that darcs 1 old-fashioned repositories will be upgraded to darcs-1
hashed repositories (and notably, not to darcs 2 repositories as that would not
be compatible; see \verb!darcs convert!).
\begin{code}
optimizeUpgradeFormat :: IO ()
optimizeUpgradeFormat = do
debugMessage $ "Upgrading to hashed..."
rf <- either fail return =<< identifyRepoFormat "."
debugMessage $ "Found our format"
if formatHas HashedInventory rf
then putStrLn "No action taken because this repository already is hashed."
else do putStrLn "Checking repository in case of corruption..."
withRepoLock [] $- \repository -> do
state <- replayRepository repository [] return
case state of
RepositoryConsistent -> do
putStrLn "The repository is consistent."
actuallyUpgradeFormat repository
_repoIsBroken ->
putStrLn "Corruption detected! Please run darcs repair first."
actuallyUpgradeFormat :: RepoPatch p => Repository p C(r u t) -> IO ()
actuallyUpgradeFormat repository = do
patches <- read_repo repository
let k = "Hashing patch"
beginTedious k
tediousSize k (lengthRL $ concatRL patches)
let patches' = mapRL_RL (mapRL_RL (progress k)) patches
cache <- getCaches [] "."
let compr = compression []
HashedRepo.write_tentative_inventory cache compr patches'
endTedious k
let patchesToApply = progressFL "Applying patch" $ reverseRL $ concatRL $ patches'
createDirectoryIfMissing False $ darcsdir </> hashedDir HashedPristineDir
writeFile (darcsdir </> hashedDir HashedPristineDir </> sha1PS B.empty) ""
sequence_ $ mapFL (HashedRepo.apply_to_tentative_pristine cache []) $ bunchFL 100 patchesToApply
HashedRepo.finalize_tentative_changes repository compr
writeRepoFormat (createRepoFormat [UseHashedInventory]) (darcsdir </> "format")
debugMessage "Cleaning out old-fashioned repository files..."
removeFile $ darcsdir </> "inventory"
removeFile $ darcsdir </> "tentative_inventory"
rm_recursive (darcsdir </> "pristine") `catchall` rm_recursive (darcsdir </> "current")
rmGzsIn (darcsdir </> "patches")
rmGzsIn (darcsdir </> "inventories")
let checkpointDir = darcsdir </> "checkpoints"
hasCheckPoints <- doesDirectoryExist checkpointDir
when hasCheckPoints $ rm_recursive checkpointDir
putStrLn "Done upgrading!"
where
rmGzsIn dir =
withCurrentDirectory dir $ do
gzs <- filter ((== ".gz") . takeExtension) `fmap` getDirectoryContents "."
mapM_ removeFile gzs
\end{code}