% Copyright (C) 2003-2005 David Roundy % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2, or (at your option) % any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; see the file COPYING. If not, write to % the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, % Boston, MA 02110-1301, USA. \darcsCommand{optimize} \begin{code}
{-# OPTIONS_GHC -cpp #-}
{-# LANGUAGE CPP #-}

module Darcs.Commands.Optimize ( optimize ) where
import Control.Monad ( when, unless )
import Data.Maybe ( isJust )
import System.Directory ( getDirectoryContents, doesDirectoryExist, doesFileExist )
import qualified Data.ByteString.Char8 as BS

import Storage.Hashed.Darcs( decodeDarcsSize )

import Darcs.Hopefully ( hopefully, info )
import Darcs.Commands ( DarcsCommand(..), nodefaults )
import Darcs.Arguments ( DarcsFlag( UpgradeFormat, UseHashedInventory,
                                    Compress, UnCompress,
                                    NoCompress, Reorder,
                                    Relink, RelinkPristine, OptimizePristine ),
                        reorderPatches,
                        uncompressNocompress,
                        relink, relinkPristine, sibling,
                        flagsToSiblings,
                        upgradeFormat,
                        workingRepoDir, umaskOption, optimizePristine
                      )
import Darcs.Repository.Prefs ( getPreflist )
import Darcs.Repository ( Repository, PatchSet, withRepoLock, ($-), withGutsOf,
                          read_repo, optimizeInventory, slurp_recorded,
                          tentativelyReplacePatches, cleanRepository,
                          amInRepository, finalizeRepositoryChanges, replacePristine )
import Darcs.Witnesses.Ordered ( RL(..), unsafeUnRL, (+<+), mapFL_FL, reverseRL, mapRL, concatRL )
import Darcs.Patch.Info ( PatchInfo, just_name )
import Darcs.Patch ( RepoPatch )
import ByteStringUtils ( gzReadFilePS )
import Darcs.Patch.Depends ( slightly_optimize_patchset,
                 get_patches_beyond_tag, get_patches_in_tag,
               )
import Darcs.Lock ( maybeRelink, gzWriteAtomicFilePS, writeAtomicFilePS )
import Darcs.RepoPath ( toFilePath )
import Darcs.Utils ( withCurrentDirectory )
import Progress ( debugMessage )
import Darcs.SlurpDirectory ( slurp, list_slurpy_files )
import Darcs.Repository.Pristine ( identifyPristine, pristineDirectory )
import Darcs.Witnesses.Sealed ( FlippedSeal(..), unsafeUnseal )
import Darcs.Global ( darcsdir )
#include "impossible.h"
-- imports for optimize --upgrade; to be tidied
import qualified Data.ByteString as B (empty)
import System.Directory ( createDirectoryIfMissing, removeFile )
import System.FilePath.Posix ( takeExtension, (</>) )

import Progress ( beginTedious, endTedious, tediousSize, progress )
import SHA1 ( sha1PS )
import Darcs.Flags ( compression )
import Darcs.Lock ( rm_recursive )
import Darcs.Witnesses.Ordered ( mapFL, mapRL_RL, bunchFL, lengthRL )
import Darcs.ProgressPatches ( progressFL )
import Darcs.Repository.Cache ( hashedDir, HashedDir(HashedPristineDir) )
import Darcs.Repository.Format ( identifyRepoFormat,
                                 createRepoFormat, writeRepoFormat, formatHas,
                                 RepoProperty ( HashedInventory ) )
import qualified Darcs.Repository.HashedRepo as HashedRepo
import Darcs.Repository.Prefs ( getCaches )
import Darcs.Repository.Repair ( replayRepository, RepositoryConsistency(..) )
import Darcs.Repository.State ( readRecorded )
import Darcs.Utils ( catchall )
#include "gadts.h"

optimizeDescription :: String
optimizeDescription = "Optimize the repository."

optimizeHelp :: String
optimizeHelp =
 "The `darcs optimize' command modifies the current repository in an\n" ++
 "attempt to reduce its resource requirements.  By default a single\n" ++
 "fast, safe optimization is performed; additional optimization\n" ++
 "techniques can be enabled by passing options to `darcs optimize'.\n" ++
 "\n" ++ optimizeHelpInventory ++
 -- "\n" ++ optimize_help_reorder ++
 "\n" ++ optimizeHelpRelink ++
 -- uncompression is least useful, so it is last.
 "\n" ++ optimizeHelpCompression ++
 "\n" ++
 "There is one more optimization which CAN NOT be performed by this\n" ++
 "command.  Every time your record a patch, a new inventory file is\n" ++
 "written to _darcs/inventories/, and old inventories are never reaped.\n" ++
 "\n" ++
 "If _darcs/inventories/ is consuming a relatively large amount of\n" ++
 "space, you can safely reclaim it by using `darcs get' to make a\n" ++
 "complete copy of the repo.  When doing so, don't forget to copy over\n" ++
 "any unsaved changes you have made to the working tree or to\n" ++
 "unversioned files in _darcs/prefs/ (such as _darcs/prefs/author).\n"

optimize :: DarcsCommand
optimize = DarcsCommand {commandName = "optimize",
                         commandHelp = optimizeHelp,
                         commandDescription = optimizeDescription,
                         commandExtraArgs = 0,
                         commandExtraArgHelp = [],
                         commandCommand = optimizeCmd,
                         commandPrereq = amInRepository,
                         commandGetArgPossibilities = return [],
                         commandArgdefaults = nodefaults,
                         commandAdvancedOptions = [uncompressNocompress, umaskOption],
                         commandBasicOptions = [workingRepoDir,
                                                 reorderPatches,
                                                 sibling, relink,
                                                 relinkPristine,
                                                  upgradeFormat,
                                                 optimizePristine]}

optimizeCmd :: [DarcsFlag] -> [String] -> IO ()
optimizeCmd origopts _ = do
    when (UpgradeFormat `elem` origopts) optimizeUpgradeFormat
    withRepoLock opts $- \repository -> do
    if (OptimizePristine `elem` opts)
       then doOptimizePristine repository
       else do cleanRepository repository
               doReorder opts repository
               doOptimizeInventory repository
               when (Compress `elem` opts || UnCompress `elem` opts) $
                    optimizeCompression opts
               when (Relink `elem` opts || (RelinkPristine `elem` opts)) $
                    doRelink opts repository
    putStrLn "Done optimizing!"
  where opts = if UnCompress `elem` origopts then NoCompress:origopts else origopts
isTag :: PatchInfo -> Bool
isTag pinfo = take 4 (just_name pinfo) == "TAG "

optimizeHelpInventory :: String
optimizeHelpInventory =
 "The default optimization moves recent patches (those not included in\n" ++
 "the latest tag) to the `front', reducing the amount that a typical\n" ++
 "remote command needs to download.  It should also reduce the CPU time\n" ++
 "needed for some operations.\n"

doOptimizeInventory :: RepoPatch p => Repository p -> IO ()
doOptimizeInventory repository = do
    debugMessage "Writing out a nice copy of the inventory."
    optimizeInventory repository
    debugMessage "Done writing out a nice copy of the inventory."

optimizeHelpCompression :: String
optimizeHelpCompression =
 "By default patches are compressed with zlib (RFC 1951) to reduce\n" ++
 "storage (and download) size.  In exceptional circumstances, it may be\n" ++
 "preferable to avoid compression.  In this case the `--dont-compress'\n" ++
 "option can be used (e.g. with `darcs record') to avoid compression.\n" ++
 "\n" ++
 "The `darcs optimize --uncompress' and `darcs optimize --compress'\n" ++
 "commands can be used to ensure existing patches in the current\n" ++
 "repository are respectively uncompressed or compressed.  Note that\n" ++
 "repositories in the legacy `old-fashioned-inventory' format have a .gz\n" ++
 "extension on patch files even when uncompressed.\n"

optimizeCompression :: [DarcsFlag] -> IO ()
optimizeCompression opts = do
    putStrLn "Optimizing (un)compression of patches..."
    do_compress (darcsdir++"/patches")
    putStrLn "Optimizing (un)compression of inventories..."
    do_compress (darcsdir++"/inventories")
    where do_compress f =
              do isd <- doesDirectoryExist f
                 if isd then withCurrentDirectory f $
                             do fs <- filter notdot `fmap` getDirectoryContents "."
                                mapM_ do_compress fs
                        else if Compress `elem` opts
                             then gzReadFilePS f >>= gzWriteAtomicFilePS f
                             else gzReadFilePS f >>= writeAtomicFilePS f
          notdot ('.':_) = False
          notdot _ = True

optimizeHelpRelink :: String
optimizeHelpRelink =
 "The `darcs optimize --relink' command hard-links patches that the\n" ++
 "current repository has in common with its peers.  Peers are those\n" ++
 "repositories listed in _darcs/prefs/sources, or defined with the\n" ++
 "`--sibling' option (which can be used multiple times).\n" ++
 "\n" ++
 "Darcs uses hard-links automatically, so this command is rarely needed.\n" ++
 "It is most useful if you used `cp -r' instead of `darcs get' to copy a\n" ++
 "repository, or if you pulled the same patch from a remote repository\n" ++
 "into multiple local repositories.\n" ++
 "\n" ++
 "A `darcs optimize --relink-pristine' command is also available, but\n" ++
 "generally SHOULD NOT be used.  It results in a relatively small space\n" ++
 "saving at the cost of making many Darcs commands MUCH slower.\n"

doOptimizePristine :: RepoPatch p => Repository p -> IO ()
doOptimizePristine repo = do
  hashed <- doesFileExist $ "_darcs" </> "hashed_inventory"
  when hashed $ do
    inv <- BS.readFile ("_darcs" </> "hashed_inventory")
    let linesInv = BS.split '\n' inv
    case linesInv of
      [] -> return ()
      (pris_line:_) ->
          let size = decodeDarcsSize $ BS.drop 9 pris_line
           in when (isJust size) $ do putStrLn "Optimizing hashed pristine..."
                                      readRecorded repo >>= replacePristine repo
                                      cleanRepository repo

doRelink :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
doRelink opts repository =
    do some_siblings <- return (flagsToSiblings opts)
       defrepolist <- getPreflist "defaultrepo"
       siblings <- return (map toFilePath some_siblings ++ defrepolist)
       if (siblings == []) 
          then putStrLn "No siblings -- no relinking done."
          else do when (Relink `elem` opts) $
                      do debugMessage "Relinking patches..."
                         patches <-
                           (fmap list_slurpy_files) (slurp $ darcsdir++"/patches")
                         maybeRelinkFiles siblings patches (darcsdir++"/patches")
                  when (RelinkPristine `elem` opts) $
                      do pristine <- identifyPristine
                         case (pristineDirectory pristine) of
                             (Just d) -> do
                                 debugMessage "Relinking pristine tree..."
                                 c <- slurp_recorded repository
                                 maybeRelinkFiles
                                     siblings (list_slurpy_files c) d
                             Nothing -> return ()
                  debugMessage "Done relinking."
                  return ()
       return ()

maybeRelinkFiles :: [String] -> [String] -> String -> IO ()
maybeRelinkFiles src dst dir = 
    mapM_ (maybeRelinkFile src) (map ((dir ++ "/") ++) dst)

maybeRelinkFile :: [String] -> String -> IO ()
maybeRelinkFile [] _ = return ()
maybeRelinkFile (h:t) f =
    do done <- maybeRelink (h ++ "/" ++ f) f
       unless done $
           maybeRelinkFile t f
       return ()


\end{code} \begin{options} --reorder-patches \end{options} The \verb|--reorder-patches| option causes Darcs to create an optimal ordering of its internal patch inventory. This may help to produce shorter `context' lists when sending patches, and may improve performance for some other operations as well. You should not run \verb!--reorder-patches! on a repository from which someone may be simultaneously pulling or getting, as this could lead to repository corruption. \begin{code}

-- FIXME: someone needs to grovel through the source and determine
-- just how optimizeInventory differs from doReorder.  The following
-- is purely speculation. --twb, 2009-04
-- optimize_help_reorder :: String
-- optimize_help_reorder =
--  "The `darcs optimize --reorder' command is a more comprehensive version\n" ++
--  "of the default optimization.  It reorders patches with respect to ALL\n" ++
--  "tags, rather than just the latest tag.\n"

doReorder :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
doReorder opts _ | not (Reorder `elem` opts) = return ()
doReorder opts repository = do
    debugMessage "Reordering the inventory."
    psnew <- chooseOrder `fmap` read_repo repository
    let ps = mapFL_FL hopefully $ reverseRL $ head $ unsafeUnRL psnew
    withGutsOf repository $ do tentativelyReplacePatches repository opts ps
                               finalizeRepositoryChanges repository
    debugMessage "Done reordering the inventory."

chooseOrder :: RepoPatch p => PatchSet p -> PatchSet p
chooseOrder ps | isJust last_tag =
    case slightly_optimize_patchset $ unsafeUnseal $ get_patches_in_tag lt ps of 
    ((t:<:NilRL):<:pps) -> case get_patches_beyond_tag lt ps of
                           FlippedSeal p -> (p+<+(t:<:NilRL)) :<: pps
    _ -> impossible             
    where last_tag = case filter isTag $ mapRL info $ concatRL ps of
                     (t:_) -> Just t
                     _ -> Nothing
          lt = fromJust last_tag
chooseOrder ps = ps
\end{code} The \verb|--upgrade| option for \verb!darcs optimize! performs an inplace upgrade of your repository to the lastest \emph{compatible} format. Right now means that darcs 1 old-fashioned repositories will be upgraded to darcs-1 hashed repositories (and notably, not to darcs 2 repositories as that would not be compatible; see \verb!darcs convert!). \begin{code}
optimizeUpgradeFormat :: IO ()
optimizeUpgradeFormat = do
  debugMessage $ "Upgrading to hashed..."
  rf <- either fail return =<< identifyRepoFormat "."
  debugMessage $ "Found our format"
  if formatHas HashedInventory rf
     then putStrLn "No action taken because this repository already is hashed."
     else do putStrLn "Checking repository in case of corruption..."
             withRepoLock [] $- \repository -> do
             state <- replayRepository repository [] return
             case state of
               RepositoryConsistent -> do
                 putStrLn "The repository is consistent."
                 actuallyUpgradeFormat repository
               _repoIsBroken ->
                 putStrLn "Corruption detected! Please run darcs repair first."

actuallyUpgradeFormat :: RepoPatch p => Repository p C(r u t) -> IO ()
actuallyUpgradeFormat repository = do
  -- convert patches/inventory
  patches <- read_repo repository
  let k = "Hashing patch"
  beginTedious k
  tediousSize k (lengthRL $ concatRL patches)
  let patches' = mapRL_RL (mapRL_RL (progress k)) patches
  cache <- getCaches [] "."
  let compr = compression [] -- default compression
  HashedRepo.write_tentative_inventory cache compr patches'
  endTedious k
  -- convert pristine by applying patches
  -- the faster alternative would be to copy pristine, but the apply method is more reliable
  let patchesToApply = progressFL "Applying patch" $ reverseRL $ concatRL $ patches'
  createDirectoryIfMissing False $ darcsdir </> hashedDir HashedPristineDir
  writeFile (darcsdir </> hashedDir HashedPristineDir </> sha1PS B.empty) ""
  sequence_ $ mapFL (HashedRepo.apply_to_tentative_pristine cache []) $ bunchFL 100 patchesToApply
  -- now make it official
  HashedRepo.finalize_tentative_changes repository compr
  writeRepoFormat (createRepoFormat [UseHashedInventory]) (darcsdir </> "format")
  -- clean out old-fashioned junk
  debugMessage "Cleaning out old-fashioned repository files..."
  removeFile   $ darcsdir </> "inventory"
  removeFile   $ darcsdir </> "tentative_inventory"
  rm_recursive (darcsdir </> "pristine") `catchall` rm_recursive (darcsdir </> "current")
  rmGzsIn (darcsdir </> "patches")
  rmGzsIn (darcsdir </> "inventories")
  let checkpointDir = darcsdir </> "checkpoints"
  hasCheckPoints <- doesDirectoryExist checkpointDir
  when hasCheckPoints $ rm_recursive checkpointDir
  putStrLn "Done upgrading!"
 where
  rmGzsIn dir =
    withCurrentDirectory dir $ do
      gzs <- filter ((== ".gz") . takeExtension) `fmap` getDirectoryContents "."
      mapM_ removeFile gzs
\end{code}