%  Copyright (C) 2003-2005 David Roundy
%
%  This program is free software; you can redistribute it and/or modify
%  it under the terms of the GNU General Public License as published by
%  the Free Software Foundation; either version 2, or (at your option)
%  any later version.
%
%  This program is distributed in the hope that it will be useful,
%  but WITHOUT ANY WARRANTY; without even the implied warranty of
%  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%  GNU General Public License for more details.
%
%  You should have received a copy of the GNU General Public License
%  along with this program; see the file COPYING.  If not, write to
%  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
%  Boston, MA 02110-1301, USA.

\subsection{darcs optimize}
\begin{code}
{-# OPTIONS_GHC -cpp #-}
{-# LANGUAGE CPP #-}

module Darcs.Commands.Optimize ( optimize ) where
import Control.Monad ( when, unless )
import Data.Maybe ( isJust )
import Text.Regex ( mkRegex, matchRegex )
import System.Directory ( getDirectoryContents, doesDirectoryExist )

import Darcs.Hopefully ( hopefully, info )
import Darcs.Commands ( DarcsCommand(..), nodefaults )
import Darcs.Arguments ( DarcsFlag( Compress, UnCompress,
                                    NoCompress, Reorder,
                                    TagName, CheckPoint,
                                    Relink, RelinkPristine ),
                        tagname, checkpoint, reorder_patches,
                        uncompress_nocompress,
                        relink, relink_pristine, sibling,
                        flagsToSiblings,
                        working_repo_dir, umask_option,
                      )
import Darcs.Repository.Prefs ( get_preflist )
import Darcs.Repository ( Repository, PatchSet, withRepoLock, ($-), withGutsOf,
                          read_repo, optimizeInventory, slurp_recorded,
                          tentativelyReplacePatches, cleanRepository,
                          amInRepository, finalizeRepositoryChanges )
import Darcs.Repository.Checkpoint ( write_checkpoint )
import Darcs.Ordered ( RL(..), unsafeUnRL, (+<+), mapFL_FL, reverseRL, mapRL, concatRL )
import Darcs.Patch.Info ( PatchInfo, just_name, human_friendly )
import Darcs.Patch ( RepoPatch )
import ByteStringUtils ( gzReadFilePS )
import Darcs.Patch.Depends ( deep_optimize_patchset, slightly_optimize_patchset,
                 get_patches_beyond_tag, get_patches_in_tag,
               )
import Darcs.Lock ( maybeRelink, gzWriteAtomicFilePS, writeAtomicFilePS )
import Darcs.RepoPath ( toFilePath )
import Darcs.Utils ( withCurrentDirectory )
import Progress ( debugMessage )
import Printer ( putDocLn, text, ($$) )
import Darcs.SlurpDirectory ( slurp, list_slurpy_files )
import Darcs.Repository.Pristine ( identifyPristine, pristineDirectory )
import Darcs.Sealed ( FlippedSeal(..), unsafeUnseal )
import Darcs.Global ( darcsdir )
#include "impossible.h"

optimize_description :: String
optimize_description =
 "Optimize the repository."
\end{code}

\options{optimize}

\haskell{optimize_help}

\begin{code}
optimize_help :: String
optimize_help =
 "Optimize can help to improve the performance of your repository in a number of cases.\n"

optimize :: DarcsCommand
optimize = DarcsCommand {command_name = "optimize",
                         command_help = optimize_help,
                         command_description = optimize_description,
                         command_extra_args = 0,
                         command_extra_arg_help = [],
                         command_command = optimize_cmd,
                         command_prereq = amInRepository,
                         command_get_arg_possibilities = return [],
                         command_argdefaults = nodefaults,
                         command_advanced_options = [uncompress_nocompress, umask_option],
                         command_basic_options = [checkpoint,
                                                 tagname,
                                                 working_repo_dir,
                                                 reorder_patches,
                                                 sibling, relink,
                                                 relink_pristine]}

optimize_cmd :: [DarcsFlag] -> [String] -> IO ()
optimize_cmd origopts _ = withRepoLock opts $- \repository -> do
    cleanRepository repository
    do_reorder opts repository
    do_optimize_inventory repository
    when (CheckPoint `elem` opts) $ do_checkpoint opts repository
    when (Compress `elem` opts || UnCompress `elem` opts) $ optimize_compression opts
    when (Relink `elem` opts || (RelinkPristine `elem` opts)) $
        do_relink opts repository
    putStrLn "Done optimizing!"
  where opts = if UnCompress `elem` origopts then NoCompress:origopts else origopts
is_tag :: PatchInfo -> Bool
is_tag pinfo = take 4 (just_name pinfo) == "TAG "
\end{code}

Optimize always writes out a fresh copy of the inventory that minimizes
the amount of inventory that need be downloaded when people pull from the
repository.

Specifically, it breaks up the inventory on the most recent tag.  This speeds
up most commands when run remotely, both because a smaller file needs to be
transfered (only the most recent inventory).  It also gives a
guarantee that all the patches prior to a given tag are included in that tag,
so less commutation and history traversal is needed.  This latter issue can
become very important in large repositories.

\begin{code}
do_optimize_inventory :: RepoPatch p => Repository p -> IO ()
do_optimize_inventory repository = do
    debugMessage "Writing out a nice copy of the inventory."
    optimizeInventory repository
    debugMessage "Done writing out a nice copy of the inventory."
\end{code}

\begin{options}
--checkpoint, --tag
\end{options}

If you use the \verb!--checkpoint! option, optimize creates a checkpoint patch
for a tag.  You can specify the tag with the \verb!--tag! option, or
just let darcs choose the most recent tag.  Note that optimize
\verb!--checkpoint! will fail when used on a ``partial'' repository.  Also,
the tag that is to be checkpointed must not be preceded by any patches
that are not included in that tag.  If that is the case, no checkpointing
is done.

The created checkpoint is used by the \verb!--partial! flag to 
\verb!get! and \verb!check!. This allows for users to retrieve
a working repository with limited history with a savings of disk
space and bandwidth. 



\begin{code}
do_checkpoint :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
do_checkpoint opts repository = do
    mpi <- get_tag opts repository
    case mpi of
      Nothing -> return ()
      Just pinfo -> do putDocLn $ text "Checkpointing tag:"
                               $$ human_friendly pinfo
                       write_checkpoint repository pinfo

get_tag :: RepoPatch p => [DarcsFlag] -> Repository p -> IO (Maybe PatchInfo)
get_tag [] r = do ps <- read_repo r
                  case filter is_tag $ lasts $ mapRL (mapRL info) ps of
                      [] -> do putStrLn "There is no tag to checkpoint!"
                               return Nothing
                      (pinfo:_) -> return $ Just pinfo
get_tag (TagName t:_) r =
    do ps <- read_repo r
       case filter (match_tag t) $ lasts $ mapRL (mapRL info) ps of
         (pinfo:_) -> return $ Just pinfo
         _ -> case filter (match_tag t) $
                   lasts $ mapRL (mapRL info) $ deep_optimize_patchset ps of
              (pinfo:_) -> return $ Just pinfo
              _ -> do putStr "Cannot checkpoint any tag "
                      putStr $ "matching '"++t++"'\n"
                      return Nothing
get_tag (_:fs) r = get_tag fs r

lasts :: [[a]] -> [a]
lasts [] = []
lasts (x@(_:_):ls) = last x : lasts ls
lasts ([]:ls) = lasts ls

mymatch :: String -> PatchInfo -> Bool
mymatch r = match_name $ matchRegex (mkRegex r)
match_name :: (String -> Maybe a) -> PatchInfo -> Bool
match_name ch pinfo = isJust $ ch (just_name pinfo)
match_tag :: String -> PatchInfo -> Bool
match_tag ('^':n) = mymatch $ "^TAG "++n
match_tag n = mymatch $ "^TAG .*"++n
\end{code}

\begin{options}
--compress, --dont-compress, --uncompress
\end{options}

Some compression options are available, and are independent of the
\verb!--checkpoint! option.

By default the patches in the repository are compressed. These use less
disk space, which translates into less bandwidth if the repository is accessed
remotely.

Note that in the darcs-1.0 (also known as ``old fashioned inventory'')
repository format, patches will always have the ``.gz'' extension whether
they are compressed or not.

You may want to uncompress the patches when you've got enough disk space but
are running out of physical memory.

If you give the \verb!--compress! option, optimize will compress all the
patches in the repository.  Similarly, if you give the \verb!--uncompress!,
optimize will decompress all the patches in the repository.
\verb!--dont-compress!  means ``don't compress, but don't uncompress
either''. It would be useful if one of the compression options was provided
as a default and you wanted to override it.

\begin{code}
optimize_compression :: [DarcsFlag] -> IO ()
optimize_compression opts = do
    putStrLn "Optimizing (un)compression of patches..."
    do_compress (darcsdir++"/patches")
    putStrLn "Optimizing (un)compression of inventories..."
    do_compress (darcsdir++"/inventories")
    where do_compress f =
              do isd <- doesDirectoryExist f
                 if isd then withCurrentDirectory f $
                             do fs <- filter notdot `fmap` getDirectoryContents "."
                                mapM_ do_compress fs
                        else if Compress `elem` opts
                             then gzReadFilePS f >>= gzWriteAtomicFilePS f
                             else gzReadFilePS f >>= writeAtomicFilePS f
          notdot ('.':_) = False
          notdot _ = True

\end{code}

\begin{options}
--relink
\end{options}

The \verb|--relink| and \verb|--relink-pristine| options cause Darcs
to relink files from a sibling.  See Section \ref{disk-usage}.


\begin{code}
do_relink :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
do_relink opts repository =
    do some_siblings <- return (flagsToSiblings opts)
       defrepolist <- get_preflist "defaultrepo"
       siblings <- return (map toFilePath some_siblings ++ defrepolist)
       if (siblings == []) 
          then putStrLn "No siblings -- no relinking done."
          else do when (Relink `elem` opts) $
                      do debugMessage "Relinking patches..."
                         patches <-
                           (fmap list_slurpy_files) (slurp $ darcsdir++"/patches")
                         maybeRelinkFiles siblings patches (darcsdir++"/patches")
                  when (RelinkPristine `elem` opts) $
                      do pristine <- identifyPristine
                         case (pristineDirectory pristine) of
                             (Just d) -> do
                                 debugMessage "Relinking pristine tree..."
                                 c <- slurp_recorded repository
                                 maybeRelinkFiles
                                     siblings (list_slurpy_files c) d
                             Nothing -> return ()
                  debugMessage "Done relinking."
                  return ()
       return ()

maybeRelinkFiles :: [String] -> [String] -> String -> IO ()
maybeRelinkFiles src dst dir = 
    mapM_ (maybeRelinkFile src) (map ((dir ++ "/") ++) dst)

maybeRelinkFile :: [String] -> String -> IO ()
maybeRelinkFile [] _ = return ()
maybeRelinkFile (h:t) f =
    do done <- maybeRelink (h ++ "/" ++ f) f
       unless done $
           maybeRelinkFile t f
       return ()
\end{code}

\begin{options}
--reorder-patches
\end{options}

The \verb|--reorder-patches| option causes Darcs to create an optimal
ordering of its internal patch inventory. This may help to produce shorter
`context' lists when sending patches, and may improve performance for some
other operations as well.  You should not run \verb!--reorder-patches! on a
repository from which someone may be simultaneously pulling or getting, as
this could lead to repository corruption.

\begin{code}
do_reorder :: RepoPatch p => [DarcsFlag] -> Repository p -> IO ()
do_reorder opts _ | not (Reorder `elem` opts) = return ()
do_reorder opts repository = do
    debugMessage "Reordering the inventory."
    psnew <- choose_order `fmap` read_repo repository
    let ps = mapFL_FL hopefully $ reverseRL $ head $ unsafeUnRL psnew
    withGutsOf repository $ do tentativelyReplacePatches repository opts ps
                               finalizeRepositoryChanges repository
    debugMessage "Done reordering the inventory."

choose_order :: RepoPatch p => PatchSet p -> PatchSet p
choose_order ps | isJust last_tag =
    case slightly_optimize_patchset $ unsafeUnseal $ get_patches_in_tag lt ps of 
    ((t:<:NilRL):<:pps) -> case get_patches_beyond_tag lt ps of
                           FlippedSeal (p :<: NilRL) -> (p+<+(t:<:NilRL)) :<: pps
                           _ -> impossible
    _ -> impossible             
    where last_tag = case filter is_tag $ mapRL info $ concatRL ps of
                     (t:_) -> Just t
                     _ -> Nothing
          lt = fromJust last_tag
choose_order ps = ps
\end{code}