%  Copyright (C) 2002-2005 David Roundy
%
%  This program is free software; you can redistribute it and/or modify
%  it under the terms of the GNU General Public License as published by
%  the Free Software Foundation; either version 2, or (at your option)
%  any later version.
%
%  This program is distributed in the hope that it will be useful,
%  but WITHOUT ANY WARRANTY; without even the implied warranty of
%  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%  GNU General Public License for more details.
%
%  You should have received a copy of the GNU General Public License
%  along with this program; see the file COPYING.  If not, write to
%  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
%  Boston, MA 02110-1301, USA.

\darcsCommand{replace}
\begin{code}
{-# OPTIONS_GHC -cpp #-}
{-# LANGUAGE CPP #-}

module Darcs.Commands.Replace ( replace ) where

import Data.Maybe ( isJust )
import Control.Monad ( unless )

import Darcs.Commands ( DarcsCommand(DarcsCommand, command_name, command_help,
                        command_description, command_extra_args,
                        command_extra_arg_help, command_command, command_prereq,
                        command_get_arg_possibilities, command_argdefaults,
                        command_advanced_options, command_basic_options),
                        nodefaults )
import Darcs.Arguments ( DarcsFlag(ForceReplace, Toks), list_registered_files,
                         ignoretimes, umask_option, tokens, force_replace,
                         working_repo_dir, fixSubPaths )
import Darcs.Repository ( withRepoLock, ($-),
                    add_to_pending, slurp_pending,
                    amInRepository, slurp_recorded_and_unrecorded,
                    applyToWorking,
                  )
import Darcs.Patch ( Prim, apply_to_slurpy, tokreplace, force_replace_slurpy )
import Darcs.Ordered ( FL(..), unsafeFL, (+>+), concatFL )
import Darcs.SlurpDirectory ( slurp_hasfile, Slurpy )
import RegChars ( regChars )
import Data.Char ( isSpace )
import Darcs.Diff ( unsafeDiff )
import Darcs.RepoPath ( SubPath, sp2fn, toFilePath )
import Darcs.Repository.Prefs ( FileType(TextFile) )
#include "impossible.h"

replace_description :: String
replace_description = "Substitute one word for another."

replace_help :: String
replace_help =
 "In addition to line-based patches, Darcs supports a limited form of\n" ++
 "lexical substitution.  Files are treated as sequences of words, and\n" ++
 "each occurrence of the old word is replaced by the new word.\n" ++
 "This is intended to provide a clean way to rename a function or\n" ++
 "variable.  Such renamings typically affect lines all through the\n" ++
 "source code, so a traditional line-based patch would be very likely to\n" ++
 "conflict with other branches, requiring manual merging.\n" ++
 "\n" ++
 "Files are tokenized according to one simple rule: words are strings of\n" ++
 "valid token characters, and everything between them (punctuation and\n" ++
 "whitespace) is discarded.\n" ++
 "\n" ++
 "The tokenizer treats files as byte strings, so it is not possible for\n" ++
 "--token-chars to include multi-byte characters, such as the non-ASCII\n" ++
 "parts of UTF-8.  Similarly, trying to replace a `high-bit' character\n" ++
 "from a unibyte encoding will also result in replacement of the same\n" ++
 "byte in files with different encodings.  For example, an acute a from\n" ++
 "ISO 8859-1 will also match an alpha from ISO 8859-7.\n" ++
 "\n" ++
 -- FIXME: this heuristic is ham-fisted and silly.  Can we drop it?
 "By default, valid token characters are letters, numbers and the\n" ++
 "underscore (i.e. [A-Za-z0-9_]).  However if the old and/or new token\n" ++
 "contains either a hyphen or period, BOTH hyphen and period are treated\n" ++
 "as valid by default (i.e. [A-Za-z0-9_.-]).\n" ++
 "\n" ++
 "The set of valid characters can be customized using the --token-chars\n" ++
 "option.  The argument must be surrounded by square brackets.  If a\n" ++
 "hyphen occurs between two characters in the set, it is treated as a\n" ++
 "set range.  For example, in most locales [A-Z] denotes all uppercase\n" ++
 "letters.  If the first character is a caret, valid tokens are taken to\n" ++
 "be the complement of the remaining characters.  For example, [^ \\n\\t]\n" ++
 "declares all characters except the space, tab and newline as valid\n" ++
 "within a word.  Unlike the tr(1) and grep(1) utilities, character\n" ++
 "classes (such as [[:alnum:]]) are NOT supported.\n" ++
 "\n" ++
 "If you choose to use --token-chars, you are STRONGLY encouraged to do\n" ++
 "so consistently.  The consequences of using multiple replace patches\n" ++
 "with different --token-chars arguments on the same file are not well\n" ++
 "tested nor well understood.\n" ++
 "\n" ++
 "By default Darcs will refuse to perform a replacement if the new token\n" ++
 "is already in use, because the replacements would be not be\n" ++
 "distinguishable from the existing tokens.  This behaviour can be\n" ++
 "overridden by supplying the --force option, but an attempt to `darcs\n" ++
 "rollback' the resulting patch will affect these existing tokens.\n"

-- FIXME: can  we just  delete the remaining  text?  It seems  more an
-- instance of "look how clever  I am; I made commutation work" rather
-- than information that is actually useful to users.
\end{code}
There is a potentially confusing difference, however, when a replace is
used to make another replace possible:
\begin{verbatim}
$ darcs replace newtoken aaack ./foo.c
$ darcs replace oldtoken newtoken ./foo.c
$ darcs record
\end{verbatim}
will be valid, even if \verb!newtoken! and \verb!oldtoken! are both present
in the recorded version of foo.c, while the sequence
\begin{verbatim}
$ [manually edit foo.c replacing newtoken with aaack]
$ darcs replace oldtoken newtoken ./foo.c
\end{verbatim}
will fail because ``newtoken'' still exists in the recorded version of
\verb!foo.c!.  The reason for the difference is that when recording, a
``replace'' patch always is recorded \emph{before} any manual changes,
which is usually what you want, since often you will introduce new
occurrences of the ``newtoken'' in your manual changes.  In contrast,
multiple ``replace'' changes are recorded in the order in which
they were made.
\begin{code}

replace :: DarcsCommand
replace = DarcsCommand {command_name = "replace",
                        command_help = replace_help,
                        command_description = replace_description,
                        command_extra_args = -1,
                        command_extra_arg_help = ["<OLD>","<NEW>",
                                                  "<FILE> ..."],
                        command_command = replace_cmd,
                        command_prereq = amInRepository,
                        command_get_arg_possibilities = list_registered_files,
                        command_argdefaults = nodefaults,
                        command_advanced_options = [ignoretimes, umask_option],
                        command_basic_options =
                            [tokens, force_replace, working_repo_dir]}

replace_cmd :: [DarcsFlag] -> [String] -> IO ()
replace_cmd opts (old:new:relfs) = withRepoLock opts $- \repository -> do
  fs <- fixSubPaths opts relfs
  toks <- choose_toks opts old new
  let checkToken tok =
        unless (is_tok toks tok) $ fail $ "'"++tok++"' is not a valid token!"
  checkToken old
  checkToken new
  (_, work) <- slurp_recorded_and_unrecorded repository
  cur <- slurp_pending repository
  pswork <- (concatFL . unsafeFL) `fmap` sequence (map (repl toks cur work) fs)
  add_to_pending repository pswork
  applyToWorking repository opts pswork `catch` \e ->
      fail $ "Can't do replace on working!\n"
          ++ "Perhaps one of the files already contains '"++ new++"'?\n"
          ++ show e
  where ftf _ = TextFile

        repl :: String -> Slurpy -> Slurpy -> SubPath -> IO (FL Prim)
        repl toks cur work f =
          if not $ slurp_hasfile (sp2fn f) work
          then do putStrLn $ "Skipping file '"++f_fp++"' which isn't in the repository."
                  return NilFL
          else if ForceReplace `elem` opts ||
                  isJust (apply_to_slurpy (tokreplace f_fp toks old new) work) ||
                  isJust (apply_to_slurpy (tokreplace f_fp toks old new) cur)
               then return (get_force_replace f toks work)
               else do putStrLn $ "Skipping file '"++f_fp++"'"
                       putStrLn $ "Perhaps the recorded version of this " ++
                                  "file already contains '" ++new++"'?"
                       putStrLn $ "Use the --force option to override."
                       return NilFL
          where f_fp = toFilePath f

        get_force_replace :: SubPath -> String -> Slurpy -> FL Prim
        get_force_replace f toks s =
            case force_replace_slurpy (tokreplace f_fp toks new old) s of
            Nothing -> bug "weird forcing bug in replace."
            Just s' -> case unsafeDiff [] ftf s s' of
                       pfix -> pfix +>+ (tokreplace f_fp toks old new :>: NilFL)
            where f_fp = toFilePath f

replace_cmd _ _ = fail "Usage: darcs replace OLD NEW [FILES]"

default_toks :: String
default_toks = "A-Za-z_0-9"
filename_toks :: String
filename_toks = "A-Za-z_0-9\\-\\."

-- | Given a set of characters and a string, returns true iff the
-- string contains only characters from the set.  A set beginning with
-- a caret (@^@) is treated as a complementary set.
is_tok :: String -> String -> Bool
is_tok _ "" = False
is_tok toks s = and $ map (regChars toks) s

-- | This function checks for @--token-chars@ on the command-line.  If
-- found, it validates the argument and returns it, without the
-- surrounding square brackets.  Otherwise, it returns either
-- 'default_toks' or 'filename_toks' as explained in 'replace_help'.
-- 
-- Note: Limitations in the current replace patch file format prevents
-- tokens and token-char specifiers from containing any whitespace.
choose_toks :: [DarcsFlag] -> String -> String -> IO String
choose_toks (Toks t:_) a b
    | length t <= 2 =
        bad_token_spec $ "It must contain more than 2 characters, because " ++
                         "it should be enclosed in square brackets"
    | head t /= '[' || last t /= ']' =
        bad_token_spec "It should be enclosed in square brackets"
    | '^' == head tok && length tok == 1 =
        bad_token_spec "Must be at least one character in the complementary set"
    | any isSpace t =
        bad_token_spec "Space is not allowed in the spec"
    | any isSpace a = bad_token_spec $ spacey_token a
    | any isSpace b = bad_token_spec $ spacey_token b
    | not (is_tok tok a) = bad_token_spec $ not_a_token a
    | not (is_tok tok b) = bad_token_spec $ not_a_token b
    | otherwise          = return tok
    where tok = init $ tail t :: String
          bad_token_spec msg = fail $ "Bad token spec: '"++ t ++"' ("++ msg ++")"
          spacey_token x = x ++ " must not contain any space"
          not_a_token x = x ++ " is not a token, according to your spec"
choose_toks (_:fs) a b = choose_toks fs a b
choose_toks [] a b = if is_tok default_toks a && is_tok default_toks b
                     then return default_toks
                     else return filename_toks
\end{code}