% Copyright (C) 2002-2005 David Roundy % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2, or (at your option) % any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; see the file COPYING. If not, write to % the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, % Boston, MA 02110-1301, USA. \darcsCommand{replace} \begin{code} {-# OPTIONS_GHC -cpp #-} {-# LANGUAGE CPP #-} module Darcs.Commands.Replace ( replace ) where import Data.Maybe ( isJust ) import Control.Monad ( unless ) import Darcs.Commands ( DarcsCommand(DarcsCommand, command_name, command_help, command_description, command_extra_args, command_extra_arg_help, command_command, command_prereq, command_get_arg_possibilities, command_argdefaults, command_advanced_options, command_basic_options), nodefaults ) import Darcs.Arguments ( DarcsFlag(ForceReplace, Toks), list_registered_files, ignoretimes, umask_option, tokens, force_replace, working_repo_dir, fixSubPaths ) import Darcs.Repository ( withRepoLock, ($-), add_to_pending, slurp_pending, amInRepository, slurp_recorded_and_unrecorded, applyToWorking, ) import Darcs.Patch ( Prim, apply_to_slurpy, tokreplace, force_replace_slurpy ) import Darcs.Ordered ( FL(..), unsafeFL, (+>+), concatFL ) import Darcs.SlurpDirectory ( slurp_hasfile, Slurpy ) import RegChars ( regChars ) import Data.Char ( isSpace ) import Darcs.Diff ( unsafeDiff ) import Darcs.RepoPath ( SubPath, sp2fn, toFilePath ) import Darcs.Repository.Prefs ( FileType(TextFile) ) #include "impossible.h" replace_description :: String replace_description = "Substitute one word for another." replace_help :: String replace_help = "In addition to line-based patches, Darcs supports a limited form of\n" ++ "lexical substitution. Files are treated as sequences of words, and\n" ++ "each occurrence of the old word is replaced by the new word.\n" ++ "This is intended to provide a clean way to rename a function or\n" ++ "variable. Such renamings typically affect lines all through the\n" ++ "source code, so a traditional line-based patch would be very likely to\n" ++ "conflict with other branches, requiring manual merging.\n" ++ "\n" ++ "Files are tokenized according to one simple rule: words are strings of\n" ++ "valid token characters, and everything between them (punctuation and\n" ++ "whitespace) is discarded.\n" ++ "\n" ++ "The tokenizer treats files as byte strings, so it is not possible for\n" ++ "--token-chars to include multi-byte characters, such as the non-ASCII\n" ++ "parts of UTF-8. Similarly, trying to replace a `high-bit' character\n" ++ "from a unibyte encoding will also result in replacement of the same\n" ++ "byte in files with different encodings. For example, an acute a from\n" ++ "ISO 8859-1 will also match an alpha from ISO 8859-7.\n" ++ "\n" ++ -- FIXME: this heuristic is ham-fisted and silly. Can we drop it? "By default, valid token characters are letters, numbers and the\n" ++ "underscore (i.e. [A-Za-z0-9_]). However if the old and/or new token\n" ++ "contains either a hyphen or period, BOTH hyphen and period are treated\n" ++ "as valid by default (i.e. [A-Za-z0-9_.-]).\n" ++ "\n" ++ "The set of valid characters can be customized using the --token-chars\n" ++ "option. The argument must be surrounded by square brackets. If a\n" ++ "hyphen occurs between two characters in the set, it is treated as a\n" ++ "set range. For example, in most locales [A-Z] denotes all uppercase\n" ++ "letters. If the first character is a caret, valid tokens are taken to\n" ++ "be the complement of the remaining characters. For example, [^ \\n\\t]\n" ++ "declares all characters except the space, tab and newline as valid\n" ++ "within a word. Unlike the tr(1) and grep(1) utilities, character\n" ++ "classes (such as [[:alnum:]]) are NOT supported.\n" ++ "\n" ++ "If you choose to use --token-chars, you are STRONGLY encouraged to do\n" ++ "so consistently. The consequences of using multiple replace patches\n" ++ "with different --token-chars arguments on the same file are not well\n" ++ "tested nor well understood.\n" ++ "\n" ++ "By default Darcs will refuse to perform a replacement if the new token\n" ++ "is already in use, because the replacements would be not be\n" ++ "distinguishable from the existing tokens. This behaviour can be\n" ++ "overridden by supplying the --force option, but an attempt to `darcs\n" ++ "rollback' the resulting patch will affect these existing tokens.\n" -- FIXME: can we just delete the remaining text? It seems more an -- instance of "look how clever I am; I made commutation work" rather -- than information that is actually useful to users. \end{code} There is a potentially confusing difference, however, when a replace is used to make another replace possible: \begin{verbatim} $ darcs replace newtoken aaack ./foo.c $ darcs replace oldtoken newtoken ./foo.c $ darcs record \end{verbatim} will be valid, even if \verb!newtoken! and \verb!oldtoken! are both present in the recorded version of foo.c, while the sequence \begin{verbatim} $ [manually edit foo.c replacing newtoken with aaack] $ darcs replace oldtoken newtoken ./foo.c \end{verbatim} will fail because ``newtoken'' still exists in the recorded version of \verb!foo.c!. The reason for the difference is that when recording, a ``replace'' patch always is recorded \emph{before} any manual changes, which is usually what you want, since often you will introduce new occurrences of the ``newtoken'' in your manual changes. In contrast, multiple ``replace'' changes are recorded in the order in which they were made. \begin{code} replace :: DarcsCommand replace = DarcsCommand {command_name = "replace", command_help = replace_help, command_description = replace_description, command_extra_args = -1, command_extra_arg_help = ["","", " ..."], command_command = replace_cmd, command_prereq = amInRepository, command_get_arg_possibilities = list_registered_files, command_argdefaults = nodefaults, command_advanced_options = [ignoretimes, umask_option], command_basic_options = [tokens, force_replace, working_repo_dir]} replace_cmd :: [DarcsFlag] -> [String] -> IO () replace_cmd opts (old:new:relfs) = withRepoLock opts $- \repository -> do fs <- fixSubPaths opts relfs toks <- choose_toks opts old new let checkToken tok = unless (is_tok toks tok) $ fail $ "'"++tok++"' is not a valid token!" checkToken old checkToken new (_, work) <- slurp_recorded_and_unrecorded repository cur <- slurp_pending repository pswork <- (concatFL . unsafeFL) `fmap` sequence (map (repl toks cur work) fs) add_to_pending repository pswork applyToWorking repository opts pswork `catch` \e -> fail $ "Can't do replace on working!\n" ++ "Perhaps one of the files already contains '"++ new++"'?\n" ++ show e where ftf _ = TextFile repl :: String -> Slurpy -> Slurpy -> SubPath -> IO (FL Prim) repl toks cur work f = if not $ slurp_hasfile (sp2fn f) work then do putStrLn $ "Skipping file '"++f_fp++"' which isn't in the repository." return NilFL else if ForceReplace `elem` opts || isJust (apply_to_slurpy (tokreplace f_fp toks old new) work) || isJust (apply_to_slurpy (tokreplace f_fp toks old new) cur) then return (get_force_replace f toks work) else do putStrLn $ "Skipping file '"++f_fp++"'" putStrLn $ "Perhaps the recorded version of this " ++ "file already contains '" ++new++"'?" putStrLn $ "Use the --force option to override." return NilFL where f_fp = toFilePath f get_force_replace :: SubPath -> String -> Slurpy -> FL Prim get_force_replace f toks s = case force_replace_slurpy (tokreplace f_fp toks new old) s of Nothing -> bug "weird forcing bug in replace." Just s' -> case unsafeDiff [] ftf s s' of pfix -> pfix +>+ (tokreplace f_fp toks old new :>: NilFL) where f_fp = toFilePath f replace_cmd _ _ = fail "Usage: darcs replace OLD NEW [FILES]" default_toks :: String default_toks = "A-Za-z_0-9" filename_toks :: String filename_toks = "A-Za-z_0-9\\-\\." -- | Given a set of characters and a string, returns true iff the -- string contains only characters from the set. A set beginning with -- a caret (@^@) is treated as a complementary set. is_tok :: String -> String -> Bool is_tok _ "" = False is_tok toks s = and $ map (regChars toks) s -- | This function checks for @--token-chars@ on the command-line. If -- found, it validates the argument and returns it, without the -- surrounding square brackets. Otherwise, it returns either -- 'default_toks' or 'filename_toks' as explained in 'replace_help'. -- -- Note: Limitations in the current replace patch file format prevents -- tokens and token-char specifiers from containing any whitespace. choose_toks :: [DarcsFlag] -> String -> String -> IO String choose_toks (Toks t:_) a b | length t <= 2 = bad_token_spec $ "It must contain more than 2 characters, because " ++ "it should be enclosed in square brackets" | head t /= '[' || last t /= ']' = bad_token_spec "It should be enclosed in square brackets" | '^' == head tok && length tok == 1 = bad_token_spec "Must be at least one character in the complementary set" | any isSpace t = bad_token_spec "Space is not allowed in the spec" | any isSpace a = bad_token_spec $ spacey_token a | any isSpace b = bad_token_spec $ spacey_token b | not (is_tok tok a) = bad_token_spec $ not_a_token a | not (is_tok tok b) = bad_token_spec $ not_a_token b | otherwise = return tok where tok = init $ tail t :: String bad_token_spec msg = fail $ "Bad token spec: '"++ t ++"' ("++ msg ++")" spacey_token x = x ++ " must not contain any space" not_a_token x = x ++ " is not a token, according to your spec" choose_toks (_:fs) a b = choose_toks fs a b choose_toks [] a b = if is_tok default_toks a && is_tok default_toks b then return default_toks else return filename_toks \end{code}