% Copyright (C) 2002-2003 David Roundy % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2, or (at your option) % any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; see the file COPYING. If not, write to % the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, % Boston, MA 02110-1301, USA. \chapter{Diff} \begin{code} {-# OPTIONS_GHC -cpp -fglasgow-exts #-} #include "gadts.h" module Darcs.Diff ( smart_diff, sync, cmp #ifndef GADT_WITNESSES , diff_files #endif ) where import System.Posix ( setFileTimes ) import System.IO ( IOMode(ReadMode), hFileSize, hClose ) import System.Directory ( doesDirectoryExist, doesFileExist, getDirectoryContents, ) import Control.Monad ( liftM, when ) import Data.List ( sort #ifndef GADT_WITNESSES , intersperse #endif ) import FastPackedString ( hGetPS, lengthPS #ifndef GADT_WITNESSES , PackedString, is_funky, nilPS , linesPS, nullPS, lastPS, takePS, #endif ) import Darcs.SlurpDirectory ( Slurpy, slurp_name, is_dir, is_file, get_dircontents, get_filecontents, get_mtime, get_length, undefined_time #ifndef GADT_WITNESSES , FileContents, undefined_size #endif ) #ifndef GADT_WITNESSES import Darcs.FilePathUtils ( (///) ) #endif import Darcs.Patch ( Prim #ifndef GADT_WITNESSES , hunk, canonize, rmfile, rmdir , addfile, adddir , binary, invert #endif ) import System.IO ( openBinaryFile ) import Darcs.Repository.Prefs ( FileType(..) ) import Darcs.Flags ( DarcsFlag(..) ) import Darcs.Utils ( catchall ) import Darcs.Patch.Ordered ( FL(..) #ifndef GADT_WITNESSES , (+>+) #endif ) #ifndef GADT_WITNESSES #include "impossible.h" #endif \end{code} The diff function takes a recursive diff of two slurped-up directory trees. The code involved is actually pretty trivial. \verb!paranoid_diff! runs a diff in which we don't make the assumption that files with the same modification time are identical. \begin{code} smart_diff :: [DarcsFlag] -> (FilePath -> FileType) -> Slurpy -> Slurpy -> FL Prim C(x y) #ifdef GADT_WITNESSES smart_diff = undefined #else smart_diff opts wt s1 s2 = gendiff (ignore_times, look_for_adds, summary) wt [] s1 s2 NilFL where ignore_times = IgnoreTimes `elem` opts look_for_adds = LookForAdds `elem` opts -- NoSummary/Summary both present gives False -- Just Summary gives True -- Just NoSummary gives False -- Neither gives False summary = Summary `elem` opts && NoSummary `notElem` opts mk_filepath :: [FilePath] -> FilePath mk_filepath fps = concat $ intersperse "/" $ reverse fps gendiff :: (Bool,Bool,Bool) -> (FilePath -> FileType) -> [FilePath] -> Slurpy -> Slurpy -> (FL Prim -> FL Prim) gendiff opts@(isparanoid,_,_) wt fps s1 s2 | is_file s1 && is_file s2 && maybe_differ = case wt n2 of TextFile -> diff_files f b1 b2 BinaryFile -> if b1 /= b2 then (binary f b1 b2:>:) else id | is_dir s1 && is_dir s2 = let fps' = case n2 of "." -> fps _ -> n2:fps in fps' `seq` recur_diff opts (wt . (n2///)) fps' dc1 dc2 | otherwise = id where n2 = slurp_name s2 f = mk_filepath (n2:fps) b1 = get_filecontents s1 b2 = get_filecontents s2 dc1 = get_dircontents s1 dc2 = get_dircontents s2 maybe_differ = isparanoid || get_mtime s1 == undefined_time || get_mtime s1 /= get_mtime s2 || get_length s1 == undefined_size || get_length s1 /= get_length s2 -- recur_diff or recursive diff -- First parameter is (IgnoreTimes?, LookforAdds?, Summary?) recur_diff :: (Bool,Bool,Bool) -> (FilePath -> FileType) -> [FilePath] -> [Slurpy] -> [Slurpy] -> (FL Prim -> FL Prim) recur_diff _ _ _ [] [] = id recur_diff opts@(_,doadd,summary) wt fps (s:ss) (s':ss') -- this is the case if a file has been removed in the working directory | s < s' = diff_removed wt fps s . recur_diff opts wt fps ss (s':ss') -- this next case is when there is a file in the directory that is not -- in the repository (ie, not managed by darcs) | s > s' = let rest = recur_diff opts wt fps (s:ss) ss' in if not doadd then rest else diff_added summary wt fps s' . rest -- actually compare the files because the names match | s == s' = gendiff opts wt fps s s' . recur_diff opts wt fps ss ss' recur_diff opts wt fps (s:ss) [] = diff_removed wt fps s . recur_diff opts wt fps ss [] recur_diff opts@(_,True,summary) wt fps [] (s':ss') = diff_added summary wt fps s' . recur_diff opts wt fps [] ss' recur_diff (_,False,_) _ _ [] _ = id recur_diff _ _ _ _ _ = impossible -- creates a diff for a file or directory which needs to be added to the -- repository diff_added :: Bool -> (FilePath -> FileType) -> [FilePath] -> Slurpy -> (FL Prim -> FL Prim) diff_added summary wt fps s | is_file s = case wt n of TextFile -> (addfile f:>:) . (if summary then id else diff_from_empty id f (get_filecontents s)) BinaryFile -> (addfile f:>:) . (if summary then id else (bin_patch f nilPS (get_filecontents s))) | otherwise {- is_dir s -} = (adddir f:>:) . foldr (.) id (map (diff_added summary wt (n:fps)) (get_dircontents s)) where n = slurp_name s f = mk_filepath (n:fps) get_text :: FileContents -> [PackedString] get_text = linesPS empt :: FileContents empt = nilPS diff_files :: FilePath -> FileContents -> FileContents -> (FL Prim -> FL Prim) diff_files f o n | get_text o == [nilPS] && get_text n == [nilPS] = id | get_text o == [nilPS] = diff_from_empty id f n | get_text n == [nilPS] = diff_from_empty invert f o diff_files f o n = if o == n then id else if has_bin o || has_bin n then (binary f o n:>:) else (canonize (hunk f 1 (linesPS o) (linesPS n)) +>+) diff_from_empty :: (Prim -> Prim) -> FilePath -> FileContents -> (FL Prim -> FL Prim) diff_from_empty inv f b = if b == nilPS then id else let p = if has_bin b then binary f nilPS b else if lastPS b == '\n' then hunk f 1 [] $ init $ linesPS b else hunk f 1 [nilPS] $ linesPS b in (inv p:>:) {- | We take a PackedString which represents a file's contents, and we check to see whether it is a 'binary' file or a 'textual' file. We define a textual file as any file which does not contain two magic characters, '\0' (the NULL character on Unix) and '^Z' (Control-Z, a DOS convention). Note that to improve performance, we won't examine *all* of the string, because that falls down on large files, but just the first 4096 characters. -} has_bin :: FileContents -> Bool has_bin = is_funky . takePS 4096 #endif \end{code} \begin{code} #ifndef GADT_WITNESSES bin_patch :: FilePath -> PackedString -> PackedString -> FL Prim -> FL Prim bin_patch f o n | nullPS o && nullPS n = id | otherwise = (binary f o n:>:) #endif \end{code} \begin{code} #ifndef GADT_WITNESSES diff_removed :: (FilePath -> FileType) -> [FilePath] -> Slurpy -> (FL Prim -> FL Prim) diff_removed wt fps s | is_file s = case wt n of TextFile -> diff_files f (get_filecontents s) empt . (rmfile f:>:) BinaryFile -> (bin_patch f (get_filecontents s) nilPS) . (rmfile f:>:) | otherwise {- is_dir s -} = foldr (.) (rmdir f:>:) $ map (diff_removed wt (n:fps)) (get_dircontents s) where n = slurp_name s f = mk_filepath (n:fps) #endif \end{code} \begin{code} sync :: String -> Slurpy -> Slurpy -> IO () sync path s1 s2 | is_file s1 && is_file s2 && (get_mtime s1 == undefined_time || get_mtime s1 /= get_mtime s2) && get_length s1 == get_length s2 && get_filecontents s1 == get_filecontents s2 = set_mtime n (get_mtime s2) | is_dir s1 && is_dir s2 = n2 `seq` recur_sync n (get_dircontents s1) (get_dircontents s2) | otherwise = return () where n2 = slurp_name s2 n = path++"/"++n2 set_mtime fname ctime = setFileTimes fname ctime ctime `catchall` return () recur_sync _ [] _ = return () recur_sync _ _ [] = return () recur_sync p (s:ss) (s':ss') | s < s' = recur_sync p ss (s':ss') | s > s' = recur_sync p (s:ss) ss' | otherwise = do sync p s s' recur_sync p ss ss' \end{code} \begin{code} cmp :: FilePath -> FilePath -> IO Bool cmp p1 p2 = do dir1 <- doesDirectoryExist p1 dir2 <- doesDirectoryExist p2 file1 <- doesFileExist p1 file2 <- doesFileExist p2 if dir1 && dir2 then cmpdir p1 p2 else if file1 && file2 then cmpfile p1 p2 else return False cmpdir :: FilePath -> FilePath -> IO Bool cmpdir d1 d2 = do fn1 <- liftM (filter (\f->f/="." && f /="..")) $ getDirectoryContents d1 fn2 <- liftM (filter (\f->f/="." && f /="..")) $ getDirectoryContents d2 if sort fn1 /= sort fn2 then return False else andIO $ map (\fn-> cmp (d1++"/"++fn) (d2++"/"++fn)) fn1 andIO :: [IO Bool] -> IO Bool andIO (iob:iobs) = do b <- iob if b then andIO iobs else return False andIO [] = return True cmpfile :: FilePath -> FilePath -> IO Bool cmpfile f1 f2 = do h1 <- openBinaryFile f1 ReadMode h2 <- openBinaryFile f2 ReadMode l1 <- hFileSize h1 l2 <- hFileSize h2 if l1 /= l2 then do hClose h1 hClose h2 putStrLn $ "different file lengths for "++f1++" and "++f2 return False else do b <- hcmp h1 h2 when (not b) $ putStrLn $ "files "++f1++" and "++f2++" differ" hClose h1 hClose h2 return b where hcmp h1 h2 = do c1 <- hGetPS h1 1024 c2 <- hGetPS h2 1024 if c1 /= c2 then return False else if lengthPS c1 == 1024 then hcmp h1 h2 else return True \end{code}