-- Copyright (C) 2002-2003 David Roundy -- -- This program is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2, or (at your option) -- any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program; see the file COPYING. If not, write to -- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -- Boston, MA 02110-1301, USA. {-# OPTIONS_GHC -cpp -fglasgow-exts #-} {-# LANGUAGE CPP #-} #include "gadts.h" module Darcs.Diff ( unsafeDiffAtPaths, unsafeDiff, sync, cmp #ifndef GADT_WITNESSES , diff_files #endif ) where import System.Posix ( setFileTimes ) import System.IO ( IOMode(ReadMode), hFileSize, hClose ) import System.Directory ( doesDirectoryExist, doesFileExist, getDirectoryContents, ) import Control.Monad ( when ) import Data.List ( sort #ifndef GADT_WITNESSES , intersperse, isPrefixOf #endif ) #ifndef GADT_WITNESSES import Data.Maybe ( catMaybes ) #endif #ifndef GADT_WITNESSES import ByteStringUtils ( is_funky, linesPS) import qualified Data.ByteString.Char8 as BC (last) import qualified Data.ByteString as B (null, empty, take, ByteString) #endif import qualified Data.ByteString as B (hGet, length) import Darcs.SlurpDirectory ( Slurpy, slurp_name, is_dir, is_file, #ifndef GADT_WITNESSES get_slurp, #endif get_dircontents, get_filecontents, get_mtime, get_length, undefined_time #ifndef GADT_WITNESSES , FileContents, undefined_size #endif ) #ifndef GADT_WITNESSES import System.FilePath.Posix ( () ) #endif import Darcs.Patch ( Prim #ifndef GADT_WITNESSES , hunk, canonize, rmfile, rmdir , addfile, adddir , binary, invert #endif ) #ifndef GADT_WITNESSES import Darcs.Patch.FileName( fp2fn, breakup ) #endif import System.IO ( openBinaryFile ) import Darcs.Repository.Prefs ( FileType(..) ) import Darcs.Flags ( DarcsFlag(..) ) import Darcs.Utils ( catchall ) import Darcs.Ordered ( FL(..) #ifndef GADT_WITNESSES , (+>+) #endif ) #ifndef GADT_WITNESSES #include "impossible.h" #endif -- | The unsafeDiffAtPaths function calls diff_at_path for a set of files and -- returns all changes to those files. It does *not* explore the given paths -- recursively. -- -- Comparing paths and not slurpies is useful when the user -- requests a diff for a file that is created or removed in the working copy: -- then there is no slurpy for the file in the /current/ or /working/ slurpy -- respectively. -- -- The given paths must always be fixed repository paths starting with a -- ".". It is safe to pass overlapping paths. -- -- The booleans in the first argument tell whether to ignore mtimes, whether -- we must look for additions and if we're diffing for a summary only. -- -- It returns an FL of patches, that contains all the changes that have been -- made at all those paths. unsafeDiffAtPaths :: (Bool, Bool, Bool) -> (FilePath -> FileType) -> Slurpy -> Slurpy -> [FilePath] -> FL Prim C(x y) #ifdef GADT_WITNESSES unsafeDiffAtPaths = undefined #else unsafeDiffAtPaths flags filetypeFunction s1 s2 paths = foldr (+>+) NilFL (catMaybes diffsPerPath) where diffsPerPath = map differ safePaths differ = diff_at_path flags filetypeFunction s1 s2 safePaths = make_nonoverlapping_path_set paths diff_at_path :: (Bool, Bool, Bool) -> (FilePath -> FileType) -> Slurpy -> Slurpy -> FilePath -> Maybe (FL Prim) diff_at_path (ignoreTimes, lookForAdds, summary) filetypeFunction s1 s2 path = case (pathIn1, pathIn2) of (Nothing, Nothing) -> Nothing (Nothing, Just s2PathSlurpy) -> do Just $ diff_added summary filetypeFunction initialFps s2PathSlurpy NilFL (Just s1PathSlurpy, Nothing) -> do Just $ diff_removed filetypeFunction initialFps s1PathSlurpy NilFL (Just s1PathSlurpy, Just s2PathSlurpy) -> Just $ gendiff (ignoreTimes, lookForAdds, summary) filetypeFunction initialFps s1PathSlurpy s2PathSlurpy NilFL where pathIn1 = get_slurp (fp2fn path) s1 pathIn2 = get_slurp (fp2fn path) s2 initialFps = tail $ reverse (breakup path) make_nonoverlapping_path_set :: [FilePath] -> [FilePath] make_nonoverlapping_path_set = map unbreakup . delete_overlapping . map breakup . sort where delete_overlapping :: [[FilePath]] -> [[FilePath]] delete_overlapping (p1:p2:ps) = if p1 `isPrefixOf` p2 then delete_overlapping (p1:ps) else p1 : delete_overlapping (p2:ps) delete_overlapping ps = ps unbreakup = concat . intersperse "/" #endif -- The diff function takes a recursive diff of two slurped-up directory trees. -- The code involved is actually pretty trivial. \verb!paranoid_diff! runs a -- diff in which we don't make the assumption that files with the same -- modification time are identical. unsafeDiff :: [DarcsFlag] -> (FilePath -> FileType) -> Slurpy -> Slurpy -> FL Prim C(x y) #ifdef GADT_WITNESSES unsafeDiff = undefined #else unsafeDiff opts wt s1 s2 = case diff_at_path (ignoreTimes, lookForAdds, summary) wt s1 s2 "" of Just d -> d _ -> impossible -- because "" always exists in a slurpy where -- NoSummary/Summary both present gives False -- Just Summary gives True -- Just NoSummary gives False -- Neither gives False summary = Summary `elem` opts && NoSummary `notElem` opts lookForAdds = LookForAdds `elem` opts ignoreTimes = IgnoreTimes `elem` opts mk_filepath :: [FilePath] -> FilePath mk_filepath fps = concat $ intersperse "/" $ reverse fps gendiff :: (Bool,Bool,Bool) -> (FilePath -> FileType) -> [FilePath] -> Slurpy -> Slurpy -> (FL Prim -> FL Prim) gendiff opts@(isparanoid,_,_) wt fps s1 s2 | is_file s1 && is_file s2 = diff_regular_files isparanoid wt f s1 s2 | is_dir s1 && is_dir s2 = let fps' = case n2 of "." -> fps _ -> n2:fps in fps' `seq` recur_diff opts (wt . (n2)) fps' dc1 dc2 | otherwise = id where n2 = slurp_name s2 f = mk_filepath (n2:fps) dc1 = get_dircontents s1 dc2 = get_dircontents s2 -- recur_diff or recursive diff -- First parameter is (IgnoreTimes?, LookforAdds?, Summary?) recur_diff :: (Bool,Bool,Bool) -> (FilePath -> FileType) -> [FilePath] -> [Slurpy] -> [Slurpy] -> (FL Prim -> FL Prim) recur_diff _ _ _ [] [] = id recur_diff opts@(_,doadd,summary) wt fps (s:ss) (s':ss') -- this is the case if a file has been removed in the working directory | s < s' = diff_removed wt fps s . recur_diff opts wt fps ss (s':ss') -- this next case is when there is a file in the directory that is not -- in the repository (ie, not managed by darcs) | s > s' = let rest = recur_diff opts wt fps (s:ss) ss' in if not doadd then rest else diff_added summary wt fps s' . rest -- actually compare the files because the names match | s == s' = gendiff opts wt fps s s' . recur_diff opts wt fps ss ss' recur_diff opts wt fps (s:ss) [] = diff_removed wt fps s . recur_diff opts wt fps ss [] recur_diff opts@(_,True,summary) wt fps [] (s':ss') = diff_added summary wt fps s' . recur_diff opts wt fps [] ss' recur_diff (_,False,_) _ _ [] _ = id recur_diff _ _ _ _ _ = impossible -- diff, taking into account paranoidness and file type, two regular files diff_regular_files :: Bool -> (FilePath -> FileType) -> FilePath -> Slurpy -> Slurpy -> (FL Prim -> FL Prim) diff_regular_files ignoreTimes filetypeFunction f s1 s2 = if maybe_differ then case filetypeFunction (slurp_name s2) of TextFile -> diff_files f b1 b2 BinaryFile -> if b1 /= b2 then (binary f b1 b2:>:) else id else id where maybe_differ = ignoreTimes || get_mtime s1 == undefined_time || get_mtime s1 /= get_mtime s2 || get_length s1 == undefined_size || get_length s1 /= get_length s2 b1 = get_filecontents s1 b2 = get_filecontents s2 -- creates a diff for a file or directory which needs to be added to the -- repository diff_added :: Bool -> (FilePath -> FileType) -> [FilePath] -> Slurpy -> (FL Prim -> FL Prim) diff_added summary wt fps s | is_file s = case wt n of TextFile -> (addfile f:>:) . (if summary then id else diff_from_empty id f (get_filecontents s)) BinaryFile -> (addfile f:>:) . (if summary then id else (bin_patch f B.empty (get_filecontents s))) | otherwise {- is_dir s -} = (adddir f:>:) . foldr (.) id (map (diff_added summary wt (n:fps)) (get_dircontents s)) where n = slurp_name s f = mk_filepath (n:fps) get_text :: FileContents -> [B.ByteString] get_text = linesPS empt :: FileContents empt = B.empty diff_files :: FilePath -> FileContents -> FileContents -> (FL Prim -> FL Prim) diff_files f o n | get_text o == [B.empty] && get_text n == [B.empty] = id | get_text o == [B.empty] = diff_from_empty id f n | get_text n == [B.empty] = diff_from_empty invert f o diff_files f o n = if o == n then id else if has_bin o || has_bin n then (binary f o n:>:) else (canonize (hunk f 1 (linesPS o) (linesPS n)) +>+) diff_from_empty :: (Prim -> Prim) -> FilePath -> FileContents -> (FL Prim -> FL Prim) diff_from_empty inv f b = if b == B.empty then id else let p = if has_bin b then binary f B.empty b else if BC.last b == '\n' then hunk f 1 [] $ init $ linesPS b else hunk f 1 [B.empty] $ linesPS b in (inv p:>:) {- | We take a B.ByteString which represents a file's contents, and we check to see whether it is a 'binary' file or a 'textual' file. We define a textual file as any file which does not contain two magic characters, '\0' (the NULL character on Unix) and '^Z' (Control-Z, a DOS convention). Note that to improve performance, we won't examine *all* of the string, because that falls down on large files, but just the first 4096 characters. -} has_bin :: FileContents -> Bool has_bin = is_funky . B.take 4096 #endif #ifndef GADT_WITNESSES bin_patch :: FilePath -> B.ByteString -> B.ByteString -> FL Prim -> FL Prim bin_patch f o n | B.null o && B.null n = id | otherwise = (binary f o n:>:) #endif #ifndef GADT_WITNESSES diff_removed :: (FilePath -> FileType) -> [FilePath] -> Slurpy -> (FL Prim -> FL Prim) diff_removed wt fps s | is_file s = case wt n of TextFile -> diff_files f (get_filecontents s) empt . (rmfile f:>:) BinaryFile -> (bin_patch f (get_filecontents s) B.empty) . (rmfile f:>:) | otherwise {- is_dir s -} = foldr (.) (rmdir f:>:) $ map (diff_removed wt (n:fps)) (get_dircontents s) where n = slurp_name s f = mk_filepath (n:fps) #endif sync :: String -> Slurpy -> Slurpy -> IO () sync path s1 s2 | is_file s1 && is_file s2 && (get_mtime s1 == undefined_time || get_mtime s1 /= get_mtime s2) && get_length s1 == get_length s2 && get_filecontents s1 == get_filecontents s2 = set_mtime n (get_mtime s2) | is_dir s1 && is_dir s2 = n2 `seq` recur_sync n (get_dircontents s1) (get_dircontents s2) | otherwise = return () where n2 = slurp_name s2 n = path++"/"++n2 set_mtime fname ctime = setFileTimes fname ctime ctime `catchall` return () recur_sync _ [] _ = return () recur_sync _ _ [] = return () recur_sync p (s:ss) (s':ss') | s < s' = recur_sync p ss (s':ss') | s > s' = recur_sync p (s:ss) ss' | otherwise = do sync p s s' recur_sync p ss ss' cmp :: FilePath -> FilePath -> IO Bool cmp p1 p2 = do dir1 <- doesDirectoryExist p1 dir2 <- doesDirectoryExist p2 file1 <- doesFileExist p1 file2 <- doesFileExist p2 if dir1 && dir2 then cmpdir p1 p2 else if file1 && file2 then cmpfile p1 p2 else return False cmpdir :: FilePath -> FilePath -> IO Bool cmpdir d1 d2 = do fn1 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d1 fn2 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d2 if sort fn1 /= sort fn2 then return False else andIO $ map (\fn-> cmp (d1++"/"++fn) (d2++"/"++fn)) fn1 andIO :: [IO Bool] -> IO Bool andIO (iob:iobs) = do b <- iob if b then andIO iobs else return False andIO [] = return True cmpfile :: FilePath -> FilePath -> IO Bool cmpfile f1 f2 = do h1 <- openBinaryFile f1 ReadMode h2 <- openBinaryFile f2 ReadMode l1 <- hFileSize h1 l2 <- hFileSize h2 if l1 /= l2 then do hClose h1 hClose h2 putStrLn $ "different file lengths for "++f1++" and "++f2 return False else do b <- hcmp h1 h2 when (not b) $ putStrLn $ "files "++f1++" and "++f2++" differ" hClose h1 hClose h2 return b where hcmp h1 h2 = do c1 <- B.hGet h1 1024 c2 <- B.hGet h2 1024 if c1 /= c2 then return False else if B.length c1 == 1024 then hcmp h1 h2 else return True