-- Copyright (C) 2002-2003 David Roundy
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2, or (at your option)
-- any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; see the file COPYING.  If not, write to
-- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-- Boston, MA 02110-1301, USA.

{-# OPTIONS_GHC -cpp -fglasgow-exts #-}
{-# LANGUAGE CPP #-}

#include "gadts.h"

module Darcs.Diff ( unsafeDiffAtPaths, unsafeDiff, sync, cmp
#ifndef GADT_WITNESSES
                  , diff_files
#endif
                  ) where

import System.Posix
     ( setFileTimes )
import System.IO ( IOMode(ReadMode), hFileSize, hClose )
import System.Directory ( doesDirectoryExist, doesFileExist,
                   getDirectoryContents,
                 )
import Control.Monad ( when )
import Data.List ( sort
#ifndef GADT_WITNESSES
                 , intersperse, isPrefixOf
#endif
                 )
#ifndef GADT_WITNESSES
import Data.Maybe ( catMaybes )
#endif

#ifndef GADT_WITNESSES
import ByteStringUtils ( is_funky, linesPS)
import qualified Data.ByteString.Char8 as BC (last)
import qualified Data.ByteString as B       (null, empty, take, ByteString)
#endif
import qualified Data.ByteString as B       (hGet, length)

import Darcs.SlurpDirectory ( Slurpy, slurp_name, is_dir, is_file,
#ifndef GADT_WITNESSES
                        get_slurp,
#endif
                        get_dircontents, get_filecontents,
                        get_mtime, get_length,
                        undefined_time
#ifndef GADT_WITNESSES
                        , FileContents, undefined_size
#endif
                      )
#ifndef GADT_WITNESSES
import System.FilePath.Posix ( (</>) )
#endif
import Darcs.Patch ( Prim
#ifndef GADT_WITNESSES
                   , hunk, canonize, rmfile, rmdir
                   , addfile, adddir
                   , binary, invert
#endif
                   )
#ifndef GADT_WITNESSES
import Darcs.Patch.FileName( fp2fn, breakup )
#endif
import System.IO ( openBinaryFile )
import Darcs.Repository.Prefs ( FileType(..) )
import Darcs.Flags ( DarcsFlag(..) )
import Darcs.Utils ( catchall )
import Darcs.Ordered ( FL(..)
#ifndef GADT_WITNESSES
                           , (+>+)
#endif
                           )
#ifndef GADT_WITNESSES
#include "impossible.h"
#endif

-- | The unsafeDiffAtPaths function calls diff_at_path for a set of files and
--   returns all changes to those files. It does *not* explore the given paths
--   recursively.
--
--   Comparing paths and not slurpies is useful when the user
--   requests a diff for a file that is created or removed in the working copy:
--   then there is no slurpy for the file in the /current/ or /working/ slurpy
--   respectively.
--
--   The given paths must always be fixed repository paths starting with a
--   ".". It is safe to pass overlapping paths.
--
--   The booleans in the first argument tell whether to ignore mtimes, whether
--   we must look for additions and if we're diffing for a summary only.
--
--   It returns an FL of patches, that contains all the changes that have been
--   made at all those paths.
unsafeDiffAtPaths :: (Bool, Bool, Bool) -> (FilePath -> FileType) ->
               Slurpy -> Slurpy -> [FilePath] -> FL Prim C(x y)
#ifdef GADT_WITNESSES
unsafeDiffAtPaths = undefined
#else
unsafeDiffAtPaths flags filetypeFunction s1 s2 paths =
    foldr (+>+) NilFL (catMaybes diffsPerPath)
  where diffsPerPath = map differ safePaths
        differ       = diff_at_path flags filetypeFunction s1 s2
        safePaths    = make_nonoverlapping_path_set paths

diff_at_path :: (Bool, Bool, Bool) -> (FilePath -> FileType)
                -> Slurpy -> Slurpy -> FilePath -> Maybe (FL Prim)
diff_at_path (ignoreTimes, lookForAdds, summary) filetypeFunction s1 s2 path =
    case (pathIn1, pathIn2) of
        (Nothing, Nothing) -> Nothing
        (Nothing, Just s2PathSlurpy) -> do
            Just $ diff_added summary filetypeFunction initialFps s2PathSlurpy NilFL
        (Just s1PathSlurpy, Nothing) -> do
            Just $ diff_removed filetypeFunction initialFps s1PathSlurpy NilFL
        (Just s1PathSlurpy, Just s2PathSlurpy) ->
            Just $ gendiff (ignoreTimes, lookForAdds, summary) filetypeFunction
                           initialFps s1PathSlurpy s2PathSlurpy NilFL
  where pathIn1 = get_slurp (fp2fn path) s1
        pathIn2 = get_slurp (fp2fn path) s2
        initialFps = tail $ reverse (breakup path)

make_nonoverlapping_path_set :: [FilePath] -> [FilePath]
make_nonoverlapping_path_set = map unbreakup . delete_overlapping . map breakup . sort
  where
    delete_overlapping :: [[FilePath]] -> [[FilePath]]
    delete_overlapping (p1:p2:ps) = if p1 `isPrefixOf` p2
                                      then delete_overlapping (p1:ps)
                                      else p1 : delete_overlapping (p2:ps)
    delete_overlapping ps         = ps
    unbreakup = concat . intersperse "/"
#endif

-- The diff function takes a recursive diff of two slurped-up directory trees.
-- The code involved is actually pretty trivial.  \verb!paranoid_diff! runs a
-- diff in which we don't make the assumption that files with the same
-- modification time are identical.

unsafeDiff :: [DarcsFlag]
           -> (FilePath -> FileType) -> Slurpy -> Slurpy -> FL Prim C(x y)
#ifdef GADT_WITNESSES
unsafeDiff = undefined
#else
unsafeDiff opts wt s1 s2
    = case diff_at_path (ignoreTimes, lookForAdds, summary)  wt s1 s2 "" of
          Just d -> d
          _      -> impossible -- because "" always exists in a slurpy 
  where -- NoSummary/Summary both present gives False
        -- Just Summary gives True
        -- Just NoSummary gives False
        -- Neither gives False
        summary = Summary `elem` opts && NoSummary `notElem` opts
        lookForAdds = LookForAdds `elem` opts
        ignoreTimes = IgnoreTimes `elem` opts

mk_filepath :: [FilePath] -> FilePath
mk_filepath fps = concat $ intersperse "/" $ reverse fps

gendiff :: (Bool,Bool,Bool)
        -> (FilePath -> FileType) -> [FilePath] -> Slurpy -> Slurpy
        -> (FL Prim -> FL Prim)
gendiff opts@(isparanoid,_,_) wt fps s1 s2
    | is_file s1 && is_file s2 = diff_regular_files isparanoid wt f s1 s2
    | is_dir s1 && is_dir s2 =
          let fps' = case n2 of
                         "." -> fps
                         _ -> n2:fps
          in fps' `seq` recur_diff opts (wt . (n2</>)) fps' dc1 dc2
    | otherwise = id
    where n2 = slurp_name s2
          f = mk_filepath (n2:fps)
          dc1 = get_dircontents s1
          dc2 = get_dircontents s2

-- recur_diff or recursive diff
-- First parameter is (IgnoreTimes?, LookforAdds?, Summary?)
recur_diff :: (Bool,Bool,Bool)
           -> (FilePath -> FileType) -> [FilePath] -> [Slurpy] -> [Slurpy]
           -> (FL Prim -> FL Prim)
recur_diff _ _ _ [] [] = id
recur_diff opts@(_,doadd,summary) wt fps (s:ss) (s':ss')
    -- this is the case if a file has been removed in the working directory
    | s < s' = diff_removed wt fps s . recur_diff opts wt fps ss (s':ss')
    -- this next case is when there is a file in the directory that is not
    -- in the repository (ie, not managed by darcs)
    | s > s' = let rest = recur_diff opts wt fps (s:ss) ss'
               in if not doadd then                        rest
                               else diff_added summary wt fps s' . rest
    -- actually compare the files because the names match
    | s == s' = gendiff opts wt fps s s' . recur_diff opts wt fps ss ss'
recur_diff opts wt fps (s:ss) [] =
    diff_removed wt fps s . recur_diff opts wt fps ss []
recur_diff opts@(_,True,summary) wt fps [] (s':ss') =
    diff_added summary wt fps s' . recur_diff opts wt fps [] ss'
recur_diff (_,False,_) _ _ [] _ = id
recur_diff _ _ _ _ _ = impossible

-- diff, taking into account paranoidness and file type, two regular files
diff_regular_files :: Bool -> (FilePath -> FileType) -> FilePath -> Slurpy -> Slurpy -> (FL Prim -> FL Prim)
diff_regular_files ignoreTimes filetypeFunction f s1 s2 = 
    if maybe_differ   
        then case filetypeFunction (slurp_name s2) of                                     
               TextFile -> diff_files f b1 b2                    
               BinaryFile -> if b1 /= b2 then (binary f b1 b2:>:)
                                         else id                 
        else id
  where maybe_differ = ignoreTimes
                     || get_mtime s1 == undefined_time
                     || get_mtime s1 /= get_mtime s2
                     || get_length s1 == undefined_size
                     || get_length s1 /= get_length s2
        b1 = get_filecontents s1
        b2 = get_filecontents s2

-- creates a diff for a file or directory which needs to be added to the
-- repository
diff_added :: Bool -> (FilePath -> FileType) -> [FilePath] -> Slurpy
           -> (FL Prim -> FL Prim)
diff_added summary wt fps s
    | is_file s = case wt n of
                  TextFile -> (addfile f:>:) .
                              (if summary
                               then id
                               else diff_from_empty id f (get_filecontents s))
                  BinaryFile -> (addfile f:>:) .
                                (if summary then id else
                                (bin_patch f B.empty (get_filecontents s)))
    | otherwise {- is_dir s -} =
        (adddir f:>:)
      . foldr (.) id (map (diff_added summary wt (n:fps)) (get_dircontents s))
    where n = slurp_name s
          f = mk_filepath (n:fps)

get_text :: FileContents -> [B.ByteString]
get_text = linesPS

empt :: FileContents
empt = B.empty

diff_files :: FilePath -> FileContents -> FileContents
           -> (FL Prim -> FL Prim)
diff_files f o n | get_text o == [B.empty] && get_text n == [B.empty] = id
                 | get_text o == [B.empty] = diff_from_empty id f n
                 | get_text n == [B.empty] = diff_from_empty invert f o
diff_files f o n = if o == n
                   then id
                   else if has_bin o || has_bin n
                        then (binary f o n:>:)
                        else (canonize (hunk f 1 (linesPS o) (linesPS n)) +>+)

diff_from_empty :: (Prim -> Prim) -> FilePath -> FileContents
                -> (FL Prim -> FL Prim)
diff_from_empty inv f b =
    if b == B.empty
    then id
    else let p = if has_bin b
                 then binary f B.empty b
                 else if BC.last b == '\n'
                      then hunk f 1 [] $ init $ linesPS b
                      else hunk f 1 [B.empty] $ linesPS b
         in (inv p:>:)

{- | We take a B.ByteString which represents a file's contents, and we check to see
whether it is a 'binary' file or a 'textual' file. We define a textual file as any file
which does not contain two magic characters, '\0' (the NULL character on Unix) and '^Z'
(Control-Z, a DOS convention).

Note that to improve performance, we won't examine *all* of the string, because that
falls down on large files, but just the first 4096 characters. -}
has_bin :: FileContents -> Bool
has_bin = is_funky . B.take 4096
#endif

#ifndef GADT_WITNESSES
bin_patch :: FilePath -> B.ByteString -> B.ByteString
          -> FL Prim -> FL Prim
bin_patch f o n | B.null o && B.null n = id
                | otherwise = (binary f o n:>:)
#endif

#ifndef GADT_WITNESSES
diff_removed :: (FilePath -> FileType) -> [FilePath] -> Slurpy
             -> (FL Prim -> FL Prim)
diff_removed wt fps s
    | is_file s = case wt n of
                  TextFile -> diff_files f (get_filecontents s) empt
                            . (rmfile f:>:)
                  BinaryFile -> (bin_patch f
                                 (get_filecontents s) B.empty)
                              . (rmfile f:>:)
    | otherwise {- is_dir s -}
        = foldr (.) (rmdir f:>:)
        $ map (diff_removed wt (n:fps)) (get_dircontents s)
    where n = slurp_name s
          f = mk_filepath (n:fps)
#endif

sync :: String -> Slurpy -> Slurpy -> IO ()
sync path s1 s2
    | is_file s1 && is_file s2 &&
      (get_mtime s1 == undefined_time || get_mtime s1 /= get_mtime s2) &&
      get_length s1 == get_length s2 &&
      get_filecontents s1 == get_filecontents s2 =
        set_mtime n (get_mtime s2)
    | is_dir s1 && is_dir s2
        = n2 `seq` recur_sync n (get_dircontents s1) (get_dircontents s2)
    | otherwise = return ()
    where n2 = slurp_name s2
          n = path++"/"++n2
          set_mtime fname ctime = setFileTimes fname ctime ctime
                       `catchall` return ()
          recur_sync _ [] _ = return ()
          recur_sync _ _ [] = return ()
          recur_sync p (s:ss) (s':ss')
              | s < s' = recur_sync p ss (s':ss')
              | s > s' = recur_sync p (s:ss) ss'
              | otherwise = do sync p s s'
                               recur_sync p ss ss'


cmp :: FilePath -> FilePath -> IO Bool
cmp p1 p2 = do
  dir1 <- doesDirectoryExist p1
  dir2 <- doesDirectoryExist p2
  file1 <- doesFileExist p1
  file2 <- doesFileExist p2
  if dir1 && dir2
     then cmpdir p1 p2
     else if file1 && file2
          then cmpfile p1 p2
          else return False
cmpdir :: FilePath -> FilePath -> IO Bool
cmpdir d1 d2 = do
  fn1 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d1
  fn2 <- fmap (filter (\f->f/="." && f /="..")) $ getDirectoryContents d2
  if sort fn1 /= sort fn2
     then return False
     else andIO $ map (\fn-> cmp (d1++"/"++fn) (d2++"/"++fn)) fn1
andIO :: [IO Bool] -> IO Bool
andIO (iob:iobs) = do b <- iob
                      if b then andIO iobs else return False
andIO [] = return True
cmpfile :: FilePath -> FilePath -> IO Bool
cmpfile f1 f2 = do
  h1 <- openBinaryFile f1 ReadMode
  h2 <- openBinaryFile f2 ReadMode
  l1 <- hFileSize h1
  l2 <- hFileSize h2
  if l1 /= l2
     then do hClose h1
             hClose h2
             putStrLn $ "different file lengths for "++f1++" and "++f2
             return False
     else do b <- hcmp h1 h2
             when (not b) $ putStrLn $ "files "++f1++" and "++f2++" differ"
             hClose h1
             hClose h2
             return b
    where hcmp h1 h2 = do c1 <- B.hGet h1 1024
                          c2 <- B.hGet h2 1024
                          if c1 /= c2
                             then return False
                             else if B.length c1 == 1024
                                  then hcmp h1 h2
                                  else return True