{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE InstanceSigs #-} -- | The module exposing the functionality of this package module Filediff ( -- * lists diffLists , applyListDiff -- * files , diffFiles , applyToFile -- * directories , diffDirectories , diffDirectoriesWithIgnoredSubdirs , applyToDirectory ) where import Debug.Trace import qualified Data.HashMap as HMap import Control.Concurrent (forkIO) import Control.Concurrent.Thread as Thread (Result(..)) import Control.Concurrent.Thread.Group as ThreadGroup (new, forkIO, wait) import qualified System.IO as IO import qualified System.Directory as D import qualified Data.Text as T import qualified Data.Text.IO as TIO import Data.MemoCombinators.Class (MemoTable, table) import qualified Data.MemoCombinators as Memo -- function imports import Data.Maybe (isJust, fromJust, catMaybes) import Data.List ((\\), intersect) import Data.Monoid import Control.Applicative import Control.Monad import Control.Monad.Trans.Either import Control.Monad.IO.Class (liftIO) -- Filediff imports import Filediff.Types import Filediff.Utils ( () , (<.>) , getFileDirectory , removeDotDirs , createFileWithContents , dropUntil , removeFirstPathComponent , removePathComponents , getDirectoryContentsRecursiveSafe , isPrefix , dropPrefix , dropInitialSlash , dropTrailingSlash ) -- * basic operations -- | /O(mn)/. Compute the difference between the two files (more -- specifically, the minimal number of changes to make to transform the -- file residing at the location specified by the first -- parameter into the second). Throws an exception if either or both of -- the parameters point to a directory, not a file. -- -- Files are allowed to not exist at either or both of the parameters. diffFiles :: FilePath -> FilePath -> IO Filediff diffFiles a b = do aIsDir <- D.doesDirectoryExist a bIsDir <- D.doesDirectoryExist b when (aIsDir || bIsDir) $ error $ "One or both of " ++ a ++ " and " ++ b ++ "is not a file, but a directory." aExists <- D.doesFileExist a bExists <- D.doesFileExist b case (aExists, bExists) of (False, False) -> return $ Filediff a b mempty (False, True ) -> addCase (True , False) -> delCase (True , True ) -> modCase where addCase :: IO Filediff addCase = do let aLines = [] bLines <- T.lines <$> TIO.readFile b return Filediff { base = a , comp = b , change = Add $ diffLists aLines bLines } delCase :: IO Filediff delCase = do aLines <- T.lines <$> TIO.readFile a let bLines = [] return Filediff { base = a , comp = b , change = Del $ diffLists aLines bLines } modCase :: IO Filediff modCase = do aLines <- T.lines <$> TIO.readFile a bLines <- T.lines <$> TIO.readFile b return Filediff { base = a , comp = b , change = Mod $ diffLists aLines bLines } -- | Compute the difference between the two directories (more -- specifically, the minimal number of changes to make to transform the -- directory residing at the location specified by the first -- parameter into the second). Throws an exception if either or both of -- the parameters point to a file, not a directory. diffDirectories :: FilePath -> FilePath -> IO Diff diffDirectories a b = diffDirectoriesWithIgnoredSubdirs a b [] [] -- | Diff two directories, ignoring some subdirectories. The first -- `[FilePath]` parameter refers to the first `FilePath` parameter, -- and same for the second, respectively. diffDirectoriesWithIgnoredSubdirs :: FilePath -> FilePath -> [FilePath] -> [FilePath] -> IO Diff diffDirectoriesWithIgnoredSubdirs a' b' aToIgnore bToIgnore = do aIsFile <- D.doesFileExist a bIsFile <- D.doesFileExist b when (aIsFile || bIsFile) $ error $ "One or both of " ++ a ++ " and " ++ b ++ "is not a directory, but a file." aContents' <- getDirectoryContentsRecursiveSafe a bContents' <- getDirectoryContentsRecursiveSafe b let aContents = filter (not . shouldIgnore aToIgnore) aContents' let bContents = filter (not . shouldIgnore bToIgnore) bContents' intersectionDiffs <- getDiffs $ intersect aContents bContents aOnlyDiffs <- getDiffs $ aContents \\ bContents bOnlyDiffs <- getDiffs $ bContents \\ aContents let allDiffs = map makeRelative $ intersectionDiffs ++ aOnlyDiffs ++ bOnlyDiffs return $ Diff allDiffs where a :: FilePath a = dropTrailingSlash a' b :: FilePath b = dropTrailingSlash b' -- | `x` is the prefix of the "base" of the diff; `y` is the -- | "compare". getDiffs :: [FilePath] -> IO [Filediff] getDiffs filepaths = filter (not . isIdentityFileDiff) <$> mapM (\fp -> diffFiles (a fp) (b fp)) filepaths isIdentityFileDiff :: Filediff -> Bool isIdentityFileDiff = (==) mempty . change makeRelative :: Filediff -> Filediff makeRelative (Filediff base comp change) = Filediff (dropInitialSlash $ dropPrefix a base) (dropInitialSlash $ dropPrefix b comp) change shouldIgnore :: [FilePath] -> FilePath -> Bool shouldIgnore toIgnore filepath = any (flip isPrefix $ filepath) toIgnore -- | /O(n)/. Apply a diff to a file. Throws an exception if the -- application fails. applyToFile :: Filediff -> FilePath -> IO [Line]--EitherT Error IO () applyToFile (Filediff _ _ change) filepath = do case change of Del _ -> delCase Mod listdiff -> modCase listdiff Add listdiff -> addCase listdiff where delCase :: IO [Line] delCase = D.removeFile filepath >> return [] addCase :: ListDiff Line -> IO [Line] addCase listDiff = createFileWithContents filepath "" >> modCase listDiff modCase :: ListDiff Line -> IO [Line] modCase listDiff = do -- Data.Text.IO.readFile is strict, which is what we -- need, here (because of the write right after) file <- TIO.readFile filepath let result = applyListDiff listDiff . T.lines $ file TIO.writeFile filepath (safeInit . T.unlines $ result) -- `init` for trailing \n return result safeInit :: T.Text -> T.Text safeInit x = if T.null x then x else T.init x -- | Applies a `Diff` to a directory. Throws an exception if the -- application fails. applyToDirectory :: Diff -> FilePath -> IO () applyToDirectory (Diff filediffs) filepath = void $ mapMParallelWaitForAll (void . apply) filediffs where apply :: Filediff -> IO [Line] apply diff@(Filediff base compare _) = applyToFile diff (filepath base) mapMParallelWaitForAll :: (a -> IO b) -> [a] -> IO [Thread.Result b] mapMParallelWaitForAll f list = do group <- ThreadGroup.new ioResults <- mapM (fmap snd . ThreadGroup.forkIO group . f) list ThreadGroup.wait group sequence ioResults -- | Computes the minimal number of additions and deletions needed to -- transform the first parameter into the second. -- -- > λ diffLists "abcdefg" "wabxyze" -- > ListDiff {dels = [(2,'c'),(3,'d'),(5,'f'),(6,'g')], adds = [(0,'w'),(3,'x'),(4,'y'),(5,'z')]} diffLists :: forall a. (Eq a) => [a] -> [a] -> ListDiff a diffLists a b = ListDiff (map (\i -> (i, a !! i)) $ nonSubsequenceIndices common a) (getProgressiveIndicesToAdd common b) where common :: [a] common = longestCommonSubsequenceWrapper a b -- | > λ add -- > [(0,"w"),(3,"x"),(4,"y")] -- > λ common -- > ["a","b","e"] getProgressiveIndicesToAdd :: [a] -> [a] -> [(Int, a)] getProgressiveIndicesToAdd sub super = map (\i -> (i, super !! i)) $ nonSubsequenceIndices sub super -- | > λ diffLists "abcdefg" "wabxyze" -- > ListDiff {dels = [(2,'c'),(3,'d'),(5,'f'),(6,'g')], adds = [(0,'w'),(3,'x'),(4,'y'),(5,'z')]} -- > λ applyListDiff it "abcdefg" -- > "wabxyze" -- -- Throws an exception if the diff can't be applied. applyListDiff :: forall a. (Eq a) => ListDiff a -> [a] -> [a] applyListDiff (ListDiff dels adds) = insertAtProgressiveIndices adds . removeAtIndices dels where -- | Best explained by example: -- | -- | > λ insertAtProgressiveIndices [(1,'a'),(3,'b')] "def" -- | > "daebf" insertAtProgressiveIndices :: [(Int, a)] -> [a] -> [a] insertAtProgressiveIndices = insertAtProgressiveIndices' 0 insertAtProgressiveIndices' :: Int -> [(Int, a)] -> [a] -> [a] insertAtProgressiveIndices' _ [] dest = dest insertAtProgressiveIndices' curr src@((i,s):src') [] = s : insertAtProgressiveIndices' (succ curr) src' [] insertAtProgressiveIndices' curr src@((i,s):src') dest@(d:dest') = if i == curr then s : insertAtProgressiveIndices' (succ curr) src' dest else d : insertAtProgressiveIndices' (succ curr) src dest' -- all functions below are not exposed -- don't hit the memotable if not necessary longestCommonSubsequenceWrapper :: forall a. (Eq a) => [a] -> [a] -> [a] longestCommonSubsequenceWrapper xs ys = if xs == ys then xs -- (WLOG) don't want to return xs ++ xs else commonPrefix ++ longestCommonSubsequence (getMiddle xs) (getMiddle ys) ++ commonSuffix where commonPrefix :: [a] commonPrefix = getCommonPrefix xs ys -- drop (length commonPrefix) to prevent the "abc" vs. "abc*abc" case commonSuffix :: [a] commonSuffix = reverse (getCommonPrefix (reverse (drop (length commonPrefix) xs)) (reverse (drop (length commonPrefix) ys))) getCommonPrefix :: [a] -> [a] -> [a] getCommonPrefix as bs = map fst . takeWhile (uncurry (==)) $ zip as bs -- xs = abcd***efg -- ys = abcd???????efg -- getMiddle xs == **** -- getMiddle ys = ?????? getMiddle :: [a] -> [a] getMiddle elems = take (length elems - length commonPrefix - length commonSuffix) . drop (length commonPrefix) $ elems -- | Compute the longest common (potentially noncontiguous) subsequence -- between two sequences. Element type is fixed because memoization -- requires a static type. longestCommonSubsequence :: forall a. (Eq a) => [a] -> [a] -> [a] longestCommonSubsequence xs ys = longestCommonSubsequence' 0 0 where -- TODO: UArray? xs' :: HMap.Map Int a xs' = foldl update HMap.empty (zip [0..] xs) ys' :: HMap.Map Int a ys' = foldl update HMap.empty (zip [0..] ys) update :: HMap.Map Int a -> (Int, a) -> HMap.Map Int a update hmap (i, a) = HMap.insert i a hmap xsLength :: Int xsLength = length xs ysLength :: Int ysLength = length ys longestCommonSubsequence' :: Int -> Int -> [a] longestCommonSubsequence' = Memo.memo2 Memo.integral Memo.integral longestCommonSubsequence'' longestCommonSubsequence'' :: Int -> Int -> [a] longestCommonSubsequence'' i j | i == xsLength = [] | j == ysLength = [] | x == y = x : longestCommonSubsequence' (i + 1) (j + 1) -- WLOG | length caseX > length caseY = caseX | otherwise = caseY where x :: a x = xs' HMap.! i y :: a y = ys' HMap.! j caseX :: [a] caseX = longestCommonSubsequence' (i + 1) j caseY :: [a] caseY = longestCommonSubsequence' i (j + 1) -- | When `sub` is a (not necessarily contiguous) subsequence of `super`, -- get the index at which each element of `sub` appears. E.g. -- -- > λ subsequenceIndices "abe" "abcdefg" -- > [0,1,4] subsequenceIndices :: (Eq a) => [a] -> [a] -> [Int] subsequenceIndices [] _ = [] subsequenceIndices _ [] = error "`sub` was not a subsequence of `super`" subsequenceIndices sub@(a:sub') super@(b:super') = if a == b then 0 : map succ (subsequenceIndices sub' super') else map succ (subsequenceIndices sub super') -- | When `sub` is a (not necessarily contiguous) subsequence of `super`, -- get the indices at which elements of `sub` do *not* appear. E.g. -- -- > λ nonSubsequenceIndices "abe" "abcdefg" -- > [2,3,5,6] nonSubsequenceIndices :: (Eq a) => [a] -> [a] -> [Int] nonSubsequenceIndices sub super = [0..(length super - 1)] \\ (subsequenceIndices sub super) -- | /O(n)/. `indices` parameter *must* be sorted in increasing order, -- and indices must all exist. Throws an exception if the provided -- list doesn't have those elements at those indices. removeAtIndices :: forall a. (Eq a) => [(Int, a)] -> [a] -> [a] removeAtIndices dels list = if not matches then error $ "Fatal: can't apply this diff to this list." else removeAtIndices' 0 (map fst dels) list where matches :: Bool matches = all (\(i, ch) -> (list !! i) == ch) dels removeAtIndices' :: Int -> [Int] -> [a] -> [a] removeAtIndices' _ [] xs = xs removeAtIndices' curr (i:is) (x:xs) = if curr == i then removeAtIndices' (succ curr) is xs else x : removeAtIndices' (succ curr) (i:is) xs