-- | -- Module : Data.Git.Diff -- License : BSD-style -- Maintainer : Nicolas DI PRIMA -- Stability : experimental -- Portability : unix -- -- Basic Git diff methods. -- module Data.Git.Diff ( -- * Basic features BlobContent(..) , BlobState(..) , BlobStateDiff(..) , getDiffWith -- * Default helpers , HitDiffContent(..) , HitDiff(..) , defaultDiff , getDiff ) where import Control.Applicative ((<$>)) import Data.List (find, filter) import Data.Char (ord) import Data.Git import Data.Git.Repository import Data.Git.Storage import Data.Git.Storage.Object import Data.ByteString.Lazy.Char8 as L import Data.ByteString.Char8 as BS import Data.Algorithm.Patience as AP (Item(..), diff) -- | represents a blob's content (i.e., the content of a file at a given -- reference). data BlobContent = FileContent [L.ByteString] -- ^ Text file's lines | BinaryContent L.ByteString -- ^ Binary content deriving (Show) -- | This is a blob description at a given state (revision) data BlobState = BlobState { bsFilename :: BS.ByteString , bsMode :: Int , bsRef :: Ref , bsContent :: BlobContent } deriving (Show) -- | Two 'BlobState' are equal if they have the same filename, i.e., -- -- > ((BlobState x _ _ _) == (BlobState y _ _ _)) = (x == y) instance Eq BlobState where (BlobState f1 _ _ _) == (BlobState f2 _ _ _) = (f2 == f1) a /= b = not (a == b) -- | Represents a file state between two revisions -- A file (a blob) can be present in the first Tree's revision but not in the -- second one, then it has been deleted. If only in the second Tree's revision, -- then it has been created. If it is in the both, maybe it has been changed. data BlobStateDiff = OnlyOld BlobState | OnlyNew BlobState | OldAndNew BlobState BlobState getBinaryStat :: L.ByteString -> Double getBinaryStat bs = L.foldl' (\acc w -> acc + if isBin $ ord w then 1 else 0) 0 bs / (fromIntegral $ L.length bs) where isBin :: Int -> Bool isBin i | i >= 0 && i <= 8 = True | i == 12 = True | i >= 14 && i <= 31 = True | otherwise = False isBinaryFile :: L.ByteString -> Bool isBinaryFile file = let bs = L.take 512 file in getBinaryStat bs > 0.0 buildListForDiff :: Git -> Ref -> IO [BlobState] buildListForDiff git ref = do commit <- getCommit git ref tree <- resolveTreeish git $ commitTreeish commit case tree of Just t -> do htree <- buildHTree git t buildTreeList htree (BS.empty) _ -> error "cannot build a tree from this reference" where buildTreeList :: HTree -> BS.ByteString -> IO [BlobState] buildTreeList [] _ = return [] buildTreeList ((d,n,TreeFile r):xs) pathPrefix = do content <- catBlobFile r let isABinary = isBinaryFile content listTail <- buildTreeList xs pathPrefix case isABinary of False -> return $ (BlobState (BS.append pathPrefix n) d r (FileContent $ L.lines content)) : listTail True -> return $ (BlobState (BS.append pathPrefix n) d r (BinaryContent content)) : listTail buildTreeList ((_,n,TreeDir _ subTree):xs) pathPrefix = do l1 <- buildTreeList xs pathPrefix l2 <- buildTreeList subTree (BS.concat [pathPrefix, n, BS.pack "/"]) return $ l1 ++ l2 catBlobFile :: Ref -> IO L.ByteString catBlobFile blobRef = do mobj <- getObjectRaw git blobRef True case mobj of Nothing -> error "not a valid object" Just obj -> return $ oiData obj -- | generate a diff list between two revisions with a given diff helper. -- -- Useful to extract any kind of information from two different revisions. -- For example you can get the number of deleted files: -- -- > getdiffwith f 0 head^ head git -- > where f (OnlyOld _) acc = acc+1 -- > f _ acc = acc -- -- Or save the list of new files: -- -- > getdiffwith f [] head^ head git -- > where f (OnlyNew bs) acc = (bsFilename bs):acc -- > f _ acc = acc getDiffWith :: (BlobStateDiff -> a -> a) -- ^ diff helper (State -> accumulator -> accumulator) -> a -- ^ accumulator -> Ref -- ^ commit reference (the original state) -> Ref -- ^ commit reference (the new state) -> Git -- ^ repository -> IO a getDiffWith f acc ref1 ref2 git = do commit1 <- buildListForDiff git ref1 commit2 <- buildListForDiff git ref2 return $ Prelude.foldr f acc $ doDiffWith commit1 commit2 where doDiffWith :: [BlobState] -> [BlobState] -> [BlobStateDiff] doDiffWith [] [] = [] doDiffWith [bs1] [] = [OnlyOld bs1] doDiffWith [] (bs2:xs2) = (OnlyNew bs2):(doDiffWith [] xs2) doDiffWith (bs1:xs1) xs2 = let bs2Maybe = Data.List.find (\x -> x == bs1) xs2 in case bs2Maybe of Just bs2 -> let subxs2 = Data.List.filter (\x -> x /= bs2) xs2 in (OldAndNew bs1 bs2):(doDiffWith xs1 subxs2) Nothing -> (OnlyOld bs1):(doDiffWith xs1 xs2) -- | This is an example of how you can use Hit to get all of information -- between different revision. data HitDiffContent = HitDiffAddition BlobState | HitDiffDeletion BlobState | HitDiffChange [AP.Item L.ByteString] | HitDiffBinChange | HitDiffMode Int Int | HitDiffRefs Ref Ref deriving (Show) -- | This represents a diff. data HitDiff = HitDiff { hitFilename :: BS.ByteString , hitDiff :: [HitDiffContent] } deriving (Show) -- | A default Diff getter which returns all diff information (Mode, Content -- and Binary). -- -- > getDiff = getDiffWith defaultDiff getDiff :: Ref -- ^ commit ref -> Ref -- ^ commit ref -> Git -- ^ repository -> IO [HitDiff] getDiff = getDiffWith defaultDiff [] -- | A default diff helper. It is an example about how you can write your own -- diff helper or you can use it if you want to get all of differences. defaultDiff :: BlobStateDiff -> [HitDiff] -> [HitDiff] defaultDiff (OnlyOld old ) acc = (HitDiff (bsFilename old) ([HitDiffDeletion old])):acc defaultDiff (OnlyNew new) acc = (HitDiff (bsFilename new) ([HitDiffAddition new])):acc defaultDiff (OldAndNew old new) acc = let theDiffMode = if (bsMode old) == (bsMode new) then [] else [HitDiffMode (bsMode old) (bsMode new)] in case ((bsRef old) == (bsRef new)) of -- If the reference is the same, then there is no difference True -> if Prelude.null theDiffMode then acc else (HitDiff (bsFilename old) theDiffMode):acc False -> let theDiff = createANewDiff (bsContent old) (bsContent new) in if (onlyBothDiff $ Prelude.head theDiff) then (HitDiff (bsFilename old) ((HitDiffRefs (bsRef old) (bsRef new)):theDiffMode)):acc else (HitDiff (bsFilename old) ( theDiff ++ ((HitDiffRefs (bsRef old) (bsRef new)):theDiffMode))):acc where createANewDiff :: BlobContent -> BlobContent -> [HitDiffContent] createANewDiff (FileContent a) (FileContent b) = [HitDiffChange (diff a b)] createANewDiff (BinaryContent a) (BinaryContent b) = if a /= b then [HitDiffBinChange] else [] createANewDiff _ _ = [HitDiffBinChange] onlyBothDiff :: HitDiffContent -> Bool onlyBothDiff (HitDiffBinChange) = False onlyBothDiff (HitDiffChange l) = Prelude.all predicate l where predicate (Both _ _) = True predicate _ = False onlyBothDiff _ = True