-- |
-- Module      : Data.Git.Diff
-- License     : BSD-style
-- Maintainer  : Nicolas DI PRIMA <nicolas@di-prima.fr>
-- Stability   : experimental
-- Portability : unix
--
-- Basic Git diff methods.
--

module Data.Git.Diff
    (
    -- * Basic features
      BlobContent(..)
    , BlobState(..)
    , BlobStateDiff(..)
    , getDiffWith
    -- * Default helpers
    , HitDiff(..)
    , HitFileContent(..)
    , FilteredDiff(..)
    , HitFileRef(..)
    , HitFileMode(..)
    , TextLine(..)
    , defaultDiff
    , getDiff
    ) where

import Data.List (find, filter)
import Data.Char (ord)
import Data.Git
import Data.Git.Repository
import Data.Git.Storage
import Data.Git.Storage.Object
import Data.ByteString.Lazy.Char8 as L
import Data.ByteString.Char8 as BS

import Data.Algorithm.Patience as AP (Item(..), diff)

-- | represents a blob's content (i.e., the content of a file at a given
-- reference).
data BlobContent = FileContent [L.ByteString] -- ^ Text file's lines
                 | BinaryContent L.ByteString -- ^ Binary content
    deriving (Show)

-- | This is a blob description at a given state (revision)
data BlobState = BlobState
    { bsFilename :: BS.ByteString
    , bsMode     :: ModePerm
    , bsRef      :: Ref
    , bsContent  :: BlobContent
    }
    deriving (Show)

-- | Two 'BlobState' are equal if they have the same filename, i.e.,
--
-- > ((BlobState x _ _ _) == (BlobState y _ _ _)) = (x == y)
instance Eq BlobState where
    (BlobState f1 _ _ _) == (BlobState f2 _ _ _) = (f2 == f1)
    a /= b = not (a == b)

-- | Represents a file state between two revisions
-- A file (a blob) can be present in the first Tree's revision but not in the
-- second one, then it has been deleted. If only in the second Tree's revision,
-- then it has been created. If it is in the both, maybe it has been changed.
data BlobStateDiff = OnlyOld   BlobState
                   | OnlyNew   BlobState
                   | OldAndNew BlobState BlobState

buildListForDiff :: Git -> Ref -> IO [BlobState]
buildListForDiff git ref = do
    commit <- getCommit git ref
    tree   <- resolveTreeish git $ commitTreeish commit
    case tree of
        Just t -> do htree <- buildHTree git t
                     buildTreeList htree (BS.empty)
        _      -> error "cannot build a tree from this reference"
    where
        buildTreeList :: HTree -> BS.ByteString -> IO [BlobState]
        buildTreeList [] _ = return []
        buildTreeList ((d,n,TreeFile r):xs)  pathPrefix = do
            content <- catBlobFile r
            let isABinary = isBinaryFile content
            listTail <- buildTreeList xs pathPrefix
            case isABinary of
                False -> return $ (BlobState (BS.append pathPrefix n) d r (FileContent $ L.lines content)) : listTail
                True  -> return $ (BlobState (BS.append pathPrefix n) d r (BinaryContent content)) : listTail
        buildTreeList ((_,n,TreeDir _ subTree):xs) pathPrefix = do
            l1 <- buildTreeList xs      pathPrefix
            l2 <- buildTreeList subTree (BS.concat [pathPrefix, n, BS.pack "/"])
            return $ l1 ++ l2

        catBlobFile :: Ref -> IO L.ByteString
        catBlobFile blobRef = do
            mobj <- getObjectRaw git blobRef True
            case mobj of
                Nothing  -> error "not a valid object"
                Just obj -> return $ oiData obj
        getBinaryStat :: L.ByteString -> Double
        getBinaryStat bs = L.foldl' (\acc w -> acc + if isBin $ ord w then 1 else 0) 0 bs / (fromIntegral $ L.length bs)
            where
                isBin :: Int -> Bool
                isBin i
                    | i >= 0 && i <= 8   = True
                    | i == 12            = True
                    | i >= 14 && i <= 31 = True
                    | otherwise          = False

        isBinaryFile :: L.ByteString -> Bool
        isBinaryFile file = let bs = L.take 512 file
                            in  getBinaryStat bs > 0.0

-- | generate a diff list between two revisions with a given diff helper.
--
-- Useful to extract any kind of information from two different revisions.
-- For example you can get the number of deleted files:
--
-- > getdiffwith f 0 head^ head git
-- >     where f (OnlyOld _) acc = acc+1
-- >           f _           acc = acc
--
-- Or save the list of new files:
--
-- > getdiffwith f [] head^ head git
-- >     where f (OnlyNew bs) acc = (bsFilename bs):acc
-- >           f _            acc = acc
getDiffWith :: (BlobStateDiff -> a -> a) -- ^ diff helper (State -> accumulator -> accumulator)
            -> a                         -- ^ accumulator
            -> Ref                       -- ^ commit reference (the original state)
            -> Ref                       -- ^ commit reference (the new state)
            -> Git                       -- ^ repository
            -> IO a
getDiffWith f acc ref1 ref2 git = do
    commit1 <- buildListForDiff git ref1
    commit2 <- buildListForDiff git ref2
    return $ Prelude.foldr f acc $ doDiffWith commit1 commit2
    where
        doDiffWith :: [BlobState] -> [BlobState] -> [BlobStateDiff]
        doDiffWith []        []        = []
        doDiffWith [bs1]     []        = [OnlyOld bs1]
        doDiffWith []        (bs2:xs2) = (OnlyNew bs2):(doDiffWith [] xs2)
        doDiffWith (bs1:xs1) xs2       =
            let bs2Maybe = Data.List.find (\x -> x == bs1) xs2
            in  case bs2Maybe of
                    Just bs2 -> let subxs2 = Data.List.filter (\x -> x /= bs2) xs2
                                in  (OldAndNew bs1 bs2):(doDiffWith xs1 subxs2)
                    Nothing  -> (OnlyOld bs1):(doDiffWith xs1 xs2)

data TextLine = TextLine
    { lineNumber  :: Integer
    , lineContent :: L.ByteString
    }
instance Eq TextLine where
  a == b = (lineContent a) == (lineContent b)
  a /= b = not (a == b)
instance Ord TextLine where
  compare a b = compare (lineContent a) (lineContent b)
  a <  b     = (lineContent a) < (lineContent b)
  a <= b     = (lineContent a) <= (lineContent b)
  a >  b     = b < a
  a >= b     = b <= a

data FilteredDiff = NormalLine (Item TextLine) | Separator

data HitFileContent = NewBinaryFile
                    | OldBinaryFile
                    | NewTextFile  [TextLine]
                    | OldTextFile  [TextLine]
                    | ModifiedBinaryFile
                    | ModifiedFile [FilteredDiff]
                    | UnModifiedFile

data HitFileMode = NewMode        ModePerm
                 | OldMode        ModePerm
                 | ModifiedMode   ModePerm ModePerm
                 | UnModifiedMode ModePerm

data HitFileRef = NewRef        Ref
                | OldRef        Ref
                | ModifiedRef   Ref Ref
                | UnModifiedRef Ref

-- | This is a proposed diff records for a given file.
-- It contains useful information:
--   * the filename (with its path in the root project)
--   * a file diff (with the Data.Algorythm.Patience method)
--   * the file's mode (i.e. the file priviledge)
--   * the file's ref
data HitDiff = HitDiff
    { hFileName    :: BS.ByteString
    , hFileContent :: HitFileContent
    , hFileMode    :: HitFileMode
    , hFileRef     :: HitFileRef
    }

-- | A default Diff getter which returns all diff information (Mode, Content
-- and Binary) with a context of 5 lines.
--
-- > getDiff = getDiffWith (defaultDiff 5) []
getDiff :: Ref
        -> Ref
        -> Git
        -> IO [HitDiff]
getDiff = getDiffWith (defaultDiff 5) []

-- | A default diff helper. It is an example about how you can write your own
-- diff helper or you can use it if you want to get all of differences.
defaultDiff :: Int           -- ^ Number of line for context
            -> BlobStateDiff
            -> [HitDiff]     -- ^ Accumulator
            -> [HitDiff]     -- ^ Accumulator with a new content
defaultDiff _ (OnlyOld   old    ) acc =
    let oldMode    = OldMode (bsMode old)
        oldRef     = OldRef  (bsRef  old)
        oldContent = case bsContent old of
                         BinaryContent _ -> OldBinaryFile
                         FileContent   l -> OldTextFile (Prelude.zipWith TextLine [1..] l)
    in (HitDiff (bsFilename old) oldContent oldMode oldRef):acc
defaultDiff _ (OnlyNew       new) acc =
    let newMode    = NewMode (bsMode new)
        newRef     = NewRef  (bsRef  new)
        newContent = case bsContent new of
                         BinaryContent _ -> NewBinaryFile
                         FileContent   l -> NewTextFile (Prelude.zipWith TextLine [1..] l)
    in (HitDiff (bsFilename new) newContent newMode newRef):acc
defaultDiff context (OldAndNew old new) acc =
    let mode = if (bsMode old) /= (bsMode new) then ModifiedMode (bsMode old) (bsMode new)
                                               else UnModifiedMode (bsMode new)
        ref = if (bsRef old) == (bsRef new) then UnModifiedRef (bsRef new)
                                            else ModifiedRef (bsRef old) (bsRef new)
    in case (mode, ref) of
           ((UnModifiedMode _), (UnModifiedRef _)) -> acc
           _ -> (HitDiff (bsFilename new) (content ref) mode ref):acc
    where content :: HitFileRef -> HitFileContent
          content (UnModifiedRef _) = UnModifiedFile
          content _                 = createDiff (bsContent old) (bsContent new)

          createDiff :: BlobContent -> BlobContent -> HitFileContent
          createDiff (FileContent a) (FileContent b) =
              let linesA = Prelude.zipWith TextLine [1..] a
                  linesB = Prelude.zipWith TextLine [1..] b
              in ModifiedFile $ diffGetContext context (diff linesA linesB)
          createDiff _ _ = ModifiedBinaryFile

-- Used by diffGetContext
data HitwebAccu = AccuBottom | AccuTop

-- Context filter
diffGetContext :: Int -> [Item TextLine] -> [FilteredDiff]
diffGetContext 0 list = fmap NormalLine list
diffGetContext context list =
    let (_, _, filteredDiff) = Prelude.foldr filterContext (0, AccuBottom, []) list
        theList = removeTrailingBoth filteredDiff
    in case Prelude.head theList of
        (NormalLine (Both l1 _)) -> if (lineNumber l1) > 1 then Separator:theList
                                                           else theList
        _ -> theList
    where -- only keep 'context'. The file is annalyzed from the bottom to the top.
          -- The accumulator here is a tuple3 with (the line counter, the
          -- direction and the list of elements)
          filterContext :: (Item TextLine) -> (Int, HitwebAccu, [FilteredDiff]) -> (Int, HitwebAccu, [FilteredDiff])
          filterContext (Both l1 l2) (c, AccuBottom, acc) =
              if c < context then (c+1, AccuBottom, (NormalLine (Both l1 l2)):acc)
                             else (c  , AccuBottom, (NormalLine (Both l1 l2))
                                                    :((Prelude.take (context-1) acc)
                                                    ++ [Separator]
                                                    ++ (Prelude.drop (context+1) acc)))
          filterContext (Both l1 l2) (c, AccuTop, acc) =
              if c < context then (c+1, AccuTop   , (NormalLine (Both l1 l2)):acc)
                             else (0  , AccuBottom, (NormalLine (Both l1 l2)):acc)
          filterContext element (_, _, acc) =
              (0, AccuTop, (NormalLine element):acc)

          startWithSeparator :: [FilteredDiff] -> Bool
          startWithSeparator [] = False
          startWithSeparator (Separator:_) = True
          startWithSeparator ((NormalLine l):xs) =
              case l of
                  (Both _ _) -> startWithSeparator xs
                  _          -> False

          removeTrailingBoth :: [FilteredDiff] -> [FilteredDiff]
          removeTrailingBoth diffList =
              let test = startWithSeparator diffList
              in  if test then Prelude.tail $ Prelude.dropWhile (\a -> not $ startWithSeparator [a]) diffList
                          else diffList