module Storage.Hashed.Diff where

import Prelude hiding ( read, lookup, filter )
import qualified Data.ByteString.Lazy.Char8 as BL
import Storage.Hashed.Tree
import Storage.Hashed.AnchoredPath
import Data.List.LCS
import Data.List ( groupBy )

unidiff :: Tree -> Tree -> IO BL.ByteString
unidiff l r =
    do (from, to) <- diffTrees l r
       diffs <- sequence $ zipCommonFiles diff from to
       return $ BL.concat diffs
    where diff p a b = do x <- read a
                          y <- read b
                          return $ diff' p x y
          diff' p x y =
              case unifiedDiff x y of
                x' | BL.null x' -> BL.empty
                   | otherwise ->
                       (BL.pack $ "--- " ++ anchorPath "old" p ++ "\n" ++
                              "+++ " ++ anchorPath "new" p ++ "\n")
                       `BL.append` x'

type Line = BL.ByteString
data WeaveLine = Common Line
               | Remove Line
               | Add Line
               | Replace Line Line
               | Skip Int deriving Show

-- | A weave -- two files woven together, with common and differing regions
-- marked up. Cf. 'WeaveLine'.
type Weave = [WeaveLine]

-- | Sort of a sub-weave.
type Hunk = [WeaveLine]

-- | Produce unified diff (in a string form, ie. formatted) from a pair of
-- bytestrings.
unifiedDiff :: BL.ByteString -> BL.ByteString -> BL.ByteString
unifiedDiff a b = printUnified $ concat $ unifiedHunks
    where unifiedHunks = reduceContext 3 $ map unifyHunk $ hunks $ weave a b

-- | Weave two bytestrings. Intermediate data structure for the actual unidiff
-- implementation. No skips are produced.
weave :: BL.ByteString -> BL.ByteString -> Weave
weave a' b' = weave' left common right
    where left = init' (BL.split '\n' a') -- drop trailing newline
          right = init' (BL.split '\n' b') -- drop trailing newline
          init' [] = []
          init' x = init x
          common = lcs left right
          weave' []     []     [] = []
          weave' []     c      [] = error $ "oops: Left & Right empty, Common: " ++ show c
          weave' []     []     (b:bs) = (Add b):weave' [] [] bs
          weave' (a:as) []     [] = (Remove a):weave' as [] []
          weave' (a:as) []     (b:bs) = (Replace a b):weave' as [] bs
          weave' (a:as) (c:cs) (b:bs)
                 | a == c && b == c = (Common a):weave' as cs bs
                 | a == c && b /= c = (Add b):weave' (a:as) (c:cs) bs
                 | a /= c && b == c = (Remove a):weave' as (c:cs) (b:bs)
                 | a /= c && b /= c = (Replace a b):weave' as (c:cs) bs
                 | otherwise = error "oops!"
          weave' a c b = error $ "oops: \nLeft: " ++ show a ++ "\nCommon: " ++ show c ++ "\nRight: " ++ show b

-- | Break up a Weave into hunks.
hunks :: Weave -> [Hunk]
hunks = groupBy grp
    where grp (Common _) (Common _) = True
          grp (Common _) _ = False
          grp _ (Common _) = False
          grp _ _ = True

-- | Reformat a Hunk into a format suitable for unified diff. Replaces are
-- turned into add/remove pairs, all removals in a hunk go before all
-- adds. Hunks of Common lines are left intact. Produces input suitable for
-- reduceContext.
unifyHunk :: Hunk -> Hunk
unifyHunk h = case h of
                (Common _:_) -> h
                _ -> reorder $ concatMap breakup h
    where reorder h' = [ Remove a | Remove a <- h' ] ++ [ Add a | Add a <- h' ]
          breakup (Replace f t) = [Remove f, Add t]
          breakup x = [x]

-- | Break up a 'Weave' into unified hunks, leaving @n@ lines of context around
-- every hunk. Consecutive Common lines not used as context are replaced with
-- Skips.
reduceContext :: Int -> [Hunk] -> [Hunk]
reduceContext n hs =
    case hs of
      [] -> []
      [Common _:_] -> []
      [x] -> [x]
      [h,t] -> reduce 0 n h : reduce n 0 t : []
      (h:rest) -> reduce 0 n h :
                    map (reduce n n) (init rest) ++
                    [reduce n 0 $ last rest]
    where
      reduce s e h@(Common _:_)
          | length h <= s + e = h
          | otherwise = take s h ++
                        [Skip $ length h - e - s ] ++
                        drop (length h - e) h
      reduce _ _ h = h

-- | Format a Weave for printing.
deweave :: Weave -> BL.ByteString
deweave = BL.unlines . map disp
    where disp (Common l) = BL.cons ' ' l
          disp (Remove l) = BL.cons '-' l
          disp (Add l) = BL.cons '+' l
          disp (Replace _ t) = BL.cons '!' t
          disp (Skip n) = BL.pack $ "-- skip " ++ show n ++ " lines --"

-- | Print a "hunked" weave in form of an unified diff. Hunk boundaries are
-- marked up as "Skip" lines. Cf. "reduceContext".
printUnified :: Weave -> BL.ByteString
printUnified hunked = printHunks 1 1 $ groupBy splits hunked
    where splits (Skip _) _ = False
          splits _ (Skip _) = False
          splits _ _ = True
          printHunks _ _ [] = BL.empty
          printHunks l r ([Skip n]:rest) =
              printHunks (n+l) (n+r) rest
          printHunks l r (h:rest) =
              (BL.pack $ "@@ -" ++ show l ++ "," ++
                show (removals h) ++ " +" ++ show r ++
                "," ++ show (adds h) ++ " @@\n")
              `BL.append` deweave h `BL.append`
               printHunks (l + removals h) (r + adds h) rest
          commons h = length [ () | (Common _) <- h ]
          adds h = commons h + length [ () | (Add _) <- h ]
          removals h = commons h + length [ () | (Remove _) <- h ]