{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ScopedTypeVariables #-}

-- | Implementation of the Myers algorithm, from "An O(ND) Difference Algorithm
-- and Its Variations", by Eugene Myers page 6 (figure 2).
--
-- Specification: if
--
--    f1 (InBoth v) = Just v
--    f1 (InFirst v) = Just v
--    f1 (InSecond v) = Nothing
--
-- and
--
--    f2 (InBoth v) = Just v
--    f2 (InFirst v) = Nothing
--    f2 (InSecond v) = Just v
--
-- then
--
--    mapPartial f1 (diff l1 l2) == l1
--
-- and
--
--    mapPartial f2 (diff l1 l2) == l2
module Util.Myers(
diff,
diff2,
DiffElement(..),
) where

import Data.Array

import Data.Array.ST

import Util.ExtendedPrelude

-- -----------------------------------------------------------------------
-- Datatypes
-- -----------------------------------------------------------------------

data DiffElement v =
InBoth [v]
|  InFirst [v]
|  InSecond [v] deriving (Show)

-- -----------------------------------------------------------------------
-- The implementation.  The whole function, apart from the body of diff
-- itself, is taken from a message from Andrew Bromage
-- -----------------------------------------------------------------------

diff :: (Eq a) => [a] -> [a] -> [DiffElement a]
diff l1 l2 =
let
common = lcss l1 l2

addFirst :: [a] -> [DiffElement a] -> [DiffElement a]
addFirst l1 de0 = InFirst l1 : de0

addSecond :: [a] -> [DiffElement a] -> [DiffElement a]
addSecond l1 de0 = InSecond l1 : de0

doCommon :: Eq a => [a] -> [a] -> [a] -> [DiffElement a]
doCommon [] l1 l2 = (addFirst l1) . (addSecond l2) \$ []
doCommon (c:cs) l10 l20 =
let
Just (l1A,l11) = splitToElem (== c) l10
Just (l2A,l21) = splitToElem (== c) l20
de0 = doCommon cs l11 l21
de1 = case de0 of
(InBoth cs:rest) -> InBoth (c:cs):rest
_ -> InBoth [c] : de0
in
in
doCommon common l1 l2

-- stolen from message from Andrew Bromage
algb :: (Eq a) => [a] -> [a] -> [Int]
algb xs ys
= 0 : algb1 xs [ (y,0) | y <- ys ]
where
algb1 [] ys' = map snd ys'
algb1 (x:xs) ys'
= algb1 xs (algb2 0 0 ys')
where
algb2 _ _ [] = []
algb2 k0j1 k1j1 ((y,k0j):ys)
= let kjcurr = if x == y then k0j1+1 else max k1j1 k0j
in (y,kjcurr) : algb2 k0j kjcurr ys

algc :: (Eq a) => Int -> Int -> [a] -> [a] -> [a] -> [a]
algc m n xs []  = id
algc m n [x] ys = if x `elem` ys then (x:) else id
algc m n xs ys
= algc m2 k xs1 (take k ys) . algc (m-m2) (n-k) xs2 (drop k ys)
where
m2 = m `div` 2

xs1 = take m2 xs
xs2 = drop m2 xs

l1 = algb xs1 ys
l2 = reverse (algb (reverse xs2) (reverse ys))

k = findk 0 0 (-1) (zip l1 l2)

findk k km m [] = km
findk k km m ((x,y):xys)
| x+y >= m  = findk (k+1) k  (x+y) xys
| otherwise = findk (k+1) km m     xys

lcss :: (Eq a) => [a] -> [a] -> [a]
lcss xs ys = algc (length xs) (length ys) xs ys []

{- Here, as an appendix, is my slow inefficient version -}
diff2 :: Eq v => [v] -> [v] -> [DiffElement v]
diff2 [] [] = []
diff2 a b = runST (diffST2 a b)

-- NB. diffST does not work if both arguments are null, so that
-- case should be handled separately.
diffST2 :: forall v s . Eq v => [v] -> [v] -> ST s [DiffElement v]
diffST2 a b =
do
let
m = length a
(aArr :: Array Int v) = listArray (1,m) a

n = length b
(bArr :: Array Int v) = listArray (1,n) b

match :: Int -> Int -> Bool
match x y = (aArr ! x) == (bArr ! y)

-- Given (x,y) return the highest (x+k,y+k) such that (x+1,y+1),
-- (x+2,y+2)...(x+k,y+k) match.
scan :: Int -> Int -> (Int,Int)
scan x y =
if x < m && y < n
then
let
x' = x+1
y' = y+1
in
if match x' y' then scan x' y' else (x,y)
else
(x,y)

max = m+n
-- We do the computation using an STArray for V
-- We arrange that there is always a -1 on either side of the
-- existing range, to simplify handling of the end-cases.
(v :: STUArray s Int Int) <- newArray (-max-1,max+1) (-1)
writeArray v 1 0

-- The w array contains a list of integers (x,y) such that the snakes
-- starting from the elements (x+1,y+1) together make up all the snakes
-- needed in the optimal solution.
--
-- The idea is that storage for w should not get too big, either if a
-- and b are much the same, or if they are completely different.  Thus
-- in most cases quadratic behaviour *should* be avoided.
(w :: STArray s Int [(Int,Int)]) <- newArray (-max,max) []

let
-- step carries out the algorithm for a given (d,k), returning
-- the appropriate w-list.
step :: Int -> Int -> ST s [(Int,Int)]
step d k =
if k > d
then
innerStep (d+1) (-(d+1))
else
innerStep d k

innerStep :: Int -> Int -> ST s [(Int,Int)]
innerStep d k =
do
(x,l0) <- if vkminus < vkplus
then
do
return (vkplus,l0)
else
do
return (vkminus+1,l)
let
y = x - k

(x',_) = scan x y

l1 =
if x' == x
then
l0
else
(x,y) : l0

-- Can we finish now?
if x' >= m && (y + (x' - x)) >= n
then
return l1
else
do
writeArray v k x'
writeArray w k l1
step d (k+2)

snakes <- step 0 0

let
-- The task is now to reassemble snakes to produce a list.  Since
-- the snakes are given in reverse order, we may as well produce the
-- elements in that order and work backwards.

-> [DiffElement v] -> [DiffElement v]
-- We assume that elements a[lastX+1...] and b[lastY+1...] have
-- been dealt with, and we now add on a segment starting with a
-- snake which begins at (x+1,y+1).
let
-- Compute the end of the snake
(x',y') = scan x y

l1 = (InSecond (map (\ index -> bArr ! index)
[y'+1..lastY])) : l0
l2 = (InFirst (map (\ index -> aArr ! index)
[x'+1..lastX])) : l1
l3 = (InBoth (map (\ index -> aArr ! index)
[x+1..x'])) : l2
in
l3

doSnakes :: (Int,Int) -> [(Int,Int)] -> [DiffElement v]
-> [DiffElement v]
doSnakes last [] l0 =
-- we pretend there's a zero-length snake starting at (1,1).
if last /= (0,0) then addSnake last (0,0) l0 else l0
doSnakes last (s:ss) l0 =
let
l1 = addSnake last s l0
in
doSnakes s ss l1

result0 = doSnakes (m,n) snakes []

result1 = filter
-- Filter out null elements
(\ de -> case de of
InFirst [] -> False
InSecond [] -> False
InBoth [] -> False
_ -> True
)
result0

return result1
{- -}

-- | This version was posted to the Haskell mailing list by Gertjan Kamsteeg
-- on Sun, 15 Dec 2002.
-- But it seems to be slightly slower than the others.
{-

data In a = F a | S a | B a deriving Show

diff xs ys = steps ([(0,0,[],xs,ys)],[]) where
steps (((_,_,ws,[],[]):_),_) = reverse ws
steps d                      = steps (step d) where
step (ps,qs) = let (us,vs) = h1 ps in (h3 qs (h2 us),vs) where
h1 []     = ([],[])
h1 (p:ps) = let (rs,ss) = next p; (us,vs) = h1 ps in (rs++us,ss++vs)
where
next (k,n,ws,(x:xs),[])           = ([(k+1,n+1,F x:ws,xs,[])],[])
next (k,n,ws,[],(y:ys))           = ([(k-1,n+1,S y:ws,[],ys)],[])
next (k,n,ws,xs@(x:us),ys@(y:vs))
| x == y    = ([],[(k,n+1,B x:ws,us,vs)])
| otherwise = ([(k+1,n+1,F x:ws,us,ys),(k-1,n+1,S y:ws,xs,vs)],[])
h2 []                                   = []
h2 ps@[_]                               = ps
h2 (p@(k1,n1,_,_,_):ps@(q@(k2,n2,_,_,_):us))
| k1 == k2  = if n1 <= n2 then p:h2 us else q:h2 us
| otherwise = p:h2 ps
h3 ps [] = ps
h3 [] qs = qs
h3 (ps@(p@(k1,n1,_,_,_):us)) (qs@(q@(k2,n2,_,_,_):vs))
| k1 > k2   = p:h3 us qs
| k1 == k2  = if n1 <= n2 then p:h3 us vs else q:h3 us vs
| otherwise = q:h3 ps vs
-}