{-# LANGUAGE DeriveFunctor #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Algorithm.Diff
-- Copyright   :  (c) Sterling Clover 2008-2011, Kevin Charter 2011
-- License     :  BSD 3 Clause
-- Maintainer  :  s.clover@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- This is an implementation of the O(ND) diff algorithm as described in
-- \"An O(ND) Difference Algorithm and Its Variations (1986)\"
-- <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927>. It is O(mn) in space.
-- The algorithm is the same one used by standared Unix diff.
-----------------------------------------------------------------------------

module Data.Algorithm.Diff
    ( Diff(..)
    -- * Comparing lists for differences
    , getDiff
    , getDiffBy

    -- * Finding chunks of differences
    , getGroupedDiff
    , getGroupedDiffBy
    ) where

import Prelude hiding (pi)

import Data.Array

data DI = F | S | B deriving (Int -> DI -> ShowS
[DI] -> ShowS
DI -> String
(Int -> DI -> ShowS)
-> (DI -> String) -> ([DI] -> ShowS) -> Show DI
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DI] -> ShowS
$cshowList :: [DI] -> ShowS
show :: DI -> String
$cshow :: DI -> String
showsPrec :: Int -> DI -> ShowS
$cshowsPrec :: Int -> DI -> ShowS
Show, DI -> DI -> Bool
(DI -> DI -> Bool) -> (DI -> DI -> Bool) -> Eq DI
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DI -> DI -> Bool
$c/= :: DI -> DI -> Bool
== :: DI -> DI -> Bool
$c== :: DI -> DI -> Bool
Eq)

-- | A value is either from the 'First' list, the 'Second' or from 'Both'.
-- 'Both' contains both the left and right values, in case you are using a form
-- of equality that doesn't check all data (for example, if you are using a
-- newtype to only perform equality on side of a tuple).
data Diff a = First a | Second a | Both a a deriving (Int -> Diff a -> ShowS
[Diff a] -> ShowS
Diff a -> String
(Int -> Diff a -> ShowS)
-> (Diff a -> String) -> ([Diff a] -> ShowS) -> Show (Diff a)
forall a. Show a => Int -> Diff a -> ShowS
forall a. Show a => [Diff a] -> ShowS
forall a. Show a => Diff a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Diff a] -> ShowS
$cshowList :: forall a. Show a => [Diff a] -> ShowS
show :: Diff a -> String
$cshow :: forall a. Show a => Diff a -> String
showsPrec :: Int -> Diff a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Diff a -> ShowS
Show, Diff a -> Diff a -> Bool
(Diff a -> Diff a -> Bool)
-> (Diff a -> Diff a -> Bool) -> Eq (Diff a)
forall a. Eq a => Diff a -> Diff a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Diff a -> Diff a -> Bool
$c/= :: forall a. Eq a => Diff a -> Diff a -> Bool
== :: Diff a -> Diff a -> Bool
$c== :: forall a. Eq a => Diff a -> Diff a -> Bool
Eq, a -> Diff b -> Diff a
(a -> b) -> Diff a -> Diff b
(forall a b. (a -> b) -> Diff a -> Diff b)
-> (forall a b. a -> Diff b -> Diff a) -> Functor Diff
forall a b. a -> Diff b -> Diff a
forall a b. (a -> b) -> Diff a -> Diff b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: a -> Diff b -> Diff a
$c<$ :: forall a b. a -> Diff b -> Diff a
fmap :: (a -> b) -> Diff a -> Diff b
$cfmap :: forall a b. (a -> b) -> Diff a -> Diff b
Functor)

data DL = DL {DL -> Int
poi :: !Int, DL -> Int
poj :: !Int, DL -> [DI]
path::[DI]} deriving (Int -> DL -> ShowS
[DL] -> ShowS
DL -> String
(Int -> DL -> ShowS)
-> (DL -> String) -> ([DL] -> ShowS) -> Show DL
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DL] -> ShowS
$cshowList :: [DL] -> ShowS
show :: DL -> String
$cshow :: DL -> String
showsPrec :: Int -> DL -> ShowS
$cshowsPrec :: Int -> DL -> ShowS
Show, DL -> DL -> Bool
(DL -> DL -> Bool) -> (DL -> DL -> Bool) -> Eq DL
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DL -> DL -> Bool
$c/= :: DL -> DL -> Bool
== :: DL -> DL -> Bool
$c== :: DL -> DL -> Bool
Eq)

instance Ord DL
        where DL
x <= :: DL -> DL -> Bool
<= DL
y = if DL -> Int
poi DL
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== DL -> Int
poi DL
y
                then  DL -> Int
poj DL
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> DL -> Int
poj DL
y
                else DL -> Int
poi DL
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= DL -> Int
poi DL
y

canDiag :: (a -> a -> Bool) -> [a] -> [a] -> Int -> Int -> Int -> Int -> Bool
canDiag :: (a -> a -> Bool) -> [a] -> [a] -> Int -> Int -> Int -> Int -> Bool
canDiag a -> a -> Bool
eq [a]
as [a]
bs Int
lena Int
lenb = \ Int
i Int
j ->
   if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lena Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lenb then (Array Int a
arAs Array Int a -> Int -> a
forall i e. Ix i => Array i e -> i -> e
! Int
i) a -> a -> Bool
`eq` (Array Int a
arBs Array Int a -> Int -> a
forall i e. Ix i => Array i e -> i -> e
! Int
j) else Bool
False
    where arAs :: Array Int a
arAs = (Int, Int) -> [a] -> Array Int a
forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
0,Int
lena Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) [a]
as
          arBs :: Array Int a
arBs = (Int, Int) -> [a] -> Array Int a
forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
0,Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) [a]
bs

dstep :: (Int -> Int -> Bool) -> [DL] -> [DL]
dstep :: (Int -> Int -> Bool) -> [DL] -> [DL]
dstep Int -> Int -> Bool
cd [DL]
dls = DL
hdDL -> [DL] -> [DL]
forall a. a -> [a] -> [a]
:[DL] -> [DL]
forall a. Ord a => [a] -> [a]
pairMaxes [DL]
rst
  where (DL
hd:[DL]
rst) = [DL] -> [DL]
nextDLs [DL]
dls
        nextDLs :: [DL] -> [DL]
nextDLs [] = []
        nextDLs (DL
dl:[DL]
rest) = DL
dl'DL -> [DL] -> [DL]
forall a. a -> [a] -> [a]
:DL
dl''DL -> [DL] -> [DL]
forall a. a -> [a] -> [a]
:[DL] -> [DL]
nextDLs [DL]
rest
          where dl' :: DL
dl'  = (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd (DL -> DL) -> DL -> DL
forall a b. (a -> b) -> a -> b
$ DL
dl {poi :: Int
poi=DL -> Int
poi DL
dl Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, path :: [DI]
path=(DI
F DI -> [DI] -> [DI]
forall a. a -> [a] -> [a]
: [DI]
pdl)}
                dl'' :: DL
dl'' = (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd (DL -> DL) -> DL -> DL
forall a b. (a -> b) -> a -> b
$ DL
dl {poj :: Int
poj=DL -> Int
poj DL
dl Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, path :: [DI]
path=(DI
S DI -> [DI] -> [DI]
forall a. a -> [a] -> [a]
: [DI]
pdl)}
                pdl :: [DI]
pdl = DL -> [DI]
path DL
dl
        pairMaxes :: [a] -> [a]
pairMaxes [] = []
        pairMaxes [a
x] = [a
x]
        pairMaxes (a
x:a
y:[a]
rest) = a -> a -> a
forall a. Ord a => a -> a -> a
max a
x a
ya -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a] -> [a]
pairMaxes [a]
rest

addsnake :: (Int -> Int -> Bool) -> DL -> DL
addsnake :: (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd DL
dl
    | Int -> Int -> Bool
cd Int
pi Int
pj = (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd (DL -> DL) -> DL -> DL
forall a b. (a -> b) -> a -> b
$
                 DL
dl {poi :: Int
poi = Int
pi Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, poj :: Int
poj = Int
pj Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, path :: [DI]
path=(DI
B DI -> [DI] -> [DI]
forall a. a -> [a] -> [a]
: DL -> [DI]
path DL
dl)}
    | Bool
otherwise   = DL
dl
    where pi :: Int
pi = DL -> Int
poi DL
dl; pj :: Int
pj = DL -> Int
poj DL
dl

lcs :: (a -> a -> Bool) -> [a] -> [a] -> [DI]
lcs :: (a -> a -> Bool) -> [a] -> [a] -> [DI]
lcs a -> a -> Bool
eq [a]
as [a]
bs = DL -> [DI]
path (DL -> [DI]) -> (DL -> DL) -> DL -> [DI]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [DL] -> DL
forall a. [a] -> a
head ([DL] -> DL) -> (DL -> [DL]) -> DL -> DL
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DL -> Bool) -> [DL] -> [DL]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (\DL
dl -> DL -> Int
poi DL
dl Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
lena Bool -> Bool -> Bool
|| DL -> Int
poj DL
dl Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
lenb) ([DL] -> [DL]) -> (DL -> [DL]) -> DL -> [DL]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
            [[DL]] -> [DL]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[DL]] -> [DL]) -> (DL -> [[DL]]) -> DL -> [DL]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([DL] -> [DL]) -> [DL] -> [[DL]]
forall a. (a -> a) -> a -> [a]
iterate ((Int -> Int -> Bool) -> [DL] -> [DL]
dstep Int -> Int -> Bool
cd) ([DL] -> [[DL]]) -> (DL -> [DL]) -> DL -> [[DL]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DL -> [DL] -> [DL]
forall a. a -> [a] -> [a]
:[]) (DL -> [DL]) -> (DL -> DL) -> DL -> [DL]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> Bool) -> DL -> DL
addsnake Int -> Int -> Bool
cd (DL -> [DI]) -> DL -> [DI]
forall a b. (a -> b) -> a -> b
$
            DL :: Int -> Int -> [DI] -> DL
DL {poi :: Int
poi=Int
0,poj :: Int
poj=Int
0,path :: [DI]
path=[]}
            where cd :: Int -> Int -> Bool
cd = (a -> a -> Bool) -> [a] -> [a] -> Int -> Int -> Int -> Int -> Bool
forall a.
(a -> a -> Bool) -> [a] -> [a] -> Int -> Int -> Int -> Int -> Bool
canDiag a -> a -> Bool
eq [a]
as [a]
bs Int
lena Int
lenb
                  lena :: Int
lena = [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
as; lenb :: Int
lenb = [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
bs

-- | Takes two lists and returns a list of differences between them. This is
-- 'getDiffBy' with '==' used as predicate.
getDiff :: (Eq t) => [t] -> [t] -> [Diff t]
getDiff :: [t] -> [t] -> [Diff t]
getDiff = (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
forall t. (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
getDiffBy t -> t -> Bool
forall a. Eq a => a -> a -> Bool
(==)

-- | Takes two lists and returns a list of differences between them, grouped
-- into chunks. This is 'getGroupedDiffBy' with '==' used as predicate.
getGroupedDiff :: (Eq t) => [t] -> [t] -> [Diff [t]]
getGroupedDiff :: [t] -> [t] -> [Diff [t]]
getGroupedDiff = (t -> t -> Bool) -> [t] -> [t] -> [Diff [t]]
forall t. (t -> t -> Bool) -> [t] -> [t] -> [Diff [t]]
getGroupedDiffBy t -> t -> Bool
forall a. Eq a => a -> a -> Bool
(==)

-- | A form of 'getDiff' with no 'Eq' constraint. Instead, an equality predicate
-- is taken as the first argument.
getDiffBy :: (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
getDiffBy :: (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
getDiffBy t -> t -> Bool
eq [t]
a [t]
b = [t] -> [t] -> [DI] -> [Diff t]
forall a. [a] -> [a] -> [DI] -> [Diff a]
markup [t]
a [t]
b ([DI] -> [Diff t]) -> ([DI] -> [DI]) -> [DI] -> [Diff t]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [DI] -> [DI]
forall a. [a] -> [a]
reverse ([DI] -> [Diff t]) -> [DI] -> [Diff t]
forall a b. (a -> b) -> a -> b
$ (t -> t -> Bool) -> [t] -> [t] -> [DI]
forall a. (a -> a -> Bool) -> [a] -> [a] -> [DI]
lcs t -> t -> Bool
eq [t]
a [t]
b
    where markup :: [a] -> [a] -> [DI] -> [Diff a]
markup (a
x:[a]
xs)   [a]
ys   (DI
F:[DI]
ds) = a -> Diff a
forall a. a -> Diff a
First a
x  Diff a -> [Diff a] -> [Diff a]
forall a. a -> [a] -> [a]
: [a] -> [a] -> [DI] -> [Diff a]
markup [a]
xs [a]
ys [DI]
ds
          markup   [a]
xs   (a
y:[a]
ys) (DI
S:[DI]
ds) = a -> Diff a
forall a. a -> Diff a
Second a
y Diff a -> [Diff a] -> [Diff a]
forall a. a -> [a] -> [a]
: [a] -> [a] -> [DI] -> [Diff a]
markup [a]
xs [a]
ys [DI]
ds
          markup (a
x:[a]
xs) (a
y:[a]
ys) (DI
B:[DI]
ds) = a -> a -> Diff a
forall a. a -> a -> Diff a
Both a
x a
y Diff a -> [Diff a] -> [Diff a]
forall a. a -> [a] -> [a]
: [a] -> [a] -> [DI] -> [Diff a]
markup [a]
xs [a]
ys [DI]
ds
          markup [a]
_ [a]
_ [DI]
_ = []

getGroupedDiffBy :: (t -> t -> Bool) -> [t] -> [t] -> [Diff [t]]
getGroupedDiffBy :: (t -> t -> Bool) -> [t] -> [t] -> [Diff [t]]
getGroupedDiffBy t -> t -> Bool
eq [t]
a [t]
b = [Diff t] -> [Diff [t]]
forall a. [Diff a] -> [Diff [a]]
go ([Diff t] -> [Diff [t]]) -> [Diff t] -> [Diff [t]]
forall a b. (a -> b) -> a -> b
$ (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
forall t. (t -> t -> Bool) -> [t] -> [t] -> [Diff t]
getDiffBy t -> t -> Bool
eq [t]
a [t]
b
    where go :: [Diff a] -> [Diff [a]]
go (First a
x  : [Diff a]
xs) = let ([a]
fs, [Diff a]
rest) = [Diff a] -> ([a], [Diff a])
forall a. [Diff a] -> ([a], [Diff a])
goFirsts  [Diff a]
xs in [a] -> Diff [a]
forall a. a -> Diff a
First  (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
fs)     Diff [a] -> [Diff [a]] -> [Diff [a]]
forall a. a -> [a] -> [a]
: [Diff a] -> [Diff [a]]
go [Diff a]
rest
          go (Second a
x : [Diff a]
xs) = let ([a]
fs, [Diff a]
rest) = [Diff a] -> ([a], [Diff a])
forall a. [Diff a] -> ([a], [Diff a])
goSeconds [Diff a]
xs in [a] -> Diff [a]
forall a. a -> Diff a
Second (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
fs)     Diff [a] -> [Diff [a]] -> [Diff [a]]
forall a. a -> [a] -> [a]
: [Diff a] -> [Diff [a]]
go [Diff a]
rest
          go (Both a
x a
y : [Diff a]
xs) = let ([(a, a)]
fs, [Diff a]
rest) = [Diff a] -> ([(a, a)], [Diff a])
forall b. [Diff b] -> ([(b, b)], [Diff b])
goBoth    [Diff a]
xs
                                   ([a]
fxs, [a]
fys) = [(a, a)] -> ([a], [a])
forall a b. [(a, b)] -> ([a], [b])
unzip [(a, a)]
fs
                               in [a] -> [a] -> Diff [a]
forall a. a -> a -> Diff a
Both (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
fxs) (a
ya -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
fys) Diff [a] -> [Diff [a]] -> [Diff [a]]
forall a. a -> [a] -> [a]
: [Diff a] -> [Diff [a]]
go [Diff a]
rest
          go [] = []

          goFirsts :: [Diff a] -> ([a], [Diff a])
goFirsts  (First a
x  : [Diff a]
xs) = let ([a]
fs, [Diff a]
rest) = [Diff a] -> ([a], [Diff a])
goFirsts  [Diff a]
xs in (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
fs, [Diff a]
rest)
          goFirsts  [Diff a]
xs = ([],[Diff a]
xs)

          goSeconds :: [Diff a] -> ([a], [Diff a])
goSeconds (Second a
x : [Diff a]
xs) = let ([a]
fs, [Diff a]
rest) = [Diff a] -> ([a], [Diff a])
goSeconds [Diff a]
xs in (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
fs, [Diff a]
rest)
          goSeconds [Diff a]
xs = ([],[Diff a]
xs)

          goBoth :: [Diff b] -> ([(b, b)], [Diff b])
goBoth    (Both b
x b
y : [Diff b]
xs) = let ([(b, b)]
fs, [Diff b]
rest) = [Diff b] -> ([(b, b)], [Diff b])
goBoth [Diff b]
xs    in ((b
x,b
y)(b, b) -> [(b, b)] -> [(b, b)]
forall a. a -> [a] -> [a]
:[(b, b)]
fs, [Diff b]
rest)
          goBoth    [Diff b]
xs = ([],[Diff b]
xs)