module Filediff.Sequence
(
SeqDiff(..)
, diffSequences
, applySequenceDiff
) where
import GHC.Generics
import Data.Default
import Data.MemoCombinators.Class (MemoTable, table)
import qualified Data.MemoCombinators as Memo
import Data.List ((\\), sort, intersectBy)
import Zora.List (merge, merge_by)
import Data.Monoid
data SeqDiff a = SeqDiff {
dels :: [Int]
, adds :: [(Int, a)] }
deriving (Show, Eq, Generic)
instance Default (SeqDiff a) where
def :: SeqDiff a
def = SeqDiff [] []
instance (Eq a, MemoTable a) => Monoid (SeqDiff a) where
mempty :: SeqDiff a
mempty = SeqDiff [] []
mappend :: SeqDiff a -> SeqDiff a -> SeqDiff a
mappend
(SeqDiff abDels abAdds)
(SeqDiff bcDels bcAdds)
= SeqDiff acDels acAdds
where
acDels :: [Int]
acDels = merge abDels bDelsFromA
bDelsFromA :: [Int]
bDelsFromA
= map fst $ intersectBy
(\(ai, bi) (_, biDeleted) -> bi == biDeleted)
aIndicesInB
$ zip (repeat 0) bcDels
aIndicesInB :: [(Int, Int)]
aIndicesInB = map (\(b,a) -> (a,b)) $ indicesAfterAdds 0 survivingAIndices (map fst abAdds)
survivingAIndices :: [Int]
survivingAIndices = if null abDels
then []
else [0..(maximum abDels)] \\ abDels
indicesAfterAdds :: Int -> [b] -> [Int] -> [(Int, b)]
indicesAfterAdds _ [] _ = []
indicesAfterAdds i elems@(x:xs) [] = (:) (i, x) $ indicesAfterAdds (i + 1) xs []
indicesAfterAdds i elems@(x:xs) adds@(a:as) =
if i < a
then (:) (i, x) $ indicesAfterAdds (i + 1) xs (a:as)
else indicesAfterAdds (i + 1) (x:xs) as
acAdds :: [(Int, a)]
acAdds = merge_by (\(i,_) (j,_) -> i `compare` j) bcAdds cAddsFromA
cAddsFromA :: [(Int, a)]
cAddsFromA = indicesAfterAdds 0 (map snd survivingABAdds) (map fst bcAdds)
survivingABAdds :: [(Int, a)]
survivingABAdds = survivingABAdds' abAdds bcDels
survivingABAdds' :: [(Int, a)] -> [Int] -> [(Int, a)]
survivingABAdds' [] _ = []
survivingABAdds' adds [] = adds
survivingABAdds' (a:adds) (d:dels) =
case (fst a) `compare` d of
LT -> (:) a $ survivingABAdds' adds (d:dels)
EQ -> survivingABAdds' adds dels
GT -> survivingABAdds' (a:adds) dels
diffSequences :: forall a. (Eq a, MemoTable a) => [a] -> [a] -> SeqDiff a
diffSequences a b = SeqDiff
(nonSubsequenceIndices common a)
(getProgressiveIndicesToAdd common b)
where
common :: [a]
common = longestCommonSubsequence a b
getProgressiveIndicesToAdd :: (Eq a) => [a] -> [a] -> [(Int, a)]
getProgressiveIndicesToAdd sub super =
map (\i -> (i, super !! i)) $ nonSubsequenceIndices sub super
applySequenceDiff :: forall a. (Eq a) => SeqDiff a -> [a] -> [a]
applySequenceDiff (SeqDiff dels adds)
= insertAtProgressiveIndices adds . removeAtIndices dels
where
insertAtProgressiveIndices :: [(Int, a)] -> [a] -> [a]
insertAtProgressiveIndices = insertAtProgressiveIndices' 0
insertAtProgressiveIndices' :: Int -> [(Int, a)] -> [a] -> [a]
insertAtProgressiveIndices' _ [] dest = dest
insertAtProgressiveIndices' curr src@((i,s):src') [] =
s : insertAtProgressiveIndices' (succ curr) src' []
insertAtProgressiveIndices' curr src@((i,s):src') dest@(d:dest') =
if i == curr
then s : insertAtProgressiveIndices' (succ curr) src' dest
else d : insertAtProgressiveIndices' (succ curr) src dest'
longestCommonSubsequence :: forall a. (MemoTable a, Eq a) =>
[a] -> [a] -> [a]
longestCommonSubsequence
= Memo.memo2
(Memo.list table)
(Memo.list table)
longestCommonSubsequence'
where
longestCommonSubsequence' :: [a] -> [a] -> [a]
longestCommonSubsequence' [] _ = []
longestCommonSubsequence' _ [] = []
longestCommonSubsequence' (x:xs) (y:ys) =
if x == y
then x : (longestCommonSubsequence xs ys)
else if (length caseX) > (length caseY)
then caseX
else caseY
where
caseX :: [a]
caseX = longestCommonSubsequence xs (y:ys)
caseY :: [a]
caseY = longestCommonSubsequence (x:xs) ys
subsequenceIndices :: (Eq a) => [a] -> [a] -> [Int]
subsequenceIndices [] _ = []
subsequenceIndices _ [] = error "`sub` was not a subsequence of `super`"
subsequenceIndices sub@(a:sub') super@(b:super') =
if a == b
then 0 : map succ (subsequenceIndices sub' super')
else map succ (subsequenceIndices sub super')
nonSubsequenceIndices :: (Eq a) => [a] -> [a] -> [Int]
nonSubsequenceIndices sub super =
[0..(length super 1)] \\ (subsequenceIndices sub super)
removeAtIndices :: forall a. [Int] -> [a] -> [a]
removeAtIndices = removeAtIndices' 0
where
removeAtIndices' :: Int -> [Int] -> [a] -> [a]
removeAtIndices' _ [] xs = xs
removeAtIndices' curr (i:is) (x:xs) =
if curr == i
then removeAtIndices' (succ curr) is xs
else x : removeAtIndices' (succ curr) (i:is) xs