{-#  LANGUAGE GADTs  #-}
{-#  LANGUAGE KindSignatures  #-}
{-#  LANGUAGE TypeFamilies  #-}
{-#  LANGUAGE TypeOperators  #-}
{-#  LANGUAGE MultiParamTypeClasses  #-}
{-#  LANGUAGE FunctionalDependencies  #-}
{-#  LANGUAGE FlexibleInstances  #-}
{-#  LANGUAGE RankNTypes  #-}
{-#  LANGUAGE ScopedTypeVariables  #-}
{-#  LANGUAGE OverlappingInstances  #-} --  Only for the Show

{- |

Use this library to get an efficient, optimal, type-safe 'diff' and
'patch' function for your datatypes of choice by defining a simple GADT and
some class instances.  The process is explained in the documentation of the
'Family' type class.

The result of 'diff' is an optimal 'EditScript' that contains the operations
that need to be performed to get from the /source/ value to the /destination/
value. The edit script can be used by 'patch', inspected with 'show' and used
for all kinds of interesting stuff /you/ come up with.

The library has been extracted from Eelco Lempsink's Master's Thesis 
/Generic type-safe diff and patch for families of datatypes/ (available online:
<http://eelco.lempsink.nl/thesis.pdf>).  See Appendix C for a large example.
Note that some types and functions have been renamed for the benefit of the API
(e.g., @diff@ to @diffL@, @diff1@ to @diff@, @Diff@ to @EditScriptL@).

-}
module Data.Generic.Diff (
    -- * Diffing and patchLing
    diff,
    patch,  
    -- ** For multiple datatypes 
    diffL,
    patchL, 
    -- ** Patch compression
    compress,
    -- * Patch datatype
    EditScriptL(..),
    EditScript,
    -- * Type classes
    Family(..),
    Type(..),
    -- ** Supporting datatypes 
    (:=:)(..),
    Con(..),
    Nil(..), 
    Cons(..),
) where

-- | Edit script type for two single values.
type EditScript f x y = EditScriptL f (Cons x Nil) (Cons y Nil)

-- | Apply the edit script to value.
patch :: EditScript f x y -> x -> y
patch d x = case patchL d (CCons x CNil) of
               CCons y CNil -> y

-- | Calculate the difference between two values in the form of an edit script.
--
-- See the documentation for 'Family' for how to make your own data types work
-- with this function.
diff :: (Type f x, Type f y) => x -> y -> EditScript f x y
diff x y = diffL (CCons x CNil) (CCons y CNil)

-- | Underlying implementation of 'patch', works with (heterogeneous) lists of
-- values.
patchL :: forall f txs tys . EditScriptL f txs tys -> txs -> tys
patchL (Ins  c   d) = insert c .  patchL d
patchL (Del  c   d) =             patchL d . delete c
patchL (Cpy  c   d) = insert c .  patchL d . delete c
patchL (CpyTree  d) = \(CCons x xs) -> CCons x . patchL d $ xs 

-- | Underlying implementation of 'diff', works with (heterogeneous) lists of
-- values.
diffL :: (Family f, List f txs, List f tys) => txs -> tys -> EditScriptL f txs tys
diffL x y = getEditScriptL $ diffLMemo x y


-- | Datatype for type level lists, corresponding to '[]'.  Use when creating
-- your 'Family' instance.
data Nil        = CNil
-- | Datatype for type level lists, corresponding to '(:)'.  Use when creating
-- your 'Family' instance.
data Cons x xs  = CCons x xs

{- | 

To use 'diff' and 'patch' on your datatypes, you must create an instance of
'Family'.  

There are four steps to set up everything for 'diff' and 'patch'.

(1) Define your datatypes.  (Presumable, you already have done this.)

(2) Create a family datatype, grouping your datatypes together.

(3) Make the family datatype an instance of 'Family'

(4) Create 'Type' instances for each member of the family.

Steps 1-3 are explained below, step 4 is explained in the documentation for
'Type'.

As a running example, we create a simple family of datatypes (step 1):

> data Expr  =  Min Expr Term
> data Term  =  Parens Expr
>            |  Number Int

The second step is creating the family datatype.  Each constructor in the
datatypes above gets a constructor in a family GADT:

> data ExprTermFamily :: * -> * -> * where
>     Min'     ::          ExprTermFamily Expr (Cons Expr (Cons Term Nil)) 
>     Parens'  ::          ExprTermFamily Term (Cons Expr Nil)
>     Number'  ::          ExprTermFamily Term (Cons Int Nil)
>     Int'     ::  Int ->  ExprTermFamily Int  Nil

The first type argument of the datatype must be the resulting type of the
constructor.  The second argument captures the types of the arguments the
constructor expects.  Note how to use 'Cons' and 'Nil' to create type level
lists.

The @Int'@ constructor is special, in the sense that it captures the 'Int' type
abstractly (listing all 'Int''s constructors would be quite impossible).

/Caveat emptor/: polymorphic datatypes (like lists) are ill-supported and
require family constructors for each instance.  It might require another master
thesis project to solve this.

Step 3 is to create the instance for the 'Family' class.  For each function you
will have to implement four functions.  It's straightforward albeit a bit
boring.

> instance Family ExprTermFamily where
>     decEq Min'      Min'      =              Just (Refl, Refl)
>     decEq Parens'   Parens'   =              Just (Refl, Refl)
>     decEq Number'   Number'   =              Just (Refl, Refl)
>     decEq (Int' x)  (Int' y)  | x == y     = Just (Refl, Refl)
>                               | otherwise  = Nothing
>     decEq _        _          = Nothing
> 
>     fields Min'      (Min e t)   = Just (CCons e (CCons t CNil))
>     fields Parens'   (Parens e)  = Just (CCons e CNil)
>     fields Number'   (Number i)  = Just (CCons i CNil)
>     fields (Int' _)  _           = Just CNil
>     fields _         _           = Nothing
> 
>     apply Min'      (CCons e (CCons t CNil))  = Min e t
>     apply Parens'   (CCons e CNil)            = Parens e
>     apply Number'   (CCons i CNil)            = Number i
>     apply (Int' i)  CNil                      = i
> 
>     string Min'      = "Min"
>     string Parens'   = "Parens"
>     string Number'   = "Number"
>     string (Int' i)  = show i

-}

class Family f where
    -- | Return an instance of the equality GADT ('Refl') of the type and
    -- constructor arguments are equal, else return 'Nothing'.
    decEq     ::                f tx txs -> f ty tys -> Maybe (tx :=: ty, txs :=: tys)
    -- | When the constructor from the family matches the 'real' constructor,
    -- return the arguments in a heterogeneous list, else return 'Nothing'.
    fields    ::                f t ts -> t -> Maybe ts
    -- | When the constructor from the family and the heterogeneous list of
    -- arguments match, apply the 'real' constructor.  For abstract
    -- constructors, the list of arguments should be 'CNil', but you project
    -- out the value saved with the family constructor.
    apply     ::                f t ts -> ts -> t
    -- | For 'show'ing the constructors from the family.
    string    ::                f t ts -> String

-- | Equality GADT, see the documentation for 'Family' for an example of its use.
data a :=: b where
  Refl :: a :=: a

{- |

For each type in the family, you need to create an instance of 'Type', listing
all the members of the family GADT which belong to one type.

This is step 4 of the four steps needed to be able to use 'diff' and 'patch'.
Steps 1-3 are explained in the documentation for 'Family'.

Continuing the example from the 'Family' documentation, the instances for
'Type' are:

> instance Type ExprTermFamily Term where
>     constructors = [Concr Number', Concr Parens']
> 
> instance Type ExprTermFamily Expr where
>     constructors = [Concr Min']
> 
> instance Type ExprTermFamily Int where
>     constructors = [Abstr Int']

-} 

class (Family f) => Type f t where
    -- | List of constructors from the family GADT for one type in your family
    constructors :: [Con f t]

-- | 'Con' wraps both concrete and abstract constructors to a simple type so
-- constructors for a single type can be put together in a list, see 'Type' for
-- more information and an example. 
--
-- Use 'Concr' for concrete constructors (e.g., for simple abstract datatypes).
--
-- Use 'Abstr' for abstract constructors (e.g., for build-in types or types with many
-- (or infinite) constructors)
data Con :: (* -> * -> *) -> * -> * where
    Concr   :: (List f ts)        =>        f t ts   -> Con f t
    Abstr   :: (Eq t, List f ts)  => (t ->  f t ts)  -> Con f t

class List f ts where
  list :: IsList f ts

data IsList :: (* -> * -> *) -> * -> * where
  IsNil   ::                               IsList f Nil
  IsCons  :: (Type f t) => IsList f ts ->  IsList f (Cons t ts)

instance List f Nil where
  list = IsNil

instance (Type f t, List f ts) => List f (Cons t ts) where
  list = IsCons list

{- |

The 'EditScriptL' datatype captures the result of 'diffL' and can be used as the input
to 'patchL' (and 'compress').

The 'diffL' function use a depth-first preorder traversal to traverse the
expression trees.  The edit script it outputs contains the operation that must
be applied to the constructor at that point: either keeping it ('Cpy'),
removing it ('Del', which does not remove the complete subtree, but /contracts/
the edge of the removed node) or inserting a new constructor ('Ins', which can
only be done if the available subtrees at that point correspond to the types
the constructor expects). (The 'CpyTree' is only used when running 'compress'
over an existing edit script.)

For more information about this datatype, you're advised to look at Eelco
Lempsink's thesis at <http://eelco.lempsink.nl/thesis.pdf>.

-}
data EditScriptL :: (* -> * -> *) -> * -> * -> * where
  Ins      ::  (Type f t, List f ts,              List f tys)  => f t ts  ->
               EditScriptL f               txs   (Append ts    tys)              ->
               EditScriptL f               txs   (Cons t       tys)
  Del      ::  (Type f t, List f ts, List f txs)               => f t ts  ->
               EditScriptL f (Append ts    txs)                tys               ->
               EditScriptL f (Cons t       txs)                tys
  Cpy      ::  (Type f t, List f ts, List f txs,  List f tys)  => f t ts  ->
               EditScriptL f (Append ts    txs)  (Append ts    tys)              ->
               EditScriptL f (Cons t       txs)  (Cons t       tys)

  CpyTree  ::  (Type f t, List f txs, List f tys) =>
               EditScriptL f txs tys                                             -> 
               EditScriptL f (Cons t txs) (Cons t tys)

  End      ::  EditScriptL f Nil                 Nil

type family    Append txs tys :: *
type instance  Append Nil            tys = tys
type instance  Append (Cons tx txs)  tys = Cons tx (Append txs tys)

appendList :: IsList f txs -> IsList f tys -> IsList f (Append txs tys)
appendList IsNil         isys = isys
appendList (IsCons isxs) isys = IsCons (appendList isxs isys)

append :: IsList f txs -> IsList f tys -> txs -> tys -> Append txs tys
append IsNil         _    CNil         ys = ys
append (IsCons isxs) isys (CCons x xs) ys = CCons x (append isxs isys xs ys)

instance Show (EditScriptL f txs tys) where
  show (Ins  c   d)  = "Ins "  ++ string c  ++ " $ " ++ show d
  show (Del  c   d)  = "Del "  ++ string c  ++ " $ " ++ show d
  show (Cpy  c   d)  = "Cpy "  ++ string c  ++ " $ " ++ show d
  show (CpyTree  d)  = "CpyTree"            ++ " $ " ++ show d
  show End           = "End"

delete :: (Type f t, List f ts, List f txs) => f t ts -> Cons t txs -> Append ts txs
delete c (CCons x xs) =
  case fields c x of
    Nothing  -> error "Patching failed"
    Just ts  -> append (isList c) list ts xs

isList :: (Family f, List f ts) => f t ts -> IsList f ts
isList _ = list

insert :: (Type f t, List f ts, List f txs) => f t ts -> Append ts txs -> Cons t txs
insert c xs = CCons (apply c txs) tys
  where (txs, tys) = split (isList c) xs

split :: IsList f txs -> Append txs tys -> (txs, tys)
split IsNil         ys              = (CNil, ys)
split (IsCons isxs) (CCons x xsys)  =  let  (xs, ys) = split isxs xsys
                                       in   (CCons x xs, ys)

matchConstructor ::  (Type f t) => t ->
                     (forall ts. f t ts -> IsList f ts -> ts -> r) -> r
matchConstructor = tryEach constructors
  where
    tryEach :: (Type f t) => [Con f t] -> t -> 
      (forall ts. f t ts -> IsList f ts -> ts -> r) -> r
    tryEach (Concr c  : cs)  x  k  = matchOrRetry c      cs x k
    tryEach (Abstr c  : cs)  x  k  = matchOrRetry (c x)  cs x k
    tryEach [] _ _ = error "Incorrect Family or Type instance."

    matchOrRetry :: (List f ts, Type f t) => f t ts -> [Con f t] -> t -> 
      (forall ts'. f t ts' -> IsList f ts' -> ts' -> r) -> r
    matchOrRetry c cs x k = case fields c x of
      Just ts  -> k c (isList c) ts
      Nothing  -> tryEach cs x k

best :: EditScriptL f txs tys -> EditScriptL f txs tys -> EditScriptL f txs tys
best dx dy = bestSteps (steps dx) dx (steps dy) dy

data Nat = Zero | Succ Nat
  deriving (Eq, Show)

steps :: EditScriptL f txs tys -> Nat
steps (Ins  _ d)  = Succ $ steps d
steps (Del  _ d)  = Succ $ steps d
steps (Cpy  _ d)  = Succ $ steps d
steps End         = Zero

bestSteps :: Nat -> d -> Nat -> d -> d
bestSteps Zero      x _         _ = x
bestSteps _         _ Zero      y = y
bestSteps (Succ nx) x (Succ ny) y = bestSteps nx x ny y

data RListT :: (* -> * -> *) -> * -> * where
  RList :: List f ts => RListT f ts

reify :: IsList f ts -> RListT f ts
reify IsNil          = RList
reify (IsCons ists)  = case reify ists of
                         RList -> RList

ins :: (Type f t) => IsList f ts -> IsList f tys -> 
      f t ts -> EditScriptL f txs (Append ts tys) -> EditScriptL f txs (Cons t tys)
ins ists isys =
  case (reify ists, reify isys) of
    (RList, RList) -> Ins

del :: (Type f t) => IsList f ts -> IsList f txs ->
      f t ts -> EditScriptL f (Append ts txs) tys -> EditScriptL f (Cons t txs) tys 
del ists isxs =
  case (reify ists, reify isxs) of
    (RList, RList) -> Del

cpy :: (Type f t) => IsList f ts -> IsList f txs -> IsList f tys ->
      f t ts -> EditScriptL f (Append ts txs) (Append ts tys) -> 
      EditScriptL f (Cons t txs) (Cons t tys)
cpy ists isxs isys =
  case (reify ists, reify isxs, reify isys) of
    (RList, RList, RList) -> Cpy

compress :: (Family f) => EditScriptL f txs tys -> EditScriptL f txs tys
compress End           = End 
compress (Ins  c   d)  = Ins  c   (compress d)
compress (Del  c   d)  = Del  c   (compress d)
compress (CpyTree  d)  = CpyTree  (compress d) 
compress (Cpy  c   d)  = let d' = compress d in
  case copied (isList c) d' of
    Just d''  -> CpyTree d''
    Nothing   -> Cpy c d'

copied :: (Family f) => IsList f ts -> 
             EditScriptL f (Append ts txs) (Append ts tys) -> Maybe (EditScriptL f txs tys)
copied IsNil                  d   = Just d
copied (IsCons xs)  (CpyTree  d)  = copied xs d
copied (IsCons _)   _             = Nothing 

data EditScriptLMemo :: (* -> * -> *) -> * -> * -> * where
  CC :: (Type f tx, Type f ty, List f txs', List f tys') => 
        f tx txs' -> f ty tys' ->
        EditScriptL f (Cons tx txs) (Cons ty tys) ->
        EditScriptLMemo f (Cons tx txs)      (Append tys' tys)  -> 
        EditScriptLMemo f (Append txs' txs)  (Cons ty tys)      ->
        EditScriptLMemo f (Append txs' txs)  (Append tys' tys)  ->
        EditScriptLMemo f (Cons tx txs)      (Cons ty tys)
  CN :: (Type f tx, List f txs') => f tx txs' ->
        EditScriptL f (Cons tx txs) Nil ->
        EditScriptLMemo f (Append txs' txs)  Nil ->
        EditScriptLMemo f (Cons tx txs)      Nil
  NC :: (Type f ty, List f tys') => f ty tys' ->
        EditScriptL f Nil (Cons ty tys) ->
        EditScriptLMemo f Nil                (Append tys' tys) ->
        EditScriptLMemo f Nil                (Cons ty tys)
  NN :: EditScriptL f Nil Nil ->
        EditScriptLMemo f Nil                Nil

diffLMemo :: (Family f, List f txs, List f tys) => txs -> tys -> EditScriptLMemo f txs tys
diffLMemo = diffLMemo' list list

diffLMemo' :: (Family f) => forall txs tys. IsList f txs -> IsList f tys -> 
        txs -> tys -> EditScriptLMemo f txs tys
diffLMemo' IsNil          IsNil          CNil          CNil          = 
  NN End
diffLMemo' (IsCons isxs)  IsNil          (CCons x xs)  CNil          =
  matchConstructor x
    (\ cx isxs' xs' ->
       let d = diffLMemo'  (appendList isxs' isxs)     IsNil
                       (append isxs' isxs xs' xs)  CNil
       in  cn isxs' isxs cx (del isxs' isxs cx (getEditScriptL d)) d)
diffLMemo' IsNil          (IsCons isys)  CNil          (CCons y ys)  =
  matchConstructor y
    (\ cy isys' ys' ->
       let i = diffLMemo'  IsNil  (appendList isys' isys)
                       CNil   (append isys' isys ys' ys)
       in  nc isys' isys cy (ins isys' isys cy (getEditScriptL i)) i)
diffLMemo' (IsCons isxs)  (IsCons isys)  (CCons x xs)  (CCons y ys)  =
  matchConstructor x
    (\ cx isxs' xs' ->
       matchConstructor y
       (\ cy isys' ys' -> 
          let  c  = diffLMemo'  (appendList isxs' isxs)     (appendList isys' isys)
                            (append isxs' isxs xs' xs)  (append isys' isys ys' ys)
               d  = extendd  isys'  isys  cy  c 
               i  = extendi  isxs'  isxs  cx  c
          in cc isxs' isxs isys' isys cx cy 
               (bestEditScriptLMemo cx cy isxs' isxs isys' isys i d c) i d c))

getEditScriptL :: EditScriptLMemo f txs tys -> EditScriptL f txs tys
getEditScriptL (CC _ _ d _ _ _) = d
getEditScriptL (CN _ d _)       = d
getEditScriptL (NC _ d _)       = d
getEditScriptL (NN d)           = d

bestEditScriptLMemo ::  (Type f tx, Type f ty) => f tx txs' -> f ty tys' ->
              IsList f txs' -> IsList f txs -> IsList f tys' -> IsList f tys ->
              EditScriptLMemo f (Cons tx txs)      (Append tys' tys)  -> 
              EditScriptLMemo f (Append txs' txs)  (Cons ty tys)      ->
              EditScriptLMemo f (Append txs' txs)  (Append tys' tys)  ->
              EditScriptL f (Cons tx txs) (Cons ty tys)
bestEditScriptLMemo cx cy isxs' isxs isys' isys i d c = case decEq cx cy of
    Just (Refl, Refl) -> cpy isxs' isxs isys cx (getEditScriptL c)
    Nothing           -> best  (ins isys' isys cy (getEditScriptL i))
                               (del isxs' isxs cx (getEditScriptL d))

extendd :: (Type f ty) => IsList f tys' -> IsList f tys -> f ty tys' -> 
           EditScriptLMemo f txs' (Append tys' tys) ->
           EditScriptLMemo f txs' (Cons ty tys)
extendd isys' isys cy dt@(NN d)          = nc isys' isys cy (ins isys' isys cy d) dt
extendd isys' isys cy dt@(NC _ d _)      = nc isys' isys cy (ins isys' isys cy d) dt
extendd isys' isys cy dt@(CN _ _ _)        = extendd' isys' isys cy dt
extendd isys' isys cy dt@(CC _ _ _ _ _ _)  = extendd' isys' isys cy dt

extendd' :: (Type f ty, Type f tx) => IsList f tys' -> IsList f tys -> f ty tys' -> 
            EditScriptLMemo f (Cons tx txs) (Append tys' tys) ->
            EditScriptLMemo f (Cons tx txs) (Cons ty tys)
extendd' isys' isys cy dt =
  extractd dt (\ isxs' isxs cx dt' -> 
    let i = dt
        d = extendd isys' isys cy dt'
        c = dt'
    in  cc isxs' isxs isys' isys cx cy (bestEditScriptLMemo cx cy isxs' isxs isys' isys i d c) i d c)

extendi :: (Type f tx) => IsList f txs' -> IsList f txs -> f tx txs' ->
           EditScriptLMemo f (Append txs' txs)  tys' ->
           EditScriptLMemo f (Cons tx txs)      tys'
extendi isxs' isxs cx dt@(NN d)         = cn isxs' isxs cx (del isxs' isxs cx d) dt
extendi isxs' isxs cx dt@(CN _ d _)     = cn isxs' isxs cx (del isxs' isxs cx d) dt
extendi isxs' isxs cx dt@(NC _ _ _)       = extendi' isxs' isxs cx dt
extendi isxs' isxs cx dt@(CC _ _ _ _ _ _) = extendi' isxs' isxs cx dt

extendi' :: (Type f tx, Type f ty) => IsList f txs' -> IsList f txs -> f tx txs' -> 
            EditScriptLMemo f (Append txs' txs)  (Cons ty tys) ->
            EditScriptLMemo f (Cons tx txs)      (Cons ty tys)
extendi' isxs' isxs cx dt =
  extracti dt (\ isys' isys cy dt' -> 
    let i = extendi isxs' isxs cx dt'
        d = dt
        c = dt'
    in  cc   isxs' isxs isys' isys cx cy 
             (bestEditScriptLMemo cx cy isxs' isxs isys' isys i d c) 
             i d c)

extractd :: (Type f tx) => EditScriptLMemo f (Cons tx txs) tys' ->
          (forall txs'. IsList f txs' -> IsList f txs -> f tx txs' ->
          EditScriptLMemo f (Append txs' txs) tys' -> r) -> r
extractd (CC c _ d' _ d _) k = k (isList c) (sourceTail d') c d
extractd (CN c d' d)       k = k (isList c) (sourceTail d') c d

sourceTail :: EditScriptL f (Cons tx txs) tys -> IsList f txs
sourceTail (Ins _ d) = sourceTail d
sourceTail (Del _ _) = list
sourceTail (Cpy _ _) = list

extracti :: (Type f ty) => EditScriptLMemo f txs' (Cons ty tys) ->
          (forall tys'. IsList f tys' -> IsList f tys -> f ty tys' ->
          EditScriptLMemo f txs' (Append tys' tys) -> r) -> r
extracti (CC _ c d i _ _) k = k (isList c) (targetTail d) c i 
extracti (NC c d i)       k = k (isList c) (targetTail d) c i

targetTail :: EditScriptL f txs (Cons ty tys) -> IsList f tys
targetTail (Ins _ _) = list
targetTail (Del _ d) = targetTail d
targetTail (Cpy _ _) = list

nc :: (Type f t) => IsList f ts -> IsList f tys -> 
      f t ts -> EditScriptL f Nil (Cons t tys) -> 
      EditScriptLMemo f Nil (Append ts tys) -> EditScriptLMemo f Nil (Cons t tys)
nc ists isys =
  case (reify ists, reify isys) of
    (RList, RList) -> NC

cn :: (Type f t) => IsList f ts -> IsList f txs -> 
      f t ts -> EditScriptL f (Cons t txs) Nil ->
      EditScriptLMemo f (Append ts txs) Nil -> EditScriptLMemo f (Cons t txs) Nil
cn ists isxs =
  case (reify ists, reify isxs) of
    (RList, RList) -> CN

cc :: (Type f tx, Type f ty) => 
      IsList f txs' -> IsList f txs -> IsList f tys' -> IsList f tys ->
      f tx txs' -> f ty tys' -> EditScriptL f (Cons tx txs) (Cons ty tys) ->
      EditScriptLMemo f (Cons tx txs)      (Append tys' tys)    -> 
      EditScriptLMemo f (Append txs' txs)  (Cons ty tys)        ->
      EditScriptLMemo f (Append txs' txs)  (Append tys' tys)  ->
      EditScriptLMemo f (Cons tx txs)      (Cons ty tys)
cc isxs' isxs isys' isys =
  case (reify isxs', reify isxs, reify isys', reify isys) of
    (RList, RList, RList, RList) -> CC