{-# LANGUAGE OverloadedLists #-}

-- | This module computes a matrix keeping track of the costs necessary
--   to edit one piece of text so that it is transformed into another one
--   From that matrix it is possible to extract the sequence of edit operations with the minimum cost
module Data.Text.EditMatrix where

import Data.Matrix hiding (matrix, (!), getElem, setElem)
import Data.Matrix qualified as M
import Data.Text.Costs
import Data.Text.EditOperation
import Data.Vector as V (Vector, cons, fromList, head, take, (!))
import Protolude

-- | Create a edit matrix where costs are computed using dynamic programming
createEditMatrix :: Costs a -> [a] -> [a] -> Matrix Cost
createEditMatrix :: forall a. Costs a -> [a] -> [a] -> Matrix Cost
createEditMatrix Costs a
costs [a]
as1 [a]
as2 = do
  let initialMatrix :: Matrix Cost
initialMatrix = Int -> Int -> ((Int, Int) -> Cost) -> Matrix Cost
forall a. Int -> Int -> ((Int, Int) -> a) -> Matrix a
M.matrix ([a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
as1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) ([a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
as2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Cost -> (Int, Int) -> Cost
forall a b. a -> b -> a
const (Cost -> (Int, Int) -> Cost) -> Cost -> (Int, Int) -> Cost
forall a b. (a -> b) -> a -> b
$ Int -> Cost
NoAction Int
0)
  let coordinates :: [(Int, Int)]
coordinates = Int -> Int -> [(Int, Int)]
cartesian ([a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
as1) ([a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
as2)
  (Matrix Cost -> (Int, Int) -> Matrix Cost)
-> Matrix Cost -> [(Int, Int)] -> Matrix Cost
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl'
      ( \Matrix Cost
ma (Int
i, Int
j) ->
          let newCost :: Cost
newCost
                | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Int -> Cost
Insertion Int
j -- no more letters for as1, we do j insertions
                | Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Int -> Cost
Deletion Int
i -- no more letters for as2, we do i suppressions
                | Bool
otherwise = Int -> Int -> Matrix Cost -> Cost
costOf Int
i Int
j Matrix Cost
ma -- otherwise we compute the cost and operation to go from as1[i] to as2[j]
           in Cost -> Int -> Int -> Matrix Cost -> Matrix Cost
forall a. a -> Int -> Int -> Matrix a -> Matrix a
setElement Cost
newCost Int
i Int
j Matrix Cost
ma
      )
      Matrix Cost
initialMatrix
      [(Int, Int)]
coordinates
  where
    (Vector a
vs1, Vector a
vs2) = ([a] -> Vector a
forall a. [a] -> Vector a
V.fromList [a]
as1, [a] -> Vector a
forall a. [a] -> Vector a
V.fromList [a]
as2)

    -- compute the cartesian product of the [0..i] x [0..j] lists
    cartesian :: Int -> Int -> [(Int, Int)]
    cartesian :: Int -> Int -> [(Int, Int)]
cartesian Int
m Int
n = [(Int
i, Int
j) | Int
i <- [Int
Item [Int]
0 .. Int
Item [Int]
m], Int
j <- [Int
Item [Int]
0 .. Int
Item [Int]
n]]

    -- compute the cost of going from as1[i] to as2[j], knowing the existing costs
    --  (i-1, j-1) (i-1, j)
    --  (i, j-1)   (i, j)
    --
    -- going from (i-1, j) to (i, j) means that we delete as1[i]
    -- going from (i-1, j-1) to (i, j) means that we substitute as1[i] with as2[j]
    -- going from (i, j-1) to (i, j) means that we insert as2[j]
    costOf :: Int -> Int -> Matrix Cost -> Cost
    costOf :: Int -> Int -> Matrix Cost -> Cost
costOf Int
i Int
j Matrix Cost
matrix = do
      let i1 :: Int
i1 = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
      let j1 :: Int
j1 = Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
      let i1j :: Cost
i1j =  Int -> Int -> Matrix Cost -> Cost
forall a. Int -> Int -> Matrix a -> a
getElement Int
i1 Int
j Matrix Cost
matrix
      let i1j1 :: Cost
i1j1 = Int -> Int -> Matrix Cost -> Cost
forall a. Int -> Int -> Matrix a -> a
getElement Int
i1 Int
j1 Matrix Cost
matrix
      let ij1 :: Cost
ij1 =  Int -> Int -> Matrix Cost -> Cost
forall a. Int -> Int -> Matrix a -> a
getElement Int
i Int
j1 Matrix Cost
matrix
      let result :: Cost
result =
            Costs a -> a -> a -> Int -> Int -> Int -> Cost
forall a. Costs a -> a -> a -> Int -> Int -> Int -> Cost
lowerCost
              Costs a
costs
              (Vector a
vs1 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! Int
i1)
              (Vector a
vs2 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! Int
j1)
              (Cost -> Int
cost Cost
i1j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Costs a -> a -> Int
forall a. Costs a -> a -> Int
deletionCost Costs a
costs (Vector a
vs1 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! Int
i1)) -- suppression
              (Cost -> Int
cost Cost
i1j1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Costs a -> a -> a -> Int
forall a. Costs a -> a -> a -> Int
substitutionCost Costs a
costs (Vector a
vs1 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! Int
i1) (Vector a
vs2 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! Int
j1)) -- substitution
              (Cost -> Int
cost Cost
ij1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Costs a -> a -> Int
forall a. Costs a -> a -> Int
insertionCost Costs a
costs (Vector a
vs2 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! Int
j1)) -- insertion
      -- in case of a substitution if the resulting cost of (i, j) is the same as (i-1, j-1)
      -- this means that we have substituted the same letter and it is the same as doing no action
      case Cost
result of
        Substitution {} | Cost -> Int
cost Cost
i1j1 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Cost -> Int
cost Cost
result -> Int -> Cost
NoAction (Cost -> Int
cost Cost
result)
        Cost
_ -> Cost
result

-- | From the original lists of characters, given the cost matrix
--   return a list of edit operations allowing to edit one text and eventually get the second one
makeEditOperations :: forall a. Vector a -> Vector a -> Matrix Cost -> Vector (EditOperation a)
makeEditOperations :: forall a.
Vector a -> Vector a -> Matrix Cost -> Vector (EditOperation a)
makeEditOperations [] Vector a
_ Matrix Cost
_ = []
makeEditOperations Vector a
_ [] Matrix Cost
_ = []
makeEditOperations Vector a
as1 Vector a
as2 Matrix Cost
matrix =
  Int -> Int -> Vector (EditOperation a) -> Vector (EditOperation a)
go (Vector a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Vector a
as1) (Vector a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Vector a
as2) []
  where
    go :: Int -> Int -> Vector (EditOperation a) -> Vector (EditOperation a)
    go :: Int -> Int -> Vector (EditOperation a) -> Vector (EditOperation a)
go Int
0 Int
0 Vector (EditOperation a)
_ = []
    go Int
i Int
j Vector (EditOperation a)
ops = do
      let op :: Cost
op = Int -> Int -> Matrix Cost -> Cost
forall a. Int -> Int -> Matrix a -> a
getElement Int
i Int
j Matrix Cost
matrix
      let dist :: Int
dist = Cost -> Int
cost Cost
op
      if Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
        then
          if Int
dist Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
            then EditOperation a
-> Vector (EditOperation a) -> Vector (EditOperation a)
forall a. a -> Vector a -> Vector a
V.cons (a -> EditOperation a
forall a. a -> EditOperation a
Keep (Vector a -> a
forall a. Vector a -> a
V.head Vector a
as1)) Vector (EditOperation a)
ops
            else EditOperation a
-> Vector (EditOperation a) -> Vector (EditOperation a)
forall a. a -> Vector a -> Vector a
V.cons (a -> a -> EditOperation a
forall a. a -> a -> EditOperation a
Substitute (Vector a -> a
forall a. Vector a -> a
V.head Vector a
as1) (Vector a -> a
forall a. Vector a -> a
V.head Vector a
as2)) Vector (EditOperation a)
ops
        else
          if Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0
            then (a -> EditOperation a) -> Vector a -> Vector (EditOperation a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> EditOperation a
forall a. a -> EditOperation a
Delete (Int -> Vector a -> Vector a
forall a. Int -> Vector a -> Vector a
V.take Int
i Vector a
as1) Vector (EditOperation a)
-> Vector (EditOperation a) -> Vector (EditOperation a)
forall a. Semigroup a => a -> a -> a
<> Vector (EditOperation a)
ops
            else
              if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0
                then (a -> EditOperation a) -> Vector a -> Vector (EditOperation a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> EditOperation a
forall a. a -> EditOperation a
Insert (Int -> Vector a -> Vector a
forall a. Int -> Vector a -> Vector a
V.take Int
j Vector a
as2) Vector (EditOperation a)
-> Vector (EditOperation a) -> Vector (EditOperation a)
forall a. Semigroup a => a -> a -> a
<> Vector (EditOperation a)
ops
                else case Cost
op of
                  Insertion {} -> Int -> Int -> Vector (EditOperation a) -> Vector (EditOperation a)
go Int
i (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (EditOperation a
-> Vector (EditOperation a) -> Vector (EditOperation a)
forall a. a -> Vector a -> Vector a
V.cons (a -> EditOperation a
forall a. a -> EditOperation a
Insert (Vector a
as2 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))) Vector (EditOperation a)
ops)
                  Deletion {} -> Int -> Int -> Vector (EditOperation a) -> Vector (EditOperation a)
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int
j (EditOperation a
-> Vector (EditOperation a) -> Vector (EditOperation a)
forall a. a -> Vector a -> Vector a
V.cons (a -> EditOperation a
forall a. a -> EditOperation a
Delete (Vector a
as1 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))) Vector (EditOperation a)
ops)
                  Substitution {} -> Int -> Int -> Vector (EditOperation a) -> Vector (EditOperation a)
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (EditOperation a
-> Vector (EditOperation a) -> Vector (EditOperation a)
forall a. a -> Vector a -> Vector a
V.cons (a -> a -> EditOperation a
forall a. a -> a -> EditOperation a
Substitute (Vector a
as1 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) (Vector a
as2 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))) Vector (EditOperation a)
ops)
                  Cost
_ -> Int -> Int -> Vector (EditOperation a) -> Vector (EditOperation a)
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (EditOperation a
-> Vector (EditOperation a) -> Vector (EditOperation a)
forall a. a -> Vector a -> Vector a
V.cons (a -> EditOperation a
forall a. a -> EditOperation a
Keep (Vector a
as1 Vector a -> Int -> a
forall a. Vector a -> Int -> a
! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))) Vector (EditOperation a)
ops)

-- | Return the element at position i, j in the matrix
--   A matrix in the matrix package is 1-indexed but all the computations in this module are 0-indexed
--   so we need to shift the indices
getElement :: Int -> Int -> Matrix a -> a
getElement :: forall a. Int -> Int -> Matrix a -> a
getElement Int
i Int
j = Int -> Int -> Matrix a -> a
forall a. Int -> Int -> Matrix a -> a
M.getElem (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

-- | Set the element at position i, j in the matrix
--   A matrix in the matrix package is 1-indexed but all the computations in this module are 0-indexed
--   so we need to shift the indices
setElement :: a -> Int -> Int -> Matrix a -> Matrix a
setElement :: forall a. a -> Int -> Int -> Matrix a -> Matrix a
setElement a
a Int
i Int
j = a -> (Int, Int) -> Matrix a -> Matrix a
forall a. a -> (Int, Int) -> Matrix a -> Matrix a
M.setElem a
a (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)