{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE KindSignatures #-}

-- |
-- Module      :  ELynx.Sequence.Divergence
-- Description :  Compute divergence matrix between sequences
-- Copyright   :  2022 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- Creation date: Thu Jul  7 17:55:23 2022.
module ELynx.Sequence.Divergence
  ( divergence,
  )
where

import Control.Monad.Primitive
import qualified Data.Matrix.Unboxed as MU
import qualified Data.Matrix.Unboxed.Mutable as MU
import Data.Maybe
import qualified Data.Set as S
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import ELynx.Alphabet.Alphabet
import ELynx.Sequence.Sequence
import GHC.Exts (Constraint)

type Context x = (VUM.Unbox x :: Constraint)

modify :: (Context a, PrimMonad s) => MU.MMatrix (PrimState s) a -> (Int, Int) -> (a -> a) -> s ()
modify :: MMatrix (PrimState s) a -> (Int, Int) -> (a -> a) -> s ()
modify MMatrix (PrimState s) a
m (Int, Int)
ij a -> a
f = do
  a
x <- MMatrix (PrimState s) a -> (Int, Int) -> s a
forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> s a
MU.read MMatrix (PrimState s) a
m (Int, Int)
ij
  MMatrix (PrimState s) a -> (Int, Int) -> a -> s ()
forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> a -> s ()
MU.write MMatrix (PrimState s) a
m (Int, Int)
ij (a -> a
f a
x)

divergence :: Sequence -> Sequence -> Either String (MU.Matrix Int)
divergence :: Sequence -> Sequence -> Either String (Matrix Int)
divergence Sequence
s1 Sequence
s2
  | Bool -> Bool
not (Sequence -> Alphabet
alphabet Sequence
s1 Alphabet -> Alphabet -> Bool
forall a. Eq a => a -> a -> Bool
== Sequence -> Alphabet
alphabet Sequence
s2) = String -> Either String (Matrix Int)
forall b. String -> Either String b
err String
"sequences have different alphabets"
  | Bool -> Bool
not ([Sequence] -> Bool
equalLength [Sequence
s1, Sequence
s2]) = String -> Either String (Matrix Int)
forall b. String -> Either String b
err String
"sequences have different lengths"
  | Bool
otherwise = Matrix Int -> Either String (Matrix Int)
forall a b. b -> Either a b
Right (Matrix Int -> Either String (Matrix Int))
-> Matrix Int -> Either String (Matrix Int)
forall a b. (a -> b) -> a -> b
$ (forall s. ST s (MMatrix s Int)) -> Matrix Int
forall a. Context a => (forall s. ST s (MMatrix s a)) -> Matrix a
MU.create ((forall s. ST s (MMatrix s Int)) -> Matrix Int)
-> (forall s. ST s (MMatrix s Int)) -> Matrix Int
forall a b. (a -> b) -> a -> b
$ do
      MMatrix s Int
m <- (Int, Int) -> ST s (MMatrix (PrimState (ST s)) Int)
forall a (s :: * -> *).
(Context a, PrimMonad s) =>
(Int, Int) -> s (MMatrix (PrimState s) a)
MU.new (Int
n, Int
n)
      -- Initialize matrix.
      [ST s ()] -> ST s ()
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, Monad m) =>
t (m a) -> m ()
sequence_ [MMatrix (PrimState (ST s)) Int -> (Int, Int) -> Int -> ST s ()
forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> a -> s ()
MU.write MMatrix s Int
MMatrix (PrimState (ST s)) Int
m (Int
i, Int
j) Int
0 | Int
i <- [Int
0 .. (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)], Int
j <- [Int
0 .. (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)]]
      -- Fill matrix.
      [ST s ()] -> ST s ()
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, Monad m) =>
t (m a) -> m ()
sequence_ ([ST s ()] -> ST s ()) -> [ST s ()] -> ST s ()
forall a b. (a -> b) -> a -> b
$
        [Maybe (ST s ())] -> [ST s ()]
forall a. [Maybe a] -> [a]
catMaybes
          [ do
              -- Only treat sites where both characters are standard.
              Int
i <- Maybe Int
mi
              Int
j <- Maybe Int
mj
              ST s () -> Maybe (ST s ())
forall a. a -> Maybe a
Just (ST s () -> Maybe (ST s ())) -> ST s () -> Maybe (ST s ())
forall a b. (a -> b) -> a -> b
$ MMatrix (PrimState (ST s)) Int
-> (Int, Int) -> (Int -> Int) -> ST s ()
forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> (a -> a) -> s ()
modify MMatrix s Int
MMatrix (PrimState (ST s)) Int
m (Int
i, Int
j) (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
            | (Character
x, Character
y) <- [Character] -> [Character] -> [(Character, Character)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Vector Character -> [Character]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Character
cs1) (Vector Character -> [Character]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Character
cs2),
              let mi :: Maybe Int
mi = Character -> Set Character -> Maybe Int
forall a. Ord a => a -> Set a -> Maybe Int
S.lookupIndex Character
x Set Character
a,
              let mj :: Maybe Int
mj = Character -> Set Character -> Maybe Int
forall a. Ord a => a -> Set a -> Maybe Int
S.lookupIndex Character
y Set Character
a
          ]
      MMatrix s Int -> ST s (MMatrix s Int)
forall (m :: * -> *) a. Monad m => a -> m a
return MMatrix s Int
m
  where
    err :: String -> Either String b
err String
m = String -> Either String b
forall a b. a -> Either a b
Left (String -> Either String b) -> String -> Either String b
forall a b. (a -> b) -> a -> b
$ String
"divergence: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
m
    a :: Set Character
a = AlphabetSpec -> Set Character
std (AlphabetSpec -> Set Character) -> AlphabetSpec -> Set Character
forall a b. (a -> b) -> a -> b
$ Alphabet -> AlphabetSpec
alphabetSpec (Alphabet -> AlphabetSpec) -> Alphabet -> AlphabetSpec
forall a b. (a -> b) -> a -> b
$ Sequence -> Alphabet
alphabet Sequence
s1
    n :: Int
n = Set Character -> Int
forall a. Set a -> Int
S.size Set Character
a
    cs1 :: Vector Character
cs1 = Sequence -> Vector Character
characters Sequence
s1
    cs2 :: Vector Character
cs2 = Sequence -> Vector Character
characters Sequence
s2