{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE TupleSections #-}
module Bio.Chain.Alignment.Algorithms where
import Control.Lens (Index, IxValue)
import qualified Data.Array.Unboxed as A (bounds, range)
import Data.List (maximumBy)
import Bio.Chain hiding ((!))
import Bio.Chain.Alignment.Type
import Control.Monad (forM_)
import Data.Ord (comparing)
import Control.Monad.ST (ST)
import Data.Array.Base (readArray, writeArray)
import Data.Array.ST (MArray (..), STUArray, newArray,
runSTUArray)
import Data.Array.Unboxed (Ix (..), UArray, (!))
newtype EditDistance e1 e2 = EditDistance (e1 -> e2 -> Bool)
data GlobalAlignment a e1 e2 = GlobalAlignment (Scoring e1 e2) a
data LocalAlignment a e1 e2 = LocalAlignment (Scoring e1 e2) a
data SemiglobalAlignment a e1 e2 = SemiglobalAlignment (Scoring e1 e2) a
{-# SPECIALISE substitute :: (Char -> Char -> Int) -> Chain Int Char -> Chain Int Char -> Int -> Int -> Int #-}
{-# INLINE substitute #-}
substitute :: (Alignable m, Alignable m') => (IxValue m -> IxValue m' -> Int) -> m -> m' -> Index m -> Index m' -> Int
substitute f s t i j = f (s `unsafeRead` (pred i)) (t `unsafeRead` (pred j))
substituteED :: EditDistance e1 e2 -> (e1 -> e2 -> Int)
substituteED (EditDistance genericEq) x y = if x `genericEq` y then 1 else 0
{-# SPECIALISE defStop :: Matrix (Chain Int Char) (Chain Int Char) -> Chain Int Char -> Chain Int Char -> Int -> Int -> Bool #-}
{-# INLINE defStop #-}
defStop :: (Alignable m, Alignable m') => Matrix m m' -> m -> m' -> Index m -> Index m' -> Bool
defStop _ s t i j = let (lowerS, _) = bounds s
(lowerT, _) = bounds t
in i == lowerS && j == lowerT
{-# SPECIALISE localStop :: Matrix (Chain Int Char) (Chain Int Char) -> Chain Int Char -> Chain Int Char -> Int -> Int -> Bool #-}
{-# INLINE localStop #-}
localStop :: (Alignable m, Alignable m') => Matrix m m' -> m -> m' -> Index m -> Index m' -> Bool
localStop m' s t i j = let (lowerS, _) = bounds s
(lowerT, _) = bounds t
in i == lowerS || j == lowerT || m' ! (i, j, Match) == 0
horiz :: (Alignable m, Alignable m', IsGap g) => g -> Matrix m m' -> m -> m' -> Index m -> Index m' -> Bool
horiz g m s t i j = (j > lowerT) && ((i == lowerS) || (m ! (i, pred j, Match) + add == m ! (i, j, Match)))
where
add | isAffine g = m ! (i, pred j, Insert)
| otherwise = insertCostOpen g
(lowerT, _) = bounds t
(lowerS, _) = bounds s
vert :: (Alignable m, Alignable m', IsGap g) => g -> Matrix m m' -> m -> m' -> Index m -> Index m' -> Bool
vert g m s t i j = (i > lowerS) && ((lowerT == j) || (m ! (pred i, j, Match) + add == m ! (i, j, Match)))
where
add | isAffine g = m ! (pred i, j, Delete)
| otherwise = deleteCostOpen g
(lowerT, _) = bounds t
(lowerS, _) = bounds s
{-# SPECIALISE defDiag :: (Char -> Char -> Int) -> Matrix (Chain Int Char) (Chain Int Char) -> Chain Int Char -> Chain Int Char -> Int -> Int -> Bool #-}
{-# INLINE defDiag #-}
defDiag :: (Alignable m, Alignable m') => (IxValue m -> IxValue m' -> Int) -> Matrix m m' -> m -> m' -> Index m -> Index m' -> Bool
defDiag sub' m s t i j = let sub = substitute sub' s t
in m ! (pred i, pred j, Match) + sub i j == m ! (i, j, Match)
{-# SPECIALISE defStart :: Matrix (Chain Int Char) (Chain Int Char) -> Chain Int Char -> Chain Int Char -> (Int, Int) #-}
{-# INLINE defStart #-}
defStart :: (Alignable m, Alignable m') => Matrix m m' -> m -> m' -> (Index m, Index m')
defStart m _ _ = let ((_, _, _), (upperS, upperT, _)) = A.bounds m in (upperS, upperT)
{-# SPECIALISE localStart :: Matrix (Chain Int Char) (Chain Int Char) -> Chain Int Char -> Chain Int Char -> (Int, Int) #-}
{-# INLINE localStart #-}
localStart :: (Alignable m, Alignable m') => Matrix m m' -> m -> m' -> (Index m, Index m')
localStart m _ _ = let ((lowerS, lowerT, _), (upperS, upperT, _)) = A.bounds m
range' = A.range ((lowerS, lowerT, Match), (upperS, upperT, Match))
in (\(a, b, _) -> (a, b)) $ maximumBy (comparing (m !)) range'
{-# SPECIALISE semiStart :: Matrix (Chain Int Char) (Chain Int Char) -> Chain Int Char -> Chain Int Char -> (Int, Int) #-}
{-# INLINE semiStart #-}
semiStart :: (Alignable m, Alignable m') => Matrix m m' -> m -> m' -> (Index m, Index m')
semiStart m _ _ = let ((lowerS, lowerT, _), (upperS, upperT, _)) = A.bounds m
lastCol = (, upperT, Match) <$> [lowerS .. upperS]
lastRow = (upperS, , Match) <$> [lowerT .. upperT]
in (\(a, b, _) -> (a, b)) $ maximumBy (comparing (m !)) $ lastCol ++ lastRow
instance IsGap g => SequenceAlignment (GlobalAlignment g) where
{-# INLINE cond #-}
cond (GlobalAlignment subC gap) = Conditions defStop (defDiag subC) (vert gap) (horiz gap)
{-# INLINE traceStart #-}
traceStart = const defStart
scoreMatrix :: forall m m' . (Alignable m, Alignable m')
=> GlobalAlignment g (IxValue m) (IxValue m')
-> m
-> m'
-> Matrix m m'
scoreMatrix (GlobalAlignment subC g) s t | isAffine g = uMatrixAffine
| otherwise = uMatrixSimple
where
uMatrixSimple :: UArray (Index m, Index m', EditOp) Int
uMatrixSimple = runSTUArray $ do
matrix <- newArray ((lowerS, lowerT, Match), (nilS, nilT, Match)) 0 :: ST s (STUArray s (Index m, Index m', EditOp) Int)
forM_ [lowerS .. nilS] $ \ixS ->
forM_ [lowerT .. nilT] $ \ixT ->
if | ixS == lowerS -> writeArray matrix (ixS, ixT, Match) $ (insertCostOpen g) * index (lowerT, nilT) ixT
| ixT == lowerT -> writeArray matrix (ixS, ixT, Match) $ (deleteCostOpen g) * index (lowerS, nilS) ixS
| otherwise -> do
predDiag <- matrix `readArray` (pred ixS, pred ixT, Match)
predS <- matrix `readArray` (pred ixS, ixT, Match)
predT <- matrix `readArray` ( ixS, pred ixT, Match)
writeArray matrix (ixS, ixT, Match) $ maximum [ predDiag + sub ixS ixT
, predS + deleteCostOpen g
, predT + insertCostOpen g
]
pure matrix
uMatrixAffine :: UArray (Index m, Index m', EditOp) Int
uMatrixAffine = runSTUArray $ do
matrix <- newArray ((lowerS, lowerT, Insert), (nilS, nilT, Match)) 0 :: ST s (STUArray s (Index m, Index m', EditOp) Int)
forM_ [lowerS .. nilS] $ \ixS ->
forM_ [lowerT .. nilT] $ \ixT ->
if | ixS == lowerS && ixT == lowerT -> do
writeArray matrix (ixS, ixT, Match) 0
writeArray matrix (ixS, ixT, Insert) $ insertCostOpen g
writeArray matrix (ixS, ixT, Delete) $ deleteCostOpen g
| ixS == lowerS -> do
writeArray matrix (ixS, ixT, Match) $ insertCostOpen g + (insertCostExtend g) * pred (index (lowerT, nilT) ixT)
writeArray matrix (ixS, ixT, Insert) $ insertCostExtend g
writeArray matrix (ixS, ixT, Delete) $ deleteCostOpen g
| ixT == lowerT -> do
writeArray matrix (ixS, ixT, Match) $ deleteCostOpen g + (deleteCostExtend g) * pred (index (lowerS, nilS) ixS)
writeArray matrix (ixS, ixT, Delete) $ deleteCostExtend g
writeArray matrix (ixS, ixT, Insert) $ insertCostOpen g
| otherwise -> do
predDiag <- matrix `readArray` (pred ixS, pred ixT, Match)
predS <- matrix `readArray` (pred ixS, ixT, Match)
predT <- matrix `readArray` ( ixS, pred ixT, Match)
delCost <- matrix `readArray` (pred ixS, ixT, Delete)
insCost <- matrix `readArray` ( ixS, pred ixT, Insert)
let maxScore = maximum [ predDiag + sub ixS ixT
, predS + delCost
, predT + insCost
]
writeArray matrix (ixS, ixT, Delete) $ if predS + delCost == maxScore then deleteCostExtend g else deleteCostOpen g
writeArray matrix (ixS, ixT, Insert) $ if predT + insCost == maxScore then insertCostExtend g else insertCostOpen g
writeArray matrix (ixS, ixT, Match) maxScore
pure matrix
(lowerS, upperS) = bounds s
(lowerT, upperT) = bounds t
nilS = succ upperS
nilT = succ upperT
sub :: Index m -> Index m' -> Int
sub = substitute subC s t
instance IsGap g => SequenceAlignment (LocalAlignment g) where
{-# INLINE cond #-}
cond (LocalAlignment subC gap) = Conditions localStop (defDiag subC) (vert gap) (horiz gap)
{-# INLINE traceStart #-}
traceStart = const localStart
scoreMatrix :: forall m m' . (Alignable m, Alignable m')
=> LocalAlignment g (IxValue m) (IxValue m')
-> m
-> m'
-> Matrix m m'
scoreMatrix (LocalAlignment subC g) s t | isAffine g = uMatrixAffine
| otherwise = uMatrixSimple
where
uMatrixSimple :: UArray (Index m, Index m', EditOp) Int
uMatrixSimple = runSTUArray $ do
matrix <- newArray ((lowerS, lowerT, Match), (nilS, nilT, Match)) 0 :: ST s (STUArray s (Index m, Index m', EditOp) Int)
forM_ [lowerS .. nilS] $ \ixS ->
forM_ [lowerT .. nilT] $ \ixT ->
if | ixS == lowerS -> writeArray matrix (ixS, ixT, Match) 0
| ixT == lowerT -> writeArray matrix (ixS, ixT, Match) 0
| otherwise -> do
predDiag <- matrix `readArray` (pred ixS, pred ixT, Match)
predS <- matrix `readArray` (pred ixS, ixT, Match)
predT <- matrix `readArray` ( ixS, pred ixT, Match)
writeArray matrix (ixS, ixT, Match) $ maximum [ predDiag + sub ixS ixT
, predS + deleteCostOpen g
, predT + insertCostOpen g
, 0
]
pure matrix
uMatrixAffine :: UArray (Index m, Index m', EditOp) Int
uMatrixAffine = runSTUArray $ do
matrix <- newArray ((lowerS, lowerT, Insert), (nilS, nilT, Match)) 0 :: ST s (STUArray s (Index m, Index m', EditOp) Int)
forM_ [lowerS .. nilS] $ \ixS ->
forM_ [lowerT .. nilT] $ \ixT ->
if | ixS == lowerS || ixT == lowerT -> do
writeArray matrix (ixS, ixT, Match) 0
writeArray matrix (ixS, ixT, Insert) $ insertCostOpen g
writeArray matrix (ixS, ixT, Delete) $ deleteCostOpen g
| otherwise -> do
predDiag <- matrix `readArray` (pred ixS, pred ixT, Match)
predS <- matrix `readArray` (pred ixS, ixT, Match)
predT <- matrix `readArray` ( ixS, pred ixT, Match)
delCost <- matrix `readArray` (pred ixS, ixT, Delete)
insCost <- matrix `readArray` ( ixS, pred ixT, Insert)
let maxScore = maximum [ predDiag + sub ixS ixT
, predS + delCost
, predT + insCost
, 0
]
writeArray matrix (ixS, ixT, Delete) $ if predS + delCost == maxScore then deleteCostExtend g else deleteCostOpen g
writeArray matrix (ixS, ixT, Insert) $ if predT + insCost == maxScore then insertCostExtend g else insertCostOpen g
writeArray matrix (ixS, ixT, Match) maxScore
pure matrix
(lowerS, upperS) = bounds s
(lowerT, upperT) = bounds t
nilS = succ upperS
nilT = succ upperT
sub :: Index m -> Index m' -> Int
sub = substitute subC s t
instance IsGap g => SequenceAlignment (SemiglobalAlignment g) where
{-# INLINE semi #-}
semi = const True
{-# INLINE cond #-}
cond (SemiglobalAlignment subC gap) = Conditions defStop (defDiag subC) (vert gap) (horiz gap)
{-# INLINE traceStart #-}
traceStart = const semiStart
scoreMatrix :: forall m m' . (Alignable m, Alignable m')
=> SemiglobalAlignment g (IxValue m) (IxValue m')
-> m
-> m'
-> Matrix m m'
scoreMatrix (SemiglobalAlignment subC g) s t | isAffine g = uMatrixAffine
| otherwise = uMatrixSimple
where
uMatrixSimple :: UArray (Index m, Index m', EditOp) Int
uMatrixSimple = runSTUArray $ do
matrix <- newArray ((lowerS, lowerT, Match), (nilS, nilT, Match)) 0 :: ST s (STUArray s (Index m, Index m', EditOp) Int)
forM_ [lowerS .. nilS] $ \ixS ->
forM_ [lowerT .. nilT] $ \ixT ->
if | ixS == lowerS -> writeArray matrix (ixS, ixT, Match) 0
| ixT == lowerT -> writeArray matrix (ixS, ixT, Match) 0
| otherwise -> do
predDiag <- matrix `readArray` (pred ixS, pred ixT, Match)
predS <- matrix `readArray` (pred ixS, ixT, Match)
predT <- matrix `readArray` ( ixS, pred ixT, Match)
writeArray matrix (ixS, ixT, Match) $ maximum [ predDiag + sub ixS ixT
, predS + deleteCostOpen g
, predT + insertCostOpen g
]
pure matrix
uMatrixAffine :: UArray (Index m, Index m', EditOp) Int
uMatrixAffine = runSTUArray $ do
matrix <- newArray ((lowerS, lowerT, Insert), (nilS, nilT, Match)) 0 :: ST s (STUArray s (Index m, Index m', EditOp) Int)
forM_ [lowerS .. nilS] $ \ixS ->
forM_ [lowerT .. nilT] $ \ixT ->
if | ixS == lowerS || ixT == lowerT -> do
writeArray matrix (ixS, ixT, Match) 0
writeArray matrix (ixS, ixT, Insert) $ insertCostOpen g
writeArray matrix (ixS, ixT, Delete) $ deleteCostOpen g
| otherwise -> do
predDiag <- matrix `readArray` (pred ixS, pred ixT, Match)
predS <- matrix `readArray` (pred ixS, ixT, Match)
predT <- matrix `readArray` ( ixS, pred ixT, Match)
delCost <- matrix `readArray` (pred ixS, ixT, Delete)
insCost <- matrix `readArray` ( ixS, pred ixT, Insert)
let maxScore = maximum [ predDiag + sub ixS ixT
, predS + delCost
, predT + insCost
]
writeArray matrix (ixS, ixT, Delete) $ if predS + delCost == maxScore then deleteCostExtend g else deleteCostOpen g
writeArray matrix (ixS, ixT, Insert) $ if predT + insCost == maxScore then insertCostExtend g else insertCostOpen g
writeArray matrix (ixS, ixT, Match) maxScore
pure matrix
(lowerS, upperS) = bounds s
(lowerT, upperT) = bounds t
nilS = succ upperS
nilT = succ upperT
sub :: Index m -> Index m' -> Int
sub = substitute subC s t