module Bio.Chain.Alignment
( AlignmentResult (..), SimpleGap, SimpleGap2, AffineGap (..), AffineGap2, Operation (..)
, EditDistance (..)
, GlobalAlignment (..), LocalAlignment (..), SemiglobalAlignment (..)
, IsGap (..)
, align
, viewAlignment
, prettyAlignmment
, similarityGen
, differenceGen
, similarity
, difference
) where
import Control.Lens (Index, IxValue, Ixed (..), to,
(^?!))
import Data.Array.Unboxed ((!))
import Data.List (intercalate)
import Data.List.Split (chunksOf)
import Bio.Chain hiding ((!))
import Bio.Chain.Alignment.Algorithms
import Bio.Chain.Alignment.Type
import Bio.Utils.Geometry (R)
import Bio.Utils.Monomer (Symbol (..))
{-# SPECIALISE align :: LocalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: LocalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: SemiglobalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: SemiglobalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: GlobalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: GlobalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
align :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m') => algo (IxValue m) (IxValue m') -> m -> m' -> AlignmentResult m m'
align algo s t = AlignmentResult alignmentScore alignmentResult s t
where
(lowerS, upperS) = bounds s
(lowerT, upperT) = bounds t
mat :: Matrix m m'
mat = scoreMatrix algo s t
coords :: (Index m, Index m')
coords = traceStart algo mat s t
alignmentScore :: Int
alignmentScore = let (x, y) = coords in mat ! (x, y, Match)
alignmentResult :: [Operation (Index m) (Index m')]
alignmentResult
| semi algo = preResult ++ suffix
| otherwise = preResult
where
preResult = uncurry (traceback algo mat s t) coords
lastI = last . (pred lowerS :) . map getI $ filter (not . isInsert) preResult
lastJ = last . (pred lowerT :) . map getJ $ filter (not . isDelete) preResult
suffix = case last (MATCH (pred lowerS) (pred lowerT) : preResult) of
MATCH i j -> map DELETE [succ i .. upperS] ++ map INSERT [succ j .. upperT]
INSERT _ -> map DELETE [succ lastI .. upperS]
DELETE _ -> map INSERT [succ lastJ .. upperT]
traceback :: (SequenceAlignment algo, Alignable m, Alignable m')
=> algo (IxValue m) (IxValue m')
-> Matrix m m'
-> m
-> m'
-> Index m
-> Index m'
-> [Operation (Index m) (Index m')]
traceback algo mat s t i' j' = helper i' j' []
where
helper i j ar | isStop (cond algo) mat s t i j = ar
| isVert (cond algo) mat s t i j = helper (pred i) j (DELETE (pred i):ar)
| isHoriz (cond algo) mat s t i j = helper i (pred j) (INSERT (pred j):ar)
| isDiag (cond algo) mat s t i j = helper (pred i) (pred j) (MATCH (pred i) (pred j):ar)
| otherwise = error "Alignment traceback: you cannot be here"
similarityGen :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m')
=> algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool)
-> m
-> m'
-> R
similarityGen algo genericEq s t = fromIntegral hamming / fromIntegral len
where
operations = alignment (align algo s t)
len = length operations
hamming = sum $ toScores <$> operations
toScores :: Operation (Index m) (Index m') -> Int
toScores (MATCH i j) = if (s ^?! ix i) `genericEq` (t ^?! ix j) then 1 else 0
toScores _ = 0
similarity :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
=> algo (IxValue m) (IxValue m')
-> m
-> m'
-> R
similarity algo = similarityGen algo (==)
differenceGen :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m')
=> algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool)
-> m
-> m'
-> R
differenceGen algo genericEq s t = 1.0 - similarityGen algo genericEq s t
difference :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
=> algo (IxValue m) (IxValue m')
-> m
-> m'
-> R
difference algo = differenceGen algo (==)
viewAlignment :: forall m m'.(Alignable m, Alignable m', Symbol (IxValue m), Symbol (IxValue m')) => AlignmentResult m m' -> (String, String)
viewAlignment ar = unzip (toChars <$> alignment ar)
where
(s, t) = (sequence1 ar, sequence2 ar)
toChars :: Operation (Index m) (Index m') -> (Char, Char)
toChars (MATCH i j) = (symbol (s ^?! ix i), symbol (t ^?! ix j))
toChars (DELETE i) = (symbol (s ^?! ix i), '-')
toChars (INSERT j) = ('-', symbol (t ^?! ix j))
prettyAlignmment
:: forall m m'
. (Alignable m, Alignable m', Symbol (IxValue m), Symbol (IxValue m'))
=> AlignmentResult m m'
-> Int
-> String
prettyAlignmment ar width =
intercalate "\n" $ tail resultRows
where
(s, t) = (sequence1 ar, sequence2 ar)
rows = chunksOf width $ alignment ar
chainLength :: forall c. ChainLike c => c -> Int
chainLength ch = let (a, b) = bounds ch in fromEnum b - fromEnum a + 1
numWidth = length $ show $ max (chainLength s) (chainLength t)
padLeft :: String -> String
padLeft x = replicate (numWidth - length x) ' ' <> x
toCharTriple :: Operation (Index m) (Index m') -> (Char, Char, Char)
toCharTriple (MATCH i j) = (left, if left == right then '|' else ' ', right)
where
left = s ^?! ix i . to symbol
right = t ^?! ix j . to symbol
toCharTriple (DELETE i) = (s ^?! ix i . to symbol, ' ', '-')
toCharTriple (INSERT j) = ('-', ' ', t ^?! ix j . to symbol)
formatRow :: (Int, Int) -> [Operation (Index m) (Index m')] -> ((Int, Int), [String])
formatRow (prevI, prevJ) row = ((lastI, lastJ), [resLine1, resLine2, resLine3])
where
(line1, line2, line3) = unzip3 $ map toCharTriple row
countChars :: (Int, Int) -> Operation (Index m) (Index m') -> (Int, Int)
countChars (li, lj) (MATCH _ _) = (li + 1, lj + 1)
countChars (li, lj) (DELETE _) = (li + 1, lj)
countChars (li, lj) (INSERT _) = (li, lj + 1)
(lengthI, lengthJ) = foldl countChars (0, 0) row
(firstI, firstJ) =
( if lengthI > 0 then prevI + 1 else prevI
, if lengthJ > 0 then prevJ + 1 else prevJ
)
(lastI, lastJ) = (prevI + lengthI, prevJ + lengthJ)
toZeroBased :: Int -> Int
toZeroBased 0 = 0
toZeroBased i = i - 1
resLine1 = padLeft (show $ toZeroBased firstI) <> " " <> line1 <> " " <> padLeft (show $ toZeroBased lastI)
resLine2 = padLeft "" <> " " <> line2
resLine3 = padLeft (show $ toZeroBased firstJ) <> " " <> line3 <> " " <> padLeft (show $ toZeroBased lastJ)
(_, resultRows) =
foldl
(\(off, res) ops -> let (newOff, newRes) = formatRow off ops in (newOff, res <> [""] <> newRes))
((0, 0), [])
rows