module Bio.Chain.Alignment
  (
    -- * Alignment function
    align
    -- ** Alignment algorithms
  , EditDistance (..)
  , GlobalAlignment (..), LocalAlignment (..), SemiglobalAlignment (..)
  , SimpleGap, SimpleGap2, AffineGap (..), AffineGap2
  , IsGap (..)
    -- ** Alignment result types
  , AlignmentResult (..),  Operation (..)
    -- * Viewing alignment results
  , viewAlignment
  , prettyAlignmment
    -- * Similarity functions
    -- $similarity
  , similarityGen'
  , similarityGen
  , differenceGen'
  , differenceGen
  , similarity'
  , similarity
  , difference'
  , difference
  ) where

import           Control.Lens                   (Index, IxValue, Ixed (..), to,
                                                 (^?!))
import           Data.Array.Unboxed             ((!))
import           Data.List                      (intercalate)
import           Data.List.Split                (chunksOf)

import           Bio.Chain                      hiding ((!))
import           Bio.Chain.Alignment.Algorithms
import           Bio.Chain.Alignment.Type
import           Bio.Utils.Geometry             (R)
import           Bio.Utils.Monomer              (Symbol (..))

-- | Align chains using specifed algorithm
--
{-# SPECIALISE align :: LocalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: LocalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: SemiglobalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: SemiglobalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: GlobalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: GlobalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
align :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m') => algo (IxValue m) (IxValue m') -> m -> m' -> AlignmentResult m m'
align :: algo (IxValue m) (IxValue m') -> m -> m' -> AlignmentResult m m'
align algo (IxValue m) (IxValue m')
algo m
s m'
t = Int
-> [Operation (Index m) (Index m')]
-> m
-> m'
-> AlignmentResult m m'
forall m m'.
Int
-> [Operation (Index m) (Index m')]
-> m
-> m'
-> AlignmentResult m m'
AlignmentResult Int
alignmentScore [Operation (Index m) (Index m')]
alignmentResult m
s m'
t
  where
    -- Bounds of chains specify bounds of alignment matrix
    (Index m
lowerS, Index m
upperS) = m -> (Index m, Index m)
forall m. ChainLike m => m -> (Index m, Index m)
bounds m
s
    (Index m'
lowerT, Index m'
upperT) = m' -> (Index m', Index m')
forall m. ChainLike m => m -> (Index m, Index m)
bounds m'
t
    -- Fill the matrix
    mat :: Matrix m m'
    mat :: Matrix m m'
mat = algo (IxValue m) (IxValue m') -> m -> m' -> Matrix m m'
forall (a :: * -> * -> *) m m'.
(SequenceAlignment a, Alignable m, Alignable m') =>
a (IxValue m) (IxValue m') -> m -> m' -> Matrix m m'
scoreMatrix algo (IxValue m) (IxValue m')
algo m
s m'
t
    -- Result coordinates
    coords :: (Index m, Index m')
    coords :: (Index m, Index m')
coords = algo (IxValue m) (IxValue m')
-> Matrix m m' -> m -> m' -> (Index m, Index m')
forall (a :: * -> * -> *) m m'.
(SequenceAlignment a, Alignable m, Alignable m') =>
a (IxValue m) (IxValue m')
-> Matrix m m' -> m -> m' -> (Index m, Index m')
traceStart algo (IxValue m) (IxValue m')
algo Matrix m m'
mat m
s m'
t
    -- Score of alignment
    alignmentScore :: Int
    alignmentScore :: Int
alignmentScore = let (Index m
x, Index m'
y) = (Index m, Index m')
coords in Matrix m m'
mat Matrix m m' -> (Index m, Index m', EditOp) -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! (Index m
x, Index m'
y, EditOp
Match)

    -- Resulting alignment should contain additional deletions/insertions in case of semiglobal
    -- alignment
    alignmentResult :: [Operation (Index m) (Index m')]
    alignmentResult :: [Operation (Index m) (Index m')]
alignmentResult
        | algo (IxValue m) (IxValue m') -> Bool
forall (a :: * -> * -> *) e1 e2.
SequenceAlignment a =>
a e1 e2 -> Bool
semi algo (IxValue m) (IxValue m')
algo = [Operation (Index m) (Index m')]
preResult [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
forall a. [a] -> [a] -> [a]
++ [Operation (Index m) (Index m')]
suffix
        | Bool
otherwise = [Operation (Index m) (Index m')]
preResult
      where
        preResult :: [Operation (Index m) (Index m')]
preResult = (Index m -> Index m' -> [Operation (Index m) (Index m')])
-> (Index m, Index m') -> [Operation (Index m) (Index m')]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (algo (IxValue m) (IxValue m')
-> Matrix m m'
-> m
-> m'
-> Index m
-> Index m'
-> [Operation (Index m) (Index m')]
forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> Matrix m m'
-> m
-> m'
-> Index m
-> Index m'
-> [Operation (Index m) (Index m')]
traceback algo (IxValue m) (IxValue m')
algo Matrix m m'
mat m
s m'
t) (Index m, Index m')
coords
        -- Last index of FIRST chain affected by some operation in preResult or (lowerS - 1).
        lastI :: Index m
lastI = [Index m] -> Index m
forall a. [a] -> a
last ([Index m] -> Index m)
-> ([Operation (Index m) (Index m')] -> [Index m])
-> [Operation (Index m) (Index m')]
-> Index m
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Index m -> Index m
forall a. Enum a => a -> a
pred Index m
lowerS Index m -> [Index m] -> [Index m]
forall a. a -> [a] -> [a]
:) ([Index m] -> [Index m])
-> ([Operation (Index m) (Index m')] -> [Index m])
-> [Operation (Index m) (Index m')]
-> [Index m]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Operation (Index m) (Index m') -> Index m)
-> [Operation (Index m) (Index m')] -> [Index m]
forall a b. (a -> b) -> [a] -> [b]
map Operation (Index m) (Index m') -> Index m
forall i j. Operation i j -> i
getI ([Operation (Index m) (Index m')] -> Index m)
-> [Operation (Index m) (Index m')] -> Index m
forall a b. (a -> b) -> a -> b
$ (Operation (Index m) (Index m') -> Bool)
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool)
-> (Operation (Index m) (Index m') -> Bool)
-> Operation (Index m) (Index m')
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Operation (Index m) (Index m') -> Bool
forall i j. Operation i j -> Bool
isInsert) [Operation (Index m) (Index m')]
preResult
        -- Last index of SECOND chain affected by some operation in preResult or (lowerS - 1).
        lastJ :: Index m'
lastJ = [Index m'] -> Index m'
forall a. [a] -> a
last ([Index m'] -> Index m')
-> ([Operation (Index m) (Index m')] -> [Index m'])
-> [Operation (Index m) (Index m')]
-> Index m'
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Index m' -> Index m'
forall a. Enum a => a -> a
pred Index m'
lowerT Index m' -> [Index m'] -> [Index m']
forall a. a -> [a] -> [a]
:) ([Index m'] -> [Index m'])
-> ([Operation (Index m) (Index m')] -> [Index m'])
-> [Operation (Index m) (Index m')]
-> [Index m']
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Operation (Index m) (Index m') -> Index m')
-> [Operation (Index m) (Index m')] -> [Index m']
forall a b. (a -> b) -> [a] -> [b]
map Operation (Index m) (Index m') -> Index m'
forall i j. Operation i j -> j
getJ ([Operation (Index m) (Index m')] -> Index m')
-> [Operation (Index m) (Index m')] -> Index m'
forall a b. (a -> b) -> a -> b
$ (Operation (Index m) (Index m') -> Bool)
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool)
-> (Operation (Index m) (Index m') -> Bool)
-> Operation (Index m) (Index m')
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Operation (Index m) (Index m') -> Bool
forall i j. Operation i j -> Bool
isDelete) [Operation (Index m) (Index m')]
preResult
        -- Deletions and insertions of symbols after last operation in preResult
        suffix :: [Operation (Index m) (Index m')]
suffix = case [Operation (Index m) (Index m')] -> Operation (Index m) (Index m')
forall a. [a] -> a
last (Index m -> Index m' -> Operation (Index m) (Index m')
forall i j. i -> j -> Operation i j
MATCH (Index m -> Index m
forall a. Enum a => a -> a
pred Index m
lowerS) (Index m' -> Index m'
forall a. Enum a => a -> a
pred Index m'
lowerT) Operation (Index m) (Index m')
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
forall a. a -> [a] -> [a]
: [Operation (Index m) (Index m')]
preResult) of
                   MATCH Index m
i Index m'
j -> (Index m -> Operation (Index m) (Index m'))
-> [Index m] -> [Operation (Index m) (Index m')]
forall a b. (a -> b) -> [a] -> [b]
map Index m -> Operation (Index m) (Index m')
forall i j. i -> Operation i j
DELETE [Index m -> Index m
forall a. Enum a => a -> a
succ Index m
i .. Index m
upperS] [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
forall a. [a] -> [a] -> [a]
++ (Index m' -> Operation (Index m) (Index m'))
-> [Index m'] -> [Operation (Index m) (Index m')]
forall a b. (a -> b) -> [a] -> [b]
map Index m' -> Operation (Index m) (Index m')
forall i j. j -> Operation i j
INSERT [Index m' -> Index m'
forall a. Enum a => a -> a
succ Index m'
j .. Index m'
upperT]
                   INSERT Index m'
_ -> (Index m -> Operation (Index m) (Index m'))
-> [Index m] -> [Operation (Index m) (Index m')]
forall a b. (a -> b) -> [a] -> [b]
map Index m -> Operation (Index m) (Index m')
forall i j. i -> Operation i j
DELETE [Index m -> Index m
forall a. Enum a => a -> a
succ Index m
lastI .. Index m
upperS]
                   DELETE Index m
_ -> (Index m' -> Operation (Index m) (Index m'))
-> [Index m'] -> [Operation (Index m) (Index m')]
forall a b. (a -> b) -> [a] -> [b]
map Index m' -> Operation (Index m) (Index m')
forall i j. j -> Operation i j
INSERT [Index m' -> Index m'
forall a. Enum a => a -> a
succ Index m'
lastJ .. Index m'
upperT]

-- | Traceback function.
--
-- Builds traceback for alignment algorithm @algo@ in matrix @mat@, that is
-- result of alignment of sequences @s@ and @t@. Traceback will start from
-- position with coordinates (@i@, @j@) in matrix.
--
-- Traceback is represented as list of 'Operation's.
--
traceback :: (SequenceAlignment algo, Alignable m, Alignable m')
          => algo (IxValue m) (IxValue m')
          -> Matrix m m'
          -> m
          -> m'
          -> Index m
          -> Index m'
          -> [Operation (Index m) (Index m')]
traceback :: algo (IxValue m) (IxValue m')
-> Matrix m m'
-> m
-> m'
-> Index m
-> Index m'
-> [Operation (Index m) (Index m')]
traceback algo (IxValue m) (IxValue m')
algo Matrix m m'
mat m
s m'
t Index m
i' Index m'
j' = Index m
-> Index m'
-> EditOp
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
helper Index m
i' Index m'
j' EditOp
Match []
  where
    helper :: Index m
-> Index m'
-> EditOp
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
helper Index m
i Index m'
j EditOp
prevOp [Operation (Index m) (Index m')]
ar
      | Conditions m m' -> Stop m m'
forall m m'. Conditions m m' -> Stop m m'
isStop  (algo (IxValue m) (IxValue m') -> Conditions m m'
forall (a :: * -> * -> *) m m'.
(SequenceAlignment a, Alignable m, Alignable m') =>
a (IxValue m) (IxValue m') -> Conditions m m'
cond algo (IxValue m) (IxValue m')
algo) Matrix m m'
mat m
s m'
t Index m
i Index m'
j = [Operation (Index m) (Index m')]
ar
      | Bool
otherwise =
        let (EditOp
nextOp, Index m
nextI, Index m'
nextJ, Operation (Index m) (Index m')
op) = Conditions m m' -> Move m m'
forall m m'. Conditions m m' -> Move m m'
doMove (algo (IxValue m) (IxValue m') -> Conditions m m'
forall (a :: * -> * -> *) m m'.
(SequenceAlignment a, Alignable m, Alignable m') =>
a (IxValue m) (IxValue m') -> Conditions m m'
cond algo (IxValue m) (IxValue m')
algo) Matrix m m'
mat m
s m'
t Index m
i Index m'
j EditOp
prevOp
        in Index m
-> Index m'
-> EditOp
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
helper Index m
nextI Index m'
nextJ EditOp
nextOp ([Operation (Index m) (Index m')]
 -> [Operation (Index m) (Index m')])
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
forall a b. (a -> b) -> a -> b
$ Operation (Index m) (Index m')
opOperation (Index m) (Index m')
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
forall a. a -> [a] -> [a]
:[Operation (Index m) (Index m')]
ar

{- $similarity
These are generic variants of similarity and difference functions alongside with their specialised variants.
Generic versions take the alignment algorithm used for sequence alignment,
an equality function on elements of both sequences to calculate hamming distance on aligned sequences,
and the sequences themselves.

Sample usage of generic functions:

>>> similarityGen (GlobalAlignment (\x y -> if x == ord y then 1 else 0) (AffineGap (-11) (-1))) (\x y -> x == ord y) [ord 'R'.. ord 'z'] ['a'..'z']
0.63414633

This one will calculate similarity between a list of 'Int's and a list of 'Char's.
Generic scoring function used in alignment is @\\x y -> if x == ord y then 1 else 0@.
Generic equality function used in hamming distance is @\\x y -> x == ord y@.

Specialised versions do not take the equality function as the sequences are already constrained to have 'Eq' elements.

Sample usage of specialised function is the same as before:

>>> :{
seq1 :: String
seq1 = "EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCAKVQLERYFDYWGQGTLVTVSS"
<BLANKLINE>
seq2 :: String
seq2 = "EVQLLESGGGLVQPGGSLRLSAAASGFTFSTFSMNWVRQAPGKGLEWVSYISRTSKTIYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYVARGRFFDYWGQGTLVTVS"
<BLANKLINE>
similarity (GlobalAlignment blosum62 (AffineGap (-11) (-1))) s1 s2
:}
0.8130081

Sometimes for biological reasons gaps appearing in one of two sequences, that are being aligned,
are not physical. For that reason we might want to use different gap penalties when aligning these sequences.

Example of usage of different gaps when aligning two sequences is presented below:

>>> :{
seq1 :: String
seq1 = "AAAAAAGGGGGGGGGGGGTTTTTTTTT"
<BLANKLINE>
seq2 :: String
seq2 = "AAAAAATTTTTTTTT"
<BLANKLINE>
gapForSeq1 :: AffineGap
gapForSeq1 = AffineGap (-5) (-1)
<BLANKLINE>
gapForSeq2 :: AffineGap
gapForSeq2 = AffineGap (-1000) (-1000) -- basically, we forbid gaps on @seq2@
<BLANKLINE>
local = LocalAlignment nuc44 (gapForSeq1, gapForSeq2)
<BLANKLINE>
viewAlignment (align local seq1 seq2)
:}
("TTTTTTTTT", "TTTTTTTTT")
-}

-- | Calculate similarity and difference between two sequences, aligning them first using given algorithm.
--
similarityGen :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m')
              => algo (IxValue m) (IxValue m')
              -> (IxValue m -> IxValue m' -> Bool)
              -> m
              -> m'
              -> R
similarityGen :: algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
similarityGen algo (IxValue m) (IxValue m')
algo IxValue m -> IxValue m' -> Bool
genericEq m
s m'
t = AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
similarityGen' (algo (IxValue m) (IxValue m') -> m -> m' -> AlignmentResult m m'
forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m') -> m -> m' -> AlignmentResult m m'
align algo (IxValue m) (IxValue m')
algo m
s m'
t) IxValue m -> IxValue m' -> Bool
genericEq

-- | Calculate similarity by precomputed 'AlignmentResult'.
similarityGen' :: forall m m'. (Alignable m, Alignable m')
               => AlignmentResult m m'
               -> (IxValue m -> IxValue m' -> Bool)
               -> R
similarityGen' :: AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
similarityGen' AlignmentResult m m'
res IxValue m -> IxValue m' -> Bool
genericEq = Int -> R
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
hamming R -> R -> R
forall a. Fractional a => a -> a -> a
/ Int -> R
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len
  where
    operations :: [Operation (Index m) (Index m')]
operations = AlignmentResult m m' -> [Operation (Index m) (Index m')]
forall m m'.
AlignmentResult m m' -> [Operation (Index m) (Index m')]
alignment AlignmentResult m m'
res
    s :: m
s          = AlignmentResult m m' -> m
forall m m'. AlignmentResult m m' -> m
sequence1 AlignmentResult m m'
res
    t :: m'
t          = AlignmentResult m m' -> m'
forall m m'. AlignmentResult m m' -> m'
sequence2 AlignmentResult m m'
res
    len :: Int
len        = [Operation (Index m) (Index m')] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Operation (Index m) (Index m')]
operations
    hamming :: Int
hamming    = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ Operation (Index m) (Index m') -> Int
toScores (Operation (Index m) (Index m') -> Int)
-> [Operation (Index m) (Index m')] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Operation (Index m) (Index m')]
operations

    toScores :: Operation (Index m) (Index m') -> Int
    toScores :: Operation (Index m) (Index m') -> Int
toScores (MATCH Index m
i Index m'
j) = if (m
s m -> Getting (Endo (IxValue m)) m (IxValue m) -> IxValue m
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m -> Traversal' m (IxValue m)
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i) IxValue m -> IxValue m' -> Bool
`genericEq` (m'
t m' -> Getting (Endo (IxValue m')) m' (IxValue m') -> IxValue m'
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m' -> Traversal' m' (IxValue m')
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j) then Int
1 else Int
0
    toScores Operation (Index m) (Index m')
_           = Int
0

similarity :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
           => algo (IxValue m) (IxValue m')
           -> m
           -> m'
           -> R
similarity :: algo (IxValue m) (IxValue m') -> m -> m' -> R
similarity algo (IxValue m) (IxValue m')
algo = algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
similarityGen algo (IxValue m) (IxValue m')
algo IxValue m -> IxValue m' -> Bool
forall a. Eq a => a -> a -> Bool
(==)

similarity' :: forall m m'.(Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
            => AlignmentResult m m'
            -> R
similarity' :: AlignmentResult m m' -> R
similarity' AlignmentResult m m'
res = AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
similarityGen' AlignmentResult m m'
res IxValue m -> IxValue m' -> Bool
forall a. Eq a => a -> a -> Bool
(==)

differenceGen :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m')
              => algo (IxValue m) (IxValue m')
              -> (IxValue m -> IxValue m' -> Bool)
              -> m
              -> m'
              -> R
differenceGen :: algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
differenceGen algo (IxValue m) (IxValue m')
algo IxValue m -> IxValue m' -> Bool
genericEq m
s m'
t = R
1.0 R -> R -> R
forall a. Num a => a -> a -> a
- algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
similarityGen algo (IxValue m) (IxValue m')
algo IxValue m -> IxValue m' -> Bool
genericEq m
s m'
t

differenceGen' :: forall m m'.(Alignable m, Alignable m')
               => AlignmentResult m m'
               -> (IxValue m -> IxValue m' -> Bool)
               -> R
differenceGen' :: AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
differenceGen' AlignmentResult m m'
res IxValue m -> IxValue m' -> Bool
genericEq = R
1.0 R -> R -> R
forall a. Num a => a -> a -> a
- AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
similarityGen' AlignmentResult m m'
res IxValue m -> IxValue m' -> Bool
genericEq

difference :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
           => algo (IxValue m) (IxValue m')
           -> m
           -> m'
           -> R
difference :: algo (IxValue m) (IxValue m') -> m -> m' -> R
difference algo (IxValue m) (IxValue m')
algo = algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
differenceGen algo (IxValue m) (IxValue m')
algo IxValue m -> IxValue m' -> Bool
forall a. Eq a => a -> a -> Bool
(==)

difference' :: forall m m'.(Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
            => AlignmentResult m m'
            -> R
difference' :: AlignmentResult m m' -> R
difference' AlignmentResult m m'
res = AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
differenceGen' AlignmentResult m m'
res IxValue m -> IxValue m' -> Bool
forall a. Eq a => a -> a -> Bool
(==)

-- | View alignment results as simple strings with gaps.
--
viewAlignment :: forall m m'.(Alignable m, Alignable m', Symbol (IxValue m), Symbol (IxValue m')) => AlignmentResult m m' -> (String, String)
viewAlignment :: AlignmentResult m m' -> (String, String)
viewAlignment AlignmentResult m m'
ar = [(Char, Char)] -> (String, String)
forall a b. [(a, b)] -> ([a], [b])
unzip (Operation (Index m) (Index m') -> (Char, Char)
toChars (Operation (Index m) (Index m') -> (Char, Char))
-> [Operation (Index m) (Index m')] -> [(Char, Char)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> AlignmentResult m m' -> [Operation (Index m) (Index m')]
forall m m'.
AlignmentResult m m' -> [Operation (Index m) (Index m')]
alignment AlignmentResult m m'
ar)
  where
    (m
s, m'
t) = (AlignmentResult m m' -> m
forall m m'. AlignmentResult m m' -> m
sequence1 AlignmentResult m m'
ar, AlignmentResult m m' -> m'
forall m m'. AlignmentResult m m' -> m'
sequence2 AlignmentResult m m'
ar)

    toChars :: Operation (Index m) (Index m') -> (Char, Char)
    toChars :: Operation (Index m) (Index m') -> (Char, Char)
toChars (MATCH Index m
i Index m'
j) = (IxValue m -> Char
forall a. Symbol a => a -> Char
symbol (m
s m -> Getting (Endo (IxValue m)) m (IxValue m) -> IxValue m
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m -> Traversal' m (IxValue m)
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i), IxValue m' -> Char
forall a. Symbol a => a -> Char
symbol (m'
t m' -> Getting (Endo (IxValue m')) m' (IxValue m') -> IxValue m'
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m' -> Traversal' m' (IxValue m')
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j))
    toChars (DELETE Index m
i)  = (IxValue m -> Char
forall a. Symbol a => a -> Char
symbol (m
s m -> Getting (Endo (IxValue m)) m (IxValue m) -> IxValue m
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m -> Traversal' m (IxValue m)
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i), Char
'-')
    toChars (INSERT Index m'
j)  = (Char
'-', IxValue m' -> Char
forall a. Symbol a => a -> Char
symbol (m'
t m' -> Getting (Endo (IxValue m')) m' (IxValue m') -> IxValue m'
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m' -> Traversal' m' (IxValue m')
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j))

-- | Format alignment result as pretty columns of symbols.
--
-- Example with width equal to 20:
--
-- @
--  0 --------------------  0
--
--  0 TTTTTTTTTTTTTTTTTTTT 19
--
--  0 --GCCTGAATGGTGTGGTGT 17
--      || |||||| |||| |||
-- 20 TTGC-TGAATG-TGTG-TGT 36
--
-- 18 TCGGCGGAGGGACCCAGCTA 37
--     || |||||||||||||||
-- 37 -CG-CGGAGGGACCCAGCT- 53
--
-- 38 AAAAAAAAAA 47
--
-- 53 ---------- 53
-- @
prettyAlignmment
  :: forall m m'
  . (Alignable m, Alignable m', Symbol (IxValue m), Symbol (IxValue m'))
  => AlignmentResult m m' -- ^ Result of alignment to format
  -> Int                  -- ^ Desired width of one alignment row
  -> String
prettyAlignmment :: AlignmentResult m m' -> Int -> String
prettyAlignmment AlignmentResult m m'
ar Int
width =
  -- Due to construction 'resultRows' first element will be empty string, and we don't need it.
  String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"\n" ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ [String] -> [String]
forall a. [a] -> [a]
tail [String]
resultRows
  where
    (m
s, m'
t) = (AlignmentResult m m' -> m
forall m m'. AlignmentResult m m' -> m
sequence1 AlignmentResult m m'
ar, AlignmentResult m m' -> m'
forall m m'. AlignmentResult m m' -> m'
sequence2 AlignmentResult m m'
ar)
    rows :: [[Operation (Index m) (Index m')]]
rows = Int
-> [Operation (Index m) (Index m')]
-> [[Operation (Index m) (Index m')]]
forall e. Int -> [e] -> [[e]]
chunksOf Int
width ([Operation (Index m) (Index m')]
 -> [[Operation (Index m) (Index m')]])
-> [Operation (Index m) (Index m')]
-> [[Operation (Index m) (Index m')]]
forall a b. (a -> b) -> a -> b
$ AlignmentResult m m' -> [Operation (Index m) (Index m')]
forall m m'.
AlignmentResult m m' -> [Operation (Index m) (Index m')]
alignment AlignmentResult m m'
ar

    chainLength :: forall c. ChainLike c => c -> Int
    chainLength :: c -> Int
chainLength c
ch = let (Index c
a, Index c
b) = c -> (Index c, Index c)
forall m. ChainLike m => m -> (Index m, Index m)
bounds c
ch in Index c -> Int
forall a. Enum a => a -> Int
fromEnum Index c
b Int -> Int -> Int
forall a. Num a => a -> a -> a
- Index c -> Int
forall a. Enum a => a -> Int
fromEnum Index c
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

    -- Determine how many characters to leave for position numbers
    numWidth :: Int
numWidth = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ Int -> String
forall a. Show a => a -> String
show (Int -> String) -> Int -> String
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (m -> Int
forall c. ChainLike c => c -> Int
chainLength m
s) (m' -> Int
forall c. ChainLike c => c -> Int
chainLength m'
t)

    padLeft :: String -> String
    padLeft :: String -> String
padLeft String
x = Int -> Char -> String
forall a. Int -> a -> [a]
replicate (Int
numWidth Int -> Int -> Int
forall a. Num a => a -> a -> a
- String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
x) Char
' ' String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
x

    -- Build one column of nice alignment like
    -- T
    -- |
    -- T
    toCharTriple :: Operation (Index m) (Index m') -> (Char, Char, Char)
    toCharTriple :: Operation (Index m) (Index m') -> (Char, Char, Char)
toCharTriple (MATCH Index m
i Index m'
j) = (Char
left, if Char
left Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
right then Char
'|' else Char
' ', Char
right)
      where
        left :: Char
left  = m
s m -> Getting (Endo Char) m Char -> Char
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m -> Traversal' m (IxValue m)
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i ((IxValue m -> Const (Endo Char) (IxValue m))
 -> m -> Const (Endo Char) m)
-> ((Char -> Const (Endo Char) Char)
    -> IxValue m -> Const (Endo Char) (IxValue m))
-> Getting (Endo Char) m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IxValue m -> Char)
-> (Char -> Const (Endo Char) Char)
-> IxValue m
-> Const (Endo Char) (IxValue m)
forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to IxValue m -> Char
forall a. Symbol a => a -> Char
symbol
        right :: Char
right = m'
t m' -> Getting (Endo Char) m' Char -> Char
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m' -> Traversal' m' (IxValue m')
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j ((IxValue m' -> Const (Endo Char) (IxValue m'))
 -> m' -> Const (Endo Char) m')
-> ((Char -> Const (Endo Char) Char)
    -> IxValue m' -> Const (Endo Char) (IxValue m'))
-> Getting (Endo Char) m' Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IxValue m' -> Char)
-> (Char -> Const (Endo Char) Char)
-> IxValue m'
-> Const (Endo Char) (IxValue m')
forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to IxValue m' -> Char
forall a. Symbol a => a -> Char
symbol
    toCharTriple (DELETE Index m
i) = (m
s m -> Getting (Endo Char) m Char -> Char
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m -> Traversal' m (IxValue m)
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i ((IxValue m -> Const (Endo Char) (IxValue m))
 -> m -> Const (Endo Char) m)
-> ((Char -> Const (Endo Char) Char)
    -> IxValue m -> Const (Endo Char) (IxValue m))
-> Getting (Endo Char) m Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IxValue m -> Char)
-> (Char -> Const (Endo Char) Char)
-> IxValue m
-> Const (Endo Char) (IxValue m)
forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to IxValue m -> Char
forall a. Symbol a => a -> Char
symbol, Char
' ', Char
'-')
    toCharTriple (INSERT Index m'
j) = (Char
'-', Char
' ', m'
t m' -> Getting (Endo Char) m' Char -> Char
forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! Index m' -> Traversal' m' (IxValue m')
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j ((IxValue m' -> Const (Endo Char) (IxValue m'))
 -> m' -> Const (Endo Char) m')
-> ((Char -> Const (Endo Char) Char)
    -> IxValue m' -> Const (Endo Char) (IxValue m'))
-> Getting (Endo Char) m' Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IxValue m' -> Char)
-> (Char -> Const (Endo Char) Char)
-> IxValue m'
-> Const (Endo Char) (IxValue m')
forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to IxValue m' -> Char
forall a. Symbol a => a -> Char
symbol)

    -- Format one chunk of alignment, adding indices to start and end of strings
    -- (prevI, prevJ) must be 1-based indices of last printed characters in each string.
    formatRow :: (Int, Int) -> [Operation (Index m) (Index m')] -> ((Int, Int), [String])
    formatRow :: (Int, Int)
-> [Operation (Index m) (Index m')] -> ((Int, Int), [String])
formatRow (Int
prevI, Int
prevJ) [Operation (Index m) (Index m')]
row = ((Int
lastI, Int
lastJ), [String
resLine1, String
resLine2, String
resLine3])
      where
        (String
line1, String
line2, String
line3) = [(Char, Char, Char)] -> (String, String, String)
forall a b c. [(a, b, c)] -> ([a], [b], [c])
unzip3 ([(Char, Char, Char)] -> (String, String, String))
-> [(Char, Char, Char)] -> (String, String, String)
forall a b. (a -> b) -> a -> b
$ (Operation (Index m) (Index m') -> (Char, Char, Char))
-> [Operation (Index m) (Index m')] -> [(Char, Char, Char)]
forall a b. (a -> b) -> [a] -> [b]
map Operation (Index m) (Index m') -> (Char, Char, Char)
toCharTriple [Operation (Index m) (Index m')]
row

        -- | Folding function to count lengths of both strings in alignment row
        countChars :: (Int, Int) -> Operation (Index m) (Index m') -> (Int, Int)
        countChars :: (Int, Int) -> Operation (Index m) (Index m') -> (Int, Int)
countChars (Int
li, Int
lj) (MATCH Index m
_ Index m'
_) = (Int
li Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, Int
lj Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        countChars (Int
li, Int
lj) (DELETE Index m
_)  = (Int
li Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, Int
lj)
        countChars (Int
li, Int
lj) (INSERT Index m'
_)  = (Int
li, Int
lj Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

        (Int
lengthI, Int
lengthJ) = ((Int, Int) -> Operation (Index m) (Index m') -> (Int, Int))
-> (Int, Int) -> [Operation (Index m) (Index m')] -> (Int, Int)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (Int, Int) -> Operation (Index m) (Index m') -> (Int, Int)
countChars (Int
0, Int
0) [Operation (Index m) (Index m')]
row

        -- Indices of first printed non-gap characters in the current row.
        -- If the row contains any non-gap characters, this is equal to index
        -- of last printed character + 1
        (Int
firstI, Int
firstJ) =
          ( if Int
lengthI Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 then Int
prevI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 else Int
prevI
          , if Int
lengthJ Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 then Int
prevJ Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 else Int
prevJ
          )
        (Int
lastI, Int
lastJ) = (Int
prevI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lengthI, Int
prevJ Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lengthJ)

        -- It's easier to do everything in 1-based indices and convert before showing
        toZeroBased :: Int -> Int
        toZeroBased :: Int -> Int
toZeroBased Int
0 = Int
0
        toZeroBased Int
i = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1

        resLine1 :: String
resLine1 = String -> String
padLeft (Int -> String
forall a. Show a => a -> String
show (Int -> String) -> Int -> String
forall a b. (a -> b) -> a -> b
$ Int -> Int
toZeroBased Int
firstI) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
line1 String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String -> String
padLeft (Int -> String
forall a. Show a => a -> String
show (Int -> String) -> Int -> String
forall a b. (a -> b) -> a -> b
$ Int -> Int
toZeroBased Int
lastI)
        resLine2 :: String
resLine2 = String -> String
padLeft String
""                          String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
line2
        resLine3 :: String
resLine3 = String -> String
padLeft (Int -> String
forall a. Show a => a -> String
show (Int -> String) -> Int -> String
forall a b. (a -> b) -> a -> b
$ Int -> Int
toZeroBased Int
firstJ) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
line3 String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String -> String
padLeft (Int -> String
forall a. Show a => a -> String
show (Int -> String) -> Int -> String
forall a b. (a -> b) -> a -> b
$ Int -> Int
toZeroBased Int
lastJ)

    -- Go through all chunks of operations, accummulating current offsets in both strings
    ((Int, Int)
_, [String]
resultRows) =
      (((Int, Int), [String])
 -> [Operation (Index m) (Index m')] -> ((Int, Int), [String]))
-> ((Int, Int), [String])
-> [[Operation (Index m) (Index m')]]
-> ((Int, Int), [String])
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl
        (\((Int, Int)
off, [String]
res) [Operation (Index m) (Index m')]
ops -> let ((Int, Int)
newOff, [String]
newRes) = (Int, Int)
-> [Operation (Index m) (Index m')] -> ((Int, Int), [String])
formatRow (Int, Int)
off [Operation (Index m) (Index m')]
ops in ((Int, Int)
newOff, [String]
res [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> [String
""] [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> [String]
newRes))
        ((Int
0, Int
0), [])
        [[Operation (Index m) (Index m')]]
rows