module Bio.Chain.Alignment
  (
    -- * Alignment function
    align
    -- ** Alignment algorithms
  , EditDistance (..)
  , GlobalAlignment (..), LocalAlignment (..), SemiglobalAlignment (..)
  , SimpleGap, SimpleGap2, AffineGap (..), AffineGap2
  , IsGap (..)
    -- ** Alignment result types
  , AlignmentResult (..),  Operation (..)
    -- * Viewing alignment results
  , viewAlignment
  , prettyAlignmment
    -- * Similarity functions
    -- $similarity
  , similarityGen'
  , similarityGen
  , differenceGen'
  , differenceGen
  , similarity'
  , similarity
  , difference'
  , difference
  ) where

import           Control.Lens                   (Index, IxValue, Ixed (..), to,
                                                 (^?!))
import           Data.Array.Unboxed             ((!))
import           Data.List                      (intercalate)
import           Data.List.Split                (chunksOf)

import           Bio.Chain                      hiding ((!))
import           Bio.Chain.Alignment.Algorithms
import           Bio.Chain.Alignment.Type
import           Bio.Utils.Geometry             (R)
import           Bio.Utils.Monomer              (Symbol (..))

-- | Align chains using specifed algorithm
--
{-# SPECIALISE align :: LocalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: LocalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: SemiglobalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: SemiglobalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: GlobalAlignment SimpleGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
{-# SPECIALISE align :: GlobalAlignment AffineGap Char Char -> Chain Int Char -> Chain Int Char -> AlignmentResult (Chain Int Char) (Chain Int Char) #-}
align :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m') => algo (IxValue m) (IxValue m') -> m -> m' -> AlignmentResult m m'
align :: forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m') -> m -> m' -> AlignmentResult m m'
align algo (IxValue m) (IxValue m')
algo m
s m'
t = forall m m'.
SimpleGap
-> [Operation (Index m) (Index m')]
-> m
-> m'
-> AlignmentResult m m'
AlignmentResult SimpleGap
alignmentScore [Operation (Index m) (Index m')]
alignmentResult m
s m'
t
  where
    -- Bounds of chains specify bounds of alignment matrix
    (Index m
lowerS, Index m
upperS) = forall m. ChainLike m => m -> (Index m, Index m)
bounds m
s
    (Index m'
lowerT, Index m'
upperT) = forall m. ChainLike m => m -> (Index m, Index m)
bounds m'
t
    -- Fill the matrix
    mat :: Matrix m m'
    mat :: Matrix m m'
mat = forall (a :: * -> * -> *) m m'.
(SequenceAlignment a, Alignable m, Alignable m') =>
a (IxValue m) (IxValue m') -> m -> m' -> Matrix m m'
scoreMatrix algo (IxValue m) (IxValue m')
algo m
s m'
t
    -- Result coordinates
    coords :: (Index m, Index m')
    coords :: (Index m, Index m')
coords = forall (a :: * -> * -> *) m m'.
(SequenceAlignment a, Alignable m, Alignable m') =>
a (IxValue m) (IxValue m')
-> Matrix m m' -> m -> m' -> (Index m, Index m')
traceStart algo (IxValue m) (IxValue m')
algo Matrix m m'
mat m
s m'
t
    -- Score of alignment
    alignmentScore :: Int
    alignmentScore :: SimpleGap
alignmentScore = let (Index m
x, Index m'
y) = (Index m, Index m')
coords in Matrix m m'
mat forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! (Index m
x, Index m'
y, EditOp
Match)

    -- Resulting alignment should contain additional deletions/insertions in case of semiglobal
    -- alignment
    alignmentResult :: [Operation (Index m) (Index m')]
    alignmentResult :: [Operation (Index m) (Index m')]
alignmentResult
        | forall (a :: * -> * -> *) e1 e2.
SequenceAlignment a =>
a e1 e2 -> Bool
semi algo (IxValue m) (IxValue m')
algo = [Operation (Index m) (Index m')]
preResult forall a. [a] -> [a] -> [a]
++ [Operation (Index m) (Index m')]
suffix
        | Bool
otherwise = [Operation (Index m) (Index m')]
preResult
      where
        preResult :: [Operation (Index m) (Index m')]
preResult = forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> Matrix m m'
-> m
-> m'
-> Index m
-> Index m'
-> [Operation (Index m) (Index m')]
traceback algo (IxValue m) (IxValue m')
algo Matrix m m'
mat m
s m'
t) (Index m, Index m')
coords
        -- Last index of FIRST chain affected by some operation in preResult or (lowerS - 1).
        lastI :: Index m
lastI = forall a. [a] -> a
last forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Enum a => a -> a
pred Index m
lowerS forall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map forall i j. Operation i j -> i
getI forall a b. (a -> b) -> a -> b
$ forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall i j. Operation i j -> Bool
isInsert) [Operation (Index m) (Index m')]
preResult
        -- Last index of SECOND chain affected by some operation in preResult or (lowerS - 1).
        lastJ :: Index m'
lastJ = forall a. [a] -> a
last forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Enum a => a -> a
pred Index m'
lowerT forall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map forall i j. Operation i j -> j
getJ forall a b. (a -> b) -> a -> b
$ forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall i j. Operation i j -> Bool
isDelete) [Operation (Index m) (Index m')]
preResult
        -- Deletions and insertions of symbols after last operation in preResult
        suffix :: [Operation (Index m) (Index m')]
suffix = case forall a. [a] -> a
last (forall i j. i -> j -> Operation i j
MATCH (forall a. Enum a => a -> a
pred Index m
lowerS) (forall a. Enum a => a -> a
pred Index m'
lowerT) forall a. a -> [a] -> [a]
: [Operation (Index m) (Index m')]
preResult) of
                   MATCH Index m
i Index m'
j -> forall a b. (a -> b) -> [a] -> [b]
map forall i j. i -> Operation i j
DELETE [forall a. Enum a => a -> a
succ Index m
i .. Index m
upperS] forall a. [a] -> [a] -> [a]
++ forall a b. (a -> b) -> [a] -> [b]
map forall i j. j -> Operation i j
INSERT [forall a. Enum a => a -> a
succ Index m'
j .. Index m'
upperT]
                   INSERT Index m'
_ -> forall a b. (a -> b) -> [a] -> [b]
map forall i j. i -> Operation i j
DELETE [forall a. Enum a => a -> a
succ Index m
lastI .. Index m
upperS]
                   DELETE Index m
_ -> forall a b. (a -> b) -> [a] -> [b]
map forall i j. j -> Operation i j
INSERT [forall a. Enum a => a -> a
succ Index m'
lastJ .. Index m'
upperT]

-- | Traceback function.
--
-- Builds traceback for alignment algorithm @algo@ in matrix @mat@, that is
-- result of alignment of sequences @s@ and @t@. Traceback will start from
-- position with coordinates (@i@, @j@) in matrix.
--
-- Traceback is represented as list of 'Operation's.
--
traceback :: (SequenceAlignment algo, Alignable m, Alignable m')
          => algo (IxValue m) (IxValue m')
          -> Matrix m m'
          -> m
          -> m'
          -> Index m
          -> Index m'
          -> [Operation (Index m) (Index m')]
traceback :: forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> Matrix m m'
-> m
-> m'
-> Index m
-> Index m'
-> [Operation (Index m) (Index m')]
traceback algo (IxValue m) (IxValue m')
algo Matrix m m'
mat m
s m'
t Index m
i' Index m'
j' = Index m
-> Index m'
-> EditOp
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
helper Index m
i' Index m'
j' EditOp
Match []
  where
    helper :: Index m
-> Index m'
-> EditOp
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
helper Index m
i Index m'
j EditOp
prevOp [Operation (Index m) (Index m')]
ar
      | forall m m'. Conditions m m' -> Stop m m'
isStop  (forall (a :: * -> * -> *) m m'.
(SequenceAlignment a, Alignable m, Alignable m') =>
a (IxValue m) (IxValue m') -> Conditions m m'
cond algo (IxValue m) (IxValue m')
algo) Matrix m m'
mat m
s m'
t Index m
i Index m'
j = [Operation (Index m) (Index m')]
ar
      | Bool
otherwise =
        let (EditOp
nextOp, Index m
nextI, Index m'
nextJ, Operation (Index m) (Index m')
op) = forall m m'. Conditions m m' -> Move m m'
doMove (forall (a :: * -> * -> *) m m'.
(SequenceAlignment a, Alignable m, Alignable m') =>
a (IxValue m) (IxValue m') -> Conditions m m'
cond algo (IxValue m) (IxValue m')
algo) Matrix m m'
mat m
s m'
t Index m
i Index m'
j EditOp
prevOp
        in Index m
-> Index m'
-> EditOp
-> [Operation (Index m) (Index m')]
-> [Operation (Index m) (Index m')]
helper Index m
nextI Index m'
nextJ EditOp
nextOp forall a b. (a -> b) -> a -> b
$ Operation (Index m) (Index m')
opforall a. a -> [a] -> [a]
:[Operation (Index m) (Index m')]
ar

{- $similarity
These are generic variants of similarity and difference functions alongside with their specialised variants.
Generic versions take the alignment algorithm used for sequence alignment,
an equality function on elements of both sequences to calculate hamming distance on aligned sequences,
and the sequences themselves.

Sample usage of generic functions:

>>> similarityGen (GlobalAlignment (\x y -> if x == ord y then 1 else 0) (AffineGap (-11) (-1))) (\x y -> x == ord y) [ord 'R'.. ord 'z'] ['a'..'z']
0.63414633

This one will calculate similarity between a list of 'Int's and a list of 'Char's.
Generic scoring function used in alignment is @\\x y -> if x == ord y then 1 else 0@.
Generic equality function used in hamming distance is @\\x y -> x == ord y@.

Specialised versions do not take the equality function as the sequences are already constrained to have 'Eq' elements.

Sample usage of specialised function is the same as before:

>>> :{
seq1 :: String
seq1 = "EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCAKVQLERYFDYWGQGTLVTVSS"
<BLANKLINE>
seq2 :: String
seq2 = "EVQLLESGGGLVQPGGSLRLSAAASGFTFSTFSMNWVRQAPGKGLEWVSYISRTSKTIYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYVARGRFFDYWGQGTLVTVS"
<BLANKLINE>
similarity (GlobalAlignment blosum62 (AffineGap (-11) (-1))) s1 s2
:}
0.8130081

Sometimes for biological reasons gaps appearing in one of two sequences, that are being aligned,
are not physical. For that reason we might want to use different gap penalties when aligning these sequences.

Example of usage of different gaps when aligning two sequences is presented below:

>>> :{
seq1 :: String
seq1 = "AAAAAAGGGGGGGGGGGGTTTTTTTTT"
<BLANKLINE>
seq2 :: String
seq2 = "AAAAAATTTTTTTTT"
<BLANKLINE>
gapForSeq1 :: AffineGap
gapForSeq1 = AffineGap (-5) (-1)
<BLANKLINE>
gapForSeq2 :: AffineGap
gapForSeq2 = AffineGap (-1000) (-1000) -- basically, we forbid gaps on @seq2@
<BLANKLINE>
local = LocalAlignment nuc44 (gapForSeq1, gapForSeq2)
<BLANKLINE>
viewAlignment (align local seq1 seq2)
:}
("TTTTTTTTT", "TTTTTTTTT")
-}

-- | Calculate similarity and difference between two sequences, aligning them first using given algorithm.
--
similarityGen :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m')
              => algo (IxValue m) (IxValue m')
              -> (IxValue m -> IxValue m' -> Bool)
              -> m
              -> m'
              -> R
similarityGen :: forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
similarityGen algo (IxValue m) (IxValue m')
algo IxValue m -> IxValue m' -> Bool
genericEq m
s m'
t = forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
similarityGen' (forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m') -> m -> m' -> AlignmentResult m m'
align algo (IxValue m) (IxValue m')
algo m
s m'
t) IxValue m -> IxValue m' -> Bool
genericEq

-- | Calculate similarity by precomputed 'AlignmentResult'.
similarityGen' :: forall m m'. (Alignable m, Alignable m')
               => AlignmentResult m m'
               -> (IxValue m -> IxValue m' -> Bool)
               -> R
similarityGen' :: forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
similarityGen' AlignmentResult m m'
res IxValue m -> IxValue m' -> Bool
genericEq = forall a b. (Integral a, Num b) => a -> b
fromIntegral SimpleGap
hamming forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral SimpleGap
len
  where
    operations :: [Operation (Index m) (Index m')]
operations = forall m m'.
AlignmentResult m m' -> [Operation (Index m) (Index m')]
alignment AlignmentResult m m'
res
    s :: m
s          = forall m m'. AlignmentResult m m' -> m
sequence1 AlignmentResult m m'
res
    t :: m'
t          = forall m m'. AlignmentResult m m' -> m'
sequence2 AlignmentResult m m'
res
    len :: SimpleGap
len        = forall (t :: * -> *) a. Foldable t => t a -> SimpleGap
length [Operation (Index m) (Index m')]
operations
    hamming :: SimpleGap
hamming    = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum forall a b. (a -> b) -> a -> b
$ Operation (Index m) (Index m') -> SimpleGap
toScores forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Operation (Index m) (Index m')]
operations

    toScores :: Operation (Index m) (Index m') -> Int
    toScores :: Operation (Index m) (Index m') -> SimpleGap
toScores (MATCH Index m
i Index m'
j) = if (m
s forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i) IxValue m -> IxValue m' -> Bool
`genericEq` (m'
t forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j) then SimpleGap
1 else SimpleGap
0
    toScores Operation (Index m) (Index m')
_           = SimpleGap
0

similarity :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
           => algo (IxValue m) (IxValue m')
           -> m
           -> m'
           -> R
similarity :: forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m',
 IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m')) =>
algo (IxValue m) (IxValue m') -> m -> m' -> R
similarity algo (IxValue m) (IxValue m')
algo = forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
similarityGen algo (IxValue m) (IxValue m')
algo forall a. Eq a => a -> a -> Bool
(==)

similarity' :: forall m m'.(Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
            => AlignmentResult m m'
            -> R
similarity' :: forall m m'.
(Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m),
 Eq (IxValue m')) =>
AlignmentResult m m' -> R
similarity' AlignmentResult m m'
res = forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
similarityGen' AlignmentResult m m'
res forall a. Eq a => a -> a -> Bool
(==)

differenceGen :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m')
              => algo (IxValue m) (IxValue m')
              -> (IxValue m -> IxValue m' -> Bool)
              -> m
              -> m'
              -> R
differenceGen :: forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
differenceGen algo (IxValue m) (IxValue m')
algo IxValue m -> IxValue m' -> Bool
genericEq m
s m'
t = R
1.0 forall a. Num a => a -> a -> a
- forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
similarityGen algo (IxValue m) (IxValue m')
algo IxValue m -> IxValue m' -> Bool
genericEq m
s m'
t

differenceGen' :: forall m m'.(Alignable m, Alignable m')
               => AlignmentResult m m'
               -> (IxValue m -> IxValue m' -> Bool)
               -> R
differenceGen' :: forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
differenceGen' AlignmentResult m m'
res IxValue m -> IxValue m' -> Bool
genericEq = R
1.0 forall a. Num a => a -> a -> a
- forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
similarityGen' AlignmentResult m m'
res IxValue m -> IxValue m' -> Bool
genericEq

difference :: forall algo m m'.(SequenceAlignment algo, Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
           => algo (IxValue m) (IxValue m')
           -> m
           -> m'
           -> R
difference :: forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m',
 IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m')) =>
algo (IxValue m) (IxValue m') -> m -> m' -> R
difference algo (IxValue m) (IxValue m')
algo = forall (algo :: * -> * -> *) m m'.
(SequenceAlignment algo, Alignable m, Alignable m') =>
algo (IxValue m) (IxValue m')
-> (IxValue m -> IxValue m' -> Bool) -> m -> m' -> R
differenceGen algo (IxValue m) (IxValue m')
algo forall a. Eq a => a -> a -> Bool
(==)

difference' :: forall m m'.(Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m), Eq (IxValue m'))
            => AlignmentResult m m'
            -> R
difference' :: forall m m'.
(Alignable m, Alignable m', IxValue m ~ IxValue m', Eq (IxValue m),
 Eq (IxValue m')) =>
AlignmentResult m m' -> R
difference' AlignmentResult m m'
res = forall m m'.
(Alignable m, Alignable m') =>
AlignmentResult m m' -> (IxValue m -> IxValue m' -> Bool) -> R
differenceGen' AlignmentResult m m'
res forall a. Eq a => a -> a -> Bool
(==)

-- | View alignment results as simple strings with gaps.
--
viewAlignment :: forall m m'.(Alignable m, Alignable m', Symbol (IxValue m), Symbol (IxValue m')) => AlignmentResult m m' -> (String, String)
viewAlignment :: forall m m'.
(Alignable m, Alignable m', Symbol (IxValue m),
 Symbol (IxValue m')) =>
AlignmentResult m m' -> (String, String)
viewAlignment AlignmentResult m m'
ar = forall a b. [(a, b)] -> ([a], [b])
unzip (Operation (Index m) (Index m') -> (Char, Char)
toChars forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall m m'.
AlignmentResult m m' -> [Operation (Index m) (Index m')]
alignment AlignmentResult m m'
ar)
  where
    (m
s, m'
t) = (forall m m'. AlignmentResult m m' -> m
sequence1 AlignmentResult m m'
ar, forall m m'. AlignmentResult m m' -> m'
sequence2 AlignmentResult m m'
ar)

    toChars :: Operation (Index m) (Index m') -> (Char, Char)
    toChars :: Operation (Index m) (Index m') -> (Char, Char)
toChars (MATCH Index m
i Index m'
j) = (forall a. Symbol a => a -> Char
symbol (m
s forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i), forall a. Symbol a => a -> Char
symbol (m'
t forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j))
    toChars (DELETE Index m
i)  = (forall a. Symbol a => a -> Char
symbol (m
s forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i), Char
'-')
    toChars (INSERT Index m'
j)  = (Char
'-', forall a. Symbol a => a -> Char
symbol (m'
t forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j))

-- | Format alignment result as pretty columns of symbols.
--
-- Example with width equal to 20:
--
-- @
--  0 --------------------  0
--
--  0 TTTTTTTTTTTTTTTTTTTT 19
--
--  0 --GCCTGAATGGTGTGGTGT 17
--      || |||||| |||| |||
-- 20 TTGC-TGAATG-TGTG-TGT 36
--
-- 18 TCGGCGGAGGGACCCAGCTA 37
--     || |||||||||||||||
-- 37 -CG-CGGAGGGACCCAGCT- 53
--
-- 38 AAAAAAAAAA 47
--
-- 53 ---------- 53
-- @
prettyAlignmment
  :: forall m m'
  . (Alignable m, Alignable m', Symbol (IxValue m), Symbol (IxValue m'))
  => AlignmentResult m m' -- ^ Result of alignment to format
  -> Int                  -- ^ Desired width of one alignment row
  -> String
prettyAlignmment :: forall m m'.
(Alignable m, Alignable m', Symbol (IxValue m),
 Symbol (IxValue m')) =>
AlignmentResult m m' -> SimpleGap -> String
prettyAlignmment AlignmentResult m m'
ar SimpleGap
width =
  -- Due to construction 'resultRows' first element will be empty string, and we don't need it.
  forall a. [a] -> [[a]] -> [a]
intercalate String
"\n" forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a]
tail [String]
resultRows
  where
    (m
s, m'
t) = (forall m m'. AlignmentResult m m' -> m
sequence1 AlignmentResult m m'
ar, forall m m'. AlignmentResult m m' -> m'
sequence2 AlignmentResult m m'
ar)
    rows :: [[Operation (Index m) (Index m')]]
rows = forall e. SimpleGap -> [e] -> [[e]]
chunksOf SimpleGap
width forall a b. (a -> b) -> a -> b
$ forall m m'.
AlignmentResult m m' -> [Operation (Index m) (Index m')]
alignment AlignmentResult m m'
ar

    chainLength :: forall c. ChainLike c => c -> Int
    chainLength :: forall c. ChainLike c => c -> SimpleGap
chainLength c
ch = let (Index c
a, Index c
b) = forall m. ChainLike m => m -> (Index m, Index m)
bounds c
ch in forall a. Enum a => a -> SimpleGap
fromEnum Index c
b forall a. Num a => a -> a -> a
- forall a. Enum a => a -> SimpleGap
fromEnum Index c
a forall a. Num a => a -> a -> a
+ SimpleGap
1

    -- Determine how many characters to leave for position numbers
    numWidth :: SimpleGap
numWidth = forall (t :: * -> *) a. Foldable t => t a -> SimpleGap
length forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show forall a b. (a -> b) -> a -> b
$ forall a. Ord a => a -> a -> a
max (forall c. ChainLike c => c -> SimpleGap
chainLength m
s) (forall c. ChainLike c => c -> SimpleGap
chainLength m'
t)

    padLeft :: String -> String
    padLeft :: String -> String
padLeft String
x = forall a. SimpleGap -> a -> [a]
replicate (SimpleGap
numWidth forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. Foldable t => t a -> SimpleGap
length String
x) Char
' ' forall a. Semigroup a => a -> a -> a
<> String
x

    -- Build one column of nice alignment like
    -- T
    -- |
    -- T
    toCharTriple :: Operation (Index m) (Index m') -> (Char, Char, Char)
    toCharTriple :: Operation (Index m) (Index m') -> (Char, Char, Char)
toCharTriple (MATCH Index m
i Index m'
j) = (Char
left, if Char
left forall a. Eq a => a -> a -> Bool
== Char
right then Char
'|' else Char
' ', Char
right)
      where
        left :: Char
left  = m
s forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to forall a. Symbol a => a -> Char
symbol
        right :: Char
right = m'
t forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to forall a. Symbol a => a -> Char
symbol
    toCharTriple (DELETE Index m
i) = (m
s forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m
i forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to forall a. Symbol a => a -> Char
symbol, Char
' ', Char
'-')
    toCharTriple (INSERT Index m'
j) = (Char
'-', Char
' ', m'
t forall s a. HasCallStack => s -> Getting (Endo a) s a -> a
^?! forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Index m'
j forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to forall a. Symbol a => a -> Char
symbol)

    -- Format one chunk of alignment, adding indices to start and end of strings
    -- (prevI, prevJ) must be 1-based indices of last printed characters in each string.
    formatRow :: (Int, Int) -> [Operation (Index m) (Index m')] -> ((Int, Int), [String])
    formatRow :: (SimpleGap, SimpleGap)
-> [Operation (Index m) (Index m')]
-> ((SimpleGap, SimpleGap), [String])
formatRow (SimpleGap
prevI, SimpleGap
prevJ) [Operation (Index m) (Index m')]
row = ((SimpleGap
lastI, SimpleGap
lastJ), [String
resLine1, String
resLine2, String
resLine3])
      where
        (String
line1, String
line2, String
line3) = forall a b c. [(a, b, c)] -> ([a], [b], [c])
unzip3 forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Operation (Index m) (Index m') -> (Char, Char, Char)
toCharTriple [Operation (Index m) (Index m')]
row

        -- | Folding function to count lengths of both strings in alignment row
        countChars :: (Int, Int) -> Operation (Index m) (Index m') -> (Int, Int)
        countChars :: (SimpleGap, SimpleGap)
-> Operation (Index m) (Index m') -> (SimpleGap, SimpleGap)
countChars (SimpleGap
li, SimpleGap
lj) (MATCH Index m
_ Index m'
_) = (SimpleGap
li forall a. Num a => a -> a -> a
+ SimpleGap
1, SimpleGap
lj forall a. Num a => a -> a -> a
+ SimpleGap
1)
        countChars (SimpleGap
li, SimpleGap
lj) (DELETE Index m
_)  = (SimpleGap
li forall a. Num a => a -> a -> a
+ SimpleGap
1, SimpleGap
lj)
        countChars (SimpleGap
li, SimpleGap
lj) (INSERT Index m'
_)  = (SimpleGap
li, SimpleGap
lj forall a. Num a => a -> a -> a
+ SimpleGap
1)

        (SimpleGap
lengthI, SimpleGap
lengthJ) = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (SimpleGap, SimpleGap)
-> Operation (Index m) (Index m') -> (SimpleGap, SimpleGap)
countChars (SimpleGap
0, SimpleGap
0) [Operation (Index m) (Index m')]
row

        -- Indices of first printed non-gap characters in the current row.
        -- If the row contains any non-gap characters, this is equal to index
        -- of last printed character + 1
        (SimpleGap
firstI, SimpleGap
firstJ) =
          ( if SimpleGap
lengthI forall a. Ord a => a -> a -> Bool
> SimpleGap
0 then SimpleGap
prevI forall a. Num a => a -> a -> a
+ SimpleGap
1 else SimpleGap
prevI
          , if SimpleGap
lengthJ forall a. Ord a => a -> a -> Bool
> SimpleGap
0 then SimpleGap
prevJ forall a. Num a => a -> a -> a
+ SimpleGap
1 else SimpleGap
prevJ
          )
        (SimpleGap
lastI, SimpleGap
lastJ) = (SimpleGap
prevI forall a. Num a => a -> a -> a
+ SimpleGap
lengthI, SimpleGap
prevJ forall a. Num a => a -> a -> a
+ SimpleGap
lengthJ)

        -- It's easier to do everything in 1-based indices and convert before showing
        toZeroBased :: Int -> Int
        toZeroBased :: SimpleGap -> SimpleGap
toZeroBased SimpleGap
0 = SimpleGap
0
        toZeroBased SimpleGap
i = SimpleGap
i forall a. Num a => a -> a -> a
- SimpleGap
1

        resLine1 :: String
resLine1 = String -> String
padLeft (forall a. Show a => a -> String
show forall a b. (a -> b) -> a -> b
$ SimpleGap -> SimpleGap
toZeroBased SimpleGap
firstI) forall a. Semigroup a => a -> a -> a
<> String
" " forall a. Semigroup a => a -> a -> a
<> String
line1 forall a. Semigroup a => a -> a -> a
<> String
" " forall a. Semigroup a => a -> a -> a
<> String -> String
padLeft (forall a. Show a => a -> String
show forall a b. (a -> b) -> a -> b
$ SimpleGap -> SimpleGap
toZeroBased SimpleGap
lastI)
        resLine2 :: String
resLine2 = String -> String
padLeft String
""                          forall a. Semigroup a => a -> a -> a
<> String
" " forall a. Semigroup a => a -> a -> a
<> String
line2
        resLine3 :: String
resLine3 = String -> String
padLeft (forall a. Show a => a -> String
show forall a b. (a -> b) -> a -> b
$ SimpleGap -> SimpleGap
toZeroBased SimpleGap
firstJ) forall a. Semigroup a => a -> a -> a
<> String
" " forall a. Semigroup a => a -> a -> a
<> String
line3 forall a. Semigroup a => a -> a -> a
<> String
" " forall a. Semigroup a => a -> a -> a
<> String -> String
padLeft (forall a. Show a => a -> String
show forall a b. (a -> b) -> a -> b
$ SimpleGap -> SimpleGap
toZeroBased SimpleGap
lastJ)

    -- Go through all chunks of operations, accummulating current offsets in both strings
    ((SimpleGap, SimpleGap)
_, [String]
resultRows) =
      forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl
        (\((SimpleGap, SimpleGap)
off, [String]
res) [Operation (Index m) (Index m')]
ops -> let ((SimpleGap, SimpleGap)
newOff, [String]
newRes) = (SimpleGap, SimpleGap)
-> [Operation (Index m) (Index m')]
-> ((SimpleGap, SimpleGap), [String])
formatRow (SimpleGap, SimpleGap)
off [Operation (Index m) (Index m')]
ops in ((SimpleGap, SimpleGap)
newOff, [String]
res forall a. Semigroup a => a -> a -> a
<> [String
""] forall a. Semigroup a => a -> a -> a
<> [String]
newRes))
        ((SimpleGap
0, SimpleGap
0), [])
        [[Operation (Index m) (Index m')]]
rows