{- |
   Simple alignment of sequences

   Standard alignment\/edit distance 
-}

module Bio.Alignment.SAlign 
   ( 
   -- * Smith-Waterman, or locally optimal alignment
   local_score, local_align

   -- * Needleman-Wunsch, or globally optimal alignment
   , global_score, global_align

   ) where

import Data.List (maximumBy)
import Bio.Sequence.SeqData
import Bio.Alignment.AlignData

-- ------------------------------------------------------------
-- Edit distances

-- | Calculate global edit distance (Needleman-Wunsch alignment score)
global_score :: (Num a, Ord a) => SubstMx a -> a -> Sequence -> Sequence -> a
global_score mx g s1 s2 = last . last $ columns (g_score mx g) 0 s1 s2

-- | Scoring\/selection function for global alignment
g_score:: (Num a,Ord a) => SubstMx a -> a -> Selector a
g_score mx g cds = maximum [s+eval mx g e | (s,e) <- cds]

-- | Calculate local edit distance (Smith-Waterman alignment score)
local_score :: (Num a, Ord a) => SubstMx a -> a -> Sequence -> Sequence -> a
local_score mx g s1 s2 = maximum . concat $ columns (l_score mx g) 0 s1 s2

-- | Scoring\/selection funciton for local alignmnet
l_score :: (Num a,Ord a) => SubstMx a -> a -> Selector a
l_score mx g cds = maximum (0:[s+eval mx g e | (s,e) <- cds])

-- ------------------------------------------------------------
-- Alignments

-- It is of course possible to retain the columns from an alignment score above, and
-- do the usual backtracking, but it is simpler and more efficient on average to 
-- store the current alignment (a list; essentially a pointer backwards) along with
-- the score in each cell.  Unreachable cells can then be GC'ed.

-- | Calculate alignments.
global_align :: (Num a, Ord a) => SubstMx a -> a -> Sequence -> Sequence -> EditList
global_align mx g s1 s2 = reverse . snd . last . last 
                          $ columns (g_align mx g) (0,[]) s1 s2

g_align :: (Num a, Ord a) => SubstMx a -> a -> Selector (a,EditList)
g_align mx g cds = maximumBy (compare `on` fst) [(s+eval mx g e,e:a) | ((s,a),e) <- cds]

local_align :: (Num a, Ord a) => SubstMx a -> a -> Sequence -> Sequence -> EditList 
local_align mx g s1 s2 = reverse . snd . maximumBy (compare `on` fst) . concat 
                         $ columns (l_align mx g) (0,[]) s1 s2

l_align :: (Num a, Ord a) => SubstMx a -> a -> Selector (a,EditList)
l_align mx g cds = maximumBy (compare `on` fst) $ (0,[]):[(s+eval mx g e,e:a) | ((s,a),e) <- cds]