{- | Simple alignment of sequences Standard alignment\/edit distance -} module Bio.Alignment.SAlign ( -- * Smith-Waterman, or locally optimal alignment local_score, local_align -- * Needleman-Wunsch, or globally optimal alignment , global_score, global_align ) where import Data.List (maximumBy) import Bio.Sequence.SeqData import Bio.Alignment.AlignData -- ------------------------------------------------------------ -- Edit distances -- | Calculate global edit distance (Needleman-Wunsch alignment score) global_score :: (Num a, Ord a) => SubstMx t a -> a -> Sequence t -> Sequence t -> a global_score mx g s1 s2 = last . last $ columns (g_score mx g) 0 s1 s2 -- | Scoring\/selection function for global alignment g_score:: (Num a,Ord a) => SubstMx t a -> a -> Selector a g_score mx g cds = maximum [s+eval mx g e | (s,e) <- cds] -- | Calculate local edit distance (Smith-Waterman alignment score) local_score :: (Num a, Ord a) => SubstMx t a -> a -> Sequence t -> Sequence t -> a local_score mx g s1 s2 = maximum . concat $ columns (l_score mx g) 0 s1 s2 -- | Scoring\/selection funciton for local alignmnet l_score :: (Num a,Ord a) => SubstMx t a -> a -> Selector a l_score mx g cds = maximum (0:[s+eval mx g e | (s,e) <- cds]) -- ------------------------------------------------------------ -- Alignments -- It is of course possible to retain the columns from an alignment score above, and -- do the usual backtracking, but it is simpler and more efficient on average to -- store the current alignment (a list; essentially a pointer backwards) along with -- the score in each cell. Unreachable cells can then be GC'ed. -- | Calculate alignments. global_align :: (Num a, Ord a) => SubstMx t a -> a -> Sequence t -> Sequence t -> EditList global_align mx g s1 s2 = reverse . snd . last . last $ columns (g_align mx g) (0,[]) s1 s2 g_align :: (Num a, Ord a) => SubstMx t a -> a -> Selector (a,EditList) g_align mx g cds = maximumBy (compare `on` fst) [(s+eval mx g e,e:a) | ((s,a),e) <- cds] local_align :: (Num a, Ord a) => SubstMx t a -> a -> Sequence t -> Sequence t -> EditList local_align mx g s1 s2 = reverse . snd . maximumBy (compare `on` fst) . concat $ columns (l_align mx g) (0,[]) s1 s2 l_align :: (Num a, Ord a) => SubstMx t a -> a -> Selector (a,EditList) l_align mx g cds = maximumBy (compare `on` fst) $ (0,[]):[(s+eval mx g e,e:a) | ((s,a),e) <- cds]