{- | Implement alignments\/edit distance with affine gap penalties I've seen g = (-10,-1) as the suggested price to pay for a gaps using BLOSUM62. Good choice as any, I guess. -} module Bio.Alignment.AAlign ( -- * Smith-Waterman, or locally optimal alignment with affine gaps local_score, local_align -- * Needleman-Wunsch, or globally optimal alignment with affine gaps , global_score, global_align ) where import Data.List (partition,maximumBy) import Bio.Sequence.SeqData import Bio.Alignment.AlignData -- | Minus infinity (or an approximation thereof) minf :: Num a => a minf = -100000000 -- ------------------------------------------------------------ -- Edit distances -- | Calculate global edit distance (Needleman-Wunsch alignment score) global_score :: (Num a, Ord a) => SubstMx t a -> (a,a) -> Sequence t -> Sequence t -> a global_score mx g s1 s2 = uncurry max . last . last $ columns (score_select minf mx g) (0,fst g) s1 s2 -- | Calculate local edit distance (Smith-Waterman alignment score) local_score :: (Num a, Ord a) => SubstMx t a -> (a,a) -> Sequence t -> Sequence t -> a local_score mx g s1 s2 = maximum . map (uncurry max) . concat $ columns (score_select 0 mx g) (0,fst g) s1 s2 -- | Generic scoring and selection function for global and local scoring score_select :: (Num a,Ord a) => a -> SubstMx t a -> (a,a) -> Selector (a,a) score_select minf mx (go,ge) cds = let (reps,ids) = partition (isRepl.snd) cds s = maximum $ minf:[max sub gap +mx (x,y) | ((sub,gap),Repl x y) <- reps] g = maximum $ minf:[max (sub+go) (gap+ge) | ((sub,gap),_) <- ids] in (s,g) -- ------------------------------------------------------------ -- Alignments -- maximum and addition for compound values max' (x,ax) (y,yx) = if x>=y then (x,ax) else (y,yx) fp (x,ax) (s,e) = (x+s,e:ax) -- | Calculate global alignment (Needleman-Wunsch) global_align :: (Num a, Ord a) => SubstMx t a -> (a,a) -> Sequence t -> Sequence t -> (a,EditList) global_align mx g s1 s2 = revsnd . uncurry max' . last . last $ columns (align_select minf mx g) ((0,[]),(fst g,[])) s1 s2 -- | Calculate local alignmnet (Smith-Waterman) local_align :: (Num a, Ord a) => SubstMx t a -> (a,a) -> Sequence t -> Sequence t -> (a,EditList) local_align mx g s1 s2 = revsnd . maximumBy (compare `on` fst) . map (uncurry max') . concat $ columns (align_select 0 mx g) ((0,[]),(fst g,[])) s1 s2 revsnd (s,a) = (s,reverse a) -- | Generic scoring and selection for global and local alignment align_select :: (Num a, Ord a) => a -> SubstMx t a -> (a,a) -> Selector ((a,EditList),(a,EditList)) align_select minf mx (go,ge) cds = let (reps,ids) = partition (isRepl.snd) cds s = maximumBy (compare `on` fst) $ (minf,[]):[max' sub gap `fp` (mx (x,y),e) | ((sub,gap),e@(Repl x y)) <- reps] g = maximumBy (compare `on` fst) $ (minf,[]):[max' (sub `fp` (go,e)) (gap `fp` (ge,e)) | ((sub,gap),e) <- ids] in (s,g)