{-# LANGUAGE Rank2Types #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE BangPatterns #-} module Bio.Motif.Alignment ( alignment , alignmentBy , linPenal , quadPenal , cubePenal , expPenal , AlignFn ) where import qualified Data.Vector.Generic as G import qualified Data.Vector.Unboxed as U import qualified Data.Matrix.Unboxed as M import Bio.Motif import Bio.Utils.Functions -- | penalty function takes the gaps number as input, return penalty value type PenalFn = Int -> Double type DistanceFn = forall v. (G.Vector v Double, G.Vector v (Double, Double)) => v Double -> v Double -> Double type AlignFn = PWM -> PWM -> (Double, (Bool, Int)) -- ^ (distance, (on same direction, -- position w.r.t. the first pwm)) alignment :: AlignFn alignment = alignmentBy jsd $ quadPenal 0.15 -- | linear penalty linPenal :: Double -> PenalFn linPenal x n = fromIntegral n * x -- | quadratic penalty quadPenal :: Double -> PenalFn quadPenal x n = fromIntegral (n ^ (2 :: Int)) * x -- | cubic penalty cubePenal :: Double -> PenalFn cubePenal x n = fromIntegral (n ^ (3 :: Int)) * x -- | exponentail penalty expPenal :: Double -> PenalFn expPenal x n = fromIntegral (2^n :: Int) * x -- internal gaps are not allowed, larger score means larger distance, so the smaller the better alignmentBy :: DistanceFn -- ^ compute the distance between two aligned pwms -> PenalFn -- ^ gap penalty -> AlignFn alignmentBy fn pFn m1 m2 | fst forwardAlign <= fst reverseAlign = (fst forwardAlign, (True, snd forwardAlign)) | otherwise = (fst reverseAlign, (False, snd reverseAlign)) where forwardAlign | d1 < d2 = (d1,i1) | otherwise = (d2,-i2) where (d1,i1) = loop opti2 (1/0,-1) s2 s1 0 (d2,i2) = loop opti1 (1/0,-1) s1 s2 0 reverseAlign | d1 < d2 = (d1,i1) | otherwise = (d2,-i2) where (d1,i1) = loop opti2 (1/0,-1) s2' s1 0 (d2,i2) = loop opti1 (1/0,-1) s1 s2' 0 loop opti (min',i') a b@(_:xs) !i | currentBest >= min' = (min',i') | d < min' = loop opti (d,i) a xs (i+1) | otherwise = loop opti (min',i') a xs (i+1) where d = (G.sum sc + gapP) / fromIntegral (U.length sc + nGaps) currentBest = opti U.! i sc = U.fromList $ zipWith fn a b nGaps = n1 + n2 - 2 * U.length sc gapP = pFn nGaps loop _ (min',i') _ _ _ = (min',i') opti1 = optimalSc n1 n2 opti2 = optimalSc n2 n1 optimalSc x y = U.fromList $ scanr1 f $ go 0 where f v min' = min v min' go i | a == 0 = [] | otherwise = pFn b / fromIntegral (a + b) : go (i+1) where a = min x (y-i) b = i + abs (x - (y-i)) s1 = M.toRows . _mat $ m1 s2 = M.toRows . _mat $ m2 s2' = M.toRows . _mat $ m2' m2' = rcPWM m2 n1 = length s1 n2 = length s2 {-# INLINE alignmentBy #-}