module Bio.Sequence.Alignment.Scoring.Similarity
( similarity
, hamming
, relativeHamming
, alignmentScore
, alignmentSemiglobalScore
) where
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as B
import Bio.Sequence.Alignment (Substitution, alignment,
mkGlobal, mkSemiglobal)
import Bio.Sequence.Alignment.Matrix (blosum62)
-- | Similarity of two sequences after global alignment.
--
-- Both inputs are aligned with 'alignment' using 'mkGlobal' over 'blosum62';
-- the alignment score is discarded and only the returned pair of strings is
-- used. The result is @1 - mismatches / length@, where @mismatches@ is the
-- 'hamming' distance between the two returned strings and @length@ is the
-- length of the first one.
--
-- If the first returned string is empty the division is @0 / 0@ and the
-- result is NaN.
similarity :: ByteString -> ByteString -> Float
similarity f s = 1 - mismatches / alignedLength
  where
    -- NOTE(review): the gap argument is the literal 5; confirm the sign
    -- convention 'mkGlobal' expects for gap penalties.
    (_, (f', s')) = alignment (mkGlobal blosum62 5) f s
    mismatches    = fromIntegral (hamming f' s')
    alignedLength = fromIntegral (B.length f')
-- | Number of positions at which the two strings hold different characters.
--
-- Characters are compared pairwise with 'B.zipWith', so when the inputs
-- differ in length only the common prefix length is compared and the excess
-- of the longer string is ignored.
hamming :: ByteString -> ByteString -> Int
hamming f s = length . filter id $ B.zipWith (/=) f s
-- | 'hamming' distance between @f@ and @s@ divided by the length of @f@.
--
-- The denominator is always the length of the first argument. When @f@ is
-- empty the computation is @0 / 0@ and the result is NaN.
relativeHamming :: ByteString -> ByteString -> Float
relativeHamming f s = mismatches / total
  where
    mismatches = fromIntegral (hamming f s)
    total      = fromIntegral (B.length f)
-- | Score of the global alignment of @f@ and @s@ under the given
-- substitution matrix.
--
-- Runs 'alignment' with 'mkGlobal' and returns only the first component of
-- its result; the second component is discarded.
alignmentScore :: Substitution Char -> ByteString -> ByteString -> Int
alignmentScore matrix f s = score
  where
    -- NOTE(review): the gap argument is the literal 5; confirm the sign
    -- convention 'mkGlobal' expects for gap penalties.
    gap        = 5
    (score, _) = alignment (mkGlobal matrix gap) f s
-- | Score of the semiglobal alignment of @f@ and @s@ under the given
-- substitution matrix.
--
-- Runs 'alignment' with 'mkSemiglobal' and returns only the first component
-- of its result; the second component is discarded.
alignmentSemiglobalScore :: Substitution Char -> ByteString -> ByteString -> Int
alignmentSemiglobalScore matrix f s = score
  where
    -- NOTE(review): the gap argument is the literal 5; confirm the sign
    -- convention 'mkSemiglobal' expects for gap penalties.
    gap        = 5
    (score, _) = alignment (mkSemiglobal matrix gap) f s