-- | Simple similarity and distance measures between sequences stored as
-- 'ByteString's, plus thin wrappers that return only the score of an
-- alignment computed by "Bio.Sequence.Alignment".
module Bio.Sequence.Alignment.Scoring.Similarity
  ( similarity
  , hamming
  , relativeHamming
  , alignmentScore
  , alignmentSemiglobalScore
  ) where

import           Data.ByteString.Char8         (ByteString)
import qualified Data.ByteString.Char8         as B

import           Bio.Sequence.Alignment        (Substitution, alignment,
                                                mkGlobal, mkSemiglobal)
import           Bio.Sequence.Alignment.Matrix (blosum62)

-- | Fraction of matching positions between the two sequences after aligning
-- them with 'mkGlobal', the 'blosum62' matrix and a gap argument of @-5@.
--
-- The result is @1 - mismatches / length@, where both the mismatch count and
-- the length are taken from the aligned sequences returned by 'alignment'
-- (the length is that of the first aligned sequence).
--
-- When the aligned sequence is empty there are no positions to compare; the
-- result is then @1@ instead of the @NaN@ that @0 / 0@ would produce.
similarity :: ByteString -> ByteString -> Float
similarity f s
  | alignedLen == 0 = 1
  | otherwise       = 1 - fromIntegral (hamming f' s') / fromIntegral alignedLen
  where
    -- Only the aligned sequences are needed here; the score is discarded.
    (_, (f', s')) = alignment (mkGlobal blosum62 (-5)) f s
    alignedLen    = B.length f'

-- | Number of positions at which the two strings hold different characters.
--
-- Positions are paired with 'B.zipWith', which stops at the end of the
-- shorter input, so trailing characters of the longer string are not counted.
hamming :: ByteString -> ByteString -> Int
hamming f s = length (filter id (B.zipWith (/=) f s))

-- | 'hamming' distance divided by the length of the first argument.
--
-- Note that the denominator is always the length of @f@: if @s@ is shorter,
-- the uncompared tail of @f@ contributes to the denominator but not to the
-- mismatch count.
--
-- An empty first argument yields @0@ (no positions differ) rather than the
-- @NaN@ that @0 / 0@ would produce.
relativeHamming :: ByteString -> ByteString -> Float
relativeHamming f s
  | B.null f  = 0
  | otherwise = fromIntegral (hamming f s) / fromIntegral (B.length f)

-- | Score (first component of the result of 'alignment') obtained by aligning
-- the two sequences with 'mkGlobal', the given substitution matrix and a gap
-- argument of @-5@.
alignmentScore :: Substitution Char -> ByteString -> ByteString -> Int
alignmentScore matrix f s = fst (alignment (mkGlobal matrix (-5)) f s)

-- | Score (first component of the result of 'alignment') obtained by aligning
-- the two sequences with 'mkSemiglobal', the given substitution matrix and a
-- gap argument of @-5@.
alignmentSemiglobalScore :: Substitution Char -> ByteString -> ByteString -> Int
alignmentSemiglobalScore matrix f s = fst (alignment (mkSemiglobal matrix (-5)) f s)