module Bio.Sequence.Alignment.Scoring.Similarity
  ( similarity
  , hamming
  , relativeHamming
  , alignmentScore
  , alignmentSemiglobalScore
  ) where

import           Data.ByteString.Char8         (ByteString)
import qualified Data.ByteString.Char8         as B

import           Bio.Sequence.Alignment        (Substitution, alignment,
                                                mkGlobal, mkSemiglobal)
import           Bio.Sequence.Alignment.Matrix (blosum62)

-- | Similarity of two sequences, computed from their global alignment.
--
-- The inputs are aligned with 'alignment' using @'mkGlobal' 'blosum62' (-5)@
-- (the @-5@ is presumably the gap penalty -- confirm against 'mkGlobal').
-- The result is @1 - mismatches / length@, where @mismatches@ is the
-- 'hamming' distance between the two sequences returned by the alignment and
-- @length@ is the length of the first of them.
--
-- If the aligned sequence is empty the result is defined as @1@; without
-- this guard the division @0 / 0@ would yield @NaN@.
similarity :: ByteString -> ByteString -> Float
similarity f s
  | B.null f' = 1
  | otherwise = 1 - fromIntegral (hamming f' s') / fromIntegral (B.length f')
  where
    -- Only the aligned pair is needed here; the score is discarded.
    (_, (f', s')) = alignment (mkGlobal blosum62 (-5)) f s

-- | Number of positions at which the two sequences hold different characters.
--
-- The comparison is position-wise via 'B.zipWith', so it covers only as many
-- positions as the shorter input has; any surplus characters of the longer
-- input are ignored.
hamming :: ByteString -> ByteString -> Int
hamming f s = length . filter id $ B.zipWith (/=) f s

-- | 'hamming' distance between two sequences divided by the length of the
-- first one.
--
-- Note that 'hamming' compares only the positions both inputs have, while
-- the denominator is always the length of the first argument.
--
-- An empty first argument yields @0@; without this guard the division
-- @0 / 0@ would yield @NaN@.
relativeHamming :: ByteString -> ByteString -> Float
relativeHamming f s
  | B.null f  = 0
  | otherwise = fromIntegral (hamming f s) / fromIntegral (B.length f)

-- | Score of aligning two sequences with the scheme built by 'mkGlobal'.
--
-- Runs 'alignment' with @'mkGlobal' matrix (-5)@ and returns the first
-- component of its result, dropping the rest. The @-5@ is fixed here
-- (presumably the gap penalty -- confirm against 'mkGlobal').
alignmentScore :: Substitution Char -> ByteString -> ByteString -> Int
alignmentScore matrix f s = score
  where
    (score, _) = alignment (mkGlobal matrix (-5)) f s

-- | Score of aligning two sequences with the scheme built by 'mkSemiglobal'.
--
-- Runs 'alignment' with @'mkSemiglobal' matrix (-5)@ and returns the first
-- component of its result, dropping the rest. The @-5@ is fixed here
-- (presumably the gap penalty -- confirm against 'mkSemiglobal').
alignmentSemiglobalScore :: Substitution Char -> ByteString -> ByteString -> Int
alignmentSemiglobalScore matrix f s = score
  where
    (score, _) = alignment (mkSemiglobal matrix (-5)) f s