-- |
-- Module      :  ELynx.Data.Sequence.Distance
-- Description :  Distance functions between sequences
-- Copyright   :  (c) Dominik Schrempf, 2020
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Aug 21 15:09:58 2020.
module ELynx.Data.Sequence.Distance
  ( hamming,
  )
where

import qualified Data.Vector.Unboxed as V
import ELynx.Data.Sequence.Sequence

-- | Step function for a left fold that counts 'False' values.
--
-- The first argument is the running count; it is incremented when the second
-- argument is 'False' and returned unchanged when it is 'True'.
countFalses :: Int -> Bool -> Int
countFalses n False = succ n
countFalses n True = n

-- | Compute the Hamming distance between two sequences, that is, the number of
-- positions at which their characters differ.
--
-- Return 'Left' with an error message if the alphabets of the two sequences
-- differ, if the sequences have different lengths, or if a sequence is empty.
-- The checks are performed in this order.
hamming :: Sequence -> Sequence -> Either String Int
hamming l r
  | alphabet l /= alphabet r = Left "hamming: Alphabets of sequences differ."
  | V.length csL /= V.length csR = Left "hamming: Sequence lengths differ."
  | V.null csL || V.null csR = Left "hamming: Empty sequence encountered."
  -- Compare position-wise; every 'False' (mismatch) adds one to the distance.
  | otherwise = Right $ V.foldl' countFalses 0 $ V.zipWith (==) csL csR
  where
    -- Character vectors of the left and right sequence, shared by all guards.
    csL = characters l
    csR = characters r