{-# LANGUAGE FlexibleContexts #-}
module ELynx.Data.Alphabet.DistributionDiversity
(
entropy,
kEffEntropy,
homoplasy,
kEffHomoplasy,
frequencyCharacters,
)
where
import qualified Data.Set as S
import Data.Vector.Generic
( Vector,
toList,
)
import qualified Data.Vector.Generic as V
import ELynx.Data.Alphabet.Alphabet
import ELynx.Data.Alphabet.Character
eps :: Double
eps = 1e-12
xLogX :: Double -> Double
xLogX x
| x < 0.0 = error "Argument lower than zero."
| eps > x = 0.0
| otherwise = x * log x
entropy :: (Vector v Double) => v Double -> Double
entropy v =
if isNaN res
then
error
("entropy: Sesult of following vector is NaN: " ++ show (toList v) ++ ".")
else res
where
res = negate $ V.sum $ V.map xLogX v
kEffEntropy :: Vector v Double => v Double -> Double
kEffEntropy v = if e < eps then 1.0 else exp e where e = entropy v
homoplasy :: Vector v Double => v Double -> Double
homoplasy v = V.sum $ V.map (\x -> x * x) v
kEffHomoplasy :: Vector v Double => v Double -> Double
kEffHomoplasy v = 1.0 / homoplasy v
incrementElemIndexByOne :: Vector v Int => [Int] -> v Int -> v Int
incrementElemIndexByOne is v = v V.// zip is es'
where
es' = [v V.! i + 1 | i <- is]
acc :: Vector v Int => AlphabetSpec -> v Int -> Character -> v Int
acc alph vec char = incrementElemIndexByOne is vec
where
is = [S.findIndex c (std alph) | c <- toStd alph char]
countCharacters ::
(Vector v Character, Vector v Int) => AlphabetSpec -> v Character -> v Int
countCharacters alph = V.foldl' (acc alph) zeroCounts
where
nChars = length (std alph)
zeroCounts = V.replicate nChars (0 :: Int)
saveDivision :: Int -> Int -> Double
saveDivision value divisor =
if divisor == 0 then 0.0 else fromIntegral value / fromIntegral divisor
frequencyCharacters ::
(Vector v Character, Vector v Int, Vector v Double) =>
AlphabetSpec ->
v Character ->
v Double
frequencyCharacters alph d = V.map (`saveDivision` s) counts
where
counts = countCharacters alph d
s = V.sum counts