-- | Diversity measures for vectors of 'Double' values ('entropy', 'homoplasy'
-- and the derived 'kEffEntropy' and 'kEffHomoplasy'), together with
-- 'frequencyCharacters', which turns a vector of characters into relative
-- frequencies with respect to an alphabet.
module ELynx.Data.Alphabet.DistributionDiversity
(
entropy
, kEffEntropy
, homoplasy
, kEffHomoplasy
, frequencyCharacters
) where
import qualified Data.Set as S
import qualified Data.Vector.Unboxed as V
import ELynx.Data.Alphabet.Alphabet
import ELynx.Data.Alphabet.Character
import ELynx.Tools.Definitions
import ELynx.Tools.Numeric
import ELynx.Tools.Vector
-- | Entropy of a vector, computed as the negated sum of 'xLogX' applied to
-- every element.
--
-- Calls 'error' when the result is NaN; the message includes the offending
-- vector so the caller can see which input produced it.
entropy :: V.Vector Double -> Double
entropy v
  | isNaN res = error ("entropy: Result of following vector is NaN: " ++ show v ++ ".")
  | otherwise = res
  where
    -- Negated sum of the element-wise 'xLogX' terms.
    res = negate $ sumVec $ V.map xLogX v
-- | Exponential of the 'entropy' of the given vector.
--
-- When the entropy is smaller than 'eps' the result is exactly @1.0@ instead
-- of @exp@ of that tiny value.
kEffEntropy :: V.Vector Double -> Double
kEffEntropy v
  | h < eps = 1.0
  | otherwise = exp h
  where
    h = entropy v
-- | Sum of the squared elements of the given vector.
homoplasy :: V.Vector Double -> Double
homoplasy v = sumVec (V.map square v)
  where
    square x = x * x
-- | Reciprocal of the 'homoplasy' of the given vector.
kEffHomoplasy :: V.Vector Double -> Double
kEffHomoplasy = recip . homoplasy
-- | Return a copy of the vector in which the element at each of the given
-- indices is replaced by its original value plus one.
--
-- Every new value is derived from the /input/ vector, so an index listed more
-- than once still ends up incremented by one only.
incrementElemIndexByOne :: [Int] -> V.Vector Int -> V.Vector Int
incrementElemIndexByOne is v = v V.// map bumped is
  where
    -- Pair an index with the incremented value found at that index.
    bumped i = (i, v V.! i + 1)
-- | Folding step for counting characters: look up the position in @std alph@
-- of every character returned by @toStd alph char@ and increment the counts
-- at those positions.
acc :: AlphabetSpec -> V.Vector Int -> Character -> V.Vector Int
acc alph vec char = incrementElemIndexByOne positions vec
  where
    standard = std alph
    positions = map (`S.findIndex` standard) (toStd alph char)
-- | Count characters by strictly folding 'acc' over the input, starting from
-- a zero vector with one slot per element of @std alph@.
countCharacters :: AlphabetSpec -> V.Vector Character -> V.Vector Int
countCharacters alph chars = V.foldl' (acc alph) initialCounts chars
  where
    initialCounts = V.replicate (S.size (std alph)) (0 :: Int)
-- | Divide two integers as 'Double's; a zero divisor yields @0.0@ rather than
-- an infinite or NaN result.
saveDivision :: Int -> Int -> Double
saveDivision _ 0 = 0.0
saveDivision value divisor = fromIntegral value / fromIntegral divisor
-- | Relative frequencies of the counts produced by 'countCharacters': every
-- count is divided by the sum of all counts using 'saveDivision', so a zero
-- total gives a vector of zeros.
frequencyCharacters :: AlphabetSpec -> V.Vector Character -> V.Vector Double
frequencyCharacters alph d = V.map toFrequency counts
  where
    counts = countCharacters alph d
    total = sumVec counts
    toFrequency n = saveDivision n total