-- |
-- Module      :  ELynx.MarkovProcess.GammaRateHeterogeneity
-- Description :  Discrete gamma rate heterogeneity
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Thu Feb 28 14:09:11 2019.
--
-- At the moment, a mixture model is used to emulate gamma rate heterogeneity. This
-- does not come with huge run time increases when simulating data. For inference
-- however, it would make a lot of sense to reuse the Eigendecomposition for all
-- rate heterogeneity components though.
module ELynx.MarkovProcess.GammaRateHeterogeneity
  ( summarizeGammaRateHeterogeneity,
    expand,
  )
where

import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Vector as V
import qualified ELynx.MarkovProcess.MixtureModel as M
import qualified ELynx.MarkovProcess.PhyloModel as P
import qualified ELynx.MarkovProcess.SubstitutionModel as S
import Numeric.Integration.TanhSinh
import Statistics.Distribution
import Statistics.Distribution.Gamma
import Prelude hiding (repeat)

-- | Short summary of gamma rate heterogeneity parameters.
summarizeGammaRateHeterogeneity :: Int -> Double -> [BL.ByteString]
summarizeGammaRateHeterogeneity :: Int -> Double -> [ByteString]
summarizeGammaRateHeterogeneity Int
n Double
alpha =
  forall a b. (a -> b) -> [a] -> [b]
map
    [Char] -> ByteString
BL.pack
    [ [Char]
"Discrete gamma rate heterogeneity.",
      [Char]
"Number of categories: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
n,
      [Char]
"Shape parameter of gamma distribution: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Double
alpha,
      [Char]
"Rates: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int -> Double -> Vector Double
getMeans Int
n Double
alpha)
    ]

-- | For a given number of rate categories, a gamma shape parameter alpha and a
-- substitution model, compute the scaled substitution models corresponding to
-- the gamma rates.
expand :: Int -> Double -> P.PhyloModel -> P.PhyloModel
expand :: Int -> Double -> PhyloModel -> PhyloModel
expand Int
n Double
alpha (P.SubstitutionModel SubstitutionModel
sm) =
  MixtureModel -> PhyloModel
P.MixtureModel forall a b. (a -> b) -> a -> b
$ Int -> Double -> SubstitutionModel -> MixtureModel
expandSubstitutionModel Int
n Double
alpha SubstitutionModel
sm
expand Int
n Double
alpha (P.MixtureModel MixtureModel
mm) =
  MixtureModel -> PhyloModel
P.MixtureModel forall a b. (a -> b) -> a -> b
$ Int -> Double -> MixtureModel -> MixtureModel
expandMixtureModel Int
n Double
alpha MixtureModel
mm

getName :: Int -> Double -> String
getName :: Int -> Double -> [Char]
getName Int
n Double
alpha =
  [Char]
" with discrete gamma rate heterogeneity; "
    forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
n
    forall a. [a] -> [a] -> [a]
++ [Char]
" categories; "
    forall a. [a] -> [a] -> [a]
++ [Char]
"shape parameter "
    forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Double
alpha

splitSubstitutionModel ::
  Int -> Double -> S.SubstitutionModel -> V.Vector S.SubstitutionModel
splitSubstitutionModel :: Int -> Double -> SubstitutionModel -> Vector SubstitutionModel
splitSubstitutionModel Int
n Double
alpha SubstitutionModel
sm = Vector SubstitutionModel
renamedSMs
  where
    means :: Vector Double
means = Int -> Double -> Vector Double
getMeans Int
n Double
alpha
    scaledSMs :: Vector SubstitutionModel
scaledSMs = forall a b. (a -> b) -> Vector a -> Vector b
V.map (Double -> SubstitutionModel -> SubstitutionModel
`S.scale` SubstitutionModel
sm) Vector Double
means
    names :: Vector [Char]
names = forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (([Char]
"; gamma rate category " forall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> [Char]
show) [Int
1 :: Int ..]
    renamedSMs :: Vector SubstitutionModel
renamedSMs = forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith [Char] -> SubstitutionModel -> SubstitutionModel
S.appendName Vector [Char]
names Vector SubstitutionModel
scaledSMs

expandSubstitutionModel ::
  Int -> Double -> S.SubstitutionModel -> M.MixtureModel
expandSubstitutionModel :: Int -> Double -> SubstitutionModel -> MixtureModel
expandSubstitutionModel Int
n Double
alpha SubstitutionModel
sm = [Char] -> Vector Double -> Vector SubstitutionModel -> MixtureModel
M.fromSubstitutionModels [Char]
name Vector Double
ws Vector SubstitutionModel
sms
  where
    name :: [Char]
name = SubstitutionModel -> [Char]
S.name SubstitutionModel
sm forall a. Semigroup a => a -> a -> a
<> Int -> Double -> [Char]
getName Int
n Double
alpha
    ws :: Vector Double
ws = forall a. Int -> a -> Vector a
V.replicate Int
n Double
1.0
    sms :: Vector SubstitutionModel
sms = Int -> Double -> SubstitutionModel -> Vector SubstitutionModel
splitSubstitutionModel Int
n Double
alpha SubstitutionModel
sm

expandMixtureModel :: Int -> Double -> M.MixtureModel -> M.MixtureModel
expandMixtureModel :: Int -> Double -> MixtureModel -> MixtureModel
expandMixtureModel Int
n Double
alpha MixtureModel
mm = [Char] -> Vector MixtureModel -> MixtureModel
M.concatenate [Char]
name Vector MixtureModel
renamedMMs
  where
    name :: [Char]
name = MixtureModel -> [Char]
M.name MixtureModel
mm forall a. Semigroup a => a -> a -> a
<> Int -> Double -> [Char]
getName Int
n Double
alpha
    means :: Vector Double
means = Int -> Double -> Vector Double
getMeans Int
n Double
alpha
    scaledMMs :: Vector MixtureModel
scaledMMs = forall a b. (a -> b) -> Vector a -> Vector b
V.map (Double -> MixtureModel -> MixtureModel
`M.scale` MixtureModel
mm) Vector Double
means
    names :: Vector [Char]
names = forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (([Char]
"; gamma rate category " forall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> [Char]
show) [Int
1 :: Int ..]
    renamedMMs :: Vector MixtureModel
renamedMMs = forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith [Char] -> MixtureModel -> MixtureModel
M.appendNameComponents Vector [Char]
names Vector MixtureModel
scaledMMs

-- For a given number of rate categories 'n' and a shape parameter 'alpha' (the
-- rate or scale is set such that the mean is 1.0), return a list of rates that
-- represent the respective categories. Use the mean rate for each category.
getMeans :: Int -> Double -> V.Vector Double
getMeans :: Int -> Double -> Vector Double
getMeans Int
n Double
alpha
  | Int
n forall a. Ord a => a -> a -> Bool
<= Int
0 = forall a. HasCallStack => [Char] -> a
error [Char]
"getMeans: Number of rate categories is zero or negative."
  | Bool
otherwise = Vector Double
means forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. Applicative f => a -> f a
pure Double
lastMean
  where
    gamma :: GammaDistribution
gamma = Double -> Double -> GammaDistribution
gammaDistr Double
alpha (Double
1.0 forall a. Fractional a => a -> a -> a
/ Double
alpha)
    quantiles :: [Double]
quantiles =
      [forall d. ContDistr d => d -> Double -> Double
quantile GammaDistribution
gamma (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n) | Int
i <- [Int
0 .. Int
n]]
    -- Calculate the mean rate. Multiplication with the number of rate
    -- categories 'n' is necessary because in each n-quantile the
    -- probability mass is 1/n.
    meanFunc :: Double -> Double
meanFunc Double
x = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n forall a. Num a => a -> a -> a
* Double
x forall a. Num a => a -> a -> a
* forall d. ContDistr d => d -> Double -> Double
density GammaDistribution
gamma Double
x
    -- Only calculate the first (n-1) categories with normal integration.
    means :: Vector Double
means =
      forall a. [a] -> Vector a
V.fromList
        [ (Double -> Double) -> Double -> Double -> Double
integralAToB Double -> Double
meanFunc ([Double]
quantiles forall a. [a] -> Int -> a
!! Int
i) ([Double]
quantiles forall a. [a] -> Int -> a
!! (Int
i forall a. Num a => a -> a -> a
+ Int
1))
          | Int
i <- [Int
0 .. Int
n forall a. Num a => a -> a -> a
- Int
2]
        ]
    -- The last category has to be calculated with an improper integration.
    lastMean :: Double
lastMean = (Double -> Double) -> Double -> Double
integralAToInf Double -> Double
meanFunc ([Double]
quantiles forall a. [a] -> Int -> a
!! (Int
n forall a. Num a => a -> a -> a
- Int
1))

eps :: Double
eps :: Double
eps = Double
1e-8

-- The integration method to use
method :: (Double -> Double) -> Double -> Double -> [Result]
method :: (Double -> Double) -> Double -> Double -> [Result]
method = (Double -> Double) -> Double -> Double -> [Result]
parSimpson

-- Helper function for a normal integral from 'a' to 'b'.
integralAToB :: (Double -> Double) -> Double -> Double -> Double
integralAToB :: (Double -> Double) -> Double -> Double -> Double
integralAToB Double -> Double
f Double
a Double
b = Result -> Double
result forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> [Result] -> Result
absolute Double
eps forall a b. (a -> b) -> a -> b
$ (Double -> Double) -> Double -> Double -> [Result]
method Double -> Double
f Double
a Double
b

-- Helper function for an improper integral from 'a' to infinity.
integralAToInf :: (Double -> Double) -> Double -> Double
integralAToInf :: (Double -> Double) -> Double -> Double
integralAToInf Double -> Double
f Double
a =
  (Result -> Double
result forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> [Result] -> Result
absolute Double
eps forall a b. (a -> b) -> a -> b
$ forall r.
((Double -> Double) -> Double -> Double -> r)
-> (Double -> Double) -> r
nonNegative (Double -> Double) -> Double -> Double -> [Result]
method Double -> Double
f) forall a. Num a => a -> a -> a
- (Double -> Double) -> Double -> Double -> Double
integralAToB Double -> Double
f Double
0 Double
a