{- |
   Description :  Work with transition probability matrices on rooted trees
   Copyright   :  (c) Dominik Schrempf 2017
   License     :  GPLv3

   Maintainer  :  dominik.schrempf@gmail.com
   Stability   :  unstable
   Portability :  non-portable (not tested)

Calculate transition probability matrices, map rate matrices on trees, populate
a tree with states according to a stationary distribution, etc.

The implementation of the Markov process is more than basic and can be improved
in a lot of ways.

-}

module ELynx.Simulate.MarkovProcessAlongTree
  ( -- * Single rate matrix.
    simulate
  , simulateAndFlatten
    -- * Mixture models.
  , simulateMixtureModel
  , simulateAndFlattenMixtureModel
  , simulateAndFlattenMixtureModelPar
  )
where

import           Control.Monad
import           Control.Monad.Primitive
import           Control.Parallel.Strategies
import           Data.Tree
import           Numeric.LinearAlgebra
import           System.Random.MWC              ( Gen
                                                , GenIO
                                                )
import           System.Random.MWC.Distributions
                                                ( categorical )

import           ELynx.Data.Tree

import           ELynx.Tools

import           ELynx.Data.MarkovProcess.RateMatrix
import           ELynx.Simulate.MarkovProcess

toProbTree :: (Measurable a) => RateMatrix -> Tree a -> Tree ProbMatrix
toProbTree q = fmap (probMatrix q . getLen)

getRootStates
  :: PrimMonad m
  => Int
  -> StationaryDistribution
  -> Gen (PrimState m)
  -> m [State]
getRootStates n d g = replicateM n $ categorical d g

-- | Simulate a number of sites for a given substitution model. Only the states
-- at the leafs are retained. The states at internal nodes are removed. This has
-- a lower memory footprint.
--
-- XXX: Improve performance. Use vectors, not lists.
simulateAndFlatten
  :: (PrimMonad m, Measurable a)
  => Int
  -> StationaryDistribution
  -> ExchangeabilityMatrix
  -> Tree a
  -> Gen (PrimState m)
  -> m [[State]]
simulateAndFlatten n d e t g = do
  let q  = fromExchangeabilityMatrix e d
      pt = toProbTree q t
  is <- getRootStates n d g
  simulateAndFlatten' is pt g

-- This is the heart of the simulation. Take a tree and a list of root states.
-- Recursively jump down the branches to the leafs. Forget states at internal.
simulateAndFlatten'
  :: (PrimMonad m)
  => [State]
  -> Tree ProbMatrix
  -> Gen (PrimState m)
  -> m [[State]]
simulateAndFlatten' is (Node p f) g = do
  is' <- mapM (\i -> jump i p g) is
  if null f
    then return [is']
    else concat <$> sequence [ simulateAndFlatten' is' t g | t <- f ]

-- | Simulate a number of sites for a given substitution model. Keep states at
-- internal nodes. The result is a tree with the list of simulated states as
-- node labels.
simulate
  :: (PrimMonad m, Measurable a)
  => Int
  -> StationaryDistribution
  -> ExchangeabilityMatrix
  -> Tree a
  -> Gen (PrimState m)
  -> m (Tree [State])
simulate n d e t g = do
  let q  = fromExchangeabilityMatrix e d
      pt = toProbTree q t
  is <- getRootStates n d g
  simulate' is pt g

-- This is the heart of the simulation. Take a tree and a list of root states.
-- Recursively jump down the branches to the leafs.
simulate'
  :: (PrimMonad m)
  => [State]
  -> Tree ProbMatrix
  -> Gen (PrimState m)
  -> m (Tree [State])
simulate' is (Node p f) g = do
  is' <- mapM (\i -> jump i p g) is
  f'  <- sequence [ simulate' is' t g | t <- f ]
  return $ Node is' f'

toProbTreeMixtureModel
  :: (Measurable a) => [RateMatrix] -> Tree a -> Tree [ProbMatrix]
toProbTreeMixtureModel qs =
  fmap (\a -> [ probMatrix q . getLen $ a | q <- qs ] `using` parList rpar)

getComponentsAndRootStates
  :: PrimMonad m
  => Int
  -> Vector R
  -> [StationaryDistribution]
  -> Gen (PrimState m)
  -> m ([Int], [State])
getComponentsAndRootStates n ws ds g = do
  cs <- replicateM n $ categorical ws g
  is <- sequence [ categorical (ds !! c) g | c <- cs ]
  return (cs, is)

-- | Simulate a number of sites for a given set of substitution models with
-- corresponding weights. Forget states at internal nodes. See also
-- 'simulateAndFlatten'.
simulateAndFlattenMixtureModel
  :: (PrimMonad m, Measurable a)
  => Int
  -> Vector R
  -> [StationaryDistribution]
  -> [ExchangeabilityMatrix]
  -> Tree a
  -> Gen (PrimState m)
  -> m [[State]]
simulateAndFlattenMixtureModel n ws ds es t g = do
  let qs = zipWith fromExchangeabilityMatrix es ds
      pt = toProbTreeMixtureModel qs t
  (cs, is) <- getComponentsAndRootStates n ws ds g
  simulateAndFlattenMixtureModel' is cs pt g

simulateAndFlattenMixtureModel'
  :: (PrimMonad m)
  => [State]
  -> [Int]
  -> Tree [ProbMatrix]
  -> Gen (PrimState m)
  -> m [[State]]
simulateAndFlattenMixtureModel' is cs (Node ps f) g = do
  is' <- sequence [ jump i (ps !! c) g | (i, c) <- zip is cs ]
  if null f
    then return [is']
    else concat
      <$> sequence [ simulateAndFlattenMixtureModel' is' cs t g | t <- f ]

-- | See 'simulateAndFlattenMixtureModel', parallel version;
-- needs to be run in IO monad.
simulateAndFlattenMixtureModelPar
  :: Measurable a
  => Int
  -> Vector R
  -> [StationaryDistribution]
  -> [ExchangeabilityMatrix]
  -> Tree a
  -> GenIO
  -> IO [[[State]]]
simulateAndFlattenMixtureModelPar n ws ds es t g = do
  let qs = zipWith fromExchangeabilityMatrix es ds
      pt = toProbTreeMixtureModel qs t
  parComp
    n
    (\n' g' -> do
      (cs, is) <- getComponentsAndRootStates n' ws ds g'
      simulateAndFlattenMixtureModel' is cs pt g'
    )
    g

-- | Simulate a number of sites for a given set of substitution models with
-- corresponding weights. Keep states at internal nodes. See also
-- 'simulate'.
simulateMixtureModel
  :: (PrimMonad m, Measurable a)
  => Int
  -> Vector R
  -> [StationaryDistribution]
  -> [ExchangeabilityMatrix]
  -> Tree a
  -> Gen (PrimState m)
  -> m (Tree [State])
simulateMixtureModel n ws ds es t g = do
  let qs = zipWith fromExchangeabilityMatrix es ds
      pt = toProbTreeMixtureModel qs t
  (cs, is) <- getComponentsAndRootStates n ws ds g
  simulateMixtureModel' is cs pt g

-- See 'simulateAlongProbTree', only we have a number of mixture components. The
-- starting states and the components for each site have to be provided.
simulateMixtureModel'
  :: (PrimMonad m)
  => [State]
  -> [Int]
  -> Tree [ProbMatrix]
  -> Gen (PrimState m)
  -> m (Tree [State])
simulateMixtureModel' is cs (Node ps f) g = do
  is' <- sequence [ jump i (ps !! c) g | (i, c) <- zip is cs ]
  f'  <- sequence [ simulateMixtureModel' is' cs t g | t <- f ]
  return $ Node is' f'