{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}

-- |
--   Description :  Simulate reconstructed trees
--   Copyright   :  2021 Dominik Schrempf
--   License     :  GPL-3.0-or-later
--
--   Maintainer  :  dominik.schrempf@gmail.com
--   Stability   :  unstable
--   Portability :  portable
--
-- Creation date: Tue Feb 27 17:27:16 2018.
--
-- See Gernhard, T. (2008). The conditioned reconstructed process. Journal of
-- Theoretical Biology, 253(4), 769–778. http://doi.org/10.1016/j.jtbi.2008.04.005.
--
-- Mon Feb 4 14:26:11 CET 2019: Adding sampling probability rho. See Article
-- (Stadler2009) Stadler, T. On incomplete sampling under birth–death models and
-- connections to the sampling-based coalescent Journal of Theoretical Biology,
-- Elsevier BV, 2009, 261, 58-66
module TLynx.Simulate.Simulate
  ( simulate,
  )
where

import Control.Concurrent (getNumCapabilities)
import Control.Concurrent.Async
  ( mapConcurrently,
  )
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Reader hiding (local)
import Control.Parallel.Strategies
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Foldable
import Data.Maybe
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import ELynx.Tree
import qualified ELynx.Tree.Simulate.Coalescent as CS
import qualified ELynx.Tree.Simulate.PointProcess as PP
import System.Random.Stateful
import TLynx.Grabble
import TLynx.Simulate.Options

-- For a given number of capabilities and number of calculations, get chunk
-- sizes. The chunk sizes will be as evenly distributed as possible and sum up
-- to the number of calculations.
getChunks :: Int -> Int -> [Int]
getChunks :: Int -> Int -> [Int]
getChunks Int
c Int
n = [Int]
ns
  where
    n' :: Int
n' = Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
c
    r :: Int
r = Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
c
    ns :: [Int]
ns = Int -> Int -> [Int]
forall a. Int -> a -> [a]
replicate Int
r (Int
n' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ Int -> Int -> [Int]
forall a. Int -> a -> [a]
replicate (Int
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
r) Int
n'

-- | Simulate phylogenetic trees using birth and death process.
simulate :: ELynx SimulateArguments ()
simulate :: ELynx SimulateArguments ()
simulate = do
  l :: SimulateArguments
l@(SimulateArguments Int
nTrees Int
nLeaves Process
pr Maybe Double
subS Bool
sumS SeedOpt
sOpt) <- Environment SimulateArguments -> SimulateArguments
forall a. Environment a -> a
localArguments (Environment SimulateArguments -> SimulateArguments)
-> ReaderT
     (Environment SimulateArguments) IO (Environment SimulateArguments)
-> ReaderT (Environment SimulateArguments) IO SimulateArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment SimulateArguments) IO (Environment SimulateArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  let s :: Int
s = case SeedOpt -> Maybe Int
fromSeedOpt SeedOpt
sOpt of
        Maybe Int
Nothing -> [Char] -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
"simulate: No seed."
        Just Int
x -> Int
x
  Int
c <- IO Int -> ReaderT (Environment SimulateArguments) IO Int
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO Int
getNumCapabilities
  [Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection [Char]
"Arguments"
  [Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoS ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ SimulateArguments -> [Char]
reportSimulateArguments SimulateArguments
l
  [Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection [Char]
"Simulation"
  [Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoS ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Number of used cores: " [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
c
  IOGenM StdGen
gen <- StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen)
forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM (StdGen
 -> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen))
-> StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen)
forall a b. (a -> b) -> a -> b
$ Int -> StdGen
mkStdGen Int
s
  [StdGen]
rs <- Int
-> ReaderT (Environment SimulateArguments) IO StdGen
-> ReaderT (Environment SimulateArguments) IO [StdGen]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
c (ReaderT (Environment SimulateArguments) IO StdGen
 -> ReaderT (Environment SimulateArguments) IO [StdGen])
-> ReaderT (Environment SimulateArguments) IO StdGen
-> ReaderT (Environment SimulateArguments) IO [StdGen]
forall a b. (a -> b) -> a -> b
$ IOGenM StdGen -> ReaderT (Environment SimulateArguments) IO StdGen
forall g r (m :: * -> *). RandomGenM g r m => g -> m r
splitGenM IOGenM StdGen
gen
  [IOGenM StdGen]
gs <- (StdGen
 -> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen))
-> [StdGen]
-> ReaderT (Environment SimulateArguments) IO [IOGenM StdGen]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen)
forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM [StdGen]
rs
  let chunks :: [Int]
chunks = Int -> Int -> [Int]
getChunks Int
c Int
nTrees
  Forest Length Int
trs <- case Process
pr of
    (BirthDeath Double
lambda Double
mu Maybe Double
mRho TimeSpec
timeSpec) -> do
      let rho :: Double
rho = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe Double
1.0 Maybe Double
mRho
      case Maybe Double
subS of
        Maybe Double
Nothing -> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Int)
 -> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM StdGen]
-> IO (Forest Length Int)
forall g.
RandomGen g =>
Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
lambda Double
mu Double
rho TimeSpec
timeSpec [Int]
chunks [IOGenM StdGen]
gs
        Just Double
p ->
          Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM StdGen]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall g.
RandomGen g =>
Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently
            Int
nLeaves
            Double
lambda
            Double
mu
            Double
rho
            Double
p
            TimeSpec
timeSpec
            [Int]
chunks
            [IOGenM StdGen]
gs
    Process
Coalescent -> case Maybe Double
subS of
      Maybe Double
Nothing -> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Int)
 -> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> [Int] -> [IOGenM StdGen] -> IO (Forest Length Int)
forall g.
RandomGen g =>
Int -> [Int] -> [IOGenM g] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nLeaves [Int]
chunks [IOGenM StdGen]
gs
      Just Double
p -> Int
-> Double
-> [Int]
-> [IOGenM StdGen]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall g.
RandomGen g =>
Int
-> Double
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
p [Int]
chunks [IOGenM StdGen]
gs
  let ls :: [ByteString]
ls =
        if Bool
sumS
          then Strategy ByteString
-> (Tree Length Int -> ByteString)
-> Forest Length Int
-> [ByteString]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy ByteString
forall a. Strategy a
rpar (NChildSumStat -> ByteString
formatNChildSumStat (NChildSumStat -> ByteString)
-> (Tree Length Int -> NChildSumStat)
-> Tree Length Int
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree Length Int -> NChildSumStat
forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat) Forest Length Int
trs
          else Strategy ByteString
-> (Tree Phylo Int -> ByteString)
-> [Tree Phylo Int]
-> [ByteString]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy ByteString
forall a. Strategy a
rpar Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick ([Tree Phylo Int] -> [ByteString])
-> [Tree Phylo Int] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Phylo Int)
-> Forest Length Int -> [Tree Phylo Int]
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Forest Length Int
trs
  let res :: ByteString
res = [ByteString] -> ByteString
BL.unlines [ByteString]
ls
  [Char] -> ByteString -> [Char] -> ELynx SimulateArguments ()
forall a.
Reproducible a =>
[Char] -> ByteString -> [Char] -> ELynx a ()
out [Char]
"simulated trees" ByteString
res [Char]
".tree"

bdSimulateNTreesConcurrently ::
  RandomGen g =>
  Int ->
  Double ->
  Double ->
  Double ->
  PP.TimeSpec ->
  [Int] ->
  [IOGenM g] ->
  IO (Forest Length Int)
bdSimulateNTreesConcurrently :: Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
l Double
m Double
r TimeSpec
timeSpec [Int]
chunks [IOGenM g]
gs = do
  let l' :: Double
l' = Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
r
      m' :: Double
m' = Double
m Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
r)
  [Forest Length Int]
trss <-
    ((Int, IOGenM g) -> IO (Forest Length Int))
-> [(Int, IOGenM g)] -> IO [Forest Length Int]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
      (\(Int
n, IOGenM g
g) -> Int
-> Int
-> TimeSpec
-> Double
-> Double
-> IOGenM g
-> IO (Forest Length Int)
forall g (m :: * -> *).
StatefulGen g m =>
Int
-> Int
-> TimeSpec
-> Double
-> Double
-> g
-> m (Forest Length Int)
PP.simulateNReconstructedTrees Int
n Int
nLeaves TimeSpec
timeSpec Double
l' Double
m' IOGenM g
g)
      ([Int] -> [IOGenM g] -> [(Int, IOGenM g)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
  Forest Length Int -> IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int -> IO (Forest Length Int))
-> Forest Length Int -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ [Forest Length Int] -> Forest Length Int
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss

coalSimulateNTreesConcurrently ::
  RandomGen g =>
  Int ->
  [Int] ->
  [IOGenM g] ->
  IO (Forest Length Int)
coalSimulateNTreesConcurrently :: Int -> [Int] -> [IOGenM g] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nL [Int]
chunks [IOGenM g]
gs = do
  [Forest Length Int]
trss <-
    ((Int, IOGenM g) -> IO (Forest Length Int))
-> [(Int, IOGenM g)] -> IO [Forest Length Int]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
      (\(Int
n, IOGenM g
g) -> Int -> IO (Tree Length Int) -> IO (Forest Length Int)
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
n (IO (Tree Length Int) -> IO (Forest Length Int))
-> IO (Tree Length Int) -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> IOGenM g -> IO (Tree Length Int)
forall g (m :: * -> *).
StatefulGen g m =>
Int -> g -> m (Tree Length Int)
CS.simulate Int
nL IOGenM g
g)
      ([Int] -> [IOGenM g] -> [(Int, IOGenM g)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
  Forest Length Int -> IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int -> IO (Forest Length Int))
-> Forest Length Int -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ [Forest Length Int] -> Forest Length Int
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss

bdSimulateAndSubSampleNTreesConcurrently ::
  RandomGen g =>
  Int ->
  Double ->
  Double ->
  Double ->
  Double ->
  PP.TimeSpec ->
  [Int] ->
  [IOGenM g] ->
  ELynx SimulateArguments (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently :: Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
l Double
m Double
r Double
p TimeSpec
timeSpec [Int]
chunks [IOGenM g]
gs = do
  let nLeavesBigTree :: Int
nLeavesBigTree = (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nLeaves Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
      l' :: Double
l' = Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
r
      m' :: Double
m' = Double
m Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
r)
  [Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    [Char]
"Simulate one big tree with "
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
nLeavesBigTree
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" leaves."
  Tree Length Int
tr <- IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Length Int)
 -> ReaderT (Environment SimulateArguments) IO (Tree Length Int))
-> IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall a b. (a -> b) -> a -> b
$ Int
-> TimeSpec -> Double -> Double -> IOGenM g -> IO (Tree Length Int)
forall g (m :: * -> *).
StatefulGen g m =>
Int -> TimeSpec -> Double -> Double -> g -> m (Tree Length Int)
PP.simulateReconstructedTree Int
nLeavesBigTree TimeSpec
timeSpec Double
l' Double
m' ([IOGenM g] -> IOGenM g
forall a. [a] -> a
head [IOGenM g]
gs)
  -- Log the base tree.
  ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick (Tree Phylo Int -> ByteString) -> Tree Phylo Int -> ByteString
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Tree Length Int
tr
  [Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    [Char]
"Sub sample "
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" trees with "
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
nLeaves
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" leaves."
  let lvs :: Seq Int
lvs = [Int] -> Seq Int
forall a. [a] -> Seq a
Seq.fromList ([Int] -> Seq Int) -> [Int] -> Seq Int
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> [Int]
forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
  [[Maybe (Tree Length Int)]]
trss <-
    IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[Maybe (Tree Length Int)]]
 -> ReaderT
      (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]])
-> IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall a b. (a -> b) -> a -> b
$
      ((Int, IOGenM g) -> IO [Maybe (Tree Length Int)])
-> [(Int, IOGenM g)] -> IO [[Maybe (Tree Length Int)]]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
        (\(Int
nSamples, IOGenM g
g) -> Int
-> Seq Int
-> Int
-> Tree Length Int
-> IOGenM g
-> IO [Maybe (Tree Length Int)]
forall a g (m :: * -> *) e.
(Ord a, StatefulGen g m) =>
Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nLeaves Tree Length Int
tr IOGenM g
g)
        ([Int] -> [IOGenM g] -> [(Int, IOGenM g)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
  let trs :: Forest Length Int
trs = [Maybe (Tree Length Int)] -> Forest Length Int
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Tree Length Int)] -> Forest Length Int)
-> [Maybe (Tree Length Int)] -> Forest Length Int
forall a b. (a -> b) -> a -> b
$ [[Maybe (Tree Length Int)]] -> [Maybe (Tree Length Int)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
  Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int
 -> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Length Int)
-> Forest Length Int -> Forest Length Int
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Length Int
forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs

coalSimulateAndSubSampleNTreesConcurrently ::
  RandomGen g =>
  Int ->
  Double ->
  [Int] ->
  [IOGenM g] ->
  ELynx SimulateArguments (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently :: Int
-> Double
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nL Double
p [Int]
chunks [IOGenM g]
gs = do
  let nLeavesBigTree :: Int
nLeavesBigTree = (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nL Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
  [Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    [Char]
"Simulate one big tree with "
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
nLeavesBigTree
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" leaves."
  Tree Length Int
tr <- IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Length Int)
 -> ReaderT (Environment SimulateArguments) IO (Tree Length Int))
-> IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> IOGenM g -> IO (Tree Length Int)
forall g (m :: * -> *).
StatefulGen g m =>
Int -> g -> m (Tree Length Int)
CS.simulate Int
nLeavesBigTree ([IOGenM g] -> IOGenM g
forall a. [a] -> a
head [IOGenM g]
gs)
  -- Log the base tree.
  ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick (Tree Phylo Int -> ByteString) -> Tree Phylo Int -> ByteString
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Tree Length Int
tr
  [Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    [Char]
"Sub sample "
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" trees with "
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
nL
      [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" leaves."
  let lvs :: Seq Int
lvs = [Int] -> Seq Int
forall a. [a] -> Seq a
Seq.fromList ([Int] -> Seq Int) -> [Int] -> Seq Int
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> [Int]
forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
  [[Maybe (Tree Length Int)]]
trss <-
    IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[Maybe (Tree Length Int)]]
 -> ReaderT
      (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]])
-> IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall a b. (a -> b) -> a -> b
$
      ((Int, IOGenM g) -> IO [Maybe (Tree Length Int)])
-> [(Int, IOGenM g)] -> IO [[Maybe (Tree Length Int)]]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
        (\(Int
nSamples, IOGenM g
g) -> Int
-> Seq Int
-> Int
-> Tree Length Int
-> IOGenM g
-> IO [Maybe (Tree Length Int)]
forall a g (m :: * -> *) e.
(Ord a, StatefulGen g m) =>
Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nL Tree Length Int
tr IOGenM g
g)
        ([Int] -> [IOGenM g] -> [(Int, IOGenM g)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
  let trs :: Forest Length Int
trs = [Maybe (Tree Length Int)] -> Forest Length Int
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Tree Length Int)] -> Forest Length Int)
-> [Maybe (Tree Length Int)] -> Forest Length Int
forall a b. (a -> b) -> a -> b
$ [[Maybe (Tree Length Int)]] -> [Maybe (Tree Length Int)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
  Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int
 -> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Length Int)
-> Forest Length Int -> Forest Length Int
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Length Int
forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs

-- Extract a random subtree with @N@ leaves of a tree with @M@ leaves, where
-- @M>N@ (otherwise error). The complete list of leaves (names are assumed to be
-- unique) has to be provided as a 'Seq.Seq', and a 'Seq.Set', so that fast
-- sub-sampling as well as lookup are fast and so that these data structures do
-- not have to be recomputed when many sub-samples are requested.
nSubSamples ::
  (Ord a, StatefulGen g m) =>
  Int ->
  Seq.Seq a ->
  Int ->
  Tree e a ->
  g ->
  m [Maybe (Tree e a)]
nSubSamples :: Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
m Seq a
lvs Int
n Tree e a
tree g
g
  | Seq a -> Int
forall a. Seq a -> Int
Seq.length Seq a
lvs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n =
      [Char] -> m [Maybe (Tree e a)]
forall a. HasCallStack => [Char] -> a
error
        [Char]
"Given list of leaves is shorter than requested number of leaves."
  | Bool
otherwise = do
      [[a]]
lss <- [a] -> Int -> Int -> g -> m [[a]]
forall g (m :: * -> *) a.
StatefulGen g m =>
[a] -> Int -> Int -> g -> m [[a]]
grabble (Seq a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Seq a
lvs) Int
m Int
n g
g
      let lsSets :: [Set a]
lsSets = ([a] -> Set a) -> [[a]] -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map [a] -> Set a
forall a. Ord a => [a] -> Set a
Set.fromList [[a]]
lss
      [Maybe (Tree e a)] -> m [Maybe (Tree e a)]
forall (m :: * -> *) a. Monad m => a -> m a
return [(a -> Bool) -> Tree e a -> Maybe (Tree e a)
forall a e. (a -> Bool) -> Tree e a -> Maybe (Tree e a)
dropLeavesWith (a -> Set a -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set a
ls) Tree e a
tree | Set a
ls <- [Set a]
lsSets]

-- Pair of branch length with number of extant children.
type BrLnNChildren = (Length, Int)

-- Possible summary statistic of phylogenetic trees. A list of tuples
-- (Length, NumberOfExtantChildrenBelowThisBranch).
type NChildSumStat = [BrLnNChildren]

-- Format the summary statistics in the following form:
--
-- @
--   nLeaves1 branchLength1
--   nLeaves2 branchLength2
--   ....
-- @
formatNChildSumStat :: NChildSumStat -> BL.ByteString
formatNChildSumStat :: NChildSumStat -> ByteString
formatNChildSumStat NChildSumStat
s =
  Builder -> ByteString
BB.toLazyByteString (Builder -> ByteString)
-> ([Builder] -> Builder) -> [Builder] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> ByteString) -> [Builder] -> ByteString
forall a b. (a -> b) -> a -> b
$ (BrLnNChildren -> Builder) -> NChildSumStat -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map BrLnNChildren -> Builder
formatNChildSumStatLine NChildSumStat
s

formatNChildSumStatLine :: BrLnNChildren -> BB.Builder
formatNChildSumStatLine :: BrLnNChildren -> Builder
formatNChildSumStatLine (Length
l, Int
n) =
  Int -> Builder
BB.intDec Int
n Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
' ' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Double -> Builder
BB.doubleDec (Length -> Double
fromLength Length
l) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n'

-- Compute NChilSumStat for a phylogenetic tree.
toNChildSumStat :: HasLength e => Tree e a -> NChildSumStat
toNChildSumStat :: Tree e a -> NChildSumStat
toNChildSumStat (Node e
br a
_ []) = [(e -> Length
forall e. HasLength e => e -> Length
getLength e
br, Int
1)]
toNChildSumStat (Node e
br a
_ [Tree e a]
ts) = (e -> Length
forall e. HasLength e => e -> Length
getLength e
br, Int
sumCh) BrLnNChildren -> NChildSumStat -> NChildSumStat
forall a. a -> [a] -> [a]
: [NChildSumStat] -> NChildSumStat
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [NChildSumStat]
nChSS
  where
    nChSS :: [NChildSumStat]
nChSS = (Tree e a -> NChildSumStat) -> [Tree e a] -> [NChildSumStat]
forall a b. (a -> b) -> [a] -> [b]
map Tree e a -> NChildSumStat
forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat [Tree e a]
ts
    sumCh :: Int
sumCh = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (NChildSumStat -> Int) -> [NChildSumStat] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (BrLnNChildren -> Int
forall a b. (a, b) -> b
snd (BrLnNChildren -> Int)
-> (NChildSumStat -> BrLnNChildren) -> NChildSumStat -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NChildSumStat -> BrLnNChildren
forall a. [a] -> a
head) [NChildSumStat]
nChSS