{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}

-- |
--   Description :  Simulate reconstructed trees
--   Copyright   :  (c) Dominik Schrempf 2018
--   License     :  GPL-3.0-or-later
--
--   Maintainer  :  dominik.schrempf@gmail.com
--   Stability   :  unstable
--   Portability :  portable
--
-- Creation date: Tue Feb 27 17:27:16 2018.
--
-- See Gernhard, T. (2008). The conditioned reconstructed process. Journal of
-- Theoretical Biology, 253(4), 769–778. http://doi.org/10.1016/j.jtbi.2008.04.005.
--
-- Mon Feb 4 14:26:11 CET 2019: Adding sampling probability rho. See Article
-- (Stadler2009) Stadler, T. On incomplete sampling under birth–death models and
-- connections to the sampling-based coalescent Journal of Theoretical Biology,
-- Elsevier BV, 2009, 261, 58-66
module TLynx.Simulate.Simulate
  ( simulate,
  )
where

import Control.Concurrent (getNumCapabilities)
import Control.Concurrent.Async.Lifted.Safe
  ( mapConcurrently,
  )
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Logger
import Control.Monad.Trans.Reader hiding (local)
import Control.Parallel.Strategies
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Foldable
import Data.Maybe
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Encoding as LT
import ELynx.Tools
import ELynx.Tree
import qualified ELynx.Tree.Simulate.Coalescent as CS
import qualified ELynx.Tree.Simulate.PointProcess as PP
import System.Random.MWC
import TLynx.Simulate.Options

-- | Simulate phylogenetic trees using birth and death process.
simulate :: ELynx SimulateArguments ()
simulate :: ELynx SimulateArguments ()
simulate = do
  l :: SimulateArguments
l@(SimulateArguments Int
nTrees Int
nLeaves Process
pr Maybe Double
subS Bool
sumS (Fixed Vector Word32
s)) <- Arguments SimulateArguments -> SimulateArguments
forall a. Arguments a -> a
local (Arguments SimulateArguments -> SimulateArguments)
-> ReaderT
     (Arguments SimulateArguments)
     (LoggingT IO)
     (Arguments SimulateArguments)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) SimulateArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Arguments SimulateArguments)
  (LoggingT IO)
  (Arguments SimulateArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  Int
c <- IO Int -> ReaderT (Arguments SimulateArguments) (LoggingT IO) Int
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO Int
getNumCapabilities
  Text -> ELynx SimulateArguments ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logNewSection Text
"Arguments"
  $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx SimulateArguments ()
(Text -> ELynx SimulateArguments ())
-> (Text -> Text) -> Text -> ELynx SimulateArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx SimulateArguments ())
-> Text -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ SimulateArguments -> String
reportSimulateArguments SimulateArguments
l
  Text -> ELynx SimulateArguments ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logNewSection Text
"Simulation"
  $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx SimulateArguments ()
(Text -> ELynx SimulateArguments ())
-> (Text -> Text) -> Text -> ELynx SimulateArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx SimulateArguments ())
-> Text -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ String
"Number of used cores: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
c
  [Gen RealWorld]
gs <- IO [Gen RealWorld]
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) [Gen RealWorld]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [Gen RealWorld]
 -> ReaderT
      (Arguments SimulateArguments) (LoggingT IO) [Gen RealWorld])
-> IO [Gen RealWorld]
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) [Gen RealWorld]
forall a b. (a -> b) -> a -> b
$ Vector Word32 -> IO (Gen (PrimState IO))
forall (m :: * -> *) (v :: * -> *).
(PrimMonad m, Vector v Word32) =>
v Word32 -> m (Gen (PrimState m))
initialize Vector Word32
s IO (Gen RealWorld)
-> (Gen RealWorld -> IO [Gen RealWorld]) -> IO [Gen RealWorld]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Gen RealWorld
gen -> Int -> Gen (PrimState IO) -> IO [Gen (PrimState IO)]
forall (m :: * -> *).
PrimMonad m =>
Int -> Gen (PrimState m) -> m [Gen (PrimState m)]
splitGen Int
c Gen RealWorld
Gen (PrimState IO)
gen
  let chunks :: [Int]
chunks = Int -> Int -> [Int]
getChunks Int
c Int
nTrees
  Forest Length Int
trs <- case Process
pr of
    (BirthDeath Double
lambda Double
mu Maybe Double
mRho TimeSpec
timeSpec) -> do
      let rho :: Double
rho = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe Double
1.0 Maybe Double
mRho
      case Maybe Double
subS of
        Maybe Double
Nothing -> IO (Forest Length Int)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Int)
 -> ReaderT
      (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int))
-> IO (Forest Length Int)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [Gen (PrimState IO)]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
lambda Double
mu Double
rho TimeSpec
timeSpec [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs
        Just Double
p ->
          Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [Gen (PrimState IO)]
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently
            Int
nLeaves
            Double
lambda
            Double
mu
            Double
rho
            Double
p
            TimeSpec
timeSpec
            [Int]
chunks
            [Gen RealWorld]
[Gen (PrimState IO)]
gs
    Process
Coalescent -> case Maybe Double
subS of
      Maybe Double
Nothing -> IO (Forest Length Int)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Int)
 -> ReaderT
      (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int))
-> IO (Forest Length Int)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> [Int] -> [Gen (PrimState IO)] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nLeaves [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs
      Just Double
p -> Int
-> Double
-> [Int]
-> [Gen (PrimState IO)]
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
p [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs
  let ls :: [ByteString]
ls =
        if Bool
sumS
          then Strategy ByteString
-> (Tree Length Int -> ByteString)
-> Forest Length Int
-> [ByteString]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy ByteString
forall a. Strategy a
rpar (NChildSumStat -> ByteString
formatNChildSumStat (NChildSumStat -> ByteString)
-> (Tree Length Int -> NChildSumStat)
-> Tree Length Int
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree Length Int -> NChildSumStat
forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat) Forest Length Int
trs
          else Strategy ByteString
-> (Tree Phylo Int -> ByteString)
-> [Tree Phylo Int]
-> [ByteString]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy ByteString
forall a. Strategy a
rpar Tree Phylo Int -> ByteString
forall a. HasName a => Tree Phylo a -> ByteString
toNewick ([Tree Phylo Int] -> [ByteString])
-> [Tree Phylo Int] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Phylo Int)
-> Forest Length Int -> [Tree Phylo Int]
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Phylo Int
forall e a. HasLength e => Tree e a -> Tree Phylo a
measurableToPhyloTree Forest Length Int
trs
  let res :: ByteString
res = [ByteString] -> ByteString
BL.unlines [ByteString]
ls
  String -> ByteString -> String -> ELynx SimulateArguments ()
forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"simulated trees" ByteString
res String
".tree"

bdSimulateNTreesConcurrently ::
  Int ->
  Double ->
  Double ->
  Double ->
  PP.TimeSpec ->
  [Int] ->
  [GenIO] ->
  IO (Forest Length Int)
bdSimulateNTreesConcurrently :: Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [Gen (PrimState IO)]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
l Double
m Double
r TimeSpec
timeSpec [Int]
chunks [Gen (PrimState IO)]
gs = do
  let l' :: Double
l' = Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
r
      m' :: Double
m' = Double
m Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
r)
  [Forest Length Int]
trss <-
    ((Int, Gen RealWorld) -> IO (Forest Length Int))
-> [(Int, Gen RealWorld)] -> IO [Forest Length Int]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, MonadBaseControl IO m, Forall (Pure m)) =>
(a -> m b) -> t a -> m (t b)
mapConcurrently
      (\(Int
n, Gen RealWorld
g) -> Int
-> Int
-> TimeSpec
-> Double
-> Double
-> Gen (PrimState IO)
-> IO (Forest Length Int)
forall (m :: * -> *).
PrimMonad m =>
Int
-> Int
-> TimeSpec
-> Double
-> Double
-> Gen (PrimState m)
-> m (Forest Length Int)
PP.simulateNReconstructedTrees Int
n Int
nLeaves TimeSpec
timeSpec Double
l' Double
m' Gen RealWorld
Gen (PrimState IO)
g)
      ([Int] -> [Gen RealWorld] -> [(Int, Gen RealWorld)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  Forest Length Int -> IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int -> IO (Forest Length Int))
-> Forest Length Int -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ [Forest Length Int] -> Forest Length Int
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss

coalSimulateNTreesConcurrently ::
  Int ->
  [Int] ->
  [GenIO] ->
  IO (Forest Length Int)
coalSimulateNTreesConcurrently :: Int -> [Int] -> [Gen (PrimState IO)] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nL [Int]
chunks [Gen (PrimState IO)]
gs = do
  [Forest Length Int]
trss <-
    ((Int, Gen RealWorld) -> IO (Forest Length Int))
-> [(Int, Gen RealWorld)] -> IO [Forest Length Int]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, MonadBaseControl IO m, Forall (Pure m)) =>
(a -> m b) -> t a -> m (t b)
mapConcurrently
      (\(Int
n, Gen RealWorld
g) -> Int -> IO (Tree Length Int) -> IO (Forest Length Int)
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
n (IO (Tree Length Int) -> IO (Forest Length Int))
-> IO (Tree Length Int) -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> Gen (PrimState IO) -> IO (Tree Length Int)
forall (m :: * -> *).
PrimMonad m =>
Int -> Gen (PrimState m) -> m (Tree Length Int)
CS.simulate Int
nL Gen RealWorld
Gen (PrimState IO)
g)
      ([Int] -> [Gen RealWorld] -> [(Int, Gen RealWorld)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  Forest Length Int -> IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int -> IO (Forest Length Int))
-> Forest Length Int -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ [Forest Length Int] -> Forest Length Int
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss

bdSimulateAndSubSampleNTreesConcurrently ::
  Int ->
  Double ->
  Double ->
  Double ->
  Double ->
  PP.TimeSpec ->
  [Int] ->
  [GenIO] ->
  ELynx SimulateArguments (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently :: Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [Gen (PrimState IO)]
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
l Double
m Double
r Double
p TimeSpec
timeSpec [Int]
chunks [Gen (PrimState IO)]
gs = do
  let nLeavesBigTree :: Int
nLeavesBigTree = (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nLeaves Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
      l' :: Double
l' = Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
r
      m' :: Double
m' = Double
m Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
r)
  Text -> ELynx SimulateArguments ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logNewSection (Text -> ELynx SimulateArguments ())
-> Text -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
      String
"Simulate one big tree with "
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nLeavesBigTree
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
  Tree Length Int
tr <- IO (Tree Length Int)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Tree Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Length Int)
 -> ReaderT
      (Arguments SimulateArguments) (LoggingT IO) (Tree Length Int))
-> IO (Tree Length Int)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Tree Length Int)
forall a b. (a -> b) -> a -> b
$ Int
-> TimeSpec
-> Double
-> Double
-> Gen (PrimState IO)
-> IO (Tree Length Int)
forall (m :: * -> *).
PrimMonad m =>
Int
-> TimeSpec
-> Double
-> Double
-> Gen (PrimState m)
-> m (Tree Length Int)
PP.simulateReconstructedTree Int
nLeavesBigTree TimeSpec
timeSpec Double
l' Double
m' ([Gen RealWorld] -> Gen RealWorld
forall a. [a] -> a
head [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  -- Log the base tree.
  $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx SimulateArguments ()
(Text -> ELynx SimulateArguments ())
-> (Text -> Text) -> Text -> ELynx SimulateArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx SimulateArguments ())
-> Text -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.toStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ ByteString -> Text
LT.decodeUtf8 (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ Tree Phylo Int -> ByteString
forall a. HasName a => Tree Phylo a -> ByteString
toNewick (Tree Phylo Int -> ByteString) -> Tree Phylo Int -> ByteString
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> Tree Phylo Int
forall e a. HasLength e => Tree e a -> Tree Phylo a
measurableToPhyloTree Tree Length Int
tr
  Text -> ELynx SimulateArguments ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logNewSection (Text -> ELynx SimulateArguments ())
-> Text -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
      String
"Sub sample "
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" trees with "
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nLeaves
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
  let lvs :: Seq Int
lvs = [Int] -> Seq Int
forall a. [a] -> Seq a
Seq.fromList ([Int] -> Seq Int) -> [Int] -> Seq Int
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> [Int]
forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
  [[Maybe (Tree Length Int)]]
trss <-
    IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Arguments SimulateArguments)
     (LoggingT IO)
     [[Maybe (Tree Length Int)]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[Maybe (Tree Length Int)]]
 -> ReaderT
      (Arguments SimulateArguments)
      (LoggingT IO)
      [[Maybe (Tree Length Int)]])
-> IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Arguments SimulateArguments)
     (LoggingT IO)
     [[Maybe (Tree Length Int)]]
forall a b. (a -> b) -> a -> b
$
      ((Int, Gen RealWorld) -> IO [Maybe (Tree Length Int)])
-> [(Int, Gen RealWorld)] -> IO [[Maybe (Tree Length Int)]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, MonadBaseControl IO m, Forall (Pure m)) =>
(a -> m b) -> t a -> m (t b)
mapConcurrently
        (\(Int
nSamples, Gen RealWorld
g) -> Int
-> Seq Int
-> Int
-> Tree Length Int
-> Gen (PrimState IO)
-> IO [Maybe (Tree Length Int)]
forall a e.
Ord a =>
Int
-> Seq a
-> Int
-> Tree e a
-> Gen (PrimState IO)
-> IO [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nLeaves Tree Length Int
tr Gen RealWorld
Gen (PrimState IO)
g)
        ([Int] -> [Gen RealWorld] -> [(Int, Gen RealWorld)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  let trs :: Forest Length Int
trs = [Maybe (Tree Length Int)] -> Forest Length Int
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Tree Length Int)] -> Forest Length Int)
-> [Maybe (Tree Length Int)] -> Forest Length Int
forall a b. (a -> b) -> a -> b
$ [[Maybe (Tree Length Int)]] -> [Maybe (Tree Length Int)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
  Forest Length Int
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int
 -> ReaderT
      (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int))
-> Forest Length Int
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Length Int)
-> Forest Length Int -> Forest Length Int
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Length Int
forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs

coalSimulateAndSubSampleNTreesConcurrently ::
  Int ->
  Double ->
  [Int] ->
  [GenIO] ->
  ELynx SimulateArguments (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently :: Int
-> Double
-> [Int]
-> [Gen (PrimState IO)]
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nL Double
p [Int]
chunks [Gen (PrimState IO)]
gs = do
  let nLeavesBigTree :: Int
nLeavesBigTree = (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nL Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
  Text -> ELynx SimulateArguments ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logNewSection (Text -> ELynx SimulateArguments ())
-> Text -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
      String
"Simulate one big tree with "
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nLeavesBigTree
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
  Tree Length Int
tr <- IO (Tree Length Int)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Tree Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Length Int)
 -> ReaderT
      (Arguments SimulateArguments) (LoggingT IO) (Tree Length Int))
-> IO (Tree Length Int)
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Tree Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> Gen (PrimState IO) -> IO (Tree Length Int)
forall (m :: * -> *).
PrimMonad m =>
Int -> Gen (PrimState m) -> m (Tree Length Int)
CS.simulate Int
nLeavesBigTree ([Gen RealWorld] -> Gen RealWorld
forall a. [a] -> a
head [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  -- Log the base tree.
  $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx SimulateArguments ()
(Text -> ELynx SimulateArguments ())
-> (Text -> Text) -> Text -> ELynx SimulateArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx SimulateArguments ())
-> Text -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.toStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ ByteString -> Text
LT.decodeUtf8 (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ Tree Phylo Int -> ByteString
forall a. HasName a => Tree Phylo a -> ByteString
toNewick (Tree Phylo Int -> ByteString) -> Tree Phylo Int -> ByteString
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> Tree Phylo Int
forall e a. HasLength e => Tree e a -> Tree Phylo a
measurableToPhyloTree Tree Length Int
tr
  Text -> ELynx SimulateArguments ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logNewSection (Text -> ELynx SimulateArguments ())
-> Text -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
      String
"Sub sample "
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" trees with "
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nL
        String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
  let lvs :: Seq Int
lvs = [Int] -> Seq Int
forall a. [a] -> Seq a
Seq.fromList ([Int] -> Seq Int) -> [Int] -> Seq Int
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> [Int]
forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
  [[Maybe (Tree Length Int)]]
trss <-
    IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Arguments SimulateArguments)
     (LoggingT IO)
     [[Maybe (Tree Length Int)]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[Maybe (Tree Length Int)]]
 -> ReaderT
      (Arguments SimulateArguments)
      (LoggingT IO)
      [[Maybe (Tree Length Int)]])
-> IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Arguments SimulateArguments)
     (LoggingT IO)
     [[Maybe (Tree Length Int)]]
forall a b. (a -> b) -> a -> b
$
      ((Int, Gen RealWorld) -> IO [Maybe (Tree Length Int)])
-> [(Int, Gen RealWorld)] -> IO [[Maybe (Tree Length Int)]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, MonadBaseControl IO m, Forall (Pure m)) =>
(a -> m b) -> t a -> m (t b)
mapConcurrently
        (\(Int
nSamples, Gen RealWorld
g) -> Int
-> Seq Int
-> Int
-> Tree Length Int
-> Gen (PrimState IO)
-> IO [Maybe (Tree Length Int)]
forall a e.
Ord a =>
Int
-> Seq a
-> Int
-> Tree e a
-> Gen (PrimState IO)
-> IO [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nL Tree Length Int
tr Gen RealWorld
Gen (PrimState IO)
g)
        ([Int] -> [Gen RealWorld] -> [(Int, Gen RealWorld)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  let trs :: Forest Length Int
trs = [Maybe (Tree Length Int)] -> Forest Length Int
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Tree Length Int)] -> Forest Length Int)
-> [Maybe (Tree Length Int)] -> Forest Length Int
forall a b. (a -> b) -> a -> b
$ [[Maybe (Tree Length Int)]] -> [Maybe (Tree Length Int)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
  Forest Length Int
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int
 -> ReaderT
      (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int))
-> Forest Length Int
-> ReaderT
     (Arguments SimulateArguments) (LoggingT IO) (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Length Int)
-> Forest Length Int -> Forest Length Int
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Length Int
forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs

-- Extract a random subtree with @N@ leaves of a tree with @M@ leaves, where
-- @M>N@ (otherwise error). The complete list of leaves (names are assumed to be
-- unique) has to be provided as a 'Seq.Seq', and a 'Seq.Set', so that fast
-- sub-sampling as well as lookup are fast and so that these data structures do
-- not have to be recomputed when many sub-samples are requested.
nSubSamples ::
  Ord a =>
  Int ->
  Seq.Seq a ->
  Int ->
  Tree e a ->
  GenIO ->
  IO [Maybe (Tree e a)]
nSubSamples :: Int
-> Seq a
-> Int
-> Tree e a
-> Gen (PrimState IO)
-> IO [Maybe (Tree e a)]
nSubSamples Int
m Seq a
lvs Int
n Tree e a
tree Gen (PrimState IO)
g
  | Seq a -> Int
forall a. Seq a -> Int
Seq.length Seq a
lvs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n =
    String -> IO [Maybe (Tree e a)]
forall a. HasCallStack => String -> a
error
      String
"Given list of leaves is shorter than requested number of leaves."
  | Bool
otherwise = do
    [[a]]
lss <- [a] -> Int -> Int -> Gen (PrimState IO) -> IO [[a]]
forall (m :: * -> *) a.
PrimMonad m =>
[a] -> Int -> Int -> Gen (PrimState m) -> m [[a]]
grabble (Seq a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Seq a
lvs) Int
m Int
n Gen (PrimState IO)
g
    let lsSets :: [Set a]
lsSets = ([a] -> Set a) -> [[a]] -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map [a] -> Set a
forall a. Ord a => [a] -> Set a
Set.fromList [[a]]
lss
    [Maybe (Tree e a)] -> IO [Maybe (Tree e a)]
forall (m :: * -> *) a. Monad m => a -> m a
return [(a -> Bool) -> Tree e a -> Maybe (Tree e a)
forall a e. (a -> Bool) -> Tree e a -> Maybe (Tree e a)
dropLeavesWith (a -> Set a -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set a
ls) Tree e a
tree | Set a
ls <- [Set a]
lsSets]

-- Pair of branch length with number of extant children.
type BrLnNChildren = (Length, Int)

-- Possible summary statistic of phylogenetic trees. A list of tuples
-- (Length, NumberOfExtantChildrenBelowThisBranch).
type NChildSumStat = [BrLnNChildren]

-- Format the summary statistics in the following form:
--
-- @
--   nLeaves1 branchLength1
--   nLeaves2 branchLength2
--   ....
-- @
formatNChildSumStat :: NChildSumStat -> BL.ByteString
formatNChildSumStat :: NChildSumStat -> ByteString
formatNChildSumStat NChildSumStat
s =
  Builder -> ByteString
BB.toLazyByteString (Builder -> ByteString)
-> ([Builder] -> Builder) -> [Builder] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> ByteString) -> [Builder] -> ByteString
forall a b. (a -> b) -> a -> b
$ (BrLnNChildren -> Builder) -> NChildSumStat -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map BrLnNChildren -> Builder
formatNChildSumStatLine NChildSumStat
s

formatNChildSumStatLine :: BrLnNChildren -> BB.Builder
formatNChildSumStatLine :: BrLnNChildren -> Builder
formatNChildSumStatLine (Length
l, Int
n) =
  Int -> Builder
BB.intDec Int
n Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
' ' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Double -> Builder
BB.doubleDec (Length -> Double
fromLength Length
l) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n'

-- Compute NChilSumStat for a phylogenetic tree.
toNChildSumStat :: HasLength e => Tree e a -> NChildSumStat
toNChildSumStat :: Tree e a -> NChildSumStat
toNChildSumStat (Node e
br a
_ []) = [(e -> Length
forall e. HasLength e => e -> Length
getLen e
br, Int
1)]
toNChildSumStat (Node e
br a
_ [Tree e a]
ts) = (e -> Length
forall e. HasLength e => e -> Length
getLen e
br, Int
sumCh) BrLnNChildren -> NChildSumStat -> NChildSumStat
forall a. a -> [a] -> [a]
: [NChildSumStat] -> NChildSumStat
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [NChildSumStat]
nChSS
  where
    nChSS :: [NChildSumStat]
nChSS = (Tree e a -> NChildSumStat) -> [Tree e a] -> [NChildSumStat]
forall a b. (a -> b) -> [a] -> [b]
map Tree e a -> NChildSumStat
forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat [Tree e a]
ts
    sumCh :: Int
sumCh = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (NChildSumStat -> Int) -> [NChildSumStat] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (BrLnNChildren -> Int
forall a b. (a, b) -> b
snd (BrLnNChildren -> Int)
-> (NChildSumStat -> BrLnNChildren) -> NChildSumStat -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NChildSumStat -> BrLnNChildren
forall a. [a] -> a
head) [NChildSumStat]
nChSS