{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
module TLynx.Simulate.Simulate
( simulate,
)
where
import Control.Concurrent (getNumCapabilities)
import Control.Concurrent.Async
( mapConcurrently,
)
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Reader hiding (local)
import Control.Parallel.Strategies
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Foldable
import Data.Maybe
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import ELynx.Tree
import qualified ELynx.Tree.Simulate.Coalescent as CS
import qualified ELynx.Tree.Simulate.PointProcess as PP
import System.Random.Stateful
import TLynx.Grabble
import TLynx.Simulate.Options
getChunks :: Int -> Int -> [Int]
getChunks :: Int -> Int -> [Int]
getChunks Int
c Int
n = [Int]
ns
where
n' :: Int
n' = Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
c
r :: Int
r = Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
c
ns :: [Int]
ns = Int -> Int -> [Int]
forall a. Int -> a -> [a]
replicate Int
r (Int
n' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ Int -> Int -> [Int]
forall a. Int -> a -> [a]
replicate (Int
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
r) Int
n'
simulate :: ELynx SimulateArguments ()
simulate :: ELynx SimulateArguments ()
simulate = do
l :: SimulateArguments
l@(SimulateArguments Int
nTrees Int
nLeaves Process
pr Maybe Double
subS Bool
sumS SeedOpt
sOpt) <- Environment SimulateArguments -> SimulateArguments
forall a. Environment a -> a
localArguments (Environment SimulateArguments -> SimulateArguments)
-> ReaderT
(Environment SimulateArguments) IO (Environment SimulateArguments)
-> ReaderT (Environment SimulateArguments) IO SimulateArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
(Environment SimulateArguments) IO (Environment SimulateArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
let s :: Int
s = case SeedOpt -> Maybe Int
fromSeedOpt SeedOpt
sOpt of
Maybe Int
Nothing -> [Char] -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
"simulate: No seed."
Just Int
x -> Int
x
Int
c <- IO Int -> ReaderT (Environment SimulateArguments) IO Int
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO Int
getNumCapabilities
[Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection [Char]
"Arguments"
[Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoS ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ SimulateArguments -> [Char]
reportSimulateArguments SimulateArguments
l
[Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection [Char]
"Simulation"
[Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoS ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Number of used cores: " [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
c
IOGenM StdGen
gen <- StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen)
forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM (StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen))
-> StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen)
forall a b. (a -> b) -> a -> b
$ Int -> StdGen
mkStdGen Int
s
[StdGen]
rs <- Int
-> ReaderT (Environment SimulateArguments) IO StdGen
-> ReaderT (Environment SimulateArguments) IO [StdGen]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
c (ReaderT (Environment SimulateArguments) IO StdGen
-> ReaderT (Environment SimulateArguments) IO [StdGen])
-> ReaderT (Environment SimulateArguments) IO StdGen
-> ReaderT (Environment SimulateArguments) IO [StdGen]
forall a b. (a -> b) -> a -> b
$ IOGenM StdGen -> ReaderT (Environment SimulateArguments) IO StdGen
forall g r (m :: * -> *). RandomGenM g r m => g -> m r
splitGenM IOGenM StdGen
gen
[IOGenM StdGen]
gs <- (StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen))
-> [StdGen]
-> ReaderT (Environment SimulateArguments) IO [IOGenM StdGen]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen)
forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM [StdGen]
rs
let chunks :: [Int]
chunks = Int -> Int -> [Int]
getChunks Int
c Int
nTrees
Forest Length Int
trs <- case Process
pr of
(BirthDeath Double
lambda Double
mu Maybe Double
mRho TimeSpec
timeSpec) -> do
let rho :: Double
rho = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe Double
1.0 Maybe Double
mRho
case Maybe Double
subS of
Maybe Double
Nothing -> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM StdGen]
-> IO (Forest Length Int)
forall g.
RandomGen g =>
Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
lambda Double
mu Double
rho TimeSpec
timeSpec [Int]
chunks [IOGenM StdGen]
gs
Just Double
p ->
Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM StdGen]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall g.
RandomGen g =>
Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently
Int
nLeaves
Double
lambda
Double
mu
Double
rho
Double
p
TimeSpec
timeSpec
[Int]
chunks
[IOGenM StdGen]
gs
Process
Coalescent -> case Maybe Double
subS of
Maybe Double
Nothing -> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> [Int] -> [IOGenM StdGen] -> IO (Forest Length Int)
forall g.
RandomGen g =>
Int -> [Int] -> [IOGenM g] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nLeaves [Int]
chunks [IOGenM StdGen]
gs
Just Double
p -> Int
-> Double
-> [Int]
-> [IOGenM StdGen]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall g.
RandomGen g =>
Int
-> Double
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
p [Int]
chunks [IOGenM StdGen]
gs
let ls :: [ByteString]
ls =
if Bool
sumS
then Strategy ByteString
-> (Tree Length Int -> ByteString)
-> Forest Length Int
-> [ByteString]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy ByteString
forall a. Strategy a
rpar (NChildSumStat -> ByteString
formatNChildSumStat (NChildSumStat -> ByteString)
-> (Tree Length Int -> NChildSumStat)
-> Tree Length Int
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree Length Int -> NChildSumStat
forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat) Forest Length Int
trs
else Strategy ByteString
-> (Tree Phylo Int -> ByteString)
-> [Tree Phylo Int]
-> [ByteString]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy ByteString
forall a. Strategy a
rpar Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick ([Tree Phylo Int] -> [ByteString])
-> [Tree Phylo Int] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Phylo Int)
-> Forest Length Int -> [Tree Phylo Int]
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Forest Length Int
trs
let res :: ByteString
res = [ByteString] -> ByteString
BL.unlines [ByteString]
ls
[Char] -> ByteString -> [Char] -> ELynx SimulateArguments ()
forall a.
Reproducible a =>
[Char] -> ByteString -> [Char] -> ELynx a ()
out [Char]
"simulated trees" ByteString
res [Char]
".tree"
bdSimulateNTreesConcurrently ::
RandomGen g =>
Int ->
Double ->
Double ->
Double ->
PP.TimeSpec ->
[Int] ->
[IOGenM g] ->
IO (Forest Length Int)
bdSimulateNTreesConcurrently :: Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
l Double
m Double
r TimeSpec
timeSpec [Int]
chunks [IOGenM g]
gs = do
let l' :: Double
l' = Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
r
m' :: Double
m' = Double
m Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
r)
[Forest Length Int]
trss <-
((Int, IOGenM g) -> IO (Forest Length Int))
-> [(Int, IOGenM g)] -> IO [Forest Length Int]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
(\(Int
n, IOGenM g
g) -> Int
-> Int
-> TimeSpec
-> Double
-> Double
-> IOGenM g
-> IO (Forest Length Int)
forall g (m :: * -> *).
StatefulGen g m =>
Int
-> Int
-> TimeSpec
-> Double
-> Double
-> g
-> m (Forest Length Int)
PP.simulateNReconstructedTrees Int
n Int
nLeaves TimeSpec
timeSpec Double
l' Double
m' IOGenM g
g)
([Int] -> [IOGenM g] -> [(Int, IOGenM g)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
Forest Length Int -> IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int -> IO (Forest Length Int))
-> Forest Length Int -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ [Forest Length Int] -> Forest Length Int
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss
coalSimulateNTreesConcurrently ::
RandomGen g =>
Int ->
[Int] ->
[IOGenM g] ->
IO (Forest Length Int)
coalSimulateNTreesConcurrently :: Int -> [Int] -> [IOGenM g] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nL [Int]
chunks [IOGenM g]
gs = do
[Forest Length Int]
trss <-
((Int, IOGenM g) -> IO (Forest Length Int))
-> [(Int, IOGenM g)] -> IO [Forest Length Int]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
(\(Int
n, IOGenM g
g) -> Int -> IO (Tree Length Int) -> IO (Forest Length Int)
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
n (IO (Tree Length Int) -> IO (Forest Length Int))
-> IO (Tree Length Int) -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> IOGenM g -> IO (Tree Length Int)
forall g (m :: * -> *).
StatefulGen g m =>
Int -> g -> m (Tree Length Int)
CS.simulate Int
nL IOGenM g
g)
([Int] -> [IOGenM g] -> [(Int, IOGenM g)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
Forest Length Int -> IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int -> IO (Forest Length Int))
-> Forest Length Int -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ [Forest Length Int] -> Forest Length Int
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss
bdSimulateAndSubSampleNTreesConcurrently ::
RandomGen g =>
Int ->
Double ->
Double ->
Double ->
Double ->
PP.TimeSpec ->
[Int] ->
[IOGenM g] ->
ELynx SimulateArguments (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently :: Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
l Double
m Double
r Double
p TimeSpec
timeSpec [Int]
chunks [IOGenM g]
gs = do
let nLeavesBigTree :: Int
nLeavesBigTree = (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nLeaves Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
l' :: Double
l' = Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
r
m' :: Double
m' = Double
m Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
r)
[Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
[Char]
"Simulate one big tree with "
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
nLeavesBigTree
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" leaves."
Tree Length Int
tr <- IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int))
-> IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall a b. (a -> b) -> a -> b
$ Int
-> TimeSpec -> Double -> Double -> IOGenM g -> IO (Tree Length Int)
forall g (m :: * -> *).
StatefulGen g m =>
Int -> TimeSpec -> Double -> Double -> g -> m (Tree Length Int)
PP.simulateReconstructedTree Int
nLeavesBigTree TimeSpec
timeSpec Double
l' Double
m' ([IOGenM g] -> IOGenM g
forall a. [a] -> a
head [IOGenM g]
gs)
ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick (Tree Phylo Int -> ByteString) -> Tree Phylo Int -> ByteString
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Tree Length Int
tr
[Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
[Char]
"Sub sample "
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" trees with "
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
nLeaves
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" leaves."
let lvs :: Seq Int
lvs = [Int] -> Seq Int
forall a. [a] -> Seq a
Seq.fromList ([Int] -> Seq Int) -> [Int] -> Seq Int
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> [Int]
forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
[[Maybe (Tree Length Int)]]
trss <-
IO [[Maybe (Tree Length Int)]]
-> ReaderT
(Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[Maybe (Tree Length Int)]]
-> ReaderT
(Environment SimulateArguments) IO [[Maybe (Tree Length Int)]])
-> IO [[Maybe (Tree Length Int)]]
-> ReaderT
(Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall a b. (a -> b) -> a -> b
$
((Int, IOGenM g) -> IO [Maybe (Tree Length Int)])
-> [(Int, IOGenM g)] -> IO [[Maybe (Tree Length Int)]]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
(\(Int
nSamples, IOGenM g
g) -> Int
-> Seq Int
-> Int
-> Tree Length Int
-> IOGenM g
-> IO [Maybe (Tree Length Int)]
forall a g (m :: * -> *) e.
(Ord a, StatefulGen g m) =>
Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nLeaves Tree Length Int
tr IOGenM g
g)
([Int] -> [IOGenM g] -> [(Int, IOGenM g)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
let trs :: Forest Length Int
trs = [Maybe (Tree Length Int)] -> Forest Length Int
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Tree Length Int)] -> Forest Length Int)
-> [Maybe (Tree Length Int)] -> Forest Length Int
forall a b. (a -> b) -> a -> b
$ [[Maybe (Tree Length Int)]] -> [Maybe (Tree Length Int)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Length Int)
-> Forest Length Int -> Forest Length Int
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Length Int
forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs
coalSimulateAndSubSampleNTreesConcurrently ::
RandomGen g =>
Int ->
Double ->
[Int] ->
[IOGenM g] ->
ELynx SimulateArguments (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently :: Int
-> Double
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nL Double
p [Int]
chunks [IOGenM g]
gs = do
let nLeavesBigTree :: Int
nLeavesBigTree = (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nL Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
[Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
[Char]
"Simulate one big tree with "
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
nLeavesBigTree
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" leaves."
Tree Length Int
tr <- IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int))
-> IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> IOGenM g -> IO (Tree Length Int)
forall g (m :: * -> *).
StatefulGen g m =>
Int -> g -> m (Tree Length Int)
CS.simulate Int
nLeavesBigTree ([IOGenM g] -> IOGenM g
forall a. [a] -> a
head [IOGenM g]
gs)
ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick (Tree Phylo Int -> ByteString) -> Tree Phylo Int -> ByteString
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Tree Length Int
tr
[Char] -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
[Char] -> Logger e ()
logInfoNewSection ([Char] -> ELynx SimulateArguments ())
-> [Char] -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
[Char]
"Sub sample "
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" trees with "
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Int -> [Char]
forall a. Show a => a -> [Char]
show Int
nL
[Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
" leaves."
let lvs :: Seq Int
lvs = [Int] -> Seq Int
forall a. [a] -> Seq a
Seq.fromList ([Int] -> Seq Int) -> [Int] -> Seq Int
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> [Int]
forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
[[Maybe (Tree Length Int)]]
trss <-
IO [[Maybe (Tree Length Int)]]
-> ReaderT
(Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[Maybe (Tree Length Int)]]
-> ReaderT
(Environment SimulateArguments) IO [[Maybe (Tree Length Int)]])
-> IO [[Maybe (Tree Length Int)]]
-> ReaderT
(Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall a b. (a -> b) -> a -> b
$
((Int, IOGenM g) -> IO [Maybe (Tree Length Int)])
-> [(Int, IOGenM g)] -> IO [[Maybe (Tree Length Int)]]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
(\(Int
nSamples, IOGenM g
g) -> Int
-> Seq Int
-> Int
-> Tree Length Int
-> IOGenM g
-> IO [Maybe (Tree Length Int)]
forall a g (m :: * -> *) e.
(Ord a, StatefulGen g m) =>
Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nL Tree Length Int
tr IOGenM g
g)
([Int] -> [IOGenM g] -> [(Int, IOGenM g)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
let trs :: Forest Length Int
trs = [Maybe (Tree Length Int)] -> Forest Length Int
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Tree Length Int)] -> Forest Length Int)
-> [Maybe (Tree Length Int)] -> Forest Length Int
forall a b. (a -> b) -> a -> b
$ [[Maybe (Tree Length Int)]] -> [Maybe (Tree Length Int)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Length Int)
-> Forest Length Int -> Forest Length Int
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Length Int
forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs
nSubSamples ::
(Ord a, StatefulGen g m) =>
Int ->
Seq.Seq a ->
Int ->
Tree e a ->
g ->
m [Maybe (Tree e a)]
nSubSamples :: Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
m Seq a
lvs Int
n Tree e a
tree g
g
| Seq a -> Int
forall a. Seq a -> Int
Seq.length Seq a
lvs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n =
[Char] -> m [Maybe (Tree e a)]
forall a. HasCallStack => [Char] -> a
error
[Char]
"Given list of leaves is shorter than requested number of leaves."
| Bool
otherwise = do
[[a]]
lss <- [a] -> Int -> Int -> g -> m [[a]]
forall g (m :: * -> *) a.
StatefulGen g m =>
[a] -> Int -> Int -> g -> m [[a]]
grabble (Seq a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Seq a
lvs) Int
m Int
n g
g
let lsSets :: [Set a]
lsSets = ([a] -> Set a) -> [[a]] -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map [a] -> Set a
forall a. Ord a => [a] -> Set a
Set.fromList [[a]]
lss
[Maybe (Tree e a)] -> m [Maybe (Tree e a)]
forall (m :: * -> *) a. Monad m => a -> m a
return [(a -> Bool) -> Tree e a -> Maybe (Tree e a)
forall a e. (a -> Bool) -> Tree e a -> Maybe (Tree e a)
dropLeavesWith (a -> Set a -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set a
ls) Tree e a
tree | Set a
ls <- [Set a]
lsSets]
type BrLnNChildren = (Length, Int)
type NChildSumStat = [BrLnNChildren]
formatNChildSumStat :: NChildSumStat -> BL.ByteString
formatNChildSumStat :: NChildSumStat -> ByteString
formatNChildSumStat NChildSumStat
s =
Builder -> ByteString
BB.toLazyByteString (Builder -> ByteString)
-> ([Builder] -> Builder) -> [Builder] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> ByteString) -> [Builder] -> ByteString
forall a b. (a -> b) -> a -> b
$ (BrLnNChildren -> Builder) -> NChildSumStat -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map BrLnNChildren -> Builder
formatNChildSumStatLine NChildSumStat
s
formatNChildSumStatLine :: BrLnNChildren -> BB.Builder
formatNChildSumStatLine :: BrLnNChildren -> Builder
formatNChildSumStatLine (Length
l, Int
n) =
Int -> Builder
BB.intDec Int
n Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
' ' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Double -> Builder
BB.doubleDec (Length -> Double
fromLength Length
l) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n'
toNChildSumStat :: HasLength e => Tree e a -> NChildSumStat
toNChildSumStat :: Tree e a -> NChildSumStat
toNChildSumStat (Node e
br a
_ []) = [(e -> Length
forall e. HasLength e => e -> Length
getLength e
br, Int
1)]
toNChildSumStat (Node e
br a
_ [Tree e a]
ts) = (e -> Length
forall e. HasLength e => e -> Length
getLength e
br, Int
sumCh) BrLnNChildren -> NChildSumStat -> NChildSumStat
forall a. a -> [a] -> [a]
: [NChildSumStat] -> NChildSumStat
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [NChildSumStat]
nChSS
where
nChSS :: [NChildSumStat]
nChSS = (Tree e a -> NChildSumStat) -> [Tree e a] -> [NChildSumStat]
forall a b. (a -> b) -> [a] -> [b]
map Tree e a -> NChildSumStat
forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat [Tree e a]
ts
sumCh :: Int
sumCh = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (NChildSumStat -> Int) -> [NChildSumStat] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (BrLnNChildren -> Int
forall a b. (a, b) -> b
snd (BrLnNChildren -> Int)
-> (NChildSumStat -> BrLnNChildren) -> NChildSumStat -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NChildSumStat -> BrLnNChildren
forall a. [a] -> a
head) [NChildSumStat]
nChSS