{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module TLynx.Simulate.Simulate
( simulate,
)
where
import Control.Concurrent (getNumCapabilities)
import Control.Concurrent.Async
( mapConcurrently,
)
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Reader hiding (local)
import Control.Parallel.Strategies
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Foldable
import Data.Maybe
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import ELynx.Tree
import qualified ELynx.Tree.Simulate.Coalescent as CS
import qualified ELynx.Tree.Simulate.PointProcess as PP
import System.Random.Stateful
import TLynx.Grabble
import TLynx.Simulate.Options
getChunks :: Int -> Int -> [Int]
getChunks :: Int -> Int -> [Int]
getChunks Int
c Int
n = [Int]
ns
where
n' :: Int
n' = Int
n forall a. Integral a => a -> a -> a
`div` Int
c
r :: Int
r = Int
n forall a. Integral a => a -> a -> a
`mod` Int
c
ns :: [Int]
ns = forall a. Int -> a -> [a]
replicate Int
r (Int
n' forall a. Num a => a -> a -> a
+ Int
1) forall a. [a] -> [a] -> [a]
++ forall a. Int -> a -> [a]
replicate (Int
c forall a. Num a => a -> a -> a
- Int
r) Int
n'
simulate :: ELynx SimulateArguments ()
simulate :: ELynx SimulateArguments ()
simulate = do
l :: SimulateArguments
l@(SimulateArguments Int
nTrees Int
nLeaves Process
pr Maybe Double
subS Bool
sumS SeedOpt
sOpt) <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
let s :: Int
s = case SeedOpt -> Maybe Int
fromSeedOpt SeedOpt
sOpt of
Maybe Int
Nothing -> forall a. HasCallStack => String -> a
error String
"simulate: No seed."
Just Int
x -> Int
x
Int
c <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO Int
getNumCapabilities
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection String
"Arguments"
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ SimulateArguments -> String
reportSimulateArguments SimulateArguments
l
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection String
"Simulation"
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ String
"Number of used cores: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
c
IOGenM StdGen
gen <- forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM forall a b. (a -> b) -> a -> b
$ Int -> StdGen
mkStdGen Int
s
[StdGen]
rs <- forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
c forall a b. (a -> b) -> a -> b
$ forall g r (m :: * -> *). RandomGenM g r m => g -> m r
splitGenM IOGenM StdGen
gen
[IOGenM StdGen]
gs <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM [StdGen]
rs
let chunks :: [Int]
chunks = Int -> Int -> [Int]
getChunks Int
c Int
nTrees
Forest Length Int
trs <- case Process
pr of
(BirthDeath Double
lambda Double
mu Maybe Double
mRho TimeSpec
timeSpec) -> do
let rho :: Double
rho = forall a. a -> Maybe a -> a
fromMaybe Double
1.0 Maybe Double
mRho
case Maybe Double
subS of
Maybe Double
Nothing -> forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall g.
RandomGen g =>
Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
lambda Double
mu Double
rho TimeSpec
timeSpec [Int]
chunks [IOGenM StdGen]
gs
Just Double
p ->
forall g.
RandomGen g =>
Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently
Int
nLeaves
Double
lambda
Double
mu
Double
rho
Double
p
TimeSpec
timeSpec
[Int]
chunks
[IOGenM StdGen]
gs
Process
Coalescent -> case Maybe Double
subS of
Maybe Double
Nothing -> forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall g.
RandomGen g =>
Int -> [Int] -> [IOGenM g] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nLeaves [Int]
chunks [IOGenM StdGen]
gs
Just Double
p -> forall g.
RandomGen g =>
Int
-> Double
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
p [Int]
chunks [IOGenM StdGen]
gs
let ls :: [ByteString]
ls =
if Bool
sumS
then forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap forall a. Strategy a
rpar (NChildSumStat -> ByteString
formatNChildSumStat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat) Forest Length Int
trs
else forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap forall a. Strategy a
rpar forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Forest Length Int
trs
let res :: ByteString
res = [ByteString] -> ByteString
BL.unlines [ByteString]
ls
forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"simulated trees" ByteString
res String
".tree"
bdSimulateNTreesConcurrently ::
RandomGen g =>
Int ->
Double ->
Double ->
Double ->
PP.TimeSpec ->
[Int] ->
[IOGenM g] ->
IO (Forest Length Int)
bdSimulateNTreesConcurrently :: forall g.
RandomGen g =>
Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
l Double
m Double
r TimeSpec
timeSpec [Int]
chunks [IOGenM g]
gs = do
let l' :: Double
l' = Double
l forall a. Num a => a -> a -> a
* Double
r
m' :: Double
m' = Double
m forall a. Num a => a -> a -> a
- Double
l forall a. Num a => a -> a -> a
* (Double
1.0 forall a. Num a => a -> a -> a
- Double
r)
[Forest Length Int]
trss <-
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
(\(Int
n, IOGenM g
g) -> forall g (m :: * -> *).
StatefulGen g m =>
Int
-> Int
-> TimeSpec
-> Double
-> Double
-> g
-> m (Forest Length Int)
PP.simulateNReconstructedTrees Int
n Int
nLeaves TimeSpec
timeSpec Double
l' Double
m' IOGenM g
g)
(forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss
coalSimulateNTreesConcurrently ::
RandomGen g =>
Int ->
[Int] ->
[IOGenM g] ->
IO (Forest Length Int)
coalSimulateNTreesConcurrently :: forall g.
RandomGen g =>
Int -> [Int] -> [IOGenM g] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nL [Int]
chunks [IOGenM g]
gs = do
[Forest Length Int]
trss <-
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
(\(Int
n, IOGenM g
g) -> forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
n forall a b. (a -> b) -> a -> b
$ forall g (m :: * -> *).
StatefulGen g m =>
Int -> g -> m (Tree Length Int)
CS.simulate Int
nL IOGenM g
g)
(forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss
bdSimulateAndSubSampleNTreesConcurrently ::
RandomGen g =>
Int ->
Double ->
Double ->
Double ->
Double ->
PP.TimeSpec ->
[Int] ->
[IOGenM g] ->
ELynx SimulateArguments (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently :: forall g.
RandomGen g =>
Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
l Double
m Double
r Double
p TimeSpec
timeSpec [Int]
chunks [IOGenM g]
gs = do
let nLeavesBigTree :: Int
nLeavesBigTree = (forall a b. (RealFrac a, Integral b) => a -> b
round forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nLeaves forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
l' :: Double
l' = Double
l forall a. Num a => a -> a -> a
* Double
r
m' :: Double
m' = Double
m forall a. Num a => a -> a -> a
- Double
l forall a. Num a => a -> a -> a
* (Double
1.0 forall a. Num a => a -> a -> a
- Double
r)
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection forall a b. (a -> b) -> a -> b
$
String
"Simulate one big tree with "
forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
nLeavesBigTree
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
Tree Length Int
tr <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall g (m :: * -> *).
StatefulGen g m =>
Int -> TimeSpec -> Double -> Double -> g -> m (Tree Length Int)
PP.simulateReconstructedTree Int
nLeavesBigTree TimeSpec
timeSpec Double
l' Double
m' (forall a. [a] -> a
head [IOGenM g]
gs)
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB forall a b. (a -> b) -> a -> b
$ forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick forall a b. (a -> b) -> a -> b
$ forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Tree Length Int
tr
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection forall a b. (a -> b) -> a -> b
$
String
"Sub sample "
forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show (forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
forall a. Semigroup a => a -> a -> a
<> String
" trees with "
forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
nLeaves
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
let lvs :: Seq Int
lvs = forall a. [a] -> Seq a
Seq.fromList forall a b. (a -> b) -> a -> b
$ forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
[[Maybe (Tree Length Int)]]
trss <-
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
(\(Int
nSamples, IOGenM g
g) -> forall a g (m :: * -> *) e.
(Ord a, StatefulGen g m) =>
Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nLeaves Tree Length Int
tr IOGenM g
g)
(forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
let trs :: Forest Length Int
trs = forall a. [Maybe a] -> [a]
catMaybes forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs
coalSimulateAndSubSampleNTreesConcurrently ::
RandomGen g =>
Int ->
Double ->
[Int] ->
[IOGenM g] ->
ELynx SimulateArguments (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently :: forall g.
RandomGen g =>
Int
-> Double
-> [Int]
-> [IOGenM g]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nL Double
p [Int]
chunks [IOGenM g]
gs = do
let nLeavesBigTree :: Int
nLeavesBigTree = (forall a b. (RealFrac a, Integral b) => a -> b
round forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nL forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection forall a b. (a -> b) -> a -> b
$
String
"Simulate one big tree with "
forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
nLeavesBigTree
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
Tree Length Int
tr <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall g (m :: * -> *).
StatefulGen g m =>
Int -> g -> m (Tree Length Int)
CS.simulate Int
nLeavesBigTree (forall a. [a] -> a
head [IOGenM g]
gs)
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB forall a b. (a -> b) -> a -> b
$ forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick forall a b. (a -> b) -> a -> b
$ forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Tree Length Int
tr
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection forall a b. (a -> b) -> a -> b
$
String
"Sub sample "
forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show (forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
forall a. Semigroup a => a -> a -> a
<> String
" trees with "
forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
nL
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
let lvs :: Seq Int
lvs = forall a. [a] -> Seq a
Seq.fromList forall a b. (a -> b) -> a -> b
$ forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
[[Maybe (Tree Length Int)]]
trss <-
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
(\(Int
nSamples, IOGenM g
g) -> forall a g (m :: * -> *) e.
(Ord a, StatefulGen g m) =>
Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nL Tree Length Int
tr IOGenM g
g)
(forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [IOGenM g]
gs)
let trs :: Forest Length Int
trs = forall a. [Maybe a] -> [a]
catMaybes forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs
nSubSamples ::
(Ord a, StatefulGen g m) =>
Int ->
Seq.Seq a ->
Int ->
Tree e a ->
g ->
m [Maybe (Tree e a)]
nSubSamples :: forall a g (m :: * -> *) e.
(Ord a, StatefulGen g m) =>
Int -> Seq a -> Int -> Tree e a -> g -> m [Maybe (Tree e a)]
nSubSamples Int
m Seq a
lvs Int
n Tree e a
tree g
g
| forall a. Seq a -> Int
Seq.length Seq a
lvs forall a. Ord a => a -> a -> Bool
< Int
n =
forall a. HasCallStack => String -> a
error
String
"Given list of leaves is shorter than requested number of leaves."
| Bool
otherwise = do
[[a]]
lss <- forall g (m :: * -> *) a.
StatefulGen g m =>
[a] -> Int -> Int -> g -> m [[a]]
grabble (forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Seq a
lvs) Int
m Int
n g
g
let lsSets :: [Set a]
lsSets = forall a b. (a -> b) -> [a] -> [b]
map forall a. Ord a => [a] -> Set a
Set.fromList [[a]]
lss
forall (m :: * -> *) a. Monad m => a -> m a
return [forall a e. (a -> Bool) -> Tree e a -> Maybe (Tree e a)
dropLeavesWith (forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set a
ls) Tree e a
tree | Set a
ls <- [Set a]
lsSets]
type BrLnNChildren = (Length, Int)
type NChildSumStat = [BrLnNChildren]
formatNChildSumStat :: NChildSumStat -> BL.ByteString
formatNChildSumStat :: NChildSumStat -> ByteString
formatNChildSumStat NChildSumStat
s =
Builder -> ByteString
BB.toLazyByteString forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map BrLnNChildren -> Builder
formatNChildSumStatLine NChildSumStat
s
formatNChildSumStatLine :: BrLnNChildren -> BB.Builder
formatNChildSumStatLine :: BrLnNChildren -> Builder
formatNChildSumStatLine (Length
l, Int
n) =
Int -> Builder
BB.intDec Int
n forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
' ' forall a. Semigroup a => a -> a -> a
<> Double -> Builder
BB.doubleDec (Length -> Double
fromLength Length
l) forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n'
toNChildSumStat :: HasLength e => Tree e a -> NChildSumStat
toNChildSumStat :: forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat (Node e
br a
_ []) = [(forall e. HasLength e => e -> Length
getLength e
br, Int
1)]
toNChildSumStat (Node e
br a
_ [Tree e a]
ts) = (forall e. HasLength e => e -> Length
getLength e
br, Int
sumCh) forall a. a -> [a] -> [a]
: forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [NChildSumStat]
nChSS
where
nChSS :: [NChildSumStat]
nChSS = forall a b. (a -> b) -> [a] -> [b]
map forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat [Tree e a]
ts
sumCh :: Int
sumCh = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head) [NChildSumStat]
nChSS