{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
module Simulate.Simulate
( simulate
)
where
import Control.Concurrent (getNumCapabilities)
import Control.Concurrent.Async.Lifted.Safe (mapConcurrently)
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Logger
import Control.Monad.Trans.Class
import Control.Monad.Trans.Reader
import Control.Parallel.Strategies
import qualified Data.ByteString.Lazy.Char8 as L
import Data.Maybe
import qualified Data.Sequence as Seq
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Encoding as LT
import Data.Tree
import Simulate.Options
import ELynx.Data.Tree.MeasurableTree
import ELynx.Data.Tree.PhyloTree (PhyloIntLabel)
import ELynx.Data.Tree.SumStat (formatNChildSumStat,
toNChildSumStat)
import ELynx.Data.Tree.Tree
import ELynx.Export.Tree.Newick (toNewick)
import ELynx.Simulate.PointProcess (simulateNReconstructedTrees,
simulateReconstructedTree)
import ELynx.Tools.Concurrent
import ELynx.Tools.InputOutput
import ELynx.Tools.Logger
simulate :: Maybe FilePath -> Simulate ()
simulate outFile = do
a <- lift ask
when (isNothing (argsHeight a) && argsConditionMRCA a) $
error "Cannot condition on MRCA (-M) when height is not given (-H)."
let s = argsSumStat a
nCap <- liftIO getNumCapabilities
logNewSection "Arguments"
$(logInfo) $ T.pack $ reportSimulateArguments a
logNewSection "Simulation"
$(logInfo) $ T.pack $ "Number of used cores: " <> show nCap
trs <- if argsSubSample a
then simulateAndSubSampleNTreesConcurrently nCap
else simulateNTreesConcurrently nCap
let ls = if s
then parMap rpar (formatNChildSumStat . toNChildSumStat) trs
else parMap rpar toNewick trs
let outFile' = (++ ".tree") <$> outFile
let res = L.unlines ls
io "simulated trees" res outFile'
simulateNTreesConcurrently :: Int -> Simulate [Tree PhyloIntLabel]
simulateNTreesConcurrently c = do
(SimulateArguments nT nL h cM l m r _ _ s) <- lift ask
let l' = l * r
m' = m - l * (1.0 - r)
gs <- liftIO $ getNGen c s
let chunks = getChunks c nT
timeSpec = fmap (, cM) h
trss <- liftIO $ mapConcurrently
(\(n, g) -> simulateNReconstructedTrees n nL timeSpec l' m' g)
(zip chunks gs)
return $ concat trss
simulateAndSubSampleNTreesConcurrently :: Int -> Simulate [Tree PhyloIntLabel]
simulateAndSubSampleNTreesConcurrently c = do
(SimulateArguments nT nL h cM l m r _ _ s) <- lift ask
let nLeavesBigTree = (round $ fromIntegral nL / r) :: Int
gs <- liftIO $ getNGen c s
let chunks = getChunks c nT
timeSpec = fmap (, cM) h
tr <- liftIO $ simulateReconstructedTree nLeavesBigTree timeSpec l m (head gs)
logNewSection $ T.pack $ "Simulate one big tree with " <> show nLeavesBigTree <> " leaves."
$(logInfo) $ LT.toStrict $ LT.decodeUtf8 $ toNewick tr
logNewSection $ T.pack $ "Sub sample " <> show nT <> " trees with " <> show nL <> " leaves."
let lvs = Seq.fromList $ leaves tr
trss <- liftIO $ mapConcurrently
(\(nSamples, g) -> nSubSamples nSamples lvs nL tr g)
(zip chunks gs)
let trs = catMaybes $ concat trss
return $ map prune trs