{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module TLynx.Shuffle.Shuffle
( shuffleCmd,
)
where
import qualified Control.Comonad as C
import Control.Monad (when)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Logger (logDebug, logInfo)
import Control.Monad.Trans.Reader (ask)
import qualified Data.ByteString.Lazy.Char8 as BL
import ELynx.Tools
import ELynx.Tree
import ELynx.Tree.Simulate.PointProcess
( PointProcess (PointProcess),
toReconstructedTree,
)
import System.IO (hClose)
import System.Random.MWC (GenIO, initialize)
import TLynx.Shuffle.Options
import TLynx.Parsers
shuffleCmd :: ELynx ShuffleArguments ()
shuffleCmd = do
l <- local <$> ask
h <- outHandle "results" ".tree"
let nwF = nwFormat l
tPhylo <- liftIO $ parseTree nwF (inFile l)
$(logInfo) "Input tree:"
$(logInfo) $ fromBs $ toNewick tPhylo
let t = either error id $ phyloToLengthTree tPhylo
let dh = sum $ map (height t -) (distancesOriginLeaves t)
$(logDebug) $ "Distance in branch length to being ultrametric: " <> tShow dh
when (dh > 2e-4) (error "Tree is not ultrametric.")
when (dh > eps && dh < 2e-4) $
$(logInfo)
"Tree is nearly ultrametric, ignore branch length differences smaller than 2e-4."
when (dh < eps) $ $(logInfo) "Tree is ultrametric."
let cs = filter (> 0) $ labels $ C.extend rootHeight t
ls = map getName $ leaves t
$(logDebug) $ "Number of coalescent times: " <> tShow (length cs)
$(logDebug) $ "Number of leaves: " <> tShow (length ls)
$(logDebug) "The coalescent times are: "
$(logDebug) $ tShow cs
gen <- case argsSeed l of
Random -> error "Seed not available; please contact maintainer."
Fixed s -> liftIO $ initialize s
ts <- liftIO $ shuffleT (nReplicates l) (height t) cs ls gen
liftIO $ BL.hPutStr h $ BL.unlines $ map (toNewick . measurableToPhyloTree) ts
liftIO $ hClose h
shuffleT ::
Int ->
Double ->
[Double] ->
[BL.ByteString] ->
GenIO ->
IO (Forest Length BL.ByteString)
shuffleT n o cs ls gen = do
css <- grabble cs n (length cs) gen
lss <- grabble ls n (length ls) gen
return
[ toReconstructedTree "" (PointProcess names times o)
| (times, names) <- zip css lss
]