{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  TLynx.Shuffle.Shuffle
-- Description :  Shuffle a phylogeny
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Thu Sep 19 15:01:52 2019.
--
-- The coalescent times are unaffected. The topology and the leaf order is
-- shuffled. Branch support values are ignored and lost.
module TLynx.Shuffle.Shuffle
  ( shuffleCmd,
  )
where

import qualified Control.Comonad as C
import Control.Monad (when)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Reader (ask)
import qualified Data.ByteString.Lazy.Char8 as BL
import ELynx.Tools.Definitions
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import ELynx.Tree
import ELynx.Tree.Simulate.PointProcess
  ( PointProcess (PointProcess),
    toReconstructedTree,
  )
import System.IO (hClose)
import System.Random.Stateful
import TLynx.Grabble
import TLynx.Parsers
import TLynx.Shuffle.Options

-- | Shuffle a tree. Get all coalescent times, shuffle them. Get all leaves,
-- shuffle them. Connect the shuffled leaves with the shuffled coalescent times.
-- The shuffled tree has a new topology while keeping the same set of coalescent
-- times and leaves.
shuffleCmd :: ELynx ShuffleArguments ()
shuffleCmd :: ELynx ShuffleArguments ()
shuffleCmd = do
  ShuffleArguments
l <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  Handle
h <- forall a. Reproducible a => String -> String -> ELynx a Handle
outHandle String
"results" String
".tree"
  let nwF :: NewickFormat
nwF = ShuffleArguments -> NewickFormat
nwFormat ShuffleArguments
l
  Tree Phylo Name
tPhylo <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwF (ShuffleArguments -> String
inFile ShuffleArguments
l)
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Input tree:"
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB forall a b. (a -> b) -> a -> b
$ forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick Tree Phylo Name
tPhylo
  let t :: Tree Length Name
t = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
tPhylo
  -- Check if tree is ultrametric enough.
  let dh :: Length
dh = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall e a. HasLength e => Tree e a -> Length
height Tree Length Name
t forall a. Num a => a -> a -> a
-) (forall e a. HasLength e => Tree e a -> [Length]
distancesOriginLeaves Tree Length Name
t)
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS forall a b. (a -> b) -> a -> b
$ String
"Distance in branch length to being ultrametric: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Length
dh
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh forall a. Ord a => a -> a -> Bool
> Length
2e-4) (forall a. HasCallStack => String -> a
error String
"Tree is not ultrametric.")
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh forall a. Ord a => a -> a -> Bool
> Double -> Length
toLengthUnsafe Double
eps Bool -> Bool -> Bool
&& Length
dh forall a. Ord a => a -> a -> Bool
< Length
2e-4) forall a b. (a -> b) -> a -> b
$
    forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS
      String
"Tree is nearly ultrametric, ignore branch length differences smaller than 2e-4."
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh forall a. Ord a => a -> a -> Bool
< Double -> Length
toLengthUnsafe Double
eps) forall a b. (a -> b) -> a -> b
$ forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Tree is ultrametric."
  let cs :: [Length]
cs = forall a. (a -> Bool) -> [a] -> [a]
filter (forall a. Ord a => a -> a -> Bool
> Length
0) forall a b. (a -> b) -> a -> b
$ forall e a. Tree e a -> [a]
labels forall a b. (a -> b) -> a -> b
$ forall (w :: * -> *) a b. Comonad w => (w a -> b) -> w a -> w b
C.extend forall e a. HasLength e => Tree e a -> Length
rootHeight Tree Length Name
t
      ls :: [Name]
ls = forall a b. (a -> b) -> [a] -> [b]
map forall a. HasName a => a -> Name
getName forall a b. (a -> b) -> a -> b
$ forall e a. Tree e a -> [a]
leaves Tree Length Name
t
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS forall a b. (a -> b) -> a -> b
$ String
"Number of coalescent times: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Length]
cs)
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS forall a b. (a -> b) -> a -> b
$ String
"Number of leaves: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Name]
ls)
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS String
"The coalescent times are: "
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show [Length]
cs
  IOGenM StdGen
gen <- forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM forall a b. (a -> b) -> a -> b
$ Int -> StdGen
mkStdGen forall a b. (a -> b) -> a -> b
$ case ShuffleArguments -> SeedOpt
argsSeed ShuffleArguments
l of
    SeedOpt
RandomUnset -> forall a. HasCallStack => String -> a
error String
"Seed not available; please contact maintainer."
    RandomSet Int
s -> Int
s
    Fixed Int
s -> Int
s
  Forest Length Name
ts <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall g (m :: * -> *).
StatefulGen g m =>
Int -> Length -> [Length] -> [Name] -> g -> m (Forest Length Name)
shuffleT (ShuffleArguments -> Int
nReplicates ShuffleArguments
l) (forall e a. HasLength e => Tree e a -> Length
height Tree Length Name
t) [Length]
cs [Name]
ls IOGenM StdGen
gen
  forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BL.hPutStr Handle
h forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.unlines forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree) Forest Length Name
ts
  forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ Handle -> IO ()
hClose Handle
h

shuffleT ::
  StatefulGen g m =>
  Int -> -- How many?
  Length -> -- Stem length.
  [Length] -> -- Coalescent times.
  [Name] -> -- Leave names.
  g ->
  m (Forest Length Name)
shuffleT :: forall g (m :: * -> *).
StatefulGen g m =>
Int -> Length -> [Length] -> [Name] -> g -> m (Forest Length Name)
shuffleT Int
n Length
o [Length]
cs [Name]
ls g
gen = do
  [[Length]]
css <- forall g (m :: * -> *) a.
StatefulGen g m =>
[a] -> Int -> Int -> g -> m [[a]]
grabble [Length]
cs Int
n (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Length]
cs) g
gen
  [[Name]]
lss <- forall g (m :: * -> *) a.
StatefulGen g m =>
[a] -> Int -> Int -> g -> m [[a]]
grabble [Name]
ls Int
n (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Name]
ls) g
gen
  forall (m :: * -> *) a. Monad m => a -> m a
return
    [ forall a. a -> PointProcess a Length -> Tree Length a
toReconstructedTree Name
"" (forall a b. [a] -> [b] -> b -> PointProcess a b
PointProcess [Name]
names [Length]
times Length
o)
      | ([Length]
times, [Name]
names) <- forall a b. [a] -> [b] -> [(a, b)]
zip [[Length]]
css [[Name]]
lss
    ]