{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      :  TLynx.Shuffle.Shuffle
-- Description :  Shuffle a phylogeny
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Thu Sep 19 15:01:52 2019.
--
-- The coalescent times are unaffected. The topology and the leaf order is
-- shuffled. Branch support values are ignored and lost.
module TLynx.Shuffle.Shuffle
  ( shuffleCmd,
  )
where

import qualified Control.Comonad as C
import Control.Monad (when)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Reader (ask)
import qualified Data.ByteString.Lazy.Char8 as BL
import ELynx.Tools.Definitions
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import ELynx.Tree
import ELynx.Tree.Simulate.PointProcess
  ( PointProcess (PointProcess),
    toReconstructedTree,
  )
import System.IO (hClose)
import System.Random.MWC (GenIO, initialize)
import TLynx.Grabble
import TLynx.Parsers
import TLynx.Shuffle.Options

-- | Shuffle a tree. Get all coalescent times, shuffle them. Get all leaves,
-- shuffle them. Connect the shuffled leaves with the shuffled coalescent times.
-- The shuffled tree has a new topology while keeping the same set of coalescent
-- times and leaves.
shuffleCmd :: ELynx ShuffleArguments ()
shuffleCmd :: ELynx ShuffleArguments ()
shuffleCmd = do
  ShuffleArguments
l <- Environment ShuffleArguments -> ShuffleArguments
forall a. Environment a -> a
localArguments (Environment ShuffleArguments -> ShuffleArguments)
-> ReaderT
     (Environment ShuffleArguments) IO (Environment ShuffleArguments)
-> ReaderT (Environment ShuffleArguments) IO ShuffleArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment ShuffleArguments) IO (Environment ShuffleArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  Handle
h <- String -> String -> ELynx ShuffleArguments Handle
forall a. Reproducible a => String -> String -> ELynx a Handle
outHandle String
"results" String
".tree"
  let nwF :: NewickFormat
nwF = ShuffleArguments -> NewickFormat
nwFormat ShuffleArguments
l
  Tree Phylo Name
tPhylo <- IO (Tree Phylo Name)
-> ReaderT (Environment ShuffleArguments) IO (Tree Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Phylo Name)
 -> ReaderT (Environment ShuffleArguments) IO (Tree Phylo Name))
-> IO (Tree Phylo Name)
-> ReaderT (Environment ShuffleArguments) IO (Tree Phylo Name)
forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwF (ShuffleArguments -> String
inFile ShuffleArguments
l)
  String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Input tree:"
  ByteString -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx ShuffleArguments ())
-> ByteString -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick Tree Phylo Name
tPhylo
  let t :: Tree Length Name
t = (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
tPhylo
  -- Check if tree is ultrametric enough.
  let dh :: Length
dh = [Length] -> Length
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Length] -> Length) -> [Length] -> Length
forall a b. (a -> b) -> a -> b
$ (Length -> Length) -> [Length] -> [Length]
forall a b. (a -> b) -> [a] -> [b]
map (Tree Length Name -> Length
forall e a. HasLength e => Tree e a -> Length
height Tree Length Name
t Length -> Length -> Length
forall a. Num a => a -> a -> a
-) (Tree Length Name -> [Length]
forall e a. HasLength e => Tree e a -> [Length]
distancesOriginLeaves Tree Length Name
t)
  String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx ShuffleArguments ())
-> String -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Distance in branch length to being ultrametric: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Length -> String
forall a. Show a => a -> String
show Length
dh
  Bool -> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
> Length
2e-4) (String -> ELynx ShuffleArguments ()
forall a. HasCallStack => String -> a
error String
"Tree is not ultrametric.")
  Bool -> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
> Double -> Length
toLengthUnsafe Double
eps Bool -> Bool -> Bool
&& Length
dh Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
< Length
2e-4) (ELynx ShuffleArguments () -> ELynx ShuffleArguments ())
-> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$
    String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS
      String
"Tree is nearly ultrametric, ignore branch length differences smaller than 2e-4."
  Bool -> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
< Double -> Length
toLengthUnsafe Double
eps) (ELynx ShuffleArguments () -> ELynx ShuffleArguments ())
-> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Tree is ultrametric."
  let cs :: [Length]
cs = (Length -> Bool) -> [Length] -> [Length]
forall a. (a -> Bool) -> [a] -> [a]
filter (Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
> Length
0) ([Length] -> [Length]) -> [Length] -> [Length]
forall a b. (a -> b) -> a -> b
$ Tree Length Length -> [Length]
forall e a. Tree e a -> [a]
labels (Tree Length Length -> [Length]) -> Tree Length Length -> [Length]
forall a b. (a -> b) -> a -> b
$ (Tree Length Name -> Length)
-> Tree Length Name -> Tree Length Length
forall (w :: * -> *) a b. Comonad w => (w a -> b) -> w a -> w b
C.extend Tree Length Name -> Length
forall e a. HasLength e => Tree e a -> Length
rootHeight Tree Length Name
t
      ls :: [Name]
ls = (Name -> Name) -> [Name] -> [Name]
forall a b. (a -> b) -> [a] -> [b]
map Name -> Name
forall a. HasName a => a -> Name
getName ([Name] -> [Name]) -> [Name] -> [Name]
forall a b. (a -> b) -> a -> b
$ Tree Length Name -> [Name]
forall e a. Tree e a -> [a]
leaves Tree Length Name
t
  String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx ShuffleArguments ())
-> String -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Number of coalescent times: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Length] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Length]
cs)
  String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx ShuffleArguments ())
-> String -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Number of leaves: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Name] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Name]
ls)
  String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS String
"The coalescent times are: "
  String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx ShuffleArguments ())
-> String -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ [Length] -> String
forall a. Show a => a -> String
show [Length]
cs
  Gen RealWorld
gen <- IO (Gen RealWorld)
-> ReaderT (Environment ShuffleArguments) IO (Gen RealWorld)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Gen RealWorld)
 -> ReaderT (Environment ShuffleArguments) IO (Gen RealWorld))
-> IO (Gen RealWorld)
-> ReaderT (Environment ShuffleArguments) IO (Gen RealWorld)
forall a b. (a -> b) -> a -> b
$
    Vector Word32 -> IO (Gen (PrimState IO))
forall (m :: * -> *) (v :: * -> *).
(PrimMonad m, Vector v Word32) =>
v Word32 -> m (Gen (PrimState m))
initialize (Vector Word32 -> IO (Gen (PrimState IO)))
-> Vector Word32 -> IO (Gen (PrimState IO))
forall a b. (a -> b) -> a -> b
$ case ShuffleArguments -> SeedOpt
argsSeed ShuffleArguments
l of
      SeedOpt
RandomUnset -> String -> Vector Word32
forall a. HasCallStack => String -> a
error String
"Seed not available; please contact maintainer."
      RandomSet Vector Word32
s -> Vector Word32
s
      Fixed Vector Word32
s -> Vector Word32
s
  Forest Length Name
ts <- IO (Forest Length Name)
-> ReaderT (Environment ShuffleArguments) IO (Forest Length Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Name)
 -> ReaderT (Environment ShuffleArguments) IO (Forest Length Name))
-> IO (Forest Length Name)
-> ReaderT (Environment ShuffleArguments) IO (Forest Length Name)
forall a b. (a -> b) -> a -> b
$ Int
-> Length
-> [Length]
-> [Name]
-> Gen (PrimState IO)
-> IO (Forest Length Name)
shuffleT (ShuffleArguments -> Int
nReplicates ShuffleArguments
l) (Tree Length Name -> Length
forall e a. HasLength e => Tree e a -> Length
height Tree Length Name
t) [Length]
cs [Name]
ls Gen RealWorld
Gen (PrimState IO)
gen
  IO () -> ELynx ShuffleArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx ShuffleArguments ())
-> IO () -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BL.hPutStr Handle
h (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.unlines ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ (Tree Length Name -> ByteString)
-> Forest Length Name -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Tree Phylo Name -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick (Tree Phylo Name -> ByteString)
-> (Tree Length Name -> Tree Phylo Name)
-> Tree Length Name
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree Length Name -> Tree Phylo Name
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree) Forest Length Name
ts
  IO () -> ELynx ShuffleArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx ShuffleArguments ())
-> IO () -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> IO ()
hClose Handle
h

shuffleT ::
  Int -> -- How many?
  Length -> -- Stem length.
  [Length] -> -- Coalescent times.
  [Name] -> -- Leave names.
  GenIO ->
  IO (Forest Length Name)
shuffleT :: Int
-> Length
-> [Length]
-> [Name]
-> Gen (PrimState IO)
-> IO (Forest Length Name)
shuffleT Int
n Length
o [Length]
cs [Name]
ls Gen (PrimState IO)
gen = do
  [[Length]]
css <- [Length] -> Int -> Int -> Gen (PrimState IO) -> IO [[Length]]
forall (m :: * -> *) a.
PrimMonad m =>
[a] -> Int -> Int -> Gen (PrimState m) -> m [[a]]
grabble [Length]
cs Int
n ([Length] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Length]
cs) Gen (PrimState IO)
gen
  [[Name]]
lss <- [Name] -> Int -> Int -> Gen (PrimState IO) -> IO [[Name]]
forall (m :: * -> *) a.
PrimMonad m =>
[a] -> Int -> Int -> Gen (PrimState m) -> m [[a]]
grabble [Name]
ls Int
n ([Name] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Name]
ls) Gen (PrimState IO)
gen
  Forest Length Name -> IO (Forest Length Name)
forall (m :: * -> *) a. Monad m => a -> m a
return
    [ Name -> PointProcess Name Length -> Tree Length Name
forall a. a -> PointProcess a Length -> Tree Length a
toReconstructedTree Name
"" ([Name] -> [Length] -> Length -> PointProcess Name Length
forall a b. [a] -> [b] -> b -> PointProcess a b
PointProcess [Name]
names [Length]
times Length
o)
      | ([Length]
times, [Name]
names) <- [[Length]] -> [[Name]] -> [([Length], [Name])]
forall a b. [a] -> [b] -> [(a, b)]
zip [[Length]]
css [[Name]]
lss
    ]