{-# LANGUAGE OverloadedStrings #-}
module TLynx.Shuffle.Shuffle
( shuffleCmd,
)
where
import qualified Control.Comonad as C
import Control.Monad (when)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Reader (ask)
import qualified Data.ByteString.Lazy.Char8 as BL
import ELynx.Tools.Definitions
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import ELynx.Tree
import ELynx.Tree.Simulate.PointProcess
( PointProcess (PointProcess),
toReconstructedTree,
)
import System.IO (hClose)
import System.Random.Stateful
import TLynx.Grabble
import TLynx.Parsers
import TLynx.Shuffle.Options
shuffleCmd :: ELynx ShuffleArguments ()
shuffleCmd :: ELynx ShuffleArguments ()
shuffleCmd = do
ShuffleArguments
l <- Environment ShuffleArguments -> ShuffleArguments
forall a. Environment a -> a
localArguments (Environment ShuffleArguments -> ShuffleArguments)
-> ReaderT
(Environment ShuffleArguments) IO (Environment ShuffleArguments)
-> ReaderT (Environment ShuffleArguments) IO ShuffleArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
(Environment ShuffleArguments) IO (Environment ShuffleArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
Handle
h <- String -> String -> ELynx ShuffleArguments Handle
forall a. Reproducible a => String -> String -> ELynx a Handle
outHandle String
"results" String
".tree"
let nwF :: NewickFormat
nwF = ShuffleArguments -> NewickFormat
nwFormat ShuffleArguments
l
Tree Phylo Name
tPhylo <- IO (Tree Phylo Name)
-> ReaderT (Environment ShuffleArguments) IO (Tree Phylo Name)
forall a. IO a -> ReaderT (Environment ShuffleArguments) IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Phylo Name)
-> ReaderT (Environment ShuffleArguments) IO (Tree Phylo Name))
-> IO (Tree Phylo Name)
-> ReaderT (Environment ShuffleArguments) IO (Tree Phylo Name)
forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwF (ShuffleArguments -> String
inFile ShuffleArguments
l)
String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Input tree:"
ByteString -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx ShuffleArguments ())
-> ByteString -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick Tree Phylo Name
tPhylo
let t :: Tree Length Name
t = (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
tPhylo
let dh :: Length
dh = [Length] -> Length
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Length] -> Length) -> [Length] -> Length
forall a b. (a -> b) -> a -> b
$ (Length -> Length) -> [Length] -> [Length]
forall a b. (a -> b) -> [a] -> [b]
map (Tree Length Name -> Length
forall e a. HasLength e => Tree e a -> Length
height Tree Length Name
t Length -> Length -> Length
forall a. Num a => a -> a -> a
-) (Tree Length Name -> [Length]
forall e a. HasLength e => Tree e a -> [Length]
distancesOriginLeaves Tree Length Name
t)
String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx ShuffleArguments ())
-> String -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Distance in branch length to being ultrametric: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Length -> String
forall a. Show a => a -> String
show Length
dh
Bool -> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
> Length
2e-4) (String -> ELynx ShuffleArguments ()
forall a. HasCallStack => String -> a
error String
"Tree is not ultrametric.")
Bool -> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
> Double -> Length
toLengthUnsafe Double
eps Bool -> Bool -> Bool
&& Length
dh Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
< Length
2e-4) (ELynx ShuffleArguments () -> ELynx ShuffleArguments ())
-> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$
String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS
String
"Tree is nearly ultrametric, ignore branch length differences smaller than 2e-4."
Bool -> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Length
dh Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
< Double -> Length
toLengthUnsafe Double
eps) (ELynx ShuffleArguments () -> ELynx ShuffleArguments ())
-> ELynx ShuffleArguments () -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Tree is ultrametric."
let cs :: [Length]
cs = (Length -> Bool) -> [Length] -> [Length]
forall a. (a -> Bool) -> [a] -> [a]
filter (Length -> Length -> Bool
forall a. Ord a => a -> a -> Bool
> Length
0) ([Length] -> [Length]) -> [Length] -> [Length]
forall a b. (a -> b) -> a -> b
$ Tree Length Length -> [Length]
forall e a. Tree e a -> [a]
labels (Tree Length Length -> [Length]) -> Tree Length Length -> [Length]
forall a b. (a -> b) -> a -> b
$ (Tree Length Name -> Length)
-> Tree Length Name -> Tree Length Length
forall a b. (Tree Length a -> b) -> Tree Length a -> Tree Length b
forall (w :: * -> *) a b. Comonad w => (w a -> b) -> w a -> w b
C.extend Tree Length Name -> Length
forall e a. HasLength e => Tree e a -> Length
rootHeight Tree Length Name
t
ls :: [Name]
ls = (Name -> Name) -> [Name] -> [Name]
forall a b. (a -> b) -> [a] -> [b]
map Name -> Name
forall a. HasName a => a -> Name
getName ([Name] -> [Name]) -> [Name] -> [Name]
forall a b. (a -> b) -> a -> b
$ Tree Length Name -> [Name]
forall e a. Tree e a -> [a]
leaves Tree Length Name
t
String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx ShuffleArguments ())
-> String -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Number of coalescent times: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Length] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Length]
cs)
String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx ShuffleArguments ())
-> String -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Number of leaves: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Name] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Name]
ls)
String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS String
"The coalescent times are: "
String -> ELynx ShuffleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx ShuffleArguments ())
-> String -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ [Length] -> String
forall a. Show a => a -> String
show [Length]
cs
IOGenM StdGen
gen <- StdGen -> ReaderT (Environment ShuffleArguments) IO (IOGenM StdGen)
forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM (StdGen
-> ReaderT (Environment ShuffleArguments) IO (IOGenM StdGen))
-> StdGen
-> ReaderT (Environment ShuffleArguments) IO (IOGenM StdGen)
forall a b. (a -> b) -> a -> b
$ Int -> StdGen
mkStdGen (Int -> StdGen) -> Int -> StdGen
forall a b. (a -> b) -> a -> b
$ case ShuffleArguments -> SeedOpt
argsSeed ShuffleArguments
l of
SeedOpt
RandomUnset -> String -> Int
forall a. HasCallStack => String -> a
error String
"Seed not available; please contact maintainer."
RandomSet Int
s -> Int
s
Fixed Int
s -> Int
s
Forest Length Name
ts <- IO (Forest Length Name)
-> ReaderT (Environment ShuffleArguments) IO (Forest Length Name)
forall a. IO a -> ReaderT (Environment ShuffleArguments) IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Name)
-> ReaderT (Environment ShuffleArguments) IO (Forest Length Name))
-> IO (Forest Length Name)
-> ReaderT (Environment ShuffleArguments) IO (Forest Length Name)
forall a b. (a -> b) -> a -> b
$ Int
-> Length
-> [Length]
-> [Name]
-> IOGenM StdGen
-> IO (Forest Length Name)
forall g (m :: * -> *).
StatefulGen g m =>
Int -> Length -> [Length] -> [Name] -> g -> m (Forest Length Name)
shuffleT (ShuffleArguments -> Int
nReplicates ShuffleArguments
l) (Tree Length Name -> Length
forall e a. HasLength e => Tree e a -> Length
height Tree Length Name
t) [Length]
cs [Name]
ls IOGenM StdGen
gen
IO () -> ELynx ShuffleArguments ()
forall a. IO a -> ReaderT (Environment ShuffleArguments) IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx ShuffleArguments ())
-> IO () -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BL.hPutStr Handle
h (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.unlines ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ (Tree Length Name -> ByteString)
-> Forest Length Name -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Tree Phylo Name -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick (Tree Phylo Name -> ByteString)
-> (Tree Length Name -> Tree Phylo Name)
-> Tree Length Name
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree Length Name -> Tree Phylo Name
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree) Forest Length Name
ts
IO () -> ELynx ShuffleArguments ()
forall a. IO a -> ReaderT (Environment ShuffleArguments) IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx ShuffleArguments ())
-> IO () -> ELynx ShuffleArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> IO ()
hClose Handle
h
shuffleT ::
(StatefulGen g m) =>
Int ->
Length ->
[Length] ->
[Name] ->
g ->
m (Forest Length Name)
shuffleT :: forall g (m :: * -> *).
StatefulGen g m =>
Int -> Length -> [Length] -> [Name] -> g -> m (Forest Length Name)
shuffleT Int
n Length
o [Length]
cs [Name]
ls g
gen = do
[[Length]]
css <- [Length] -> Int -> Int -> g -> m [[Length]]
forall g (m :: * -> *) a.
StatefulGen g m =>
[a] -> Int -> Int -> g -> m [[a]]
grabble [Length]
cs Int
n ([Length] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Length]
cs) g
gen
[[Name]]
lss <- [Name] -> Int -> Int -> g -> m [[Name]]
forall g (m :: * -> *) a.
StatefulGen g m =>
[a] -> Int -> Int -> g -> m [[a]]
grabble [Name]
ls Int
n ([Name] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Name]
ls) g
gen
Forest Length Name -> m (Forest Length Name)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return
[ Name -> PointProcess Name Length -> Tree Length Name
forall a. a -> PointProcess a Length -> Tree Length a
toReconstructedTree Name
"" ([Name] -> [Length] -> Length -> PointProcess Name Length
forall a b. [a] -> [b] -> b -> PointProcess a b
PointProcess [Name]
names [Length]
times Length
o)
| ([Length]
times, [Name]
names) <- [[Length]] -> [[Name]] -> [([Length], [Name])]
forall a b. [a] -> [b] -> [(a, b)]
zip [[Length]]
css [[Name]]
lss
]