{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      :  SLynx.Simulate.Simulate
-- Description :  Simulate multiple sequence alignments
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Mon Jan 28 14:12:52 2019.
module SLynx.Simulate.Simulate
  ( simulateCmd,
  )
where

import Control.Applicative ((<|>))
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import Control.Monad.Trans.Reader (ask)
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.List
import Data.Maybe
import qualified Data.Set as Set
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Unboxed as U
import ELynx.Alphabet.Alphabet as A
import ELynx.Import.MarkovProcess.EDMModelPhylobayes
import ELynx.Import.MarkovProcess.SiteprofilesPhylobayes
import qualified ELynx.MarkovProcess.AminoAcid as MA
import ELynx.MarkovProcess.GammaRateHeterogeneity
import qualified ELynx.MarkovProcess.MixtureModel as MM
import qualified ELynx.MarkovProcess.PhyloModel as MP
import qualified ELynx.MarkovProcess.RateMatrix as MR
import qualified ELynx.MarkovProcess.SubstitutionModel as MS
import ELynx.Sequence.Export.Fasta
import qualified ELynx.Sequence.Sequence as Seq hiding
  ( name,
  )
import ELynx.Simulate.MarkovProcessAlongTree
import ELynx.Tools.ByteString
import ELynx.Tools.Definitions
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.InputOutput
import ELynx.Tools.Logger
import ELynx.Tools.Options
import ELynx.Tools.Reproduction
import ELynx.Tree
import qualified Numeric.LinearAlgebra as L
import SLynx.Simulate.Options
import SLynx.Simulate.PhyloModel
import System.Random.Stateful
import Text.Printf

-- Display a vector on a single line with the given precision.
--
-- The vector is rendered as a one-row matrix with 'L.dispf'; the first line of
-- that rendering is skipped and the second line is returned.
-- NOTE(review): the skipped line is presumably the dimension header printed by
-- 'L.dispf' -- confirm against the hmatrix documentation.
--
-- The function is total: if the rendering has fewer than two lines, the empty
-- string is returned instead of crashing on a partial 'head' or 'tail'.
dispv :: Int -> VS.Vector L.R -> String
dispv p v = case lines (L.dispf p (L.asRow v)) of
  (_ : row : _) -> row
  _ -> ""

-- Display a matrix with given precision @p@ and indent @i@.
--
-- The matrix is rendered with 'L.dispf'; the first line of the rendering is
-- dropped and each remaining line is prefixed with @i@ spaces. The lines are
-- joined with newlines, without a trailing newline.
-- NOTE(review): the dropped line is presumably the dimension header printed by
-- 'L.dispf' -- confirm against the hmatrix documentation.
--
-- Uses the total @drop 1@ rather than the partial 'tail'.
dispmi :: Int -> Int -> L.Matrix L.R -> String
dispmi p i m =
  intercalate "\n" . map (indent ++) . drop 1 . lines $ L.dispf p m
  where
    indent = replicate i ' '

-- Build one line of site distribution output: the integer @i@, a space, and
-- the space-separated values of the distribution @d@.
getDistLine :: Int -> MR.StationaryDistribution -> BB.Builder
getDistLine i d = mconcat [BB.intDec i, space, values]
  where
    space = BB.char8 ' '
    values = mconcat . intersperse space . map BB.doubleDec . VS.toList $ d

-- Write the stationary distributions used per site to
-- @<outFileBaseName>.sitedists@.
--
-- For every entry @c@ of @componentIs@, one line is written: a running index
-- starting at 1, followed by the distribution found at index @c@ of @ds@ after
-- conversion with 'MA.alphaToPamlVec'. Nothing is written when no output file
-- base name is available.
writeSiteDists :: [Int] -> V.Vector MR.StationaryDistribution -> ELynx SimulateArguments ()
-- writeSiteDists is ds = out "site distributions of distribution mixture model" output ".sitedists"
writeSiteDists componentIs ds = do
  env <- ask
  forM_ (outFileBaseName (globalArguments env)) $ \bn ->
    liftIO $ BL.writeFile (bn <> ".sitedists") contents
  where
    pamlDists = V.map MA.alphaToPamlVec ds
    siteLine siteIx compIx = getDistLine siteIx (pamlDists V.! compIx)
    siteLines = zipWith siteLine [1 ..] componentIs
    contents = BB.toLazyByteString . mconcat $ intersperse (BB.char8 '\n') siteLines

-- Simulate an alignment for a given phylogenetic model, phylogenetic tree, and
-- alignment length, and write it out in FASTA format.
--
-- For mixture models, the per-site component indices returned by the simulator
-- are additionally passed to 'writeSiteDists'.
simulateAlignment ::
  (RandomGen g, HasLength e, HasName a) =>
  MP.PhyloModel ->
  Tree e a ->
  Int ->
  IOGenM g ->
  ELynx SimulateArguments ()
simulateAlignment pm t' n g = do
  -- The simulators work on a tree of plain branch lengths.
  let t = fmap (fromLength . getLength) (toTreeBranchLabels t')
  leafStates <- case pm of
    MP.SubstitutionModel sm ->
      liftIO $
        simulateAndFlattenPar
          n
          (MS.stationaryDistribution sm)
          (MS.exchangeabilityMatrix sm)
          t
          g
    MP.MixtureModel mm -> do
      let sms = MM.getSubstitutionModels mm
          ds = V.map MS.stationaryDistribution sms
          es = V.map MS.exchangeabilityMatrix sms
      (cs, ss) <-
        liftIO $ simulateAndFlattenMixtureModelPar n (MM.getWeights mm) ds es t g
      -- TODO: Writing site distributions only makes sense for EDM models.
      -- Remove this if not needed or improve to be helpful in general.
      writeSiteDists cs ds
      return ss
  let code = MP.getAlphabet pm
      -- XXX: Probably use type safe stuff here?
      alph = A.all $ alphabetSpec code
      -- Simulated states are indices into the set of alphabet characters.
      toCharacters states = U.fromList (map (`Set.elemAt` alph) states)
      toSequence leaf states =
        Seq.Sequence (fromName $ getName leaf) "" code (toCharacters states)
      sequences = zipWith toSequence (leaves t') leafStates
  logInfoS ""
  out "simulated multi sequence alignment" (sequencesToFasta sequences) ".fasta"

-- Summarize EDM components; line to be printed to screen or log.
--
-- Only the number of components is reported.
summarizeEDMComponents :: [EDMComponent] -> BL.ByteString
summarizeEDMComponents cs =
  BL.pack $
    "Empirical distribution mixture model with "
      ++ show (length cs)
      ++ " components."

-- Write the machine-readable model definition (the 'show' of the model plus a
-- trailing newline) with suffix @.model.gz@.
--
-- The model is only written when an ELynx file is requested and an output file
-- base name is available; otherwise the reason for skipping is logged.
reportModel :: MP.PhyloModel -> ELynx SimulateArguments ()
reportModel m = do
  env <- ask
  let gArgs = globalArguments env
  case (writeElynxFile gArgs, outFileBaseName gArgs) of
    (False, _) ->
      logInfoS "No elynx file required; omit writing machine-readable phylogenetic model."
    (True, Nothing) ->
      logInfoS "No output file provided; omit writing machine-readable phylogenetic model."
    (True, Just _) ->
      out "model definition (machine readable)" (BL.pack (show m) <> "\n") ".model.gz"

-- Format a length with five digits after the decimal point.
pretty :: Length -> String
pretty len = printf "%.5f" (fromLength len)

-- Format a two-column row: the name passed through 'alignLeft' with width 33,
-- followed by the value passed through 'alignRight' with width 8.
prettyRow :: String -> String -> BL.ByteString
prettyRow name val = nameColumn <> valueColumn
  where
    nameColumn = alignLeft 33 (BL.pack name)
    valueColumn = alignRight 8 (BL.pack val)

-- | Examine branches of a tree.
--
-- Reports three rows: the origin height, the average distance from the origin
-- to the leaves, and the total branch length.
summarizeLengths :: HasLength e => Tree e a -> BL.ByteString
summarizeLengths t =
  BL.intercalate "\n" $
    zipWith
      prettyRow
      [ "Origin height: ",
        "Average distance origin to leaves: ",
        "Total branch length: "
      ]
      (map pretty [originHeight, meanLeafDistance, totalLength])
  where
    nLeaves = length (leaves t)
    originHeight = height t
    meanLeafDistance = sum (distancesOriginLeaves t) / fromIntegral nLeaves
    totalLength = totalBranchLength t

-- Round a double to a given precision.
--
-- @n@ is the number of digits kept after the decimal point. The value is
-- scaled by @10 ^^ n@, rounded with 'round' (which rounds halves to the
-- nearest even integer), and scaled back.
--
-- The same scale factor is used for multiplication and division. Using '^^'
-- for both means a negative @n@ rounds to the left of the decimal point
-- instead of failing with a "Negative exponent" error; for non-negative @n@
-- the result is unchanged.
roundN :: Int -> Double -> Double
roundN n v = fromInteger (round (v * scale)) / scale
  where
    scale :: Double
    scale = 10 ^^ n

-- Summarize a substitution model; lines to be printed to screen or log.
--
-- The summary consists of a header with alphabet and model name, the model
-- parameters (only if there are any), the stationary distribution, the
-- exchangeability matrix (DNA only), and the scale (total rate).
--
-- Calls 'error' for alphabets other than DNA and Protein.
summarizeSM :: MS.SubstitutionModel -> [BL.ByteString]
summarizeSM sm =
  map BL.pack $ header : paramLines ++ alphabetLines
  where
    header = show (MS.alphabet sm) ++ " substitution model: " ++ MS.name sm ++ "."
    ps = MS.params sm
    paramLines = ["Parameters: " ++ show ps ++ "." | not (null ps)]
    statDistLine =
      "Stationary distribution: "
        ++ dispv precision (MS.stationaryDistribution sm)
        ++ "."
    -- 'dispmi' takes the precision first and the indent second; use the
    -- global precision and an indent of two spaces.
    exchLine =
      "Exchangeability matrix:\n"
        ++ dispmi precision 2 (MS.exchangeabilityMatrix sm)
        ++ "."
    scaleLine = "Scale: " ++ show (roundN precision $ MS.totalRate sm) ++ "."
    alphabetLines = case MS.alphabet sm of
      DNA -> [statDistLine, exchLine, scaleLine]
      Protein -> [statDistLine, scaleLine]
      _ ->
        error
          "Extended character sets are not supported with substitution models."

-- Summarize a mixture model component; lines to be printed to screen or log.
--
-- The first line reports the weight of the component; the remaining lines are
-- the summary of its substitution model.
summarizeMMComponent :: MM.Component -> [BL.ByteString]
summarizeMMComponent c = weightLine : summarizeSM (MM.substModel c)
  where
    weightLine =
      mconcat
        [ BL.pack "Weight: ",
          BB.toLazyByteString (BB.doubleDec (MM.weight c))
        ]

-- Summarize a mixture model; lines to be printed to screen or log.
--
-- The name and the number of components are always reported. The components
-- themselves are only listed when there are at most 100 of them.
summarizeMM :: MM.MixtureModel -> [BL.ByteString]
summarizeMM m = headerLines ++ componentLines
  where
    cs = MM.components m
    n = length cs
    headerLines =
      map
        BL.pack
        [ "Mixture model: " ++ MM.name m ++ ".",
          "Number of components: " ++ show n ++ "."
        ]
    describe i c = BL.pack ("Component " ++ show i ++ ":") : summarizeMMComponent c
    componentLines
      | n <= 100 = concat (zipWith describe [1 :: Int ..] (V.toList cs))
      | otherwise = []

-- Summarize a phylogenetic model; lines to be printed to screen or log.
--
-- Dispatches to the summary of the wrapped substitution or mixture model.
summarizePM :: MP.PhyloModel -> [BL.ByteString]
summarizePM pm = case pm of
  MP.SubstitutionModel sm -> summarizeSM sm
  MP.MixtureModel mm -> summarizeMM mm

-- | Simulate sequences.
simulateCmd :: ELynx SimulateArguments ()
simulateCmd :: ELynx SimulateArguments ()
simulateCmd = do
  SimulateArguments
l <- Environment SimulateArguments -> SimulateArguments
forall a. Environment a -> a
localArguments (Environment SimulateArguments -> SimulateArguments)
-> ReaderT
     (Environment SimulateArguments) IO (Environment SimulateArguments)
-> ReaderT (Environment SimulateArguments) IO SimulateArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment SimulateArguments) IO (Environment SimulateArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  let treeFile :: String
treeFile = SimulateArguments -> String
argsTreeFile SimulateArguments
l
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
""
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Read tree from file '" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
treeFile String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"'."
  Tree Phylo Name
tree <- IO (Tree Phylo Name)
-> ReaderT (Environment SimulateArguments) IO (Tree Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Phylo Name)
 -> ReaderT (Environment SimulateArguments) IO (Tree Phylo Name))
-> IO (Tree Phylo Name)
-> ReaderT (Environment SimulateArguments) IO (Tree Phylo Name)
forall a b. (a -> b) -> a -> b
$ Parser (Tree Phylo Name) -> String -> IO (Tree Phylo Name)
forall a. Parser a -> String -> IO a
parseFileWith (NewickFormat -> Parser (Tree Phylo Name)
newick NewickFormat
Standard) String
treeFile
  let t' :: Tree Length Name
t' = (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
tree
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Number of leaves: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show ([Name] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Name] -> Int) -> [Name] -> Int
forall a b. (a -> b) -> a -> b
$ Tree Length Name -> [Name]
forall e a. Tree e a -> [a]
leaves Tree Length Name
t')
  ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Length Name -> ByteString
forall e a. HasLength e => Tree e a -> ByteString
summarizeLengths Tree Length Name
t'
  let edmFile :: Maybe String
edmFile = SimulateArguments -> Maybe String
argsEDMFile SimulateArguments
l
  let sProfileFiles :: Maybe [String]
sProfileFiles = SimulateArguments -> Maybe [String]
argsSiteprofilesFiles SimulateArguments
l
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
""
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS String
"Read EDM file or siteprofile files."
  Bool -> ELynx SimulateArguments () -> ELynx SimulateArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Maybe String -> Bool
forall a. Maybe a -> Bool
isJust Maybe String
edmFile Bool -> Bool -> Bool
&& Maybe [String] -> Bool
forall a. Maybe a -> Bool
isJust Maybe [String]
sProfileFiles) (ELynx SimulateArguments () -> ELynx SimulateArguments ())
-> ELynx SimulateArguments () -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String -> ELynx SimulateArguments ()
forall a. HasCallStack => String -> a
error String
"Got both: --edm-file and --siteprofile-files."
  Maybe [EDMComponent]
edmCs <- case Maybe String
edmFile of
    Maybe String
Nothing -> Maybe [EDMComponent]
-> ReaderT
     (Environment SimulateArguments) IO (Maybe [EDMComponent])
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe [EDMComponent]
forall a. Maybe a
Nothing
    Just String
edmF -> do
      String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Read EDM file."
      IO (Maybe [EDMComponent])
-> ReaderT
     (Environment SimulateArguments) IO (Maybe [EDMComponent])
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Maybe [EDMComponent])
 -> ReaderT
      (Environment SimulateArguments) IO (Maybe [EDMComponent]))
-> IO (Maybe [EDMComponent])
-> ReaderT
     (Environment SimulateArguments) IO (Maybe [EDMComponent])
forall a b. (a -> b) -> a -> b
$ [EDMComponent] -> Maybe [EDMComponent]
forall a. a -> Maybe a
Just ([EDMComponent] -> Maybe [EDMComponent])
-> IO [EDMComponent] -> IO (Maybe [EDMComponent])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser [EDMComponent] -> String -> IO [EDMComponent]
forall a. Parser a -> String -> IO a
parseFileWith Parser [EDMComponent]
phylobayes String
edmF
  ELynx SimulateArguments ()
-> ([EDMComponent] -> ELynx SimulateArguments ())
-> Maybe [EDMComponent]
-> ELynx SimulateArguments ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
    (() -> ELynx SimulateArguments ()
forall (m :: * -> *) a. Monad m => a -> m a
return ())
    (ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ([EDMComponent] -> ByteString)
-> [EDMComponent]
-> ELynx SimulateArguments ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [EDMComponent] -> ByteString
summarizeEDMComponents)
    Maybe [EDMComponent]
edmCs
  Maybe [EDMComponent]
sProfiles <- case Maybe [String]
sProfileFiles of
    Maybe [String]
Nothing -> Maybe [EDMComponent]
-> ReaderT
     (Environment SimulateArguments) IO (Maybe [EDMComponent])
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe [EDMComponent]
forall a. Maybe a
Nothing
    Just [String]
fns -> do
      String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
        String
"Read siteprofiles from "
          String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show ([String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
fns)
          String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" file(s)."
      String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ String
"The file names are:" String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
forall a. Show a => a -> String
show [String]
fns
      [[EDMComponent]]
xs <- IO [[EDMComponent]]
-> ReaderT (Environment SimulateArguments) IO [[EDMComponent]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[EDMComponent]]
 -> ReaderT (Environment SimulateArguments) IO [[EDMComponent]])
-> IO [[EDMComponent]]
-> ReaderT (Environment SimulateArguments) IO [[EDMComponent]]
forall a b. (a -> b) -> a -> b
$ (String -> IO [EDMComponent]) -> [String] -> IO [[EDMComponent]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Parser [EDMComponent] -> String -> IO [EDMComponent]
forall a. Parser a -> String -> IO a
parseFileWith Parser [EDMComponent]
siteprofiles) [String]
fns
      Maybe [EDMComponent]
-> ReaderT
     (Environment SimulateArguments) IO (Maybe [EDMComponent])
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe [EDMComponent]
 -> ReaderT
      (Environment SimulateArguments) IO (Maybe [EDMComponent]))
-> Maybe [EDMComponent]
-> ReaderT
     (Environment SimulateArguments) IO (Maybe [EDMComponent])
forall a b. (a -> b) -> a -> b
$ [EDMComponent] -> Maybe [EDMComponent]
forall a. a -> Maybe a
Just ([EDMComponent] -> Maybe [EDMComponent])
-> [EDMComponent] -> Maybe [EDMComponent]
forall a b. (a -> b) -> a -> b
$ [[EDMComponent]] -> [EDMComponent]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[EDMComponent]]
xs
  ELynx SimulateArguments ()
-> ([EDMComponent] -> ELynx SimulateArguments ())
-> Maybe [EDMComponent]
-> ELynx SimulateArguments ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
    (() -> ELynx SimulateArguments ()
forall (m :: * -> *) a. Monad m => a -> m a
return ())
    (ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ([EDMComponent] -> ByteString)
-> [EDMComponent]
-> ELynx SimulateArguments ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [EDMComponent] -> ByteString
summarizeEDMComponents)
    Maybe [EDMComponent]
sProfiles
  let edmCsOrSiteprofiles :: Maybe [EDMComponent]
edmCsOrSiteprofiles = Maybe [EDMComponent]
edmCs Maybe [EDMComponent]
-> Maybe [EDMComponent] -> Maybe [EDMComponent]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe [EDMComponent]
sProfiles
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Read model string."
  let ms :: Maybe String
ms = SimulateArguments -> Maybe String
argsSubstitutionModelString SimulateArguments
l
      mm :: Maybe String
mm = SimulateArguments -> Maybe String
argsMixtureModelString SimulateArguments
l
      mws :: Maybe [R]
mws = SimulateArguments -> Maybe [R]
argsMixtureWeights SimulateArguments
l
      eitherPhyloModel' :: Either String PhyloModel
eitherPhyloModel' = Maybe String
-> Maybe String
-> Maybe [R]
-> Maybe [EDMComponent]
-> Either String PhyloModel
getPhyloModel Maybe String
ms Maybe String
mm Maybe [R]
mws Maybe [EDMComponent]
edmCsOrSiteprofiles
  PhyloModel
phyloModel' <- case Either String PhyloModel
eitherPhyloModel' of
    Left String
err -> IO PhyloModel
-> ReaderT (Environment SimulateArguments) IO PhyloModel
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO PhyloModel
 -> ReaderT (Environment SimulateArguments) IO PhyloModel)
-> IO PhyloModel
-> ReaderT (Environment SimulateArguments) IO PhyloModel
forall a b. (a -> b) -> a -> b
$ String -> IO PhyloModel
forall a. HasCallStack => String -> a
error String
err
    Right PhyloModel
pm -> PhyloModel -> ReaderT (Environment SimulateArguments) IO PhyloModel
forall (m :: * -> *) a. Monad m => a -> m a
return PhyloModel
pm
  let maybeGammaParams :: Maybe GammaRateHeterogeneityParams
maybeGammaParams = SimulateArguments -> Maybe GammaRateHeterogeneityParams
argsGammaParams SimulateArguments
l
  PhyloModel
phyloModel <- case Maybe GammaRateHeterogeneityParams
maybeGammaParams of
    Maybe GammaRateHeterogeneityParams
Nothing -> do
      ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.unlines ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ PhyloModel -> [ByteString]
summarizePM PhyloModel
phyloModel'
      PhyloModel -> ReaderT (Environment SimulateArguments) IO PhyloModel
forall (m :: * -> *) a. Monad m => a -> m a
return PhyloModel
phyloModel'
    Just (Int
n, R
alpha) -> do
      ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString] -> ByteString
BL.intercalate ByteString
"\n" ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ PhyloModel -> [ByteString]
summarizePM PhyloModel
phyloModel'
      String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
""
      ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString] -> ByteString
BL.intercalate ByteString
"\n" ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> R -> [ByteString]
summarizeGammaRateHeterogeneity Int
n R
alpha
      PhyloModel -> ReaderT (Environment SimulateArguments) IO PhyloModel
forall (m :: * -> *) a. Monad m => a -> m a
return (PhyloModel
 -> ReaderT (Environment SimulateArguments) IO PhyloModel)
-> PhyloModel
-> ReaderT (Environment SimulateArguments) IO PhyloModel
forall a b. (a -> b) -> a -> b
$ Int -> R -> PhyloModel -> PhyloModel
expand Int
n R
alpha PhyloModel
phyloModel'
  PhyloModel -> ELynx SimulateArguments ()
reportModel PhyloModel
phyloModel
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Simulate alignment."
  let alignmentLength :: Int
alignmentLength = SimulateArguments -> Int
argsLength SimulateArguments
l
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Length: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
alignmentLength String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."
  IOGenM StdGen
gen <- StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen)
forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM (StdGen
 -> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen))
-> StdGen
-> ReaderT (Environment SimulateArguments) IO (IOGenM StdGen)
forall a b. (a -> b) -> a -> b
$ Int -> StdGen
mkStdGen (Int -> StdGen) -> Int -> StdGen
forall a b. (a -> b) -> a -> b
$ case SimulateArguments -> SeedOpt
argsSeed SimulateArguments
l of
    SeedOpt
RandomUnset -> String -> Int
forall a. HasCallStack => String -> a
error String
"simulateCmd: seed not available; please contact maintainer."
    RandomSet Int
s -> Int
s
    Fixed Int
s -> Int
s
  PhyloModel
-> Tree Length Name
-> Int
-> IOGenM StdGen
-> ELynx SimulateArguments ()
forall g e a.
(RandomGen g, HasLength e, HasName a) =>
PhyloModel
-> Tree e a -> Int -> IOGenM g -> ELynx SimulateArguments ()
simulateAlignment PhyloModel
phyloModel Tree Length Name
t' Int
alignmentLength IOGenM StdGen
gen