{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  Analyze.Analyze
-- Description :  Parse sequence file formats and analyze them
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Oct  5 08:41:05 2018.
module SLynx.SubSample.SubSample
  ( subSampleCmd,
  )
where

import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Reader (ask)
import qualified ELynx.Sequence.Alignment as M
import ELynx.Sequence.Export.Fasta
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import SLynx.SubSample.Options
import SLynx.Tools
import System.Random.Stateful

-- | Sub sample sequences.
subSampleCmd :: ELynx SubSampleArguments ()
subSampleCmd :: ELynx SubSampleArguments ()
subSampleCmd = do
  (SubSampleArguments Alphabet
al String
inFile Int
nSites Int
nAlignments SeedOpt
sOpt) <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  let s :: Int
s = case SeedOpt -> Maybe Int
fromSeedOpt SeedOpt
sOpt of
        Maybe Int
Nothing -> forall a. HasCallStack => String -> a
error String
"subSampleCmd: No seed."
        Just Int
x -> Int
x
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ String
"  Sample " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
nSites forall a. Semigroup a => a -> a -> a
<> String
" sites."
  forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ String
"  Sample " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
nAlignments forall a. Semigroup a => a -> a -> a
<> String
" multi sequence alignments."
  [Sequence]
ss <- forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> String -> Logger e [Sequence]
readSeqs Alphabet
al String
inFile
  IOGenM StdGen
gen <- forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM forall a b. (a -> b) -> a -> b
$ Int -> StdGen
mkStdGen Int
s
  let a :: Alignment
a = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id ([Sequence] -> Either String Alignment
M.fromSequences [Sequence]
ss)
  [Alignment]
samples <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
nAlignments forall a b. (a -> b) -> a -> b
$ forall g (m :: * -> *).
StatefulGen g m =>
Int -> Alignment -> g -> m Alignment
M.randomSubSample Int
nSites Alignment
a IOGenM StdGen
gen
  let results :: [ByteString]
results = forall a b. (a -> b) -> [a] -> [b]
map ([Sequence] -> ByteString
sequencesToFasta forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Sequence]
M.toSequences) [Alignment]
samples
      sfxs :: [String]
sfxs = Int -> String -> [String]
getOutSuffixes Int
nAlignments String
"fasta"
  forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ (forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"sub sampled multi sequence alignments") [ByteString]
results [String]
sfxs