{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  Analyze.Analyze
-- Description :  Parse sequence file formats and analyze them
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Oct  5 08:41:05 2018.
module SLynx.SubSample.SubSample
  ( subSampleCmd,
  )
where

import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Reader (ask)
import qualified ELynx.Sequence.Alignment as M
import ELynx.Sequence.Export.Fasta
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import SLynx.SubSample.Options
import SLynx.Tools
import System.Random.Stateful

-- | Sub sample sequences.
subSampleCmd :: ELynx SubSampleArguments ()
subSampleCmd :: ELynx SubSampleArguments ()
subSampleCmd = do
  (SubSampleArguments Alphabet
al String
inFile Int
nSites Int
nAlignments SeedOpt
sOpt) <- Environment SubSampleArguments -> SubSampleArguments
forall a. Environment a -> a
localArguments (Environment SubSampleArguments -> SubSampleArguments)
-> ReaderT
     (Environment SubSampleArguments)
     IO
     (Environment SubSampleArguments)
-> ReaderT (Environment SubSampleArguments) IO SubSampleArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment SubSampleArguments)
  IO
  (Environment SubSampleArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  let s :: Int
s = case SeedOpt -> Maybe Int
fromSeedOpt SeedOpt
sOpt of
        Maybe Int
Nothing -> String -> Int
forall a. HasCallStack => String -> a
error String
"subSampleCmd: No seed."
        Just Int
x -> Int
x
  String -> ELynx SubSampleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx SubSampleArguments ())
-> String -> ELynx SubSampleArguments ()
forall a b. (a -> b) -> a -> b
$ String
"  Sample " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nSites String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" sites."
  String -> ELynx SubSampleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx SubSampleArguments ())
-> String -> ELynx SubSampleArguments ()
forall a b. (a -> b) -> a -> b
$ String
"  Sample " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nAlignments String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" multi sequence alignments."
  [Sequence]
ss <- Alphabet
-> String -> Logger (Environment SubSampleArguments) [Sequence]
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> String -> Logger e [Sequence]
readSeqs Alphabet
al String
inFile
  IOGenM StdGen
gen <- StdGen
-> ReaderT (Environment SubSampleArguments) IO (IOGenM StdGen)
forall (m :: * -> *) g. MonadIO m => g -> m (IOGenM g)
newIOGenM (StdGen
 -> ReaderT (Environment SubSampleArguments) IO (IOGenM StdGen))
-> StdGen
-> ReaderT (Environment SubSampleArguments) IO (IOGenM StdGen)
forall a b. (a -> b) -> a -> b
$ Int -> StdGen
mkStdGen Int
s
  let a :: Alignment
a = (String -> Alignment)
-> (Alignment -> Alignment) -> Either String Alignment -> Alignment
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Alignment
forall a. HasCallStack => String -> a
error Alignment -> Alignment
forall a. a -> a
id ([Sequence] -> Either String Alignment
M.fromSequences [Sequence]
ss)
  [Alignment]
samples <- IO [Alignment]
-> ReaderT (Environment SubSampleArguments) IO [Alignment]
forall a. IO a -> ReaderT (Environment SubSampleArguments) IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [Alignment]
 -> ReaderT (Environment SubSampleArguments) IO [Alignment])
-> IO [Alignment]
-> ReaderT (Environment SubSampleArguments) IO [Alignment]
forall a b. (a -> b) -> a -> b
$ Int -> IO Alignment -> IO [Alignment]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
nAlignments (IO Alignment -> IO [Alignment]) -> IO Alignment -> IO [Alignment]
forall a b. (a -> b) -> a -> b
$ Int -> Alignment -> IOGenM StdGen -> IO Alignment
forall g (m :: * -> *).
StatefulGen g m =>
Int -> Alignment -> g -> m Alignment
M.randomSubSample Int
nSites Alignment
a IOGenM StdGen
gen
  let results :: [ByteString]
results = (Alignment -> ByteString) -> [Alignment] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ([Sequence] -> ByteString
sequencesToFasta ([Sequence] -> ByteString)
-> (Alignment -> [Sequence]) -> Alignment -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Sequence]
M.toSequences) [Alignment]
samples
      sfxs :: [String]
sfxs = Int -> String -> [String]
getOutSuffixes Int
nAlignments String
"fasta"
  (ByteString -> String -> ELynx SubSampleArguments ())
-> [ByteString] -> [String] -> ELynx SubSampleArguments ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ (String -> ByteString -> String -> ELynx SubSampleArguments ()
forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"sub sampled multi sequence alignments") [ByteString]
results [String]
sfxs