{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      :  Analyze.Analyze
-- Description :  Parse sequence file formats and analyze them
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Oct  5 08:41:05 2018.
module SLynx.SubSample.SubSample
  ( subSampleCmd,
  )
where

import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Reader (ask)
import qualified ELynx.Data.Sequence.Alignment as M
import ELynx.Export.Sequence.Fasta
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tools.Reproduction
import SLynx.SubSample.Options
import SLynx.Tools
import System.Random.MWC

-- | Sub sample sequences.
subSampleCmd :: ELynx SubSampleArguments ()
subSampleCmd :: ELynx SubSampleArguments ()
subSampleCmd = do
  (SubSampleArguments Alphabet
al FilePath
inFile Int
nSites Int
nAlignments (Fixed Vector Word32
s)) <- Environment SubSampleArguments -> SubSampleArguments
forall a. Environment a -> a
localArguments (Environment SubSampleArguments -> SubSampleArguments)
-> ReaderT
     (Environment SubSampleArguments)
     IO
     (Environment SubSampleArguments)
-> ReaderT (Environment SubSampleArguments) IO SubSampleArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment SubSampleArguments)
  IO
  (Environment SubSampleArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  FilePath -> ELynx SubSampleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS (FilePath -> ELynx SubSampleArguments ())
-> FilePath -> ELynx SubSampleArguments ()
forall a b. (a -> b) -> a -> b
$ FilePath
"  Sample " FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> Int -> FilePath
forall a. Show a => a -> FilePath
show Int
nSites FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> FilePath
" sites."
  FilePath -> ELynx SubSampleArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS (FilePath -> ELynx SubSampleArguments ())
-> FilePath -> ELynx SubSampleArguments ()
forall a b. (a -> b) -> a -> b
$ FilePath
"  Sample " FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> Int -> FilePath
forall a. Show a => a -> FilePath
show Int
nAlignments FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> FilePath
" multi sequence alignments."
  [Sequence]
ss <- Alphabet
-> FilePath -> Logger (Environment SubSampleArguments) [Sequence]
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> FilePath -> Logger e [Sequence]
readSeqs Alphabet
al FilePath
inFile
  Gen RealWorld
gen <- IO (Gen RealWorld)
-> ReaderT (Environment SubSampleArguments) IO (Gen RealWorld)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Gen RealWorld)
 -> ReaderT (Environment SubSampleArguments) IO (Gen RealWorld))
-> IO (Gen RealWorld)
-> ReaderT (Environment SubSampleArguments) IO (Gen RealWorld)
forall a b. (a -> b) -> a -> b
$ Vector Word32 -> IO (Gen (PrimState IO))
forall (m :: * -> *) (v :: * -> *).
(PrimMonad m, Vector v Word32) =>
v Word32 -> m (Gen (PrimState m))
initialize Vector Word32
s
  let a :: Alignment
a = (FilePath -> Alignment)
-> (Alignment -> Alignment)
-> Either FilePath Alignment
-> Alignment
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either FilePath -> Alignment
forall a. HasCallStack => FilePath -> a
error Alignment -> Alignment
forall a. a -> a
id ([Sequence] -> Either FilePath Alignment
M.fromSequences [Sequence]
ss)
  [Alignment]
samples <- IO [Alignment]
-> ReaderT (Environment SubSampleArguments) IO [Alignment]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [Alignment]
 -> ReaderT (Environment SubSampleArguments) IO [Alignment])
-> IO [Alignment]
-> ReaderT (Environment SubSampleArguments) IO [Alignment]
forall a b. (a -> b) -> a -> b
$ Int -> IO Alignment -> IO [Alignment]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
nAlignments (IO Alignment -> IO [Alignment]) -> IO Alignment -> IO [Alignment]
forall a b. (a -> b) -> a -> b
$ Int -> Alignment -> Gen (PrimState IO) -> IO Alignment
forall (m :: * -> *).
PrimMonad m =>
Int -> Alignment -> Gen (PrimState m) -> m Alignment
M.randomSubSample Int
nSites Alignment
a Gen RealWorld
Gen (PrimState IO)
gen
  let results :: [ByteString]
results = (Alignment -> ByteString) -> [Alignment] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ([Sequence] -> ByteString
sequencesToFasta ([Sequence] -> ByteString)
-> (Alignment -> [Sequence]) -> Alignment -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Sequence]
M.toSequences) [Alignment]
samples
      sfxs :: [FilePath]
sfxs = Int -> FilePath -> [FilePath]
getOutSuffixes Int
nAlignments FilePath
"fasta"
  (ByteString -> FilePath -> ELynx SubSampleArguments ())
-> [ByteString] -> [FilePath] -> ELynx SubSampleArguments ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ (FilePath -> ByteString -> FilePath -> ELynx SubSampleArguments ()
forall a.
Reproducible a =>
FilePath -> ByteString -> FilePath -> ELynx a ()
out FilePath
"sub sampled multi sequence alignments") [ByteString]
results [FilePath]
sfxs