{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  Analyze.Analyze
-- Description :  Parse sequence file formats and analyze them
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Oct  5 08:41:05 2018.
module SLynx.Filter.Filter
  ( filterRowsCmd,
    filterColsCmd,
  )
where

import Control.Monad (when)
import Control.Monad.Trans.Reader
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Maybe (fromMaybe)
import qualified ELynx.Sequence.Alignment as M
import ELynx.Sequence.Export.Fasta
import qualified ELynx.Sequence.Sequence as S
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import SLynx.Filter.Options
import SLynx.Tools

-- Chain a list of functions together. See https://wiki.haskell.org/Compose.
compose :: [a -> a] -> a -> a
compose :: forall a. [a -> a] -> a -> a
compose = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall b c a. (b -> c) -> (a -> b) -> a -> c
(.)) forall a. a -> a
id

filterRows :: Maybe Int -> Maybe Int -> Bool -> [S.Sequence] -> BL.ByteString
filterRows :: Maybe Int -> Maybe Int -> Bool -> [Sequence] -> ByteString
filterRows Maybe Int
ml Maybe Int
ms Bool
std [Sequence]
ss = [Sequence] -> ByteString
sequencesToFasta forall a b. (a -> b) -> a -> b
$ forall a. [a -> a] -> a -> a
compose [[Sequence] -> [Sequence]]
filters [Sequence]
ss
  where
    filters' :: [[Sequence] -> [Sequence]]
filters' =
      forall a b. (a -> b) -> [a] -> [b]
map (forall a. a -> Maybe a -> a
fromMaybe forall a. a -> a
id) [Int -> [Sequence] -> [Sequence]
S.filterLongerThan forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Int
ml, Int -> [Sequence] -> [Sequence]
S.filterShorterThan forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Int
ms]
    filters :: [[Sequence] -> [Sequence]]
filters = if Bool
std then [Sequence] -> [Sequence]
S.filterStandard forall a. a -> [a] -> [a]
: [[Sequence] -> [Sequence]]
filters' else [[Sequence] -> [Sequence]]
filters'

-- | Filter sequences.
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd = do
  (FilterRowsArguments Alphabet
al String
inFile Maybe Int
long Maybe Int
short Bool
std) <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  forall b a. b -> (a -> b) -> Maybe a -> b
maybe
    (forall (m :: * -> *) a. Monad m => a -> m a
return ())
    ( \Int
val ->
        forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ String
"  Keep sequences longer than " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
val forall a. Semigroup a => a -> a -> a
<> String
"."
    )
    Maybe Int
long
  forall b a. b -> (a -> b) -> Maybe a -> b
maybe
    (forall (m :: * -> *) a. Monad m => a -> m a
return ())
    ( \Int
val ->
        forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ String
"  Keep sequences shorter than " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
val forall a. Semigroup a => a -> a -> a
<> String
"."
    )
    Maybe Int
short
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
std forall a b. (a -> b) -> a -> b
$
    forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS
      String
"  Keep sequences containing at least one standard (i.e., non-IUPAC) character."
  [Sequence]
ss <- forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> String -> Logger e [Sequence]
readSeqs Alphabet
al String
inFile
  let result :: ByteString
result = Maybe Int -> Maybe Int -> Bool -> [Sequence] -> ByteString
filterRows Maybe Int
long Maybe Int
short Bool
std [Sequence]
ss
  forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"filtered sequences" ByteString
result String
".fasta"

filterCols :: Maybe Double -> [S.Sequence] -> BL.ByteString
filterCols :: Maybe Double -> [Sequence] -> ByteString
filterCols Maybe Double
ms [Sequence]
ss = [Sequence] -> ByteString
sequencesToFasta forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Sequence]
M.toSequences forall a b. (a -> b) -> a -> b
$ forall a. [a -> a] -> a -> a
compose [Alignment -> Alignment]
filters Alignment
a
  where
    a :: Alignment
a = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id ([Sequence] -> Either String Alignment
M.fromSequences [Sequence]
ss)
    filters :: [Alignment -> Alignment]
filters = forall a b. (a -> b) -> [a] -> [b]
map (forall a. a -> Maybe a -> a
fromMaybe forall a. a -> a
id) [Double -> Alignment -> Alignment
M.filterColsStd forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Double
ms]

-- | Filter columns.
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd = do
  (FilterColsArguments Alphabet
al String
inFile Maybe Double
standard) <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  case Maybe Double
standard of
    Maybe Double
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
    Just Double
p ->
      forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$
        String
"  Keep columns with a proportion of standard (non-IUPAC) characters larger than "
          forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Double
p
          forall a. [a] -> [a] -> [a]
++ String
"."
  [Sequence]
ss <- forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> String -> Logger e [Sequence]
readSeqs Alphabet
al String
inFile
  let result :: ByteString
result = Maybe Double -> [Sequence] -> ByteString
filterCols Maybe Double
standard [Sequence]
ss
  forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"filtered sequences" ByteString
result String
".fasta"