{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module SLynx.Filter.Filter
( filterRowsCmd,
filterColsCmd,
)
where
import Control.Monad (when)
import Control.Monad.Logger
import Control.Monad.Trans.Reader (ask)
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import qualified ELynx.Data.Sequence.Alignment as M
import qualified ELynx.Data.Sequence.Sequence as S
import ELynx.Export.Sequence.Fasta
import ELynx.Tools
import SLynx.Filter.Options
import SLynx.Tools
-- | Filter a list of sequences and render what is left as FASTA.
--
-- The list of filters handed to 'compose' is assembled in this order:
--
--   1. 'S.filterStandard', present only when @std@ is 'True';
--   2. 'S.filterLongerThan' applied to @ml@, or 'id' when @ml@ is 'Nothing';
--   3. 'S.filterShorterThan' applied to @ms@, or 'id' when @ms@ is 'Nothing'.
--
-- The order in which 'compose' applies the list elements is defined in
-- "ELynx.Tools" and is not visible here.
filterRows :: Maybe Int -> Maybe Int -> Bool -> [S.Sequence] -> BL.ByteString
filterRows ml ms std ss = sequencesToFasta (compose pipeline ss)
  where
    -- A missing bound turns into the identity filter.
    lengthFilters =
      [ maybe id S.filterLongerThan ml,
        maybe id S.filterShorterThan ms
      ]
    -- The guarded singleton prepends the standard filter only if requested.
    pipeline = [S.filterStandard | std] ++ lengthFilters
-- | Command entry point for filtering sequences (rows).
--
-- Takes the 'FilterRowsArguments' from the reader environment, logs which
-- filters are active, reads the sequences with 'readSeqs', runs 'filterRows'
-- on them and passes the resulting FASTA to 'out' with the @.fasta@ suffix.
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd = do
  FilterRowsArguments al inFile long short std <- local <$> ask
  $(logInfo) "Command: Filter sequences of a list of sequences."
  -- Each length bound is reported only if it was provided.
  case long of
    Nothing -> return ()
    Just bound ->
      $(logInfo) (T.pack (" Keep sequences longer than " ++ show bound ++ "."))
  case short of
    Nothing -> return ()
    Just bound ->
      $(logInfo) (T.pack (" Keep sequences shorter than " ++ show bound ++ "."))
  when
    std
    ( $(logInfo)
        " Keep sequences containing at least one standard (i.e., non-IUPAC) character."
    )
  ss <- readSeqs al inFile
  out "filtered sequences" (filterRows long short std ss) ".fasta"
-- | Filter the columns of an alignment and render the result as FASTA.
--
-- The sequences are first converted with 'M.fromSequences'; if that yields a
-- 'Left', the contained message is raised with 'error'. When @ms@ is given,
-- 'M.filterColsStd' is applied with it; otherwise the alignment passes through
-- unchanged. The result is converted back with 'M.toSequences'.
filterCols :: Maybe Double -> [S.Sequence] -> BL.ByteString
filterCols ms ss = sequencesToFasta (M.toSequences filtered)
  where
    -- Partial on purpose: a failed conversion aborts with the reported message.
    alignment = case M.fromSequences ss of
      Left msg -> error msg
      Right aln -> aln
    -- A missing threshold turns into the identity filter.
    columnFilters = [maybe id M.filterColsStd ms]
    filtered = compose columnFilters alignment
-- | Command entry point for filtering alignment columns.
--
-- Takes the 'FilterColsArguments' from the reader environment, logs the
-- threshold if one was given, reads the sequences with 'readSeqs', runs
-- 'filterCols' on them and passes the resulting FASTA to 'out' with the
-- @.fasta@ suffix.
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd = do
  FilterColsArguments al inFile standard <- local <$> ask
  $(logInfo) "Command: Filter columns of a multi sequence alignment."
  -- Nothing is logged when no threshold was provided.
  maybe
    (return ())
    ( \threshold ->
        $(logInfo) . T.pack $
          " Keep columns with a proportion of standard (non-IUPAC) characters larger than "
            <> show threshold
            <> "."
    )
    standard
  ss <- readSeqs al inFile
  out "filtered sequences" (filterCols standard ss) ".fasta"