{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell   #-}

{- |
Module      :  Analyze.Analyze
Description :  Parse sequence file formats and analyze them
Copyright   :  (c) Dominik Schrempf 2018
License     :  GPL-3

Maintainer  :  dominik.schrempf@gmail.com
Stability   :  unstable
Portability :  portable

Creation date: Fri Oct  5 08:41:05 2018.

-}

module Filter.Filter
  ( filterRowsCmd
  , filterColumnsCmd
  )
  where

import           Control.Monad.Logger
import           Control.Monad.Trans.Class
import           Control.Monad.Trans.Reader
import qualified Data.ByteString.Lazy.Char8                 as L
import           Data.Maybe                                 (fromMaybe)
import qualified Data.Text                                  as T

import           Filter.Options
import           Tools

import           ELynx.Data.Sequence.MultiSequenceAlignment
import           ELynx.Data.Sequence.Sequence
import           ELynx.Export.Sequence.Fasta
import           ELynx.Tools.InputOutput
import           ELynx.Tools.Misc

filterRows :: Maybe Int -> Maybe Int -> [Sequence] -> L.ByteString
filterRows ml ms ss = sequencesToFasta $ compose filters ss
  where filters = map (fromMaybe id) [filterLongerThan <$> ml, filterShorterThan <$> ms]

-- | Filter sequences.
filterRowsCmd :: Maybe FilePath -> FilterRows ()
filterRowsCmd outFileBaseName = do
  $(logInfo) "Command: Filter sequences of a list of sequences."
  FilterRowsArguments al inFile long short <- lift ask
  maybe (return ())
    (\val -> $(logInfo) $ T.pack $ "  Keep sequences longer than " <> show val <> ".") long
  maybe (return ())
    (\val -> $(logInfo) $ T.pack $ "  Keep sequences shorter than " <> show val <> ".") short
  ss <- readSeqs al inFile
  let result      = filterRows long short ss
  let outFilePath = (++ ".fasta") <$> outFileBaseName
  io "filtered sequences" result outFilePath

filterColumns :: Maybe Double -> [Sequence] -> L.ByteString
filterColumns ms ss = sequencesToFasta . toSequenceList $ compose filters msa
  where msa = either error id (fromSequenceList ss)
        filters = map (fromMaybe id) [ filterColumnsStd <$> ms ]

-- | Filter columns.
filterColumnsCmd :: Maybe FilePath -> FilterColumns ()
filterColumnsCmd outFileBaseName = do
  $(logInfo) "Command: Filter columns of a multi sequence alignment."
  FilterColumnsArguments al inFile standard <- lift ask
  case standard of
    Nothing -> return ()
    Just p -> $(logInfo) $ T.pack $
        "  Keep columns with a proportion of standard (non-IUPAC) characters larger than "
        ++ show p ++ "."
  ss <- readSeqs al inFile
  let result      = filterColumns standard ss
  let outFilePath = (++ ".fasta") <$> outFileBaseName
  io "filtered sequences" result outFilePath