-- FilterFastaList module.
-- By Gregory W. Schwartz
--
-- Collection of functions for the filtering of a pipesFasta

{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}

module FilterFastaList
    ( hasNoStops
    , isInFrame
    , hasCustomFilter
    , hasAllCustomFilters
    ) where

-- Built in
import Data.List
import Data.Maybe
import Text.Regex.TDFA
import Text.Regex.TDFA.Text
import qualified Data.Text as T

-- Cabal
import Data.Fasta.Text

-- Local
import Types

-- | Keep only sequences without a stop codon (@*@) in the first @stopRange@
-- characters of the (translated) sequence.
--
-- For 'Nucleotide' input the sequence is first translated with
-- 'customTranslate' using the supplied codon table; for 'AminoAcid' input
-- the sequence is inspected as is. If translation fails, the returned error
-- text is raised with 'error'.
hasNoStops :: GeneticUnit -> CodonTable -> Int -> FastaSequence -> Bool
hasNoStops genUnit table stopRange =
    either (error . T.unpack) id . stopFree genUnit
  where
    -- True when no "*" occurs within the first stopRange characters.
    noStopIn = not . T.isInfixOf "*" . T.take stopRange . fastaSeq
    stopFree Nucleotide = fmap noStopIn . customTranslate table 1
    stopFree AminoAcid  = Right . noStopIn

-- | Keep only in-frame sequences: after discarding the gap characters @.@
-- and @-@, the sequence length must be a multiple of three.
isInFrame :: FastaSequence -> Bool
isInFrame = (== 0) . (`mod` 3) . T.length . T.filter notGap . fastaSeq
  where
    -- The length is the dividend here; the previous @mod 3@ composition
    -- computed @3 `mod` length@ instead, which misclassified sequences and
    -- divided by zero on empty ones.
    notGap c = c /= '.' && c /= '-'

-- | Decide whether a sequence passes a custom header filter.
--
-- @customFilter@ is matched as a regular expression (see 'equal') against
-- either the whole header or a single field of it, where fields are obtained
-- by splitting the header on @|@:
--
-- * @Nothing@ or @Just 0@: search the entire header.
-- * @Just n@ with @n > 0@: search only the n-th field (1 indexed). A header
--   that has fewer than @n@ fields is treated as not matching.
-- * @Just n@ with @n < 0@: invalid; raises an 'error'.
--
-- When @rm@ is 'False' the result is 'True' exactly when the filter matches.
-- When @rm@ is 'True' the result is inverted, so that matching sequences are
-- the ones to be removed.
hasCustomFilter :: Bool -> Maybe Int -> T.Text -> FastaSequence -> Bool
hasCustomFilter rm customField customFilter fasta =
    case customField of
        Nothing -> wholeHeader
        Just 0  -> wholeHeader
        Just n
            | n > 0     -> judge . maybe False matches $ fieldAt n
            | otherwise -> error $
                "FilterFastaList.hasCustomFilter: negative customField "
                    ++ show n
  where
    header      = fastaHeader fasta
    matches     = equal False customFilter
    wholeHeader = judge (matches header)
    -- Apply the removal flag to a raw "did the filter match" answer.
    judge matched = if rm then not matched else matched
    -- Total replacement for (!!): Nothing when the field does not exist.
    fieldAt n = listToMaybe . drop (n - 1) . T.splitOn "|" $ header

-- | Regular expression test of the pattern (second argument) against the
-- target text (third argument). With a 'True' flag the answer is negated.
equal :: Bool -> T.Text -> T.Text -> Bool
equal negated regex target = negated /= (target =~ regex :: Bool)

-- | 'True' when the sequence passes every (field, filter) pair according to
-- 'hasCustomFilter', all evaluated with the same removal flag. An empty list
-- of filters passes every sequence.
hasAllCustomFilters :: Bool -> [(Maybe Int, T.Text)] -> FastaSequence -> Bool
hasAllCustomFilters rm filters f =
    all (\(field, regex) -> hasCustomFilter rm field regex f) filters