-- FilterFastaList module. -- By Gregory W. Schwartz -- -- Collection of functions for the filtering of a pipesFasta {-# LANGUAGE BangPatterns, OverloadedStrings, FlexibleContexts #-} module FilterCloneList ( filterHighlyMutatedEntry ) where -- Built in import Data.List import Data.Maybe import Data.Either import Text.Regex.TDFA import Text.Regex.TDFA.Text import qualified Data.Text as T -- Cabal import Data.Fasta.Text -- Local import Types -- | Remove highly mutated sequences (sequences with more than a third of -- their sequence being mutated). filterHighlyMutatedEntry :: GeneticUnit -> CodonTable -> CloneEntry -> CloneEntry filterHighlyMutatedEntry !genUnit !table = newEntry where newEntry (!germline, !fseqs) = ( germline , map snd . filter (not . fst) . rights . assignMutated germline $ fseqs ) assignMutated k = map (isHighlyMutated k) isHighlyMutated !k !x = case (readSeq genUnit k, readSeq genUnit x) of ((Right a), (Right b)) -> (\n -> Right (n, b)) $ ( (fromIntegral (T.length (fastaSeq a)) :: Double) / 3 ) <= ( ( genericLength . realMutations (fastaSeq a) $ fastaSeq b ) ) ((Left a), (Right _)) -> error ("Error in germline: " ++ T.unpack a) ((Right _), (Left b)) -> error ("Error in sequence: " ++ T.unpack b) ((Left a), (Left b)) -> error (unwords [ "Error in sequence:" , T.unpack b , "with germline:" , T.unpack a ] ) realMutations k x = filterCodonMutStab (\(!y, !z) -> y /= z) . map snd . mutation k $ x filterCodonMutStab isWhat = filter (filterRules genUnit isWhat) filterRules AminoAcid isWhat x = isWhat x && not (inTuple '-' x) && not (inTuple '.' x) && not (inTuple '~' x) filterRules Nucleotide isWhat x = isWhat x && not (inTuple '-' x) && not (inTuple '.' x) && not (inTuple '~' x) && not (inTuple 'N' x) inTuple c (x, y) | c == x || c == y = True | otherwise = False mutation x y = zip [1..] . T.zip x $ y readSeq Nucleotide x = Right x readSeq AminoAcid x = customTranslate table 1 x