module FilterCloneMap where
import Data.List
import Data.Char
import Data.Maybe
import Data.Either
import qualified Data.Set as S
import qualified Data.Map as M
import Text.Regex.TDFA
import Text.Regex.TDFA.Text
import qualified Data.Text as T
import Data.Fasta.Text
import Types
import Diversity
isRight' :: Either a b -> Bool
isRight' (Right _) = True
isRight' _ = False
listToMaybe' :: [a] -> Maybe [a]
listToMaybe' [] = Nothing
listToMaybe' x = Just x
filterHighlyMutated :: GeneticUnit
-> CodonTable
-> CloneMap
-> (CloneMap, Maybe String)
filterHighlyMutated !genUnit !table !cloneMap = (newCloneMap, errorString)
where
newCloneMap = M.map (map snd . filter (not . fst) . rights)
errorCloneMap
errorString = listToMaybe'
. unlines
. filter (not . null)
. map snd
. M.toAscList
. M.map (intercalate "\n" . lefts)
$ errorCloneMap
errorCloneMap = M.mapWithKey assignMutated cloneMap
assignMutated k = map (isHighlyMutated (snd k))
isHighlyMutated !k !x =
case (readSeq genUnit k, readSeq genUnit x) of
((Right a), (Right b)) -> (\n -> Right (n, b))
$ ( (fromIntegral (T.length (fastaSeq a)) :: Double)
/ 3 )
<= ( ( genericLength
. realMutations (fastaSeq a)
$ fastaSeq b ) )
((Left a), (Right _)) -> Left (unwords ["Germline: ", T.unpack a])
((Right _), (Left b)) -> Left (unwords ["Sequence: ", T.unpack b])
((Left a), (Left b)) -> Left (unwords [ "Sequence:"
, T.unpack b
, "with Germline:"
, T.unpack a ] )
realMutations k x = filterCodonMutStab (\(!y, !z) -> y /= z)
. map snd
. mutation k
$ x
filterCodonMutStab isWhat = filter (filterRules genUnit isWhat)
filterRules AminoAcid isWhat x = isWhat x
&& not (inTuple '-' x)
&& not (inTuple '.' x)
&& not (inTuple '~' x)
filterRules Nucleotide isWhat x = isWhat x
&& not (inTuple '-' x)
&& not (inTuple '.' x)
&& not (inTuple '~' x)
&& not (inTuple 'N' x)
inTuple c (x, y)
| c == x || c == y = True
| otherwise = False
mutation x y = zip [1..] . T.zip x $ y
readSeq Nucleotide x = Right x
readSeq AminoAcid x = customTranslate table 1 x
removeCodonMutCount :: CodonMut -> T.Text -> T.Text -> CloneMap -> CloneMap
removeCodonMutCount codonMut codonMutType mutType = M.mapWithKey mapRemove
where
mapRemove (_, germ) = map (removeCodon germ)
removeCodon germ clone = clone { fastaSeq
= remove (fastaSeq germ)
. fastaSeq $ clone }
remove germSeq = mconcat
. map (snd . replaceCodon)
. zip (codonSplit germSeq)
. codonSplit
replaceCodon (x, y)
| (codonMutOp codonMutType) (hamming x y) codonMut
&& isMutType (T.toUpper mutType) x y = (x, y)
| otherwise = ("---", "---")
codonSplit = fullCodon . T.chunksOf 3
fullCodon = filter ((== 3) . T.length)
codonMutOp ">" = (>)
codonMutOp "<" = (<)
codonMutOp "=" = (==)
isMutType "REPLACEMENT" x y = codon2aa x /= codon2aa y
isMutType "SILENT" x y = codon2aa x == codon2aa y
isMutType _ _ _ = True
removeStopsCloneMap :: GeneticUnit
-> CodonTable
-> Int
-> CloneMap
-> (CloneMap, Maybe String)
removeStopsCloneMap !genUnit !table !stopRange !cloneMap = ( newCloneMap
, errorString )
where
errorString = listToMaybe'
. unlines
. filter (not . null)
. map snd
. M.toAscList
. M.map ( intercalate "\n"
. map T.unpack
. lefts
. map (customTranslate table 1)
)
$ cloneMap
newCloneMap = M.map (filter (filterStops genUnit)) cloneMap
filterStops Nucleotide x = (isRight' . customTranslate table 1 $ x)
&& ( not
. T.isInfixOf "*"
. T.take stopRange
. fastaSeq
. fromEither
. customTranslate table 1 ) x
filterStops AminoAcid x = not
. T.isInfixOf "*"
. T.take stopRange
. fastaSeq
$ x
fromEither (Right x) = x
fromEither (Left x) = error (T.unpack x)
removeDuplicatesCloneMap :: CloneMap -> CloneMap
removeDuplicatesCloneMap cloneMap = M.map
(filter (`S.member` duplicateSet))
cloneMap
where
duplicateSet = S.fromList
. nubBy (\x y -> fastaSeq x == fastaSeq y)
. concatMap snd
. M.toAscList
$ cloneMap
removeOutOfFrameSeqs :: CloneMap -> CloneMap
removeOutOfFrameSeqs = M.map (filter isInFrame)
where
isInFrame = (== 0)
. mod 3
. T.length
. T.filter (\x -> not $ T.isInfixOf (T.singleton x) ".-")
. fastaSeq
removeCustomFilter :: Bool
-> Bool
-> Maybe Int
-> T.Text
-> CloneMap
-> CloneMap
removeCustomFilter germ rm customField customFilter cloneMap
| germ && ((customField == Just 0) || (isNothing customField))
= M.filterWithKey (\(_, k) _ -> inField k) cloneMap
| germ && customField > Just 0
= M.filterWithKey (\(_, k) _ -> inCustomField k) cloneMap
| (customField == Just 0) || (isNothing customField)
= M.map (filter inField) cloneMap
| customField > Just 0 =
M.map (filter inCustomField) cloneMap
where
inField = equal rm customFilter . fastaHeader
inCustomField x = equal rm customFilter
. (!!) (T.splitOn "|" . fastaHeader $ x)
$ (fromJust customField 1)
equal False x y = y =~ x :: Bool
equal True x y = not . equal False x $ y
removeAllCustomFilters :: Bool
-> Bool
-> CloneMap
-> [(Maybe Int, T.Text)]
-> CloneMap
removeAllCustomFilters germ rm = foldl' filterMap
where
filterMap acc (x, y) = removeCustomFilter germ rm x y acc
removeEmptyClone :: CloneMap -> CloneMap
removeEmptyClone = M.filter (not . null)
convertToAminoAcidsCloneMap :: CodonTable
-> CloneMap
-> (CloneMap, Maybe String)
convertToAminoAcidsCloneMap table cloneMap = (newCloneMap, errorString)
where
newCloneMap = M.mapKeysWith (++) (\(!x, !y) -> (x, fromEither y))
. M.filterWithKey (\(_, !y) _ -> isRight' y)
. M.map rights
$ errorCloneMap
errorString = listToMaybe'
. concatMap snd
. M.toAscList
. M.mapWithKey (\(_, !y) v -> (++) (eitherToString y)
. concatMap T.unpack
. lefts
$ v )
$ errorCloneMap
errorCloneMap = M.mapKeys keyMap
. M.map (map (customTranslate table 1))
$ cloneMap
keyMap (!x, !y) = (x, customTranslate table 1 y)
eitherToString (Right _) = ""
eitherToString (Left x) = T.unpack x
fromEither (Right x) = x
fromEither (Left x) = error (T.unpack x)