{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DuplicateRecordFields #-}
module Filter
( convertHighFreqToNormal
, filterFrequency
) where
import Data.List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.ByteString.Lazy.Char8 as B
import Types
-- | Turn a read into a \"Normal\" read.
--
-- The fields @dSubstring@, @dLocations@, @dMutations@, @sSubstring@,
-- @sLocation@ and @sOtherLocations@ are reset to the empty string and
-- @classification@ is set to @\"Normal\"@. Every other field of the record is
-- left untouched.
convertReadToNormal :: PrintITD -> PrintITD
convertReadToNormal itd =
    itd { dSubstring      = ""
        , dLocations      = ""
        , dMutations      = ""
        , sSubstring      = ""
        , sLocation       = ""
        , sOtherLocations = ""
        , classification  = "Normal"
        }
-- | Collect the field values that occur too often across a list of reads.
--
-- The 'FilterType' selects which field is tallied: @dSubstring@ for
-- 'Substring', @dLocations@ for 'Position'. Every read in the input
-- contributes one occurrence of its field value. A value is returned when its
-- tally is strictly greater than the threshold:
--
-- * 'Absolute' compares the raw occurrence count.
-- * 'Fraction' compares the occurrence count divided by the total number of
--   reads in the input list.
getFrequentDuplications :: FilterType
                        -> AbsoluteOrFraction
                        -> Frequency
                        -> [PrintITD]
                        -> Set.Set B.ByteString
getFrequentDuplications filterType absOrFrac (Frequency freq) xs =
    Map.keysSet (Map.filter exceeds tallies)
  where
    -- Occurrence count per selected field value.
    tallies = Map.fromListWith (+) [ (keyOf x, 1) | x <- xs ]
    -- Field the tally is keyed on.
    keyOf x = case filterType of
        Substring -> dSubstring (x :: PrintITD)
        Position  -> dLocations (x :: PrintITD)
    -- Strict comparison: a tally equal to the threshold is not reported.
    exceeds count = scale count > freq
    scale count = case absOrFrac of
        Absolute -> count
        Fraction -> count / total
    -- Total number of reads, used as the denominator for 'Fraction'.
    total = genericLength xs
-- | Convert every read that carries an overly frequent field value into a
-- normal read (see 'convertReadToNormal'); all other reads pass through
-- unchanged and the order of the list is preserved.
--
-- The set of frequent values is computed by 'getFrequentDuplications' from
-- the field selected by the 'FilterType' (@dSubstring@ for 'Substring',
-- @dLocations@ for 'Position'), using the given 'AbsoluteOrFraction' mode and
-- 'Frequency' threshold over the same list of reads.
convertHighFreqToNormal :: FilterType
                        -> AbsoluteOrFraction
                        -> Frequency
                        -> [PrintITD]
                        -> [PrintITD]
convertHighFreqToNormal filterType absOrFrac freq xs = fmap normalize xs
  where
    highSet = getFrequentDuplications filterType absOrFrac freq xs
    normalize x
        | Set.member (filterKey x) highSet = convertReadToNormal x
        | otherwise                        = x
    -- Membership must be tested with the same field that was used to build
    -- 'highSet'; looking up @dSubstring@ in a set of @dLocations@ values
    -- would make the 'Position' filter a no-op.
    filterKey x = case filterType of
        Substring -> dSubstring (x :: PrintITD)
        Position  -> dLocations (x :: PrintITD)
-- | Keep only the entries whose @frequency@ field is strictly greater than
-- the given threshold; the relative order of the kept entries is unchanged.
filterFrequency :: Frequency -> [PrintWithCloneID] -> [PrintWithCloneID]
filterFrequency (Frequency cutoff) entries =
    [ entry
    | entry <- entries
    , frequency (entry :: PrintWithCloneID) > cutoff
    ]