{- Filter
Gregory W. Schwartz

Collects the functions pertaining to the filtering of reads and clones.
-}

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DuplicateRecordFields #-}

module Filter
    ( convertHighFreqToNormal
    , filterFrequency
    ) where

-- Standard
import Data.List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set

-- Cabal
import qualified Data.ByteString.Lazy.Char8 as B

-- Local
import Types

-- | Turn a read into a \"Normal\" read: the @d*@ and @s*@ fields listed below
-- are set to the empty string and the classification becomes @Normal@. Every
-- other field of the record is left as it was.
convertReadToNormal :: PrintITD -> PrintITD
convertReadToNormal itd =
    itd
        { classification  = "Normal"
        , dSubstring      = ""
        , dLocations      = ""
        , dMutations      = ""
        , sSubstring      = ""
        , sLocation       = ""
        , sOtherLocations = ""
        }

-- | Collect the values of the selected field whose frequency across the reads
-- is strictly greater than the threshold. The selected field is 'dSubstring'
-- for 'Substring' and 'dLocations' for 'Position'. The frequency of a value is
-- the number of reads carrying it ('Absolute'), or that number divided by the
-- total number of reads ('Fraction').
getFrequentDuplications :: FilterType
                        -> AbsoluteOrFraction
                        -> Frequency
                        -> [PrintITD]
                        -> Set.Set B.ByteString
getFrequentDuplications filterType absOrFrac (Frequency freq) xs =
    Map.keysSet . Map.filter (> freq) . Map.map scale $ tallies
  where
    -- Each read contributes a count of one to the value of its chosen field.
    tallies = Map.fromListWith (+) [ (fieldOf itd, 1) | itd <- xs ]
    fieldOf itd = case filterType of
        Substring -> dSubstring (itd :: PrintITD)
        Position  -> dLocations (itd :: PrintITD)
    scale count = case absOrFrac of
        Absolute -> count
        Fraction -> count / totalReads
    totalReads = genericLength xs

-- | Convert high frequency duplication reads to normal reads.
--
-- The set of frequent values is computed by 'getFrequentDuplications' from the
-- same arguments. A read is passed through 'convertReadToNormal' when the
-- value of its filter field ('dSubstring' for 'Substring', 'dLocations' for
-- 'Position') is a member of that set; all other reads are returned unchanged.
convertHighFreqToNormal :: FilterType
                        -> AbsoluteOrFraction
                        -> Frequency
                        -> [PrintITD]
                        -> [PrintITD]
convertHighFreqToNormal filterType absOrFrac freq xs =
    fmap normalizeIfFrequent xs
  where
    highSet = getFrequentDuplications filterType absOrFrac freq xs
    -- The lookup key must be the same field the set was built from; always
    -- using 'dSubstring' would compare substrings against a set of locations
    -- when filtering by 'Position'.
    keyOf x = case filterType of
        Substring -> dSubstring (x :: PrintITD)
        Position  -> dLocations (x :: PrintITD)
    normalizeIfFrequent x
        | Set.member (keyOf x) highSet = convertReadToNormal x
        | otherwise                    = x

-- | Keep only the entries whose @frequency@ field is strictly greater than
-- the given threshold; entries at or below the threshold are dropped.
filterFrequency :: Frequency -> [PrintWithCloneID] -> [PrintWithCloneID]
filterFrequency (Frequency freq) entries =
    [ entry
    | entry <- entries
    , frequency (entry :: PrintWithCloneID) > freq
    ]