{-# LANGUAGE OverloadedStrings #-}
module SequenceTools.PileupCaller (callToDosage, Call(..), callGenotypeFromPileup,
    callMajorityAllele, findMajorityAlleles, callRandomAllele,
    callRandomDiploid, CallingMode(..),
    TransitionsMode(..), filterTransitions, cleanSSdamageAllSamples) where

import SequenceFormats.FreqSum (FreqSumEntry(..))
import SequenceFormats.Pileup (Strand(..))
import SequenceTools.Utils (sampleWithoutReplacement)

import Data.List (sortOn, group, sort)
import Pipes (Pipe, cat)
import qualified Pipes.Prelude as P

-- |A datatype to represent a single genotype call
data Call = HaploidCall Char | DiploidCall Char Char | MissingCall deriving (Int -> Call -> ShowS
[Call] -> ShowS
Call -> String
(Int -> Call -> ShowS)
-> (Call -> String) -> ([Call] -> ShowS) -> Show Call
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Call] -> ShowS
$cshowList :: [Call] -> ShowS
show :: Call -> String
$cshow :: Call -> String
showsPrec :: Int -> Call -> ShowS
$cshowsPrec :: Int -> Call -> ShowS
Show, Call -> Call -> Bool
(Call -> Call -> Bool) -> (Call -> Call -> Bool) -> Eq Call
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Call -> Call -> Bool
$c/= :: Call -> Call -> Bool
== :: Call -> Call -> Bool
$c== :: Call -> Call -> Bool
Eq)

-- |A datatype to specify the calling mode
data CallingMode = MajorityCalling Bool | RandomCalling | RandomDiploidCalling deriving (Int -> CallingMode -> ShowS
[CallingMode] -> ShowS
CallingMode -> String
(Int -> CallingMode -> ShowS)
-> (CallingMode -> String)
-> ([CallingMode] -> ShowS)
-> Show CallingMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CallingMode] -> ShowS
$cshowList :: [CallingMode] -> ShowS
show :: CallingMode -> String
$cshow :: CallingMode -> String
showsPrec :: Int -> CallingMode -> ShowS
$cshowsPrec :: Int -> CallingMode -> ShowS
Show)

data TransitionsMode = TransitionsMissing | SkipTransitions | SingleStrandMode | AllSites deriving (TransitionsMode -> TransitionsMode -> Bool
(TransitionsMode -> TransitionsMode -> Bool)
-> (TransitionsMode -> TransitionsMode -> Bool)
-> Eq TransitionsMode
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TransitionsMode -> TransitionsMode -> Bool
$c/= :: TransitionsMode -> TransitionsMode -> Bool
== :: TransitionsMode -> TransitionsMode -> Bool
$c== :: TransitionsMode -> TransitionsMode -> Bool
Eq, Int -> TransitionsMode -> ShowS
[TransitionsMode] -> ShowS
TransitionsMode -> String
(Int -> TransitionsMode -> ShowS)
-> (TransitionsMode -> String)
-> ([TransitionsMode] -> ShowS)
-> Show TransitionsMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TransitionsMode] -> ShowS
$cshowList :: [TransitionsMode] -> ShowS
show :: TransitionsMode -> String
$cshow :: TransitionsMode -> String
showsPrec :: Int -> TransitionsMode -> ShowS
$cshowsPrec :: Int -> TransitionsMode -> ShowS
Show)

-- |a function to turn a call into the dosage of non-reference alleles
callToDosage :: Char -> Char -> Call -> Maybe Int
callToDosage :: Char -> Char -> Call -> Maybe Int
callToDosage Char
refA Char
altA Call
call = case Call
call of
    HaploidCall Char
a | Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
refA -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
                  | Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
altA -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1
                  | Bool
otherwise -> Maybe Int
forall a. Maybe a
Nothing
    DiploidCall Char
a1 Char
a2 | (Char
a1, Char
a2) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
refA, Char
refA) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
                      | (Char
a1, Char
a2) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
refA, Char
altA) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1
                      | (Char
a1, Char
a2) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
altA, Char
refA) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1
                      | (Char
a1, Char
a2) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
altA, Char
altA) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
2
                      | Bool
otherwise                -> Maybe Int
forall a. Maybe a
Nothing
    Call
MissingCall -> Maybe Int
forall a. Maybe a
Nothing

-- |Make a call from alleles 
callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call
callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call
callGenotypeFromPileup CallingMode
mode Int
minDepth String
alleles =
    if String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
alleles Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
minDepth then Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall else
        case CallingMode
mode of
            MajorityCalling Bool
withDownsampling -> Bool -> Int -> String -> IO Call
callMajorityAllele Bool
withDownsampling Int
minDepth String
alleles
            CallingMode
RandomCalling -> String -> IO Call
callRandomAllele String
alleles
            CallingMode
RandomDiploidCalling -> String -> IO Call
callRandomDiploid String
alleles

-- |Sample the majority allele, or one of the majority alleles
callMajorityAllele :: Bool -> Int-> String -> IO Call
callMajorityAllele :: Bool -> Int -> String -> IO Call
callMajorityAllele Bool
withDownsampling Int
minDepth String
alleles = do
    Maybe String
maybeAlleles <- if Bool
withDownsampling
        then String -> Int -> IO (Maybe String)
forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
minDepth
        else Maybe String -> IO (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Maybe String
forall a. a -> Maybe a
Just String
alleles)
    case Maybe String
maybeAlleles of
        Maybe String
Nothing -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
        Just String
alleles' -> do
            Char
a <- case ShowS
findMajorityAlleles String
alleles' of
                [] -> String -> IO Char
forall a. HasCallStack => String -> a
error String
"should not happen"
                [Char
a'] -> Char -> IO Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
a'
                String
listA -> do
                    Maybe String
r <- String -> Int -> IO (Maybe String)
forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
listA Int
1
                    case Maybe String
r of
                        Just [Char
r'] -> Char -> IO Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
r'
                        Maybe String
_ -> String -> IO Char
forall a. HasCallStack => String -> a
error String
"should not happen"
            Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return (Call -> IO Call) -> Call -> IO Call
forall a b. (a -> b) -> a -> b
$ Char -> Call
HaploidCall Char
a

-- |Find the majority allele(s)
findMajorityAlleles :: String -> [Char]
findMajorityAlleles :: ShowS
findMajorityAlleles String
alleles =
    let groupedAlleles :: [(Int, Char)]
groupedAlleles = ((Int, Char) -> Int) -> [(Int, Char)] -> [(Int, Char)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (Int, Char) -> Int
forall a b. (a, b) -> a
fst [(String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
g, String -> Char
forall a. [a] -> a
head String
g) | String
g <- String -> [String]
forall a. Eq a => [a] -> [[a]]
group (String -> [String]) -> ShowS -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. Ord a => [a] -> [a]
sort (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ String
alleles]
        majorityCount :: Int
majorityCount = (Int, Char) -> Int
forall a b. (a, b) -> a
fst ((Int, Char) -> Int)
-> ([(Int, Char)] -> (Int, Char)) -> [(Int, Char)] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, Char)] -> (Int, Char)
forall a. [a] -> a
last ([(Int, Char)] -> Int) -> [(Int, Char)] -> Int
forall a b. (a -> b) -> a -> b
$ [(Int, Char)]
groupedAlleles
    in  [Char
a | (Int
n, Char
a) <- [(Int, Char)]
groupedAlleles, Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
majorityCount]

-- |call a random allele
callRandomAllele :: String -> IO Call
callRandomAllele :: String -> IO Call
callRandomAllele String
alleles = do
    Maybe String
res <- String -> Int -> IO (Maybe String)
forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
1
    case Maybe String
res of
        Maybe String
Nothing -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
        Just [Char
a] -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return (Call -> IO Call) -> Call -> IO Call
forall a b. (a -> b) -> a -> b
$ Char -> Call
HaploidCall Char
a
        Maybe String
_ -> String -> IO Call
forall a. HasCallStack => String -> a
error String
"should not happen"

-- |call two random alleles
callRandomDiploid :: String -> IO Call
callRandomDiploid :: String -> IO Call
callRandomDiploid String
alleles = do
    Maybe String
res <- String -> Int -> IO (Maybe String)
forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
2
    case Maybe String
res of
        Maybe String
Nothing -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
        Just [Char
a1, Char
a2] -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return (Call -> IO Call) -> Call -> IO Call
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Call
DiploidCall Char
a1 Char
a2
        Maybe String
_ -> String -> IO Call
forall a. HasCallStack => String -> a
error String
"should not happen"

filterTransitions :: (Monad m) => TransitionsMode -> Pipe FreqSumEntry FreqSumEntry m ()
filterTransitions :: TransitionsMode -> Pipe FreqSumEntry FreqSumEntry m ()
filterTransitions TransitionsMode
transversionsMode =
    case TransitionsMode
transversionsMode of
        TransitionsMode
SkipTransitions ->
            (FreqSumEntry -> Bool) -> Pipe FreqSumEntry FreqSumEntry m ()
forall (m :: * -> *) a r. Functor m => (a -> Bool) -> Pipe a a m r
P.filter (\(FreqSumEntry Chrom
_ Int
_ Maybe ByteString
_ Maybe Double
_ Char
ref Char
alt [Maybe Int]
_) -> Char -> Char -> Bool
isTransversion Char
ref Char
alt)
        TransitionsMode
TransitionsMissing ->
            (FreqSumEntry -> FreqSumEntry)
-> Pipe FreqSumEntry FreqSumEntry m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\(FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
id_ Maybe Double
gpos Char
ref Char
alt [Maybe Int]
calls) ->
                let calls' :: [Maybe Int]
calls' = if Char -> Char -> Bool
isTransversion Char
ref Char
alt then [Maybe Int]
calls else [Maybe Int
forall a. Maybe a
Nothing | Maybe Int
_ <- [Maybe Int]
calls]
                in  Chrom
-> Int
-> Maybe ByteString
-> Maybe Double
-> Char
-> Char
-> [Maybe Int]
-> FreqSumEntry
FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
id_ Maybe Double
gpos Char
ref Char
alt [Maybe Int]
calls')
        TransitionsMode
_ -> Pipe FreqSumEntry FreqSumEntry m ()
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat
  where
    isTransversion :: Char -> Char -> Bool
isTransversion Char
ref Char
alt = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isTransition Char
ref Char
alt
    isTransition :: Char -> Char -> Bool
isTransition Char
ref Char
alt =
        ((Char
ref Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'A') Bool -> Bool -> Bool
&& (Char
alt Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'G')) Bool -> Bool -> Bool
||
        ((Char
ref Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'G') Bool -> Bool -> Bool
&& (Char
alt Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'A')) Bool -> Bool -> Bool
||
        ((Char
ref Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'C') Bool -> Bool -> Bool
&& (Char
alt Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'T')) Bool -> Bool -> Bool
||
        ((Char
ref Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'T') Bool -> Bool -> Bool
&& (Char
alt Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'C'))

cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String]
cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String]
cleanSSdamageAllSamples Char
ref Char
alt [String]
basesPerSample [[Strand]]
strandPerSample
    | (Char
ref, Char
alt) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
'C', Char
'T') Bool -> Bool -> Bool
|| (Char
ref, Char
alt) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
'T', Char
'C') =
        [String -> [Strand] -> String
removeForwardBases String
bases [Strand]
strands | (String
bases, [Strand]
strands) <- [String] -> [[Strand]] -> [(String, [Strand])]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
basesPerSample [[Strand]]
strandPerSample]
    | (Char
ref, Char
alt) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
'G', Char
'A') Bool -> Bool -> Bool
|| (Char
ref, Char
alt) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
'A', Char
'G') =
        [String -> [Strand] -> String
removeReverseBases String
bases [Strand]
strands | (String
bases, [Strand]
strands) <- [String] -> [[Strand]] -> [(String, [Strand])]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
basesPerSample [[Strand]]
strandPerSample]
    | Bool
otherwise =
        [String]
basesPerSample
  where
    removeForwardBases :: String -> [Strand] -> String
removeForwardBases = Strand -> String -> [Strand] -> String
removeReads Strand
ForwardStrand
    removeReverseBases :: String -> [Strand] -> String
removeReverseBases = Strand -> String -> [Strand] -> String
removeReads Strand
ReverseStrand

removeReads :: Strand -> String -> [Strand] -> String
removeReads :: Strand -> String -> [Strand] -> String
removeReads Strand
strand String
bases [Strand]
strands = [Char
b | (Char
b, Strand
s) <- String -> [Strand] -> [(Char, Strand)]
forall a b. [a] -> [b] -> [(a, b)]
zip String
bases [Strand]
strands, Strand
s Strand -> Strand -> Bool
forall a. Eq a => a -> a -> Bool
/= Strand
strand]