{-# LANGUAGE OverloadedStrings #-}
module SequenceTools.PileupCaller (callToDosage, Call(..), callGenotypeFromPileup,
    callMajorityAllele, findMajorityAlleles, callRandomAllele,
    callRandomDiploid, CallingMode(..),
    TransitionsMode(..), filterTransitions, cleanSSdamageAllSamples) where

import SequenceFormats.FreqSum (FreqSumEntry(..))
import SequenceFormats.Pileup (Strand(..))
import SequenceTools.Utils (sampleWithoutReplacement)

import Data.List (sortOn, group, sort)
import Pipes (Pipe, cat)
import qualified Pipes.Prelude as P

-- |A datatype to represent a single genotype call
data Call = HaploidCall Char | DiploidCall Char Char | MissingCall deriving (Int -> Call -> ShowS
[Call] -> ShowS
Call -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Call] -> ShowS
$cshowList :: [Call] -> ShowS
show :: Call -> String
$cshow :: Call -> String
showsPrec :: Int -> Call -> ShowS
$cshowsPrec :: Int -> Call -> ShowS
Show, Call -> Call -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Call -> Call -> Bool
$c/= :: Call -> Call -> Bool
== :: Call -> Call -> Bool
$c== :: Call -> Call -> Bool
Eq)

-- |A datatype to specify the calling mode
data CallingMode = MajorityCalling Bool | RandomCalling | RandomDiploidCalling deriving (Int -> CallingMode -> ShowS
[CallingMode] -> ShowS
CallingMode -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CallingMode] -> ShowS
$cshowList :: [CallingMode] -> ShowS
show :: CallingMode -> String
$cshow :: CallingMode -> String
showsPrec :: Int -> CallingMode -> ShowS
$cshowsPrec :: Int -> CallingMode -> ShowS
Show)

data TransitionsMode = TransitionsMissing | SkipTransitions | SingleStrandMode | AllSites deriving (TransitionsMode -> TransitionsMode -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TransitionsMode -> TransitionsMode -> Bool
$c/= :: TransitionsMode -> TransitionsMode -> Bool
== :: TransitionsMode -> TransitionsMode -> Bool
$c== :: TransitionsMode -> TransitionsMode -> Bool
Eq, Int -> TransitionsMode -> ShowS
[TransitionsMode] -> ShowS
TransitionsMode -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TransitionsMode] -> ShowS
$cshowList :: [TransitionsMode] -> ShowS
show :: TransitionsMode -> String
$cshow :: TransitionsMode -> String
showsPrec :: Int -> TransitionsMode -> ShowS
$cshowsPrec :: Int -> TransitionsMode -> ShowS
Show)

-- |a function to turn a call into the dosage of non-reference alleles
callToDosage :: Char -> Char -> Call -> Maybe Int
callToDosage :: Char -> Char -> Call -> Maybe Int
callToDosage Char
refA Char
altA Call
call = case Call
call of
    HaploidCall Char
a | Char
a forall a. Eq a => a -> a -> Bool
== Char
refA -> forall a. a -> Maybe a
Just Int
0
                  | Char
a forall a. Eq a => a -> a -> Bool
== Char
altA -> forall a. a -> Maybe a
Just Int
1
                  | Bool
otherwise -> forall a. Maybe a
Nothing
    DiploidCall Char
a1 Char
a2 | (Char
a1, Char
a2) forall a. Eq a => a -> a -> Bool
== (Char
refA, Char
refA) -> forall a. a -> Maybe a
Just Int
0
                      | (Char
a1, Char
a2) forall a. Eq a => a -> a -> Bool
== (Char
refA, Char
altA) -> forall a. a -> Maybe a
Just Int
1
                      | (Char
a1, Char
a2) forall a. Eq a => a -> a -> Bool
== (Char
altA, Char
refA) -> forall a. a -> Maybe a
Just Int
1
                      | (Char
a1, Char
a2) forall a. Eq a => a -> a -> Bool
== (Char
altA, Char
altA) -> forall a. a -> Maybe a
Just Int
2
                      | Bool
otherwise                -> forall a. Maybe a
Nothing
    Call
MissingCall -> forall a. Maybe a
Nothing

-- |Make a call from alleles 
callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call
callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call
callGenotypeFromPileup CallingMode
mode Int
minDepth String
alleles =
    if forall (t :: * -> *) a. Foldable t => t a -> Int
length String
alleles forall a. Ord a => a -> a -> Bool
< Int
minDepth then forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall else
        case CallingMode
mode of
            MajorityCalling Bool
withDownsampling -> Bool -> Int -> String -> IO Call
callMajorityAllele Bool
withDownsampling Int
minDepth String
alleles
            CallingMode
RandomCalling -> String -> IO Call
callRandomAllele String
alleles
            CallingMode
RandomDiploidCalling -> String -> IO Call
callRandomDiploid String
alleles

-- |Sample the majority allele, or one of the majority alleles
callMajorityAllele :: Bool -> Int-> String -> IO Call
callMajorityAllele :: Bool -> Int -> String -> IO Call
callMajorityAllele Bool
withDownsampling Int
minDepth String
alleles = do
    Maybe String
maybeAlleles <- if Bool
withDownsampling
        then forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
minDepth
        else forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just String
alleles)
    case Maybe String
maybeAlleles of
        Maybe String
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
        Just String
alleles' -> do
            Char
a <- case ShowS
findMajorityAlleles String
alleles' of
                [] -> forall a. HasCallStack => String -> a
error String
"should not happen"
                [Char
a'] -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
a'
                String
listA -> do
                    Maybe String
r <- forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
listA Int
1
                    case Maybe String
r of
                        Just [Char
r'] -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
r'
                        Maybe String
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Call
HaploidCall Char
a

-- |Find the majority allele(s)
findMajorityAlleles :: String -> [Char]
findMajorityAlleles :: ShowS
findMajorityAlleles String
alleles =
    let groupedAlleles :: [(Int, Char)]
groupedAlleles = forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn forall a b. (a, b) -> a
fst [(forall (t :: * -> *) a. Foldable t => t a -> Int
length String
g, forall a. [a] -> a
head String
g) | String
g <- forall a. Eq a => [a] -> [[a]]
group forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => [a] -> [a]
sort forall a b. (a -> b) -> a -> b
$ String
alleles]
        majorityCount :: Int
majorityCount = forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
last forall a b. (a -> b) -> a -> b
$ [(Int, Char)]
groupedAlleles
    in  [Char
a | (Int
n, Char
a) <- [(Int, Char)]
groupedAlleles, Int
n forall a. Eq a => a -> a -> Bool
== Int
majorityCount]

-- |call a random allele
callRandomAllele :: String -> IO Call
callRandomAllele :: String -> IO Call
callRandomAllele String
alleles = do
    Maybe String
res <- forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
1
    case Maybe String
res of
        Maybe String
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
        Just [Char
a] -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Call
HaploidCall Char
a
        Maybe String
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"

-- |call two random alleles
callRandomDiploid :: String -> IO Call
callRandomDiploid :: String -> IO Call
callRandomDiploid String
alleles = do
    Maybe String
res <- forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
2
    case Maybe String
res of
        Maybe String
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
        Just [Char
a1, Char
a2] -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Char -> Call
DiploidCall Char
a1 Char
a2
        Maybe String
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"

filterTransitions :: (Monad m) => TransitionsMode -> Pipe FreqSumEntry FreqSumEntry m ()
filterTransitions :: forall (m :: * -> *).
Monad m =>
TransitionsMode -> Pipe FreqSumEntry FreqSumEntry m ()
filterTransitions TransitionsMode
transversionsMode =
    case TransitionsMode
transversionsMode of
        TransitionsMode
SkipTransitions ->
            forall (m :: * -> *) a r. Functor m => (a -> Bool) -> Pipe a a m r
P.filter (\(FreqSumEntry Chrom
_ Int
_ Maybe ByteString
_ Maybe Double
_ Char
ref Char
alt [Maybe Int]
_) -> Char -> Char -> Bool
isTransversion Char
ref Char
alt)
        TransitionsMode
TransitionsMissing ->
            forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\(FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
id_ Maybe Double
gpos Char
ref Char
alt [Maybe Int]
calls) ->
                let calls' :: [Maybe Int]
calls' = if Char -> Char -> Bool
isTransversion Char
ref Char
alt then [Maybe Int]
calls else [forall a. Maybe a
Nothing | Maybe Int
_ <- [Maybe Int]
calls]
                in  Chrom
-> Int
-> Maybe ByteString
-> Maybe Double
-> Char
-> Char
-> [Maybe Int]
-> FreqSumEntry
FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
id_ Maybe Double
gpos Char
ref Char
alt [Maybe Int]
calls')
        TransitionsMode
_ -> forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat
  where
    isTransversion :: Char -> Char -> Bool
isTransversion Char
ref Char
alt = Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isTransition Char
ref Char
alt
    isTransition :: Char -> Char -> Bool
isTransition Char
ref Char
alt =
        ((Char
ref forall a. Eq a => a -> a -> Bool
== Char
'A') Bool -> Bool -> Bool
&& (Char
alt forall a. Eq a => a -> a -> Bool
== Char
'G')) Bool -> Bool -> Bool
||
        ((Char
ref forall a. Eq a => a -> a -> Bool
== Char
'G') Bool -> Bool -> Bool
&& (Char
alt forall a. Eq a => a -> a -> Bool
== Char
'A')) Bool -> Bool -> Bool
||
        ((Char
ref forall a. Eq a => a -> a -> Bool
== Char
'C') Bool -> Bool -> Bool
&& (Char
alt forall a. Eq a => a -> a -> Bool
== Char
'T')) Bool -> Bool -> Bool
||
        ((Char
ref forall a. Eq a => a -> a -> Bool
== Char
'T') Bool -> Bool -> Bool
&& (Char
alt forall a. Eq a => a -> a -> Bool
== Char
'C'))

cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String]
cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String]
cleanSSdamageAllSamples Char
ref Char
alt [String]
basesPerSample [[Strand]]
strandPerSample
    | (Char
ref, Char
alt) forall a. Eq a => a -> a -> Bool
== (Char
'C', Char
'T') Bool -> Bool -> Bool
|| (Char
ref, Char
alt) forall a. Eq a => a -> a -> Bool
== (Char
'T', Char
'C') =
        [String -> [Strand] -> String
removeForwardBases String
bases [Strand]
strands | (String
bases, [Strand]
strands) <- forall a b. [a] -> [b] -> [(a, b)]
zip [String]
basesPerSample [[Strand]]
strandPerSample]
    | (Char
ref, Char
alt) forall a. Eq a => a -> a -> Bool
== (Char
'G', Char
'A') Bool -> Bool -> Bool
|| (Char
ref, Char
alt) forall a. Eq a => a -> a -> Bool
== (Char
'A', Char
'G') =
        [String -> [Strand] -> String
removeReverseBases String
bases [Strand]
strands | (String
bases, [Strand]
strands) <- forall a b. [a] -> [b] -> [(a, b)]
zip [String]
basesPerSample [[Strand]]
strandPerSample]
    | Bool
otherwise =
        [String]
basesPerSample
  where
    removeForwardBases :: String -> [Strand] -> String
removeForwardBases = Strand -> String -> [Strand] -> String
removeReads Strand
ForwardStrand
    removeReverseBases :: String -> [Strand] -> String
removeReverseBases = Strand -> String -> [Strand] -> String
removeReads Strand
ReverseStrand

removeReads :: Strand -> String -> [Strand] -> String
removeReads :: Strand -> String -> [Strand] -> String
removeReads Strand
strand String
bases [Strand]
strands = [Char
b | (Char
b, Strand
s) <- forall a b. [a] -> [b] -> [(a, b)]
zip String
bases [Strand]
strands, Strand
s forall a. Eq a => a -> a -> Bool
/= Strand
strand]