{-# LANGUAGE OverloadedStrings #-}
module SequenceTools.PileupCaller (callToDosage, Call(..), callGenotypeFromPileup,
callMajorityAllele, findMajorityAlleles, callRandomAllele,
callRandomDiploid, CallingMode(..),
TransitionsMode(..), filterTransitions, cleanSSdamageAllSamples) where
import SequenceFormats.FreqSum (FreqSumEntry(..))
import SequenceFormats.Pileup (Strand(..))
import SequenceTools.Utils (sampleWithoutReplacement)
import Data.List (sortOn, group, sort)
import Pipes (Pipe, cat)
import qualified Pipes.Prelude as P
data Call = HaploidCall Char | DiploidCall Char Char | MissingCall deriving (Int -> Call -> ShowS
[Call] -> ShowS
Call -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Call] -> ShowS
$cshowList :: [Call] -> ShowS
show :: Call -> String
$cshow :: Call -> String
showsPrec :: Int -> Call -> ShowS
$cshowsPrec :: Int -> Call -> ShowS
Show, Call -> Call -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Call -> Call -> Bool
$c/= :: Call -> Call -> Bool
== :: Call -> Call -> Bool
$c== :: Call -> Call -> Bool
Eq)
data CallingMode = MajorityCalling Bool | RandomCalling | RandomDiploidCalling deriving (Int -> CallingMode -> ShowS
[CallingMode] -> ShowS
CallingMode -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CallingMode] -> ShowS
$cshowList :: [CallingMode] -> ShowS
show :: CallingMode -> String
$cshow :: CallingMode -> String
showsPrec :: Int -> CallingMode -> ShowS
$cshowsPrec :: Int -> CallingMode -> ShowS
Show)
data TransitionsMode = TransitionsMissing | SkipTransitions | SingleStrandMode | AllSites deriving (TransitionsMode -> TransitionsMode -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TransitionsMode -> TransitionsMode -> Bool
$c/= :: TransitionsMode -> TransitionsMode -> Bool
== :: TransitionsMode -> TransitionsMode -> Bool
$c== :: TransitionsMode -> TransitionsMode -> Bool
Eq, Int -> TransitionsMode -> ShowS
[TransitionsMode] -> ShowS
TransitionsMode -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TransitionsMode] -> ShowS
$cshowList :: [TransitionsMode] -> ShowS
show :: TransitionsMode -> String
$cshow :: TransitionsMode -> String
showsPrec :: Int -> TransitionsMode -> ShowS
$cshowsPrec :: Int -> TransitionsMode -> ShowS
Show)
callToDosage :: Char -> Char -> Call -> Maybe Int
callToDosage :: Char -> Char -> Call -> Maybe Int
callToDosage Char
refA Char
altA Call
call = case Call
call of
HaploidCall Char
a | Char
a forall a. Eq a => a -> a -> Bool
== Char
refA -> forall a. a -> Maybe a
Just Int
0
| Char
a forall a. Eq a => a -> a -> Bool
== Char
altA -> forall a. a -> Maybe a
Just Int
1
| Bool
otherwise -> forall a. Maybe a
Nothing
DiploidCall Char
a1 Char
a2 | (Char
a1, Char
a2) forall a. Eq a => a -> a -> Bool
== (Char
refA, Char
refA) -> forall a. a -> Maybe a
Just Int
0
| (Char
a1, Char
a2) forall a. Eq a => a -> a -> Bool
== (Char
refA, Char
altA) -> forall a. a -> Maybe a
Just Int
1
| (Char
a1, Char
a2) forall a. Eq a => a -> a -> Bool
== (Char
altA, Char
refA) -> forall a. a -> Maybe a
Just Int
1
| (Char
a1, Char
a2) forall a. Eq a => a -> a -> Bool
== (Char
altA, Char
altA) -> forall a. a -> Maybe a
Just Int
2
| Bool
otherwise -> forall a. Maybe a
Nothing
Call
MissingCall -> forall a. Maybe a
Nothing
callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call
callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call
callGenotypeFromPileup CallingMode
mode Int
minDepth String
alleles =
if forall (t :: * -> *) a. Foldable t => t a -> Int
length String
alleles forall a. Ord a => a -> a -> Bool
< Int
minDepth then forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall else
case CallingMode
mode of
MajorityCalling Bool
withDownsampling -> Bool -> Int -> String -> IO Call
callMajorityAllele Bool
withDownsampling Int
minDepth String
alleles
CallingMode
RandomCalling -> String -> IO Call
callRandomAllele String
alleles
CallingMode
RandomDiploidCalling -> String -> IO Call
callRandomDiploid String
alleles
callMajorityAllele :: Bool -> Int-> String -> IO Call
callMajorityAllele :: Bool -> Int -> String -> IO Call
callMajorityAllele Bool
withDownsampling Int
minDepth String
alleles = do
Maybe String
maybeAlleles <- if Bool
withDownsampling
then forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
minDepth
else forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just String
alleles)
case Maybe String
maybeAlleles of
Maybe String
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
Just String
alleles' -> do
Char
a <- case ShowS
findMajorityAlleles String
alleles' of
[] -> forall a. HasCallStack => String -> a
error String
"should not happen"
[Char
a'] -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
a'
String
listA -> do
Maybe String
r <- forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
listA Int
1
case Maybe String
r of
Just [Char
r'] -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
r'
Maybe String
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Call
HaploidCall Char
a
findMajorityAlleles :: String -> [Char]
findMajorityAlleles :: ShowS
findMajorityAlleles String
alleles =
let groupedAlleles :: [(Int, Char)]
groupedAlleles = forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn forall a b. (a, b) -> a
fst [(forall (t :: * -> *) a. Foldable t => t a -> Int
length String
g, forall a. [a] -> a
head String
g) | String
g <- forall a. Eq a => [a] -> [[a]]
group forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => [a] -> [a]
sort forall a b. (a -> b) -> a -> b
$ String
alleles]
majorityCount :: Int
majorityCount = forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
last forall a b. (a -> b) -> a -> b
$ [(Int, Char)]
groupedAlleles
in [Char
a | (Int
n, Char
a) <- [(Int, Char)]
groupedAlleles, Int
n forall a. Eq a => a -> a -> Bool
== Int
majorityCount]
callRandomAllele :: String -> IO Call
callRandomAllele :: String -> IO Call
callRandomAllele String
alleles = do
Maybe String
res <- forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
1
case Maybe String
res of
Maybe String
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
Just [Char
a] -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Call
HaploidCall Char
a
Maybe String
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"
callRandomDiploid :: String -> IO Call
callRandomDiploid :: String -> IO Call
callRandomDiploid String
alleles = do
Maybe String
res <- forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
2
case Maybe String
res of
Maybe String
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
Just [Char
a1, Char
a2] -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Char -> Call
DiploidCall Char
a1 Char
a2
Maybe String
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"
filterTransitions :: (Monad m) => TransitionsMode -> Pipe FreqSumEntry FreqSumEntry m ()
filterTransitions :: forall (m :: * -> *).
Monad m =>
TransitionsMode -> Pipe FreqSumEntry FreqSumEntry m ()
filterTransitions TransitionsMode
transversionsMode =
case TransitionsMode
transversionsMode of
TransitionsMode
SkipTransitions ->
forall (m :: * -> *) a r. Functor m => (a -> Bool) -> Pipe a a m r
P.filter (\(FreqSumEntry Chrom
_ Int
_ Maybe ByteString
_ Maybe Double
_ Char
ref Char
alt [Maybe Int]
_) -> Char -> Char -> Bool
isTransversion Char
ref Char
alt)
TransitionsMode
TransitionsMissing ->
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\(FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
id_ Maybe Double
gpos Char
ref Char
alt [Maybe Int]
calls) ->
let calls' :: [Maybe Int]
calls' = if Char -> Char -> Bool
isTransversion Char
ref Char
alt then [Maybe Int]
calls else [forall a. Maybe a
Nothing | Maybe Int
_ <- [Maybe Int]
calls]
in Chrom
-> Int
-> Maybe ByteString
-> Maybe Double
-> Char
-> Char
-> [Maybe Int]
-> FreqSumEntry
FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
id_ Maybe Double
gpos Char
ref Char
alt [Maybe Int]
calls')
TransitionsMode
_ -> forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat
where
isTransversion :: Char -> Char -> Bool
isTransversion Char
ref Char
alt = Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isTransition Char
ref Char
alt
isTransition :: Char -> Char -> Bool
isTransition Char
ref Char
alt =
((Char
ref forall a. Eq a => a -> a -> Bool
== Char
'A') Bool -> Bool -> Bool
&& (Char
alt forall a. Eq a => a -> a -> Bool
== Char
'G')) Bool -> Bool -> Bool
||
((Char
ref forall a. Eq a => a -> a -> Bool
== Char
'G') Bool -> Bool -> Bool
&& (Char
alt forall a. Eq a => a -> a -> Bool
== Char
'A')) Bool -> Bool -> Bool
||
((Char
ref forall a. Eq a => a -> a -> Bool
== Char
'C') Bool -> Bool -> Bool
&& (Char
alt forall a. Eq a => a -> a -> Bool
== Char
'T')) Bool -> Bool -> Bool
||
((Char
ref forall a. Eq a => a -> a -> Bool
== Char
'T') Bool -> Bool -> Bool
&& (Char
alt forall a. Eq a => a -> a -> Bool
== Char
'C'))
cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String]
cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String]
cleanSSdamageAllSamples Char
ref Char
alt [String]
basesPerSample [[Strand]]
strandPerSample
| (Char
ref, Char
alt) forall a. Eq a => a -> a -> Bool
== (Char
'C', Char
'T') Bool -> Bool -> Bool
|| (Char
ref, Char
alt) forall a. Eq a => a -> a -> Bool
== (Char
'T', Char
'C') =
[String -> [Strand] -> String
removeForwardBases String
bases [Strand]
strands | (String
bases, [Strand]
strands) <- forall a b. [a] -> [b] -> [(a, b)]
zip [String]
basesPerSample [[Strand]]
strandPerSample]
| (Char
ref, Char
alt) forall a. Eq a => a -> a -> Bool
== (Char
'G', Char
'A') Bool -> Bool -> Bool
|| (Char
ref, Char
alt) forall a. Eq a => a -> a -> Bool
== (Char
'A', Char
'G') =
[String -> [Strand] -> String
removeReverseBases String
bases [Strand]
strands | (String
bases, [Strand]
strands) <- forall a b. [a] -> [b] -> [(a, b)]
zip [String]
basesPerSample [[Strand]]
strandPerSample]
| Bool
otherwise =
[String]
basesPerSample
where
removeForwardBases :: String -> [Strand] -> String
removeForwardBases = Strand -> String -> [Strand] -> String
removeReads Strand
ForwardStrand
removeReverseBases :: String -> [Strand] -> String
removeReverseBases = Strand -> String -> [Strand] -> String
removeReads Strand
ReverseStrand
removeReads :: Strand -> String -> [Strand] -> String
removeReads :: Strand -> String -> [Strand] -> String
removeReads Strand
strand String
bases [Strand]
strands = [Char
b | (Char
b, Strand
s) <- forall a b. [a] -> [b] -> [(a, b)]
zip String
bases [Strand]
strands, Strand
s forall a. Eq a => a -> a -> Bool
/= Strand
strand]