{-# LANGUAGE OverloadedStrings #-}
module SequenceTools.PileupCaller (callToDosage, Call(..), callGenotypeFromPileup,
callMajorityAllele, findMajorityAlleles, callRandomAllele,
callRandomDiploid, CallingMode(..),
TransitionsMode(..), filterTransitions, cleanSSdamageAllSamples) where
import SequenceFormats.FreqSum (FreqSumEntry(..))
import SequenceFormats.Pileup (Strand(..))
import SequenceTools.Utils (sampleWithoutReplacement)
import Data.List (sortOn, group, sort)
import Pipes (Pipe, cat)
import qualified Pipes.Prelude as P
data Call = HaploidCall Char | DiploidCall Char Char | MissingCall deriving (Int -> Call -> ShowS
[Call] -> ShowS
Call -> String
(Int -> Call -> ShowS)
-> (Call -> String) -> ([Call] -> ShowS) -> Show Call
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Call] -> ShowS
$cshowList :: [Call] -> ShowS
show :: Call -> String
$cshow :: Call -> String
showsPrec :: Int -> Call -> ShowS
$cshowsPrec :: Int -> Call -> ShowS
Show, Call -> Call -> Bool
(Call -> Call -> Bool) -> (Call -> Call -> Bool) -> Eq Call
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Call -> Call -> Bool
$c/= :: Call -> Call -> Bool
== :: Call -> Call -> Bool
$c== :: Call -> Call -> Bool
Eq)
data CallingMode = MajorityCalling Bool | RandomCalling | RandomDiploidCalling deriving (Int -> CallingMode -> ShowS
[CallingMode] -> ShowS
CallingMode -> String
(Int -> CallingMode -> ShowS)
-> (CallingMode -> String)
-> ([CallingMode] -> ShowS)
-> Show CallingMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CallingMode] -> ShowS
$cshowList :: [CallingMode] -> ShowS
show :: CallingMode -> String
$cshow :: CallingMode -> String
showsPrec :: Int -> CallingMode -> ShowS
$cshowsPrec :: Int -> CallingMode -> ShowS
Show)
data TransitionsMode = TransitionsMissing | SkipTransitions | SingleStrandMode | AllSites deriving (TransitionsMode -> TransitionsMode -> Bool
(TransitionsMode -> TransitionsMode -> Bool)
-> (TransitionsMode -> TransitionsMode -> Bool)
-> Eq TransitionsMode
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TransitionsMode -> TransitionsMode -> Bool
$c/= :: TransitionsMode -> TransitionsMode -> Bool
== :: TransitionsMode -> TransitionsMode -> Bool
$c== :: TransitionsMode -> TransitionsMode -> Bool
Eq, Int -> TransitionsMode -> ShowS
[TransitionsMode] -> ShowS
TransitionsMode -> String
(Int -> TransitionsMode -> ShowS)
-> (TransitionsMode -> String)
-> ([TransitionsMode] -> ShowS)
-> Show TransitionsMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TransitionsMode] -> ShowS
$cshowList :: [TransitionsMode] -> ShowS
show :: TransitionsMode -> String
$cshow :: TransitionsMode -> String
showsPrec :: Int -> TransitionsMode -> ShowS
$cshowsPrec :: Int -> TransitionsMode -> ShowS
Show)
callToDosage :: Char -> Char -> Call -> Maybe Int
callToDosage :: Char -> Char -> Call -> Maybe Int
callToDosage Char
refA Char
altA Call
call = case Call
call of
HaploidCall Char
a | Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
refA -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
| Char
a Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
altA -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1
| Bool
otherwise -> Maybe Int
forall a. Maybe a
Nothing
DiploidCall Char
a1 Char
a2 | (Char
a1, Char
a2) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
refA, Char
refA) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
| (Char
a1, Char
a2) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
refA, Char
altA) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1
| (Char
a1, Char
a2) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
altA, Char
refA) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1
| (Char
a1, Char
a2) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
altA, Char
altA) -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
2
| Bool
otherwise -> Maybe Int
forall a. Maybe a
Nothing
Call
MissingCall -> Maybe Int
forall a. Maybe a
Nothing
callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call
callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call
callGenotypeFromPileup CallingMode
mode Int
minDepth String
alleles =
if String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
alleles Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
minDepth then Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall else
case CallingMode
mode of
MajorityCalling Bool
withDownsampling -> Bool -> Int -> String -> IO Call
callMajorityAllele Bool
withDownsampling Int
minDepth String
alleles
CallingMode
RandomCalling -> String -> IO Call
callRandomAllele String
alleles
CallingMode
RandomDiploidCalling -> String -> IO Call
callRandomDiploid String
alleles
callMajorityAllele :: Bool -> Int-> String -> IO Call
callMajorityAllele :: Bool -> Int -> String -> IO Call
callMajorityAllele Bool
withDownsampling Int
minDepth String
alleles = do
Maybe String
maybeAlleles <- if Bool
withDownsampling
then String -> Int -> IO (Maybe String)
forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
minDepth
else Maybe String -> IO (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Maybe String
forall a. a -> Maybe a
Just String
alleles)
case Maybe String
maybeAlleles of
Maybe String
Nothing -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
Just String
alleles' -> do
Char
a <- case ShowS
findMajorityAlleles String
alleles' of
[] -> String -> IO Char
forall a. HasCallStack => String -> a
error String
"should not happen"
[Char
a'] -> Char -> IO Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
a'
String
listA -> do
Maybe String
r <- String -> Int -> IO (Maybe String)
forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
listA Int
1
case Maybe String
r of
Just [Char
r'] -> Char -> IO Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
r'
Maybe String
_ -> String -> IO Char
forall a. HasCallStack => String -> a
error String
"should not happen"
Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return (Call -> IO Call) -> Call -> IO Call
forall a b. (a -> b) -> a -> b
$ Char -> Call
HaploidCall Char
a
findMajorityAlleles :: String -> [Char]
findMajorityAlleles :: ShowS
findMajorityAlleles String
alleles =
let groupedAlleles :: [(Int, Char)]
groupedAlleles = ((Int, Char) -> Int) -> [(Int, Char)] -> [(Int, Char)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (Int, Char) -> Int
forall a b. (a, b) -> a
fst [(String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
g, String -> Char
forall a. [a] -> a
head String
g) | String
g <- String -> [String]
forall a. Eq a => [a] -> [[a]]
group (String -> [String]) -> ShowS -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. Ord a => [a] -> [a]
sort (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ String
alleles]
majorityCount :: Int
majorityCount = (Int, Char) -> Int
forall a b. (a, b) -> a
fst ((Int, Char) -> Int)
-> ([(Int, Char)] -> (Int, Char)) -> [(Int, Char)] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, Char)] -> (Int, Char)
forall a. [a] -> a
last ([(Int, Char)] -> Int) -> [(Int, Char)] -> Int
forall a b. (a -> b) -> a -> b
$ [(Int, Char)]
groupedAlleles
in [Char
a | (Int
n, Char
a) <- [(Int, Char)]
groupedAlleles, Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
majorityCount]
callRandomAllele :: String -> IO Call
callRandomAllele :: String -> IO Call
callRandomAllele String
alleles = do
Maybe String
res <- String -> Int -> IO (Maybe String)
forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
1
case Maybe String
res of
Maybe String
Nothing -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
Just [Char
a] -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return (Call -> IO Call) -> Call -> IO Call
forall a b. (a -> b) -> a -> b
$ Char -> Call
HaploidCall Char
a
Maybe String
_ -> String -> IO Call
forall a. HasCallStack => String -> a
error String
"should not happen"
callRandomDiploid :: String -> IO Call
callRandomDiploid :: String -> IO Call
callRandomDiploid String
alleles = do
Maybe String
res <- String -> Int -> IO (Maybe String)
forall a. [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement String
alleles Int
2
case Maybe String
res of
Maybe String
Nothing -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return Call
MissingCall
Just [Char
a1, Char
a2] -> Call -> IO Call
forall (m :: * -> *) a. Monad m => a -> m a
return (Call -> IO Call) -> Call -> IO Call
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Call
DiploidCall Char
a1 Char
a2
Maybe String
_ -> String -> IO Call
forall a. HasCallStack => String -> a
error String
"should not happen"
filterTransitions :: (Monad m) => TransitionsMode -> Pipe FreqSumEntry FreqSumEntry m ()
filterTransitions :: TransitionsMode -> Pipe FreqSumEntry FreqSumEntry m ()
filterTransitions TransitionsMode
transversionsMode =
case TransitionsMode
transversionsMode of
TransitionsMode
SkipTransitions ->
(FreqSumEntry -> Bool) -> Pipe FreqSumEntry FreqSumEntry m ()
forall (m :: * -> *) a r. Functor m => (a -> Bool) -> Pipe a a m r
P.filter (\(FreqSumEntry Chrom
_ Int
_ Maybe ByteString
_ Maybe Double
_ Char
ref Char
alt [Maybe Int]
_) -> Char -> Char -> Bool
isTransversion Char
ref Char
alt)
TransitionsMode
TransitionsMissing ->
(FreqSumEntry -> FreqSumEntry)
-> Pipe FreqSumEntry FreqSumEntry m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\(FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
id_ Maybe Double
gpos Char
ref Char
alt [Maybe Int]
calls) ->
let calls' :: [Maybe Int]
calls' = if Char -> Char -> Bool
isTransversion Char
ref Char
alt then [Maybe Int]
calls else [Maybe Int
forall a. Maybe a
Nothing | Maybe Int
_ <- [Maybe Int]
calls]
in Chrom
-> Int
-> Maybe ByteString
-> Maybe Double
-> Char
-> Char
-> [Maybe Int]
-> FreqSumEntry
FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
id_ Maybe Double
gpos Char
ref Char
alt [Maybe Int]
calls')
TransitionsMode
_ -> Pipe FreqSumEntry FreqSumEntry m ()
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat
where
isTransversion :: Char -> Char -> Bool
isTransversion Char
ref Char
alt = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Char -> Char -> Bool
isTransition Char
ref Char
alt
isTransition :: Char -> Char -> Bool
isTransition Char
ref Char
alt =
((Char
ref Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'A') Bool -> Bool -> Bool
&& (Char
alt Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'G')) Bool -> Bool -> Bool
||
((Char
ref Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'G') Bool -> Bool -> Bool
&& (Char
alt Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'A')) Bool -> Bool -> Bool
||
((Char
ref Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'C') Bool -> Bool -> Bool
&& (Char
alt Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'T')) Bool -> Bool -> Bool
||
((Char
ref Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'T') Bool -> Bool -> Bool
&& (Char
alt Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'C'))
cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String]
cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String]
cleanSSdamageAllSamples Char
ref Char
alt [String]
basesPerSample [[Strand]]
strandPerSample
| (Char
ref, Char
alt) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
'C', Char
'T') Bool -> Bool -> Bool
|| (Char
ref, Char
alt) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
'T', Char
'C') =
[String -> [Strand] -> String
removeForwardBases String
bases [Strand]
strands | (String
bases, [Strand]
strands) <- [String] -> [[Strand]] -> [(String, [Strand])]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
basesPerSample [[Strand]]
strandPerSample]
| (Char
ref, Char
alt) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
'G', Char
'A') Bool -> Bool -> Bool
|| (Char
ref, Char
alt) (Char, Char) -> (Char, Char) -> Bool
forall a. Eq a => a -> a -> Bool
== (Char
'A', Char
'G') =
[String -> [Strand] -> String
removeReverseBases String
bases [Strand]
strands | (String
bases, [Strand]
strands) <- [String] -> [[Strand]] -> [(String, [Strand])]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
basesPerSample [[Strand]]
strandPerSample]
| Bool
otherwise =
[String]
basesPerSample
where
removeForwardBases :: String -> [Strand] -> String
removeForwardBases = Strand -> String -> [Strand] -> String
removeReads Strand
ForwardStrand
removeReverseBases :: String -> [Strand] -> String
removeReverseBases = Strand -> String -> [Strand] -> String
removeReads Strand
ReverseStrand
removeReads :: Strand -> String -> [Strand] -> String
removeReads :: Strand -> String -> [Strand] -> String
removeReads Strand
strand String
bases [Strand]
strands = [Char
b | (Char
b, Strand
s) <- String -> [Strand] -> [(Char, Strand)]
forall a b. [a] -> [b] -> [(a, b)]
zip String
bases [Strand]
strands, Strand
s Strand -> Strand -> Bool
forall a. Eq a => a -> a -> Bool
/= Strand
strand]