{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Pileup (readPileupFromStdIn, readPileupFromFile, PileupRow(..), Strand(..)) where

import Control.Monad.Catch (MonadThrow)
import Control.Monad.IO.Class (MonadIO)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Char (toUpper)
import Pipes (Producer)
import qualified Pipes.ByteString as PB
import Pipes.Safe (MonadSafe)

import SequenceFormats.Utils (Chrom(..), word, readFileProd, consumeProducer)

-- |A datatype to represent the strand orientation of a single base.
data Strand = ForwardStrand | ReverseStrand deriving (Strand -> Strand -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Strand -> Strand -> Bool
$c/= :: Strand -> Strand -> Bool
== :: Strand -> Strand -> Bool
$c== :: Strand -> Strand -> Bool
Eq, Int -> Strand -> ShowS
[Strand] -> ShowS
Strand -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Strand] -> ShowS
$cshowList :: [Strand] -> ShowS
show :: Strand -> String
$cshow :: Strand -> String
showsPrec :: Int -> Strand -> ShowS
$cshowsPrec :: Int -> Strand -> ShowS
Show)

-- |A datatype to represent a single pileup row for multiple individuals.
-- The constructor arguments are: Chromosome, Position, Refererence Allelele,
-- Pileup String per individual
data PileupRow = PileupRow {
    PileupRow -> Chrom
pileupChrom :: Chrom, -- ^The chromosome
    PileupRow -> Int
pileupPos :: Int, -- ^The position
    PileupRow -> Char
pileupRef :: Char, -- ^The reference base
    PileupRow -> [String]
pileupBases :: [String], -- ^The base string
    PileupRow -> [[Strand]]
pileupStrandInfo :: [[Strand]]
 } deriving (PileupRow -> PileupRow -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PileupRow -> PileupRow -> Bool
$c/= :: PileupRow -> PileupRow -> Bool
== :: PileupRow -> PileupRow -> Bool
$c== :: PileupRow -> PileupRow -> Bool
Eq, Int -> PileupRow -> ShowS
[PileupRow] -> ShowS
PileupRow -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PileupRow] -> ShowS
$cshowList :: [PileupRow] -> ShowS
show :: PileupRow -> String
$cshow :: PileupRow -> String
showsPrec :: Int -> PileupRow -> ShowS
$cshowsPrec :: Int -> PileupRow -> ShowS
Show)

-- |Read a pileup-formatted file from StdIn, for reading from an
-- external command `samtools mpileup`.
readPileupFromStdIn :: (MonadIO m, MonadThrow m) => Producer PileupRow m ()
readPileupFromStdIn :: forall (m :: * -> *).
(MonadIO m, MonadThrow m) =>
Producer PileupRow m ()
readPileupFromStdIn = forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser PileupRow
pileupParser forall (m :: * -> *). MonadIO m => Producer' ByteString m ()
PB.stdin

-- |Read pileup from a file.
readPileupFromFile :: (MonadSafe m) => FilePath -> Producer PileupRow m ()
readPileupFromFile :: forall (m :: * -> *).
MonadSafe m =>
String -> Producer PileupRow m ()
readPileupFromFile = forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser PileupRow
pileupParser forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd

pileupParser :: A.Parser PileupRow
pileupParser :: Parser PileupRow
pileupParser = do
    ByteString
chrom <- Parser ByteString
word
    Char
_ <- Parser Char
A.space
    Int
pos <- forall a. Integral a => Parser a
A.decimal
    Char
_ <- Parser Char
A.space
    Char
refA <- Char -> Char
toUpper forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNactgnM")
     -- for some reason, there is an M in the human reference at
     -- position 3:60830534 (both in hs37d5 and in hg19)
    Char
_ <- Parser Char
A.space
    [(String, [Strand])]
baseAndStrandEntries <- Char -> Parser ByteString (String, [Strand])
parsePileupPerSample Char
refA forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1`
        (Char -> Bool) -> Parser Char
A.satisfy (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\t')
    Parser ()
A.endOfLine
    let baseStrings :: [String]
baseStrings = forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(String, [Strand])]
baseAndStrandEntries
        strandInfoStrings :: [[Strand]]
strandInfoStrings = forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(String, [Strand])]
baseAndStrandEntries
    let ret :: PileupRow
ret = Chrom -> Int -> Char -> [String] -> [[Strand]] -> PileupRow
PileupRow (ByteString -> Chrom
Chrom ByteString
chrom) Int
pos Char
refA [String]
baseStrings [[Strand]]
strandInfoStrings
    --trace (show ret) $ return ret
    forall (m :: * -> *) a. Monad m => a -> m a
return PileupRow
ret
  where
    parsePileupPerSample :: Char -> Parser ByteString (String, [Strand])
parsePileupPerSample Char
refA =
        Char -> Int -> ByteString -> (String, [Strand])
processPileupEntry Char
refA forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
A.space forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString
word forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
A.space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString
word

processPileupEntry :: Char -> Int -> B.ByteString -> (String, [Strand])
processPileupEntry :: Char -> Int -> ByteString -> (String, [Strand])
processPileupEntry Char
refA Int
cov ByteString
readBaseString =
    if Int
cov forall a. Eq a => a -> a -> Bool
== Int
0 then (String
"", []) else
        let res :: [(Char, Strand)]
res = String -> [(Char, Strand)]
go (ByteString -> String
B.unpack ByteString
readBaseString)
        in  (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(Char, Strand)]
res, forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(Char, Strand)]
res)
  where
    go :: String -> [(Char, Strand)]
go (Char
x:String
xs)
        | Char
x forall a. Eq a => a -> a -> Bool
== Char
'.' = (Char
refA, Strand
ForwardStrand) forall a. a -> [a] -> [a]
: String -> [(Char, Strand)]
go String
xs
        | Char
x forall a. Eq a => a -> a -> Bool
== Char
',' = (Char
refA, Strand
ReverseStrand) forall a. a -> [a] -> [a]
: String -> [(Char, Strand)]
go String
xs
        | Char
x forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"ACTGN" :: String) = (Char
x, Strand
ForwardStrand) forall a. a -> [a] -> [a]
: String -> [(Char, Strand)]
go String
xs
        | Char
x forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"actgn" :: String) = (Char -> Char
toUpper Char
x, Strand
ReverseStrand) forall a. a -> [a] -> [a]
: String -> [(Char, Strand)]
go String
xs
        | Char
x forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"$*#<>" :: String) = String -> [(Char, Strand)]
go String
xs
        | Char
x forall a. Eq a => a -> a -> Bool
== Char
'^' = String -> [(Char, Strand)]
go (forall a. Int -> [a] -> [a]
drop Int
1 String
xs) -- skip the next character, which is the mapping quality 
        | Char
x forall a. Eq a => a -> a -> Bool
== Char
'+' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'-' =  -- insertions or deletions, followed by a decimal number
            case forall a. Read a => ReadS a
reads String
xs of
                [(Int
num, String
rest)] -> String -> [(Char, Strand)]
go (forall a. Int -> [a] -> [a]
drop Int
num String
rest)
                [(Int, String)]
_ -> forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"cannot parse read base string: " forall a. [a] -> [a] -> [a]
++ (Char
xforall a. a -> [a] -> [a]
:String
xs)
        | Bool
otherwise = forall a. HasCallStack => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"cannot parse read base string: " forall a. [a] -> [a] -> [a]
++ (Char
xforall a. a -> [a] -> [a]
:String
xs)
    go [] = []