{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Pileup (readPileupFromStdIn, readPileupFromFile, PileupRow(..), Strand(..)) where
import Control.Monad.Catch (MonadThrow)
import Control.Monad.IO.Class (MonadIO)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Char (toUpper)
import Pipes (Producer)
import qualified Pipes.ByteString as PB
import Pipes.Safe (MonadSafe)
import SequenceFormats.Utils (Chrom(..), word, readFileProd, consumeProducer)
data Strand = ForwardStrand | ReverseStrand deriving (Eq, Show)
data PileupRow = PileupRow {
pileupChrom :: Chrom,
pileupPos :: Int,
pileupRef :: Char,
pileupBases :: [String],
pileupStrandInfo :: [[Strand]]
} deriving (Eq, Show)
readPileupFromStdIn :: (MonadIO m, MonadThrow m) => Producer PileupRow m ()
readPileupFromStdIn = consumeProducer pileupParser PB.stdin
readPileupFromFile :: (MonadSafe m) => FilePath -> Producer PileupRow m ()
readPileupFromFile = consumeProducer pileupParser . readFileProd
pileupParser :: A.Parser PileupRow
pileupParser = do
chrom <- word
_ <- A.space
pos <- A.decimal
_ <- A.space
refA <- A.satisfy (A.inClass "ACTGNactgnM")
_ <- A.space
baseAndStrandEntries <- parsePileupPerSample refA `A.sepBy1`
A.satisfy (\c -> c == ' ' || c == '\t')
A.endOfLine
let baseStrings = map fst baseAndStrandEntries
strandInfoStrings = map snd baseAndStrandEntries
let ret = PileupRow (Chrom chrom) pos refA baseStrings strandInfoStrings
return ret
where
parsePileupPerSample refA =
processPileupEntry refA <$> A.decimal <* A.space <*> word <* A.space <* word
processPileupEntry :: Char -> Int -> B.ByteString -> (String, [Strand])
processPileupEntry refA cov readBaseString =
if cov == 0 then ("", []) else
let res = go (B.unpack readBaseString)
in (map fst res, map snd res)
where
go (x:xs)
| x == '.' = (refA, ForwardStrand) : go xs
| x == ',' = (refA, ReverseStrand) : go xs
| x `elem` ("ACTGN" :: String) = (x, ForwardStrand) : go xs
| x `elem` ("actgn" :: String) = (toUpper x, ReverseStrand) : go xs
| x `elem` ("$*" :: String) = go xs
| x == '^' = go (drop 1 xs)
| (x == '+' || x == '-') =
let [(num, rest)] = reads xs in go (drop num rest)
| otherwise = error $ "cannot parse read base string: " ++ (x:xs)
go [] = []