{-# LANGUAGE OverloadedStrings #-}

{-| A module to help with parsing VCF files. The VCF format is defined here:
<https://en.wikipedia.org/wiki/Variant_Call_Format>
-}

module SequenceFormats.VCF (VCFheader(..),
                     VCFentry(..),
                     readVCFfromStdIn,
                     readVCFfromFile,
                     readVCFfromProd,
                     getGenotypes,
                     getDosages,
                     isTransversionSnp,
                     vcfToFreqSumEntry,
                     isBiallelicSnp) where

import SequenceFormats.Utils (consumeProducer, Chrom(..),
    readFileProd, SeqFormatException(..), word)
import SequenceFormats.FreqSum (FreqSumEntry(..))

import Control.Applicative ((<|>))
import Control.Error (headErr, assertErr)
import Control.Monad (void)
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.Trans.State.Strict (runStateT)
import Control.Monad.IO.Class (MonadIO)
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.Char (isSpace)
import qualified Data.ByteString.Char8 as B
import Pipes (Producer)
import Pipes.Attoparsec (parse)
import Pipes.Safe (MonadSafe)
import qualified Pipes.ByteString as PB

-- |A datatype to represent the VCF Header. Most comments are simply parsed as entire lines, but the very last comment line, containing the sample names, is separated out
data VCFheader = VCFheader {
    VCFheader -> [String]
vcfHeaderComments :: [String], -- ^A list of containing all comments starting with a single '#'
    VCFheader -> [String]
vcfSampleNames :: [String] -- ^The list of sample names parsed from the last comment line 
                             -- starting with '##'
} deriving (Int -> VCFheader -> ShowS
[VCFheader] -> ShowS
VCFheader -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [VCFheader] -> ShowS
$cshowList :: [VCFheader] -> ShowS
show :: VCFheader -> String
$cshow :: VCFheader -> String
showsPrec :: Int -> VCFheader -> ShowS
$cshowsPrec :: Int -> VCFheader -> ShowS
Show)

-- |A Datatype representing a single VCF entry.
data VCFentry = VCFentry {
    VCFentry -> Chrom
vcfChrom :: Chrom, -- ^The chromosome
    VCFentry -> Int
vcfPos :: Int, -- ^The position
    VCFentry -> Maybe ByteString
vcfId :: Maybe B.ByteString, -- ^The SNP ID if non-missing
    VCFentry -> ByteString
vcfRef :: B.ByteString, -- ^ The reference allele (supports also multi-character alleles for Indels)
    VCFentry -> [ByteString]
vcfAlt :: [B.ByteString], -- ^The alternative alleles, each one possible of multiple characters 
    VCFentry -> Maybe Double
vcfQual :: Maybe Double, -- ^The quality value
    VCFentry -> Maybe ByteString
vcfFilter :: Maybe B.ByteString, -- ^The Filter value, if non-missing.
    VCFentry -> [ByteString]
vcfInfo :: [B.ByteString], -- ^A list of Info fields
    VCFentry -> [ByteString]
vcfFormatString :: [B.ByteString], -- ^A list of format tags
    VCFentry -> [[ByteString]]
vcfGenotypeInfo :: [[B.ByteString]] -- ^A list of format fields for each sample.
} deriving (Int -> VCFentry -> ShowS
[VCFentry] -> ShowS
VCFentry -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [VCFentry] -> ShowS
$cshowList :: [VCFentry] -> ShowS
show :: VCFentry -> String
$cshow :: VCFentry -> String
showsPrec :: Int -> VCFentry -> ShowS
$cshowsPrec :: Int -> VCFentry -> ShowS
Show, VCFentry -> VCFentry -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: VCFentry -> VCFentry -> Bool
$c/= :: VCFentry -> VCFentry -> Bool
== :: VCFentry -> VCFentry -> Bool
$c== :: VCFentry -> VCFentry -> Bool
Eq)

-- |reads a VCFheader and VCFentries from a text producer.
readVCFfromProd :: (MonadThrow m) =>
    Producer B.ByteString m () -> m (VCFheader, Producer VCFentry m ())
readVCFfromProd :: forall (m :: * -> *).
MonadThrow m =>
Producer ByteString m () -> m (VCFheader, Producer VCFentry m ())
readVCFfromProd Producer ByteString m ()
prod = do
    (Maybe (Either ParsingError VCFheader)
res, Producer ByteString m ()
rest) <- forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT (forall (m :: * -> *) a b.
(Monad m, ParserInput a) =>
Parser a b -> Parser a m (Maybe (Either ParsingError b))
parse Parser VCFheader
vcfHeaderParser) Producer ByteString m ()
prod
    VCFheader
header <- case Maybe (Either ParsingError VCFheader)
res of
        Maybe (Either ParsingError VCFheader)
Nothing -> forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ String -> SeqFormatException
SeqFormatException String
"freqSum file exhausted"
        Just (Left ParsingError
e) -> forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (String -> SeqFormatException
SeqFormatException (forall a. Show a => a -> String
show ParsingError
e))
        Just (Right VCFheader
h) -> forall (m :: * -> *) a. Monad m => a -> m a
return VCFheader
h
    forall (m :: * -> *) a. Monad m => a -> m a
return (VCFheader
header, forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser VCFentry
vcfEntryParser Producer ByteString m ()
rest)

-- |Reading a VCF from StdIn. Returns a VCFHeader and a Producer over VCFentries.
readVCFfromStdIn :: (MonadIO m, MonadThrow m) => m (VCFheader, Producer VCFentry m ())
readVCFfromStdIn :: forall (m :: * -> *).
(MonadIO m, MonadThrow m) =>
m (VCFheader, Producer VCFentry m ())
readVCFfromStdIn = forall (m :: * -> *).
MonadThrow m =>
Producer ByteString m () -> m (VCFheader, Producer VCFentry m ())
readVCFfromProd forall (m :: * -> *). MonadIO m => Producer' ByteString m ()
PB.stdin

-- |Reading a VCF from a file. Returns a VCFHeader and a Producer over VCFentries.
readVCFfromFile :: (MonadSafe m) => FilePath -> m (VCFheader, Producer VCFentry m ())
readVCFfromFile :: forall (m :: * -> *).
MonadSafe m =>
String -> m (VCFheader, Producer VCFentry m ())
readVCFfromFile = forall (m :: * -> *).
MonadThrow m =>
Producer ByteString m () -> m (VCFheader, Producer VCFentry m ())
readVCFfromProd forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd

vcfHeaderParser :: A.Parser VCFheader
vcfHeaderParser :: Parser VCFheader
vcfHeaderParser = [String] -> [String] -> VCFheader
VCFheader forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
A.many1' Parser ByteString String
doubleCommentLine forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString [String]
singleCommentLine
  where
    doubleCommentLine :: Parser ByteString String
doubleCommentLine = do
        ByteString
c1 <- ByteString -> Parser ByteString
A.string ByteString
"##"
        ByteString
s_ <- (Char -> Bool) -> Parser ByteString
A.takeWhile1 (forall a. Eq a => a -> a -> Bool
/=Char
'\n')
        Parser ()
A.endOfLine
        forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
B.unpack forall a b. (a -> b) -> a -> b
$ ByteString
c1 forall a. Semigroup a => a -> a -> a
<> ByteString
s_
    singleCommentLine :: Parser ByteString [String]
singleCommentLine = do
        forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
A.char Char
'#'
        ByteString
s_ <- (Char -> Bool) -> Parser ByteString
A.takeWhile1 (forall a. Eq a => a -> a -> Bool
/=Char
'\n')
        Parser ()
A.endOfLine
        let fields :: [ByteString]
fields = (Char -> Bool) -> ByteString -> [ByteString]
B.splitWith (forall a. Eq a => a -> a -> Bool
==Char
'\t') ByteString
s_
        forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Int -> [a] -> [a]
drop Int
9 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map ByteString -> String
B.unpack forall a b. (a -> b) -> a -> b
$ [ByteString]
fields

vcfEntryParser :: A.Parser VCFentry
vcfEntryParser :: Parser VCFentry
vcfEntryParser = Parser VCFentry
vcfEntryParserFull forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser VCFentry
vcfEntryParserTruncated
  where
    vcfEntryParserFull :: Parser VCFentry
vcfEntryParserFull = Chrom
-> Int
-> Maybe ByteString
-> ByteString
-> [ByteString]
-> Maybe Double
-> Maybe ByteString
-> [ByteString]
-> [ByteString]
-> [[ByteString]]
-> VCFentry
VCFentry forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ByteString -> Chrom
Chrom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
word) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString (Maybe ByteString)
parseId forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*
        Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString
word forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString [ByteString]
parseAlternativeAlleles forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString (Maybe Double)
parseQual forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString (Maybe ByteString)
parseFilter forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* 
        Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString [ByteString]
parseInfoFields forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString [ByteString]
parseFormatStrings forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString [[ByteString]]
parseGenotypeInfos forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* 
        Parser ()
A.endOfLine
    vcfEntryParserTruncated :: Parser VCFentry
vcfEntryParserTruncated = Chrom
-> Int
-> Maybe ByteString
-> ByteString
-> [ByteString]
-> Maybe Double
-> Maybe ByteString
-> [ByteString]
-> [ByteString]
-> [[ByteString]]
-> VCFentry
VCFentry forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ByteString -> Chrom
Chrom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
word) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString (Maybe ByteString)
parseId forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*
        Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString
word forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString [ByteString]
parseAlternativeAlleles forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString (Maybe Double)
parseQual forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString (Maybe ByteString)
parseFilter forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*
        Parser Char
sp forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString [ByteString]
parseInfoFields forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Applicative f => a -> f a
pure [] forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Applicative f => a -> f a
pure [] forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.endOfLine
    sp :: Parser Char
sp = (Char -> Bool) -> Parser Char
A.satisfy (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\t')
    parseId :: Parser ByteString (Maybe ByteString)
parseId = (Parser Char
parseDot forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
word)
    parseDot :: Parser Char
parseDot = Char -> Parser Char
A.char Char
'.'
    parseAlternativeAlleles :: Parser ByteString [ByteString]
parseAlternativeAlleles = (Parser Char
parseDot forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure []) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ByteString
parseAllele forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser Char
A.char Char
',')
    parseAllele :: Parser ByteString
parseAllele = (Char -> Bool) -> Parser ByteString
A.takeTill (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
',' Bool -> Bool -> Bool
|| Char -> Bool
isSpace Char
c)
    parseQual :: Parser ByteString (Maybe Double)
parseQual = (Parser Char
parseDot forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Double
A.double)
    parseFilter :: Parser ByteString (Maybe ByteString)
parseFilter = (Parser Char
parseDot forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
word)
    parseInfoFields :: Parser ByteString [ByteString]
parseInfoFields = (Parser Char
parseDot forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure []) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ByteString
parseInfoField forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser Char
A.char Char
';')
    parseInfoField :: Parser ByteString
parseInfoField = (Char -> Bool) -> Parser ByteString
A.takeTill (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
';' Bool -> Bool -> Bool
|| Char -> Bool
isSpace Char
c)
    parseFormatStrings :: Parser ByteString [ByteString]
parseFormatStrings = Parser ByteString
parseFormatString forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser Char
A.char Char
':'
    parseFormatString :: Parser ByteString
parseFormatString = (Char -> Bool) -> Parser ByteString
A.takeTill (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
':' Bool -> Bool -> Bool
|| Char -> Bool
isSpace Char
c)
    parseGenotypeInfos :: Parser ByteString [[ByteString]]
parseGenotypeInfos = Parser ByteString [ByteString]
parseGenotype forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Parser Char
sp
    parseGenotype :: Parser ByteString [ByteString]
parseGenotype = Parser ByteString
parseGenoField forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser Char
A.char Char
':'
    parseGenoField :: Parser ByteString
parseGenoField = (Char -> Bool) -> Parser ByteString
A.takeTill (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
':' Bool -> Bool -> Bool
|| Char -> Bool
isSpace Char
c) 

-- |returns True if the SNP is biallelic.
isBiallelicSnp :: B.ByteString -> [B.ByteString] -> Bool
isBiallelicSnp :: ByteString -> [ByteString] -> Bool
isBiallelicSnp ByteString
ref [ByteString]
alt = Bool
validRef Bool -> Bool -> Bool
&& Bool
validAlt
  where
    validRef :: Bool
validRef = (ByteString
ref forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString
"A", ByteString
"C", ByteString
"G", ByteString
"T"])
    validAlt :: Bool
validAlt = case [ByteString]
alt of
        [ByteString
alt'] -> ByteString
alt' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString
"A", ByteString
"C", ByteString
"G", ByteString
"T"]
        [ByteString]
_ -> Bool
False

-- |returns True if the SNp is a biallelic Transversion SNP (i.e. one of G/T, G/C, A/T, A/C)
isTransversionSnp :: B.ByteString -> [B.ByteString] -> Bool
isTransversionSnp :: ByteString -> [ByteString] -> Bool
isTransversionSnp ByteString
ref [ByteString]
alt =
    case [ByteString]
alt of
        [ByteString
alt'] -> ByteString -> [ByteString] -> Bool
isBiallelicSnp ByteString
ref [ByteString]
alt Bool -> Bool -> Bool
&& (Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall {a} {a}.
(Eq a, Eq a, IsString a, IsString a) =>
a -> a -> Bool
isTransition ByteString
ref ByteString
alt')
        [ByteString]
_ -> Bool
False
  where
    isTransition :: a -> a -> Bool
isTransition a
r a
a = ((a
r forall a. Eq a => a -> a -> Bool
== a
"A") Bool -> Bool -> Bool
&& (a
a forall a. Eq a => a -> a -> Bool
== a
"G")) Bool -> Bool -> Bool
|| ((a
r forall a. Eq a => a -> a -> Bool
== a
"G") Bool -> Bool -> Bool
&& (a
a forall a. Eq a => a -> a -> Bool
== a
"A")) Bool -> Bool -> Bool
||
                       ((a
r forall a. Eq a => a -> a -> Bool
== a
"C") Bool -> Bool -> Bool
&& (a
a forall a. Eq a => a -> a -> Bool
== a
"T")) Bool -> Bool -> Bool
|| ((a
r forall a. Eq a => a -> a -> Bool
== a
"T") Bool -> Bool -> Bool
&& (a
a forall a. Eq a => a -> a -> Bool
== a
"C"))

-- |Extracts the genotype fields (for each sapmle) from a VCF entry
getGenotypes :: VCFentry -> Either String [B.ByteString]
getGenotypes :: VCFentry -> Either String [ByteString]
getGenotypes VCFentry
vcfEntry = do
    Int
gtIndex <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e a. e -> [a] -> Either e a
headErr String
"GT format field not found" forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Eq a => a -> a -> Bool
==ByteString
"GT") forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
               forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] forall b c a. (b -> c) -> (a -> b) -> a -> c
. VCFentry -> [ByteString]
vcfFormatString forall a b. (a -> b) -> a -> b
$ VCFentry
vcfEntry
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall a. [a] -> Int -> a
!!Int
gtIndex) (VCFentry -> [[ByteString]]
vcfGenotypeInfo VCFentry
vcfEntry)

-- |Extracts the dosages (the sum of non-reference alleles) per sample (returns a Left Error if it fails.)
getDosages :: VCFentry -> Either String [Maybe Int]
getDosages :: VCFentry -> Either String [Maybe Int]
getDosages VCFentry
vcfEntry = do
    [ByteString]
genotypes <- VCFentry -> Either String [ByteString]
getGenotypes VCFentry
vcfEntry
    let dosages :: [Maybe Int]
dosages = do
            ByteString
gen <- [ByteString]
genotypes
            if Char
'.' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (ByteString -> String
B.unpack ByteString
gen) then
                forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
            else
                forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ Char -> ByteString -> Int
B.count Char
'1' ByteString
gen
    forall (m :: * -> *) a. Monad m => a -> m a
return [Maybe Int]
dosages

-- |Converts a VCFentry to the simpler FreqSum format (returns a Left Error if it fails.)
vcfToFreqSumEntry :: VCFentry -> Either String FreqSumEntry
vcfToFreqSumEntry :: VCFentry -> Either String FreqSumEntry
vcfToFreqSumEntry VCFentry
vcfEntry = do
    [Maybe Int]
dosages <- VCFentry -> Either String [Maybe Int]
getDosages VCFentry
vcfEntry
    forall e. e -> Bool -> Either e ()
assertErr String
"multi-site reference allele" forall a b. (a -> b) -> a -> b
$ ByteString -> Int
B.length (VCFentry -> ByteString
vcfRef VCFentry
vcfEntry) forall a. Eq a => a -> a -> Bool
== Int
1
    forall e. e -> Bool -> Either e ()
assertErr String
"need exactly one alternative allele" forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length (VCFentry -> [ByteString]
vcfAlt VCFentry
vcfEntry) forall a. Eq a => a -> a -> Bool
== Int
1
    forall e. e -> Bool -> Either e ()
assertErr String
"multi-site alternative allele" forall a b. (a -> b) -> a -> b
$ ByteString -> Int
B.length (forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. VCFentry -> [ByteString]
vcfAlt forall a b. (a -> b) -> a -> b
$ VCFentry
vcfEntry) forall a. Eq a => a -> a -> Bool
== Int
1
    let ref :: Char
ref = ByteString -> Char
B.head (VCFentry -> ByteString
vcfRef VCFentry
vcfEntry)
    let alt :: Char
alt = ByteString -> Char
B.head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. VCFentry -> [ByteString]
vcfAlt forall a b. (a -> b) -> a -> b
$ VCFentry
vcfEntry
    forall e. e -> Bool -> Either e ()
assertErr String
"Invalid Reference Allele" forall a b. (a -> b) -> a -> b
$ Char
ref forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'A', Char
'C', Char
'T', Char
'G', Char
'N']
    forall e. e -> Bool -> Either e ()
assertErr String
"Invalid Alternative Allele" forall a b. (a -> b) -> a -> b
$ Char
alt forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'A', Char
'C', Char
'T', Char
'G', Char
'.']
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Chrom
-> Int
-> Maybe ByteString
-> Maybe Double
-> Char
-> Char
-> [Maybe Int]
-> FreqSumEntry
FreqSumEntry (VCFentry -> Chrom
vcfChrom VCFentry
vcfEntry) (VCFentry -> Int
vcfPos VCFentry
vcfEntry) (VCFentry -> Maybe ByteString
vcfId VCFentry
vcfEntry) forall a. Maybe a
Nothing Char
ref Char
alt [Maybe Int]
dosages