{-# LANGUAGE OverloadedStrings #-}

{-|Module to read and parse Eigenstrat-formatted genotype data. The Eigenstrat format is defined at <https://github.com/argriffing/eigensoft/blob/master/CONVERTF/README>.

-}

module SequenceFormats.Eigenstrat (EigenstratSnpEntry(..), EigenstratIndEntry(..),
    readEigenstratInd, GenoEntry(..), GenoLine, Sex(..),
    readEigenstratSnpStdIn, readEigenstratSnpFile,
    readEigenstrat, writeEigenstrat, writeEigenstratIndFile, writeEigenstratSnp,
    writeEigenstratGeno) where

import           SequenceFormats.Utils            (Chrom (..),
                                                   SeqFormatException (..),
                                                   consumeProducer,
                                                   readFileProd, word)

import           Control.Applicative              ((<|>))
import           Control.Exception                (throw)
import           Control.Monad                    (forM_, void)
import           Control.Monad.Catch              (MonadThrow)
import           Control.Monad.IO.Class           (MonadIO, liftIO)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8            as B
import           Data.Vector                      (Vector, fromList, toList)
import           Pipes                            (Consumer, Pipe, Producer,
                                                   cat, for, yield, (>->))
import qualified Pipes.ByteString                 as PB
import qualified Pipes.Prelude                    as P
import           Pipes.Safe                       (MonadSafe)
import qualified Pipes.Safe.Prelude               as PS
import           System.IO                        (Handle, IOMode (..),
                                                   hPutStrLn, withFile)

-- |A datatype to represent a single genomic SNP. The constructor arguments are:
-- Chromosome, Position, Reference Allele, Alternative Allele.
data EigenstratSnpEntry = EigenstratSnpEntry
    { EigenstratSnpEntry -> Chrom
snpChrom      :: Chrom
    , EigenstratSnpEntry -> Int
snpPos        :: Int
    , EigenstratSnpEntry -> Double
snpGeneticPos :: Double
    , EigenstratSnpEntry -> ByteString
snpId         :: B.ByteString
    , EigenstratSnpEntry -> Char
snpRef        :: Char
    , EigenstratSnpEntry -> Char
snpAlt        :: Char
    }
    deriving (EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
(EigenstratSnpEntry -> EigenstratSnpEntry -> Bool)
-> (EigenstratSnpEntry -> EigenstratSnpEntry -> Bool)
-> Eq EigenstratSnpEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
$c/= :: EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
== :: EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
$c== :: EigenstratSnpEntry -> EigenstratSnpEntry -> Bool
Eq, Int -> EigenstratSnpEntry -> ShowS
[EigenstratSnpEntry] -> ShowS
EigenstratSnpEntry -> String
(Int -> EigenstratSnpEntry -> ShowS)
-> (EigenstratSnpEntry -> String)
-> ([EigenstratSnpEntry] -> ShowS)
-> Show EigenstratSnpEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [EigenstratSnpEntry] -> ShowS
$cshowList :: [EigenstratSnpEntry] -> ShowS
show :: EigenstratSnpEntry -> String
$cshow :: EigenstratSnpEntry -> String
showsPrec :: Int -> EigenstratSnpEntry -> ShowS
$cshowsPrec :: Int -> EigenstratSnpEntry -> ShowS
Show)

-- |A datatype to represent a single individual. The constructor arguments are:
-- Name, Sex and Population Name
data EigenstratIndEntry = EigenstratIndEntry String Sex String
    deriving (EigenstratIndEntry -> EigenstratIndEntry -> Bool
(EigenstratIndEntry -> EigenstratIndEntry -> Bool)
-> (EigenstratIndEntry -> EigenstratIndEntry -> Bool)
-> Eq EigenstratIndEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EigenstratIndEntry -> EigenstratIndEntry -> Bool
$c/= :: EigenstratIndEntry -> EigenstratIndEntry -> Bool
== :: EigenstratIndEntry -> EigenstratIndEntry -> Bool
$c== :: EigenstratIndEntry -> EigenstratIndEntry -> Bool
Eq, Int -> EigenstratIndEntry -> ShowS
[EigenstratIndEntry] -> ShowS
EigenstratIndEntry -> String
(Int -> EigenstratIndEntry -> ShowS)
-> (EigenstratIndEntry -> String)
-> ([EigenstratIndEntry] -> ShowS)
-> Show EigenstratIndEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [EigenstratIndEntry] -> ShowS
$cshowList :: [EigenstratIndEntry] -> ShowS
show :: EigenstratIndEntry -> String
$cshow :: EigenstratIndEntry -> String
showsPrec :: Int -> EigenstratIndEntry -> ShowS
$cshowsPrec :: Int -> EigenstratIndEntry -> ShowS
Show)

-- |A datatype to represent Sex in an Eigenstrat Individual file
data Sex = Male
    | Female
    | Unknown
    deriving (Sex -> Sex -> Bool
(Sex -> Sex -> Bool) -> (Sex -> Sex -> Bool) -> Eq Sex
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Sex -> Sex -> Bool
$c/= :: Sex -> Sex -> Bool
== :: Sex -> Sex -> Bool
$c== :: Sex -> Sex -> Bool
Eq, Int -> Sex -> ShowS
[Sex] -> ShowS
Sex -> String
(Int -> Sex -> ShowS)
-> (Sex -> String) -> ([Sex] -> ShowS) -> Show Sex
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Sex] -> ShowS
$cshowList :: [Sex] -> ShowS
show :: Sex -> String
$cshow :: Sex -> String
showsPrec :: Int -> Sex -> ShowS
$cshowsPrec :: Int -> Sex -> ShowS
Show)

-- |A datatype to represent the genotype of an individual at a SNP.
data GenoEntry = HomRef
    | Het
    | HomAlt
    | Missing
    deriving (GenoEntry -> GenoEntry -> Bool
(GenoEntry -> GenoEntry -> Bool)
-> (GenoEntry -> GenoEntry -> Bool) -> Eq GenoEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GenoEntry -> GenoEntry -> Bool
$c/= :: GenoEntry -> GenoEntry -> Bool
== :: GenoEntry -> GenoEntry -> Bool
$c== :: GenoEntry -> GenoEntry -> Bool
Eq, Int -> GenoEntry -> ShowS
[GenoEntry] -> ShowS
GenoEntry -> String
(Int -> GenoEntry -> ShowS)
-> (GenoEntry -> String)
-> ([GenoEntry] -> ShowS)
-> Show GenoEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [GenoEntry] -> ShowS
$cshowList :: [GenoEntry] -> ShowS
show :: GenoEntry -> String
$cshow :: GenoEntry -> String
showsPrec :: Int -> GenoEntry -> ShowS
$cshowsPrec :: Int -> GenoEntry -> ShowS
Show)

-- |Vector of the genotypes of all individuals at a single SNP.
type GenoLine = Vector GenoEntry

eigenstratSnpParser :: A.Parser EigenstratSnpEntry
eigenstratSnpParser :: Parser EigenstratSnpEntry
eigenstratSnpParser = do
    ByteString
snpId_ <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString ByteString
word
    ByteString
chrom <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString ByteString
word
    Double
geneticPos <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Double -> Parser ByteString Double
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Double
A.double
    Int
pos <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Int -> Parser ByteString Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Int
forall a. Integral a => Parser a
A.decimal
    Char
ref <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNX")
    Char
alt <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNX")
    Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
    EigenstratSnpEntry -> Parser EigenstratSnpEntry
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratSnpEntry -> Parser EigenstratSnpEntry)
-> EigenstratSnpEntry -> Parser EigenstratSnpEntry
forall a b. (a -> b) -> a -> b
$ Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry (ByteString -> Chrom
Chrom ByteString
chrom) Int
pos Double
geneticPos ByteString
snpId_ Char
ref Char
alt

eigenstratIndParser :: A.Parser EigenstratIndEntry
eigenstratIndParser :: Parser EigenstratIndEntry
eigenstratIndParser = do
    Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany Parser ByteString Char
A.space
    ByteString
name <- Parser ByteString ByteString
word
    Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space
    Sex
sex <- Parser Sex
parseSex
    Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space
    ByteString
popName <- Parser ByteString ByteString
word
    Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
    EigenstratIndEntry -> Parser EigenstratIndEntry
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratIndEntry -> Parser EigenstratIndEntry)
-> EigenstratIndEntry -> Parser EigenstratIndEntry
forall a b. (a -> b) -> a -> b
$ String -> Sex -> String -> EigenstratIndEntry
EigenstratIndEntry (ByteString -> String
B.unpack ByteString
name) Sex
sex (ByteString -> String
B.unpack ByteString
popName)

parseSex :: A.Parser Sex
parseSex :: Parser Sex
parseSex = Parser Sex
parseMale Parser Sex -> Parser Sex -> Parser Sex
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Sex
parseFemale Parser Sex -> Parser Sex -> Parser Sex
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Sex
parseUnknown
  where
    parseMale :: Parser Sex
parseMale = Char -> Parser ByteString Char
A.char Char
'M' Parser ByteString Char -> Parser Sex -> Parser Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Male
    parseFemale :: Parser Sex
parseFemale = Char -> Parser ByteString Char
A.char Char
'F' Parser ByteString Char -> Parser Sex -> Parser Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Female
    parseUnknown :: Parser Sex
parseUnknown = Char -> Parser ByteString Char
A.char Char
'U' Parser ByteString Char -> Parser Sex -> Parser Sex
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser Sex
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Unknown

-- |Function to read an Eigenstrat individual file. Returns the Eigenstrat Individual Entries as list.
readEigenstratInd :: (MonadIO m) => FilePath -> m [EigenstratIndEntry]
readEigenstratInd :: String -> m [EigenstratIndEntry]
readEigenstratInd String
fn =
    IO [EigenstratIndEntry] -> m [EigenstratIndEntry]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [EigenstratIndEntry] -> m [EigenstratIndEntry])
-> ((Handle -> IO [EigenstratIndEntry]) -> IO [EigenstratIndEntry])
-> (Handle -> IO [EigenstratIndEntry])
-> m [EigenstratIndEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String
-> IOMode
-> (Handle -> IO [EigenstratIndEntry])
-> IO [EigenstratIndEntry]
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
fn IOMode
ReadMode ((Handle -> IO [EigenstratIndEntry]) -> m [EigenstratIndEntry])
-> (Handle -> IO [EigenstratIndEntry]) -> m [EigenstratIndEntry]
forall a b. (a -> b) -> a -> b
$ \Handle
handle ->
        Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry]
forall (m :: * -> *) a. Monad m => Producer a m () -> m [a]
P.toListM (Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry])
-> Producer EigenstratIndEntry IO () -> IO [EigenstratIndEntry]
forall a b. (a -> b) -> a -> b
$ Parser EigenstratIndEntry
-> Producer ByteString IO () -> Producer EigenstratIndEntry IO ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratIndEntry
eigenstratIndParser (Handle -> Producer' ByteString IO ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
PB.fromHandle Handle
handle)

eigenstratGenoParser :: A.Parser GenoLine
eigenstratGenoParser :: Parser GenoLine
eigenstratGenoParser = do
    ByteString
line <- (Char -> Bool) -> Parser ByteString ByteString
A.takeWhile1 Char -> Bool
isValidNum
    Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
    GenoLine -> Parser GenoLine
forall (m :: * -> *) a. Monad m => a -> m a
return (GenoLine -> Parser GenoLine)
-> ([GenoEntry] -> GenoLine) -> [GenoEntry] -> Parser GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
fromList ([GenoEntry] -> Parser GenoLine) -> [GenoEntry] -> Parser GenoLine
forall a b. (a -> b) -> a -> b
$ do
        Char
l <- ByteString -> String
B.unpack ByteString
line
        case Char
l of
            Char
'0' -> GenoEntry -> [GenoEntry]
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
HomAlt
            Char
'1' -> GenoEntry -> [GenoEntry]
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
Het
            Char
'2' -> GenoEntry -> [GenoEntry]
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
HomRef
            Char
'9' -> GenoEntry -> [GenoEntry]
forall (m :: * -> *) a. Monad m => a -> m a
return GenoEntry
Missing
            Char
_   -> String -> [GenoEntry]
forall a. HasCallStack => String -> a
error String
"this should never happen"
  where
    isValidNum :: Char -> Bool
isValidNum Char
c = Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'0' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'1' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'2' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'9'

-- |Function to read a Snp File from StdIn. Returns a Pipes-Producer over the EigenstratSnpEntries.
readEigenstratSnpStdIn :: (MonadThrow m, MonadIO m) => Producer EigenstratSnpEntry m ()
readEigenstratSnpStdIn :: Producer EigenstratSnpEntry m ()
readEigenstratSnpStdIn = Parser EigenstratSnpEntry
-> Producer ByteString m () -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratSnpEntry
eigenstratSnpParser Producer ByteString m ()
forall (m :: * -> *). MonadIO m => Producer' ByteString m ()
PB.stdin

-- |Function to read a Snp File from a file. Returns a Pipes-Producer over the EigenstratSnpEntries.
readEigenstratSnpFile :: (MonadSafe m) => FilePath -> Producer EigenstratSnpEntry m ()
readEigenstratSnpFile :: String -> Producer EigenstratSnpEntry m ()
readEigenstratSnpFile = Parser EigenstratSnpEntry
-> Producer ByteString m () -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratSnpEntry
eigenstratSnpParser (Producer ByteString m () -> Producer EigenstratSnpEntry m ())
-> (String -> Producer ByteString m ())
-> String
-> Producer EigenstratSnpEntry m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd

-- |Function to read a full Eigenstrat database from files. Returns a pair of the Eigenstrat Individual Entries, and a joint Producer over the snp entries and the genotypes.
readEigenstrat :: (MonadSafe m) => FilePath -- ^The Genotype file
               -> FilePath -- ^The Snp File
               -> FilePath -- ^The Ind file
               -> m ([EigenstratIndEntry], Producer (EigenstratSnpEntry, GenoLine) m ()) -- The return pair of individual entries and a joint Snp/Geno Producer.
readEigenstrat :: String
-> String
-> String
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
readEigenstrat String
genoFile String
snpFile String
indFile = do
    [EigenstratIndEntry]
indEntries <- String -> m [EigenstratIndEntry]
forall (m :: * -> *). MonadIO m => String -> m [EigenstratIndEntry]
readEigenstratInd String
indFile
    let snpProd :: Producer EigenstratSnpEntry m ()
snpProd = String -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer EigenstratSnpEntry m ()
readEigenstratSnpFile String
snpFile
        genoProd :: Proxy X () () GenoLine m ()
genoProd = Parser GenoLine
-> Producer ByteString m () -> Proxy X () () GenoLine m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser GenoLine
eigenstratGenoParser (String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
genoFile) Proxy X () () GenoLine m ()
-> Proxy () GenoLine () GenoLine m ()
-> Proxy X () () GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>->
            Int -> Proxy () GenoLine () GenoLine m ()
forall (m :: * -> *).
MonadThrow m =>
Int -> Pipe GenoLine GenoLine m ()
validateEigenstratEntries ([EigenstratIndEntry] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [EigenstratIndEntry]
indEntries)
    ([EigenstratIndEntry],
 Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([EigenstratIndEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
forall (m :: * -> *) a. Monad m => a -> m a
return ([EigenstratIndEntry]
indEntries, Producer EigenstratSnpEntry m ()
-> Proxy X () () GenoLine m ()
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a r b x' x.
Monad m =>
Producer a m r -> Producer b m r -> Proxy x' x () (a, b) m r
P.zip Producer EigenstratSnpEntry m ()
snpProd Proxy X () () GenoLine m ()
genoProd)

validateEigenstratEntries :: (MonadThrow m) => Int -> Pipe GenoLine GenoLine m ()
validateEigenstratEntries :: Int -> Pipe GenoLine GenoLine m ()
validateEigenstratEntries Int
nr = Pipe GenoLine GenoLine m ()
-> (GenoLine -> Pipe GenoLine GenoLine m ())
-> Pipe GenoLine GenoLine m ()
forall (m :: * -> *) x' x b' b a' c' c.
Functor m =>
Proxy x' x b' b m a'
-> (b -> Proxy x' x c' c m b') -> Proxy x' x c' c m a'
for Pipe GenoLine GenoLine m ()
forall (m :: * -> *) a r. Functor m => Pipe a a m r
cat ((GenoLine -> Pipe GenoLine GenoLine m ())
 -> Pipe GenoLine GenoLine m ())
-> (GenoLine -> Pipe GenoLine GenoLine m ())
-> Pipe GenoLine GenoLine m ()
forall a b. (a -> b) -> a -> b
$ \GenoLine
line ->
    if GenoLine -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length GenoLine
line Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
nr
    then do
        let msg :: String
msg = String
"inconsistent nr of genotypes (" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show (GenoLine -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length GenoLine
line) String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
", but should be " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nr String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
") in \
                \genotype line " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> GenoLine -> String
forall a. Show a => a -> String
show GenoLine
line
        SeqFormatException -> Pipe GenoLine GenoLine m ()
forall a e. Exception e => e -> a
throw (String -> SeqFormatException
SeqFormatException String
msg)
    else
        GenoLine -> Pipe GenoLine GenoLine m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield GenoLine
line

-- |Function to write an Eigenstrat Ind file.
writeEigenstratIndFile :: (MonadIO m) => FilePath -> [EigenstratIndEntry] -> m ()
writeEigenstratIndFile :: String -> [EigenstratIndEntry] -> m ()
writeEigenstratIndFile String
f [EigenstratIndEntry]
indEntries =
    IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ())
-> ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
f IOMode
WriteMode ((Handle -> IO ()) -> m ()) -> (Handle -> IO ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Handle
h ->
        [EigenstratIndEntry] -> (EigenstratIndEntry -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [EigenstratIndEntry]
indEntries ((EigenstratIndEntry -> IO ()) -> IO ())
-> (EigenstratIndEntry -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \(EigenstratIndEntry String
name Sex
sex String
popName) ->
            Handle -> String -> IO ()
hPutStrLn Handle
h (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
name String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"\t" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Sex -> String
forall p. IsString p => Sex -> p
sexToStr Sex
sex String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"\t" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
popName
  where
    sexToStr :: Sex -> p
sexToStr Sex
sex = case Sex
sex of
        Sex
Male    -> p
"M"
        Sex
Female  -> p
"F"
        Sex
Unknown -> p
"U"

-- |Function to write an Eigenstrat Snp File. Returns a consumer expecting EigenstratSnpEntries.
writeEigenstratSnp :: (MonadIO m) => Handle -- ^The Eigenstrat Snp File Handle.
    -> Consumer EigenstratSnpEntry m () -- ^A consumer to read EigenstratSnpEntries
writeEigenstratSnp :: Handle -> Consumer EigenstratSnpEntry m ()
writeEigenstratSnp Handle
snpFileH =
    let snpOutTextConsumer :: Proxy () ByteString y' y m r
snpOutTextConsumer = Handle -> Consumer' ByteString m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
snpFileH
        toTextPipe :: Pipe EigenstratSnpEntry ByteString m r
toTextPipe = (EigenstratSnpEntry -> ByteString)
-> Pipe EigenstratSnpEntry ByteString m r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\(EigenstratSnpEntry Chrom
chrom Int
pos Double
gpos ByteString
gid Char
ref Char
alt) ->
            let snpLine :: ByteString
snpLine = ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"\t" [ByteString
gid, Chrom -> ByteString
unChrom Chrom
chrom, String -> ByteString
B.pack (Double -> String
forall a. Show a => a -> String
show Double
gpos),
                    String -> ByteString
B.pack (Int -> String
forall a. Show a => a -> String
show Int
pos), Char -> ByteString
B.singleton Char
ref, Char -> ByteString
B.singleton Char
alt]
            in  ByteString
snpLine ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n")
    in  Pipe EigenstratSnpEntry ByteString m ()
forall r. Pipe EigenstratSnpEntry ByteString m r
toTextPipe Pipe EigenstratSnpEntry ByteString m ()
-> Proxy () ByteString () X m ()
-> Consumer EigenstratSnpEntry m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X m ()
forall y' y r. Proxy () ByteString y' y m r
snpOutTextConsumer

-- |Function to write an Eigentrat Geno File. Returns a consumer expecting Eigenstrat Genolines.
writeEigenstratGeno :: (MonadIO m) => Handle -- ^The Genotype file handle
                -> Consumer GenoLine m () -- ^A consumer to read Genotype entries.
writeEigenstratGeno :: Handle -> Consumer GenoLine m ()
writeEigenstratGeno Handle
genoFileH =
    let genoOutTextConsumer :: Proxy () ByteString y' y m r
genoOutTextConsumer = Handle -> Consumer' ByteString m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
genoFileH
        toTextPipe :: Pipe GenoLine ByteString m r
toTextPipe = (GenoLine -> ByteString) -> Pipe GenoLine ByteString m r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\GenoLine
genoLine ->
            let genoLineStr :: ByteString
genoLineStr = [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString)
-> (GenoLine -> [ByteString]) -> GenoLine -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (GenoEntry -> ByteString) -> [GenoEntry] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (String -> ByteString
B.pack (String -> ByteString)
-> (GenoEntry -> String) -> GenoEntry -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String
forall a. Show a => a -> String
show (Int -> String) -> (GenoEntry -> Int) -> GenoEntry -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GenoEntry -> Int
toEigenStratNum) ([GenoEntry] -> [ByteString])
-> (GenoLine -> [GenoEntry]) -> GenoLine -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GenoLine -> [GenoEntry]
forall a. Vector a -> [a]
toList (GenoLine -> ByteString) -> GenoLine -> ByteString
forall a b. (a -> b) -> a -> b
$ GenoLine
genoLine
            in  ByteString
genoLineStr ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n")
    in  Pipe GenoLine ByteString m ()
forall r. Pipe GenoLine ByteString m r
toTextPipe Pipe GenoLine ByteString m ()
-> Proxy () ByteString () X m () -> Consumer GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X m ()
forall y' y r. Proxy () ByteString y' y m r
genoOutTextConsumer
  where
    toEigenStratNum :: GenoEntry -> Int
toEigenStratNum GenoEntry
c = case GenoEntry
c of
        GenoEntry
HomRef  -> Int
2 :: Int
        GenoEntry
Het     -> Int
1
        GenoEntry
HomAlt  -> Int
0
        GenoEntry
Missing -> Int
9

-- |Function to write an Eigenstrat Database. Returns a consumer expecting joint Snp- and Genotype lines.
writeEigenstrat :: (MonadSafe m) => FilePath -- ^The Genotype file
                -> FilePath -- ^The Snp File
                -> FilePath -- ^The Ind file
                -> [EigenstratIndEntry] -- ^The list of individual entries
                -> Consumer (EigenstratSnpEntry, GenoLine) m () -- ^A consumer to read joint Snp/Genotype entries.
writeEigenstrat :: String
-> String
-> String
-> [EigenstratIndEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writeEigenstrat String
genoFile String
snpFile String
indFile [EigenstratIndEntry]
indEntries = do
    IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ())
-> IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a -> b) -> a -> b
$ String -> [EigenstratIndEntry] -> IO ()
forall (m :: * -> *).
MonadIO m =>
String -> [EigenstratIndEntry] -> m ()
writeEigenstratIndFile String
indFile [EigenstratIndEntry]
indEntries
    let snpOutConsumer :: Proxy () EigenstratSnpEntry () X m ()
snpOutConsumer = String
-> IOMode
-> (Handle -> Proxy () EigenstratSnpEntry () X m ())
-> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
snpFile IOMode
WriteMode Handle -> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Consumer EigenstratSnpEntry m ()
writeEigenstratSnp
        genoOutConsumer :: Proxy () GenoLine () X m ()
genoOutConsumer = String
-> IOMode
-> (Handle -> Proxy () GenoLine () X m ())
-> Proxy () GenoLine () X m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
genoFile IOMode
WriteMode Handle -> Proxy () GenoLine () X m ()
forall (m :: * -> *). MonadIO m => Handle -> Consumer GenoLine m ()
writeEigenstratGeno
    Consumer (EigenstratSnpEntry, GenoLine) m ()
-> Pipe
     (EigenstratSnpEntry, GenoLine) (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a r. Monad m => Consumer a m r -> Pipe a a m r
P.tee (((EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry)
-> Pipe (EigenstratSnpEntry, GenoLine) EigenstratSnpEntry m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry
forall a b. (a, b) -> a
fst Pipe (EigenstratSnpEntry, GenoLine) EigenstratSnpEntry m ()
-> Proxy () EigenstratSnpEntry () X m ()
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () EigenstratSnpEntry () X m ()
snpOutConsumer) Pipe
  (EigenstratSnpEntry, GenoLine) (EigenstratSnpEntry, GenoLine) m ()
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> ((EigenstratSnpEntry, GenoLine) -> GenoLine)
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (EigenstratSnpEntry, GenoLine) -> GenoLine
forall a b. (a, b) -> b
snd Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
-> Proxy () GenoLine () X m ()
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () GenoLine () X m ()
genoOutConsumer