{-# LANGUAGE BinaryLiterals    #-}
{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Plink (readBimStdIn,
                              readBimFile,
                              writeBim,
                              readFamFile,
                              readPlinkBedFile,
                              readPlink,
                              writePlink,
                              PlinkFamEntry(..),
                              plinkFam2EigenstratInd,
                              eigenstratInd2PlinkFam,
                              PlinkPopNameMode(..)) where

import           SequenceFormats.Eigenstrat       (EigenstratIndEntry (..),
                                                   EigenstratSnpEntry (..),
                                                   GenoEntry (..), GenoLine,
                                                   Sex (..))
import           SequenceFormats.Utils            (Chrom (..), consumeProducer,
                                                   readFileProd, word)

import           Control.Applicative              ((<|>))
import           Control.Monad                    (forM_, void)
import           Control.Monad.Catch              (MonadThrow, throwM)
import           Control.Monad.IO.Class           (MonadIO, liftIO)
import           Control.Monad.Trans.State.Strict (runStateT)
import qualified Data.Attoparsec.ByteString       as AB
import qualified Data.Attoparsec.ByteString.Char8 as A
import           Data.Bits                        (shiftL, shiftR, (.&.), (.|.))
import qualified Data.ByteString                  as BB
import qualified Data.ByteString.Char8            as B
import           Data.List                        (intercalate)
import           Data.Vector                      (fromList, toList)
import           Data.Word                        (Word8)
import           Pipes                            (Consumer, Producer, (>->))
import           Pipes.Attoparsec                 (ParsingError (..), parse)
import qualified Pipes.ByteString                 as PB
import qualified Pipes.Prelude                    as P
import           Pipes.Safe                       (MonadSafe)
import qualified Pipes.Safe.Prelude               as PS
import           System.IO                        (Handle, IOMode (..),
                                                   hPutStrLn, withFile)

-- see https://www.cog-genomics.org/plink/2.0/formats#fam
data PlinkFamEntry = PlinkFamEntry {
    PlinkFamEntry -> String
_famFamilyID     :: String,
    PlinkFamEntry -> String
_famIndividualID :: String,
    PlinkFamEntry -> String
_famFatherID     :: String,
    PlinkFamEntry -> String
_famMotherID     :: String,
    PlinkFamEntry -> Sex
_famSexCode      :: Sex,
    PlinkFamEntry -> String
_famPhenotype    :: String
} deriving (PlinkFamEntry -> PlinkFamEntry -> Bool
(PlinkFamEntry -> PlinkFamEntry -> Bool)
-> (PlinkFamEntry -> PlinkFamEntry -> Bool) -> Eq PlinkFamEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: PlinkFamEntry -> PlinkFamEntry -> Bool
== :: PlinkFamEntry -> PlinkFamEntry -> Bool
$c/= :: PlinkFamEntry -> PlinkFamEntry -> Bool
/= :: PlinkFamEntry -> PlinkFamEntry -> Bool
Eq, Int -> PlinkFamEntry -> ShowS
[PlinkFamEntry] -> ShowS
PlinkFamEntry -> String
(Int -> PlinkFamEntry -> ShowS)
-> (PlinkFamEntry -> String)
-> ([PlinkFamEntry] -> ShowS)
-> Show PlinkFamEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PlinkFamEntry -> ShowS
showsPrec :: Int -> PlinkFamEntry -> ShowS
$cshow :: PlinkFamEntry -> String
show :: PlinkFamEntry -> String
$cshowList :: [PlinkFamEntry] -> ShowS
showList :: [PlinkFamEntry] -> ShowS
Show)

data PlinkPopNameMode = PlinkPopNameAsFamily | PlinkPopNameAsPhenotype | PlinkPopNameAsBoth deriving (PlinkPopNameMode -> PlinkPopNameMode -> Bool
(PlinkPopNameMode -> PlinkPopNameMode -> Bool)
-> (PlinkPopNameMode -> PlinkPopNameMode -> Bool)
-> Eq PlinkPopNameMode
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: PlinkPopNameMode -> PlinkPopNameMode -> Bool
== :: PlinkPopNameMode -> PlinkPopNameMode -> Bool
$c/= :: PlinkPopNameMode -> PlinkPopNameMode -> Bool
/= :: PlinkPopNameMode -> PlinkPopNameMode -> Bool
Eq, Int -> PlinkPopNameMode -> ShowS
[PlinkPopNameMode] -> ShowS
PlinkPopNameMode -> String
(Int -> PlinkPopNameMode -> ShowS)
-> (PlinkPopNameMode -> String)
-> ([PlinkPopNameMode] -> ShowS)
-> Show PlinkPopNameMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PlinkPopNameMode -> ShowS
showsPrec :: Int -> PlinkPopNameMode -> ShowS
$cshow :: PlinkPopNameMode -> String
show :: PlinkPopNameMode -> String
$cshowList :: [PlinkPopNameMode] -> ShowS
showList :: [PlinkPopNameMode] -> ShowS
Show)

bimParser :: A.Parser EigenstratSnpEntry
bimParser :: Parser EigenstratSnpEntry
bimParser = do
    ByteString
chrom      <- Parser ByteString
word
    ByteString
snpId_     <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString () -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
word
    Double
geneticPos <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Double -> Parser ByteString Double
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Double
A.double
    Int
pos        <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Int -> Parser ByteString Int
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Int
forall a. Integral a => Parser a
A.decimal
    Char
ref        <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Char -> Parser ByteString Char
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNX01234")
    Char
alt        <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Char -> Parser ByteString Char
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGNX01234")
    Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
    let refConvert :: Char
refConvert = Char -> Char
convertNum Char
ref
        altConvert :: Char
altConvert = Char -> Char
convertNum Char
alt
    EigenstratSnpEntry -> Parser EigenstratSnpEntry
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (EigenstratSnpEntry -> Parser EigenstratSnpEntry)
-> EigenstratSnpEntry -> Parser EigenstratSnpEntry
forall a b. (a -> b) -> a -> b
$ Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry (ByteString -> Chrom
Chrom ByteString
chrom) Int
pos Double
geneticPos ByteString
snpId_ Char
refConvert Char
altConvert
  where
    convertNum :: Char -> Char
convertNum Char
'0' = Char
'N'
    convertNum Char
'1' = Char
'A'
    convertNum Char
'2' = Char
'C'
    convertNum Char
'3' = Char
'G'
    convertNum Char
'4' = Char
'T'
    convertNum Char
x   = Char
x

famParser :: A.Parser PlinkFamEntry
famParser :: Parser PlinkFamEntry
famParser = do
    Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany Parser ByteString Char
A.space
    String
famID    <- ByteString -> String
B.unpack (ByteString -> String)
-> Parser ByteString -> Parser ByteString String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
word
    String
indID    <- ByteString -> String
B.unpack (ByteString -> String)
-> Parser ByteString -> Parser ByteString String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString () -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
word)
    String
fatherID <- ByteString -> String
B.unpack (ByteString -> String)
-> Parser ByteString -> Parser ByteString String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString () -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
word)
    String
motherID <- ByteString -> String
B.unpack (ByteString -> String)
-> Parser ByteString -> Parser ByteString String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString () -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
word)
    Sex
sex      <- Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString ()
-> Parser ByteString Sex -> Parser ByteString Sex
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Sex
parseSex
    String
phen     <- ByteString -> String
B.unpack (ByteString -> String)
-> Parser ByteString -> Parser ByteString String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
A.skipMany1 Parser ByteString Char
A.space Parser ByteString () -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
word)
    Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString ()
A.endOfLine
    PlinkFamEntry -> Parser PlinkFamEntry
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (PlinkFamEntry -> Parser PlinkFamEntry)
-> PlinkFamEntry -> Parser PlinkFamEntry
forall a b. (a -> b) -> a -> b
$ String
-> String -> String -> String -> Sex -> String -> PlinkFamEntry
PlinkFamEntry String
famID String
indID String
fatherID String
motherID Sex
sex String
phen
  where
    parseSex :: Parser ByteString Sex
parseSex = Parser ByteString Sex
parseMale Parser ByteString Sex
-> Parser ByteString Sex -> Parser ByteString Sex
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString Sex
parseFemale Parser ByteString Sex
-> Parser ByteString Sex -> Parser ByteString Sex
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString Sex
parseUnknown
    parseMale :: Parser ByteString Sex
parseMale = Char -> Parser ByteString Char
A.char Char
'1' Parser ByteString Char
-> Parser ByteString Sex -> Parser ByteString Sex
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser ByteString Sex
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Male
    parseFemale :: Parser ByteString Sex
parseFemale = Char -> Parser ByteString Char
A.char Char
'2' Parser ByteString Char
-> Parser ByteString Sex -> Parser ByteString Sex
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser ByteString Sex
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Female
    parseUnknown :: Parser ByteString Sex
parseUnknown = Parser ByteString Char
A.anyChar Parser ByteString Char
-> Parser ByteString Sex -> Parser ByteString Sex
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Sex -> Parser ByteString Sex
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return Sex
Unknown

plinkFam2EigenstratInd :: PlinkPopNameMode -> PlinkFamEntry -> EigenstratIndEntry
plinkFam2EigenstratInd :: PlinkPopNameMode -> PlinkFamEntry -> EigenstratIndEntry
plinkFam2EigenstratInd PlinkPopNameMode
plinkPopNameMode (PlinkFamEntry String
famId String
indId String
_ String
_ Sex
sex String
phen) =
    let popName :: String
popName = case PlinkPopNameMode
plinkPopNameMode of
            PlinkPopNameMode
PlinkPopNameAsFamily    -> String
famId
            PlinkPopNameMode
PlinkPopNameAsPhenotype -> String
phen
            -- If the two differ but you want both, then merge them somehow.
            PlinkPopNameMode
PlinkPopNameAsBoth -> if String
famId String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
phen then String
famId else String
famId String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
":" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
phen
    in  String -> Sex -> String -> EigenstratIndEntry
EigenstratIndEntry String
indId Sex
sex String
popName

eigenstratInd2PlinkFam :: PlinkPopNameMode -> EigenstratIndEntry -> PlinkFamEntry
eigenstratInd2PlinkFam :: PlinkPopNameMode -> EigenstratIndEntry -> PlinkFamEntry
eigenstratInd2PlinkFam PlinkPopNameMode
plinkPopNameMode (EigenstratIndEntry String
indId Sex
sex String
popName)=
    case PlinkPopNameMode
plinkPopNameMode of
        PlinkPopNameMode
PlinkPopNameAsFamily    -> String
-> String -> String -> String -> Sex -> String -> PlinkFamEntry
PlinkFamEntry String
popName String
indId String
"0" String
"0" Sex
sex String
"0"
        PlinkPopNameMode
PlinkPopNameAsPhenotype -> String
-> String -> String -> String -> Sex -> String -> PlinkFamEntry
PlinkFamEntry String
"DummyFamily" String
indId String
"0" String
"0" Sex
sex String
popName
        PlinkPopNameMode
PlinkPopNameAsBoth      -> String
-> String -> String -> String -> Sex -> String -> PlinkFamEntry
PlinkFamEntry String
popName String
indId String
"0" String
"0" Sex
sex String
popName

bedHeaderParser :: AB.Parser ()
bedHeaderParser :: Parser ByteString ()
bedHeaderParser = do
    Parser ByteString Word8 -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Word8 -> Parser ByteString ())
-> Parser ByteString Word8 -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Word8 -> Parser ByteString Word8
AB.word8 Word8
0b01101100 -- magic number I for BED files
    Parser ByteString Word8 -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Word8 -> Parser ByteString ())
-> Parser ByteString Word8 -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Word8 -> Parser ByteString Word8
AB.word8 Word8
0b00011011 -- magic number II for BED files
    Parser ByteString Word8 -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Word8 -> Parser ByteString ())
-> Parser ByteString Word8 -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Word8 -> Parser ByteString Word8
AB.word8 Word8
0b00000001 -- we can only parse SNP-major order

bedGenotypeParser :: Int -> AB.Parser GenoLine
bedGenotypeParser :: Int -> Parser GenoLine
bedGenotypeParser Int
nrInds = do
    let nrBytes :: Int
nrBytes = if Int
nrInds Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
4 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Int
nrInds Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
4 else (Int
nrInds Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
4) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
    [Word8]
bytes <- ByteString -> [Word8]
BB.unpack (ByteString -> [Word8])
-> Parser ByteString -> Parser ByteString [Word8]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser ByteString
AB.take Int
nrBytes
    let indBitPairs :: [Word8]
indBitPairs = (Word8 -> [Word8]) -> [Word8] -> [Word8]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Word8 -> [Word8]
forall {b}. (Bits b, Num b) => b -> [b]
getBitPairs [Word8]
bytes
    GenoLine -> Parser GenoLine
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenoLine -> Parser GenoLine)
-> ([Word8] -> GenoLine) -> [Word8] -> Parser GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
fromList ([GenoEntry] -> GenoLine)
-> ([Word8] -> [GenoEntry]) -> [Word8] -> GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [GenoEntry] -> [GenoEntry]
forall a. Int -> [a] -> [a]
take Int
nrInds ([GenoEntry] -> [GenoEntry])
-> ([Word8] -> [GenoEntry]) -> [Word8] -> [GenoEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8 -> GenoEntry) -> [Word8] -> [GenoEntry]
forall a b. (a -> b) -> [a] -> [b]
map Word8 -> GenoEntry
forall {a}. (Eq a, Num a) => a -> GenoEntry
bitPairToGenotype ([Word8] -> Parser GenoLine) -> [Word8] -> Parser GenoLine
forall a b. (a -> b) -> a -> b
$ [Word8]
indBitPairs
  where
    getBitPairs :: b -> [b]
getBitPairs b
byte = (b -> b) -> [b] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map (b
0b00000011 b -> b -> b
forall a. Bits a => a -> a -> a
.&.) [b
byte, b -> Int -> b
forall a. Bits a => a -> Int -> a
shiftR b
byte Int
2, b -> Int -> b
forall a. Bits a => a -> Int -> a
shiftR b
byte Int
4, b -> Int -> b
forall a. Bits a => a -> Int -> a
shiftR b
byte Int
6]
    bitPairToGenotype :: a -> GenoEntry
bitPairToGenotype a
0b00000000 = GenoEntry
HomRef
    bitPairToGenotype a
0b00000010 = GenoEntry
Het
    bitPairToGenotype a
0b00000011 = GenoEntry
HomAlt
    bitPairToGenotype a
0b00000001 = GenoEntry
Missing
    bitPairToGenotype a
_          = String -> GenoEntry
forall a. HasCallStack => String -> a
error String
"This should never happen"

readPlinkBedProd :: (MonadThrow m) => Int -> Producer B.ByteString m () -> m (Producer GenoLine m ())
readPlinkBedProd :: forall (m :: * -> *).
MonadThrow m =>
Int -> Producer ByteString m () -> m (Producer GenoLine m ())
readPlinkBedProd Int
nrInds Producer ByteString m ()
prod = do
    (Maybe (Either ParsingError ())
res, Producer ByteString m ()
rest) <- StateT
  (Producer ByteString m ()) m (Maybe (Either ParsingError ()))
-> Producer ByteString m ()
-> m (Maybe (Either ParsingError ()), Producer ByteString m ())
forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT (Parser ByteString ()
-> Parser ByteString m (Maybe (Either ParsingError ()))
forall (m :: * -> *) a b.
(Monad m, ParserInput a) =>
Parser a b -> Parser a m (Maybe (Either ParsingError b))
parse Parser ByteString ()
bedHeaderParser) Producer ByteString m ()
prod
    ()
_ <- case Maybe (Either ParsingError ())
res of
        Maybe (Either ParsingError ())
Nothing -> ParsingError -> m ()
forall e a. (HasCallStack, Exception e) => e -> m a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM (ParsingError -> m ()) -> ParsingError -> m ()
forall a b. (a -> b) -> a -> b
$ [String] -> String -> ParsingError
ParsingError [] String
"Bed file exhausted prematurely"
        Just (Left ParsingError
e) -> ParsingError -> m ()
forall e a. (HasCallStack, Exception e) => e -> m a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM ParsingError
e
        Just (Right ()
h) -> () -> m ()
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
h
    Producer GenoLine m () -> m (Producer GenoLine m ())
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer GenoLine m () -> m (Producer GenoLine m ()))
-> Producer GenoLine m () -> m (Producer GenoLine m ())
forall a b. (a -> b) -> a -> b
$ Parser GenoLine
-> Producer ByteString m () -> Producer GenoLine m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer (Int -> Parser GenoLine
bedGenotypeParser Int
nrInds) Producer ByteString m ()
rest

-- |A function to read a bed file from a file. Returns a Producer over all lines.
readPlinkBedFile :: (MonadSafe m) => FilePath -> Int -> m (Producer GenoLine m ())
readPlinkBedFile :: forall (m :: * -> *).
MonadSafe m =>
String -> Int -> m (Producer GenoLine m ())
readPlinkBedFile String
file Int
nrInds = Int -> Producer ByteString m () -> m (Producer GenoLine m ())
forall (m :: * -> *).
MonadThrow m =>
Int -> Producer ByteString m () -> m (Producer GenoLine m ())
readPlinkBedProd Int
nrInds (String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
file)

-- |Function to read a Bim File from StdIn. Returns a Pipes-Producer over the EigenstratSnpEntries.
readBimStdIn :: (MonadThrow m, MonadIO m) => Producer EigenstratSnpEntry m ()
readBimStdIn :: forall (m :: * -> *).
(MonadThrow m, MonadIO m) =>
Producer EigenstratSnpEntry m ()
readBimStdIn = Parser EigenstratSnpEntry
-> Producer ByteString m () -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratSnpEntry
bimParser Producer ByteString m ()
Producer' ByteString m ()
forall (m :: * -> *). MonadIO m => Producer' ByteString m ()
PB.stdin

-- |Function to read a Bim File from a file. Returns a Pipes-Producer over the EigenstratSnpEntries.
readBimFile :: (MonadSafe m) => FilePath -> Producer EigenstratSnpEntry m ()
readBimFile :: forall (m :: * -> *).
MonadSafe m =>
String -> Producer EigenstratSnpEntry m ()
readBimFile = Parser EigenstratSnpEntry
-> Producer ByteString m () -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser EigenstratSnpEntry
bimParser (Producer ByteString m () -> Producer EigenstratSnpEntry m ())
-> (String -> Producer ByteString m ())
-> String
-> Producer EigenstratSnpEntry m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd

-- |Function to read a Plink fam file. Returns the Eigenstrat Individual Entries as list.
readFamFile :: (MonadIO m) => FilePath -> m [PlinkFamEntry]
readFamFile :: forall (m :: * -> *). MonadIO m => String -> m [PlinkFamEntry]
readFamFile String
fn =
    IO [PlinkFamEntry] -> m [PlinkFamEntry]
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [PlinkFamEntry] -> m [PlinkFamEntry])
-> ((Handle -> IO [PlinkFamEntry]) -> IO [PlinkFamEntry])
-> (Handle -> IO [PlinkFamEntry])
-> m [PlinkFamEntry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String
-> IOMode -> (Handle -> IO [PlinkFamEntry]) -> IO [PlinkFamEntry]
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
fn IOMode
ReadMode ((Handle -> IO [PlinkFamEntry]) -> m [PlinkFamEntry])
-> (Handle -> IO [PlinkFamEntry]) -> m [PlinkFamEntry]
forall a b. (a -> b) -> a -> b
$ \Handle
handle ->
        Producer PlinkFamEntry IO () -> IO [PlinkFamEntry]
forall (m :: * -> *) a. Monad m => Producer a m () -> m [a]
P.toListM (Producer PlinkFamEntry IO () -> IO [PlinkFamEntry])
-> Producer PlinkFamEntry IO () -> IO [PlinkFamEntry]
forall a b. (a -> b) -> a -> b
$ Parser PlinkFamEntry
-> Producer ByteString IO () -> Producer PlinkFamEntry IO ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser PlinkFamEntry
famParser (Handle -> Producer' ByteString IO ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
PB.fromHandle Handle
handle)

-- |Function to read a full Plink dataset from files. Returns a pair of the Plink Individual Entries, and a joint Producer over the snp entries and the genotypes.
readPlink :: (MonadSafe m) => FilePath -- ^The Bed file
               -> FilePath -- ^The Bim File
               -> FilePath -- ^The Fam file
               -> m ([PlinkFamEntry], Producer (EigenstratSnpEntry, GenoLine) m ()) -- The return pair of individual entries and a joint Snp/Geno Producer.
readPlink :: forall (m :: * -> *).
MonadSafe m =>
String
-> String
-> String
-> m ([PlinkFamEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
readPlink String
bedFile String
bimFile String
famFile = do
    [PlinkFamEntry]
indEntries <- String -> m [PlinkFamEntry]
forall (m :: * -> *). MonadIO m => String -> m [PlinkFamEntry]
readFamFile String
famFile
    let nrInds :: Int
nrInds = [PlinkFamEntry] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [PlinkFamEntry]
indEntries
        snpProd :: Producer EigenstratSnpEntry m ()
snpProd = String -> Producer EigenstratSnpEntry m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer EigenstratSnpEntry m ()
readBimFile String
bimFile
    Producer GenoLine m ()
genoProd <- String -> Int -> m (Producer GenoLine m ())
forall (m :: * -> *).
MonadSafe m =>
String -> Int -> m (Producer GenoLine m ())
readPlinkBedFile String
bedFile Int
nrInds
    ([PlinkFamEntry], Producer (EigenstratSnpEntry, GenoLine) m ())
-> m ([PlinkFamEntry],
      Producer (EigenstratSnpEntry, GenoLine) m ())
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([PlinkFamEntry]
indEntries, Producer EigenstratSnpEntry m ()
-> Producer GenoLine m ()
-> Producer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a r b x' x.
Monad m =>
Producer a m r -> Producer b m r -> Proxy x' x () (a, b) m r
P.zip Producer EigenstratSnpEntry m ()
snpProd Producer GenoLine m ()
genoProd)

-- |Function to write a Bim file. Returns a consumer expecting EigenstratSnpEntries.
writeBim :: (MonadIO m) => Handle -- ^The Eigenstrat Snp File handle.
    -> Consumer EigenstratSnpEntry m () -- ^A consumer to read EigenstratSnpEntries
writeBim :: forall (m :: * -> *).
MonadIO m =>
Handle -> Consumer EigenstratSnpEntry m ()
writeBim Handle
snpFileH =
    let snpOutTextConsumer :: Proxy () ByteString y' y m r
snpOutTextConsumer = Handle -> Consumer' ByteString m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
snpFileH
        toTextPipe :: Pipe EigenstratSnpEntry ByteString m r
toTextPipe = (EigenstratSnpEntry -> ByteString)
-> Pipe EigenstratSnpEntry ByteString m r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (\(EigenstratSnpEntry Chrom
chrom Int
pos Double
gpos ByteString
gid Char
ref Char
alt) ->
            let bimLine :: ByteString
bimLine = ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"\t" [Chrom -> ByteString
unChrom Chrom
chrom, ByteString
gid, String -> ByteString
B.pack (Double -> String
forall a. Show a => a -> String
show Double
gpos),
                    String -> ByteString
B.pack (Int -> String
forall a. Show a => a -> String
show Int
pos), Char -> ByteString
B.singleton Char
ref, Char -> ByteString
B.singleton Char
alt]
            in  ByteString
bimLine ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n")
    in  Pipe EigenstratSnpEntry ByteString m ()
forall {r}. Pipe EigenstratSnpEntry ByteString m r
toTextPipe Pipe EigenstratSnpEntry ByteString m ()
-> Proxy () ByteString () X m ()
-> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X m ()
forall {y'} {y} {r}. Proxy () ByteString y' y m r
snpOutTextConsumer

-- |Function to write a Plink Fam file.
writeFam :: (MonadIO m) => FilePath -> [PlinkFamEntry] -> m ()
writeFam :: forall (m :: * -> *).
MonadIO m =>
String -> [PlinkFamEntry] -> m ()
writeFam String
f [PlinkFamEntry]
indEntries =
    IO () -> m ()
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ())
-> ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
f IOMode
WriteMode ((Handle -> IO ()) -> m ()) -> (Handle -> IO ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Handle
h ->
        [PlinkFamEntry] -> (PlinkFamEntry -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [PlinkFamEntry]
indEntries ((PlinkFamEntry -> IO ()) -> IO ())
-> (PlinkFamEntry -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \(PlinkFamEntry String
famId String
indId String
fatherId String
motherId Sex
sex String
phen) ->
            Handle -> String -> IO ()
hPutStrLn Handle
h (String -> IO ()) -> ([String] -> String) -> [String] -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"\t" ([String] -> IO ()) -> [String] -> IO ()
forall a b. (a -> b) -> a -> b
$ [String
famId, String
indId, String
fatherId, String
motherId, Sex -> String
forall {a}. IsString a => Sex -> a
sexToStr Sex
sex, String
phen]
  where
    sexToStr :: Sex -> a
sexToStr Sex
sex = case Sex
sex of
        Sex
Male    -> a
"1"
        Sex
Female  -> a
"2"
        Sex
Unknown -> a
"0"

-- |Function to write an Eigentrat Geno File. Returns a consumer expecting Eigenstrat Genolines.
writeBed :: (MonadIO m) => Handle -- ^The Bed file handle
                -> Consumer GenoLine m () -- ^A consumer to read Genotype entries.
writeBed :: forall (m :: * -> *). MonadIO m => Handle -> Consumer GenoLine m ()
writeBed Handle
bedFileH = do
    IO () -> Consumer GenoLine m ()
forall a. IO a -> Proxy () GenoLine () X m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Consumer GenoLine m ())
-> IO () -> Consumer GenoLine m ()
forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BB.hPut Handle
bedFileH ([Word8] -> ByteString
BB.pack [Word8
0b01101100, Word8
0b00011011, Word8
0b00000001])
    let bedOutConsumer :: Proxy () ByteString y' y m r
bedOutConsumer = Handle -> Consumer' ByteString m r
forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
bedFileH
        toPlinkPipe :: Pipe GenoLine ByteString m r
toPlinkPipe = (GenoLine -> ByteString) -> Pipe GenoLine ByteString m r
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map ([Word8] -> ByteString
BB.pack ([Word8] -> ByteString)
-> (GenoLine -> [Word8]) -> GenoLine -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GenoLine -> [Word8]
genoLineToBytes)
    Pipe GenoLine ByteString m ()
forall {r}. Pipe GenoLine ByteString m r
toPlinkPipe Pipe GenoLine ByteString m ()
-> Proxy () ByteString () X m () -> Consumer GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X m ()
forall {y'} {y} {r}. Proxy () ByteString y' y m r
bedOutConsumer
  where
    genoLineToBytes :: GenoLine -> [Word8]
    genoLineToBytes :: GenoLine -> [Word8]
genoLineToBytes GenoLine
genoLine = [GenoEntry] -> [Word8]
go (GenoLine -> [GenoEntry]
forall a. Vector a -> [a]
toList GenoLine
genoLine)
      where
        go :: [GenoEntry] -> [Word8]
        go :: [GenoEntry] -> [Word8]
go []                         = [] -- empty list for recursion stop
        go (GenoEntry
g1 : GenoEntry
g2 : GenoEntry
g3 : GenoEntry
g4 : [GenoEntry]
rest) = [GenoEntry] -> Word8
constructByte [GenoEntry
g1, GenoEntry
g2, GenoEntry
g3, GenoEntry
g4] Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: [GenoEntry] -> [Word8]
go [GenoEntry]
rest -- at least 5 entries -> more than 1 byte
        go [GenoEntry]
genoEntries                = [[GenoEntry] -> Word8
constructByte [GenoEntry]
genoEntries] -- four or less entries -> 1 byte
        constructByte :: [GenoEntry] -> Word8
        constructByte :: [GenoEntry] -> Word8
constructByte []     = String -> Word8
forall a. HasCallStack => String -> a
error String
"constructByte - should never happen"
        constructByte [GenoEntry
g]    = GenoEntry -> Word8
genoEntryToByte GenoEntry
g
        constructByte (GenoEntry
g:[GenoEntry]
gs) = Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftL ([GenoEntry] -> Word8
constructByte [GenoEntry]
gs) Int
2 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. GenoEntry -> Word8
genoEntryToByte GenoEntry
g

genoEntryToByte :: GenoEntry -> Word8
genoEntryToByte :: GenoEntry -> Word8
genoEntryToByte GenoEntry
HomRef  = Word8
0b00000000
genoEntryToByte GenoEntry
HomAlt  = Word8
0b00000011
genoEntryToByte GenoEntry
Het     = Word8
0b00000010
genoEntryToByte GenoEntry
Missing = Word8
0b00000001

-- |Function to write a Plink Database. Returns a consumer expecting joint Snp- and Genotype lines.
writePlink :: (MonadSafe m) => FilePath -- ^The Bed file
                -> FilePath -- ^The Bim File
                -> FilePath -- ^The Fam file
                -> [PlinkFamEntry] -- ^The list of individual entries
                -> Consumer (EigenstratSnpEntry, GenoLine) m () -- ^A consumer to read joint Snp/Genotype entries.
writePlink :: forall (m :: * -> *).
MonadSafe m =>
String
-> String
-> String
-> [PlinkFamEntry]
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
writePlink String
bedFile String
bimFile String
famFile [PlinkFamEntry]
indEntries = do
    IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall a. IO a -> Proxy () (EigenstratSnpEntry, GenoLine) () X m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ())
-> IO () -> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall a b. (a -> b) -> a -> b
$ String -> [PlinkFamEntry] -> IO ()
forall (m :: * -> *).
MonadIO m =>
String -> [PlinkFamEntry] -> m ()
writeFam String
famFile [PlinkFamEntry]
indEntries
    let bimOutConsumer :: Proxy () EigenstratSnpEntry () X m ()
bimOutConsumer = String
-> IOMode
-> (Handle -> Proxy () EigenstratSnpEntry () X m ())
-> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
bimFile IOMode
WriteMode Handle -> Proxy () EigenstratSnpEntry () X m ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Consumer EigenstratSnpEntry m ()
writeBim
        bedOutConsumer :: Proxy () GenoLine () X m ()
bedOutConsumer = String
-> IOMode
-> (Handle -> Proxy () GenoLine () X m ())
-> Proxy () GenoLine () X m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
bedFile IOMode
WriteMode Handle -> Proxy () GenoLine () X m ()
forall (m :: * -> *). MonadIO m => Handle -> Consumer GenoLine m ()
writeBed
    Consumer (EigenstratSnpEntry, GenoLine) m ()
-> Pipe
     (EigenstratSnpEntry, GenoLine) (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a r. Monad m => Consumer a m r -> Pipe a a m r
P.tee (((EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry)
-> Pipe (EigenstratSnpEntry, GenoLine) EigenstratSnpEntry m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (EigenstratSnpEntry, GenoLine) -> EigenstratSnpEntry
forall a b. (a, b) -> a
fst Pipe (EigenstratSnpEntry, GenoLine) EigenstratSnpEntry m ()
-> Proxy () EigenstratSnpEntry () X m ()
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () EigenstratSnpEntry () X m ()
bimOutConsumer) Pipe
  (EigenstratSnpEntry, GenoLine) (EigenstratSnpEntry, GenoLine) m ()
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> ((EigenstratSnpEntry, GenoLine) -> GenoLine)
-> Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map (EigenstratSnpEntry, GenoLine) -> GenoLine
forall a b. (a, b) -> b
snd Proxy () (EigenstratSnpEntry, GenoLine) () GenoLine m ()
-> Proxy () GenoLine () X m ()
-> Consumer (EigenstratSnpEntry, GenoLine) m ()
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () GenoLine () X m ()
bedOutConsumer