{-# LANGUAGE OverloadedStrings #-}

{-| Module to parse and write freqSum files. The freqsum format is defined here:
<https://rarecoal-docs.readthedocs.io/en/latest/rarecoal-tools.html#vcf2freqsum>
-}

module SequenceFormats.FreqSum (readFreqSumStdIn, readFreqSumFile, FreqSumEntry(..),  
    FreqSumHeader(..), printFreqSumStdOut, printFreqSumFile, freqSumEntryToText) where

import SequenceFormats.Utils (consumeProducer, Chrom(..), readFileProd)

import Control.Applicative ((<|>))
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.State.Strict (runStateT)
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.Char (isAlphaNum, isSpace)
import qualified Data.ByteString.Char8 as B
import Pipes (Producer, (>->), Consumer)
import Pipes.Attoparsec (parse, ParsingError(..))
import qualified Pipes.Prelude as P
import Pipes.Safe (MonadSafe)
import Pipes.Safe.Prelude (withFile)
import qualified Pipes.ByteString as PB
import Prelude hiding (putStr)
import System.IO (IOMode(..))

-- |A Datatype representing the Header
data FreqSumHeader = FreqSumHeader {
    FreqSumHeader -> [String]
fshNames :: [String], -- ^A list of individual or group names
    FreqSumHeader -> [Int]
fshCounts :: [Int] -- ^A list of haplotype counts per individual/group.
} deriving (FreqSumHeader -> FreqSumHeader -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: FreqSumHeader -> FreqSumHeader -> Bool
$c/= :: FreqSumHeader -> FreqSumHeader -> Bool
== :: FreqSumHeader -> FreqSumHeader -> Bool
$c== :: FreqSumHeader -> FreqSumHeader -> Bool
Eq, Int -> FreqSumHeader -> ShowS
[FreqSumHeader] -> ShowS
FreqSumHeader -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FreqSumHeader] -> ShowS
$cshowList :: [FreqSumHeader] -> ShowS
show :: FreqSumHeader -> String
$cshow :: FreqSumHeader -> String
showsPrec :: Int -> FreqSumHeader -> ShowS
$cshowsPrec :: Int -> FreqSumHeader -> ShowS
Show)

freqSumHeaderToText :: FreqSumHeader -> B.ByteString
freqSumHeaderToText :: FreqSumHeader -> ByteString
freqSumHeaderToText (FreqSumHeader [String]
names [Int]
nCounts) =
    ByteString
"#CHROM\tPOS\tREF\tALT\t" forall a. Semigroup a => a -> a -> a
<> ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"\t" [ByteString]
tuples forall a. Semigroup a => a -> a -> a
<> ByteString
"\n"
  where
    tuples :: [ByteString]
tuples = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\String
n Int
c -> String -> ByteString
B.pack String
n forall a. Semigroup a => a -> a -> a
<> ByteString
"(" forall a. Semigroup a => a -> a -> a
<> String -> ByteString
B.pack (forall a. Show a => a -> String
show Int
c) forall a. Semigroup a => a -> a -> a
<> ByteString
")") [String]
names [Int]
nCounts

-- |A Datatype to denote a single freqSum line
data FreqSumEntry = FreqSumEntry {
    FreqSumEntry -> Chrom
fsChrom  :: Chrom, -- ^The chromosome of the site
    FreqSumEntry -> Int
fsPos    :: Int, -- ^The position of the site
    FreqSumEntry -> Maybe ByteString
fsSnpId  :: Maybe B.ByteString, -- ^An optional parameter to take the snpId. This is not parsed from or printed to freqSum format but is used in internal conversions from Eigenstrat.
    FreqSumEntry -> Maybe Double
fsGeneticPos :: Maybe Double, -- ^An optional parameter to take the genetic pos. This is not parsed from or printed to freqSum format but is used in internal conversions from Eigenstrat.
    FreqSumEntry -> Char
fsRef    :: Char, -- ^The reference allele
    FreqSumEntry -> Char
fsAlt    :: Char, -- ^The alternative allele
    FreqSumEntry -> [Maybe Int]
fsCounts :: [Maybe Int] -- ^A list of allele counts in each group. Nothing denotes missing data.
} deriving (FreqSumEntry -> FreqSumEntry -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: FreqSumEntry -> FreqSumEntry -> Bool
$c/= :: FreqSumEntry -> FreqSumEntry -> Bool
== :: FreqSumEntry -> FreqSumEntry -> Bool
$c== :: FreqSumEntry -> FreqSumEntry -> Bool
Eq, Int -> FreqSumEntry -> ShowS
[FreqSumEntry] -> ShowS
FreqSumEntry -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FreqSumEntry] -> ShowS
$cshowList :: [FreqSumEntry] -> ShowS
show :: FreqSumEntry -> String
$cshow :: FreqSumEntry -> String
showsPrec :: Int -> FreqSumEntry -> ShowS
$cshowsPrec :: Int -> FreqSumEntry -> ShowS
Show)

-- |This function converts a single freqSum entry to a printable freqSum line.
freqSumEntryToText :: FreqSumEntry -> B.ByteString
freqSumEntryToText :: FreqSumEntry -> ByteString
freqSumEntryToText (FreqSumEntry Chrom
chrom Int
pos Maybe ByteString
_ Maybe Double
_ Char
ref Char
alt [Maybe Int]
maybeCounts) =
    ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"\t" [Chrom -> ByteString
unChrom Chrom
chrom, String -> ByteString
B.pack (forall a. Show a => a -> String
show Int
pos), Char -> ByteString
B.singleton Char
ref, Char -> ByteString
B.singleton Char
alt, ByteString
countStr] forall a. Semigroup a => a -> a -> a
<> ByteString
"\n"
  where
    countStr :: ByteString
countStr = ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"\t" forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (String -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {a}. Num a => Maybe a -> a
convertToNum) forall a b. (a -> b) -> a -> b
$ [Maybe Int]
maybeCounts 
    convertToNum :: Maybe a -> a
convertToNum Maybe a
Nothing = -a
1
    convertToNum (Just a
a) = a
a

readFreqSumProd :: (MonadThrow m) =>
    Producer B.ByteString m () -> m (FreqSumHeader, Producer FreqSumEntry m ())
readFreqSumProd :: forall (m :: * -> *).
MonadThrow m =>
Producer ByteString m ()
-> m (FreqSumHeader, Producer FreqSumEntry m ())
readFreqSumProd Producer ByteString m ()
prod = do
    (Maybe (Either ParsingError FreqSumHeader)
res, Producer ByteString m ()
rest) <- forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT (forall (m :: * -> *) a b.
(Monad m, ParserInput a) =>
Parser a b -> Parser a m (Maybe (Either ParsingError b))
parse Parser FreqSumHeader
parseFreqSumHeader) Producer ByteString m ()
prod
    FreqSumHeader
header <- case Maybe (Either ParsingError FreqSumHeader)
res of
        Maybe (Either ParsingError FreqSumHeader)
Nothing -> forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ [String] -> String -> ParsingError
ParsingError [] String
"freqSum file exhausted"
        Just (Left ParsingError
e) -> forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM ParsingError
e
        Just (Right FreqSumHeader
h) -> forall (m :: * -> *) a. Monad m => a -> m a
return FreqSumHeader
h
    forall (m :: * -> *) a. Monad m => a -> m a
return (FreqSumHeader
header, forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser FreqSumEntry
parseFreqSumEntry Producer ByteString m ()
rest)

-- |A function to read a freqsum file from StdIn. Returns a pair of a freqSum Header and a Producer over all lines.
readFreqSumStdIn :: (MonadIO m, MonadThrow m) => m (FreqSumHeader, Producer FreqSumEntry m ())
readFreqSumStdIn :: forall (m :: * -> *).
(MonadIO m, MonadThrow m) =>
m (FreqSumHeader, Producer FreqSumEntry m ())
readFreqSumStdIn = forall (m :: * -> *).
MonadThrow m =>
Producer ByteString m ()
-> m (FreqSumHeader, Producer FreqSumEntry m ())
readFreqSumProd forall (m :: * -> *). MonadIO m => Producer' ByteString m ()
PB.stdin

-- |A function to read a freqsum file from a file. Returns a pair of a freqSum Header and a Producer over all lines.
readFreqSumFile :: (MonadSafe m) => FilePath -> m (FreqSumHeader, Producer FreqSumEntry m ())
readFreqSumFile :: forall (m :: * -> *).
MonadSafe m =>
String -> m (FreqSumHeader, Producer FreqSumEntry m ())
readFreqSumFile = forall (m :: * -> *).
MonadThrow m =>
Producer ByteString m ()
-> m (FreqSumHeader, Producer FreqSumEntry m ())
readFreqSumProd forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd

parseFreqSumHeader :: A.Parser FreqSumHeader
parseFreqSumHeader :: Parser FreqSumHeader
parseFreqSumHeader = do
    [(ByteString, Int)]
tuples <- ByteString -> Parser ByteString
A.string ByteString
"#CHROM\tPOS\tREF\tALT\t" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a s. MonadPlus m => m a -> m s -> m [a]
A.sepBy' Parser ByteString (ByteString, Int)
tuple Parser Char
A.space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.endOfLine
    let names :: [ByteString]
names = forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(ByteString, Int)]
tuples
        counts :: [Int]
counts = forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(ByteString, Int)]
tuples
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [String] -> [Int] -> FreqSumHeader
FreqSumHeader (forall a b. (a -> b) -> [a] -> [b]
map ByteString -> String
B.unpack [ByteString]
names) [Int]
counts
  where
    tuple :: Parser ByteString (ByteString, Int)
tuple = (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
A.takeWhile (\Char
c -> Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'-') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
A.char Char
'(' forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
A.char Char
')'

parseFreqSumEntry :: A.Parser FreqSumEntry
parseFreqSumEntry :: Parser FreqSumEntry
parseFreqSumEntry = Chrom
-> Int
-> Maybe ByteString
-> Maybe Double
-> Char
-> Char
-> [Maybe Int]
-> FreqSumEntry
FreqSumEntry forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ByteString -> Chrom
Chrom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
A.takeTill Char -> Bool
isSpace) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.skipSpace forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*
    Parser ()
A.skipSpace forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char
base forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.skipSpace forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char
baseOrDot forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.skipSpace forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString [Maybe Int]
counts forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.endOfLine
  where
    counts :: Parser ByteString [Maybe Int]
counts = (forall {a}. Parser ByteString (Maybe a)
parseMissing forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString (Maybe Int)
parseCount) forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy` Char -> Parser Char
A.char Char
'\t'
    parseMissing :: Parser ByteString (Maybe a)
parseMissing = ByteString -> Parser ByteString
A.string ByteString
"-1" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing
    parseCount :: Parser ByteString (Maybe Int)
parseCount = forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Integral a => Parser a
A.decimal
    base :: Parser Char
base = (Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTGN")
    baseOrDot :: Parser Char
baseOrDot = (Char -> Bool) -> Parser Char
A.satisfy (String -> Char -> Bool
A.inClass String
"ACTG.")

-- |A function to write freqSum data to StdOut. Expects the freqSum header as argument, and then returns a Consumer that accepts freqSum entries.
printFreqSumStdOut :: (MonadIO m) => FreqSumHeader -> Consumer FreqSumEntry m ()
printFreqSumStdOut :: forall (m :: * -> *).
MonadIO m =>
FreqSumHeader -> Consumer FreqSumEntry m ()
printFreqSumStdOut FreqSumHeader
fsh = do
    forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> IO ()
B.putStr forall b c a. (b -> c) -> (a -> b) -> a -> c
. FreqSumHeader -> ByteString
freqSumHeaderToText forall a b. (a -> b) -> a -> b
$ FreqSumHeader
fsh
    forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map FreqSumEntry -> ByteString
freqSumEntryToText forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> forall (m :: * -> *). MonadIO m => Consumer' ByteString m ()
PB.stdout

-- |A function that writes a freqSum file. Expects the FilePath and the freqSum header as arguments, and then returns a Consumer that accepts freqSum entries.
printFreqSumFile :: (MonadSafe m) => FilePath -> FreqSumHeader -> Consumer FreqSumEntry m ()
printFreqSumFile :: forall (m :: * -> *).
MonadSafe m =>
String -> FreqSumHeader -> Consumer FreqSumEntry m ()
printFreqSumFile String
outFile FreqSumHeader
fsh = forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
withFile String
outFile IOMode
WriteMode forall {m :: * -> *} {c'} {c} {b}.
MonadIO m =>
Handle -> Proxy () FreqSumEntry c' c m b
go
  where
    go :: Handle -> Proxy () FreqSumEntry c' c m b
go Handle
h = do
        forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. Handle -> ByteString -> IO ()
B.hPutStr Handle
h forall b c a. (b -> c) -> (a -> b) -> a -> c
. FreqSumHeader -> ByteString
freqSumHeaderToText forall a b. (a -> b) -> a -> b
$ FreqSumHeader
fsh
        forall (m :: * -> *) a b r. Functor m => (a -> b) -> Pipe a b m r
P.map FreqSumEntry -> ByteString
freqSumEntryToText forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> forall (m :: * -> *) r.
MonadIO m =>
Handle -> Consumer' ByteString m r
PB.toHandle Handle
h