{-# LANGUAGE OverloadedStrings #-}
module SequenceTools.Utils (versionInfoOpt, versionInfoText, sampleWithoutReplacement,
    freqSumToEigenstrat, dosageToEigenstratGeno) where 

import SequenceFormats.FreqSum (FreqSumEntry(..))
import SequenceFormats.Eigenstrat (EigenstratSnpEntry(..), GenoLine, GenoEntry(..))
import SequenceFormats.Utils (Chrom(..))

import qualified Data.ByteString.Char8 as B
import Data.Vector (fromList)
import Data.Version (showVersion)
import qualified Options.Applicative as OP
import Paths_sequenceTools (version)
import System.Random (randomRIO)

versionInfoOpt :: OP.Parser (a -> a)
versionInfoOpt :: Parser (a -> a)
versionInfoOpt = String -> Mod OptionFields (a -> a) -> Parser (a -> a)
forall a. String -> Mod OptionFields (a -> a) -> Parser (a -> a)
OP.infoOption (Version -> String
showVersion Version
version) (String -> Mod OptionFields (a -> a)
forall (f :: * -> *) a. HasName f => String -> Mod f a
OP.long String
"version" Mod OptionFields (a -> a)
-> Mod OptionFields (a -> a) -> Mod OptionFields (a -> a)
forall a. Semigroup a => a -> a -> a
<> String -> Mod OptionFields (a -> a)
forall (f :: * -> *) a. String -> Mod f a
OP.help String
"Print version and exit")

versionInfoText :: String
versionInfoText :: String
versionInfoText = String
"This tool is part of sequenceTools version " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Version -> String
showVersion Version
version

sampleWithoutReplacement :: [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement :: [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement = [a] -> [a] -> Int -> IO (Maybe [a])
forall (m :: * -> *) a.
MonadIO m =>
[a] -> [a] -> Int -> m (Maybe [a])
go []
  where
    go :: [a] -> [a] -> Int -> m (Maybe [a])
go [a]
res [a]
_ Int
0 = Maybe [a] -> m (Maybe [a])
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe [a] -> m (Maybe [a])) -> Maybe [a] -> m (Maybe [a])
forall a b. (a -> b) -> a -> b
$ [a] -> Maybe [a]
forall a. a -> Maybe a
Just [a]
res
    go [a]
res [a]
xs Int
n
        | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs = Maybe [a] -> m (Maybe [a])
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe [a]
forall a. Maybe a
Nothing
        | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs = Maybe [a] -> m (Maybe [a])
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe [a] -> m (Maybe [a])) -> Maybe [a] -> m (Maybe [a])
forall a b. (a -> b) -> a -> b
$ [a] -> Maybe [a]
forall a. a -> Maybe a
Just ([a]
xs [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a]
res)
        | Bool
otherwise = do
                Int
rn <- (Int, Int) -> m Int
forall a (m :: * -> *). (Random a, MonadIO m) => (a, a) -> m a
randomRIO (Int
0, [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                let a :: a
a = [a]
xs [a] -> Int -> a
forall a. [a] -> Int -> a
!! Int
rn
                    xs' :: [a]
xs' = Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
remove Int
rn [a]
xs
                [a] -> [a] -> Int -> m (Maybe [a])
go (a
aa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
res) [a]
xs' (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
    remove :: Int -> [a] -> [a]
remove Int
i [a]
xs = let ([a]
ys, [a]
zs) = Int -> [a] -> ([a], [a])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
i [a]
xs in [a]
ys [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a] -> [a]
forall a. [a] -> [a]
tail [a]
zs

-- |convert a freqSum entry to an eigenstrat SNP entry
freqSumToEigenstrat :: Bool -> FreqSumEntry -> (EigenstratSnpEntry, GenoLine)
freqSumToEigenstrat :: Bool -> FreqSumEntry -> (EigenstratSnpEntry, GenoLine)
freqSumToEigenstrat Bool
diploidizeCall (FreqSumEntry chrom :: Chrom
chrom@(Chrom ByteString
c) Int
pos Maybe ByteString
maybeSnpId Maybe Double
maybeGeneticPos Char
ref Char
alt [Maybe Int]
calls) =
    let snpId_ :: ByteString
snpId_ = case Maybe ByteString
maybeSnpId of 
            Just ByteString
id_ -> ByteString
id_
            Maybe ByteString
Nothing -> ByteString
c ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"_" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> String -> ByteString
B.pack (Int -> String
forall a. Show a => a -> String
show Int
pos)
        geneticPos :: Double
geneticPos = case Maybe Double
maybeGeneticPos of
            Just Double
p -> Double
p
            Maybe Double
Nothing -> Double
0.0
        snpEntry :: EigenstratSnpEntry
snpEntry = Chrom
-> Int
-> Double
-> ByteString
-> Char
-> Char
-> EigenstratSnpEntry
EigenstratSnpEntry Chrom
chrom Int
pos Double
geneticPos ByteString
snpId_ Char
ref Char
alt
        geno :: GenoLine
geno = [GenoEntry] -> GenoLine
forall a. [a] -> Vector a
fromList ([GenoEntry] -> GenoLine)
-> ([Maybe Int] -> [GenoEntry]) -> [Maybe Int] -> GenoLine
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Maybe Int -> GenoEntry) -> [Maybe Int] -> [GenoEntry]
forall a b. (a -> b) -> [a] -> [b]
map (Bool -> Maybe Int -> GenoEntry
dosageToEigenstratGeno Bool
diploidizeCall) ([Maybe Int] -> GenoLine) -> [Maybe Int] -> GenoLine
forall a b. (a -> b) -> a -> b
$ [Maybe Int]
calls
    in  (EigenstratSnpEntry
snpEntry, GenoLine
geno)

-- |convert a Dosage to an eigenstrat-encoded genotype
dosageToEigenstratGeno :: Bool -> Maybe Int -> GenoEntry
dosageToEigenstratGeno :: Bool -> Maybe Int -> GenoEntry
dosageToEigenstratGeno Bool
diploidizeCall Maybe Int
c =
    if Bool
diploidizeCall then
        case Maybe Int
c of
            Just Int
0 -> GenoEntry
HomRef
            Just Int
1 -> GenoEntry
HomAlt
            Maybe Int
Nothing -> GenoEntry
Missing
            Maybe Int
_ -> String -> GenoEntry
forall a. HasCallStack => String -> a
error String
"illegal call for pseudo-haploid Calling method"
    else
        case Maybe Int
c of
            Just Int
0 -> GenoEntry
HomRef
            Just Int
1 -> GenoEntry
Het
            Just Int
2 -> GenoEntry
HomAlt
            Maybe Int
Nothing -> GenoEntry
Missing
            Maybe Int
_ -> String -> GenoEntry
forall a. HasCallStack => String -> a
error (String
"unknown genotype " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Maybe Int -> String
forall a. Show a => a -> String
show Maybe Int
c)