{-# LANGUAGE OverloadedStrings #-}
module SequenceTools.Utils (versionInfoOpt, versionInfoText, sampleWithoutReplacement,
freqSumToEigenstrat, dosageToEigenstratGeno) where
import SequenceFormats.FreqSum (FreqSumEntry(..))
import SequenceFormats.Eigenstrat (EigenstratSnpEntry(..), GenoLine, GenoEntry(..))
import SequenceFormats.Utils (Chrom(..))
import qualified Data.ByteString.Char8 as B
import Data.Vector (fromList)
import Data.Version (showVersion)
import qualified Options.Applicative as OP
import Paths_sequenceTools (version)
import System.Random (randomRIO)
-- | Command-line flag @--version@.
--
-- Built with 'OP.infoOption', so it carries the package version string
-- (rendered via 'showVersion') as its informational message; the help text
-- shown for the flag is @"Print version and exit"@.
--
-- The parser yields a function @a -> a@ so that it can be applied on top of
-- any other options parser without changing that parser's result type.
versionInfoOpt :: OP.Parser (a -> a)
versionInfoOpt = OP.infoOption versionString flagSettings
  where
    versionString = showVersion version
    flagSettings  = OP.long "version" <> OP.help "Print version and exit"
-- | One-line banner stating which sequenceTools release this tool belongs to.
--
-- The version number is taken from the Cabal-generated 'version' value and
-- rendered with 'showVersion'.
versionInfoText :: String
versionInfoText = "This tool is part of sequenceTools version " ++ showVersion version
-- | Randomly pick a given number of elements from a list, never picking the
-- same position twice.
--
-- Arguments: the list to draw from, and the number of elements to draw.
--
-- Result:
--
-- * 'Nothing' if the requested number is negative or larger than the list.
-- * @'Just' []@ if the requested number is @0@.
-- * Otherwise 'Just' a list with exactly the requested number of elements.
--
-- Positions are chosen with 'randomRIO'. As soon as the number of elements
-- still to be drawn equals the number of elements left in the pool, the whole
-- remaining pool is taken as is and no further random numbers are consumed.
-- Drawn elements are prepended to the accumulator, so they appear in the
-- result in reverse order of drawing, after any pool remainder.
sampleWithoutReplacement :: [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement items count
    -- A negative request can never be satisfied; reject it up front instead
    -- of recursing until the pool is exhausted.
    | count < 0 = return Nothing
    | otherwise = go [] items (length items) count
  where
    -- go picked pool poolSize remaining
    -- The pool size is threaded through so that the list length is computed
    -- only once rather than on every recursion step.
    go :: [b] -> [b] -> Int -> Int -> IO (Maybe [b])
    go picked _ _ 0 = return (Just picked)
    go picked pool size k
        | k > size = return Nothing
        | k == size = return (Just (pool ++ picked))
        | otherwise = do
            idx <- randomRIO (0, size - 1)
            -- Split once to obtain both the chosen element and the pool
            -- without it.
            case splitAt idx pool of
                (before, chosen : after) ->
                    go (chosen : picked) (before ++ after) (size - 1) (k - 1)
                (_, []) ->
                    error "sampleWithoutReplacement: impossible, index lies within the pool"
-- | Convert one 'FreqSumEntry' into the pair of an 'EigenstratSnpEntry' and
-- its matching 'GenoLine'.
--
-- The 'Bool' argument is passed unchanged to 'dosageToEigenstratGeno' for
-- every element of the entry's call list.
--
-- Fallbacks for optional fields of the input entry:
--
-- * a missing SNP id is replaced by @\<chromosome name\>_\<position\>@;
-- * a missing genetic position is replaced by @0.0@.
--
-- Chromosome, position, reference allele and alternative allele are copied
-- over as they are.
freqSumToEigenstrat :: Bool -> FreqSumEntry -> (EigenstratSnpEntry, GenoLine)
freqSumToEigenstrat diploidizeCall
    (FreqSumEntry chrom@(Chrom c) pos maybeSnpId maybeGeneticPos ref alt calls) =
    (snpEntry, geno)
  where
    snpId_ = case maybeSnpId of
        Just id_ -> id_
        Nothing  -> c <> "_" <> B.pack (show pos)
    geneticPos = case maybeGeneticPos of
        Just p  -> p
        Nothing -> 0.0
    snpEntry = EigenstratSnpEntry chrom pos geneticPos snpId_ ref alt
    geno = fromList (map (dosageToEigenstratGeno diploidizeCall) calls)
-- | Map an optional integer dosage to an Eigenstrat 'GenoEntry'.
--
-- When the 'Bool' argument is 'True' (pseudo-haploid input, per the error
-- message used below):
--
-- * @Just 0@ becomes 'HomRef', @Just 1@ becomes 'HomAlt'.
--
-- When it is 'False':
--
-- * @Just 0@ becomes 'HomRef', @Just 1@ becomes 'Het', @Just 2@ becomes 'HomAlt'.
--
-- In both modes 'Nothing' becomes 'Missing'. Any other value is rejected by
-- calling 'error', so this function is partial for out-of-range dosages.
dosageToEigenstratGeno :: Bool -> Maybe Int -> GenoEntry
dosageToEigenstratGeno diploidizeCall c
    | diploidizeCall = fromPseudoHaploid c
    | otherwise      = fromDiploid c
  where
    -- Only 0 and 1 are legal here; each stands for a homozygous genotype.
    fromPseudoHaploid (Just 0) = HomRef
    fromPseudoHaploid (Just 1) = HomAlt
    fromPseudoHaploid Nothing  = Missing
    fromPseudoHaploid _        = error "illegal call for pseudo-haploid Calling method"

    fromDiploid (Just 0) = HomRef
    fromDiploid (Just 1) = Het
    fromDiploid (Just 2) = HomAlt
    fromDiploid Nothing  = Missing
    fromDiploid other    = error ("unknown genotype " ++ show other)