{-# LANGUAGE OverloadedStrings #-}
module SequenceTools.Utils (versionInfoOpt, versionInfoText, sampleWithoutReplacement,
freqSumToEigenstrat, dosageToEigenstratGeno) where
import SequenceFormats.FreqSum (FreqSumEntry(..))
import SequenceFormats.Eigenstrat (EigenstratSnpEntry(..), GenoLine, GenoEntry(..))
import SequenceFormats.Utils (Chrom(..))
import qualified Data.ByteString.Char8 as B
import Data.Vector (fromList)
import Data.Version (showVersion)
import qualified Options.Applicative as OP
import Paths_sequenceTools (version)
import System.Random (randomRIO)
versionInfoOpt :: OP.Parser (a -> a)
versionInfoOpt = OP.infoOption (showVersion version) (OP.long "version" <> OP.help "Print version and exit")
versionInfoText :: String
versionInfoText = "This tool is part of sequenceTools version " ++ showVersion version
sampleWithoutReplacement :: [a] -> Int -> IO (Maybe [a])
sampleWithoutReplacement = go []
where
go res _ 0 = return $ Just res
go res xs n
| n > length xs = return Nothing
| n == length xs = return $ Just (xs ++ res)
| otherwise = do
rn <- randomRIO (0, length xs - 1)
let a = xs !! rn
xs' = remove rn xs
go (a:res) xs' (n - 1)
remove i xs = let (ys, zs) = splitAt i xs in ys ++ tail zs
freqSumToEigenstrat :: Bool -> FreqSumEntry -> (EigenstratSnpEntry, GenoLine)
freqSumToEigenstrat diploidizeCall (FreqSumEntry chrom@(Chrom c) pos maybeSnpId maybeGeneticPos ref alt calls) =
let snpId_ = case maybeSnpId of
Just id_ -> id_
Nothing -> c <> "_" <> B.pack (show pos)
geneticPos = case maybeGeneticPos of
Just p -> p
Nothing -> 0.0
snpEntry = EigenstratSnpEntry chrom pos geneticPos snpId_ ref alt
geno = fromList . map (dosageToEigenstratGeno diploidizeCall) $ calls
in (snpEntry, geno)
dosageToEigenstratGeno :: Bool -> Maybe Int -> GenoEntry
dosageToEigenstratGeno diploidizeCall c =
if diploidizeCall then
case c of
Just 0 -> HomRef
Just 1 -> HomAlt
Nothing -> Missing
_ -> error "illegal call for pseudo-haploid Calling method"
else
case c of
Just 0 -> HomRef
Just 1 -> Het
Just 2 -> HomAlt
Nothing -> Missing
_ -> error ("unknown genotype " ++ show c)