{-# LANGUAGE DeriveGeneric #-}
module SLynx.SubSample.Options
( SubSampleArguments (..),
subSampleArguments,
getOutSuffixes,
)
where
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Builder as LT
import qualified Data.Text.Lazy.Builder.Int as LT
import ELynx.Data.Alphabet.Alphabet
import ELynx.Tools
import Options.Applicative
import SLynx.Tools
data SubSampleArguments = SubSampleArguments
{ ssAlphabet :: Alphabet,
ssInFile :: FilePath,
ssNSites :: Int,
ssNAlignments :: Int,
ssMbSeed :: Seed
}
deriving (Eq, Show, Generic)
getOutSuffixes :: Int -> String -> [String]
getOutSuffixes n suffix =
["." ++ digitStr i ++ "." ++ suffix | i <- [0 .. n - 1]]
where
nDigits = ceiling $ logBase (10 :: Double) (fromIntegral n)
digitStr i =
T.unpack $
T.justifyRight nDigits '0' (LT.toStrict $ LT.toLazyText $ LT.decimal i)
instance Reproducible SubSampleArguments where
inFiles = pure . ssInFile
outSuffixes a = getOutSuffixes (ssNAlignments a) "fasta"
getSeed = Just . ssMbSeed
setSeed a s = a {ssMbSeed = Fixed s}
parser = subSampleArguments
cmdName = "sub-sample"
cmdDsc = ["Sub-sample columns from multi sequence alignments."]
cmdFtr =
[ "Create a given number of multi sequence alignments, each of which contains a given number of random sites drawn from the original multi sequence alignment."
]
instance FromJSON SubSampleArguments
instance ToJSON SubSampleArguments
subSampleArguments :: Parser SubSampleArguments
subSampleArguments =
SubSampleArguments
<$> alphabetOpt
<*> filePathArg
<*> subSampleNSitesOpt
<*> subSampleNAlignmentsOpt
<*> seedOpt
subSampleNSitesOpt :: Parser Int
subSampleNSitesOpt =
option auto $
long "number-of-sites" <> short 'n' <> metavar "INT"
<> help
"Number of sites randomly drawn with replacement"
subSampleNAlignmentsOpt :: Parser Int
subSampleNAlignmentsOpt =
option auto $
long "number-of-alignments"
<> short 'm'
<> metavar "INT"
<> help "Number of multi sequence alignments to be created"
filePathArg :: Parser FilePath
filePathArg =
strArgument $ metavar "INPUT-FILE" <> help "Read sequences from INPUT-FILE"