{-| Module Config - replaces Options, use CmdArgs instead -} {-# LANGUAGE DeriveDataTypeable #-} module Config (Config(..), mkconf ,inform, whenLoud, whenNotQuiet) where import Data.Char (toLower) import Data.List (isPrefixOf, intersperse) import System.Console.CmdArgs as C import System.Directory (doesFileExist) import Control.Monad (when) import System.IO (hPutStrLn, stderr, stdin) import EmpFile import Version import Bio.Core.Sequence import Bio.Sequence.Fasta import Generations.GenBase import Generations.GS20 import Generations.Titanium import Generations.Empirical -- data GenName = Titanium | FLX | GS20 deriving (Typeable, Data, Show, Eq) type GenName = String type Dist = String data Config = Conf { generation :: GenName , degradation:: Dist , model :: FilePath , qualmethod :: GenName -- , discardfilters :: [String] -- , trimfilters :: [String] , flowkey :: String , hplinput :: FilePath , flowlength :: Int , flowcycle :: String , inputs :: [FilePath] , output :: FilePath } deriving (Typeable, Data, Show, Read) opts :: Config opts = Conf { generation = "Titanium" &= help "454 generation to simulate" &= typ "GEN" &= C.name "G" , degradation = def &= help "model for degradation of the flow model" &= typ "DIST" , model = def &= help "empirical distribution for flow generation" &= typFile , qualmethod = def &= help "method for calculating quality" &= typ "STRING" -- , discardfilters = def &= help "discarding filters to apply" &= typ "DFILT" -- , trimfilters = def &= help "trimming filters to apply" &= typ "TFILT" , flowkey = def &= explicit &= C.name "key" &= help "sequence key to start each read (TCAG)" &= typ "STRING" , hplinput = def &= help "input genome for HPL count estimate" &= typFile , flowlength = def &= help "number of flows for each read" &= typ "INT" , flowcycle = def &= help "sequence nucleotides in each flow cycle (TACG)" &= typ "STRING" , inputs = def &= args &= typFile , output = def &= help "output file" &= typFile } &= program "flowsim" &= summary ("flowsim "++version) &= details hs where hs = ["Generations (GEN): "++concat (intersperse ", " $ map fst generations) , "Distributions (DIST): Uniform a b, Normal mu sigma, LogNormal mu sigma" -- , "Discarding filters (DFILT): ...." -- , "Trimming filters (TFILT): ...." ] mkconf :: IO (Generation,IO [Sequence],HPLprob,FilePath) mkconf = do cf <- cmdArgs opts let inp = if null (inputs cf) then hReadFasta stdin else concat `fmap` mapM readFasta (inputs cf) gen <- mkgen cf whenLoud $ inform ("Generation: "++generation cf ++ "\n" ++ show gen) when (null $ output cf) $ error "Please specify an output file with -o" hplc <- case hplinput cf of [] -> return default_hplc file -> (mkHPLprobs . freqtable . concatMap (hpls (f_cycle gen) . unSD . seqdata)) `fmap` readFasta file return (gen, inp, hplc, output cf) default_hplc :: HPLprob default_hplc _ l = 3/4**fromIntegral l generations :: [(GenName,Generation)] generations = [("GS20", gs20), ("Titanium",titanium),("EmpTitanium",tiEmp)] lookupGen :: String -> Generation lookupGen g = case map snd . filter (isPrefixOf (map toLower g) . map toLower . fst) $ generations of [x] -> x _ -> error ("Generation '"++g++"' is non-existing or ambigous\nAvailable generations are: "++show (map fst generations)) -- if non-null, pick field f from generation str, else pick it from the default setFromGen :: Generation -> (Generation -> field) -> String -> field setFromGen dflt f str = if null str then f dflt else f (lookupGen str) -- similar, but use Read to parse the string setFromStr :: (Read a) => Generation -> (Generation -> a) -> String -> a setFromStr dflt f str = if null str then f dflt else read str mkgen :: Config -> IO Generation mkgen (Conf g deg fm qm {- df tf -} fk _hpl fl fc _inputs _output) = do let gen = lookupGen g setG = setFromGen gen -- set from a 454 generation setD = setFromStr gen -- statistical distributions setS f s = if null s then f gen else s -- set from string value setF fs | null fs = disc_filters gen | otherwise = concatMap (maybe [] id) $ map (flip lookup filterlist) fs setT fs | null fs = trim_filters gen | otherwise = concatMap (maybe [] id) $ map (flip lookup trimlist) fs my_qcall = if null qm then qcall gen else case qm of "gs20tab" -> qual_gs20_tab "dexact" -> qual_exact_decreasing "fexact" -> qual_exact_fixed _ -> error ("No such quality call method:"++show qm ++"\nAvailable methods: gs20tab, dexact, fexact.") my_model <- if null fm then return (models gen) else do f <- doesFileExist fm ms <- if f then readPdf fm else return (parse_models fm) return $ combine_models (models gen) ms return $ gen { qcall = my_qcall , degrade = setD degrade deg , models = my_model -- , disc_filters = setF df -- , trim_filters = setT tf , f_key = setS f_key fk , f_len = if fl > 0 then 4*((fl+3)`div` 4) else f_len gen -- is this sufficient? , f_cycle = setS f_cycle fc } parse_models :: String -> [Distribution] parse_models str = read ("["++str++"]") -- filterlist :: [DiscardFilter] filterlist = [] -- todo: export from SFF_filters -- trimlist :: [TrimFilter] trimlist = [] -- Logging inform :: String -> IO () inform = hPutStrLn stderr whenNotQuiet :: IO () -> IO () whenNotQuiet action = do n <- isNormal l <- isLoud when (n || l) action