{-| Module Config - replaces Options, use CmdArgs instead -} {-# LANGUAGE DeriveDataTypeable #-} module Config (Config(..), mkconf ,inform, whenLoud, whenNotQuiet) where import Data.Char (toLower) import Data.List (isPrefixOf, intersperse) import System.Console.CmdArgs import System.Directory (doesFileExist) import Control.Monad (when) import System.IO (hPutStrLn, stderr, stdin) import EmpFile import Bio.Sequence import Generations.GenBase import Generations.GS20 import Generations.Titanium import Generations.Empirical version :: String version = "flowsim v0.2.7, copyright 2010 Ketil Malde" -- data GenName = Titanium | FLX | GS20 deriving (Typeable, Data, Show, Eq) type GenName = String type Dist = String data Config = Conf { generation :: GenName , degradation:: Dist , model :: FilePath , qualmethod :: GenName , discardfilters :: [String] , trimfilters :: [String] , flowkey :: String , hplinput :: FilePath , flowlength :: Int , flowcycle :: String , inputs :: [FilePath] , output :: FilePath } deriving (Typeable, Data, Show, Read) modes :: Mode Config modes = mode $ Conf { generation = "Titanium" &= text "454 generation to simulate" & typ "GEN" & flag "G" , degradation = def &= text "model for degradation of the flow model" & typ "DIST" , model = def &= text "empirical distribution for flow generation" & typFile , qualmethod = def &= text "method for calculating quality" & typ "STRING" , discardfilters = def &= text "discarding filters to apply" & typ "DFILT" , trimfilters = def &= text "trimming filters to apply" & typ "TFILT" , flowkey = def &= text "sequence key to start each read (TCAG)" , hplinput = def &= text "input genome for HPL count estimate" & typFile , flowlength = def &= text "number of flow cycles to run" , flowcycle = def &= text "sequence nucleotides in each flow cycle (TACG)" , inputs = def &= args & typFile , output = def &= text "output file" } &= prog "flowsim" & text "simulate 454 pyrosequencing." & helpSuffix hs where hs = ["Generations (GEN): "++concat (intersperse ", " $ map fst generations) , "Distributions (DIST): Uniform a b, Normal mu sigma, LogNormal mu sigma" , "Discarding filters (DFILT): ...." , "Trimming filters (TFILT): ...."] mkconf :: IO (Generation,IO [Sequence Nuc],HPLprob,FilePath) mkconf = do cf <- cmdArgs version [modes] let inp = map castToNuc `fmap` if null (inputs cf) then hReadFasta stdin else concat `fmap` mapM readFasta (inputs cf) gen <- mkgen cf whenLoud $ inform ("Generation: "++generation cf ++ "\n" ++ show gen) when (null $ output cf) $ error "Please specify an output file with -o" hplc <- case hplinput cf of [] -> return default_hplc file -> (mkHPLprobs . freqtable . concatMap (hpls (f_cycle gen) . seqdata)) `fmap` readFasta file return (gen, inp, hplc, output cf) default_hplc :: HPLprob default_hplc _ l = 3/4**fromIntegral l generations :: [(GenName,Generation)] generations = [("GS20", gs20), ("Titanium",titanium),("EmpTitanium",tiEmp)] lookupGen :: String -> Generation lookupGen g = case map snd . filter (isPrefixOf (map toLower g) . map toLower . fst) $ generations of [x] -> x _ -> error ("Generation '"++g++"' is non-existing or ambigous\nAvailable generations are: "++show (map fst generations)) -- if non-null, pick field f from generation str, else pick it from the default setFromGen :: Generation -> (Generation -> field) -> String -> field setFromGen dflt f str = if null str then f dflt else f (lookupGen str) -- similar, but use Read to parse the string setFromStr :: (Read a) => Generation -> (Generation -> a) -> String -> a setFromStr dflt f str = if null str then f dflt else read str mkgen :: Config -> IO Generation mkgen (Conf g deg fm qm df tf fk hpl fl fc _inputs _output) = do let gen = lookupGen g setG = setFromGen gen setD = setFromStr gen setF fs | null fs = disc_filters gen | otherwise = concatMap (maybe [] id) $ map (flip lookup filterlist) fs setT fs | null fs = trim_filters gen | otherwise = concatMap (maybe [] id) $ map (flip lookup trimlist) fs my_qcall = if null qm then qcall gen else case qm of "gs20tab" -> qual_gs20_tab "dexact" -> qual_exact_decreasing "fexact" -> qual_exact_fixed _ -> error ("No such quality call method:"++show qm ++"\nAvailable methods: gs20tab, dexact, fexact.") my_model <- if null fm then return (models gen) else do f <- doesFileExist fm ms <- if f then readPdf fm else return (parse_models fm) return $ combine_models (models gen) ms return $ gen { qcall = my_qcall , degrade = setD degrade deg , models = my_model , disc_filters = setF df , trim_filters = setT tf , f_key = setG f_key fk , f_len = if fl > 0 then fl else f_len gen , f_cycle = setG f_cycle fc } parse_models :: String -> [Distribution] parse_models str = read ("["++str++"]") -- filterlist :: [DiscardFilter] filterlist = [] -- todo: export from SFF_filters -- trimlist :: [Trimfilter] trimlist = [] -- Logging inform :: String -> IO () inform = hPutStrLn stderr whenLoud, whenNotQuiet :: IO () -> IO () whenLoud action = flip when action =<< isLoud -- note that isQuiet = return False in CmdArgs! whenNotQuiet action = do n <- isNormal l <- isLoud when (n || l) action