{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE DeriveDataTypeable #-} module Main where import System.Console.CmdArgs import Control.Monad (liftM,when) import Text.Printf import Data.List (sortBy) import Data.Ord (comparing) import Data.List.Split (splitEvery) import Biobase.DataSource.MCFold import Biobase.DataSource.MCFold.Import import Biobase.RNA import Biobase.Structure import Biobase.Structure.Constraint --import Biobase.Structure.DotBracket import BioInf.MCFoldDP data Options = Fold { database :: FilePath , strictInput :: Bool , noSparseDataCorrection :: Bool , orderSuboptimals :: Bool , band :: Double , oneResult :: Bool , constraint :: Maybe String , printConstraint :: Bool } | Constrain { database :: FilePath , strictInput :: Bool , noSparseDataCorrection :: Bool , orderSuboptimals :: Bool , band :: Double , oneResult :: Bool , printConstraint :: Bool } deriving (Data,Typeable,Show) options = Fold { database = "./MCFOLD-DB" &= typDir &= help "path to MCFOLD-DB (default: ./MCFOLD-DB)" , strictInput = False &= help "Silently ignore all sequences containing characters other than ACGU (default: convert other characters to E and handle gracefully)" , noSparseDataCorrection = False &= help "disable sparse data correction (default: enabled)" , orderSuboptimals = False &= help "sort suboptimal results by score (better scores first) (default: false)" , band = 0.1 &= help "score band above the ground state for which suboptimal results are allowed (default: 0.1)" , oneResult = False &= help "Return only one of several co-optimal structures in the backtracking phase (default: false)" , constraint = Nothing &= help "Structure constraint, follows ViennaRNA notation. Cf. \"Constraint\" major mode" , printConstraint = False &= help "prints the constraint as second line of output. This produces three or more lines: input, constraint, MC-Fold-DP suboptimals" } &= help "Major mode: expects input on stdin, each line is assumed to be a nucleotide sequence and suboptimal secondary structure prediction is performed." &= details [ "This program performs calculations similar to those done by MC-Fold." , "Important differences are: polynomial runtime, no pseudoknot handling," , "sparse data correction, and the possibility to gracefully handle all" , "input sequences." ] constrain = Constrain { } &= help "Major mode: expects the input to be in the form or two lines each, one the sequence, one the structural constraint (followed, possibly by more characters which are ignored). Using this mode, MC-Fold-DP can postprocess ViennaRNA RNAfold input." &= details [ "This major mode expects input in two lines each from stdin." , "The first line is interpreted as a sequence to fold in its entirety." , "The second line is split into words. The first word has to be the same length" , "as the first line, other words are discarded. The complete second line is" , "printed out by giving --printConstraint. This allows for easier comparison" ] ms = [options &= auto, constrain] &= summary "MCFold-DP, (c) Christian Hoener zu Siederdissen et al, 2010-2011" main = do o <- cmdArgs $ modes ms runMain o runMain o@Fold{..} = do db <- parseDir database -- TODO enable sparsity correction! cnts <- liftM (filter (\x -> not strictInput || strict x) . lines) $ getContents mapM_ (doFold db oneResult band orderSuboptimals printConstraint constraint) cnts runMain o@Constrain{..} = do db <- parseDir database -- TODO enable sparsity correction! cnts <- liftM (filter (\[x,y] -> not strictInput || strict x) . splitEvery 2 . lines) $ getContents mapM_ (\[x,y] -> doFold db oneResult band orderSuboptimals printConstraint (Just y) x) cnts strict :: String -> Bool strict xs = (all (`elem` "ACGUacgu")) xs -- | Executes folding a single sequence. doFold :: MotifDB -> Bool -> Double -> Bool -> Bool -> Maybe String -> String -> IO () doFold db oneResult band orderSuboptimals printConstraint cnstr inp | Nothing <- cnstr = do putStrLn inp common . mkConstraint $ replicate (length inp) '.' | Just cns <- cnstr , [] /= cns , let (c:_) = words cns , length inp == length c = do putStrLn inp when printConstraint $ putStrLn (cns ++ " (Constraint)") common . mkConstraint $ c | otherwise = error $ "doFold: " ++ show (inp,cnstr) where common :: Constraint -> IO () common c = do let pri = mkPrimary inp let ts = fold db pri c let res' = (if oneResult then take 1 else id) $ backtrack db band pri c ts let res = if null res' then [(0,mkD1S $ replicate (length inp) '.')] else res' mapM_ (\(e,s) -> printf "%s (%6.2f)\n" (fromD1S s :: String) e) . (if orderSuboptimals then sortBy (comparing fst) else id) $ res