-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | RNA folding training data -- -- Parameter training for RNA secondary structure prediction tools -- requires data to train on. Since there are a number of different -- available formats, and handling them all in the training tools is a -- pain, we have this library and programs. MkTrainingData -- transforms different formats and they all produce a common training -- data format. This format is Haskell-readable (and only partially -- human-readable) line-by-line. Generating additional training data is -- therefore easy as one can just cat together different training -- files. -- -- Note that several features are designed around extended RNA -- secondary structures. -- -- Now with some filtering and manipulation options. @package BiobaseTrainingData @version 0.1.2.2 module Biobase.TrainingData -- | One training data element. We can store sequence and known structure -- (primary, secondary) as well as a predicted structure (stored, if -- not.null). The weight is how strongly this element -- should influence a training system. extendedKnowledge is True, -- if the data element comes from a source which knows about extended -- secondary structures, like the PDB. Otherwise it is False. -- -- NOTE During training, one should not penalize non-canonical -- predictions in interior loops and multibranch loops, unless they -- hinder formation of true pairs. -- -- TODO at some point we will move toward pseudoknots and other fun data TrainingData TrainingData :: String -> [ExtPairIdx] -> [ExtPairIdx] -> Double -> Bool -> [String] -> TrainingData primary :: TrainingData -> String secondary :: TrainingData -> [ExtPairIdx] predicted :: TrainingData -> [ExtPairIdx] weight :: TrainingData -> Double extendedKnowledge :: TrainingData -> Bool comments :: TrainingData -> [String] -- | Create TrainingData from various sources. 
class MkTrainingData a mkTrainingData :: MkTrainingData a => a -> TrainingData instance [overlap ok] Read TrainingData instance [overlap ok] Show TrainingData instance [overlap ok] MkTrainingData RNAstrand instance [overlap ok] MkTrainingData LinFR3D module Biobase.TrainingData.Manip -- | Left elements are filtered out, Right elements are kept. type TDmanip = Either TrainingData TrainingData -- | Prospective TrainingData elements need to be filtered as there -- are a number of entries which do not provide good training. module Biobase.TrainingData.Filter -- | Filter out elements containing too few base pairs (in relative -- terms) fMinRelPairs :: Maybe Double -> TDmanip -> TDmanip -- | Error-checking filter. fErrorCheck :: TDmanip -> TDmanip -- | Fancy importer for TrainingData. module Biobase.TrainingData.Import -- | Enumeratee producing TrainingData from a bytestring. eneeTrainingData :: Monad m => Enumeratee ByteString [TrainingData] m a -- | Convenience function fromFile :: FilePath -> IO [TrainingData]