-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | RNA folding training data
--
-- Parameter training for RNA secondary structure prediction tools
-- requires data to train on. Since there are a number of different
-- available formats, and handling them all in the training tools is a
-- pain, we have this library and its programs. MkTrainingData
-- transforms the different formats into a common training data
-- format. This format is Haskell-readable (and only partially
-- human-readable) line-by-line. Generating additional training data is
-- therefore easy, as one can just cat together different training
-- files.
--
-- Note that several features are designed around extended RNA
-- secondary structures.
--
-- Now with some filtering and manipulation options.
@package BiobaseTrainingData
@version 0.1.2.0
module Biobase.TrainingData
-- | One training data element. It stores the sequence (primary) and the
-- known structure (secondary), as well as a predicted structure
-- (predicted; stored if it is not null). The weight is how strongly
-- this element should influence a training system. extendedKnowledge
-- is True if the data element comes from a source which knows about
-- extended secondary structures, like the PDB; otherwise it is False.
--
-- NOTE During training, one should not penalize non-canonical
-- predictions in interior loops and multibranch loops, unless they
-- hinder formation of true pairs.
--
-- TODO at some point we will move toward pseudoknots and other fun
data TrainingData
-- | Constructor. Its argument types line up one-to-one with the field
-- accessors listed below (presumably in the same order: primary,
-- secondary, predicted, weight, extendedKnowledge, comments).
TrainingData :: String -> [ExtPairIdx] -> [ExtPairIdx] -> Double -> Bool -> [String] -> TrainingData
-- | The sequence (primary structure).
primary :: TrainingData -> String
-- | The known structure, as a list of ExtPairIdx.
secondary :: TrainingData -> [ExtPairIdx]
-- | The predicted structure, as a list of ExtPairIdx. According to the
-- description of the type it is only stored if it is not null.
predicted :: TrainingData -> [ExtPairIdx]
-- | How strongly this element should influence a training system.
weight :: TrainingData -> Double
-- | True if the element comes from a source which knows about extended
-- secondary structures (like the PDB), False otherwise.
extendedKnowledge :: TrainingData -> Bool
-- | Comment strings attached to the element. NOTE(review): their
-- content and intended use are not described in this file; confirm
-- against the package source.
comments :: TrainingData -> [String]
-- | Create TrainingData from various sources. The instances listed in
-- this file are for RNAstrand and LinFR3D.
class MkTrainingData a
-- | Turn a value of the source type a into a TrainingData element.
mkTrainingData :: MkTrainingData a => a -> TrainingData
-- Instances: Read and Show for TrainingData, and MkTrainingData for the
-- source types RNAstrand and LinFR3D. The "[overlap ok]" tag is part of
-- the generated output; NOTE(review): it presumably records that
-- overlapping instances were permitted where these were compiled --
-- confirm against the package's build settings.
instance [overlap ok] Read TrainingData
instance [overlap ok] Show TrainingData
instance [overlap ok] MkTrainingData RNAstrand
instance [overlap ok] MkTrainingData LinFR3D
module Biobase.TrainingData.Manip
-- | A TrainingData element tagged with the outcome of manipulation or
-- filtering: Left elements are filtered out, Right elements are kept.
type TDmanip = Either TrainingData TrainingData
-- | Prospective TrainingData elements need to be filtered as there
-- are a number of entries which do not provide good training.
module Biobase.TrainingData.Filter
-- | Filter out elements containing not enough base pairs (in relative
-- terms). NOTE(review): the Maybe Double is presumably the minimum
-- relative amount of pairs, with Nothing meaning that no threshold is
-- applied; what the amount is relative to is not stated here -- confirm
-- in the implementation.
fMinRelPairs :: Maybe Double -> TDmanip -> TDmanip
-- | Error-checking filter. It takes a TDmanip and returns a TDmanip.
-- NOTE(review): which errors are checked is not documented in this
-- file; see the implementation.
fErrorCheck :: TDmanip -> TDmanip
-- | Fancy importer for TrainingData.
module Biobase.TrainingData.Import
-- | Enumeratee producing TrainingData from a bytestring: the outer
-- stream is ByteString and the inner stream is [TrainingData], for any
-- Monad m. NOTE(review): how the input is split into elements is not
-- stated here (presumably one element per line, given the package
-- description) -- confirm.
eneeTrainingData :: Monad m => Enumeratee ByteString [TrainingData] m a
-- | Convenience function: given a file path, produce the list of
-- TrainingData elements in IO. NOTE(review): presumably a wrapper
-- around eneeTrainingData that reads the named file -- confirm.
fromFile :: FilePath -> IO [TrainingData]