-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | RNA folding with non-canonical basepairs and base-triplets.
--   
--   The algorithm implemented here-in provides extended RNA secondary
--   structure prediction. Each predicted nucleotide pairing is extended
--   with an annotation describing which of three nucleotide edges is
--   engaged in the pairing. In addition, each nucleotide may be engaged in
--   more than one pairing.
--   
--   The algorithm is described in
--   
--   Hoener zu Siederdissen C, Bernhart SH, Stadler PF, Hofacker IL,
--   
--   <a>A Folding Algorithm for Extended RNA Secondary Structures</a>,
--   
--   Bioinformatics (2011) 27 (13), i129-136
--   
--   <a>http://www.tbi.univie.ac.at/software/rnawolf/</a>
--   
--   Please note that <a>experimental</a> does mean experimental. We are
--   mostly concerned with determining a good set of (heuristic) rules for
--   run-time reduction currently. This version does include stacking and
--   is able to fold sequences of a few hundred nucleotides in seconds.
--   
--   Triplet calculations will come back with the next version (in a few
--   days). The recursions require a number of changes to keep the runtimes
--   down (as has been done for the extended loops without triplets).
--   
--   We have recently split the Biohaskell libraries into smaller
--   individual libraries. In addition, stacking, intermediate arrays,
--   fusion and newtype-wrapping did require a number of changes. Please
--   send a mail, if you encounter strange behaviour or bugs.
--   
--   Changes in 0.3.2.0
--   
--   <ul>
--   <li>simpler training wrapper</li>
--   <li>added parallelism option for multi-core systems (reduce iteration
--   time for the cost of a possible reduction in training efficiency; but
--   should be worth it)</li>
--   </ul>
--   
--   Changes in 0.3.1.0
--   
--   <ul>
--   <li>fixed bugs introduced by bulge<i>interior</i>multi-loops</li>
--   </ul>
@package RNAwolf
@version 0.3.2.0


-- | RNA-folding parameter space.
--   
--   TODO find better names for types, functions, and minima/maxima.
module BioInf.Params

-- | A (very) rich set of paramters.
--   
--   TODO 1xn interior loops should be tested (how often do they occur?)
--   
--   TODO external loop
data Params
Params :: PaLength -> PaExtPairNN -> Pa2ExtPairs -> Pa2ExtPairs -> PaLength -> PaLength -> PaExtPairNN -> PaLength -> Pa2ExtPairs -> PaExtPair -> PaExtPairNN -> Double -> Double -> Double -> PaDistance -> Double -> Params
hairpinLength :: Params -> PaLength
hairpinClose :: Params -> PaExtPairNN
stem :: Params -> Pa2ExtPairs
stemTriplet :: Params -> Pa2ExtPairs
interiorLength :: Params -> PaLength
interiorAsym :: Params -> PaLength
interiorClose :: Params -> PaExtPairNN
bulgeLength :: Params -> PaLength
bulgeTriplet :: Params -> Pa2ExtPairs
bulgeClose :: Params -> PaExtPair
mbClose :: Params -> PaExtPairNN
multiBranched :: Params -> Double
multiHelix :: Params -> Double
multiUnpaired :: Params -> Double
pairDistance :: Params -> PaDistance
interMolInit :: Params -> Double

-- | An array which encodes <a>length</a> information
type PaLength = PrimArray Int Double

-- | This is an experimental annotation for long-distance interactions
type PaDistance = PrimArray Int Double

-- | An array holding information for one extended pair.
type PaExtPair = PrimArray ExtPair Double

-- | An array holding information for two extended pairs, e.g. stems.
type Pa2ExtPairs = PrimArray (ExtPair, ExtPair) Double

-- | An array holding information for one extended pair and two unpaired
--   nucleotides, closes a loop.
type PaExtPairNN = PrimArray (ExtPair, Nuc, Nuc) Double
instance Read Params
instance Show Params


-- | Given a list of doubles with the <i>exact required length</i> import
--   into a <a>Params</a> structure.
module BioInf.Params.Import

-- | Cast a list of values to parameters.
--   
--   NOTE This operation is rather fragile if there are layout changes.
--   Consider Repr for this.
--   
--   NOTE BIG FAT WARNING: BE ABSOLUTELY SURE THAT ALL IMPORTS AND EXPORTS
--   FOLLOW THIS ORDERING EXACTLY, OTHERWISE KEYS WILL BE MAPPED TO WRONG
--   POSITIONS DURING LOOKUP AND VALUES END UP SOMEWHERE ELSE.
fromList :: [Double] -> Params

-- | split up a list accordings to given lengths
splitXs :: [Int] -> [Double] -> [[Double]]


-- | Exporting parameters is a bit more involved as we need the ability to
--   export into a database format as well as linearize to list form.
module BioInf.Params.Export

-- | Just a long list of doubles.
toList :: Params -> [Double]

module BioInf.RNAwolf.Types

-- | Should really go into BiobaseXNA
--   
--   Should really go into BiobaseXNA
type ExtBT = Int -> Int -> CTisomerism -> Edge -> Edge -> Double -> BTAnswer
type NBT = Int -> Int -> Double -> BTAnswer
type BTAnswer = [([ExtPairIdx], Double)]
type Table = PrimArray PairIdx Double
type ExtTable = PrimArray ExtPairIdx Double
type BaseF a = Params -> Primary -> a
type ExtFeatures a = Int -> Int -> CTisomerism -> Edge -> Edge -> a
type Features a = Int -> Int -> a
type Tables = (EStem, NStem, NInte, NInteLoop, NBulg, NBulgLoop, NMult, NMbr, NMbr1, NMultLoop, NExtn)
newtype EStem
EStem :: ExtTable -> EStem
unEStem :: EStem -> ExtTable
newtype NStem
NStem :: Table -> NStem
unNStem :: NStem -> Table
newtype NInte
NInte :: Table -> NInte
unNInte :: NInte -> Table
newtype NInteLoop
NInteLoop :: Table -> NInteLoop
unNInteLoop :: NInteLoop -> Table
newtype NMult
NMult :: Table -> NMult
unNMult :: NMult -> Table
newtype NBulg
NBulg :: Table -> NBulg
unNBulg :: NBulg -> Table
newtype NBulgLoop
NBulgLoop :: Table -> NBulgLoop
unBulgLoop :: NBulgLoop -> Table
newtype NMbr
NMbr :: Table -> NMbr
unNMbr :: NMbr -> Table
newtype NMbr1
NMbr1 :: Table -> NMbr1
unNMbr1 :: NMbr1 -> Table
newtype NExtn
NExtn :: Table -> NExtn
unNExtn :: NExtn -> Table
newtype NMultLoop
NMultLoop :: Table -> NMultLoop
unMultLoop :: NMultLoop -> Table

module BioInf.RNAwolf.Bulge

-- | The outer closing pair of a bulge loop (one unpaired region).
fBulgeOuter :: BaseF (NBulgLoop -> ExtFeatures (Vector (PairIdx, Double)))

-- | Outer part of a normal bulge
btBulgeOuter :: Params -> Primary -> EStem -> NBulgLoop -> NBT -> ExtBT

-- | The loop-part of bulges. Increases speed by 2x
fBulgeLoop :: BaseF (NBulg -> Features (Vector (PairIdx, Double)))

-- | Index generator for bulged loops
--   
--   Backtrack the bulge loop part.
btBulgeLoop :: Params -> Primary -> NBulgLoop -> NBulg -> NBT -> NBT

-- | Inner part of a bulge to speed up bulge calculations
fBulgeInner :: BaseF (EStem -> Features (Vector (ExtPairIdx, Double)))

-- | Backtrack the inner part of a bulge
btBulgeInner :: Params -> Primary -> NBulg -> EStem -> ExtBT -> NBT


-- | External loops are complete substructures, of which zero to many sit
--   on the chain of nucleotides.
module BioInf.RNAwolf.Extern

-- | An external loop with an unpaired nucleotide to the left
fLeftUnpaired :: BaseF (NExtn -> Features (Vector (PairIdx, Double)))

-- | Backtracking a structure with an unpaired nucleotide to the left.
--   
--   FIXME In btLeftUnpaired, allow only non-empty structures on the right.
--   It would be nice to make the recursion scheme take care of that.
btLeftUnpaired :: Params -> Primary -> NExtn -> NBT -> NBT

-- | Energy for exactly one stem at (i,k)
fStem :: BaseF (NStem -> Features (Vector (PairIdx, Double)))

-- | Backtrack one stem with right index k.
btStem :: Params -> Primary -> NExtn -> NStem -> NBT -> NBT

-- | This one is important as otherwise, some stretches of nucleotides will
--   always have to be paired. (Obviously, I forgot to add this one for a
--   time...)
fOne :: BaseF (Features (Vector (PairIdx, Double)))
btOne :: Params -> Primary -> NExtn -> NBT

-- | External structures with more than one stem have a NStem on the left
--   and an external NExtn structure on the right.
fStems :: BaseF (NStem -> NExtn -> Features (Vector (Int, Double)))

-- | Backtracking of an external structure with more than one stem
btStems :: Params -> Primary -> NStem -> NExtn -> NBT -> NBT -> NBT

module BioInf.RNAwolf.Hairpin

-- | A hairpin is a number of 0 or more unpaired nucleotides, enclosed by
--   the nucleotides (i,j) which pair.
--   
--   TODO should we allow hairpins with no unpaired nucleotides in the pin?
--   They do occur, but only under special circumstances which we should
--   model differently...
--   
--   TODO re-allow IMI
fHairpin :: [Int] -> BaseF (ExtFeatures (Vector (ExtPairIdx, Double)))

-- | Backtracking hairpins.
btHairpin :: Params -> Primary -> EStem -> ExtBT

module BioInf.RNAwolf.Interior

-- | The outer part of an interior loop. Given a certain basepair type, add
--   the cost from the unpaired part.
fInteriorOuter :: BaseF (NInteLoop -> ExtFeatures (Vector (PairIdx, Double)))
btInteriorOuter :: Params -> Primary -> EStem -> NInteLoop -> NBT -> ExtBT

-- | Performs the interior loop calculations between (i,j) <a>outer</a> and
--   (k,l) <a>inner</a> part. The score based on the unpaired nucleotides
--   is independent of both, the outer and the inner basepair type.
--   
--   NOTE / TODO -- fusion enabled for this function (due to it taking 50%
--   of the time), full fusion is still dependent on other factors and
--   needs to be checked (in particular, we still have allocation events)
fInteriorLoop :: BaseF (NInte -> Features (Vector (PairIdx, Double)))

-- | Backtrack the unpaired loop region
btInteriorLoop :: Params -> Primary -> NInteLoop -> NInte -> NBT -> NBT

-- | This opens up an interior loop. For each index (i,j) we minimize over
--   all possible basepair types.
fInteriorInner :: BaseF (EStem -> Features (Vector (ExtPairIdx, Double)))

-- | Backtrack from an NInte result to the corresponding EStem parts
btInteriorInner :: Params -> Primary -> NInte -> EStem -> ExtBT -> NBT

-- | Since backtracking interior loops is mostly selfcontained, we
--   encapsulate the above three functions -- which we can't do easily with
--   the forward calculations as they actually have to save on runtime.
--   
--   Given the outer indices (i,j), produces delta_i and delta_j so that
--   i+delta_i and j-delta_j are the inner indices. <a>fInteriorKLs</a>
--   should fuse and should make sure that l-k&gt;=4 is always true (maxd).
--   Furthermore the maximal unpaired length of both sides combined is
--   determined by <a>maxLength</a>.
--   
--   TODO better name than <a>maxLength</a>
fInteriorKLs :: Int -> Int -> Vector (Int, Int)


-- | Functions for handling non-triplet multibranched loops.
--   
--   TODO We can do the loop-splitting thing again to speed up
--   multibranched closing by x10.
module BioInf.RNAwolf.Multibranched

-- | Energy for having the rightmost nucleotide (at j) unpaired in NMBr.
fUnpairedRight :: BaseF (NMbr -> Features (Vector (PairIdx, Double)))

-- | Backtrack in NMbr if the nucleotide at j is unpaired.
btUnpairedRight :: Params -> Primary -> NMbr -> NBT -> NBT

-- | Energy for having the rightmost nucleotide (at j) unpaired in NMBr1.
fUnpairedRight1 :: BaseF (NMbr1 -> Features (Vector (PairIdx, Double)))

-- | Backtrack NMbr1 if the nucleotide at j is unpaired.
btUnpairedRight1 :: Params -> Primary -> NMbr1 -> NBT -> NBT

-- | A multibranched helix (except the closing one). (i,j) are closed by a
--   basepair. Backtracking into the EStem reveals the type of pairing.
fMlHelix :: BaseF (EStem -> Features (Vector (ExtPairIdx, Double)))

-- | Backtracks from (i,j) in NMult into the extended-pairing EStem.
btMlHelix :: Params -> Primary -> NMult -> EStem -> ExtBT -> NBT

-- | Closes a multibranch loop.
--   
--   TODO make completely triplet compliant
fMlClose :: BaseF (NMultLoop -> ExtFeatures (Vector (PairIdx, Double)))

-- | Backtrack from and extended annotation (ij,ext) into the helper table
--   NMultLoop.
btMlClose :: Params -> Primary -> EStem -> NMultLoop -> NBT -> ExtBT

-- | Multibranched loop helper function that combines <a>at least one
--   stem</a> with <a>exactly one stem</a> but does not add the closing
--   energy from (i,j).
fMlLoop :: BaseF (NMbr -> NMbr1 -> Features (Vector (Int, Double)))

-- | Backtracking the multibranched loop.
btMlLoop :: Params -> Primary -> NMultLoop -> NMbr -> NMbr1 -> NBT -> NBT -> NBT

-- | Backtrack a single stem in NMbr, where the stem has zero or more
--   unpaired nucleotides to the left.
fMlStem :: BaseF (NMult -> Features (Vector (Int, Double)))

-- | Backtrack by trying to find a multilooped helix.
btMlStem :: Params -> Primary -> NMbr -> NMult -> NBT -> NBT

-- | Add a stem to a multibranch table containing already at least one
--   stem.
fMlStems :: BaseF (NMbr -> NMult -> Features (Vector (Int, Double)))

-- | Backtrack by finding the splitting index between an NMbr composite
--   structure and a single multibranched stem NMult (which can contain
--   unpaired nucleotides to the left).
btMlStems :: Params -> Primary -> NMbr -> NMult -> NBT -> NBT -> NBT

-- | Add a single stem to a multibranch table containing zero stems
--   already.
--   
--   TODO this would be equal to mlHelix, unify!
fMl1Stem :: BaseF (NMult -> Features (Vector ((Int, Int), Double)))

-- | Backtrack a single stem closed at (i,j) for NMbr1. Takes the route
--   through NMult which solves for the exact pairtype.
btMl1Stem :: Params -> Primary -> NMbr1 -> NMult -> NBT -> NBT

module BioInf.RNAwolf.Stem

-- | A normal stem is created by taking the minimum over all possible
--   basepairs of the extended stem.
fNstem :: BaseF (EStem -> Features (Vector (ExtPairIdx, Double)))

-- | Backtrack from a normal stem back into the extended stem.
btNstem :: Params -> Primary -> NStem -> EStem -> ExtBT -> NBT

-- | A stem is extended by another pair. The score contribution is
--   dependent on the previous pair. Note that for score lookup purposes,
--   the inner pair is switched.
fStem :: BaseF (EStem -> ExtFeatures (Vector (ExtPairIdx, Double)))

-- | Stem backtracking.
btStem :: Params -> Primary -> EStem -> ExtBT -> ExtBT

module BioInf.RNAwolf.TripletBulge

module BioInf.RNAwolf.TripletStem


-- | The RNAwolf folding algorithm, version 1.9. We now have full stacking
--   and rich parameters everywhere. In general, most parameters closely
--   follow what we have for ViennaRNA 1.8 but with extended RNA secondary
--   structures, instead of canonicals only. Further refinements of the
--   parameter system will follow.
--   
--   TODO right now, 1-diagrams only, 2-diagrams come back in a few days. I
--   want to be sure that the full stacking approach does not introduce
--   subtle bugs.
--   
--   TODO recast all fZZZ functions for folding to actually fuse on
--   minimum/fZZZ.
--   
--   TODO VU.! -&gt; VU.unsafeIndex
--   
--   TODO possibly very big TODO: is this being optimized? : fold $ g z
--   where g z = if z==True then [1..10] else []. If this is not optimized,
--   we should change all functions below in a way that allows
--   optimization. (I dont think fusion can fire on these objects...)
--   
--   TODO rewrite minimumVU to accept <a>Either</a> ctors and specialize on
--   them. <a>Left</a> to be used for strange errors, <a>Right</a> for
--   correct streams
module BioInf.RNAwolf

-- | Wrapper around the state monad.
rnaWolf :: Params -> Primary -> Tables

-- | Given parameters, input, score band, and filled tables we can
--   backtrack.
--   
--   NOTE the order in which backtracking for individual functions is
--   performed, is important. In case of ties in energy, the first result
--   is taken. This should be considered!
--   
--   <ul>
--   <li><i>1</i> We consider unpaired stretches always first. This is kind
--   of arbitrary.</li>
--   <li><i>2</i> extended stems always come last. This is because they can
--   potentially introduce many co-optimal structures before they are all
--   discarded.</li>
--   </ul>
--   
--   TODO all the crap in comments are bug-fix backtracking options.
rnaWolfBacktrack :: Params -> Primary -> Double -> Tables -> [([ExtPairIdx], Double)]

-- | Return the optimal energy.
rnaWolfOptimal :: Tables -> Double


-- | Transformation of predictions and known structures into keys. Keys are
--   used for linearization.
--   
--   NOTE READ THE BIG FAT KEYS WARNING
--   
--   TODO Generalize and move into its own library
module BioInf.Keys

-- | A list of <a>named</a> parameters.
--   
--   Uniquely tag each key
--   
--   NOTE BIG FAT WARNING: BE ABSOLUTELY SURE THAT ALL IMPORTS AND EXPORTS
--   FOLLOW THIS ORDERING EXACTLY, OTHERWISE KEYS WILL BE MAPPED TO WRONG
--   POSITIONS DURING LOOKUP AND VALUES END UP SOMEWHERE ELSE.
data Keys
HairpinLength :: Int -> Keys
HairpinClose :: (ExtPair, Nuc, Nuc) -> Keys
Stem :: (ExtPair, ExtPair) -> Keys
StemTriplet :: (ExtPair, ExtPair) -> Keys
InteriorLength :: Int -> Keys
InteriorAsym :: Int -> Keys
InteriorClose :: (ExtPair, Nuc, Nuc) -> Keys
BulgeLength :: Int -> Keys
BulgeTriplet :: (ExtPair, ExtPair) -> Keys
BulgeClose :: ExtPair -> Keys
MbClose :: (ExtPair, Nuc, Nuc) -> Keys
MultiBranched :: Keys
MultiHelix :: Keys
MultiUnpaired :: Keys
PairDistance :: Int -> Keys
InterMolInit :: Keys

-- | Training data to feature vector
featureVector :: String -> [ExtPairIdx] -> [Int]

-- | transform feature to 0-based index
lookupFeatureIndex :: Keys -> Int

-- | Map param keys to thei Int-indices.
--   
--   And back from Int-indices to the keys.
--   
--   Takes a primary structure and secondary structure tree and produces a
--   list of keys.
--   
--   TODO Data.Traversable ?!
--   
--   TODO better handling of unknown features: we can have genuine errors
--   (pseudoknots) and uncoded features (e.g. hairpins of size &gt; 30)
treeToFeatures :: (MkPrimary a, Show a) => a -> SSTree ExtPairIdx t -> [Keys]

-- | Create the secondary structure tree
--   
--   FIXME okPairs is ad-hoc, we should allow for other kinds of pairs!
ssTree :: Int -> [ExtPairIdx] -> SSTree ExtPairIdx ()
instance Read Keys
instance Show Keys
instance Eq Keys
instance Ord Keys


-- | Passive-aggressive optimization. Mainly based on:
--   
--   Zakov, Shay and Goldberg, Yoav and Elhaded, Michael and Ziv-Ukelson,
--   Michal <a>Rich Parameterization Improves RNA Structure Prediction</a>
--   RECOMB 2011
--   
--   and
--   
--   Crammer, Koby and (et al) <a>Online Passive-Aggressive Algorithms</a>
--   Journal of Machine Learning Research (2006)
--   
--   TODO as always: move out of here and put in its own library
module BioInf.PassiveAggressive

-- | Default implementation of P/A. We return a data structure that
--   contains all <a>changes</a> required from this run, the <a>enerDif</a>
--   or energy difference between the known and the predicted structure,
--   and a structural difference score. Furthermore, some errors are being
--   reported in <a>errors</a>.
--   
--   The energy difference can be (i) in that case, a wrongly predicted
--   structure has better (lower) energy than the known one. (ii) It can be
--   zero, then we have either found a co-optimal structural or the correct
--   structure. (iii) In some cases, it can be positive, this is a formal
--   error, but will not abort the program. (The calling program may opt to
--   abort on (not . null $ errors).
--   
--   The structural difference is [0..1] with <a>0</a> for structurally
--   identical predictions and known structures and otherwise growing
--   toward <a>1</a> for bad predictions where nothing is correct.
defaultPA :: Double -> Params -> TrainingData -> PA

-- | In case that the known structure has a score <tt>epsilon</tt> better
--   than the predicted, we have an error condition, as this should never
--   be the case.
--   
--   Return a lot of information from each P/A call. We do not return the
--   new <tt>Params</tt> anymore, only a list of changes. This allows us to
--   do some things. If the implementation of <tt>Params</tt> is switched,
--   we can update in place; or we can perform calculations in parallel.
data PA
PA :: [(Int, Double)] -> Double -> Double -> [String] -> PA
changes :: PA -> [(Int, Double)]
enerDif :: PA -> Double
accMeas :: PA -> Double
errors :: PA -> [String]
instance Show PA
instance MkConfusionMatrix TrainingData
instance NFData PA