{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RecordWildCards #-}

-- | These functions are implementations of RNA secondary structure folding as
-- described in "Bompfuenewere et al., 2006, Variations on RNA folding and
-- alignment"
--
-- We have all the facilities needed for folding with the RNA parameter of
-- Turner 2004 http://rna.urmc.rochester.edu/NNDB/turner04/ but consider only
-- "double dangles" which correspond to the ViennaRNA package option "-d2".
-- They are a bit easier to implement and are what is used for partition
-- function calculations. In addition, it seems unlikely to see a statiscally
-- relevant improvement in predication with "-d1" or "-d3".
--
-- All functions work on an algebraic ring structure. This should make it
-- easier to implement certain functionality without having to rewrite all the
-- functions given here. Try deriving a new 'Ring' instance first and see if it
-- /just works/.
--
-- These functions do quite well, performancewise. GHC-HEAD with "-Odph" and
-- "-fllvm" takes 14.4s, while the highly optimized viennaRNA package (the yet
-- unpublished 2.0 version) takes about 1-2s on a sequence of length 1000.
--
-- NOTE For GHC <= 6.12.3 you should copy the default instances into your
-- instance BLA, otherwise the resulting code will be slow. Or you could just
-- wait for the new GHC to arrive! The new one produces good code without such
-- stuff.
--
-- TODO single nucleotide bulges:
-- http://rna.urmc.rochester.edu/NNDB/turner04/bulge.html , check what Vienna
-- 2.0 does!

module BioInf.RNAFold.Functions
  ( FoldFunctions (..)
  , Table
  , TurnerTables
  , ringSumL
  , pair
  , riap
  , ringProductL
  , tabbedInteriorLoopDistances
  ) where

import qualified Data.Vector.Unboxed as VU
import Control.Exception (assert)
import Data.List (foldl')
import qualified Data.Map as M

import Biobase.RNA
import Biobase.Turner.Tables
import Biobase.Types.Ring
import Biobase.Vienna
import Data.PrimitiveArray
import Data.Primitive.Types

import Debug.Trace.Tools

type Cell = (Int,Int)
type Table a = PrimArray Cell a
type TurnerTables a = Turner2004 ViennaPair Nucleotide a



-- | The folding functions. It could happen that we need different folding
-- functions with the same type, hence the class-based approach. The default
-- instance uses the usual ring methods.

class (Show a, Ring a, VU.Unbox a, Prim a) => FoldFunctions a where

  stackOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  stackIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  hairpinOpt  :: TurnerTables a -> Primary -> Int -> Int -> a
  hairpinIdx  :: TurnerTables a -> Primary -> Int -> Int -> [a]

  largeInteriorLoopOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  largeInteriorLoopIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  tabbedInteriorLoopOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  tabbedInteriorLoopIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  bulgeLOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  bulgeLIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  bulgeROpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  bulgeRIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  interior1xnLOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  interior1xnLIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  interior1xnROpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  interior1xnRIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  multibranchIJLoopOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  multibranchIJLoopIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  multibranchUnpairedJOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  multibranchUnpairedJIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  multibranchKJHelixOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  multibranchKJHelixIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Int,a)]

  multibranchAddKJHelixOpt  :: TurnerTables a -> Primary -> Table a -> Table a -> Int -> Int -> a
  multibranchAddKJHelixIdx  :: TurnerTables a -> Primary -> Table a -> Table a -> Int -> Int -> [(Int,a)]

  multibranchCloseOpt  :: TurnerTables a -> Primary -> Table a -> Table a -> Int -> Int -> a
  multibranchCloseIdx  :: TurnerTables a -> Primary -> Table a -> Table a -> Int -> Int -> [(Int,a)]

  externalLoopOpt  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> a
  externalLoopIdx  :: TurnerTables a -> Primary -> Table a -> Int -> Int -> [(Cell,a)]

  externalAddLoopOpt  :: TurnerTables a -> Primary -> Table a -> Table a -> Int -> Int -> a
  externalAddLoopIdx  :: TurnerTables a -> Primary -> Table a -> Table a -> Int -> Int -> [(Int,a)] -- return only 'k' index

  -- | Calculate the ninio asymmetric malus. Can not be written using ring
  -- functions alone as a 'min' or 'max' functions is required.

  calcNinio :: a -> a -> Int -> a

  -- | Applies a terminal AU/GU penalty, where required.
  --
  -- TODO shouldn't this be just: if CG||GC then one else termAU?

  calcTermAU :: a -> ViennaPair -> a -- ^ Apply terminal AU penalty

  -- | large hairpin loops >30 require special calculations that involve
  -- 'floor', rounding and other stuff that can not be handled by the Ring
  -- class alone

  calcLargeLoop :: Int -> a

  -- NOTE Copy these functions into your own instance. In most cases, your are
  -- now done and get optimized loops. Each function that you do not copy uses
  -- the version here. This can lead to less efficient code.
  --
  -- NOTE Fixed in GHC 6.13. The default instances should yield superb code!

  stackOpt trnr inp tbl i j =
    VU.foldl' (.+.) zero $ stackBase trnr inp tbl i j
  hairpinOpt trnr inp i j =
    VU.foldl' (.+.) zero $ hairpinBase trnr inp i j
  largeInteriorLoopOpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ largeInteriorLoopBase trnr inp strong i j
  tabbedInteriorLoopOpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ tabbedInteriorLoopBase trnr inp strong i j
  bulgeLOpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ bulgeLBase trnr inp strong i j
  bulgeROpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ bulgeRBase trnr inp strong i j
  interior1xnLOpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ interior1xnLBase trnr inp strong i j
  interior1xnROpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ interior1xnRBase trnr inp strong i j
  multibranchIJLoopOpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ multibranchIJLoopBase trnr inp strong i j
  multibranchUnpairedJOpt trnr inp mtable i j =
    VU.foldl' (.+.) zero $ multibranchUnpairedJBase trnr inp mtable i j
  multibranchKJHelixOpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ multibranchKJHelixBase trnr inp strong i j
  multibranchAddKJHelixOpt trnr inp table strong i j =
    VU.foldl' (.+.) zero $ multibranchAddKJHelixBase trnr inp table strong i j
  multibranchCloseOpt trnr inp m m1 i j =
    VU.foldl' (.+.) zero $ multibranchCloseBase trnr inp m m1 i j
  externalLoopOpt trnr inp strong i j =
    VU.foldl' (.+.) zero $ externalLoopBase trnr inp strong i j
  externalAddLoopOpt trnr inp strong extern i j =
    VU.foldl' (.+.) zero $ externalAddLoopBase trnr inp strong extern i j

  -- NOTE copy and instanciate these functions if you have to work with many
  -- candidate sequences. Otherwise you probably do not need the speed-up from
  -- these functions. This stuff is for backtracking  mostly as you get lists
  -- of indices with attached scores.
  --
  -- NOTE With GHC 6.13 we should get optimized code anyways!

  stackIdx trnr inp tbl i j =
    [((i+1,j-1),stackOpt trnr inp tbl i j)]
  hairpinIdx trnr inp i j =
    VU.toList $ hairpinBase trnr inp i j
  largeInteriorLoopIdx trnr inp strong i j =
    VU.toList $ VU.zip (interiorLoopIndices i j) (largeInteriorLoopBase trnr inp strong i j)
  tabbedInteriorLoopIdx trnr inp strong i j =
    VU.toList $ VU.zip (tabbedInteriorLoopIndices i j) $ tabbedInteriorLoopBase trnr inp strong i j
  bulgeLIdx trnr inp strong i j =
    VU.toList $ VU.zip (VU.map (\k -> (i+1+k,j-1)) . uncurry VU.enumFromN $ bulgeLimit i j) $ bulgeLBase trnr inp strong i j
  bulgeRIdx trnr inp strong i j =
    VU.toList $ VU.zip (VU.map (\k -> (i+1,j-1-k)) . uncurry VU.enumFromN $ bulgeLimit i j) $ bulgeRBase trnr inp strong i j
  interior1xnLIdx trnr inp strong i j =
    VU.toList $ VU.zip (VU.map (\k -> (i+1+k,j-2)) $ uncurry VU.enumFromN $ iloop1xnLimit i j) $ interior1xnLBase trnr inp strong i j
  interior1xnRIdx trnr inp strong i j =
    VU.toList $ VU.zip (VU.map (\k -> (i+2,j-1-k)) $ uncurry VU.enumFromN $ iloop1xnLimit i j) $ interior1xnRBase trnr inp strong i j
  multibranchIJLoopIdx trnr inp strong i j =
    VU.toList $ VU.zip (VU.singleton (i,j)) $ multibranchIJLoopBase trnr inp strong i j
  multibranchUnpairedJIdx trnr inp mtable i j =
    VU.toList $ VU.zip (VU.singleton (i,j-1)) $ multibranchUnpairedJBase trnr inp mtable i j
  multibranchKJHelixIdx trnr inp strong i j =
    VU.toList $ VU.zip (uncurry VU.enumFromN $ multibranchKJHelixLimit i j) $ multibranchKJHelixBase trnr inp strong i j
  multibranchCloseIdx trnr inp m m1 i j = -- TODO this is wrong!
    VU.toList $ VU.zip (uncurry VU.enumFromN $ multibranchCloseLimit i j) $ multibranchCloseBase trnr inp m m1 i j
  multibranchAddKJHelixIdx trnr inp table strong i j =
    VU.toList $ VU.zip (uncurry VU.enumFromN $ multibranchAddKJHelixLimit i j) $ multibranchAddKJHelixBase trnr inp table strong i j
  externalLoopIdx trnr inp strong i j =
    VU.toList $ VU.singleton ((i,j), externalLoopOpt trnr inp strong i j)
  externalAddLoopIdx trnr inp strong extern i j =
    VU.toList $ VU.zip (uncurry VU.enumFromN $ externalAddLoopLimit i j) $ externalAddLoopBase trnr inp strong extern i j



-- * We provide a default instance working on rings.

-- | Simple hairpin loops. If (i,j) pairs then, first, try to do a lookup of
-- the exact sequence of the hairpin in a tabulated list. If this fails then,
-- second, try length 3 hairpins and so on.

hairpinBase :: (FoldFunctions a) => TurnerTables a -> Primary -> Int -> Int -> VU.Vector a
hairpinBase Turner2004{..} inp i j = VU.singleton go where
  go
    | pIJ==vpNP || l<3 = zero
    | l<=6, Just v <- s `M.lookup` hairpinLookup = v
    | l==3      = (hairpinL ! l) -- .*. tAU -- apparantly not anymore according to NNDB
    | l>=31     = (hairpinL ! 30) .*. (hairpinMM ! (pIJ,bI,bJ)) .*. (calcLargeLoop l)
    | otherwise = {-traceVal (show (i,j,pIJ,bI,bJ,hairpinL!l,tAU,hairpinMM!(pIJ,bI,bJ))) $ -} (hairpinL ! l) .*. (hairpinMM ! (pIJ,bI,bJ))
  l = j-i-1
  s = assert (i>=0 && checkBounds inp j) $ [inp ! k | k <- [i..j-1]]
  bI = inp ! (i+1)
  bJ = inp ! (j-1)
  pIJ = pair inp i j
  tAU = calcTermAU termAU pIJ
{-# INLINE hairpinBase #-}

-- | Extend a stem by another pair.

stackBase :: (FoldFunctions a) => TurnerTables a -> Primary -> Table a -> Int -> Int -> VU.Vector a
stackBase Turner2004{..} inp tbl i j = VU.singleton $ go (i+1,j-1) where
  pIJ = pair inp i j
  go (k,l)
    | pIJ==vpNP || pKL==vpNP || isZero tE
    = zero
    | otherwise
    = tE .*. ( stack ! (pIJ,pKL))
    where
      pKL = riap inp k l
      tE  = tbl ! (k,l)
{-# INLINE stackBase #-}

-- | These are special cases of interior loops. Short iloops, such as 1x2 loops
-- are completely tabulated. Look each one up here.
--
-- TODO needs 1-bulges as a special case

tabbedInteriorLoopBase :: (Show a, FoldFunctions a) => TurnerTables a -> Primary -> Table a -> Int -> Int -> VU.Vector a
tabbedInteriorLoopBase Turner2004{..} inp strong i j = res where
  res = VU.map (\(k,l) -> (strong ! (k,l)) .*. (ftabbed (k-i-1,j-l-1) (k,l))) ili
  ili = tabbedInteriorLoopIndices i j
  ftabbed (di,dj) (k,l)
    | ds==0 && dl==1 = bulgeL!1 .*. stack!(pIJ,pKL) -- no termAU, since the helical stack continues (nndb)
    | ds==1 && dl==1 = iloop1x1 ! (pIJ,pKL,bI,bJ)
    | di==1 && dj==2 = iloop1x2 ! (pIJ,pKL,bI,bL,bJ)
    | di==2 && dj==1 = iloop1x2 ! (pIJ,pKL,bJ,bI,bK)
    | ds==2 && dl==2 = iloop2x2 ! (pIJ,pKL,bI,bK,bL,bJ)
    | ds==2 && dl==3 = iloop2x3MM!(pIJ,bI,bJ) .*. iloop2x3MM ! (pKL,bL,bK) .*. iloopL ! 5 .*. ninio
    where
      pKL = riap inp k l
      bK  = inp ! (k-1)
      bL  = inp ! (l+1)
      ds  = min di dj
      dl  = max di dj
  pIJ = pair inp i j
  bI  = inp ! (i+1)
  bJ  = inp ! (j-1)
{-# INLINE tabbedInteriorLoopBase #-}

-- | A large number of iloops up to a total maximum loop size of 30 have to be
-- checked. There are about 300 cases for j-i>>30. In addition, this loop does
-- not like fusion very much and does many different lookups.
--
-- TODO Any performance increase here should yield substantial improvements for
-- the overall performance of folding algorithms.
--
-- TODO performance improvement: Split mismatch calculation into its own table
-- in the caller. Callee gets an additional argument.
--
-- TODO didjs needs to go back into a *Limit function.

largeInteriorLoopBase :: (FoldFunctions a) => TurnerTables a -> Primary -> Table a -> Int -> Int -> VU.Vector a
largeInteriorLoopBase Turner2004{..} inp strong i j = res where
  res = VU.map (\(di,dj) ->
                    ijmm .*.
                    (strong ! (i+di,j-dj)) .*.
                    (iloopL ! (di+dj-2)) .*. -- -2 because di,dj is total IL length +2
                    (iloopMM ! (riap inp (i+di) (j-dj), inp ! (i+di-1), inp ! (j-dj+1))) .*.
                    calcNinio maxNinio ninio (abs $ di-dj)
                ) didjs
  -- NOTE NO FUSION! :-(
  -- TODO check this!
  didjs = interiorLoopDistances i j
  {-
  didjs = traceVal (show (i,j)) $ VU.unfoldr (\(di,dj) ->
                        if di>maxc
                          then Nothing
                          else if di+dj>=nexc
                                then Just ((di,dj),(di+1,3   ))
                                else Just ((di,dj),(di  ,dj+1))
                      ) (3,5)  -}
  -- constant
  {-
  maxc = min 27 (j-i-16)
  nexc = min 30 (j-i-13)
  -}
  ijmm = iloopMM ! (pIJ,bI,bJ)
  pIJ  = pair inp i j
  bI   = inp ! (i+1)
  bJ   = inp ! (j-1)
{-# INLINE largeInteriorLoopBase #-}

-- | A bulge on the left side of the inner closing pair. 1-bulges are treated
-- as tabulated interior loops.
--
-- (..((...)))
-- 01234567890

bulgeLBase :: (FoldFunctions a) => TurnerTables a -> Primary -> Table a -> Int -> Int -> VU.Vector a
bulgeLBase Turner2004{..} inp strong i j = res where
  res = VU.map (\k ->
                  strong ! (i+1+k,j-1) .*.
                  bulgeL ! k .*.
                  calcTermAU termAU (riap inp (i+1+k) (j-1)) .*.
                  tAUij
                ) . uncurry VU.enumFromN $ bulgeLimit i j
  tAUij = calcTermAU termAU $ pair inp i j
{-# INLINE bulgeLBase #-}

-- | A bulge on the right side of the inner closing pair. Mirroring bulgeLBase
--
-- ((~)...)

bulgeRBase :: (FoldFunctions a) => TurnerTables a -> Primary -> Table a -> Int -> Int -> VU.Vector a
bulgeRBase Turner2004{..} inp strong i j = res where
  res = VU.map (\k ->
                  strong ! (i+1,j-1-k) .*.
                  bulgeL ! k .*.
                  calcTermAU termAU (riap inp (i+1) (j-1-k)) .*.
                  tAUij
                ) . uncurry VU.enumFromN $ bulgeLimit i j
  tAUij  = calcTermAU termAU $ pair inp i j
{-# INLINE bulgeRBase #-}

-- | 1xn iloops work a bit like bulges. They are more complicated because we
-- have to consider 'ninio' values and terminal mismatches.
--
-- TODO how big an improvement would 'iloopL1xn' be with integrated 'ninio'
-- values?
--
-- (...(~).)

interior1xnLBase Turner2004{..} inp strong i j = res where
  res = VU.map (\k ->
                  strong ! (i+1+k,j-2) .*.
                  iloopL ! (k+1) .*.
                  iloop1xnMM ! (riap inp (i+1+k) (j-2), inp ! (i+k), inp ! (j-1)) .*.
                  calcNinio maxNinio ninio (k-1) .*.
                  pIJmm
                ) . uncurry VU.enumFromN $ iloop1xnLimit i j
  pIJmm = iloop1xnMM ! (pair inp i j, inp ! (i+1), inp ! (j-1))
{-# INLINE interior1xnLBase #-}

-- | A mirror to the above.
--
-- (.(~)...)

interior1xnRBase Turner2004{..} inp strong i j = res where
  res = VU.map (\k ->
                  strong ! (i+2,j-1-k) .*.
                  iloopL ! (k+1) .*.
                  iloop1xnMM ! (riap inp (i+2) (j-1-k), inp ! (j-k), inp ! (i+1)) .*.
                  calcNinio maxNinio ninio (k-1) .*.
                  pIJmm
                ) . uncurry VU.enumFromN $ iloop1xnLimit i j
  pIJmm = iloop1xnMM ! (pair inp i j, inp ! (i+1), inp ! (j-1))
{-# INLINE interior1xnRBase #-}

-- | Close a multibranch loop by trying to combine one element of 'm' with one
-- of 'm1'.
--
-- <[[...]][[...]]>
-- 0123456789012345
--           1

multibranchCloseBase :: (FoldFunctions a) => TurnerTables a -> Primary -> Table a -> Table a -> Int -> Int -> VU.Vector a
multibranchCloseBase Turner2004{..} inp m m1 i j = res where
  res = VU.map (\k ->
                  m ! (i+1,k) .*.
                  m1 ! (k+1,j-1) .*.
                  ijmm .*.
                  mbcl
                ) . uncurry VU.enumFromN $ multibranchCloseLimit i j
  ijmm =  multiMM ! (pIJ,bJ,bI)
  mbcl =  multiOffset .*. multiHelix
  pIJ  = riap inp i j
  bI   = inp ! (i+1)
  bJ   = inp ! (j-1)
{-# INLINE multibranchCloseBase #-}

-- | Adds a multibranch loop at exactly (i,j).

multibranchIJLoopBase Turner2004{..} inp strong i j = res where
  res = VU.singleton $ strong ! (i,j) .*. mbrhlx .*. mbrmm
  mbrhlx = multiHelix
  mbrmm  =  multiMM ! (pIJ,bI,bJ) -- TODO correct orientation?
  pIJ    = pair inp i j
  bI     = inp ! (i-1)
  bJ     = inp ! (j+1)
{-# INLINE multibranchIJLoopBase #-}

-- | Trivial function that adds a single unpaired nucleotide within a
-- multibranch loop.

multibranchUnpairedJBase Turner2004{..} inp mtable i j = res where
  res = VU.singleton $ mtable ! (i,j-1) .*. mbrup
  mbrup = multiNuc
{-# INLINE multibranchUnpairedJBase #-}

-- | Adds a multibranch loop at (k,j), with k-i unpaired nucleotides to the
-- left of 'k'.

multibranchKJHelixBase  :: (FoldFunctions a) => TurnerTables a -> Primary -> Table a -> Int -> Int -> VU.Vector a
multibranchKJHelixBase Turner2004{..} inp strong i j = res where
  res = VU.map (\k ->
          multiNuc .^. (k-i) .*.
          strong ! (k,j) .*.
          multiHelix .*.
          multiMM ! (pair inp k j, inp ! (k-1), inp ! (j+1))
        ) . uncurry VU.enumFromN $ multibranchKJHelixLimit i j
{-# INLINE multibranchKJHelixBase #-}

-- |

multibranchAddKJHelixBase Turner2004{..} inp table strong i j = res where
  res = VU.map (\k ->
                  table ! (i,k) .*.
                  strong ! (k+1,j) .*.
                  multiMM ! (pair inp (k+1) j, inp ! k, inp ! (j+1))
                ) . uncurry VU.enumFromN $ multibranchAddKJHelixLimit i j
{-# INLINE multibranchAddKJHelixBase #-}

-- | [[...]]
--   0123456

externalLoopBase Turner2004{..} inp strong i j = res where
  res = VU.singleton $ strong ! (i,j) .*. mm .*. tAU
  n = snd $ bounds inp
  pIJ = pair inp i j
  bI = inp ! (i-1)
  bJ = inp ! (j+1)
  tAU = calcTermAU termAU pIJ
  mm
    | i>0&&j<n  = extMM ! (pIJ,bI,bJ)
    | i>0       = dangle5 ! (pIJ,bI)
    | j<n       = dangle3 ! (pIJ,bJ)
    | otherwise = one
{-# INLINE externalLoopBase #-}

-- |

externalAddLoopBase :: (FoldFunctions a) => TurnerTables a -> Primary -> Table a -> Table a -> Int -> Int -> VU.Vector a
externalAddLoopBase trnr@Turner2004{..} inp strong extern i j = res where
  res = VU.map (\k ->
                  externalLoopOpt trnr inp strong i k .*.
                  extern ! (k+1,j)
                ) . uncurry VU.enumFromN $ externalAddLoopLimit i j
{-# INLINE externalAddLoopBase #-}



-- * Helper Functions.

-- TODO We definitely need to check that this works!

pair :: Primary -> Int -> Int -> ViennaPair
pair inp i j
  = assert (checkBounds inp i && checkBounds inp j)
  $ toPair (inp `unsafeIndex` i) (inp `unsafeIndex` j)
{-# INLINE pair #-}

riap inp i j
  = assert (i>=0 && j>=0 && checkBounds inp i && checkBounds inp j)
  $ toPair (inp ! j) (inp ! i)
{-# INLINE riap #-}

ringSum :: (Ring a, VU.Unbox a) => VU.Vector a -> a
ringSum v = VU.foldl' (.+.) zero v
{- INLINE ringSum #-}

ringSumL :: (Ring a, VU.Unbox a) => [a] -> a
ringSumL v = foldl' (.+.) zero v
{-# INLINE ringSumL #-}

ringProduct :: (Ring a, VU.Unbox a) => VU.Vector a -> a
ringProduct v = VU.foldl' (.*.) one v
{-# INLINE ringProduct #-}

ringProductL :: (Ring a, VU.Unbox a) => [a] -> a
ringProductL v = foldl' (.*.) one v

-- | Explicit index generator for interior loops. Does not create indices for:
-- - bulges     0 k
-- - 1xn loops  1 k
-- - 2x3 loops  2 3, 3 2
-- - tabulated  1 1, 1 2, 2 1, 2 2

interiorLoopDistances i j =
  VU.concatMap (
    \d -> VU.map (\d' -> (d',d-d'))  -- written as a tuple
          $ VU.enumFromN 3 (d-5))    -- for each distance, all possible left/right combinations
  $ VU.enumFromN 8 (min 23 (j-i-13)) -- diagonal distance or number of unpaired nucleotides -2.
{-# INLINE interiorLoopDistances #-}

-- WTF ?! the function below is 10.000x slower than the function above!

interiorLoopIndices :: Int -> Int -> VU.Vector (Int,Int)
interiorLoopIndices !i !j = VU.map (\(k,l) -> (i+k,j-l)) $ interiorLoopDistances i j
{-# INLINE interiorLoopIndices #-}

--
-- TODO filter 'ili' to accept only indices that are not too close. Does

tabbedInteriorLoopDistances :: Int -> Int -> VU.Vector (Int,Int)
tabbedInteriorLoopDistances i j
  | j-i>=8    = VU.fromList [(0,1),(1,0),(1,1),(1,2),(2,1),(2,2),(2,3),(3,2)]
  | otherwise = VU.empty
{-# INLINE tabbedInteriorLoopDistances #-}

-- | Yes, therer is the (+1,+1) and yes, this is inconsistent with the other function

tabbedInteriorLoopIndices i j = VU.map (\(di,dj) -> (i+di+1,j-dj-1)) $ tabbedInteriorLoopDistances i j
{-# INLINE tabbedInteriorLoopIndices #-}

-- * All limits depending on (i,j) should be here.

-- | Bulge limitations

bulgeLimit :: Int -> Int -> (Int,Int)
bulgeLimit i j = (2,min 29 $ j-i-9)
{-# INLINE bulgeLimit #-}

-- | 1xn iloop limits

iloop1xnLimit :: Int -> Int -> (Int,Int)
iloop1xnLimit i j = (3,min 26 $ j-i-10)
{-# INLINE iloop1xnLimit #-}

-- | closing of a multibranch loop

multibranchCloseLimit :: Int -> Int -> (Int,Int)
multibranchCloseLimit i j = (i+1,j-i-2) -- (i+7, j-i-14)
{-# INLINE multibranchCloseLimit #-}

-- | add mb helix

multibranchAddKJHelixLimit :: Int -> Int -> (Int,Int)
multibranchAddKJHelixLimit i j = (i+1,j-i-1)
{-# INLINE multibranchAddKJHelixLimit #-}

-- | mb helix

multibranchKJHelixLimit :: Int -> Int -> (Int,Int)
multibranchKJHelixLimit i j = (i,j-i)
{-# INLINE multibranchKJHelixLimit #-}

-- | add external loop

externalAddLoopLimit :: Int -> Int -> (Int,Int)
externalAddLoopLimit i j = (i+5,j-i-5)
{-# INLINE externalAddLoopLimit #-}