{-# LANGUAGE BangPatterns #-}

-- | The RNAwolf folding algorithm, version 1.9. We now have full stacking and
-- rich parameters everywhere. In general, most parameters closely follow what
-- we have for ViennaRNA 1.8 but with extended RNA secondary structures,
-- instead of canonicals only. Further refinements of the parameter system will
-- follow.
--
-- TODO right now, 1-diagrams only, 2-diagrams come back in a few days. I want
-- to be sure that the full stacking approach does not introduce subtle bugs.
--
-- TODO recast all fZZZ functions for folding to actually fuse on minimum/fZZZ.
--
-- TODO VU.! -> VU.unsafeIndex
--
-- TODO possibly very big TODO: is this being optimized? : fold $ g z where g z
-- = if z==True then [1..10] else []. If this is not optimized, we should
-- change all functions below in a way that allows optimization. (I dont think
-- fusion can fire on these objects...)
--
--   TODO rewrite minimumVU to accept "Either" ctors and specialize on them.
--   "Left" to be used for strange errors, "Right" for correct streams


module BioInf.RNAwolf
  ( rnaWolf
  , rnaWolfBacktrack
  ) where

import Control.Monad
import Control.Monad.ST
import qualified Data.Vector.Unboxed as VU
import Control.Arrow

import Data.PrimitiveArray
import Data.PrimitiveArray.Ix
import Biobase.Primary
import Biobase.Secondary

import BioInf.Params
import BioInf.RNAwolf.Types
import qualified BioInf.RNAwolf.Bulge as Bul
import qualified BioInf.RNAwolf.Extern as Ext
import qualified BioInf.RNAwolf.Hairpin as Hp
import qualified BioInf.RNAwolf.Interior as Int
import qualified BioInf.RNAwolf.Multibranched as Mul
import qualified BioInf.RNAwolf.Stem as Stem
import qualified BioInf.RNAwolf.TripletBulge as TrB
import qualified BioInf.RNAwolf.TripletStem as TrS

import Debug.Trace


-- * Folding

-- | Wrapper around the state monad.

rnaWolf :: Params -> Primary -> Tables
rnaWolf ps inp = {-# SCC "rnaWolf" #-} runST $ foldST ps inp

-- | Folding magic. In principle, this is not more complicated than
-- Nussinov-style folding.

foldST :: Params -> Primary -> ST s Tables
foldST ps inp = do
  let n = VU.length inp -1
  let imi = map fst . filter ((==nIMI).snd) $ zip [0..] (VU.toList inp)
  (eStemM,eStem) <- second EStem `fmap` mkExtTable n
  (nStemM,nStem) <- second NStem `fmap` mkTable n
  (nInteM,nInte) <- second NInte `fmap` mkTable n -- interior loop helper table
  (nMultM,nMult) <- second NMult `fmap` mkTable n -- multibranched loop helper table
  (nBulgM,nBulg) <- second NBulg `fmap` mkTable n -- bulge loop helper table
  (nMbrM ,nMbr ) <- second NMbr  `fmap` mkTable n
  (nMbr1M,nMbr1) <- second NMbr1 `fmap` mkTable n
  (nExtnM,nExtn) <- second NExtn `fmap` mkTableWith 0 n
  (nBulgLoopM,nBulgLoop) <- second NBulgLoop `fmap` mkTable n
  (nInteLoopM,nInteLoop) <- second NInteLoop `fmap` mkTable n -- interior loop helper table
  (nMultLoopM,nMultLoop) <- second NMultLoop `fmap` mkTable n -- multibranched loop helper table
  -- This version of the (i,j) pair generation walks along the diagonals. It is
  -- required to calculate this way, as otherwise the shared nucleotide
  -- variants will fail.
  forM_ (mkIJ n) $ \(i,j) -> do
    forM_ citr $ \ct -> forM_ wsh $ \eI -> forM_ wsh $ \eJ -> do
      -- weak table (everything is weak)
      let vHairpin  = minimumVU $ Hp.fHairpin imi    ps inp           i j ct eI eJ
      let vStem     = minimumVU $ Stem.fStem         ps inp eStem     i j ct eI eJ
      let vInterior = minimumVU $ Int.fInteriorOuter ps inp nInteLoop i j ct eI eJ
      let vMlClose  = minimumVU $ Mul.fMlClose       ps inp nMultLoop i j ct eI eJ
      let vBulge    = minimumVU $ Bul.fBulgeOuter    ps inp nBulgLoop i j ct eI eJ
      writeM eStemM ((i,j),(ct,eI,eJ)) $ minimum [vHairpin,vStem,vInterior,vMlClose,vBulge] -- FIXME vTStem
    -- fill stem table that ignores extended annotations
    writeM nStemM (i,j) . minimumVU $ Stem.fNstem ps inp eStem i j
    -- fill the inner interior table
    writeM nInteM (i,j) . minimumVU $ Int.fInteriorInner ps inp eStem i j
    -- fill the interior LOOP table (includes everything except the closing pair)
    writeM nInteLoopM (i,j) . minimumVU $ Int.fInteriorLoop ps inp nInte i j
    -- fill multibranch helper table
    writeM nMultM (i,j) . minimumVU $ Mul.fMlHelix ps inp eStem i j
    -- multibranched close helper table (should improve speed for MLs by 2x3x3)
    writeM nMultLoopM (i,j) . minimumVU $ Mul.fMlLoop ps inp nMbr nMbr1 i j
    -- fill bulge close helper table
    writeM nBulgM (i,j) . minimumVU $ Bul.fBulgeInner ps inp eStem i j
    -- fill bulge LOOP table
    writeM nBulgLoopM (i,j) . minimumVU $ Bul.fBulgeLoop ps inp nBulg i j
    -- one or more multibranched stems
    let vUnpaired = minimumVU $ Mul.fUnpairedRight ps inp nMbr i j
    let vStem = minimumVU $ Mul.fMlStem ps inp nMult i j
    let vStems = minimumVU $ Mul.fMlStems ps inp nMbr nMult i j
    writeM nMbrM (i,j) $ minimum [vUnpaired, vStem, vStems]
    -- exactly one multibranched stem
    let vUnpaired = minimumVU $ Mul.fUnpairedRight1 ps inp nMbr1 i j
    let vStem = minimumVU $ Mul.fMl1Stem ps inp nMult i j
    writeM nMbr1M (i,j) $ minimum [vUnpaired,vStem]
  let j=n
  forM_ [n-2,n-3..0] $ \i -> do
    let unp = minimumVU $ Ext.fLeftUnpaired ps inp nExtn i j
    let es  = minimumVU $ Ext.fStem ps inp nStem i j
    let esl = minimumVU $ Ext.fStems ps inp nStem nExtn i j
    let one = minimumVU $ Ext.fOne ps inp i j
    writeM nExtnM (i,j) $ minimum [unp,esl,es,one]
  return  ( eStem
          , nStem
          , nInte
          , nInteLoop
          , nBulg
          , nBulgLoop
          , nMult
          , nMbr
          , nMbr1
          , nMultLoop
          , nExtn)


-- * Backtracking

-- | Given parameters, input, score band, and filled tables we can backtrack.
--
-- NOTE the order in which backtracking for individual functions is performed,
-- is important. In case of ties in energy, the first result is taken. This
-- should be considered!
--
-- [1] We consider unpaired stretches always first. This is kind of arbitrary.
--
-- [2] extended stems always come last. This is because they can potentially
-- introduce many co-optimal structures before they are all discarded.
--
-- TODO all the crap in comments are bug-fix backtracking options.

rnaWolfBacktrack :: Params -> Primary -> Double -> Tables -> [([ExtPairIdx],Double)]
rnaWolfBacktrack ps inp delta ( estem@(EStem eStem)
                              , nstem@(NStem nStem)
                              , ninte@(NInte nInte)
                              , ninteloop@(NInteLoop nInteLoop)
                              , nbulg@(NBulg nBulg)
                              , nbulgloop@(NBulgLoop nBulgLoop)
                              , nmult@(NMult nMult)
                              , nmbr@(NMbr nMbr)
                              , nmbr1@(NMbr1 nMbr1)
                              , nmultloop@(NMultLoop nMultLoop)
                              , nextn@(NExtn nExtn)
                              )
  | otherwise = let finalScore = nExtn ! (0,n)
                in filter ((<=0).snd) . map (second (\z -> finalScore + delta -z)) $ btE 0 n delta
  where
    btE i j d = -- trace (show ("btE",i,j,d)) $
      Ext.btOne ps inp nextn i j d ++ -- [1]
      Ext.btLeftUnpaired ps inp nextn btE i j d ++
      Ext.btStem ps inp nextn nstem btNS i j d ++
      Ext.btStems ps inp nstem nextn btNS btE i j d
    btNS i j d =
      Stem.btNstem ps inp nstem estem btES i j d
    btES :: Int -> Int -> CTisomerism -> Edge -> Edge -> Double -> [([ExtPairIdx],Double)]
    btES i j ct eI eJ d = -- trace (show ("btES",i,j,ct,eI,eJ,d)) $
      Hp.btHairpin ps inp estem i j ct eI eJ d ++
      Int.btInteriorOuter ps inp estem ninteloop btILoop i j ct eI eJ d ++
      Bul.btBulgeOuter ps inp estem nbulgloop btBULoop i j ct eI eJ d ++
      Mul.btMlClose ps inp estem nmultloop btMultLoop i j ct eI eJ d ++
      Stem.btStem ps inp estem btES i j ct eI eJ d -- [2]
    btILoop i j d = -- trace (show ("btILoop",i,j,d)) $
      Int.btInteriorLoop ps inp ninteloop ninte btIL i j d
    btIL i j d = -- trace (show ("btIL",i,j,d)) $
      Int.btInteriorInner ps inp ninte estem btES i j d
    btBULoop i j d = -- trace (show ("btBULoop",i,j,d)) $
      Bul.btBulgeLoop ps inp nbulgloop nbulg btBU i j d
    btBU i j d = -- trace (show ("btBU",i,j,d)) $
      Bul.btBulgeInner ps inp nbulg estem btES i j d
    btMH i j d = -- trace (show ("btMH",i,j,d)) $
      Mul.btMlHelix ps inp nmult estem btES i j d
    btMultLoop i j d =
      Mul.btMlLoop ps inp nmultloop nmbr nmbr1 btM btM1 i j d
    btM i j d = {- trace (show ("btM",i,j,d)) $ -}
      Mul.btUnpairedRight ps inp nmbr btM i j d ++
      Mul.btMlStem ps inp nmbr nmult btMH i j d ++
      Mul.btMlStems ps inp nmbr nmult btM btMH i j d
    btM1 i j d = let ehere = nMbr1!(i,j) in
      Mul.btUnpairedRight1 ps inp nmbr1 btM1 i j d ++
      Mul.btMl1Stem ps inp nmbr1 nmult btMH i j d

    newD d here next = d - (next - here)
    testD d = d>=0
    n = VU.length inp -1
    epsilon = 0.001
    imi = map fst . filter ((==nIMI).snd) $ zip [0..] (VU.toList inp)


-- * Helper functions

-- | Given an unboxed vector with (index,value) elements, return the minimum
-- over the values.
--
-- TODO with vector-0.7.2 / vector-0.8, rewrite using "snd . unzip" (or not,
-- see next todo)
--
-- TODO http://trac.haskell.org/vector/ticket/51

minimumVU xs = VU.foldl' (\(!acc) (!k,!v) -> min acc v) 999999 xs
{-# INLINE minimumVU #-}

-- | Create 2d-tables, initialized with "infinity"
--
-- TODO use (infinity :: Energy)

mkTable n = mkTableWith 9999999 n

-- | Create 2d-tables, initialized with 'z'

mkTableWith z n = do
  tM <- fromAssocsM (0,0) (n,n) z []
  t  <- unsafeFreezeM tM
  return (tM,t)

-- | 2d-tables with extended information.

mkExtTable n = mkExtTableWith 9999999 n

-- | 2d-tables with extended information.

mkExtTableWith z n = do
  tM <- fromAssocsM ((0,0),(cis,wc,wc)) ((n,n),(trans,hoogsteen,hoogsteen)) z []
  t  <- unsafeFreezeM tM
  return (tM,t)

-- | Produces indices in correct diagonal order.
--
-- TODO this is a stupid way to create the indices...

mkIJ n = [ (i,j) | d <- [0..n], j<-[n,n-1..0], let i=j-d, j>=0, i>=0 ]


-- * Types


-- * debugging

trc k x = trace (show (k,x)) x
trc' k x = trace (show k) x
trci' c k x = if c then trace (show k) x else x