{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE RecordWildCards #-}

module Biobase.Vienna.Import.ViennaPar
  ( importPar
  ) where

import Text.Parsec hiding ((<|>), many, optional)
import Text.Parsec.String
import Text.Parsec.Char
import Text.Parsec.Combinator hiding (optional)
import Control.Applicative
import Data.List
import qualified Data.Map as M

import Text.Parsec.Numbers
import Biobase.Constants
import Biobase.RNA
import Biobase.Turner.Tables
import Data.PrimitiveArray

import Biobase.Vienna



-- | Imports Vienna tables from a given string.

importPar :: String -> (ViennaIntTables,ViennaIntTables)
importPar s
  | Right _  <- p = foldr updateTables (trnr,trnrH) bs
  | Left err <- p = error $ show err
  where
    p = parse blocks "(stdin)" s
    Right bs = p
    (bgs,bhs) = partition (not . isHblock) bs
    trnr  = foldr updateTable emptyTables bgs
    trnrH = foldr updateTable emptyTables bhs

-- | Apply all parsed blocks to a Turner2004 structure.

updateTable :: Block -> ViennaIntTables -> ViennaIntTables
updateTable Block{..} trnr -- @Turner2004{..}
  | bhdr =? "stack" = trnr{stack = fromAssocs minBound maxBound z $ zip pp ints}
  | bhdr =? "dangle3" = trnr{dangle3 = fromAssocs minBound maxBound z $ zip pb ints}
  | bhdr =? "dangle5" = trnr{dangle5 = fromAssocs minBound maxBound z $ zip pb ints}
  | bhdr =? "hairpin" = trnr{hairpinL = fromList 0 30 ints}
  | bhdr =? "mismatch_hairpin" = trnr{hairpinMM = fromAssocs minBound maxBound z $ zip pbb ints}
  | bhdr =? "bulge" = trnr{bulgeL = fromList 0 30 ints}
  | bhdr =? "int11" = trnr{iloop1x1 = fromAssocs minBound maxBound z $ zip ppbb ints}
  | bhdr =? "int21" = trnr{iloop1x2 = fromAssocs minBound maxBound z $ zip ppbbb ints}
  | bhdr =? "int22" = trnr{iloop2x2 = fromAssocs minBound maxBound z $ zip ppbbbb ints}
  | bhdr =? "mismatch_interior" = trnr{iloopMM = fromAssocs minBound maxBound z $ zip pbb ints}
  | bhdr =? "mismatch_interior_23" = trnr{iloop2x3MM = fromAssocs minBound maxBound z $ zip pbb ints}
  | bhdr =? "mismatch_interior_1n" = trnr{iloop1xnMM = fromAssocs minBound maxBound z $ zip pbb ints}
  | bhdr =? "interior" = trnr{iloopL = fromList 0 30 ints}
  | bhdr =? "mismatch_multi" = trnr{multiMM = fromAssocs minBound maxBound z $ zip pbb ints}
  | bhdr =? "mismatch_exterior" = trnr{extMM = fromAssocs minBound maxBound z $ zip pbb ints}
  | otherwise = trnr -- unknown block!
updateTable ABlock{..} trnr
  | otherwise = trnr -- CORRECT!

-- | update with info where no separation between G/H was done.

updateTables Block{..} (trnr,trnrH)
  | bhdr =? "ML_params"
  = ( trnr {multiNuc = ints!!0, multiOffset = ints!!2, multiHelix = ints!!4}
    , trnrH{multiNuc = ints!!1, multiOffset = ints!!3, multiHelix = ints!!5}
    )
  | bhdr =? "NINIO"
  = ( trnr  {ninio = ints!!0, maxNinio = ints!!2}
    , trnrH {ninio = ints!!1}
    )
  | bhdr =? "Misc"
  = ( trnr  {intermolecularInit = ints!!0, termAU = ints!!2}
    , trnrH {intermolecularInit = ints!!1, termAU = ints!!3}
    )
  | otherwise = (trnr,trnrH)
updateTables ABlock{..} (trnr,trnrH)
  | bhdr =? "Triloops" || bhdr =? "Tetraloops" || bhdr =? "Hexaloops"
  = ( trnr {hairpinLookup = hairpinLookup trnr `M.union` (M.fromList $ map (\(k,v,w) -> (s2ns k,v)) ascs)}
    , trnrH {hairpinLookup = hairpinLookup trnrH `M.union` (M.fromList $ map (\(k,v,w) -> (s2ns k,w)) ascs)}
    )
  | otherwise = (trnr,trnrH)

pp = [(x,y) | x<-cgnsP,y<-cgnsP]
pb = [(x,y) | x<-cgnsP,y<-eacgu]
pbb = [(x,y,z) | x<-cgnsP,y<-eacgu,z<-eacgu]
ppbb = [(p1,p2,b1,b2) | p1<-cgnsP,p2<-cgnsP,b1<-eacgu,b2<-eacgu]
ppbbb = [(p1,p2,b1,b2,b3) | p1<-cgnsP,p2<-cgnsP,b1<-eacgu,b2<-eacgu,b3<-eacgu]
ppbbbb = [(p1,p2,b1,b2,b3,b4) | p1<-cguaP,p2<-cguaP,b1<-acgu,b2<-acgu,b3<-acgu,b4<-acgu]
z = eInf
-- xs is the block header name, ys the constant string
xs =? ys = xs == ys || xs == ys++"_enthalpies" -- and $ zipWith (==) xs ys
s2ns = map charToNucleotide

-- * Simple parser for the vienna 2.0 format.

isHblock :: Block -> Bool
isHblock b = "enthalpies" `elem` (tails $ bhdr b)

data Block
  = Block {bhdr :: String, ints :: [Int]}
  | ABlock {bhdr :: String, ascs :: [(String,Int,Int)]}
  deriving (Show)

blocks :: GenParser Char st [Block]
blocks = vrna2 *> spaces *> (try assocBlock <|> try block) `sepEndBy1` spaces <* string "#END" <* spaces <* eof

vrna2 = string "## RNAfold parameter file v2.0"

block :: GenParser Char st Block
block = Block <$> header <* spaces <*> num `sepEndBy1` (try comment <|> spaces) where
  num = ((eInf <$ string "INF") <|> parseIntegral)

assocBlock :: GenParser Char st Block
assocBlock = ABlock <$> header <* spaces <*> asc `sepEndBy1` spaces where
  asc = (,,) <$> (many1 $ oneOf "ACGU") <* spaces <*> parseIntegral <* spaces <*> parseIntegral

header :: GenParser Char st String
header = char '#' *> space *> (alphaNum <|> char '_') `manyTill` newline

comment :: GenParser Char st ()
comment = const () <$ spaces <*> between (string "/*") (string "*/") (many $ noneOf "/*") <* spaces