module Biobase.Vienna.Import.ViennaPar
( importPar
) where
import Text.Parsec hiding ((<|>), many, optional)
import Text.Parsec.String
import Text.Parsec.Char
import Text.Parsec.Combinator hiding (optional)
import Control.Applicative
import Data.List
import qualified Data.Map as M
import Text.Parsec.Numbers
import Biobase.Constants
import Biobase.RNA
import Biobase.Turner.Tables
import Data.PrimitiveArray
import Biobase.Vienna
importPar :: String -> (ViennaIntTables,ViennaIntTables)
importPar s
| Right _ <- p = foldr updateTables (trnr,trnrH) bs
| Left err <- p = error $ show err
where
p = parse blocks "(stdin)" s
Right bs = p
(bgs,bhs) = partition (not . isHblock) bs
trnr = foldr updateTable emptyTables bgs
trnrH = foldr updateTable emptyTables bhs
updateTable :: Block -> ViennaIntTables -> ViennaIntTables
updateTable Block{..} trnr
| bhdr =? "stack" = trnr{stack = fromAssocs minBound maxBound z $ zip pp ints}
| bhdr =? "dangle3" = trnr{dangle3 = fromAssocs minBound maxBound z $ zip pb ints}
| bhdr =? "dangle5" = trnr{dangle5 = fromAssocs minBound maxBound z $ zip pb ints}
| bhdr =? "hairpin" = trnr{hairpinL = fromList 0 30 ints}
| bhdr =? "mismatch_hairpin" = trnr{hairpinMM = fromAssocs minBound maxBound z $ zip pbb ints}
| bhdr =? "bulge" = trnr{bulgeL = fromList 0 30 ints}
| bhdr =? "int11" = trnr{iloop1x1 = fromAssocs minBound maxBound z $ zip ppbb ints}
| bhdr =? "int21" = trnr{iloop1x2 = fromAssocs minBound maxBound z $ zip ppbbb ints}
| bhdr =? "int22" = trnr{iloop2x2 = fromAssocs minBound maxBound z $ zip ppbbbb ints}
| bhdr =? "mismatch_interior" = trnr{iloopMM = fromAssocs minBound maxBound z $ zip pbb ints}
| bhdr =? "mismatch_interior_23" = trnr{iloop2x3MM = fromAssocs minBound maxBound z $ zip pbb ints}
| bhdr =? "mismatch_interior_1n" = trnr{iloop1xnMM = fromAssocs minBound maxBound z $ zip pbb ints}
| bhdr =? "interior" = trnr{iloopL = fromList 0 30 ints}
| bhdr =? "mismatch_multi" = trnr{multiMM = fromAssocs minBound maxBound z $ zip pbb ints}
| bhdr =? "mismatch_exterior" = trnr{extMM = fromAssocs minBound maxBound z $ zip pbb ints}
| otherwise = trnr
updateTable ABlock{..} trnr
| otherwise = trnr
updateTables Block{..} (trnr,trnrH)
| bhdr =? "ML_params"
= ( trnr {multiNuc = ints!!0, multiOffset = ints!!2, multiHelix = ints!!4}
, trnrH{multiNuc = ints!!1, multiOffset = ints!!3, multiHelix = ints!!5}
)
| bhdr =? "NINIO"
= ( trnr {ninio = ints!!0, maxNinio = ints!!2}
, trnrH {ninio = ints!!1}
)
| bhdr =? "Misc"
= ( trnr {intermolecularInit = ints!!0, termAU = ints!!2}
, trnrH {intermolecularInit = ints!!1, termAU = ints!!3}
)
| otherwise = (trnr,trnrH)
updateTables ABlock{..} (trnr,trnrH)
| bhdr =? "Triloops" || bhdr =? "Tetraloops" || bhdr =? "Hexaloops"
= ( trnr {hairpinLookup = hairpinLookup trnr `M.union` (M.fromList $ map (\(k,v,w) -> (s2ns k,v)) ascs)}
, trnrH {hairpinLookup = hairpinLookup trnrH `M.union` (M.fromList $ map (\(k,v,w) -> (s2ns k,w)) ascs)}
)
| otherwise = (trnr,trnrH)
pp = [(x,y) | x<-cgnsP,y<-cgnsP]
pb = [(x,y) | x<-cgnsP,y<-eacgu]
pbb = [(x,y,z) | x<-cgnsP,y<-eacgu,z<-eacgu]
ppbb = [(p1,p2,b1,b2) | p1<-cgnsP,p2<-cgnsP,b1<-eacgu,b2<-eacgu]
ppbbb = [(p1,p2,b1,b2,b3) | p1<-cgnsP,p2<-cgnsP,b1<-eacgu,b2<-eacgu,b3<-eacgu]
ppbbbb = [(p1,p2,b1,b2,b3,b4) | p1<-cguaP,p2<-cguaP,b1<-acgu,b2<-acgu,b3<-acgu,b4<-acgu]
z = eInf
xs =? ys = xs == ys || xs == ys++"_enthalpies"
s2ns = map charToNucleotide
isHblock :: Block -> Bool
isHblock b = "enthalpies" `elem` (tails $ bhdr b)
data Block
= Block {bhdr :: String, ints :: [Int]}
| ABlock {bhdr :: String, ascs :: [(String,Int,Int)]}
deriving (Show)
blocks :: GenParser Char st [Block]
blocks = vrna2 *> spaces *> (try assocBlock <|> try block) `sepEndBy1` spaces <* string "#END" <* spaces <* eof
vrna2 = string "## RNAfold parameter file v2.0"
block :: GenParser Char st Block
block = Block <$> header <* spaces <*> num `sepEndBy1` (try comment <|> spaces) where
num = ((eInf <$ string "INF") <|> parseIntegral)
assocBlock :: GenParser Char st Block
assocBlock = ABlock <$> header <* spaces <*> asc `sepEndBy1` spaces where
asc = (,,) <$> (many1 $ oneOf "ACGU") <* spaces <*> parseIntegral <* spaces <*> parseIntegral
header :: GenParser Char st String
header = char '#' *> space *> (alphaNum <|> char '_') `manyTill` newline
comment :: GenParser Char st ()
comment = const () <$ spaces <*> between (string "/*") (string "*/") (many $ noneOf "/*") <* spaces