{-# LANGUAGE PatternGuards #-} {-# LANGUAGE RecordWildCards #-} module Biobase.DataSource.Vienna.Import.ViennaPar ( importPar ) where import Text.Parsec hiding ((<|>), many, optional) import Text.Parsec.String import Text.Parsec.Char import Text.Parsec.Combinator hiding (optional) import Control.Applicative import Data.List import qualified Data.Map as M import Text.Parsec.Numbers import Biobase.Constants import Biobase.RNA import Biobase.RNA.ViennaPair import Biobase.DataSource.Turner.Tables import Data.PrimitiveArray import Biobase.DataSource.Vienna -- | Imports Vienna tables from a given string. importPar :: String -> (ViennaIntTables,ViennaIntTables) importPar s | Right _ <- p = foldr updateTables (trnr,trnrH) bs | Left err <- p = error $ show err where p = parse blocks "(stdin)" s Right bs = p (bgs,bhs) = partition (not . isHblock) bs trnr = foldr updateTable emptyTables bgs trnrH = foldr updateTable emptyTables bhs -- | Apply all parsed blocks to a Turner2004 structure. updateTable :: Block -> ViennaIntTables -> ViennaIntTables updateTable Block{..} trnr -- @Turner2004{..} | bhdr =? "stack" = trnr{stack = fromAssocs minBound maxBound z $ zip pp ints} | bhdr =? "dangle3" = trnr{dangle3 = fromAssocs minBound (vpNS,nucU) z $ zip pb ints} | bhdr =? "dangle5" = trnr{dangle5 = fromAssocs minBound (vpNS,nucU) z $ zip pb ints} | bhdr =? "hairpin" = trnr{hairpinL = fromList 0 30 ints} | bhdr =? "mismatch_hairpin" = trnr{hairpinMM = fromAssocs minBound (vpNS,nucU,nucU) z $ zip pbb ints} | bhdr =? "bulge" = trnr{bulgeL = fromList 0 30 ints} | bhdr =? "int11" = trnr{iloop1x1 = fromAssocs minBound (vpNS,vpNS,nucU,nucU) z $ zip ppbb ints} | bhdr =? "int21" = trnr{iloop1x2 = fromAssocs minBound (vpNS,vpNS,nucU,nucU,nucU) z $ zip ppbbb ints} | bhdr =? "int22" = trnr{iloop2x2 = fromAssocs minBound (vpNS,vpNS,nucU,nucU,nucU,nucU) z $ zip ppbbbb ints} | bhdr =? "mismatch_interior" = trnr{iloopMM = fromAssocs minBound (vpNS,nucU,nucU) z $ zip pbb ints} | bhdr =? "mismatch_interior_23" = trnr{iloop2x3MM = fromAssocs minBound (vpNS,nucU,nucU) z $ zip pbb ints} | bhdr =? "mismatch_interior_1n" = trnr{iloop1xnMM = fromAssocs minBound (vpNS,nucU,nucU) z $ zip pbb ints} | bhdr =? "interior" = trnr{iloopL = fromList 0 30 ints} | bhdr =? "mismatch_multi" = trnr{multiMM = fromAssocs minBound (vpNS,nucU,nucU) z $ zip pbb ints} | bhdr =? "mismatch_exterior" = trnr{extMM = fromAssocs minBound (vpNS,nucU,nucU) z $ zip pbb ints} | otherwise = trnr -- unknown block! updateTable ABlock{..} trnr | otherwise = trnr -- CORRECT! -- | update with info where no separation between G/H was done. updateTables Block{..} (trnr,trnrH) | bhdr =? "ML_params" = ( trnr {multiNuc = ints!!0, multiOffset = ints!!2, multiHelix = ints!!4} , trnrH{multiNuc = ints!!1, multiOffset = ints!!3, multiHelix = ints!!5} ) | bhdr =? "NINIO" = ( trnr {ninio = ints!!0, maxNinio = ints!!2} , trnrH {ninio = ints!!1} ) | bhdr =? "Misc" = ( trnr {intermolecularInit = ints!!0, termAU = ints!!2} , trnrH {intermolecularInit = ints!!1, termAU = ints!!3} ) | otherwise = (trnr,trnrH) updateTables ABlock{..} (trnr,trnrH) | bhdr =? "Triloops" || bhdr =? "Tetraloops" || bhdr =? "Hexaloops" = ( trnr {hairpinLookup = hairpinLookup trnr `M.union` (M.fromList $ map (\(k,v,w) -> (s2ns k,v)) ascs)} , trnrH {hairpinLookup = hairpinLookup trnrH `M.union` (M.fromList $ map (\(k,v,w) -> (s2ns k,w)) ascs)} ) | otherwise = (trnr,trnrH) pp = [(x,y) | x<-cgnsP,y<-cgnsP] pb = [(x,y) | x<-cgnsP,y<-eacgu] pbb = [(x,y,z) | x<-cgnsP,y<-eacgu,z<-eacgu] ppbb = [(p1,p2,b1,b2) | p1<-cgnsP,p2<-cgnsP,b1<-eacgu,b2<-eacgu] ppbbb = [(p1,p2,b1,b2,b3) | p1<-cgnsP,p2<-cgnsP,b1<-eacgu,b2<-eacgu,b3<-eacgu] ppbbbb = [(p1,p2,b1,b2,b3,b4) | p1<-cguaP,p2<-cguaP,b1<-acgu,b2<-acgu,b3<-acgu,b4<-acgu] z = eInf -- xs is the block header name, ys the constant string xs =? ys = xs == ys || xs == ys++"_enthalpies" -- and $ zipWith (==) xs ys s2ns = map mkNuc -- * Simple parser for the vienna 2.0 format. isHblock :: Block -> Bool isHblock b = "enthalpies" `elem` (tails $ bhdr b) data Block = Block {bhdr :: String, ints :: [Int]} | ABlock {bhdr :: String, ascs :: [(String,Int,Int)]} deriving (Show) blocks :: GenParser Char st [Block] blocks = vrna2 *> spaces *> (try assocBlock <|> try block) `sepEndBy1` spaces <* string "#END" <* spaces <* eof vrna2 = string "## RNAfold parameter file v2.0" block :: GenParser Char st Block block = Block <$> header <* spaces <*> num `sepEndBy1` (try comment <|> spaces) where num = ((eInf <$ string "INF") <|> parseIntegral) assocBlock :: GenParser Char st Block assocBlock = ABlock <$> header <* spaces <*> asc `sepEndBy1` spaces where asc = (,,) <$> (many1 $ oneOf "ACGU") <* spaces <*> parseIntegral <* spaces <*> parseIntegral header :: GenParser Char st String header = char '#' *> space *> (alphaNum <|> char '_') `manyTill` newline comment :: GenParser Char st () comment = const () <$ spaces <*> between (string "/*") (string "*/") (many $ noneOf "/*") <* spaces