module Biobase.Turner.Import
( parseTurner
) where
import Control.Applicative
import Control.Arrow (first)
import Control.Monad
import Data.Either
import Data.List.Split (splitEvery)
import Data.List (transpose)
import qualified Data.Map as M
import System.FilePath.Find
import Text.Parsec hiding ((<|>), many)
import Text.Parsec.String
import Biobase.RNA
import Data.Ix.Tuple
import Data.PrimitiveArray
import Data.PrimitiveArray.Ix
import Text.Parsec.Numbers
import Biobase.Turner.Tables
minp = (nucA,nucA)
maxp = (nucU,nucU)
minpb = (minp,nucA)
maxpb = (maxp,nucU)
minpp = (minp,minp)
maxpp = (maxp,maxp)
minppbb = (minp,minp,nucA,nucA)
maxppbb = (maxp,maxp,nucU,nucU)
minppbbb = (minp,minp,nucA,nucA,nucA)
maxppbbb = (maxp,maxp,nucU,nucU,nucU)
minppbbbb = (minp,minp,nucA,nucA,nucA,nucA)
maxppbbbb = (maxp,maxp,nucU,nucU,nucU,nucU)
minpbb = (minp,nucA,nucA)
maxpbb = (maxp,nucU,nucU)
keyspp = [((k1,k2),(k4,k3)) | k1 <- acgu, k3 <- acgu, k2 <- acgu, k4 <- acgu]
keyspb = [((k1,k2),k3) | k1 <- acgu, k2 <- acgu, k3 <- acgu]
keyspbb = [((k1,k2),k3,k4) | k1 <- acgu, k3 <- acgu, k2 <- acgu, k4 <- acgu]
plist11 = [(nucA,nucU),(nucC,nucG),(nucG,nucC),(nucU,nucA),(nucG,nucU),(nucU,nucG)]
plist22 = [(nucA,nucU),(nucC,nucG),(nucG,nucC),(nucG,nucU),(nucU,nucA),(nucU,nucG)]
keysppbb = [((k1,k2),(k4,k3),k5,k6) | (k1,k2) <- plist11, k5 <- acgu, (k3,k4) <- plist11, k6 <- acgu]
keysppbbb = [((k1,k2),(k4,k3),k5,k6,k7) | (k1,k2) <- plist11, k6 <- acgu, k5 <- acgu, (k3,k4) <- plist11, k7 <- acgu]
keysppbbbb = [((k1,k2),(k4,k3),k5,k6,k7,k8) | (k1,k2) <- plist22, (k3,k4) <- plist22, k5 <- acgu, k8 <- acgu, k6 <- acgu, k7 <- acgu]
z = 999999
parseTurner prefix basedir fsuffix = do
vstack <- grabB basedir fsuffix $ prefix ++ "stack"
vdangles <- grabB basedir fsuffix $ prefix ++ "dangle"
vlengths <- grabB basedir fsuffix $ prefix ++ "loop"
vhairmm <- grabB basedir fsuffix $ prefix ++ "tstackh"
viloopmm <- grabB basedir fsuffix $ prefix ++ "tstacki"
viloop23mm <- grabB basedir fsuffix $ prefix ++ "tstacki23"
viloop1nmm <- grabB basedir fsuffix $ prefix ++ "tstacki1n"
vmultimm <- grabB basedir fsuffix $ prefix ++ "tstackm"
vextmm <- grabB basedir fsuffix $ prefix ++ "tstack"
viloop11 <- grabB basedir fsuffix $ prefix ++ "int11"
viloop12 <- grabB basedir fsuffix $ prefix ++ "int21"
viloop22 <- grabB basedir fsuffix $ prefix ++ "int22"
vlookups <- fmap (map (first (map charToNucleotide)) . concat) $ mapM (\f -> grabA basedir fsuffix $ prefix ++ f) ["triloop","tloop","hexaloop"]
vmisc <- grabB basedir fsuffix $ prefix ++ "miscloop"
let (vdangle3,vdangle5) = splitAt (length vdangles `div` 2) vdangles
let (_:viloopl:vbulgel:vhairpinl:[]) = transpose $ splitEvery 4 vlengths
return $ Turner2004
{ stack = fromAssocs minpp maxpp z $ zip keyspp vstack
, dangle3 = fromAssocs minpb maxpb z $ zip keyspb vdangle3
, dangle5 = fromAssocs minpb maxpb z $ zip keyspb vdangle5
, hairpinL = fromAssocs 0 30 z $ zip [1..] vhairpinl
, bulgeL = fromAssocs 0 30 z $ zip [1..] vbulgel
, iloopL = fromAssocs 0 30 z $ zip [1..] viloopl
, hairpinMM = fromAssocs minpbb maxpbb z $ zip keyspbb vhairmm
, iloopMM = fromAssocs minpbb maxpbb z $ zip keyspbb viloopmm
, iloop2x3MM = fromAssocs minpbb maxpbb z $ zip keyspbb viloop23mm
, iloop1xnMM = fromAssocs minpbb maxpbb z $ zip keyspbb viloop1nmm
, multiMM = fromAssocs minpbb maxpbb z $ zip keyspbb vmultimm
, extMM = fromAssocs minpbb maxpbb z $ zip keyspbb vextmm
, hairpinLookup = M.fromList vlookups
, iloop1x1 = fromAssocs minppbb maxppbb z $ zip keysppbb viloop11
, iloop1x2 = fromAssocs minppbbb maxppbbb z $ zip keysppbbb viloop12
, iloop2x2 = fromAssocs minppbbbb maxppbbbb z $ zip keysppbbbb viloop22
, ninio = vmisc !! 2
, maxNinio = vmisc !! 1
, multiOffset = vmisc !! 6
, multiNuc = vmisc !! 7
, multiHelix = vmisc !! 8
, largeLoop = vmisc !! 0
, termAU = vmisc !! 14
, intermolecularInit = vmisc !! 19
}
grabB basedir fsuffix fname = do
p <- parseFromFile pBlockFile (basedir ++ "/" ++ fname ++ fsuffix)
case p of
(Right ans) -> return ans
(Left err) -> error $ show err
grabA basedir fsuffix fname = do
p <- parseFromFile pAssocFile (basedir ++ "/" ++ fname ++ fsuffix)
case p of
(Right ans) -> return ans
(Left err) -> error $ show err
pBlockFile = concat <$> many (try numberRow <|> infoRow) <* eof where
numberRow = many (char ' ') *> (number <* notFollowedBy (char '\'')) `sepEndBy1` (many $ char ' ') <* newline
number = (try dotFloat) <|> (999999 <$ char '.') <|> parseExtFloat <?> "floating point number"
pAssocFile = concat <$> many1 (try assocRow <|> infoRow) where
assocRow = (\a b -> [(a,b)]) <$ many (char ' ') <*> many1 (oneOf nuc) <* many (char ' ') <*> parseExtFloat <* many (char ' ')
infoRow = [] <$ anyChar `manyTill` newline
dotFloat = (\s a b -> read $ s:'0':a:b) <$> (char '-' <|> pure ' ') <*> char '.' <*> many1 digit
nuc = "ACGUT"