module Biobase.Vienna.ImportPar where
import Data.Iteratee as I
import Data.Iteratee.Char as I
import Data.Iteratee.IO as I
import Data.ByteString.Char8 as BS
import Prelude as P
import Data.Array.Repa.Index
import Data.Array.Repa.Shape
import qualified Data.Map as M
import Control.Monad.Identity
import Data.Maybe (fromJust)
import Data.PrimitiveArray
import Data.PrimitiveArray.Unboxed.Zero
import Biobase.Secondary.Vienna
import Biobase.Primary
import Biobase.Vienna
import Biobase.Vienna.Import
import Debug.Trace
data BL
= Block {fromBlock :: [Int]}
| Lookup {fromLookup :: [(ByteString,Int,Int)]}
deriving (Show)
lookupStructure =
[ "# Triloops"
, "# Tetraloops"
, "# Hexaloops"
]
iterBlocks = enumLinesBS ><> I.filter (not . BS.null) ><> convStream f where
f = do
h <- I.head
if h `P.elem` lookupStructure
then do
xs' <- I.takeWhile ((/='#') . BS.head)
let xs = P.map (\[w,x,y] -> (w,getInt x, getInt y)) . P.map BS.words $ xs'
return [(h, Lookup xs)]
else do
xs' <- I.takeWhile ((/='#') . BS.head)
let xs = P.map getInt . P.concat . P.map BS.words . P.map (BS.takeWhile (/='/')) $ xs'
return [(h, Block xs)]
getInt :: ByteString -> Int
getInt s
| s == "INF" = 999999
| otherwise = read . BS.unpack $ s
fromFile :: FilePath -> IO (Vienna2004,Vienna2004)
fromFile fp = do
i <- enumFile 8192 fp (joinI $ iterBlocks stream2list)
bs <- run i
return $ makeStructures bs
fromByteString :: ByteString -> (Vienna2004,Vienna2004)
fromByteString s = runIdentity $ do
i <- enumPure1Chunk s (joinI $ iterBlocks stream2list)
bs <- run i
return $ makeStructures bs
makeStructures bs =
let vEner = Vienna2004
{ stack = blockAssocs minPP maxPP ppKeys $ lookup "# stack" bs
, dangle3 = blockAssocs minPB maxPB pbKeys $ lookup "# dangle3" bs
, dangle5 = blockAssocs minPB maxPB pbKeys $ lookup "# dangle5" bs
, hairpinL = blockAssocs (Z:.0) (Z:.30) thirty $ lookup "# hairpin" bs
, hairpinMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_hairpin" bs
, hairpinLookup = fst $ allLookups bs
, hairpinGGG = 999999
, hairpinCslope = 999999
, hairpinCintercept = 999999
, hairpinC3 = 999999
, bulgeL = blockAssocs (Z:.0) (Z:.30) thirty $ lookup "# bulge" bs
, bulgeSingleC = 999999
, iloop1x1 = blockAssocs minPPBB maxPPBB ppbbKeys $ lookup "# int11" bs
, iloop2x1 = blockAssocs minPPBBB maxPPBBB ppbbbKeys $ lookup "# int21" bs
, iloop2x2 = blockAssocs minPPBBBB maxPPBBBB ppbbbbKeys $ lookup "# int22" bs
, iloopMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_interior" bs
, iloop2x3MM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_interior_23" bs
, iloop1xnMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_interior_1n" bs
, iloopL = blockAssocs (Z:.0) (Z:.30) thirty $ lookup "# interior" bs
, multiMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_multi" bs
, ninio = single 0 $ lookup "# NINIO" bs
, maxNinio = single 2 $ lookup "# NINIO" bs
, multiOffset = single 2 $ lookup "# ML_params" bs
, multiNuc = single 0 $ lookup "# ML_params" bs
, multiHelix = single 4 $ lookup "# ML_params" bs
, multiAsym = 999999
, multiStrain = 999999
, extMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_exterior" bs
, coaxial = fromAssocs minPP maxPP 999999 []
, coaxStack = fromAssocs minPBB maxPBB 999999 []
, tStackCoax = fromAssocs minPBB maxPBB 999999 []
, largeLoop = 999999
, termAU = single 2 $ lookup "# Misc" bs
, intermolecularInit = 999999
}
vEnth = Vienna2004
{ stack = blockAssocs minPP maxPP ppKeys $ lookup "# stack_enthalpies" bs
, dangle3 = blockAssocs minPB maxPB pbKeys $ lookup "# dangle3_enthalpies" bs
, dangle5 = blockAssocs minPB maxPB pbKeys $ lookup "# dangle5_enthalpies" bs
, hairpinL = blockAssocs (Z:.0) (Z:.30) thirty $ lookup "# hairpin_enthalpies" bs
, hairpinMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_hairpin_enthalpies" bs
, hairpinLookup = snd $ allLookups bs
, hairpinGGG = 999999
, hairpinCslope = 999999
, hairpinCintercept = 999999
, hairpinC3 = 999999
, bulgeL = blockAssocs (Z:.0) (Z:.30) thirty $ lookup "# bulge_enthalpies" bs
, bulgeSingleC = 999999
, iloop1x1 = blockAssocs minPPBB maxPPBB ppbbKeys $ lookup "# int11_enthalpies" bs
, iloop2x1 = blockAssocs minPPBBB maxPPBBB ppbbbKeys $ lookup "# int21_enthalpies" bs
, iloop2x2 = blockAssocs minPPBBBB maxPPBBBB ppbbbbKeys $ lookup "# int22_enthalpies" bs
, iloopMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_interior_enthalpies" bs
, iloop2x3MM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_interior_23_enthalpies" bs
, iloop1xnMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_interior_1n_enthalpies" bs
, iloopL = blockAssocs (Z:.0) (Z:.30) thirty $ lookup "# interior_enthalpies" bs
, multiMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_multi_enthalpies" bs
, ninio = 999999
, maxNinio = 999999
, multiOffset = 999999
, multiNuc = 999999
, multiHelix = 999999
, multiAsym = 999999
, multiStrain = 999999
, extMM = blockAssocs minPBB maxPBB pbbKeys $ lookup "# mismatch_exterior_enthalpies" bs
, coaxial = fromAssocs minPP maxPP 999999 []
, coaxStack = fromAssocs minPBB maxPBB 999999 []
, tStackCoax = fromAssocs minPBB maxPBB 999999 []
, largeLoop = 999999
, termAU = 999999
, intermolecularInit = 999999
}
in (vEner,vEnth)
single k (Just (Block xs)) = xs !! k
thirty = P.map (Z:.) [0..30]
pbKeys = [ (Z:.x:.y) | x<-cgnsP, y<-nacgu ]
ppKeys = [ (Z:.x:.y) | x<-cgnsP, y<-cgnsP ]
pbbKeys = [ (Z:.a:.b:.c) | a<-cgnsP, b<-nacgu, c<-nacgu ]
ppbbKeys = [ pp:.a:.b | pp<-ppKeys, a<-nacgu, b<-nacgu ]
ppbbbKeys = [ ppbb:.a | ppbb<-ppbbKeys, a<-nacgu ]
ppbbbbKeys = [ Z:.a:.b:.c:.d:.e:.f | a<-cguaP, b<-cguaP, c<-acgu, d<-acgu, e<-acgu, f<-acgu ]
blockAssocs minKey maxKey keys (Just (Block xs)) = fromAssocs minKey maxKey 999999 $ P.zip keys xs
allLookups bs = (M.fromList $ P.map (\(a,b,c) -> (a,b)) ls, M.fromList $ P.map (\(a,b,c) -> (a,c)) ls) where
ls = P.map (\(a,b,c) -> (mkPrimary a,b,c)) $ P.concatMap (fromLookup . snd) $ P.filter (isL . snd) bs
isL (Lookup _) = True
isL _ = False