module Biobase.Infernal.CM.Import (fromFile,fromString) where
import Data.Maybe (fromJust)
import qualified Data.Array.IArray as A
import qualified Data.List as L
import Text.ParserCombinators.Parsec hiding (State)
import Biobase.Infernal.CM
import Biobase.RNA hiding (nucE)
myChar = alphaNum <|> char '-' <|> char '_' <|> char '.'
headerChar = myChar <|> char '[' <|> char ']'
floating = many1 $ digit <|> char '.' <|> char 'e' <|> char '-'
headerLine = do
key <- many1 headerChar
--spaces
many1 $ char ' '
val <- many1 (headerChar <|> char ' ' <|> char '/' <|> char ':')
newline
return (key,val)
unknownNumbers = do
many1 $ char ' '
ns <- floating `sepEndBy` (many1 $ char ' ')
newline
return ("",concat ns)
theHeader = do
hs <- many $ (try headerLine <|> try unknownNumbers)
string "MODEL:"
newline
return hs
node = do
spaces
string "[ "
name <- many letter
spaces
num <- many digit >>= (return . read)
spaces
string "]"
newline
states <- many $ try (state num)
return $ (Node
{ ntype = read name
, nid = num
, nstates = map sid states
, nparents = []
, nchildren = []
, ntag = ()
}
, states)
aNum = many1 $ char '-' <|> digit
addneginf :: String -> Double
addneginf "*" = (1)/0 --"-1000000000.0"
addneginf x = read x
state nodeID = do
spaces
name <- many1 myChar
spaces
sid <- aNum >>= (return . read)
spaces
plast <- aNum >>= (return . read)
spaces
pnum <- aNum >>= (return . read)
spaces
cfirst <- aNum >>= (return . read)
spaces
cnum <- aNum >>= (return . read)
probs <- ( (many $ myChar <|> char '*') `sepBy` (many1 $ char ' ') >>=
return . map addneginf . filter ((/=) "") )
newline
let s = case name of
"B" -> State
{ stype = read name
, sid = sid
, snode = nodeID
, sparents = [plastpnum+1 .. plast]
, schildren = [Branch cfirst, Branch cnum]
, semission = []
, stag = ()
}
_ -> State
{ stype = read name
, sid = sid
, snode = nodeID
, sparents = [plastpnum+1 .. plast]
, schildren = zipWith Transition [cfirst .. cfirst+cnum1] probs
, semission = let keep = drop cnum probs in
case length keep of
0 -> []
4 -> zipWith EmitS acgu keep
16 -> zipWith (\(k1,k2) v -> EmitP k1 k2 v) acguPairs keep
_ -> error $ "strange number of probabilities" ++ show (keep)
, stag = ()
}
return s
models = do
ms <- many model
eof
return ms
model = do
h <- theHeader
ns <- many node
let states = concatMap snd ns
let nodes = map (addPCinfo states . fst) ns
string "//"
newline
return $ CM
{ nodes = A.array (0, length nodes 1) $ zip (map nid nodes) nodes
, states = A.array (0, length states 1) $ zip (map sid states) states
, header = h
, localBegin = A.array (0, length states 1) $ zip [0 .. length states 1] (0.0 : repeat (1/0))
, localEnd = A.array (0, length states 1) $ zip [0 .. length states 1] (repeat (1/0))
, cmType = CMScore
, nullModel = A.array (nucA,nucU) $ zip acgu (map read . words . fromJust $ "NULL" `lookup` h)
}
addPCinfo states n =
let
s = nstates n
sp = L.nub $ L.sort $ concatMap (sparents . (states !!)) s
sc = L.nub $ L.sort $ concatMap (transitionTargets . schildren . (states !!)) s
np = (L.nub $ L.sort $ map (snode . (states !!)) sp) L.\\ [nid n]
nc = (L.nub $ L.sort $ map (snode . (states !!)) sc) L.\\ [nid n]
in
n {nparents = np, nchildren = nc}
fromFile f = parseFromFile models f
fromString s = parse models "(stdin)" s