module Biobase.Infernal.CM where
import Data.Array.IArray
import Data.List (genericLength)
import Biobase.RNA hiding (nucE)
data CM n s = CM
{ nodes :: Array Int (Node n)
, states :: Array Int (State s)
, header :: [(String,String)]
, localBegin :: Array Int Double
, localEnd :: Array Int Double
, cmType :: CMType
, nullModel :: Array Nucleotide Double
} deriving (Show)
data Node n = Node
{ nid :: Int
, ntype :: NodeType
, nparents :: [Int]
, nchildren :: [Int]
, nstates :: [Int]
, ntag :: n
} deriving (Show)
data State s = State
{ sid :: Int
, stype :: StateType
, snode :: Int
, sparents :: [Int]
, schildren :: [Transition]
, semission :: [Emission]
, stag :: s
} deriving (Show)
data CMType = CMProb | CMScore
deriving (Show,Eq)
data Emission
= EmitS {eNuc :: Nucleotide, escore :: Double}
| EmitP { eNucL :: Nucleotide, eNucR :: Nucleotide, escore :: Double}
deriving (Show)
data Transition
= Branch {tchild :: Int}
| Transition {tchild :: Int, tscore :: Double}
deriving (Show)
data NodeType = MATP | MATL | MATR | BIF | ROOT | BEGL | BEGR | END
deriving (Read,Show,Eq,Ord,Enum,Bounded)
data StateType = MP | IL | IR | D | ML | MR | B | S | E
deriving (Read,Show,Eq,Ord,Enum,Bounded)
cmMakeLocal :: Double -> Double -> CM n s -> CM n s
cmMakeLocal pbegin pend cm = cmMakeLocalBegin pbegin $ cmMakeLocalEnd pend cm
cmMakeLocalBegin :: Double -> CM n s -> CM n s
cmMakeLocalBegin pbegin cm = cm{localBegin = localBegin cm // changes} where
changes = rootS : (start : intern)
rootS = (0, prob2Score 0 1.0)
start = (head . nstates $ nodes cm ! 1, prob2Score (1pbegin) 1.0)
intern = map (\k -> (sid $ nodeMainState cm k,prob2Score (pbegin / l) 1.0)) nds
nds = filter (localBeginPossible cm) . elems $ nodes cm
l = genericLength nds
cmMakeLocalEnd :: Double -> CM n s -> CM n s
cmMakeLocalEnd pend cm = cm{localEnd = localEnd cm // changes} where
changes = map (\k -> (sid $ nodeMainState cm k,prob2Score (pend / l) 1.0)) nds
nds = filter (localBeginPossible cm) . elems $ nodes cm
l = genericLength nds
cmScore2Prob :: CM n s -> CM n s
cmScore2Prob cm' = if cmType cm' == CMProb then cm' else CM
(nodes cm)
(statesScore2Prob cm $ states cm)
(header cm)
(localBeginScore2Prob $ localBegin cm)
(localEndScore2Prob $ localEnd cm)
CMProb
nm
where
nm = amap (flip score2Prob 0.25) $ nullModel cm'
cm = cm' {nullModel = nm}
cmProb2Score :: CM n s -> CM n s
cmProb2Score cm' = if cmType cm' == CMScore then cm' else CM
(nodes cm)
(statesProb2Score cm $ states cm)
(header cm)
(localBeginProb2Score $ localBegin cm)
(localEndProb2Score $ localEnd cm)
CMScore
nm
where
nm = amap (flip prob2Score 0.25) $ nullModel cm'
cm = cm' {nullModel = nm}
cmNormalizeProbabilities :: CM n s -> CM n s
cmNormalizeProbabilities cm
| cmType cm == CMScore = error "cannot normalize score-type CM"
| otherwise = cm
statesScore2Prob :: CM n s -> Array Int (State s) -> Array Int (State s)
statesScore2Prob cm sA = amap f sA where
f s = s {schildren = map fT $ schildren s, semission = map fE $ semission s}
fT b@(Branch _) = b
fT (Transition k v) = Transition k (score2Prob v 1.0)
fE (EmitS k v) = EmitS k (score2Prob v $ nullModel cm ! k)
fE (EmitP k1 k2 v) = EmitP k1 k2 (score2Prob v $ (nullModel cm ! k1) * (nullModel cm ! k2))
localBeginScore2Prob :: Array Int Double -> Array Int Double
localBeginScore2Prob sA = amap f sA where
f s = score2Prob s 1.0
localEndScore2Prob :: Array Int Double -> Array Int Double
localEndScore2Prob sA = amap f sA where
f s = score2Prob s 1.0
statesProb2Score :: CM n s -> Array Int (State s) -> Array Int (State s)
statesProb2Score cm sA = amap f sA where
f s = s {schildren = map fT $ schildren s, semission = map fE $ semission s}
fT b@(Branch _) = b
fT (Transition k v) = Transition k (prob2Score v 1.0)
fE (EmitS k v) = EmitS k (prob2Score v $ nullModel cm ! k)
fE (EmitP k1 k2 v) = EmitP k1 k2 (prob2Score v $ (nullModel cm ! k1) * (nullModel cm ! k2))
localBeginProb2Score :: Array Int Double -> Array Int Double
localBeginProb2Score sA = amap f sA where
f s = prob2Score s 1.0
localEndProb2Score :: Array Int Double -> Array Int Double
localEndProb2Score sA = amap f sA where
f s = score2Prob s 1.0
nodeMainState :: CM n s -> Node n -> State s
nodeMainState cm n = head $ filter ((==st) . stype) ss where
(Just st) = (ntype n) `lookup` nodeMainStateAssocs
ss = map (states cm !) $ nstates n
localBeginPossible :: CM n s -> Node n -> Bool
localBeginPossible cm n =
if ntype n `elem` okNodes
&& (not . any (==0) $ nparents n)
then True
else False
where
okNodes = [MATP,MATL,MATR,BIF]
localEndPossible :: CM n s -> Node n -> Bool
localEndPossible cm n =
if ntype n `elem` okNodes
&& (END /= (ntype $ nodes cm ! (nid n +1)))
then True
else False
where
okNodes = [MATP,MATL,MATR,BEGL,BEGR]
score2Prob x null
| x == (1/0) = 0
| otherwise = exp (x * log 2) * null
prob2Score x null
| x == 0 = (1/0)
| otherwise = log (x / null) / log 2
stateScore2Prob :: CM n s -> State s -> State s
stateScore2Prob cm s = error "implement me"
stateProb2Score :: CM n s -> State s -> State s
stateProb2Score cm s = error "implement me"
transitionTargets :: [Transition] -> [Int]
transitionTargets xs = map f xs where
f (Branch x) = x
f (Transition x _) = x
nodeMainStateAssocs :: [(NodeType,StateType)]
nodeMainStateAssocs =
[ (MATP, MP)
, (MATL, ML)
, (MATR, MR)
, (BIF, B)
, (ROOT, S)
, (BEGL, S)
, (BEGR, S)
, (END, E)
]