module Biobase.SElab.CM where
import Control.Lens
import Data.ByteString.Char8 as BS
import Data.Ix (Ix)
import Data.Map as M
import Data.Primitive.Types
import Data.Vector as V
import Data.Vector.Unboxed as VU
import GHC.Base (quotInt,remInt)
import Prelude as P
import Data.List (genericLength)
import Data.Array.Repa.Index
import Data.Array.Repa.Index as R
import Data.Array.Repa.Shape as R
import Data.Array.Repa.ExtShape as R
import Biobase.SElab.Types
import qualified Biobase.SElab.HMM as HMM
data CMVersion
= Infernal10 BS.ByteString
| Infernal11 BS.ByteString
deriving (Eq,Ord,Show,Read)
data NodeType
= BIF
| MATP
| MATL
| MATR
| BEGL
| BEGR
| ROOT
| END
deriving (Eq,Ord,Enum,Show,Read)
newtype NodeID = NodeID {unNodeID :: Int}
deriving (Eq,Ord,Show,Read)
data StateType
= D
| MP
| ML
| MR
| IL
| IR
| S
| E
| B
| EL
deriving (Eq,Ord,Enum,Show,Read)
newtype StateID = StateID {unStateID :: Int}
deriving (Eq,Ord,Show,Read,Prim,Ix,Enum,Num)
illegalState = StateID $ 1
data Emits
= EmitsSingle { _single :: [(Char, BitScore)] }
| EmitsPair { _pair :: [(Char, Char, BitScore)] }
| EmitNothing
deriving (Eq,Ord,Show,Read)
makeLenses ''Emits
data State = State
{ _stateID :: StateID
, _nodeID :: NodeID
, _nodeType :: NodeType
, _stateType :: StateType
, _transitions :: [(StateID,BitScore)]
, _emits :: Emits
} deriving (Eq,Ord,Show,Read)
makeLenses ''State
data CM = CM
{ _name :: Identification Rfam
, _accession :: Accession Rfam
, _version :: CMVersion
, _trustedCutoff :: BitScore
, _gathering :: BitScore
, _noiseCutoff :: Maybe BitScore
, _nullModel :: VU.Vector BitScore
, _nodes :: M.Map NodeID (NodeType,[StateID])
, _states :: M.Map StateID State
, _localBegin :: M.Map StateID BitScore
, _localEnd :: M.Map StateID BitScore
, _unsorted :: M.Map ByteString ByteString
, _hmm :: Maybe HMM.HMM3
} deriving (Show,Read)
makeLenses ''CM
type ID2CM = M.Map (Identification Rfam) CM
type AC2CM = M.Map (Accession Rfam) CM
makeLocal :: Double -> Double -> CM -> CM
makeLocal pbegin pend cm = makeLocalEnd pend $ makeLocalBegin pbegin cm
makeLocalBegin :: Double -> CM -> CM
makeLocalBegin pbegin cm = cm{_localBegin = lb} where
lb = M.fromList . P.map (\s -> (s^.stateID, if s^.nodeID==NodeID 1 then prob2Score 1 (1pbegin) else prob2Score 1 (pbegin/l))) $ ss
l = genericLength ss
ss = P.filter (\s -> s^.stateType `P.elem` [MP,ML,MR,B]) . M.elems $ cm ^. states
makeLocalEnd :: Double -> CM -> CM
makeLocalEnd pend cm = cm{_localEnd = le} where
le = M.fromList . P.map (\s -> (s^.stateID, prob2Score 1 (pend/l))) $ ss
l = genericLength ss
ss = P.filter (\s -> s^.stateType `P.elem` [MP,MP,MR,S] && s^.nodeType/=ROOT && notEnding s) . M.elems $ cm^.states
notEnding s = not . P.any (==E) . P.map ((^.stateType) . ((cm^.states) M.!) . fst) $ s^.transitions
instance Shape sh => Shape (sh:.StateID) where
rank (sh:._) = rank sh + 1
zeroDim = zeroDim :. (StateID 0)
unitDim = unitDim :. (StateID 1)
intersectDim (sh1 :. StateID n1) (sh2 :. StateID n2) = intersectDim sh1 sh2 :. StateID (min n1 n2)
addDim (sh1 :. StateID n1) (sh2 :. StateID n2) = addDim sh1 sh2 :. StateID (n1+n2)
size (sh :. StateID n) = R.size sh * n
sizeIsValid (sh :. StateID n)
| R.size sh > 0 = n <= maxBound `div` R.size sh
| otherwise = False
toIndex (sh1 :. StateID n1) (sh2 :. StateID n2) = toIndex sh1 sh2 * n1 + n2
fromIndex (ds :. StateID d) n = fromIndex ds (n `quotInt` d) :. StateID r where
r | rank ds == 0 = n
| otherwise = n `remInt` d
inShapeRange (sh1 :. StateID n1) (sh2 :. StateID n2) (zs :. StateID z) = (z >= n1) && (z < n2) && inShapeRange sh1 sh2 zs
listOfShape (sh :. StateID n) = n : listOfShape sh
shapeOfList xx
= case xx of
[] -> error $ "shapeOfList empty in StateID"
(x:xs) -> shapeOfList xs :. StateID x
deepSeq (sh :. n) x = deepSeq sh (n `seq` x)
instance ExtShape sh => ExtShape (sh:.StateID) where
subDim (sh1 :. StateID n1) (sh2 :. StateID n2) = subDim sh1 sh2 :. StateID (n1n2)
rangeList (sh1 :. StateID n1) (sh2 :. StateID n2) = [sh :. StateID n | sh <- rangeList sh1 sh2, n <- [n1 .. (n1+n2)] ]