-- | Infernal CMs.

module Biobase.Infernal.CM where

import Data.ByteString as BS
import Data.Map as M
import Data.Vector as V
import Data.Vector.Unboxed as VU

import Data.PrimitiveArray
import Data.PrimitiveArray.Ix

import Biobase.Infernal.Types



-- | A datatype representing Infernal covariance models. This is a new
-- representation that is incompatible with the one once found in "Biobase".
-- The most important difference is that lookups are mapped onto efficient data
-- structures, currently "PrimitiveArray".
--
-- [1] Each "State" of a covariance model has up to 6 transition scores, hence
-- we need s*6 cells for transitions.
--
-- [2] Each "State" of a covariance model has up to 16 emission scores, so we
-- have s*16 cells for emissions, with unused cells set to a really high score.
--
-- On top of these basic structures, we then place additional high-level
-- constructs.
--
-- [3] 'paths' are allowed transitions. This can save a check, if the
-- transition is encoded with a forbidden score.
--
-- [4] 'localBegin' and 'localEnd' are local entry and exit strategies. A
-- 'localBegin' is a transition score to certain states, all such transitions
-- are in 'begins'. A 'localEnd' is a transition score to a local end state.
--
-- NOTE that trustedCutoff > gathering > noiseCutoff
--
-- TODO as with other projects, we should not use Doubles but "Score" and
-- "Probability" newtypes.

data CM = CM
  { name          :: ModelIdentification
    -- ^ Name of the model, as in "tRNA".
  , accession     :: ModelAccession
    -- ^ RFxxxxx identification of the model.
  , trustedCutoff :: BitScore
    -- ^ Lowest score of a true member.
  , gathering     :: BitScore
    -- ^ All scores at or above the 'gathering' score are in the "full"
    -- alignment.
  , noiseCutoff   :: Maybe BitScore
    -- ^ Highest score NOT included as a member; may be absent.
  , transition    :: PrimArray (Int,Int) Double
    -- ^ Transition scores, up to 6 per state.
    -- NOTE(review): the index is presumably (state, transition slot) --
    -- confirm against the code that fills this array.
  , emission      :: PrimArray (Int,Int) Double
    -- ^ Emission scores, up to 16 per state; unused cells are set to a really
    -- high score.
    -- NOTE(review): the index is presumably (state, emission slot) -- confirm
    -- against the code that fills this array.
  , paths         :: V.Vector (VU.Vector Double)
    -- ^ The allowed transitions.
    -- NOTE(review): the outer vector is presumably indexed by state -- confirm.
  , localBegin    :: VU.Vector Double
    -- ^ Local entry: transition scores to certain states.
  , begins        :: VU.Vector Int
    -- ^ All the 'localBegin' transitions.
  , localEnd      :: VU.Vector Double
    -- ^ Local exit: transition scores to a local end state.
  , nodes         :: V.Vector (VU.Vector Int)
    -- ^ NOTE(review): not described elsewhere in this module; presumably the
    -- states belonging to each node -- confirm against the importer.
  }
  deriving Show

-- | Lookup table from a model name ('ModelIdentification') to the
-- corresponding covariance model ('CM').

type ID2CM = M.Map ModelIdentification CM

-- | Lookup table from a model accession number ('ModelAccession') to the
-- corresponding covariance model ('CM').

type AC2CM = M.Map ModelAccession CM