{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE EmptyDataDecls #-}

-- | HMMER3 HMMs. Since we do not understand HMMER3 HMMs yet, this is actually
-- just a small ``throw-away'' parser to successfully parse Infernal 1.1 CMs.
-- The next version should have a real working parser.
--
-- TODO in the future, we should split parsing into just grabbing lines between
-- HMMER and "//" and handling in-between. We need extraction of individual
-- models and similar fun.

module Biobase.SElab.HMM where

import Data.ByteString.Char8 as BS
import Control.Lens

import Biobase.SElab.Types



data HMM

data Alphabet
  = Amino
  | DNA
  | RNA
  | Coins
  | Dice
  | Custom
  deriving (Eq,Show,Read)

-- | Negated natural logarithm of probability.
--
-- TODO put into types stuff

newtype NegLogProb = NLP Double
  deriving (Show,Read)

-- | The nodes in an HMM. Starting with Node "0" for BEGIN.

data Node = Node
  { _nid :: Int
  , _matchE :: [NegLogProb] -- [] for BEGIN
  , _insertE :: [NegLogProb] -- insertions
  , _trans :: [NegLogProb] -- transitions: B->M1 B->I0 B->D1 I0->M1 I0->I0 0.0 * ||| Mk->Mk+1 Mk->Ik Mk->Dk+1 Ik->Mk+1 Ik->Ik Dk->Mk+1 Dk->Dk+1
  }
  deriving (Show,Read)

makeLenses ''Node

-- | The HMM3 data structure in ``slow mode''.
--
-- TODO shouldn't this be "Identification Pfam" ?
--
-- TODO maybe redo the whole "idd" idea and just keep the string?

data HMM3 = HMM3
  { _version :: (ByteString,ByteString)
  , _idd :: Identification HMM
  , _acc :: Maybe (Accession HMM)
  , _description :: Maybe ByteString
  , _leng :: Int -- mandatory >0 count of match states
  , _alph :: Alphabet
  , _rf :: Bool
  , _cs :: Bool
  , _alignMap :: Bool
  , _date :: ByteString
  , _symAlph :: [ByteString]
  , _transHeaders :: [ByteString]
  , _compo :: [NegLogProb]
  , _nodes :: [Node]
  } deriving (Show,Read)

makeLenses ''HMM3