{-# LANGUAGE OverloadedStrings #-}
module Bio.GO
    ( GO(..)
    , GOId
    , GOMap
    , getParentById
    , addTerm
    , enrichment
    ) where

import           Bio.Utils.Functions (hyperquick)
import qualified Data.HashMap.Strict as M
import qualified Data.HashSet        as S
import qualified Data.Text           as T

-- | A single GO term record. All fields are strict.
data GO = GO
    { _oboId        :: !GOId
      -- ^ Identifier of this term.
    , _label        :: !T.Text
      -- ^ Textual label of the term.
    , _subProcessOf :: !(Maybe GOId)
      -- ^ Identifier of the parent term, or 'Nothing' when the term has no
      -- parent. This is the link followed by 'getParentById' and 'addTerm'.
    , _oboNS        :: !T.Text
      -- ^ NOTE(review): presumably the OBO namespace the term belongs to;
      -- nothing in this module reads it, so confirm against the parser.
    } deriving (Show, Read)

-- | Identifier of a GO term, represented as a plain 'Int'.
type GOId = Int

-- | Collection of GO terms keyed by 'GOId'.
-- NOTE(review): presumably every term is stored under its own '_oboId';
-- confirm against the code that builds the map.
type GOMap = M.HashMap GOId GO

-- | Table of integer counts keyed by 'GOId', as accumulated by 'addTerm' and
-- consumed by 'enrichment'.
type TermCount = M.HashMap GOId Int

-- | Find the parent of the term stored under the given id.
--
-- Yields 'Nothing' when the id is not in the map, when the term has no
-- '_subProcessOf' link, or when the linked parent id is itself not in the map.
getParentById :: GOId -> GOMap -> Maybe GO
getParentById gid goMap = do
    child    <- M.lookup gid goMap
    parentId <- _subProcessOf child
    M.lookup parentId goMap
{-# INLINE getParentById #-}

-- | Add a GO term to the count table. The count propagates from the term to
-- its ancestors: the term itself and every term reached by following
-- '_subProcessOf' links are each incremented by one.
--
-- A set of already-visited ids stops the walk as soon as a term is seen a
-- second time, so cyclic graphs terminate and no term is counted more than
-- once per call.
--
-- Calls 'error' if a term names a parent id that is absent from the 'GOMap',
-- reporting both the child and the missing parent id.
addTerm :: GO -> GOMap -> TermCount -> TermCount
addTerm g m t = loop S.empty g t
  where
    loop visited go table
        | _oboId go `S.member` visited = table
        | otherwise = case _subProcessOf go of
            Nothing -> table'
            Just gid -> case M.lookup gid m of
                Just parent -> loop (S.insert (_oboId go) visited) parent table'
                -- A dangling parent link means the ontology map is
                -- incomplete; fail with the offending ids instead of a bare
                -- 'undefined'.
                Nothing -> error $ "Bio.GO.addTerm: parent term " ++ show gid
                    ++ " of term " ++ show (_oboId go)
                    ++ " is missing from the GO map"
      where
        -- Count table with the current term incremented (or inserted as 1).
        table' = M.insertWith (+) (_oboId go) 1 table

-- | Compute enrichment statistics for every term in the foreground table.
--
-- Each element of the result is @(term id, enrichment, p)@ where
--
-- * enrichment is @(fg_count * bg_total) / (fg_total * bg_count)@, i.e. the
--   foreground frequency of the term divided by its background frequency;
-- * p is @1 - hyperquick fg_count bg_count fg_total bg_total@.
--   NOTE(review): presumably an upper-tail hypergeometric probability;
--   confirm against 'hyperquick' in "Bio.Utils.Functions".
--
-- Results follow the order of 'M.toList' on the foreground table. Every
-- foreground term must also be present in the background table; otherwise
-- 'error' is raised, naming the term, when that entry's enrichment or p is
-- demanded.
enrichment :: (TermCount, Int)  -- ^ Background frequency and the total number
           -> (TermCount, Int)  -- ^ Foreground
           -> [(GOId, Double, Double)]
enrichment (bg, bg_total) (fg, fg_total) =
    flip map (M.toList fg) $ \(gid, fg_count) ->
        let bg_count = M.lookupDefault (missingBackground gid) gid bg
            -- Convert each operand before multiplying so the products are
            -- formed in Double and cannot wrap around as Int.
            enrich = (fromIntegral fg_count * fromIntegral bg_total) /
                     (fromIntegral fg_total * fromIntegral bg_count)
            p = 1 - hyperquick fg_count bg_count fg_total bg_total
        in (gid, enrich, p)
  where
    -- Descriptive failure for a foreground term with no background count.
    missingBackground gid = error $ "Bio.GO.enrichment: term " ++ show gid
        ++ " has no count in the background table"