module Bio.GO
( GO(..)
, GOId
, GOMap
, getParentById
, addTerm
, enrichment
) where
import Bio.Utils.Functions (hyperquick)
import qualified Data.HashMap.Strict as M
import qualified Data.HashSet as S
import qualified Data.Text as T
-- | A single GO term record.
--
-- All fields are strict. The 'Show' and 'Read' instances are derived, so
-- their textual format follows the field names and order below.
data GO = GO
    { _oboId        :: !GOId
      -- ^ Identifier of this term.
    , _label        :: !T.Text
      -- ^ Textual label of the term.
    , _subProcessOf :: !(Maybe GOId)
      -- ^ Identifier of the parent term, or 'Nothing' when the term has no
      -- parent.
    , _oboNS        :: !T.Text
      -- ^ NOTE(review): presumably the OBO namespace of the term; confirm
      -- against the code that constructs 'GO' values.
    } deriving (Show, Read)
-- | Identifier of a GO term, represented as a plain 'Int'.
type GOId = Int
-- | Lookup table from a 'GOId' to a 'GO' record.
type GOMap = M.HashMap GOId GO
-- | Integer counts keyed by 'GOId'.
type TermCount = M.HashMap GOId Int
-- | Look up the parent record of the term with the given id.
--
-- Returns 'Nothing' when the id is not in the map, when the term has no
-- parent ('_subProcessOf' is 'Nothing'), or when the parent id itself is not
-- in the map.
getParentById :: GOId -> GOMap -> Maybe GO
getParentById gid goMap = do
    term <- M.lookup gid goMap
    parentId <- _subProcessOf term
    M.lookup parentId goMap
-- | Count a term together with every ancestor reachable through
-- '_subProcessOf'.
--
-- Starting from the given term, each term on the parent chain has its entry
-- in the count table incremented by one (or inserted with count 1 when
-- absent). The walk stops at a term without a parent, or as soon as a term is
-- reached a second time, so a cyclic parent chain cannot loop forever.
--
-- Calls 'error' when a term names a parent id that is not a key of the
-- supplied 'GOMap'.
addTerm :: GO -> GOMap -> TermCount -> TermCount
addTerm g m t = loop S.empty g t
  where
    loop visited go table
        -- Already counted during this walk: the parent chain is cyclic.
        | goId `S.member` visited = table
        | otherwise = case _subProcessOf go of
            Nothing -> table'
            Just gid -> case M.lookup gid m of
                Just parent -> loop (S.insert goId visited) parent table'
                -- Fail with a message that identifies the broken link
                -- instead of an anonymous 'undefined'.
                Nothing -> error $
                    "Bio.GO.addTerm: parent term " ++ show gid
                    ++ " of term " ++ show goId
                    ++ " is not present in the GO map"
      where
        goId = _oboId go
        -- Count table with the current term's count bumped by one.
        table' = M.insertWith (+) goId 1 table
-- | Score every term of the foreground table against the background.
--
-- Each argument pairs a per-term count table with a total. The result holds
-- one @(term id, enrichment, p)@ triple per foreground entry, in the order
-- produced by 'M.toList', where
--
-- > enrichment = (fg_count * bg_total) / (fg_total * bg_count)
-- > p          = 1 - hyperquick fg_count bg_count fg_total bg_total
--
-- Calls 'error' (when the enrichment or p component is forced) if a
-- foreground term has no entry in the background table.
enrichment :: (TermCount, Int)  -- ^ background counts and background total
           -> (TermCount, Int)  -- ^ foreground counts and foreground total
           -> [(GOId, Double, Double)]
enrichment (bg, bg_total) (fg, fg_total) = map score (M.toList fg)
  where
    score (gid, fg_count) = (gid, enrich, p)
      where
        -- A foreground term must also have a background count; report which
        -- term is missing rather than failing with a bare 'undefined'.
        bg_count = case M.lookup gid bg of
            Just n -> n
            Nothing -> error $
                "Bio.GO.enrichment: term " ++ show gid
                ++ " is missing from the background counts"
        enrich = fromIntegral (fg_count * bg_total) /
                 fromIntegral (fg_total * bg_count)
        -- NOTE(review): assumes 'hyperquick' returns a cumulative
        -- probability, so that one minus it is a tail p-value; confirm
        -- against Bio.Utils.Functions.
        p = 1 - hyperquick fg_count bg_count fg_total bg_total