-- | This module defines a simple scoring scheme based on pairs of unigrams. module NLP.Scoring.SimpleUnigram where import GHC.Generics import Data.HashMap.Strict import Data.Aeson import NLP.Text.BTI -- | Score 'BTI's @x@ and @y@ based on the simple scoring system: (i) -- lookup (x,y) and use the score if found; (ii) if (x,y) is not in the -- database, then return the default matching 'defMatch' score if @x==y@, -- otherwise return the default mismatch 'defMismatch' score. scoreUnigram :: SimpleScoring -> BTI -> BTI -> Double scoreUnigram SimpleScoring {..} x y = lookupDefault (if x==y then defMatch else defMismatch) (x,y) simpleScore {-# INLINE scoreUnigram #-} -- | Collect the hashtable and scalar values for simple scoring. -- -- TODO binary and cereal instances data SimpleScoring = SimpleScoring { simpleScore :: !(HashMap (BTI,BTI) Double) , gapScore :: !Double , gapOpen :: !Double , gapExtend :: !Double , defMatch :: !Double , defMismatch :: !Double } deriving (Read,Show,Eq,Generic) instance FromJSON SimpleScoring where parseJSON (Object v) = SimpleScoring <$> (fromList `fmap` (v .: "simpleScore")) <*> v .: "gapScore" <*> v .: "gapOpen" <*> v .: "gapExtend" <*> v .: "defMatch" <*> v .: "defMismatch" instance ToJSON SimpleScoring where toJSON (SimpleScoring ss gs go ge dm di) = object [ "simpleScore" .= toList ss , "gapScore" .= gs , "gapOpen" .= go , "gapExtend" .= ge , "defMatch" .= dm , "defMismatch" .= di ]