-- | This module defines a simple scoring scheme based on pairs of unigrams. module NLP.Scoring.SimpleUnigram where import GHC.Generics import Data.HashMap.Strict import Data.Aeson import NLP.Text.BTI -- | Score 'BTI's @x@ and @y@ based on the simple scoring system: (i) -- lookup (x,y) and use the score if found; (ii) if (x,y) is not in the -- database, then return the default matching 'defMatch' score if @x==y@, -- otherwise return the default mismatch 'defMismatch' score. scoreUnigram :: SimpleScoring -> BTI -> BTI -> Double scoreUnigram SimpleScoring {..} x y = lookupDefault (if x==y then defMatch else defMismatch) (x,y) simpleScore {-# INLINE scoreUnigram #-} -- | Collect the hashtable and scalar values for simple scoring. -- -- TODO binary and cereal instances data SimpleScoring = SimpleScoring { simpleScore :: !(HashMap (BTI,BTI) Double) , gapScore :: !Double , gapOpen :: !Double , gapExt :: !Double , defMatch :: !Double , defMismatch :: !Double , preSufOpen :: !Double , preSufExt :: !Double } deriving (Read,Show,Eq,Generic) instance FromJSON SimpleScoring where parseJSON (Object v) = SimpleScoring <$> (fromList `fmap` (v .: "simpleScore")) <*> v .: "gapScore" <*> v .: "gapOpen" <*> v .: "gapExt" <*> v .: "defMatch" <*> v .: "defMismatch" <*> v .: "preSufOpen" <*> v .: "preSufExt" instance ToJSON SimpleScoring where toJSON SimpleScoring {..} = object [ "simpleScore" .= toList simpleScore , "gapScore" .= gapScore , "gapOpen" .= gapOpen , "gapExt" .= gapExt , "defMatch" .= defMatch , "defMismatch" .= defMismatch , "preSufOpen" .= preSufOpen , "preSufExt" .= preSufExt ]