{-# LANGUAGE OverloadedStrings #-}

-- | Unigram tagging: every word is tagged on its own, using the tag that has
-- the highest count recorded for that word.
module NLP.Brillig.Unigram where

import Data.List
import Data.Text (Text)
import qualified Data.Text as T
import Data.Function
import Data.Ord
import qualified Data.Map as Map
import NLP.Brillig

-- | Tag each word with the highest-count tag recorded for it in the given
-- counts.
--
-- Words are lower-cased before the lookup, but the word is returned in its
-- original form.  A word that has no entry, or whose entry holds no tags at
-- all, is tagged @Tag \"UNK\"@.
tag :: Count -> [Text] -> [Tagged Text]
tag m = map tagw
  where
    -- Fallback for words missing from the counts.  The original author left
    -- an open question here: should this be 'mostfreq' instead?
    unknown = Tag "UNK"

    tagw w = (w, look w)

    -- 'best' is total, so an entry with an empty tag map cannot crash here.
    look w = maybe unknown (best . Map.toList) (Map.lookup (T.toLower w) m)

-- | Pick the tag with the largest count.
--
-- Ties are resolved however 'maximumBy' resolves them.  For an empty list
-- there is nothing to pick, so @Tag \"UNK\"@ is returned; 'maximumBy' on its
-- own would throw an exception in that case.
best :: [(Tag, Int)] -> Tag
best [] = Tag "UNK"
best candidates = fst (maximumBy (compare `on` snd) candidates)

-- | The tag with the largest total count, summed over all words.
--
-- Returns @Tag \"UNK\"@ when the counts contain no tags at all.
mostfreq :: Count -> Tag
mostfreq = best . Map.toList . Map.unionsWith (+) . Map.elems