{-# LANGUAGE OverloadedStrings #-}

module NLP.Brillig.Unigram where

import Data.List
import Data.Text (Text)
import qualified Data.Text as T
import Data.Function
import Data.Ord
import qualified Data.Map as Map

import NLP.Brillig

-- | Tag every token with the tag it was most often counted with.
--
-- Each word is lower-cased before it is looked up in the count table, and
-- the original (un-lowered) word is what ends up in the result pair.
-- Words that are missing from the table, or whose entry holds no tag
-- counts at all, receive the @UNK@ tag.
tag :: Count -> [Text] -> [Tagged Text]
tag m = map tagw
 where
  -- Fallback for words without usable counts; the corpus-wide 'mostfreq'
  -- tag is a possible alternative.
  unknown = Tag "UNK"
  tagw w  = (w, look w)
  look w  = case Map.lookup (T.toLower w) m of
    -- 'best' is partial on an empty list, so only call it when the word
    -- actually has at least one counted tag.
    Just sm | not (Map.null sm) -> best (Map.toList sm)
    _                           -> unknown

-- | Pick the tag carrying the highest count from a list of
-- @(tag, count)@ pairs.
--
-- When several tags share the highest count, the one appearing last in the
-- list is returned (this is how 'maximumBy' resolves ties).
--
-- Partial: the list must be non-empty, otherwise 'maximumBy' raises an
-- exception.
best :: [(Tag,Int)] -> Tag
best candidates = fst (maximumBy (comparing snd) candidates)

-- | The single most frequent tag across the whole count table.
--
-- The per-word tag counts are summed tag by tag, and the tag with the
-- largest total is selected with 'best'.
--
-- Partial: if the table contains no tag counts at all, 'best' is applied to
-- an empty list and fails.
mostfreq :: Count -> Tag
mostfreq counts = best (Map.toList totals)
 where
  -- Total occurrences of each tag, summed over every word's entry.
  totals = Map.unionsWith (+) (Map.elems counts)