module NLP.Brillig.Unigram where
import Data.List
import Data.Text (Text)
import qualified Data.Text as T
import Data.Function
import Data.Ord
import qualified Data.Map as Map
import NLP.Brillig
-- | Tag every token with the highest-count tag stored for it in the model.
--
-- Each token is lower-cased before it is looked up. A token that is absent
-- from the model, or whose tag table is empty, is tagged @UNK@. The original
-- (non-lower-cased) token is what appears in the result.
tag :: Count -> [Text] -> [Tagged Text]
tag m = map tagw
  where
    unknown = Tag "UNK"
    tagw w = (w, look w)
    look w = case Map.lookup (T.toLower w) m of
      Just sm
        | not (Map.null sm) -> best (Map.toList sm)
      -- Either the word is missing, or its tag table is empty; the latter
      -- would otherwise reach 'maximumBy' with an empty list.
      _ -> unknown
-- | Select the tag paired with the largest count.
--
-- An empty list yields the @UNK@ tag instead of the runtime exception that
-- 'maximumBy' throws on empty input, so callers such as 'mostfreq' stay total
-- when there is nothing to choose from.
best :: [(Tag, Int)] -> Tag
best [] = Tag "UNK"
best counts = fst (maximumBy (comparing snd) counts)
-- | The overall most frequent tag in the model: the per-word tag counts are
-- added up tag by tag, and the tag with the largest total is picked by 'best'.
mostfreq :: Count -> Tag
mostfreq model = best (Map.toList totals)
  where
    -- Combine every word's tag table, summing counts for identical tags.
    totals = Map.unionsWith (+) (Map.elems model)