module NLP.Probability.Example.Trigram (
) where
import qualified Data.Text as T
import qualified Data.Map as M
import Data.Monoid
import NLP.Probability.ConditionalDistribution
import NLP.Probability.Observation
-- | A single token, wrapping the 'T.Text' it was read from.
--
-- Equality and ordering are derived, so they are those of the wrapped text.
--
-- NOTE(review): since base-4.8 the Prelude also exports a type called
-- @Word@, so unqualified uses of this type name are ambiguous unless the
-- module hides the Prelude one (@import Prelude hiding (Word)@) -- confirm
-- against the GHC version this file targets.
newtype Word = Word T.Text
  deriving (Eq, Ord)
-- | The conditioning context of a trigram: a pair of 'Word's. Within this
-- module the pair holds the two words that precede the observed word, in
-- sentence order.
newtype TrigramContext
  = Trigram (Word, Word)
-- | Registers 'Word' as an 'Event', selecting 'M.Map' as its associated
-- 'EventMap' container.
instance Event Word where
  type EventMap Word = M.Map
-- | Registers 'TrigramContext' as a 'Context' whose sub-contexts are
-- individual 'Word's, with 'M.Map' selected as the associated 'SubMap'.
instance Context TrigramContext where
  type Sub TrigramContext = Word
  type SubMap TrigramContext = M.Map

  -- Lists the two context words in the order they are stored in the pair.
  decompose (Trigram (prev2, prev1)) = prev2 : [prev1]
-- | Collects the trigram observations of a single sentence.
--
-- The sentence is split on single spaces and padded with two start markers
-- (@*S1*@, @*S2*@) and two end markers (@*E1*@, @*E2*@). Every run of three
-- consecutive tokens @a b c@ then contributes one observation of the word
-- @c@ in the context @Trigram (a, b)@; the observations are merged with
-- 'mconcat'.
--
-- NOTE(review): consecutive spaces produce empty tokens, and these are
-- observed like any other word.
makeTrigrams :: T.Text -> CondObserved Word TrigramContext
makeTrigrams sentence =
  mconcat
    [ condObservation next (Trigram (prev2, prev1))
    | (prev2, prev1, next) <- zip3 tokens (drop 1 tokens) (drop 2 tokens)
    ]
  where
    -- 'T.splitOn' is the function that takes a 'T.Text' delimiter; in
    -- current versions of text, 'T.split' expects a character predicate.
    -- The zip3 sliding window above is total, so no partial helper is needed.
    tokens =
      map Word $
        ["*S1*", "*S2*"] ++ T.splitOn " " sentence ++ ["*E1*", "*E2*"]
-- | Builds a trigram model from raw text.
--
-- The input is packed into 'T.Text' and cut into sentences at every @.@;
-- each sentence is turned into observations by 'makeTrigrams' and the
-- results are merged with 'mconcat'. The merged observations are then passed
-- through @estimateGeneralLinear (wittenBell 5)@ and 'mkDist' (presumably
-- Witten-Bell smoothing; the meaning of the argument @5@ is not visible
-- here -- TODO confirm).
--
-- NOTE(review): whatever follows the last @.@ -- including nothing at all --
-- is treated as one more sentence.
languageModel :: String -> CondDistribution Word TrigramContext
languageModel sentences =
  mkDist (estimateGeneralLinear (wittenBell 5) observations)
  where
    -- 'T.splitOn' is the function that takes a 'T.Text' delimiter; in
    -- current versions of text, 'T.split' expects a character predicate.
    observations =
      mconcat (map makeTrigrams (T.splitOn "." (T.pack sentences)))
-- | Queries a model for one trigram given as three strings.
--
-- @lm@ is applied first to the context built from @w1@ and @w2@ and then to
-- @w3@ as the word; whatever @lm@ yields is returned unchanged.
prob lm (w1, w2, w3) = lm context (toWord w3)
  where
    -- Wraps a plain 'String' as a 'Word'.
    toWord = Word . T.pack
    context = Trigram (toWord w1, toWord w2)