{-# LANGUAGE RecordWildCards #-} module NLP.Concraft.Morphosyntax ( Word (..) , mapWord , Sent , Choice , mapChoice , Positive (unPositive) , (<+>) , mkPositive , best , known ) where import Control.Arrow (first) import Data.Ord (comparing) import Data.List (maximumBy) import qualified Data.Set as S import qualified Data.Map as M import qualified Data.Text as T -- | A word parametrized over the tag type. data Word t = Word { -- | Orthographic form. orth :: T.Text -- | Set of word interpretations. , tags :: S.Set t } deriving (Show, Read, Eq, Ord) mapWord :: Ord b => (a -> b) -> Word a -> Word b mapWord f Word{..} = Word { orth = orth , tags = S.fromList . map f . S.toList $ tags } -- | A sentence of 'Word's. type Sent t = [Word t] -- | Interpretations chosen in the given context with -- corresponding positive weights. type Choice t = M.Map t (Positive Double) -- | Positive number. newtype Positive a = Positive { unPositive :: a } deriving (Show, Eq, Ord) (<+>) :: Num a => Positive a -> Positive a -> Positive a Positive x <+> Positive y = Positive (x + y) {-# INLINE (<+>) #-} mapChoice :: Ord b => (a -> b) -> Choice a -> Choice b mapChoice f = M.fromListWith (<+>) . map (first f) . M.toList mkPositive :: (Num a, Ord a) => a -> Positive a mkPositive x | x > 0 = Positive x | otherwise = error "mkPositive: not a positive number" {-# INLINE mkPositive #-} -- | Retrieve the most probable interpretation. best :: Choice t -> t best c | M.null c = error "best: null choice" | otherwise = fst . maximumBy (comparing snd) $ M.toList c -- | A word is considered to be known when the set of possible -- interpretations is not empty. known :: Word t -> Bool known = not . S.null . tags {-# INLINE known #-}