{-|
Module: Multiwords
Description: Multiwords represent non deterministic words.
License: GPL-3
A multiword is given by some finite set of meaning/gramamr pairs. They
generalize words and can be used to represent reductions where multiple
parsings are possible but also words with multiple acceptations.
This is experimental and it is not part of the standard DisCoCat
framework.
|-}
module Discokitty.Multiwords where
import Data.List
import Data.Semigroup
import Discokitty.HasCups
import Discokitty.Lambek
import Discokitty.Words
-- | The probability is given by real numbers.
type Probability = Double
-- | A multiword is given by a list of different words with different
-- probabilities. Note that these words do not need to have the same
-- grammar types.
newtype Multiword m = Multiword [(Words m , Probability)]
-- | Shows a multiword as a list of acceptations.
instance (Show m) => Show (Multiword m) where
show =
intercalate "\n" .
fmap (\ (w, p) -> show w ++ " with p=" ++ show p) .
toList
toList :: Multiword m -> [(Words m , Probability)]
toList (Multiword a) = a
fromList :: [(Words m , Probability)] -> Multiword m
fromList = Multiword
singleton :: Words m -> Multiword m
singleton w = fromList [(w,1.0)]
-- | Concatenates the meaning of multiple multiwords using the formal
-- cups on the meaning category.
multiconcat :: (HasCups m) => Multiword m -> Multiword m -> Multiword m
multiconcat x y = fromList $ do
(w , p) <- toList x
(v , q) <- toList y
let concats = concatenate w v
let newprob = (p * q) / fromIntegral (length concats)
zip concats (repeat newprob)
infixr 4 `multiconcat`
-- | The empty word for a formal cups multiword.
multiempty :: (HasCups m) => Multiword m
multiempty = fromList [( emptyWord , 1 )]
instance (HasCups m) => Semigroup (Multiword m) where
(<>) = multiconcat
instance (HasCups m) => Monoid (Multiword m) where
mempty = multiempty
mappend = multiconcat
-- | Concatenates a whole sentence of multiwords into a single one.
sentence :: (HasCups m) => [Multiword m] -> Multiword m
sentence = mconcat
-- | Filters the acceptations of the multiword that match the given
-- Lambek type.
(@@) :: Multiword m -> Lambek -> Multiword m
ws @@ l = fromList $ fmap (\ (x,p) -> (x , p / totalprob)) newlist
where
totalprob = sum $ fmap snd newlist
newlist = filter (\ (x , _) -> grammar x == l) (toList ws)