module Discokitty.Multiwords where
import           Data.List
import           Data.Semigroup
import           Discokitty.HasCups
import           Discokitty.Lambek
import           Discokitty.Words
type Probability = Double
newtype Multiword m = Multiword [(Words m , Probability)]
instance (Show m) => Show (Multiword m) where
  show =
    intercalate "\n" .
    fmap (\ (w, p) -> show w ++ " with p=" ++ show p) .
    toList
toList :: Multiword m -> [(Words m , Probability)]
toList (Multiword a) = a
fromList :: [(Words m , Probability)] -> Multiword m
fromList = Multiword
singleton :: Words m -> Multiword m
singleton w = fromList [(w,1.0)]
multiconcat :: (HasCups m) => Multiword m -> Multiword m -> Multiword m
multiconcat x y = fromList $ do
  (w , p) <- toList x
  (v , q) <- toList y
  let concats = concatenate w v
  let newprob = (p * q) / fromIntegral (length concats)
  zip concats (repeat newprob)
infixr 4 `multiconcat`
multiempty :: (HasCups m) => Multiword m
multiempty = fromList [( emptyWord , 1 )]
instance (HasCups m) => Semigroup (Multiword m) where
  (<>) = multiconcat
instance (HasCups m) => Monoid (Multiword m) where
  mempty = multiempty
  mappend = multiconcat
sentence :: (HasCups m) => [Multiword m] -> Multiword m
sentence = mconcat
(@@) :: Multiword m -> Lambek -> Multiword m
ws @@ l = fromList $ fmap (\ (x,p) -> (x , p / totalprob)) newlist
  where
    totalprob = sum $ fmap snd newlist
    newlist = filter (\ (x , _) -> grammar x == l) (toList ws)