{-# LANGUAGE RecordWildCards #-}

-- | Types and functions related to the morphosyntax data layer.

module NLP.Concraft.Morphosyntax
( 
-- * Morphosyntax data
  Sent
, Word (..)
, mapWord
, mapSent
, interpsSet
, interps
-- * Weighted collection
, WMap (unWMap)
, mkWMap
, mapWMap
) where

import Control.Arrow (first)
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Text as T


-- | A sentence of 'Word's.
type Sent t = [Word t]

-- | A word parametrized over a tag type.
data Word t = Word {
    -- | Orthographic form.
      orth      :: T.Text
    -- | Set of word interpretations.  To each interpretation
    -- a "weight of correctness within the context" is assigned.
    , tagWMap   :: WMap t
    -- | Out-of-vocabulary (OOV) word, i.e. word unknown to the
    -- morphosyntactic analyser.
    , oov       :: Bool }
    deriving (Show, Eq, Ord)

-- | Map function over word tags.
mapWord :: Ord b => (a -> b) -> Word a -> Word b
mapWord f w = w { tagWMap = mapWMap f (tagWMap w) }

-- | Map function over sentence tags.
mapSent :: Ord b => (a -> b) -> Sent a -> Sent b
mapSent = map . mapWord

-- | Interpretations of the word.
interpsSet :: Word t -> S.Set t
interpsSet = M.keysSet . unWMap . tagWMap

-- | Interpretations of the word.
interps :: Word t -> [t]
interps = S.toList . interpsSet


-- | A weighted collection of type @a@ elements.
newtype WMap a = WMap { unWMap :: M.Map a Double }
    deriving (Show, Eq, Ord)

-- | Make a weighted collection.
mkWMap :: Ord a => [(a, Double)] -> WMap a
mkWMap = WMap . M.fromListWith (+) . filter ((>=0).snd)

-- | Map function over weighted collection elements. 
mapWMap :: Ord b => (a -> b) -> WMap a -> WMap b
mapWMap f = mkWMap . map (first f) . M.toList . unWMap