module NLP.Concraft.Morphosyntax
(
Seg (..)
, mapSeg
, interpsSet
, interps
, Word (..)
, Sent
, mapSent
, SentO (..)
, mapSentO
, WMap (unWMap)
, mapWMap
, mkWMap
) where
import Control.Applicative ((<$>), (<*>))
import Control.Arrow (first)
import Data.Aeson
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
data Seg w t = Seg {
word :: w
, tags :: WMap t }
deriving (Show)
instance ToJSON w => ToJSON (Seg w T.Text) where
toJSON Seg{..} = object
[ "word" .= word
, "tags" .= unWMap tags ]
instance FromJSON w => FromJSON (Seg w T.Text) where
parseJSON (Object v) = Seg
<$> v .: "word"
<*> (WMap <$> v .: "tags")
parseJSON _ = error "parseJSON (segment): absurd"
mapSeg :: Ord b => (a -> b) -> Seg w a -> Seg w b
mapSeg f w = w { tags = mapWMap f (tags w) }
interpsSet :: Seg w t -> S.Set t
interpsSet = M.keysSet . unWMap . tags
interps :: Seg w t -> [t]
interps = S.toList . interpsSet
class Word a where
orth :: a -> T.Text
oov :: a -> Bool
instance Word w => Word (Seg w t) where
orth = orth . word
oov = oov . word
type Sent w t = [Seg w t]
mapSent :: Ord b => (a -> b) -> Sent w a -> Sent w b
mapSent = map . mapSeg
data SentO w t = SentO
{ segs :: Sent w t
, orig :: L.Text }
deriving (Show)
mapSentO :: Ord b => (a -> b) -> SentO w a -> SentO w b
mapSentO f x =
let segs' = mapSent f (segs x)
in x { segs = segs' }
newtype WMap a = WMap { unWMap :: M.Map a Double }
deriving (Show, Eq, Ord)
mkWMap :: Ord a => [(a, Double)] -> WMap a
mkWMap = WMap . M.fromListWith (+) . filter ((>=0).snd)
mapWMap :: Ord b => (a -> b) -> WMap a -> WMap b
mapWMap f = mkWMap . map (first f) . M.toList . unWMap