{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE OverloadedStrings #-} -- | The module provides functions for splitting positional tags. -- They can be used together with the layered disambiguation model. module NLP.Concraft.Disamb.Positional ( Tier (..) , Atom (..) , select , split ) where import Control.Applicative ((<$>), (<*>)) import Data.Binary (Binary, put, get) import Data.Text.Binary () import qualified Data.Set as S import qualified Data.Map as M import qualified Data.Text as T import qualified Data.Tagset.Positional as TP -- | A tier description. data Tier = Tier { -- | Does it include the part of speech? withPos :: Bool -- | Tier grammatical attributes. , withAtts :: S.Set TP.Attr } instance Binary Tier where put Tier{..} = put withPos >> put withAtts get = Tier <$> get <*> get -- | An atomic part of morphosyntactic tag with optional POS. data Atom = Atom { pos :: Maybe TP.POS , atts :: M.Map TP.Attr T.Text } deriving (Show, Eq, Ord) instance Binary Atom where put Atom{..} = put pos >> put atts get = Atom <$> get <*> get -- | Select tier attributes. select :: Tier -> TP.Tag -> Atom select Tier{..} tag = Atom { pos = if withPos then Just (TP.pos tag) else Nothing , atts = M.filterWithKey (\k _ -> k `S.member` withAtts) (TP.atts tag) } -- | Split the positional tag. split :: [Tier] -> TP.Tag -> [Atom] split tiers tag = [ select tier tag | tier <- tiers ]