concraft-0.14.2: Morphological disambiguation based on constrained CRFs

Safe HaskellNone
LanguageHaskell98

NLP.Concraft.DAG.Guess

Contents

Synopsis

Types

data Guesser t s Source #

A guessing model.

Constructors

Guesser 

Fields

  • schemaConf :: SchemaConf
     
  • crf :: CRF Ob s
     
  • zeroProbLab :: s
     
  • unkTagSet :: Set t

    The tagset considered for the unknown words (TODO: a solution parallel and not 100% consistent with what is implemented in the CRF library) TODO: with complexify, unkTagSet is not needed anymore!

  • simplify :: t -> s

    A tag simplification function

  • complexify :: s -> t

    NEW: instead of an unkTagSet, a function which makes a complex tag out of a simple tag.

    WARNING: we assume, that this function does not conflate simplified tags, i.e., tag to tags of type s cannot lead to one and the same complex tag of type t.

putGuesser :: (Binary t, Binary s, Ord s) => Guesser t s -> Put Source #

Store the entire guessing model apart from the simplification function.

getGuesser :: (Binary t, Binary s, Ord s, Ord t) => (t -> s) -> (s -> t) -> Get (Guesser t s) Source #

Get the disambiguation model, provided the simplification function. getGuesser :: (M.Map t T.Tag) -> Get (Guesser t)

Marginals

marginals :: (Word w, Ord t, Ord s) => Config s -> Guesser t s -> Sent w t -> DAG () (WMap t) Source #

Determine the marginal probabilities of the individual labels in the sentence.

marginalsSent :: (Word w, Ord t, Ord s) => Config s -> Guesser t s -> Sent w t -> Sent w t Source #

Replace the probabilities of the sentence labels with the marginal probabilities stemming from the model.

Training

data TrainConf t s Source #

Training configuration.

Constructors

TrainConf 

Fields

data R0T Source #

Method of constructing the default set of labels (R0).

Instances
Enum R0T Source # 
Instance details

Defined in NLP.Concraft.DAG.Guess

Methods

succ :: R0T -> R0T #

pred :: R0T -> R0T #

toEnum :: Int -> R0T #

fromEnum :: R0T -> Int #

enumFrom :: R0T -> [R0T] #

enumFromThen :: R0T -> R0T -> [R0T] #

enumFromTo :: R0T -> R0T -> [R0T] #

enumFromThenTo :: R0T -> R0T -> R0T -> [R0T] #

Eq R0T Source # 
Instance details

Defined in NLP.Concraft.DAG.Guess

Methods

(==) :: R0T -> R0T -> Bool #

(/=) :: R0T -> R0T -> Bool #

Data R0T Source # 
Instance details

Defined in NLP.Concraft.DAG.Guess

Methods

gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> R0T -> c R0T #

gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c R0T #

toConstr :: R0T -> Constr #

dataTypeOf :: R0T -> DataType #

dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c R0T) #

dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c R0T) #

gmapT :: (forall b. Data b => b -> b) -> R0T -> R0T #

gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> R0T -> r #

gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> R0T -> r #

gmapQ :: (forall d. Data d => d -> u) -> R0T -> [u] #

gmapQi :: Int -> (forall d. Data d => d -> u) -> R0T -> u #

gmapM :: Monad m => (forall d. Data d => d -> m d) -> R0T -> m R0T #

gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> R0T -> m R0T #

gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> R0T -> m R0T #

Ord R0T Source # 
Instance details

Defined in NLP.Concraft.DAG.Guess

Methods

compare :: R0T -> R0T -> Ordering #

(<) :: R0T -> R0T -> Bool #

(<=) :: R0T -> R0T -> Bool #

(>) :: R0T -> R0T -> Bool #

(>=) :: R0T -> R0T -> Bool #

max :: R0T -> R0T -> R0T #

min :: R0T -> R0T -> R0T #

Show R0T Source # 
Instance details

Defined in NLP.Concraft.DAG.Guess

Methods

showsPrec :: Int -> R0T -> ShowS #

show :: R0T -> String #

showList :: [R0T] -> ShowS #

train Source #

Arguments

:: (Word w, Ord t, Ord s) 
=> TrainConf t s

Training configuration

-> IO [Sent w t]

Training data

-> IO [Sent w t]

Evaluation data

-> IO (Guesser t s) 

Train guesser.

Utils

schemed :: (Word w, Ord t, Ord s) => (t -> s) -> Schema w s a -> [Sent w t] -> [SentL Ob s] Source #

Schematized dataset.