Safe Haskell	None
Language	Haskell2010

NLP.Concraft.Polish.DAGSeg

Contents

Types
- Simplification
- Model
Tagging
- High level
Training
Pruning

Description

DAG-based model for morphosyntactic tagging.

Synopsis

type Tag = Interp Tag
simplify4gsr :: Tagset -> Interp Tag -> Tag
simplify4dmb :: Tagset -> Interp Tag -> Tag
data Concraft t
saveModel :: (Ord t, Binary t) => FilePath -> Concraft t -> IO ()
loadModel :: (Ord t, Binary t) => (Tagset -> t -> Tag) -> (Tagset -> t -> Tag) -> FilePath -> IO (Concraft t)
guess :: Config Tag -> Concraft Tag -> Sent Tag -> Sent Tag
data AnnoSent = AnnoSent {
- guessSent :: Sent Tag
- disambs :: Anno Tag Bool
- marginals :: Anno Tag Double
- maxProbs :: Anno Tag Double
}
data AnnoConf = AnnoConf {
- trimParam :: Int
- pickPath :: Maybe PathTyp
- blackSet :: Set Text
}
annoAll :: AnnoConf -> Concraft Tag -> Sent Tag -> [AnnoSent]
data TrainConf = TrainConf {
- tagset :: Tagset
- sgdArgs :: SgdArgs
- onDisk :: Bool
- guessNum :: Int
- r0 :: R0T
- zeroProbLabel :: Tag
- guessOnlyVisible :: Bool
- globalConfig :: Config
}
train :: TrainConf -> IO [Sent Tag] -> IO [Sent Tag] -> IO (Concraft Tag)

Types

type Tag = Interp Tag Source #

Simplification

simplify4gsr :: Tagset -> Interp Tag -> Tag Source #

Simplify the tag for the sake of the guessing model. TODO: it is also used in the evaluation script, which assumes that simplify4gsr simplifies to a positional tag. Perhaps the name of the function should reflect this, or there should be two separate functions: one dedicated to the guesser, and one dedicated to evaluation (and other, more generic things).

simplify4dmb :: Tagset -> Interp Tag -> Tag Source #

Simplify the tag for the sake of the disambiguation model.

Model

data Concraft t #

Concraft data.

saveModel :: (Ord t, Binary t) => FilePath -> Concraft t -> IO () #

Save model in a file. Data is compressed using the gzip format.

loadModel #

Arguments

:: (Ord t, Binary t)
=> (Tagset -> t -> Tag)	Guesser simplification function
-> (Tagset -> t -> Tag)	Disamb simplification function
-> FilePath
-> IO (Concraft t)

Load model from a file.

Tagging

guess :: Config Tag -> Concraft Tag -> Sent Tag -> Sent Tag Source #

Tag the sentence with guessing marginal probabilities.

High level

data AnnoSent Source #

Annotated sentence.

Constructors

AnnoSent
Fields
- guessSent :: Sent Tag — The sentence after guessing and segmentation (TODO: and annotated with marginal probabilities?)
- disambs :: Anno Tag Bool — Disambiguation markers
- marginals :: Anno Tag Double — Marginal probabilities according to the disambiguation model
- maxProbs :: Anno Tag Double — Maximal probabilities according to the disambiguation model

data AnnoConf Source #

Annotation config.

Constructors

AnnoConf
Fields
- trimParam :: Int — How many morphosyntactic tags should be kept for OOV words
- pickPath :: Maybe PathTyp — Which path picking method should be used. The function takes the [...] (description truncated in the original documentation)
- blackSet :: Set Text — The set of blacklisted tags

annoAll :: AnnoConf -> Concraft Tag -> Sent Tag -> [AnnoSent] Source #

Annotate all possibly interesting information.

Training

data TrainConf Source #

Training configuration.

Constructors

TrainConf
Fields
- tagset :: Tagset — Tagset.
- sgdArgs :: SgdArgs — SGD parameters.
- onDisk :: Bool — Store SGD dataset on disk.
- guessNum :: Int — Number of guessed tags for each word.
- r0 :: R0T — `r0T` parameter.
- zeroProbLabel :: Tag — `zeroProbLabel` parameter.
- guessOnlyVisible :: Bool — Extract only visible features for the guesser.
- globalConfig :: Config — Global configuration.

train Source #

Arguments

:: TrainConf
-> IO [Sent Tag]	Training data
-> IO [Sent Tag]	Evaluation data
-> IO (Concraft Tag)

Train concraft model. TODO: It should be possible to supply the two training procedures with different SGD arguments.

Types

Simplification

Model

Tagging

High level

Training

Pruning