Safe Haskell | None |
---|---|
Language | Haskell98 |
Top-level module adapted to DAGs, guessing and disambiguation.
Synopsis
- data Concraft t = Concraft {}
- saveModel :: (Ord t, Binary t) => FilePath -> Concraft t -> IO ()
- loadModel :: (Ord t, Binary t) => (Tagset -> t -> Tag) -> FilePath -> IO (Concraft t)
- type Anno a b = DAG () (Map a b)
- replace :: Ord t => Anno t Double -> Sent w t -> Sent w t
- findOptimalPaths :: Anno t Double -> [[(EdgeID, t)]]
- disambPath :: Ord t => [(EdgeID, t)] -> Anno t Double -> Anno t Bool
- guessMarginals :: (Word w, Ord t) => Guesser t Tag -> Sent w t -> Anno t Double
- disambMarginals :: (Word w, Ord t) => Disamb t -> Sent w t -> Anno t Double
- disambProbs :: (Word w, Ord t) => ProbType -> Disamb t -> Sent w t -> Anno t Double
- guess :: (Word w, Ord t) => Int -> Guesser t Tag -> Sent w t -> Anno t Double
- guessSent :: (Word w, Ord t) => Int -> Guesser t Tag -> Sent w t -> Sent w t
- tag :: (Word w, Ord t) => Int -> Concraft t -> Sent w t -> Anno t Double
- train :: (Word w, Ord t) => Tagset -> Int -> TrainConf t Tag -> TrainConf t -> IO [Sent w t] -> IO [Sent w t] -> IO (Concraft t)
- prune :: Double -> Concraft t -> Concraft t
Model
Concraft data.
saveModel :: (Ord t, Binary t) => FilePath -> Concraft t -> IO () Source #
Save model in a file. Data is compressed using the gzip format.
loadModel :: (Ord t, Binary t) => (Tagset -> t -> Tag) -> FilePath -> IO (Concraft t) Source #
Load model from a file.
Annotation
type Anno a b = DAG () (Map a b) Source #
DAG annotation: assigns `b` values to `a` labels for each edge in the graph.
replace :: Ord t => Anno t Double -> Sent w t -> Sent w t Source #
Replace sentence probability values with the given annotation.
Best paths
findOptimalPaths :: Anno t Double -> [[(EdgeID, t)]] Source #
Find all optimal paths in the given annotation. Optimal paths are those which go through tags with the assigned probability 1.
disambPath :: Ord t => [(EdgeID, t)] -> Anno t Double -> Anno t Bool Source #
Mark the given path with disamb markers in the given annotation and produce a new disamb annotation.
Marginals
guessMarginals :: (Word w, Ord t) => Guesser t Tag -> Sent w t -> Anno t Double Source #
Determine marginal probabilities corresponding to individual tags w.r.t. the guessing model.
disambMarginals :: (Word w, Ord t) => Disamb t -> Sent w t -> Anno t Double Source #
Determine marginal probabilities corresponding to individual tags w.r.t. the disambiguation model.
disambProbs :: (Word w, Ord t) => ProbType -> Disamb t -> Sent w t -> Anno t Double Source #
Determine probabilities (of the given ProbType) corresponding to individual tags w.r.t. the disambiguation model.
Tagging
guess :: (Word w, Ord t) => Int -> Guesser t Tag -> Sent w t -> Anno t Double Source #
Perform guessing, trimming, and finally determine marginal probabilities corresponding to individual tags w.r.t. the guessing model.
guessSent :: (Word w, Ord t) => Int -> Guesser t Tag -> Sent w t -> Sent w t Source #
Determine marginal probabilities corresponding to individual tags w.r.t.
the guessing model and, afterwards, trim the sentence to keep only the k
most probable labels for each OOV edge. Note that, for OOV words, the entire
set of default tags is considered.
tag :: (Word w, Ord t) => Int -> Concraft t -> Sent w t -> Anno t Double Source #
Perform guessing, trimming, and finally determine marginal probabilities corresponding to individual tags w.r.t. the disambiguation model.
Training
:: (Word w, Ord t) | |
=> Tagset | A morphosyntactic tagset to which the tags of the training and evaluation data must correspond. |
-> Int | How many tags is the guessing model supposed
to produce for a given OOV word? It will be
used (see guessSent) as the number k of most probable labels kept for each OOV edge. |
-> TrainConf t Tag | Training configuration for the guessing model. |
-> TrainConf t | Training configuration for the disambiguation model. |
-> IO [Sent w t] | Training dataset. This IO action will be executed a couple of times, so consider using lazy IO if your dataset is big. |
-> IO [Sent w t] | Evaluation dataset IO action. Consider using lazy IO if your dataset is big. |
-> IO (Concraft t) |