-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Colada implements incremental word class induction using online LDA -- -- Colada implements incremental word class induction using Latent -- Dirichlet Allocation (LDA) with an Online Gibbs sampler. @package colada @version 0.4.2 module NLP.CoNLL -- | Token is a representation of a word, which consists of a -- number of fields. type Token = Vector Text -- | Field is a part of a word token, such as word form, lemma or -- POS tag type Field = Text -- | Sentence is a vector of tokens. type Sentence = Vector Token -- | parse text returns a lazy list of sentences. parse :: Text -> [Sentence] -- | Word Class induction with LDA -- -- This module provides functions which implement word class induction -- using the generic algorithm implemented in Colada.LDA. -- -- You can access and set options in the Options record using -- lenses. Example: -- --
-- import Data.Label -- let options = set passes 5 -- . set beta 0.01 -- . set topicNum 100 -- $ defaultOptions -- in run options sentences -- module Colada.WordClass -- | learn options xs runs the LDA Gibbs sampler for word classes -- with options on sentences xs, and returns the -- resulting model together with the progressive class assignments learn :: Options -> [Sentence] -> (WordClass, [Vector D]) defaultOptions :: Options -- | summary m returns a textual summary of word classes found in -- model m summary :: WordClass -> Text -- | wordTypeClasses m returns a Map from word types to -- unnormalized distributions over word classes wordTypeClasses :: WordClass -> Map Text (IntMap Double) -- | label m s returns for each word in sentence s, unnormalized -- probabilities of word classes. label :: Bool -> WordClass -> Sentence -> Vector (Vector Double) -- | predict m s returns for each word in sentence s, unnormalized -- probabilities of words given predicted word class. predict :: WordClass -> Sentence -> [Vector (Double, Text)] -- | Container for the Word Class model data WordClass ldaModel :: Arrow ~> => Lens ~> WordClass Finalized wordTypeTable :: Arrow ~> => Lens ~> WordClass (AtomTable (Vector Char)) featureTable :: Arrow ~> => Lens ~> WordClass (AtomTable (Vector Char)) options :: Arrow ~> => Lens ~> WordClass Options data Finalized :: * -- | Document topic counts docTopics :: Finalized -> Table2D -- | Word topic counts wordTopics :: Finalized -> Table2D -- | Topic counts topics :: Finalized -> Table1D -- | Inverse document-topic counts topicDocs :: Finalized -> Table2D -- | Inverse word-topic counts topicWords :: Finalized -> Table2D data Options featIds :: Arrow ~> => Lens ~> Options [Int] topicNum :: Arrow ~> => Lens ~> Options Int alphasum :: Arrow ~> => Lens ~> Options Double beta :: Arrow ~> => Lens ~> Options Double passes :: Arrow ~> => Lens ~> Options Int repeats :: Arrow ~> => Lens ~> Options Int batchSize :: Arrow ~> => Lens ~> Options Int seed :: 
Arrow ~> => Lens ~> Options Word32 topn :: Arrow ~> => Lens ~> Options Int initSize :: Arrow ~> => Lens ~> Options Int initPasses :: Arrow ~> => Lens ~> Options Int exponent :: Arrow ~> => Lens ~> Options (Maybe Double) progressive :: Arrow ~> => Lens ~> Options Bool lambda :: Arrow ~> => Lens ~> Options Double instance Serialize WordClass instance Serialize (AtomTable (Vector Char)) instance Serialize (Vector Char) instance Serialize Text instance Serialize Finalized instance Typeable Options instance Eq Options instance Show Options instance Data Options instance Generic Options instance Generic WordClass instance Datatype D1Options instance Constructor C1_0Options instance Selector S1_0_0Options instance Selector S1_0_1Options instance Selector S1_0_2Options instance Selector S1_0_3Options instance Selector S1_0_4Options instance Selector S1_0_5Options instance Selector S1_0_6Options instance Selector S1_0_7Options instance Selector S1_0_8Options instance Selector S1_0_9Options instance Selector S1_0_10Options instance Selector S1_0_11Options instance Selector S1_0_12Options instance Selector S1_0_13Options instance Datatype D1WordClass instance Constructor C1_0WordClass instance Selector S1_0_0WordClass instance Selector S1_0_1WordClass instance Selector S1_0_2WordClass instance Selector S1_0_3WordClass instance Serialize Options