-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Online Latent Dirichlet Allocation
--
--   Online Gibbs sampler for Latent Dirichlet Allocation. LDA is a
--   generative admixture model frequently used for topic modeling and
--   other applications. The primary goal of this implementation is to be
--   used for probabilistic soft word class induction. The sampler can be
--   used in an online as well as batch mode.
@package lda
@version 0.0.1

module NLP.LDA.UnboxedMaybeVector
instance (Num a, Unbox a) => Vector Vector (Maybe a)
instance (Num a, Unbox a) => MVector MVector (Maybe a)
instance (Num a, Unbox a) => Unbox (Maybe a)


-- | Latent Dirichlet Allocation
--
--   Simple implementation of a collapsed Gibbs sampler for LDA. This
--   library uses the topic modeling terminology (documents, words,
--   topics), even though it is generic. For example, if used for word
--   class induction, replace documents with word types, words with
--   features, and topics with word classes.
module NLP.LDA

-- | <tt>runSampler seed m s</tt> runs sampler <tt>s</tt> with
--   <tt>seed</tt> and initial model <tt>m</tt>. The random number
--   generator used is System.Random.Mersenne.Pure64.
runSampler :: Word64 -> LDA -> Sampler a -> (a, LDA)

-- | <tt>pass batch</tt> runs one pass of Gibbs sampling on documents in
--   <tt>batch</tt>
pass :: Vector Doc -> Sampler (Vector Doc)

-- | <tt>runLDA seed n m ds</tt> creates and runs an LDA sampler with
--   <tt>seed</tt> for <tt>n</tt> passes with initial model <tt>m</tt> on
--   the batch of documents <tt>ds</tt>. The random number generator used
--   is System.Random.Mersenne.Pure64.
runLDA :: Word64 -> Int -> LDA -> Vector Doc -> (Vector Doc, LDA)

-- | Custom random variable representing the LDA Gibbs sampler
type Sampler a = RVarT (State LDA) a

-- | Abstract type holding the settings and the state of the sampler
data LDA

-- | Abstract type holding the LDA model, and the inverse count tables
data Finalized
type Doc = (D, Vector (W, Maybe Z))
type D = Int
type W = Int
type Z = Int

-- | Document-topic counts
docTopics :: LDA -> Table2D

-- | Word-topic counts
wordTopics :: LDA -> Table2D

-- | Topic counts
topics :: LDA -> Table1D

-- | <tt>alpha * K</tt> Dirichlet parameter (topic sparseness)
alphasum :: LDA -> Double

-- | <tt>beta</tt> Dirichlet parameter (word sparseness)
beta :: LDA -> Double

-- | Number of topics <tt>K</tt>
topicNum :: LDA -> Int

-- | Number of unique words
vSize :: LDA -> Int

-- | LDA model
model :: Finalized -> LDA

-- | Inverse document-topic counts
topicDocs :: Finalized -> Table2D

-- | Inverse word-topic counts
topicWords :: Finalized -> Table2D

-- | <tt>initial k a b</tt> initializes a model with <tt>k</tt> topics,
--   <tt>a/k</tt> alpha hyperparameter, and <tt>b</tt> beta
--   hyperparameter.
initial :: Int -> Double -> Double -> LDA

-- | <tt>finalize m</tt> creates a finalized model from LDA model
--   <tt>m</tt>
finalize :: LDA -> Finalized

-- | <tt>docTopicWeights m doc</tt> returns unnormalized topic
--   probabilities for document <tt>doc</tt> given LDA model <tt>m</tt>
docTopicWeights :: LDA -> Doc -> Vector Double

-- | Remove zero counts from the doc/topic table
compress :: IntMap (IntMap Double) -> IntMap (IntMap Double)
type Table2D = IntMap Table1D
type Table1D = IntMap Double
instance Generic LDA
instance Generic Finalized
instance Datatype D1LDA
instance Constructor C1_0LDA
instance Selector S1_0_0LDA
instance Selector S1_0_1LDA
instance Selector S1_0_2LDA
instance Selector S1_0_3LDA
instance Selector S1_0_4LDA
instance Selector S1_0_5LDA
instance Selector S1_0_6LDA
instance Datatype D1Finalized
instance Constructor C1_0Finalized
instance Selector S1_0_0Finalized
instance Selector S1_0_1Finalized
instance Selector S1_0_2Finalized