-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Online Latent Dirichlet Allocation
--
-- Online Gibbs sampler for Latent Dirichlet Allocation. LDA is a
-- generative admixture model frequently used for topic modeling and
-- other applications. The primary goal of this implementation is to be
-- used for probabilistic soft word class induction. The sampler can be
-- used in either online or batch mode.
@package lda
@version 0.0.1
module NLP.LDA.UnboxedMaybeVector
instance (Num a, Unbox a) => Vector Vector (Maybe a)
instance (Num a, Unbox a) => MVector MVector (Maybe a)
instance (Num a, Unbox a) => Unbox (Maybe a)
-- | Latent Dirichlet Allocation
--
-- Simple implementation of a collapsed Gibbs sampler for LDA. This
-- library uses the topic modeling terminology (documents, words,
-- topics), even though it is generic. For example if used for word class
-- induction, replace documents with word types, words with features and
-- topics with word classes.
module NLP.LDA
-- | runSampler seed m s runs sampler s with
-- seed and initial model m. The random number
-- generator used is System.Random.Mersenne.Pure64.
runSampler :: Word64 -> LDA -> Sampler a -> (a, LDA)
-- | pass batch runs one pass of Gibbs sampling on documents in
-- batch
pass :: Vector Doc -> Sampler (Vector Doc)
-- | runLDA seed n m ds creates and runs an LDA sampler with
-- seed for n passes with initial model m on
-- the batch of documents ds. The random number generator used
-- is System.Random.Mersenne.Pure64.
runLDA :: Word64 -> Int -> LDA -> Vector Doc -> (Vector Doc, LDA)
-- | Custom random variable representing the LDA Gibbs sampler
type Sampler a = RVarT (State LDA) a
-- | Abstract type holding the settings and the state of the sampler
data LDA
-- | Abstract type holding the LDA model, and the inverse count tables
data Finalized
type Doc = (D, Vector (W, Maybe Z))
type D = Int
type W = Int
type Z = Int
-- | Document-topic counts
docTopics :: LDA -> Table2D
-- | Word-topic counts
wordTopics :: LDA -> Table2D
-- | Topic counts
topics :: LDA -> Table1D
-- | alpha * K Dirichlet parameter (topic sparseness)
alphasum :: LDA -> Double
-- | beta Dirichlet parameter (word sparseness)
beta :: LDA -> Double
-- | Number of topics K
topicNum :: LDA -> Int
-- | Number of unique words
vSize :: LDA -> Int
-- | LDA model
model :: Finalized -> LDA
-- | Inverse document-topic counts
topicDocs :: Finalized -> Table2D
-- | Inverse word-topic counts
topicWords :: Finalized -> Table2D
-- | initial k a b initializes model with k topics,
-- a/k alpha hyperparameter and b beta hyperparameter.
initial :: Int -> Double -> Double -> LDA
-- | finalize m creates a finalized model from LDA model
-- m
finalize :: LDA -> Finalized
-- | docTopicWeights m doc returns unnormalized topic
-- probabilities for document doc given LDA model m
docTopicWeights :: LDA -> Doc -> Vector Double
-- | Remove zero counts from the doc/topic table
compress :: IntMap (IntMap Double) -> IntMap (IntMap Double)
type Table2D = IntMap Table1D
type Table1D = IntMap Double
instance Generic LDA
instance Generic Finalized
instance Datatype D1LDA
instance Constructor C1_0LDA
instance Selector S1_0_0LDA
instance Selector S1_0_1LDA
instance Selector S1_0_2LDA
instance Selector S1_0_3LDA
instance Selector S1_0_4LDA
instance Selector S1_0_5LDA
instance Selector S1_0_6LDA
instance Datatype D1Finalized
instance Constructor C1_0Finalized
instance Selector S1_0_0Finalized
instance Selector S1_0_1Finalized
instance Selector S1_0_2Finalized