-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Online Latent Dirichlet Allocation
--
--   Online Gibbs sampler for Latent Dirichlet Allocation. LDA is a
--   generative admixture model frequently used for topic modeling and
--   other applications. The primary goal of this implementation is to be
--   used for probabilistic soft word class induction. The sampler can be
--   used in an online as well as batch mode.
@package lda
@version 0.0.1

module NLP.LDA.UnboxedMaybeVector
instance (Num a, Unbox a) => Vector Vector (Maybe a)
instance (Num a, Unbox a) => MVector MVector (Maybe a)
instance (Num a, Unbox a) => Unbox (Maybe a)


-- | Latent Dirichlet Allocation
--
--   Simple implementation of a collapsed Gibbs sampler for LDA. This
--   library uses the topic modeling terminology (documents, words,
--   topics), even though it is generic. For example, if used for word
--   class induction, replace documents with word types, words with
--   features, and topics with word classes.
module NLP.LDA

-- | <tt>runSampler seed m s</tt> runs sampler <tt>s</tt> with
--   <tt>seed</tt> and initial model <tt>m</tt>. The random number
--   generator used is System.Random.Mersenne.Pure64.
runSampler :: Word64 -> LDA -> Sampler a -> (a, LDA)

-- | <tt>pass batch</tt> runs one pass of Gibbs sampling on documents in
--   <tt>batch</tt>
pass :: Vector Doc -> Sampler (Vector Doc)

-- | <tt>runLDA seed n m ds</tt> creates and runs an LDA sampler with
--   <tt>seed</tt> for <tt>n</tt> passes with initial model <tt>m</tt> on
--   the batch of documents <tt>ds</tt>. The random number generator used
--   is System.Random.Mersenne.Pure64.
runLDA :: Word64 -> Int -> LDA -> Vector Doc -> (Vector Doc, LDA)

-- | Custom random variable representing the LDA Gibbs sampler
type Sampler a = RVarT (State LDA) a

-- | Abstract type holding the settings and the state of the sampler
data LDA

-- | Abstract type holding the LDA model, and the inverse count tables
data Finalized
type Doc = (D, Vector (W, Maybe Z))
type D = Int
type W = Int
type Z = Int

-- | Document-topic counts
docTopics :: LDA -> Table2D

-- | Word-topic counts
wordTopics :: LDA -> Table2D

-- | Topic counts
topics :: LDA -> Table1D

-- | <tt>alpha * K</tt> Dirichlet parameter (topic sparseness)
alphasum :: LDA -> Double

-- | <tt>beta</tt> Dirichlet parameter (word sparseness)
beta :: LDA -> Double

-- | Number of topics <tt>K</tt>
topicNum :: LDA -> Int

-- | Number of unique words
vSize :: LDA -> Int

-- | LDA model
model :: Finalized -> LDA

-- | Inverse document-topic counts
topicDocs :: Finalized -> Table2D

-- | Inverse word-topic counts
topicWords :: Finalized -> Table2D

-- | <tt>initial k a b</tt> initializes a model with <tt>k</tt> topics,
--   <tt>a/k</tt> alpha hyperparameter, and <tt>b</tt> beta
--   hyperparameter.
initial :: Int -> Double -> Double -> LDA

-- | <tt>finalize m</tt> creates a finalized model from LDA model
--   <tt>m</tt>
finalize :: LDA -> Finalized

-- | <tt>docTopicWeights m doc</tt> returns unnormalized topic
--   probabilities for document <tt>doc</tt> given LDA model <tt>m</tt>
docTopicWeights :: LDA -> Doc -> Vector Double

-- | Remove zero counts from the doc/topic table
compress :: IntMap (IntMap Double) -> IntMap (IntMap Double)
type Table2D = IntMap Table1D
type Table1D = IntMap Double
instance Generic LDA
instance Generic Finalized
instance Datatype D1LDA
instance Constructor C1_0LDA
instance Selector S1_0_0LDA
instance Selector S1_0_1LDA
instance Selector S1_0_2LDA
instance Selector S1_0_3LDA
instance Selector S1_0_4LDA
instance Selector S1_0_5LDA
instance Selector S1_0_6LDA
instance Datatype D1Finalized
instance Constructor C1_0Finalized
instance Selector S1_0_0Finalized
instance Selector S1_0_1Finalized
instance Selector S1_0_2Finalized