-      ``Data/Random/Distribution/Categorical''
    FlexibleInstances, FlexibleContexts

module Data.Random.Distribution.Categorical where

import Data.Random.RVar
import Data.Random.Distribution
import Data.Random.Distribution.Uniform

import Control.Arrow
import Control.Monad
import Control.Applicative
import Data.Foldable (Foldable(foldMap))
import Data.Traversable (Traversable(traverse, sequenceA))

import Data.List
import Data.Function

-- |Construct a 'Categorical' random variable from a list of probabilities
-- and categories, where the probabilities all sum to 1.
categorical :: Distribution (Categorical p) a => [(p,a)] -> RVar a
categorical ps = rvar (Categorical ps)

-- |Construct a 'Categorical' random process from a list of probabilities
-- and categories, where the probabilities all sum to 1.
categoricalT :: Distribution (Categorical p) a => [(p,a)] -> RVarT m a
categoricalT ps = rvarT (Categorical ps)

-- | Construct a 'Categorical' distribution from a list of weighted categories,
-- where the weights do not necessarily sum to 1.
{-# INLINE weightedCategorical #-}
weightedCategorical :: (Fractional p) => [(p,a)] -> Categorical p a
weightedCategorical = normalizeCategoricalPs . Categorical

-- |Construct a 'Categorical' distribution from a list of observed outcomes.
-- Equivalent events will be grouped and counted, and the probabilities of each
-- event in the returned distribution will be proportional to the number of 
-- occurrences of that event.
empirical :: (Fractional p, Ord a) => [a] -> Categorical p a
empirical xs = normalizeCategoricalPs (Categorical bins)
    where bins = [ (genericLength bin, x)
                 | bin@(x:_) <- group (sort xs)

-- |Categorical distribution; a list of events with corresponding probabilities.
-- The sum of the probabilities must be 1, and no event should have a zero 
-- or negative probability (at least, at time of sampling; very clever users
-- can do what they want with the numbers before sampling, just make sure 
-- that if you're one of those clever ones, you normalize before sampling).
newtype Categorical p a = Categorical [(p, a)]
    deriving (Eq, Show)

instance (Fractional p, Ord p, Distribution StdUniform p) => Distribution (Categorical p) a where
    rvarT (Categorical []) = fail "categorical distribution over empty set cannot be sampled"
    rvarT (Categorical ds) = do
        let (ps, xs) = unzip ds
            cs = scanl1 (+) ps
        u <- stdUniformT
        getEvent u cs xs
            -- In the (hopefully) extremely rare event that, due to numerical
            -- instability, the last 'c' is less than 1 _and_ a number greater than 
            -- it is drawn, simply retry the sampling.  If it comes to that, also
            -- do one last sanity check that lastC > 0, to make sure that there
            -- is some nonzero chance of termination.
            getEvent u cs0 xs0 = go 0 cs0 xs0
                    go lastC [] _
                        | lastC > 0 = do {newU <- stdUniformT; getEvent newU cs0 xs0}
                        | otherwise = fail "categorical distribution sampling error: total probablility not greater than zero"
                    go lastC (c:cs) (x:xs)
                        | c < lastC = fail "categorical distribution sampling error: negative probability for an event!"
                        | u > c     = go c cs xs
                        | c == c    = return x
                        | otherwise = fail "categorical distribution sampling error: NaN probability"
                    go _ _ _ = error "rvar/Categorical: programming error! this case should be impossible!"

instance Functor (Categorical p) where
    fmap f (Categorical ds) = Categorical [(p, f x) | ~(p, x) <- ds]

instance Foldable (Categorical p) where
    foldMap f (Categorical ds) = foldMap (f . snd) ds

instance Traversable (Categorical p) where
    traverse f (Categorical ds) = Categorical <$> traverse (\(p,e) -> (\e' -> (p,e')) <$> f e) ds
    sequenceA  (Categorical ds) = Categorical <$> traverse (\(p,e) -> (\e' -> (p,e')) <$>   e) ds

instance Fractional p => Monad (Categorical p) where
    return x = Categorical [(1, x)]
    -- I'm not entirely sure whether this is a valid form of failure; see next
    -- set of comments.
    fail _ = Categorical []
    -- Should the normalize step be included here, or should normalization
    -- be assumed?  It seems like there is (at least) 1 valid situation where
    -- non-normal results would arise:  the distribution being modeled is 
    -- "conditional" and some event arose that contradicted the assumed 
    -- condition and thus was eliminated ('f' returned an empty or 
    -- zero-probability consequent, possibly by 'fail'ing).
    -- It seems reasonable to continue in such circumstances, but should there
    -- be any renormalization?  If so, does it make a difference when that 
    -- renormalization is done?  I'm pretty sure it does, actually.  So, the
    -- normalization will be omitted here for now, as it's easier for the
    -- user (who really better know what they mean if they're returning
    -- non-normalized probability anyway) to normalize explicitly than to
    -- undo any normalization that was done automatically.
    (Categorical xs) >>= f = {- normalizeCategoricalPs . -} Categorical $ do
        (p, x) <- xs
        let Categorical fx = f x
        (q, y) <- fx
        return (p * q, y)

instance Fractional p => Applicative (Categorical p) where
    pure = return
    (<*>) = ap

-- |Like 'fmap', but for the probabilities of a categorical distribution.
mapCategoricalPs :: (p -> q) -> Categorical p e -> Categorical q e
mapCategoricalPs f (Categorical ds) = Categorical [(f p, x) | (p, x) <- ds]

-- |Adjust all the weights of a categorical distribution so that they 
-- sum to unity.
normalizeCategoricalPs :: (Fractional p) => Categorical p e -> Categorical p e
normalizeCategoricalPs orig@(Categorical ds) = 
    -- For practical purposes the scale factor is strict anyway,
    -- so check if the total probability is 1 and, if so, skip 
    -- the actual scaling part.
    -- Along the way, discard any zero-probability events.
    if null ds || ps =~ 1
        then orig
        else Categorical
                [ (p * scale, e)
                | (p, e) <- ds
                , p /= 0
        ps = foldl1' (+) (map fst ds)
        scale = recip ps
        -- Using same implicit-epsilon trick as in Distribution instance
        -- (see comments there)
        x =~ y  = (100 + (x-y) == 100)

-- |Simplify a categorical distribution by combining equivalent categories (the new
-- category will have a probability equal to the sum of all the originals).
collectEvents :: (Ord e, Num p, Ord p) => Categorical p e -> Categorical p e
collectEvents = collectEventsBy compare ((sum *** head) . unzip)
-- |Simplify a categorical distribution by combining equivalent events (the new
-- event will have a weight equal to the sum of all the originals).
-- The comparator function is used to identify events to combine.  Once chosen,
-- the events and their weights are combined by the provided probability and
-- event aggregation function.
collectEventsBy :: (e -> e -> Ordering) -> ([(p,e)] -> (p,e))-> Categorical p e -> Categorical p e
collectEventsBy compareE combine (Categorical ds) = 
    Categorical . map combine . groupEvents . sortEvents $ ds
        groupEvents = groupBy (\x y -> snd x `compareE` snd y == EQ)
        sortEvents  = sortBy (compareE `on` snd)