module HLearn.Models.Distributions.Univariate.Categorical
(
Categorical (Categorical)
, dist2list
, mostLikely
)
where
import Control.DeepSeq
import Control.Monad.Random
import Data.List
import Data.List.Extras
import Debug.Trace
import qualified Data.Map.Strict as Map
import qualified Data.Foldable as F
import qualified Control.ConstraintKinds as CK
import HLearn.Algebra
import HLearn.Models.Distributions.Common
import HLearn.Models.Distributions.Visualization.Gnuplot
newtype Categorical prob label = Categorical
{ pdfmap :: Map.Map label prob
}
deriving (Show,Read,Eq,Ord)
instance (NFData label, NFData prob) => NFData (Categorical prob label) where
rnf d = rnf $ pdfmap d
uniformNoise :: (Fractional prob, Ord label) => prob -> [label] -> label -> Categorical prob label
uniformNoise n xs dp = trainW xs'
where
xs' = (1n,dp):(map (\x -> (weight,x)) xs)
weight = n/(fromIntegral $ length xs)
instance (Ord label, Num prob) => Abelian (Categorical prob label)
instance (Ord label, Num prob) => Monoid (Categorical prob label) where
mempty = Categorical Map.empty
mappend !d1 !d2 = Categorical $ res
where
res = Map.unionWith (+) (pdfmap d1) (pdfmap d2)
instance (Ord label, Num prob) => Group (Categorical prob label) where
inverse d1 = d1 {pdfmap=Map.map (0) (pdfmap d1)}
instance (Num prob) => HasRing (Categorical prob label) where
type Ring (Categorical prob label) = prob
instance (Ord label, Num prob) => Module (Categorical prob label) where
p .* (Categorical pdf) = Categorical $ Map.map (*p) pdf
instance CK.Functor (Categorical prob) where
type FunctorConstraint (Categorical prob) label = (Ord label, Num prob)
fmap f cat = Categorical $ Map.mapKeysWith (+) f $ pdfmap cat
instance (Num prob, Ord prob) => CK.Monad (Categorical prob) where
return dp = Categorical $ Map.singleton dp 1
x >>= f = join $ CK.fmap f x
join :: (Num prob, Ord label) => Categorical prob (Categorical prob label) -> Categorical prob label
join cat = reduce . map f $ Map.assocs $ pdfmap cat
where
f (cat,v) = v .* cat
instance (Ord label, Num prob) => HomTrainer (Categorical prob label) where
type Datapoint (Categorical prob label) = label
train1dp dp = Categorical $ Map.singleton dp 1
instance (Num prob) => NumDP (Categorical prob label) where
numdp dist = F.foldl' (+) 0 $ pdfmap dist
instance Probabilistic (Categorical prob label) where
type Probability (Categorical prob label) = prob
instance (Ord label, Ord prob, Fractional prob) => PDF (Categorical prob label) where
pdf dist label = (val/tot)
where
val = case Map.lookup label (pdfmap dist) of
Nothing -> 0
Just x -> x
tot = F.foldl' (+) 0 $ pdfmap dist
instance (Ord label, Ord prob, Fractional prob) => CDF (Categorical prob label) where
cdf dist label = (Map.foldl' (+) 0 $ Map.filterWithKey (\k a -> k<=label) $ pdfmap dist)
/ (Map.foldl' (+) 0 $ pdfmap dist)
cdfInverse dist prob = go cdfL
where
cdfL = sortBy (\(k1,p1) (k2,p2) -> compare p2 p1) $ map (\k -> (k,pdf dist k)) $ Map.keys $ pdfmap dist
go (x:[]) = fst $ last cdfL
go (x:xs) = if prob < snd x
then fst x
else go xs
instance (Num prob, Ord prob, Ord label) => Mean (Categorical prob label) where
mean dist = fst $ argmax snd $ Map.toList $ pdfmap dist
mostLikely :: Ord prob => Categorical prob label -> label
mostLikely dist = fst $ argmax snd $ Map.toList $ pdfmap dist
dist2list :: Categorical prob label -> [(label,prob)]
dist2list (Categorical pdfmap) = Map.toList pdfmap
instance
( Ord label, Show label
, Ord prob, Show prob, Fractional prob
) => PlottableDistribution (Categorical prob label)
where
samplePoints (Categorical dist) = Map.keys dist
plotType dist = Bar