Safe Haskell | None |
---|

This module contains the type classes for manipulating distributions.

We use the same classes for both discrete and continuous distributions. Unfortunately, we cannot use the type classes from the `statistics` package because we require more flexibility than they offer.

- class Probabilistic model where
- type Probability model

- class Probabilistic dist => CDF dist where
- cdf :: dist -> Datapoint dist -> Probability dist
- cdfInverse :: dist -> Probability dist -> Datapoint dist

- class Probabilistic dist => PDF dist where
- pdf :: dist -> Datapoint dist -> Probability dist

- class HomTrainer dist => Mean dist where
- class Probabilistic dist => Variance dist where
- variance :: dist -> Probability dist

- nonoverlap :: (Enum (Probability dist), Fractional (Probability dist), Ord (Probability dist), PDF dist, CDF dist) => [dist] -> Probability dist

# Type classes

class Probabilistic model Source

type Probability model Source

Num prob => Probabilistic (Unital prob) | |

Probabilistic (Categorical prob label) | |

(HList2List (HList dpL) prob, Unbox prob, Num prob, SingI Nat (FromNat1 (Length1 * dpL))) => Probabilistic (MultiNormal prob dpL) | |

Num dp => Probabilistic (Binomial prob dp) | |

Num prob => Probabilistic (Exponential prob dp) | |

Num dp => Probabilistic (Geometric prob dp) | |

Probabilistic (LogNormal prob dp) | |

Num prob => Probabilistic (Normal prob dp) | |

Num dp => Probabilistic (Poisson prob dp) | |

Probabilistic (MissingData response basedist prob) | |

Probabilistic (Ignore' label basedist prob) | |

Probabilistic (CatContainer label basedist prob) | |

Probabilistic (Multivariate dp xs prob) | |

Probabilistic (KDE kernel h prob dp) | |

Probabilistic (MultiContainer k dist sample basedist prob) | |

Probabilistic (Container k dist sample basedist prob) |

class Probabilistic dist => CDF dist where Source

Technically, every distribution has a Cumulative Distribution Function (CDF), and so this type class should be merged with the Distribution type class. However, I haven't had a chance to implement the CDF for most distributions yet, so this type class has been separated out.

cdf :: dist -> Datapoint dist -> Probability dist Source

cdfInverse :: dist -> Probability dist -> Datapoint dist Source

class Probabilistic dist => PDF dist where Source

Not every distribution has a Probability Density Function (PDF); however, most distributions in the HLearn library do. For many applications, the PDF is much more intuitive and easier to work with than the CDF. For discrete distributions, this is often called a Probability Mass Function (PMF); however, for simplicity we use the same type class for both continuous and discrete data.

pdf :: dist -> Datapoint dist -> Probability dist Source

Num prob => PDF (Unital prob) | |

(Ord label, Ord prob, Fractional prob) => PDF (Categorical prob label) | |

(HList2List (HList dpL) prob, Unbox prob, Floating prob, Field prob, Enum prob, SingI Nat (FromNat1 (Length1 * dpL)), Storable prob) => PDF (MultiNormal prob dpL) | |

Floating prob => PDF (Binomial Double Int) | |

Floating prob => PDF (Exponential prob prob) | |

(Integral dp, Floating prob) => PDF (Geometric prob dp) | |

Floating prob => PDF (LogNormal prob prob) | |

Floating prob => PDF (Normal prob prob) | |

(Integral dp, Floating prob) => PDF (Poisson Double dp) | |

(~ * (Probability (basedist prob)) prob, PDF (basedist prob), Num prob) => PDF (MissingData Ignore basedist prob) | |

(~ * (Probability basedist) prob, HomTrainer (Ignore' label basedist prob), ~ * (Datapoint (Ignore' label basedist prob)) (HList dpL), ~ * (Datapoint basedist) (HList basedpL), PDF basedist) => PDF (Ignore' label basedist prob) | |

(Ord prob, Fractional prob, Show prob, ~ * (Probability basedist) prob, Ord label, PDF basedist, ~ * (Datapoint basedist) (HList ys), Show (Datapoint basedist), Show label) => PDF (CatContainer label basedist prob) | |

(PDF (MultivariateTF (Concat (* -> * -> *) xs) prob), ~ * (Probability (MultivariateTF (Concat (* -> * -> *) xs) prob)) prob, ~ * (Datapoint (MultivariateTF (Concat (* -> * -> *) xs) prob)) (GetHList dp), Trainable dp, HomTrainer (Multivariate dp xs prob)) => PDF (Multivariate dp xs prob) | |

(Kernel kernel prob, SingI Nat h, Fractional prob, ~ * prob (Ring (SortedVector prob)), NumDP (SortedVector prob)) => PDF (KDE kernel h prob prob) | |

(PDF (dist prob sample), PDF basedist, ~ * prob (Probability (dist prob sample)), ~ * prob (Probability basedist), Num prob, ~ * (Datapoint (dist prob sample)) (HList dpL), ~ * (Datapoint basedist) (HList basedpL), HTake1 (Nat1Box (Length1 * dpL)) (HList (++ * dpL basedpL)) (HList dpL), HDrop1 (Nat1Box (Length1 * dpL)) (HList (++ * dpL basedpL)) (HList basedpL)) => PDF (MultiContainer k dist sample basedist prob) | |

(PDF (dist prob sample), PDF basedist, ~ * (Probability (dist prob sample)) prob, ~ * (Probability basedist) prob, Probabilistic (Container k dist sample basedist prob), ~ * (Datapoint basedist) (HList ys), ~ * (Datapoint (dist prob sample)) y, ~ * (Datapoint (Container k dist sample basedist prob)) (HList (: * y ys)), Num prob) => PDF (Container k dist sample basedist prob) |

class HomTrainer dist => Mean dist where Source

(Num prob, Ord prob, Ord label) => Mean (Categorical prob label) | |

Fractional prob => Mean (Exponential prob prob) | |

(Integral dp, RealFrac prob) => Mean (Geometric prob dp) | |

Floating prob => Mean (LogNormal prob prob) | |

Fractional prob => Mean (Normal prob prob) |

class Probabilistic dist => Variance dist where Source

variance :: dist -> Probability dist Source

Fractional prob => Variance (Exponential prob prob) | |

(Integral dp, Fractional prob) => Variance (Geometric prob dp) | |

(Show prob, Floating prob) => Variance (LogNormal prob prob) | |

Fractional prob => Variance (Normal prob prob) |

# Utility functions

nonoverlap :: (Enum (Probability dist), Fractional (Probability dist), Ord (Probability dist), PDF dist, CDF dist) => [dist] -> Probability dist Source

If you were to plot a list of distributions, `nonoverlap` returns the amount of area that only a single distribution covers. That is, it will be equal to the number of distributions minus the overlap.

This function is used by the HomTree classifier.