-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Collection of useful statistical methods.
--
-- High-level statistical methods.
--
--
-- - Confusion matrix
-- - Confusion-matrix-dependent statistics (sensitivity, specificity,
-- F-measure, MCC)
-- - EM algorithm for two-component Gaussian mixture.
-- - GMM (Gaussian Mixture Models) with >=1 Gaussians fitted to the
-- data.
--
--
-- Note that some methods are for testing only (two-component Gaussian
-- mixture EM).
@package StatisticalMethods
@version 0.0.0.1
-- | This is a simplified version of the Expectation-Maximization algorithm
-- for a two-component Gaussian mixture model. Cf. Hastie et al, The
-- Elements of Statistical Learning, Springer. Chapter 8.5.1.
module Statistics.EM.TwoGaussian
-- | Finds the fixed point of the EM step iterations.
emFix :: Vector Double -> (Weight, Normal, Normal) -> (Weight, Normal, Normal)
-- | Finds the best fixed point, using the elements of xs as starting
-- points for the means. It holds that mu_1 < mu_2.
emStarts :: Vector Double -> (Weight, Normal, Normal)
-- | EM for a mixture of k one-dimensional Gaussians. This procedure tends
-- to produce NaNs whenever more Gaussians are requested than the data
-- call for. This is rather convenient. ;-)
--
-- TODO cite paper
module Statistics.EM.GMM
-- | Find an optimal set of parameters Theta. The additional
-- takeWhile (not . isnan . fst) ensures that emFix terminates in cases
-- of overfitting. Because the check is made on fst while the result is
-- taken from snd, the returned values will be NaNs whenever NaNs occur.
emFix :: Data -> Theta -> Theta
-- | Given a set of Data and a number k of Gaussian peaks,
-- try to find the optimal GMM. This is done by trying each data point as
-- mu for each Gaussian. Note that this will be rather slow for larger
-- k (larger than, say, 2 or 3). In that case, a random-drawing
-- method should be chosen.
--
-- TODO xs' -> xs sorting makes me cry!
emStarts :: Int -> Data -> Theta
-- | This module contains test data taken from Elements of Statistical
-- Learning.
--
-- TODO correct citation
module TestData.Elements
table_8_1 :: Vector Double
-- | The confusion matrix contains four data points: the true and false
-- positives and the true and false negatives. From these four data
-- points, other statistics can be extracted.
--
-- Fawcett, ROC Graphs: Notes and Practical Considerations for
-- Researchers, 2004, Kluwer Academic Publishers
module Statistics.ConfusionMatrix
-- | The confusion matrix.
data ConfusionMatrix
ConfusionMatrix :: WrappedDouble -> WrappedDouble -> WrappedDouble -> WrappedDouble -> ConfusionMatrix
fn :: ConfusionMatrix -> WrappedDouble
fp :: ConfusionMatrix -> WrappedDouble
tn :: ConfusionMatrix -> WrappedDouble
tp :: ConfusionMatrix -> WrappedDouble
type WrappedDouble = Either String Double
-- | Given a certain data-set, create a confusion matrix.
class MkConfusionMatrix a
mkConfusionMatrix :: MkConfusionMatrix a => a -> ConfusionMatrix
instance Read ConfusionMatrix
instance Show ConfusionMatrix
instance Eq ConfusionMatrix
-- | In general, it is not easy to define the whole confusion matrix
-- generically without knowing anything about the source data. For
-- certain elements however, it is possible. These instances are all
-- defined on newtypes in order to not create instances on generic data
-- types like lists.
module Statistics.ConfusionMatrix.Instances
-- | The constructor expects the total number of possibilities first, then
-- a list of true positive elements, followed by a list of predicted
-- elements.
newtype (Eq a, Ord a) => ListSimilar a
ListSimilar :: (Int, [a], [a]) -> ListSimilar a
instance (Eq a, Ord a) => MkConfusionMatrix (ListSimilar a)
-- | Common performance metrics which can be calculated using the confusion
-- matrix.
--
-- Fawcett, ROC Graphs: Notes and Practical Considerations for
-- Researchers, 2004, Kluwer Academic Publishers
module Statistics.PerformanceMetrics
-- | sensitivity
sensitivity :: ConfusionMatrix -> WrappedDouble
-- | specificity
specificity :: ConfusionMatrix -> WrappedDouble
-- | positive predictive value
ppv :: ConfusionMatrix -> WrappedDouble
-- | Matthews correlation coefficient
mcc :: ConfusionMatrix -> WrappedDouble
-- | F-measure
fmeasure :: ConfusionMatrix -> WrappedDouble