Copyright	Phillip Seeber 2021
License	AGPL-3
Maintainer	phillip.seeber@googlemail.com
Stability	experimental
Portability	POSIX, Windows
Safe Haskell	None
Language	Haskell2010

ConClusion.Numeric.Statistics

Contents

PCA
Variance
Distance Metrics
Cluster Algorithms
- DBScan
- Hierarchical Cluster Analysis

Description

Synopsis

data PCA = PCA {
- x :: Matrix U Double
- x' :: Matrix U Double
- y :: Matrix U Double
- a :: Matrix U Double
- mse :: Double
- remaining :: Double
- allEigenValues :: Vector U Double
- pcaEigenValues :: Vector U Double
- allEigenVecs :: Matrix U Double
- pcaEigenVecs :: Matrix U Double
}
pca :: (Numeric r Double, Mutable r Double, Load r Ix1 Double, Load r Ix2 Double, MonadThrow m) => Int -> Matrix r Double -> m PCA
normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Mutable r e) => Array r Ix2 e -> Array r Ix2 e
meanDeviation :: (Source r e, Fractional e, Unbox e, Numeric r e, Mutable r e) => Matrix r e -> Matrix r e
covariance :: (Numeric r e, Mutable r e, Fractional e) => Matrix r e -> Matrix r e
type DistFn r e = Matrix r e -> Matrix r e
lpNorm :: (Mutable r e, Floating e) => Int -> DistFn r e
manhattan :: (Mutable r e, Floating e) => DistFn r e
euclidean :: (Mutable r e, Floating e) => DistFn r e
mahalanobis :: (Unbox e, Numeric r e, Mutable r e, Field e, Load r Ix1 e) => DistFn r e
type Clusters = Vector B IntSet
newtype DistanceInvalidException e = DistanceInvalidException e
dbscan :: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r e) => DistFn r e -> Int -> e -> Matrix r e -> m Clusters
data Dendrogram e
data JoinStrat e
- = SingleLinkage
- | CompleteLinkage
- | Median
- | UPGMA
- | WPGMA
- | Centroid
- | Ward
- | LWFB e
- | LW e e e e
hca :: (MonadThrow m, Mutable r e, Mutable r (e, Ix1), Load r Ix1 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e)
cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters

PCA

data PCA Source #

Constructors

PCA

Fields

x :: Matrix U Double
Original feature matrix.
x' :: Matrix U Double
Feature matrix in mean deviation form.
y :: Matrix U Double
Transformed data.
a :: Matrix U Double
Transformation matrix to transform feature matrix into PCA result matrix.
mse :: Double
Mean squared error introduced by PCA.
remaining :: Double
Percentage of the behaviour captured in the remaining dimensions.
allEigenValues :: Vector U Double
All eigenvalues from the diagonalisation of the covariance matrix.
pcaEigenValues :: Vector U Double
Eigenvalues that were kept for PCA.
allEigenVecs :: Matrix U Double
All eigenvectors from the diagonalisation of the covariance matrix.
pcaEigenVecs :: Matrix U Double
Eigenvectors that were kept for PCA.

pca Source #

Arguments

:: (Numeric r Double, Mutable r Double, Load r Ix1 Double, Load r Ix2 Double, MonadThrow m)
=> Int	Dimensionalty after PCA transformation.
-> Matrix r Double	\(m \times n\) Feaute matrix \(\mathbf{X}\), with \(m\) different measurements (rows) in \(n\) different trials (columns).
-> m PCA

Performs a PCA on the feature matrix \(\mathbf{X}\) by solving the eigenproblem of the covariance matrix. The function takes the feature matrix directly and perfoms the conversion to mean deviation form, the calculation of the covariance matrix and the eigenvalue problem automatically.

Variance

normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Mutable r e) => Array r Ix2 e -> Array r Ix2 e Source #

Normalise each value so that the maximum absolute value in each row becomes one.

meanDeviation :: (Source r e, Fractional e, Unbox e, Numeric r e, Mutable r e) => Matrix r e -> Matrix r e Source #

Subtract the mean value of all columns from the feature matrix. Brings the feature matrix to mean deviation form.

covariance :: (Numeric r e, Mutable r e, Fractional e) => Matrix r e -> Matrix r e Source #

Obtains the covariance matrix \(\mathbf{C_X}\) from the feature matrix \(\mathbf{X}\). \[ \mathbf{C_X} \equiv \frac{1}{n - 1} \mathbf{X} \mathbf{X}^T \] where \(n\) is the number of columns in the matrix.

The feature matrix should be in mean deviation form, see meanDeviation.

Distance Metrics

type DistFn r e = Matrix r e -> Matrix r e Source #

Distance matrix generator functions.

lpNorm :: (Mutable r e, Floating e) => Int -> DistFn r e Source #

The \(L_p\) norm between two vectors. Generalisation of Manhattan and Euclidean distances. \[ d(\mathbf{a}, \mathbf{b}) = \left( \sum \limits_{i=1}^n \lvert \mathbf{a}_i - \mathbf{b}_i \rvert ^p \right) ^ \frac{1}{p} \]

manhattan :: (Mutable r e, Floating e) => DistFn r e Source #

The Manhattan distance between two vectors. Specialisation of the \(L_p\) norm for \(p = 1\). \[ d(\mathbf{a}, \mathbf{b}) = \sum \limits_{i=1}^n \lvert \mathbf{a}_i - \mathbf{b}_i \rvert \]

euclidean :: (Mutable r e, Floating e) => DistFn r e Source #

The Euclidean distance between two vectors. Specialisation of the \(L_p\) norm for \(p = 2\). \[ d(\mathbf{a}, \mathbf{b}) = \sqrt{\sum \limits_{i=1}^n (\mathbf{a}_i - \mathbf{b}_i)^2} \]

mahalanobis :: (Unbox e, Numeric r e, Mutable r e, Field e, Load r Ix1 e) => DistFn r e Source #

Mahalanobis distance between points. Suitable for non correlated axes. \[ d(\mathbf{a}, \mathbf{b}) = \sqrt{(\mathbf{a} - \mathbf{b})^T \mathbf{S}^{-1} (\mathbf{a} - \mathbf{b})} \] where \(\mathbf{S}\) is the covariance matrix.

Cluster Algorithms

type Clusters = Vector B IntSet Source #

Representation of clusters.

DBScan

newtype DistanceInvalidException e Source #

Exception for invalid search distances.

Constructors

DistanceInvalidException e

Instances

Instances details

Eq e => Eq (DistanceInvalidException e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods (==) :: DistanceInvalidException e -> DistanceInvalidException e -> Bool # (/=) :: DistanceInvalidException e -> DistanceInvalidException e -> Bool #
Show e => Show (DistanceInvalidException e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods showsPrec :: Int -> DistanceInvalidException e -> ShowS # show :: DistanceInvalidException e -> String # showList :: [DistanceInvalidException e] -> ShowS #
(Typeable e, Show e) => Exception (DistanceInvalidException e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods toException :: DistanceInvalidException e -> SomeException # fromException :: SomeException -> Maybe (DistanceInvalidException e) # displayException :: DistanceInvalidException e -> String #

dbscan Source #

Arguments

:: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r e)
=> DistFn r e	Distance measure to build the distance matrix of all points.
-> Int	Minimal number of members in a cluster.
-> e	Search radius \(\epsilon\)
-> Matrix r e	\(n\) \(m\)-dimensional data points as column vectors of a \(m \times n\) matrix.
-> m Clusters	Resulting clusters.

DBScan algorithm.

Hierarchical Cluster Analysis

data Dendrogram e Source #

A dendrogram as a binary tree.

Instances

Instances details

Eq e => Eq (Dendrogram e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods (==) :: Dendrogram e -> Dendrogram e -> Bool # (/=) :: Dendrogram e -> Dendrogram e -> Bool #
Show e => Show (Dendrogram e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods showsPrec :: Int -> Dendrogram e -> ShowS # show :: Dendrogram e -> String # showList :: [Dendrogram e] -> ShowS #
Generic (Dendrogram e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Associated Types type Rep (Dendrogram e) :: Type -> Type # Methods from :: Dendrogram e -> Rep (Dendrogram e) x # to :: Rep (Dendrogram e) x -> Dendrogram e #
ToJSON e => ToJSON (Dendrogram e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods toJSON :: Dendrogram e -> Value # toEncoding :: Dendrogram e -> Encoding # toJSONList :: [Dendrogram e] -> Value # toEncodingList :: [Dendrogram e] -> Encoding #
FromJSON e => FromJSON (Dendrogram e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods parseJSON :: Value -> Parser (Dendrogram e) # parseJSONList :: Value -> Parser [Dendrogram e] #
type Rep (Dendrogram e) Source #
Instance details Defined in ConClusion.Numeric.Statistics type Rep (Dendrogram e)

data JoinStrat e Source #

A strategy/distance measure for clusters.

Constructors

SingleLinkage
CompleteLinkage
Median
UPGMA
WPGMA
Centroid
Ward
LWFB e
LW e e e e

Instances

Instances details

Eq e => Eq (JoinStrat e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods (==) :: JoinStrat e -> JoinStrat e -> Bool # (/=) :: JoinStrat e -> JoinStrat e -> Bool #
Show e => Show (JoinStrat e) Source #
Instance details Defined in ConClusion.Numeric.Statistics Methods showsPrec :: Int -> JoinStrat e -> ShowS # show :: JoinStrat e -> String # showList :: [JoinStrat e] -> ShowS #

hca :: (MonadThrow m, Mutable r e, Mutable r (e, Ix1), Load r Ix1 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e) Source #

Performance improved hierarchical clustering algorithm. GENERIC_LINKAGE from figure 3, https://arxiv.org/pdf/1109.2378.pdf.

cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters Source #

Cut a Dendrogram at a given distance and obtain all clusters from it.