Copyright | Phillip Seeber 2023 |
---|---|
License | AGPL-3 |
Maintainer | phillip.seeber@googlemail.com |
Stability | experimental |
Portability | POSIX, Windows |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
Synopsis
- data PCA = PCA {}
- pca :: (Numeric r Double, Manifest r Double, Load r Ix1 Double, Load r Ix2 Double, MonadThrow m) => Int -> Matrix r Double -> m PCA
- normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Manifest r e) => Array r Ix2 e -> Array r Ix2 e
- meanDeviation :: (Fractional e, Unbox e, Numeric r e, Manifest r e) => Matrix r e -> Matrix r e
- covariance :: (Numeric r e, Manifest r e, Fractional e) => Matrix r e -> Matrix r e
- type DistFn r e = Matrix r e -> Matrix r e
- lpNorm :: (Manifest r e, Floating e) => Int -> DistFn r e
- manhattan :: (Manifest r e, Floating e) => DistFn r e
- euclidean :: (Manifest r e, Floating e) => DistFn r e
- mahalanobis :: (Unbox e, Numeric r e, Manifest r e, Field e, Load r Ix1 e) => DistFn r e
- type Clusters = Vector B IntSet
- newtype DistanceInvalidException e = DistanceInvalidException e
- dbscan :: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r e) => DistFn r e -> Int -> e -> Matrix r e -> m Clusters
- data Dendrogram e
- data JoinStrat e
- = SingleLinkage
- | CompleteLinkage
- | Median
- | UPGMA
- | WPGMA
- | Centroid
- | Ward
- | LWFB e
- | LW e e e e
- hca :: (MonadThrow m, Manifest r e, Manifest r (e, Ix1), Load r Ix1 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e)
- cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters
PCA
PCA | |
|
:: (Numeric r Double, Manifest r Double, Load r Ix1 Double, Load r Ix2 Double, MonadThrow m) | |
=> Int | Dimensionalty after PCA transformation. |
-> Matrix r Double | \(m \times n\) Feaute matrix \(\mathbf{X}\), with \(m\) different measurements (rows) in \(n\) different trials (columns). |
-> m PCA |
Performs a PCA on the feature matrix \(\mathbf{X}\) by solving the eigenproblem of the covariance matrix. The function takes the feature matrix directly and perfoms the conversion to mean deviation form, the calculation of the covariance matrix and the eigenvalue problem automatically.
Variance
normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Manifest r e) => Array r Ix2 e -> Array r Ix2 e Source #
Normalise each value so that the maximum absolute value in each row becomes one.
meanDeviation :: (Fractional e, Unbox e, Numeric r e, Manifest r e) => Matrix r e -> Matrix r e Source #
Subtract the mean value of all columns from the feature matrix. Brings the feature matrix to mean deviation form.
covariance :: (Numeric r e, Manifest r e, Fractional e) => Matrix r e -> Matrix r e Source #
Obtains the covariance matrix \(\mathbf{C_X}\) from the feature matrix \(\mathbf{X}\). [ mathbf{C_X} equiv frac{1}{n - 1} mathbf{X} mathbf{X}^T ] where \(n\) is the number of columns in the matrix.
The feature matrix should be in mean deviation form, see meanDeviation
.
Distance Metrics
lpNorm :: (Manifest r e, Floating e) => Int -> DistFn r e Source #
The \(L_p\) norm between two vectors. Generalisation of Manhattan and Euclidean distances. [ d(mathbf{a}, mathbf{b}) = left( sum limits_{i=1}^n lvert mathbf{a}_i - mathbf{b}_i rvert ^p right) ^ frac{1}{p} ]
manhattan :: (Manifest r e, Floating e) => DistFn r e Source #
The Manhattan distance between two vectors. Specialisation of the \(L_p\) norm for \(p = 1\). [ d(mathbf{a}, mathbf{b}) = sum limits_{i=1}^n lvert mathbf{a}_i - mathbf{b}_i rvert ]
euclidean :: (Manifest r e, Floating e) => DistFn r e Source #
The Euclidean distance between two vectors. Specialisation of the \(L_p\) norm for \(p = 2\). [ d(mathbf{a}, mathbf{b}) = sqrt{sum limits_{i=1}^n (mathbf{a}_i - mathbf{b}_i)^2} ]
mahalanobis :: (Unbox e, Numeric r e, Manifest r e, Field e, Load r Ix1 e) => DistFn r e Source #
Mahalanobis distance between points. Suitable for non correlated axes. [ d(mathbf{a}, mathbf{b}) = sqrt{(mathbf{a} - mathbf{b})^T mathbf{S}^{-1} (mathbf{a} - mathbf{b})} ] where \(\mathbf{S}\) is the covariance matrix.
Cluster Algorithms
DBScan
newtype DistanceInvalidException e Source #
Exception for invalid search distances.
Instances
(Typeable e, Show e) => Exception (DistanceInvalidException e) Source # | |
Defined in ConClusion.Numeric.Statistics | |
Show e => Show (DistanceInvalidException e) Source # | |
Defined in ConClusion.Numeric.Statistics showsPrec :: Int -> DistanceInvalidException e -> ShowS # show :: DistanceInvalidException e -> String # showList :: [DistanceInvalidException e] -> ShowS # | |
Eq e => Eq (DistanceInvalidException e) Source # | |
Defined in ConClusion.Numeric.Statistics (==) :: DistanceInvalidException e -> DistanceInvalidException e -> Bool # (/=) :: DistanceInvalidException e -> DistanceInvalidException e -> Bool # |
:: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r e) | |
=> DistFn r e | Distance measure to build the distance matrix of all points. |
-> Int | Minimal number of members in a cluster. |
-> e | Search radius \(\epsilon\) |
-> Matrix r e | \(n\) \(m\)-dimensional data points as column vectors of a \(m \times n\) matrix. |
-> m Clusters | Resulting clusters. |
DBScan algorithm.
Hierarchical Cluster Analysis
data Dendrogram e Source #
A dendrogram as a binary tree.
Instances
A strategy/distance measure for clusters.
SingleLinkage | |
CompleteLinkage | |
Median | |
UPGMA | |
WPGMA | |
Centroid | |
Ward | |
LWFB e | |
LW e e e e |
hca :: (MonadThrow m, Manifest r e, Manifest r (e, Ix1), Load r Ix1 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e) Source #
Performance improved hierarchical clustering algorithm. GENERIC_LINKAGE
from figure 3,
https://arxiv.org/pdf/1109.2378.pdf.
cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters Source #
Cut a Dendrogram
at a given distance and obtain all clusters from it.