Copyright | Phillip Seeber 2023 |
---|---|
License | AGPL-3 |
Maintainer | phillip.seeber@googlemail.com |
Stability | experimental |
Portability | POSIX, Windows |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
ConClusion.Numeric.Statistics
Description
Synopsis
- data PCA = PCA {}
- pca :: (Numeric r Double, Manifest r Double, Load r Ix1 Double, Load r Ix2 Double, MonadThrow m) => Int -> Matrix r Double -> m PCA
- normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Manifest r e) => Array r Ix2 e -> Array r Ix2 e
- meanDeviation :: (Fractional e, Unbox e, Numeric r e, Manifest r e) => Matrix r e -> Matrix r e
- covariance :: (Numeric r e, Manifest r e, Fractional e) => Matrix r e -> Matrix r e
- type DistFn r e = Matrix r e -> Matrix r e
- lpNorm :: (Manifest r e, Floating e) => Int -> DistFn r e
- manhattan :: (Manifest r e, Floating e) => DistFn r e
- euclidean :: (Manifest r e, Floating e) => DistFn r e
- mahalanobis :: (Unbox e, Numeric r e, Manifest r e, Field e, Load r Ix1 e) => DistFn r e
- type Clusters = Vector B IntSet
- newtype DistanceInvalidException e = DistanceInvalidException e
- dbscan :: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r e) => DistFn r e -> Int -> e -> Matrix r e -> m Clusters
- data Dendrogram e
- data JoinStrat e
- = SingleLinkage
- | CompleteLinkage
- | Median
- | UPGMA
- | WPGMA
- | Centroid
- | Ward
- | LWFB e
- | LW e e e e
- hca :: (MonadThrow m, Manifest r e, Manifest r (e, Ix1), Load r Ix1 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e)
- cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters
PCA
Constructors
PCA | |
Fields
|
Arguments
:: (Numeric r Double, Manifest r Double, Load r Ix1 Double, Load r Ix2 Double, MonadThrow m) | |
=> Int | Dimensionalty after PCA transformation. |
-> Matrix r Double | \(m \times n\) Feaute matrix \(\mathbf{X}\), with \(m\) different measurements (rows) in \(n\) different trials (columns). |
-> m PCA |
Performs a PCA on the feature matrix \(\mathbf{X}\) by solving the eigenproblem of the covariance matrix. The function takes the feature matrix directly and perfoms the conversion to mean deviation form, the calculation of the covariance matrix and the eigenvalue problem automatically.
Variance
normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Manifest r e) => Array r Ix2 e -> Array r Ix2 e Source #
Normalise each value so that the maximum absolute value in each row becomes one.
meanDeviation :: (Fractional e, Unbox e, Numeric r e, Manifest r e) => Matrix r e -> Matrix r e Source #
Subtract the mean value of all columns from the feature matrix. Brings the feature matrix to mean deviation form.
covariance :: (Numeric r e, Manifest r e, Fractional e) => Matrix r e -> Matrix r e Source #
Obtains the covariance matrix \(\mathbf{C_X}\) from the feature matrix \(\mathbf{X}\). [ mathbf{C_X} equiv frac{1}{n - 1} mathbf{X} mathbf{X}^T ] where \(n\) is the number of columns in the matrix.
The feature matrix should be in mean deviation form, see meanDeviation
.
Distance Metrics
lpNorm :: (Manifest r e, Floating e) => Int -> DistFn r e Source #
The \(L_p\) norm between two vectors. Generalisation of Manhattan and Euclidean distances. [ d(mathbf{a}, mathbf{b}) = left( sum limits_{i=1}^n lvert mathbf{a}_i - mathbf{b}_i rvert ^p right) ^ frac{1}{p} ]
manhattan :: (Manifest r e, Floating e) => DistFn r e Source #
The Manhattan distance between two vectors. Specialisation of the \(L_p\) norm for \(p = 1\). [ d(mathbf{a}, mathbf{b}) = sum limits_{i=1}^n lvert mathbf{a}_i - mathbf{b}_i rvert ]
euclidean :: (Manifest r e, Floating e) => DistFn r e Source #
The Euclidean distance between two vectors. Specialisation of the \(L_p\) norm for \(p = 2\). [ d(mathbf{a}, mathbf{b}) = sqrt{sum limits_{i=1}^n (mathbf{a}_i - mathbf{b}_i)^2} ]
mahalanobis :: (Unbox e, Numeric r e, Manifest r e, Field e, Load r Ix1 e) => DistFn r e Source #
Mahalanobis distance between points. Suitable for non correlated axes. [ d(mathbf{a}, mathbf{b}) = sqrt{(mathbf{a} - mathbf{b})^T mathbf{S}^{-1} (mathbf{a} - mathbf{b})} ] where \(\mathbf{S}\) is the covariance matrix.
Cluster Algorithms
DBScan
newtype DistanceInvalidException e Source #
Exception for invalid search distances.
Constructors
DistanceInvalidException e |
Instances
(Typeable e, Show e) => Exception (DistanceInvalidException e) Source # | |
Defined in ConClusion.Numeric.Statistics Methods toException :: DistanceInvalidException e -> SomeException # fromException :: SomeException -> Maybe (DistanceInvalidException e) # | |
Show e => Show (DistanceInvalidException e) Source # | |
Defined in ConClusion.Numeric.Statistics Methods showsPrec :: Int -> DistanceInvalidException e -> ShowS # show :: DistanceInvalidException e -> String # showList :: [DistanceInvalidException e] -> ShowS # | |
Eq e => Eq (DistanceInvalidException e) Source # | |
Defined in ConClusion.Numeric.Statistics Methods (==) :: DistanceInvalidException e -> DistanceInvalidException e -> Bool # (/=) :: DistanceInvalidException e -> DistanceInvalidException e -> Bool # |
Arguments
:: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r e) | |
=> DistFn r e | Distance measure to build the distance matrix of all points. |
-> Int | Minimal number of members in a cluster. |
-> e | Search radius \(\epsilon\) |
-> Matrix r e | \(n\) \(m\)-dimensional data points as column vectors of a \(m \times n\) matrix. |
-> m Clusters | Resulting clusters. |
DBScan algorithm.
Hierarchical Cluster Analysis
data Dendrogram e Source #
A dendrogram as a binary tree.
Instances
FromJSON e => FromJSON (Dendrogram e) Source # | |
Defined in ConClusion.Numeric.Statistics Methods parseJSON :: Value -> Parser (Dendrogram e) # parseJSONList :: Value -> Parser [Dendrogram e] # | |
ToJSON e => ToJSON (Dendrogram e) Source # | |
Defined in ConClusion.Numeric.Statistics Methods toJSON :: Dendrogram e -> Value # toEncoding :: Dendrogram e -> Encoding # toJSONList :: [Dendrogram e] -> Value # toEncodingList :: [Dendrogram e] -> Encoding # | |
Generic (Dendrogram e) Source # | |
Defined in ConClusion.Numeric.Statistics Associated Types type Rep (Dendrogram e) :: Type -> Type # | |
Show e => Show (Dendrogram e) Source # | |
Defined in ConClusion.Numeric.Statistics Methods showsPrec :: Int -> Dendrogram e -> ShowS # show :: Dendrogram e -> String # showList :: [Dendrogram e] -> ShowS # | |
Eq e => Eq (Dendrogram e) Source # | |
Defined in ConClusion.Numeric.Statistics | |
type Rep (Dendrogram e) Source # | |
Defined in ConClusion.Numeric.Statistics |
A strategy/distance measure for clusters.
Constructors
SingleLinkage | |
CompleteLinkage | |
Median | |
UPGMA | |
WPGMA | |
Centroid | |
Ward | |
LWFB e | |
LW e e e e |
hca :: (MonadThrow m, Manifest r e, Manifest r (e, Ix1), Load r Ix1 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e) Source #
Performance improved hierarchical clustering algorithm. GENERIC_LINKAGE
from figure 3,
https://arxiv.org/pdf/1109.2378.pdf.
cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters Source #
Cut a Dendrogram
at a given distance and obtain all clusters from it.