| Copyright | (c) 2015 Kai Zhang |
|---|---|
| License | MIT |
| Maintainer | kai@kzhang.org |
| Stability | experimental |
| Portability | portable |
| Safe Haskell | None |
| Language | Haskell2010 |
AI.Clustering.Hierarchical
Contents
Description
High performance agglomerative hierarchical clustering library. Example:
```
>>> :set -XOverloadedLists
>>> import qualified Data.Vector as V
>>> let points = [[2, 3, 4], [2, 1, 2], [2, 1, 6], [2, 4, 6], [5, 1, 2]] :: V.Vector (V.Vector Double)
>>> let dendro = hclust Average points euclidean
>>> print dendro
Branch 5 4.463747440868191 (Branch 3 2.914213562373095 (Leaf (fromList [2.0,1.0,6.0])) (Branch 2 2.23606797749979 (Leaf (fromList [2.0,3.0,4.0])) (Leaf (fromList [2.0,4.0,6.0])))) (Branch 2 3.0 (Leaf (fromList [2.0,1.0,2.0])) (Leaf (fromList [5.0,1.0,2.0])))
>>> putStr $ drawDendrogram $ fmap show dendro
h: 4.4637
|
+- h: 2.9142
|  |
|  +- fromList [2.0,1.0,6.0]
|  |
|  `- h: 2.2361
|     |
|     +- fromList [2.0,3.0,4.0]
|     |
|     `- fromList [2.0,4.0,6.0]
|
`- h: 3.0000
   |
   +- fromList [2.0,1.0,2.0]
   |
   `- fromList [5.0,1.0,2.0]
```
Synopsis
- data Dendrogram a
- = Leaf !a
- | Branch !Size !Distance !(Dendrogram a) !(Dendrogram a)
- size :: Dendrogram a -> Int
- data Linkage
- hclust :: Vector v a => Linkage -> v a -> DistFn a -> Dendrogram a
- normalize :: Dendrogram a -> Dendrogram a
- cutAt :: Dendrogram a -> Distance -> [Dendrogram a]
- flatten :: Dendrogram a -> [a]
- drawDendrogram :: Dendrogram String -> String
- euclidean :: Vector v Double => DistFn (v Double)
- hamming :: (Vector v a, Vector v Bool, Eq a) => DistFn (v a)
Documentation
data Dendrogram a Source #
Constructors
| Leaf !a | |
| Branch !Size !Distance !(Dendrogram a) !(Dendrogram a) |
Instances
| Functor Dendrogram Source # | |
Defined in AI.Clustering.Hierarchical.Types Methods fmap :: (a -> b) -> Dendrogram a -> Dendrogram b # (<$) :: a -> Dendrogram b -> Dendrogram a # | |
| Eq a => Eq (Dendrogram a) Source # | |
Defined in AI.Clustering.Hierarchical.Types | |
| Show a => Show (Dendrogram a) Source # | |
Defined in AI.Clustering.Hierarchical.Types Methods showsPrec :: Int -> Dendrogram a -> ShowS # show :: Dendrogram a -> String # showList :: [Dendrogram a] -> ShowS # | |
| Binary a => Binary (Dendrogram a) Source # | |
Defined in AI.Clustering.Hierarchical.Types | |
size :: Dendrogram a -> Int Source #
O(1) Return the size of a dendrogram.
data Linkage Source #
Different hierarchical clustering schemes.
Constructors
| Single | O(n^2) Single linkage, $d(A,B) = \min_{a \in A, b \in B} d(a,b)$. |
| Complete | O(n^2) Complete linkage, $d(A,B) = \max_{a \in A, b \in B} d(a,b)$. |
| Average | O(n^2) Average linkage or UPGMA, $d(A,B) = \frac{\sum_{a \in A}\sum_{b \in B} d(a,b)}{\lvert A \rvert \, \lvert B \rvert}$. |
| Weighted | O(n^2) Weighted linkage. |
| Ward | O(n^2) Ward's method. |
| Centroid | O(n^3) Centroid linkage, not implemented. |
| Median | O(n^3) Median linkage, not implemented. |
hclust :: Vector v a => Linkage -> v a -> DistFn a -> Dendrogram a Source #
Perform hierarchical clustering.
normalize :: Dendrogram a -> Dendrogram a Source #
Normalize the tree heights so that the highest is 1.
cutAt :: Dendrogram a -> Distance -> [Dendrogram a] Source #
Cut a dendrogram at a given height.
flatten :: Dendrogram a -> [a] Source #
Return the elements of a dendrogram in pre-order.
drawDendrogram :: Dendrogram String -> String Source #
2-dimensional drawing of a dendrogram.
Distance functions
euclidean :: Vector v Double => DistFn (v Double) Source #
Compute euclidean distance between two points.
References
Müllner D (2011). Modern Hierarchical, Agglomerative Clustering Algorithms. ArXiv:1109.2378 [stat.ML]. http://arxiv.org/abs/1109.2378