{-# LANGUAGE BangPatterns #-}
module Math.Clustering.Hierarchical.Spectral.Dense
( hierarchicalSpectralCluster
, AdjacencyMatrix (..)
, Items (..)
) where
import Data.Bool (bool)
import Data.Clustering.Hierarchical (Dendrogram (..))
import Data.Maybe (fromMaybe)
import Data.Tree (Tree (..))
import Math.Clustering.Spectral.Dense (spectralClusterNorm, spectralClusterKNorm)
import Math.Modularity.Dense (getModularity)
import Math.Modularity.Types (Q (..))
import qualified Data.Foldable as F
import qualified Data.Set as Set
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Numeric.LinearAlgebra as H
import Math.Clustering.Hierarchical.Spectral.Types
import Math.Clustering.Hierarchical.Spectral.Utility
type AdjacencyMatrix = H.Matrix Double
type Items a = V.Vector a
hasMultipleClusters :: H.Vector Double -> Bool
hasMultipleClusters = (> 1) . Set.size . Set.fromList . H.toList
hierarchicalSpectralCluster :: (Show a) => EigenGroup
-> Maybe NumEigen
-> Maybe Int
-> Maybe Q
-> Items a
-> AdjacencyMatrix
-> ClusteringTree a
hierarchicalSpectralCluster !eigenGroup !numEigenMay !minSizeMay !minModMay !items !adjMat =
if H.rows adjMat > 1
&& hasMultipleClusters clusters
&& ngMod > minMod
&& H.rows left >= minSize
&& H.rows right >= minSize
then
Node { rootLabel = vertex
, subForest =
[ hierarchicalSpectralCluster
eigenGroup
numEigenMay
minSizeMay
minModMay
(subsetVector items leftIdxs)
left
, hierarchicalSpectralCluster
eigenGroup
numEigenMay
minSizeMay
minModMay
(subsetVector items rightIdxs)
right
]
}
else
Node {rootLabel = vertex, subForest = []}
where
minMod = fromMaybe (Q 0) minModMay
minSize = fromMaybe 1 minSizeMay
numEigen = fromMaybe 1 numEigenMay
vertex = ClusteringVertex { _clusteringItems = items
, _ngMod = ngMod
}
clusters = spectralClustering eigenGroup adjMat
spectralClustering :: EigenGroup -> AdjacencyMatrix -> H.Vector Double
spectralClustering SignGroup = spectralClusterNorm
spectralClustering KMeansGroup = spectralClusterKNorm numEigen 2
ngMod = getModularity clusters $ adjMat
getIdxs val = VS.ifoldr' (\ !i !v !acc -> bool acc (i:acc) $ v == val) []
leftIdxs = getIdxs 0 $ clusters
rightIdxs = getIdxs 1 $ clusters
left = adjMat H.?? (H.Pos (H.idxs leftIdxs), H.Pos (H.idxs leftIdxs))
right =
adjMat H.?? (H.Pos (H.idxs rightIdxs), H.Pos (H.idxs rightIdxs))