{-# LANGUAGE BangPatterns #-}
module Math.Clustering.Hierarchical.Spectral.Sparse
( hierarchicalSpectralCluster
, hierarchicalSpectralClusterAdj
, FeatureMatrix (..)
, B (..)
, Items (..)
, ShowB (..)
) where
import Data.Bool (bool)
import Data.Clustering.Hierarchical (Dendrogram (..))
import Data.Maybe (fromMaybe)
import Data.Tree (Tree (..))
import Math.Clustering.Spectral.Sparse (B (..), AdjacencyMatrix (..), getB, spectralCluster, spectralClusterK, spectralClusterNorm, spectralClusterKNorm)
import Math.Modularity.Sparse (getBModularity, getModularity)
import Math.Modularity.Types (Q (..))
import qualified Data.Foldable as F
import qualified Data.Set as Set
import qualified Data.Sparse.Common as S
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Unboxed as U
import qualified Numeric.LinearAlgebra.Sparse as S
import Math.Clustering.Hierarchical.Spectral.Types
import Math.Clustering.Hierarchical.Spectral.Utility
type FeatureMatrix = S.SpMatrix Double
type Items a = V.Vector a
type ShowB = ((Int, Int), [(Int, Int, Double)])
type NormalizeFlag = Bool
hasMultipleClusters :: S.SpVector Double -> Bool
hasMultipleClusters = (> 1) . Set.size . Set.fromList . S.toDenseListSV
hierarchicalSpectralCluster :: EigenGroup
-> NormalizeFlag
-> Maybe NumEigen
-> Maybe Int
-> Maybe Q
-> Maybe Runs
-> Items a
-> Either FeatureMatrix B
-> IO (ClusteringTree a)
hierarchicalSpectralCluster eigenGroup normFlag numEigenMay minSizeMay minModMay runsMay initItems initMat =
go initItems initB
where
initB = either (getB normFlag) id $ initMat
minMod = fromMaybe (Q 0) minModMay
minSize = fromMaybe 1 minSizeMay
numEigen = fromMaybe 1 numEigenMay
go :: Items a -> B -> IO (ClusteringTree a)
go !items !b =
if (S.nrows $ unB b) > 1
&& hasMultipleClusters clusters
&& ngMod > minMod
&& S.nrows (unB left) >= minSize
&& S.nrows (unB right) >= minSize
then do
goLeft <- go (subsetVector items leftIdxs) left
goRight <- go (subsetVector items rightIdxs) right
vertex <- getVertex
return $ Node { rootLabel = vertex
, subForest = [goLeft, goRight]
}
else do
vertex <- getVertex
return $ Node {rootLabel = vertex, subForest = []}
where
getVertex = do
pVal <- mapM (\ x -> permutationTestSparse
(\l m -> unQ $ getBModularity l m)
x
clusters
b
)
runsMay
return $ ClusteringVertex
{ _clusteringItems = items
, _ngMod = ngMod
, _pVal = pVal
}
clusters :: S.SpVector Double
clusters = spectralClustering eigenGroup b
spectralClustering :: EigenGroup -> B -> S.SpVector Double
spectralClustering SignGroup = spectralCluster
spectralClustering KMeansGroup = spectralClusterK numEigen 2
ngMod :: Q
ngMod = getBModularity clusters $ b
getSortedIdxs :: Double -> S.SpVector Double -> [Int]
getSortedIdxs val = VS.ifoldr' (\ !i !v !acc -> bool acc (i:acc) $ v == val) []
. VS.fromList
. S.toDenseListSV
leftIdxs :: [Int]
leftIdxs = getSortedIdxs 0 $ clusters
rightIdxs :: [Int]
rightIdxs = getSortedIdxs 1 $ clusters
left :: B
left = B $ extractRows (unB b) leftIdxs
right :: B
right = B $ extractRows (unB b) rightIdxs
extractRows :: S.SpMatrix Double -> [Int] -> S.SpMatrix Double
extractRows mat [] = S.zeroSM 0 0
extractRows mat xs =
S.transposeSM . S.fromColsL . fmap (S.extractRow mat) $ xs
hierarchicalSpectralClusterAdj :: EigenGroup
-> Maybe NumEigen
-> Maybe Int
-> Maybe Q
-> Maybe Runs
-> Items a
-> AdjacencyMatrix
-> IO (ClusteringTree a)
hierarchicalSpectralClusterAdj eigenGroup numEigenMay minSizeMay minModMay runsMay initItems initMat =
go initItems initMat
where
minMod = fromMaybe (Q 0) minModMay
minSize = fromMaybe 1 minSizeMay
numEigen = fromMaybe 1 numEigenMay
go :: Items a -> AdjacencyMatrix -> IO (ClusteringTree a)
go !items !mat =
if S.nrows mat > 1
&& hasMultipleClusters clusters
&& ngMod > minMod
&& S.nrows left >= minSize
&& S.nrows right >= minSize
then do
goLeft <- go (subsetVector items leftIdxs) left
goRight <- go (subsetVector items rightIdxs) right
vertex <- getVertex
return $ Node { rootLabel = vertex
, subForest = [ goLeft, goRight]
}
else do
vertex <- getVertex
return $ Node {rootLabel = vertex, subForest = []}
where
getVertex = do
pVal <- mapM (\ x -> permutationTestSparse
(\l m -> unQ $ getModularity l m)
x
clusters
mat
)
runsMay
return $ ClusteringVertex
{ _clusteringItems = items
, _ngMod = ngMod
, _pVal = pVal
}
clusters :: S.SpVector Double
clusters = spectralClustering eigenGroup mat
spectralClustering :: EigenGroup -> AdjacencyMatrix -> S.SpVector Double
spectralClustering SignGroup = spectralClusterNorm
spectralClustering KMeansGroup = spectralClusterKNorm numEigen 2
ngMod :: Q
ngMod = getModularity clusters mat
getSortedIdxs :: Double -> S.SpVector Double -> [Int]
getSortedIdxs val = VS.ifoldr' (\ !i !v !acc -> bool acc (i:acc) $ v == val) []
. VS.fromList
. S.toDenseListSV
leftIdxs :: [Int]
leftIdxs = getSortedIdxs 0 clusters
rightIdxs :: [Int]
rightIdxs = getSortedIdxs 1 clusters
left :: AdjacencyMatrix
left = getSubMat mat leftIdxs
right :: AdjacencyMatrix
right = getSubMat mat rightIdxs
getSubMat :: S.SpMatrix Double -> [Int] -> S.SpMatrix Double
getSubMat mat [] = S.zeroSM 0 0
getSubMat mat is = S.fromColsL
. (\x -> fmap (S.extractCol x) is)
. S.transposeSM
. S.fromColsL
. fmap (S.extractRow mat)
$ is