{-# LANGUAGE BangPatterns #-}
module Math.Clustering.Spectral.Sparse
( B (..)
, B1 (..)
, B2 (..)
, AdjacencyMatrix (..)
, LabelVector (..)
, spectral
, spectralCluster
, spectralClusterK
, spectralNorm
, spectralClusterNorm
, spectralClusterKNorm
, getB
, b1ToB2
, getSimilarityFromB2
) where
import Data.Bool (bool)
import Data.Maybe (fromMaybe)
import Data.Function (on)
import Data.List (sortBy, foldl1', maximumBy, transpose)
import Safe (headMay)
import qualified AI.Clustering.KMeans as K
import qualified Data.Map.Strict as Map
import qualified Data.Sparse.Common as S
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import qualified Numeric.LinearAlgebra as H
import qualified Numeric.LinearAlgebra.Devel as H
import qualified Numeric.LinearAlgebra.SVD.SVDLIBC as SVD
type LabelVector = S.SpVector Double
type AdjacencyMatrix = S.SpMatrix Double
newtype B1 = B1 { unB1 :: S.SpMatrix Double } deriving (Show)
newtype B2 = B2 { unB2 :: S.SpMatrix Double } deriving (Show)
newtype D = D { unD :: S.SpMatrix Double } deriving (Show)
newtype C = C { unC :: S.SpMatrix Double } deriving (Show)
newtype B = B { unB :: S.SpMatrix Double } deriving (Show)
b1ToB2 :: B1 -> B2
b1ToB2 (B1 b1) =
B2
. S.fromListSM (n, m)
. fmap (\ (!i, !j, !x)
-> (i, j, (log (fromIntegral n / (S.lookupDenseSV j dVec))) * x)
)
. S.toListSM
$ b1
where
dVec :: S.SpVector Double
dVec = S.vr
. fmap (sum . fmap (\x -> if x > 0 then 1 else 0))
. S.toRowsL
. S.transposeSM
$ b1
n = S.nrows b1
m = S.ncols b1
b2ToB :: B2 -> B
b2ToB (B2 b2) =
B
. S.fromListSM (n, m)
. fmap (\(!i, !j, !x) -> (i, j, x / (S.lookupDenseSV i eVec)))
. S.toListSM
$ b2
where
eVec :: S.SpVector Double
eVec = S.vr . fmap S.norm2 . S.toRowsL $ b2
n = S.nrows b2
m = S.ncols b2
norm2 :: S.SpVector Double -> Double
norm2 = sqrt . sum . fmap (** 2)
bToD :: B -> D
bToD (B b) = D
. S.diagonalSM
. flip S.extractCol 0
$ (fmap abs b)
S.#~# ((fmap abs $ S.transposeSM b) S.#~# (S.fromColsL [S.onesSV n]))
where
n = S.nrows b
bdToC :: B -> D -> C
bdToC (B b) (D d) = C $ (fmap (\x -> x ** (- 1 / 2)) d) S.#~# b
secondLeft :: Int -> Int -> S.SpMatrix Double -> [S.SpVector Double]
secondLeft n e m =
fmap (S.sparsifySV . S.fromListDenseSV e . drop (n - 1) . H.toList)
. H.toColumns
. (\(!x, _, _) -> x)
. SVD.sparseSvd' (SVD.defaultSVDParams {SVD.maxIters = Just 1000}) (e + (n - 1))
. H.mkCSR
. fmap (\(!i, !j, !x) -> ((i, j), x))
. S.toListSM
$ m
getB :: Bool -> S.SpMatrix Double -> B
getB True = b2ToB . b1ToB2 . B1
getB False = b2ToB . B2
spectral :: Int -> Int -> B -> [S.SpVector Double]
spectral n e b
| e < 1 = error "Less than 1 eigenvector chosen for clustering."
| n < 1 = error "N < 1, cannot go before first eigenvector."
| otherwise =
fmap S.sparsifySV . secondLeft n e . unC . bdToC b . bToD $ b
spectralCluster :: B -> LabelVector
spectralCluster (B b)
| S.nrows b < 1 = S.zeroSV 0
| S.nrows b == 1 = S.zeroSV 1
| otherwise = S.sparsifySV
. S.vr
. fmap (bool 0 1 . (>= 0))
. S.toDenseListSV
. foldl1' S.concatSV
. spectral 2 1
$ B b
spectralClusterK :: Int -> Int -> B -> LabelVector
spectralClusterK e k (B b)
| S.nrows b < 1 = S.zeroSV 0
| S.nrows b == 1 = S.zeroSV 1
| otherwise = kmeansVec k . spectral 2 e $ B b
kmeansVec :: Int -> [S.SpVector Double] -> LabelVector
kmeansVec k = consensusKmeans 100
. V.fromList
. fmap (U.fromList . S.toDenseListSV)
. S.toRowsL
. S.fromColsL
. fmap S.normalize2
. S.toColsL
. S.transpose
. S.fromColsL
consensusKmeans :: Int -> V.Vector (U.Vector Double) -> LabelVector
consensusKmeans x vs = S.sparsifySV
. S.vr
. fmap (fromIntegral . mostCommon)
. transpose
. fmap kmeansFunc
$ [1 .. fromIntegral x]
where
kmeansFunc run =
(\xs -> if headMay xs == Just 1 then fmap (bool 0 1 . (== 0)) xs else xs)
. U.toList
. K.membership
. K.kmeansBy 2 vs id
$ K.defaultKMeansOpts
{ K.kmeansMethod = K.Forgy
, K.kmeansClusters = False
, K.kmeansSeed = U.fromList [run]
}
mostCommon :: (Ord a) => [a] -> a
mostCommon [] = error "Cannot find most common element of empty list."
mostCommon [x] = x
mostCommon xs = fst
. maximumBy (compare `on` snd)
. Map.toAscList
. Map.fromListWith (+)
. flip zip [1,1..]
$ xs
getSimilarityFromB2 :: B2 -> Int -> Int -> Double
getSimilarityFromB2 (B2 b2) i j =
S.dot (S.extractRow b2 i) (S.extractRow b2 j)
/ (S.norm2 (S.extractRow b2 i) * S.norm2 (S.extractRow b2 j))
spectralNorm :: Int -> Int -> AdjacencyMatrix -> [S.SpVector Double]
spectralNorm n e mat = fmap S.sparsifySV $ secondLeft n e lNorm
where
lNorm = i S.^+^ (S.transpose invRootD S.#~# (mat S.#~# invRootD))
invRootD = S.diagonalSM
. S.vr
. fmap ((\x -> if x == 0 then x else x ** (- 1 / 2)) . sum)
. S.toRowsL
. fmap abs
$ mat
i = S.eye . S.nrows $ mat
spectralClusterKNorm :: Int -> Int -> AdjacencyMatrix -> LabelVector
spectralClusterKNorm e k = kmeansVec k . spectralNorm 2 e
spectralClusterNorm :: AdjacencyMatrix -> LabelVector
spectralClusterNorm = S.sparsifySV
. S.vr
. fmap (bool 0 1 . (>= 0))
. S.toDenseListSV
. foldl1' S.concatSV
. spectralNorm 2 1