{-# LANGUAGE BangPatterns #-}
module Math.Clustering.Spectral.Eigen.FeatureMatrix
( B (..)
, B1 (..)
, B2 (..)
, LabelVector (..)
, spectral
, spectralCluster
, spectralClusterK
, getB
, b1ToB2
, getSimilarityFromB2
) where
import Data.Bool (bool)
import Data.Function (on)
import Data.List (sortBy, maximumBy, transpose)
import Data.Maybe (fromMaybe)
import Safe (headMay)
import qualified AI.Clustering.KMeans as K
import qualified Data.Eigen.SparseMatrix as S
import qualified Data.Map.Strict as Map
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Unboxed as U
import qualified Numeric.LinearAlgebra as H
import qualified Numeric.LinearAlgebra.Devel as H
import qualified Numeric.LinearAlgebra.SVD.SVDLIBC as SVD
type LabelVector = S.SparseMatrixXd
newtype B1 = B1 { unB1 :: S.SparseMatrixXd } deriving (Show)
newtype B2 = B2 { unB2 :: S.SparseMatrixXd } deriving (Show)
newtype D = D { unD :: S.SparseMatrixXd } deriving (Show)
newtype C = C { unC :: S.SparseMatrixXd } deriving (Show)
newtype B = B { unB :: S.SparseMatrixXd } deriving (Show)
epsilonZero :: Double -> Double
epsilonZero x = if abs x < 1e-12 then 0 else x
b1ToB2 :: B1 -> B2
b1ToB2 (B1 b1) =
B2
. S._imap (\ i j x
-> (log (fromIntegral n / (fromMaybe 0 $ dVec VS.!? j))) * x
)
$ b1
where
dVec :: VS.Vector Double
dVec = maybe (error "Cannot get number of non-zeros.") (VS.map fromIntegral)
. S.innerNNZs
. S.uncompress
$ b1
n = S.rows b1
b2ToB :: B2 -> B
b2ToB (B2 b2) =
B
. S._imap (\ i j x
-> x / (fromMaybe (error "Norm is 0.") $ eVec VS.!? i)
)
$ b2
where
eVec :: VS.Vector Double
eVec = VS.fromList . fmap S.norm . S.getRows $ b2
bToD :: B -> D
bToD (B b) = D . S.diagCol 0 $ (S._map abs b) * ((S._map abs $ S.transpose b) * S.ones n)
where
n = S.rows b
bdToC :: B -> D -> C
bdToC (B b) (D d) = C $ (S._map (\x -> x ** (- 1 / 2)) d) * b
secondLeft :: Int -> Int -> S.SparseMatrixXd -> S.SparseMatrixXd
secondLeft n e m = S.transpose
. S.fromDenseList
. fmap H.toList
. drop (n - 1)
. H.toRows
. (\(!x, _, _) -> x)
. SVD.sparseSvd (e + (n - 1))
. H.mkCSR
. fmap (\(!i, !j, !x) -> ((i, j), x))
. S.toList
$ m
getB :: Bool -> S.SparseMatrixXd -> B
getB True = b2ToB . b1ToB2 . B1
getB False = b2ToB . B2
spectral :: Int -> Int -> B -> S.SparseMatrixXd
spectral n e b = S._map epsilonZero . secondLeft n e . unC . bdToC b . bToD $ b
spectralCluster :: B -> LabelVector
spectralCluster (B b)
| S.rows b < 1 = S.fromDenseList [[]]
| S.rows b == 1 = S.fromDenseList [[0]]
| otherwise = S.fromDenseList
. (fmap . fmap) (bool 0 1 . (>= 0))
. S.toDenseList
. spectral 2 1
$ B b
spectralClusterK :: Int -> Int -> B -> LabelVector
spectralClusterK e k (B b)
| S.rows b < 1 = S.fromDenseList [[]]
| S.rows b == 1 = S.fromDenseList [[0]]
| otherwise = consensusKmeans 100
. V.fromList
. fmap U.fromList
. concatMap S.toDenseList
. S.getRows
. S.fromCols
. fmap normNormalize
. S.getCols
. spectral 2 e
$ B b
consensusKmeans :: Int -> V.Vector (U.Vector Double) -> LabelVector
consensusKmeans x vs = S.fromDenseList
. fmap ((:[]) . fromIntegral . mostCommon)
. transpose
. fmap kmeansFunc
$ [1 .. fromIntegral x]
where
kmeansFunc run =
(\xs -> if headMay xs == Just 1 then fmap (bool 0 1 . (== 0)) xs else xs)
. U.toList
. K.membership
. K.kmeansBy 2 vs id
$ K.defaultKMeansOpts
{ K.kmeansMethod = K.Forgy
, K.kmeansClusters = False
, K.kmeansSeed = U.fromList [run]
}
mostCommon :: (Ord a) => [a] -> a
mostCommon [] = error "Cannot find most common element of empty list."
mostCommon [x] = x
mostCommon xs = fst
. maximumBy (compare `on` snd)
. Map.toAscList
. Map.fromListWith (+)
. flip zip [1,1..]
$ xs
normNormalize :: S.SparseMatrixXd -> S.SparseMatrixXd
normNormalize xs = S._map (/ norm) xs
where
norm = S.norm xs
getSimilarityFromB2 :: B2 -> Int -> Int -> Double
getSimilarityFromB2 (B2 b2) i j =
(((S.getRow i b2) * (S.transpose $ S.getRow j b2)) S.! (0, 0))
/ (S.norm (S.getRow i b2) * S.norm (S.getRow j b2))