{- Math.Clustering.Spectral.Eigen.AdjacencyMatrix
Gregory W. Schwartz

Collects the functions pertaining to spectral clustering.
-}

{-# LANGUAGE BangPatterns #-}

module Math.Clustering.Spectral.Eigen.AdjacencyMatrix
    ( spectralClusterKNorm
    , spectralClusterNorm
    , spectralNorm
    , getDegreeMatrix
    , secondLeft
    , AdjacencyMatrix (..)
    , LabelVector (..)
    ) where

-- Remote
import Control.Monad (replicateM)
import Data.Bool (bool)
import Data.Function (on)
import Data.List (sortBy, maximumBy, transpose)
import Data.Maybe (fromMaybe)
import Safe (headMay)
import System.Random.MWC (createSystemRandom, uniform)
import qualified AI.Clustering.KMeans as K
import qualified Data.Eigen.SparseMatrix as S
import qualified Data.Map.Strict as Map
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import qualified Numeric.LinearAlgebra as H
import qualified Numeric.LinearAlgebra.Devel as H
import qualified Numeric.LinearAlgebra.SVD.SVDLIBC as SVD
import qualified Statistics.Quantile as Stat

-- Local

type LabelVector     = S.SparseMatrixXd
type AdjacencyMatrix = S.SparseMatrixXd

-- | Assign close to 0 as 0.
epsilonZero :: Double -> Double
epsilonZero x = if abs x < 1e-12 then 0 else x

-- | Returns the clustering of the eigenvectors with the second smallest
-- eigenvalues of the symmetric normalized Laplacian L. Computes real symmetric
-- part of L, so ensure the input is real and symmetric. Diagonal should be 0s
-- for adjacency matrix. Clusters the eigenvector using kmeans into k groups.
spectralClusterKNorm :: Int -> Int -> AdjacencyMatrix -> LabelVector
spectralClusterKNorm e k mat
  | S.rows mat < 1  = S.fromDenseList [[]]
  | S.rows mat == 1 = S.fromDenseList [[0]]
  | otherwise       = kmeansVec k . spectralNorm 2 e $ mat

-- | Returns the clustering of the eigenvectors with the second smallest
-- eigenvalues of the symmetric normalized Laplacian L. Computes real symmetric
-- part of L, so ensure the input is real and symmetric. Diagonal should be 0s
-- for adjacency matrix. Clusters the eigenvector by sign.
spectralClusterNorm :: AdjacencyMatrix -> LabelVector
spectralClusterNorm mat
  | S.rows mat < 1  = S.fromDenseList [[]]
  | S.rows mat == 1 = S.fromDenseList [[0]]
  | otherwise       = S.fromDenseList
                    . (fmap . fmap) (bool 0 1 . (>= 0))
                    . S.toDenseList
                    . spectralNorm 2 1
                    $ mat

-- | Returns the eigenvector with the second smallest eigenvalue (or N start)
-- and E on of the symmetric normalized Laplacian L. Computes real symmetric
-- part of L, so ensure the input is real and symmetric. Diagonal should be 0s
-- for adjacency matrix. Uses I + Lnorm instead of I - Lnorm to find second
-- largest singular value instead of second smallest for Lnorm.
spectralNorm :: Int -> Int -> AdjacencyMatrix -> S.SparseMatrixXd
spectralNorm n e mat
    | e < 1 = error "Less than 1 eigenvector chosen for clustering."
    | n < 1 = error "N < 1, cannot go before first eigenvector."
    | otherwise = S._map epsilonZero $ secondLeft n e lNorm
  where
    lNorm    = i + (S.transpose invRootD * (mat * invRootD))
    invRootD = S.diagRow 0
             . S._map (\x -> if x == 0 then x else x ** (- 1 / 2))
             . getDegreeVector
             $ mat
    i        = S.ident . S.rows $ mat

-- | Second largest eigenvector. Unused, untested, don't use.
secondLargest :: S.SparseMatrixXd -> IO S.SparseMatrixXd
secondLargest a = do
    (first, firstVal) <- powerIt a

    let firstScale = S.scale firstVal (first * S.transpose first)
        b          = a - firstScale

    fmap fst $ powerIt b

-- | Rayleigh quotient. Takes a matrix and an eigenvector to find the
-- corresponding eigenvalue. Unused, untested, don't use.
rayQuot :: S.SparseMatrixXd -> S.SparseMatrixXd -> Double
rayQuot a x = ((S.transpose (a * x) * x) S.! (0, 0))
            / ((S.transpose x * x) S.! (0, 0))

-- | Power iteration. Unused, untested, don't use.
powerIt :: S.SparseMatrixXd -> IO (S.SparseMatrixXd, Double)
powerIt a = do
    g     <- createSystemRandom
    start <-
        fmap (S.fromDenseList . fmap (: [])) . replicateM (S.rows a) . uniform $ g
    let go
            :: Int
            -> Double
            -> S.SparseMatrixXd -- Eigenvector guess.
            -> Double -- Eigenvalue guess.
            -> (S.SparseMatrixXd, Double)
        go !i !e !b !lambda =
            if (abs (lambda' - lambda) < e) || (i < 0)
                then (b', lambda')
                else go (i - 1) e b' lambda'
          where
            absMat :: S.SparseMatrixXd -> S.SparseMatrixXd
            absMat = S._map abs
            b' :: S.SparseMatrixXd
            b' = S.scale (1 / maxElement ab) ab
            ab :: S.SparseMatrixXd
            ab = a * b
            maxElement = maximum . fmap (\(_, _, !x) -> abs x) . S.toList
            lambda' = S.norm ab / S.norm b
    return $ go 1000 0.000001 start (1 / 0)

-- | Executes kmeans to cluster a one dimensional vector.
kmeansVec :: Int -> S.SparseMatrixXd -> LabelVector
kmeansVec k = consensusKmeans 100
            . V.fromList
            . fmap U.fromList
            . concatMap S.toDenseList
            . S.getRows
            . S.fromCols
            . fmap normNormalize
            . S.getCols

-- | Consensus kmeans.
consensusKmeans :: Int -> V.Vector (U.Vector Double) -> LabelVector
consensusKmeans x vs = S.fromDenseList
                     . fmap ((:[]) . fromIntegral . mostCommon)
                     . transpose
                     . fmap kmeansFunc
                     $ [1 .. fromIntegral x]
  where
    kmeansFunc run =
      (\xs -> if headMay xs == Just 1 then fmap (bool 0 1 . (== 0)) xs else xs)
        . U.toList
        . K.membership
        . K.kmeansBy 2 vs id
        $ K.defaultKMeansOpts
            { K.kmeansMethod = K.Forgy
            , K.kmeansClusters = False
            , K.kmeansSeed = U.fromList [run]
            }

-- | Get the most common element of a list.
mostCommon :: (Ord a) => [a] -> a
mostCommon [] = error "Cannot find most common element of empty list."
mostCommon [x] = x
mostCommon xs = fst
               . maximumBy (compare `on` snd)
               . Map.toAscList
               . Map.fromListWith (+)
               . flip zip [1,1..]
               $ xs

-- | Normalize by the norm of a vector.
normNormalize :: S.SparseMatrixXd -> S.SparseMatrixXd
normNormalize xs = S._map (/ norm) xs
  where
    norm = S.norm xs

-- | Obtain the second largest value singular vector (or Nth) and E on of a
-- sparse matrix.
secondLeft :: Int -> Int -> S.SparseMatrixXd -> S.SparseMatrixXd
secondLeft n e m = S.transpose
               . S.fromDenseList
               . fmap H.toList
               . drop (n - 1)
               . H.toRows
               . (\(!x, _, _) -> x)
               . SVD.sparseSvd (e + (n - 1))
               . H.mkCSR
               . fmap (\(!i, !j, !x) -> ((i, j), x))
               . S.toList
               $ m

-- | Obtain the second largest value singular vector of a sparse matrix.
denseSecondLeft :: S.SparseMatrixXd -> S.SparseMatrixXd
denseSecondLeft m = S.fromDenseList
                  . fmap (:[])
                  . H.toList
                  . (!! 2)
                  . H.toColumns
                  . (\(!x, _, _) -> x)
                  . H.svd
                  . H.assoc (S.rows m, S.cols m) 0
                  . fmap (\(!i, !j, !x) -> ((i, j), x))
                  . S.toList
                  $ m

-- | Obtain the signed degree matrix. Faster for columns.
getDegreeMatrix :: AdjacencyMatrix -> S.SparseMatrixXd
getDegreeMatrix = S.diagRow 0 . getDegreeVector

-- | Obtain the signed degree vector. Faster for columns.
getDegreeVector :: AdjacencyMatrix -> S.SparseMatrixXd
getDegreeVector = S.getColSums . S._map abs