module MachineLearning.PCA
(
getDimReducer
, getDimReducer_rv
)
where
import Data.Maybe (fromMaybe)
import qualified Data.Vector.Storable as V
import qualified Numeric.LinearAlgebra as LA
import Numeric.LinearAlgebra ((<>), (??))
import qualified MachineLearning as ML
import MachineLearning.Types (R, Vector, Matrix)
covarianceMatrix :: Matrix -> Matrix
covarianceMatrix x = ((LA.tr x) <> x) / (fromIntegral $ LA.rows x)
pca :: Matrix -> (Matrix, Vector)
pca x = (u, s)
where sigma = covarianceMatrix x
(u, s, _) = LA.svd sigma
getDimReducer :: Matrix -> Int -> (Matrix -> Matrix, R, Matrix)
getDimReducer x k = (reducer, retainedVariance, reducer xNorm)
where muSigma = ML.meanStddev x
xNorm = ML.featureNormalization muSigma x
(u, s) = pca xNorm
u' = u ?? (LA.All, LA.Take k)
reducer xx = (ML.featureNormalization muSigma xx) <> u'
retainedVariance = (V.sum $ V.slice 0 k s) / (V.sum s)
getDimReducer_rv :: Matrix -> R -> (Matrix -> Matrix, Int, Matrix)
getDimReducer_rv x rv = (reducer, k, reducer xNorm)
where muSigma = ML.meanStddev x
xNorm = ML.featureNormalization muSigma x
(u, s) = pca xNorm
sum_s = V.sum s
variances = (V.scanl' (+) 0 s) / (LA.scalar sum_s)
k = fromMaybe ((V.length s) - 1) $ V.findIndex (\v -> v >= rv) variances
u' = u ?? (LA.All, LA.Take k)
reducer xx = (ML.featureNormalization muSigma xx) <> u'