module Numeric.Statistics.PCA (
pca, pcaN, pcaTransform, pcaReduce, pcaReduceN
) where
import qualified Data.Array.IArray as I
import Data.List(sortBy)
import Data.Ord(comparing)
import Numeric.LinearAlgebra
import Numeric.GSL.Statistics
import Numeric.Statistics
pca :: I.Array Int (Vector Double)
-> Double
-> Matrix Double
pca d q = let d' = fmap (\x -> x (scalar $ mean x)) d
cv = covarianceMatrix d'
(val',vec') = eigSH $ trustSym cv
val = toList val'
vec = toColumns vec'
v' = zip val vec
v = filter (\(x,_) -> x > q) v'
in fromColumns $ snd $ unzip v
pcaN :: I.Array Int (Vector Double)
-> Int
-> Matrix Double
pcaN d n = let d' = fmap (\x -> x (scalar $ mean x)) d
cv = covarianceMatrix d'
(val',vec') = eigSH $ trustSym cv
val = toList val'
vec = toColumns vec'
v' = zip val vec
v = take n $ reverse $ sortBy (comparing fst) v'
in fromColumns $ snd $ unzip v
pcaTransform :: I.Array Int (Vector Double)
-> Matrix Double
-> I.Array Int (Vector Double)
pcaTransform d m = let d' = fmap (\x -> x (scalar $ mean x)) d
in I.listArray (1,cols m) $ toRows $ (tr' m) <> (fromRows $ I.elems d')
pcaReduce :: I.Array Int (Vector Double)
-> Double
-> I.Array Int (Vector Double)
pcaReduce d q = let u = fmap (scalar . mean) d
d' = zipWith () (I.elems d) (I.elems u)
cv = covarianceMatrix $ I.listArray (I.bounds d) d'
(val',vec') = eigSH $ trustSym cv
val = toList val'
vec = toColumns vec'
v' = zip val vec
v = filter (\(x,_) -> x > q) v'
m = fromColumns $ snd $ unzip v
in I.listArray (I.bounds d) $ zipWith (+) (toRows $ m <> (tr' m) <> fromRows d') (I.elems u)
pcaReduceN :: I.Array Int (Vector Double)
-> Int
-> I.Array Int (Vector Double)
pcaReduceN d n = let u = fmap (scalar . mean) d
d' = zipWith () (I.elems d) (I.elems u)
cv = covarianceMatrix $ I.listArray (I.bounds d) d'
(val',vec') = eigSH $ trustSym cv
val = toList val'
vec = toColumns vec'
v' = zip val vec
v = take n $ reverse $ sortBy (comparing fst) v'
m = fromColumns $ snd $ unzip v
in I.listArray (I.bounds d) $ zipWith (+) (toRows $ m <> (tr' m) <> fromRows d') (I.elems u)