module Numeric.Statistics.PCA (
pca, pcaN, pcaTransform, pcaReduce, pcaReduceN
) where
import qualified Data.Array.IArray as I
import Data.List(sortBy)
import Data.Ord(comparing)
import Numeric.LinearAlgebra
import Numeric.GSL.Statistics
pca :: I.Array Int (Vector Double)
-> Double
-> (Vector Double, Matrix Double)
pca d q = let d' = fmap (\x -> x (scalar $ mean x)) d
d'' = fromColumns $ I.elems d'
(_,vec',uni') = svd d''
vec = toList vec'
uni = toColumns uni'
v' = zip vec uni
v = filter (\(x,_) -> x > q) v'
(eigs,vs) = unzip v
in (fromList eigs,fromColumns vs)
pcaN :: I.Array Int (Vector Double)
-> Int
-> (Vector Double, Matrix Double)
pcaN d n = let d' = fmap (\x -> x (scalar $ mean x)) d
d'' = fromColumns $ I.elems d'
(_,vec',uni') = svd d''
vec = toList vec'
uni = toColumns uni'
v' = zip vec uni
v = take n $ reverse $ sortBy (comparing fst) v'
(eigs,vs) = unzip v
in (fromList eigs,fromColumns vs)
v1 = fromList [1,2,3,4,5,6::Double]
v2 = fromList [2,3,4,5,6,7::Double]
v3 = fromList [3,4,5,6,7,8::Double]
a = fromColumns [v1,v2,v3]
b = I.listArray (1,3::Int) [v1,v2,v3] :: I.Array Int (Vector Double)
pcaTransform :: I.Array Int (Vector Double)
-> Matrix Double
-> I.Array Int (Vector Double)
pcaTransform d m = let d' = fmap (\x -> x (scalar $ mean x)) d
in I.listArray (1,cols m) $ toRows $ (tr' m) <> (fromRows $ I.elems d')
pcaReduce :: I.Array Int (Vector Double)
-> Double
-> I.Array Int (Vector Double)
pcaReduce d q = let u = fmap (scalar . mean) d
d' = zipWith () (I.elems d) (I.elems u)
d'' = fromColumns d'
(_,vec',uni') = svd d''
vec = toList vec'
uni = toColumns uni'
v' = zip vec uni
v = filter (\(x,_) -> x > q) v'
m = fromColumns $ snd $ unzip v
in I.listArray (I.bounds d) $ zipWith (+) (toRows $ m <> (tr' m) <> fromRows d') (I.elems u)
pcaReduceN :: I.Array Int (Vector Double)
-> Int
-> I.Array Int (Vector Double)
pcaReduceN d n = let u = fmap (scalar . mean) d
d' = zipWith () (I.elems d) (I.elems u)
d'' = fromColumns d'
(_,vec',uni') = svd d''
vec = toList vec'
uni = toColumns uni'
v' = zip vec uni
v = take n $ reverse $ sortBy (comparing fst) v'
m = fromColumns $ snd $ unzip v
in I.listArray (I.bounds d) $ zipWith (+) (toRows $ m <> (tr' m) <> fromRows d') (I.elems u)