{-# LANGUAGE BangPatterns #-}
module Math.Clustering.Hierarchical.Spectral.Utility
( subsetVector
, permutationTest
, permutationTestSparse
) where
import Data.List (genericLength)
import Data.Maybe (fromMaybe)
import qualified Data.Foldable as F
import qualified Data.Sparse.Common as S
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Generic as G
import qualified System.Random.MWC as R
import qualified System.Random.MWC.Distributions as R
subsetVector :: V.Vector a -> [Int] -> V.Vector a
subsetVector xs =
V.fromList
. F.foldr' (\ !i !acc
-> ( fromMaybe (error "Out of bounds in subsetVector (do the number of observations and features match the size of the matrix? Is the input format correct?).")
$ xs V.!? i
) : acc
) []
permutationTest :: (G.Vector v a)
=> (v a -> b -> Double)
-> Int
-> v a
-> b
-> IO Double
permutationTest f runs labs samples = do
let obs = f labs samples
run r = do
g <- R.initialize . V.singleton $ fromIntegral r
ls <- R.uniformShuffle labs g
return $ f ls samples
n <- genericLength . filter (>= obs) <$> mapM run [1..runs]
return $ n / fromIntegral runs
permutationTestSparse :: (S.SpVector Double -> b -> Double)
-> Int
-> S.SpVector Double
-> b
-> IO Double
permutationTestSparse f runs labsInit samples = do
let obs = f labsInit samples
labs = U.fromList $ S.toDenseListSV labsInit
run r = do
g <- R.initialize . V.singleton $ fromIntegral r
ls <- S.vr . U.toList <$> R.uniformShuffle labs g
return $ f ls samples
n <- genericLength . filter (>= obs) <$> mapM run [1..runs]
return $ n / fromIntegral runs