module Numeric.Statistics (
Sample,Samples
, covarianceMatrix, correlationCoefficientMatrix
, meanList, meanArray, meanMatrix
, varianceList, varianceArray, varianceMatrix
, centre, cloglog, corcoeff, cut
, ranks, kendall, logit
, mahalanobis
, mode, moment
, ols, percentile, range
, run_count
, spearman, studentize
) where
import Numeric.LinearAlgebra
import qualified Data.Array.IArray as I
import qualified Data.List as DL
import qualified Data.Vector.Generic as GV
import Foreign.Storable
import Numeric.GSL.Statistics
import Numeric.GSL.Sort(sort)
type Sample a = Vector a
type Samples a = I.Array Int (Vector a)
covarianceMatrix :: Samples Double
-> Matrix Double
covarianceMatrix d = let (s,f) = I.bounds d
in fromArray2D $ I.array ((s,s),(f,f)) $ concat $ map (\(x,y) -> let c = covariance (d I.! x) (d I.! y) in if x == y then [((x,y),c)] else [((x,y),c),((y,x),c)]) $ filter (\(x,y) -> x <= y) $ I.range ((s,s),(f,f))
correlationCoefficientMatrix :: Samples Double -> Matrix Double
correlationCoefficientMatrix d = let (s,f) = I.bounds d
in fromArray2D $ I.array ((s,s),(f,f)) $ concat $ map (\(x,y) -> let { x' = d I.! x ; y' = d I.! y ; c = (covariance x' y') / ((stddev x') * (stddev y')) } in if x == y then [((x,y),c)] else [((x,y),c),((y,x),c)]) $ filter (\(x,y) -> x <= y) $ I.range ((s,s),(f,f))
meanList :: (Container Vector a, Num (Vector a)) => [Sample a] -> Sample a
meanList [] = error "meanVectors: empty list"
meanList [s] = s
meanList (s:ss) = let ln = fromIntegral $ length ss + 1
in scale (recip ln) $ foldl (+) s ss
meanArray :: (Container Vector a, Num (Vector a)) => Samples a -> Sample a
meanArray a = meanList $ I.elems a
meanMatrix :: (Container Vector a, Num (Vector a), Element a) => Matrix a -> Sample a
meanMatrix a = meanList $ toRows a
varianceList :: (Container Vector a, Floating (Vector a)) => [Sample a] -> Sample a
varianceList [] = error "varianceList: empty list"
varianceList [s] = constant 0 (dim s)
varianceList l = let mxs = meanList (map (** 2) l)
msx = (meanList l) ** 2
in mxs msx
varianceArray :: (Container Vector a, Floating (Vector a)) => Samples a -> Sample a
varianceArray a = varianceList $ I.elems a
varianceMatrix :: (Container Vector a, Floating (Vector a), Element a) => Matrix a -> Sample a
varianceMatrix a = varianceList $ toRows a
centre :: Vector Double -> Vector Double
centre v = v (realToFrac (mean v))
cloglog :: Floating a => a -> a
cloglog v = log ( (log v))
corcoeff :: Vector Double -> Vector Double -> Double
corcoeff x y = (covariance x y)/((stddev x)*(stddev y))
cut :: Vector Double
-> Vector Double
-> Vector Int
cut v c = let c' = sort c
in mapVector (\x -> cut_helper 0 x c') v
where
cut_helper i x c
| i >= dim c = error "Numeric.Statistics: cut: data point not within interval"
| x >= (c @> i) && x <= (c @> (i+1)) = i
| otherwise = cut_helper (i + 1) x c
ranks :: (Fractional b, Storable b) => Vector Double -> Vector b
ranks v = let v' = sort v
in mapVector (\x -> 1 + rank_helper x v') v
where rank_helper x v' = let is = GV.elemIndices x v'
in (realToFrac (GV.foldl (+) 0 is)) / (fromIntegral $ dim is)
kendall :: Vector Double -> Vector Double -> Matrix Double
kendall x y = let ln = dim x
rx = ranks x
ry = ranks y
r = fromColumns [rx,ry]
m = signum $ (kronecker r (asColumn $ constant 1.0 ln)) (kronecker (asRow $ constant 1.0 ln) r)
c = rows m 1
in correlationCoefficientMatrix $ I.listArray (0,c) (toColumns m)
logit :: (Floating b, Storable b)
=> Vector b -> Vector b
logit v = mapVector (\x -> (log ((1 / x) 1))) v
mahalanobis :: Samples Double
-> Maybe (Sample Double)
-> Double
mahalanobis x u = let (_,xr) = I.bounds x
xl = I.elems x
s' = pinv $ covarianceMatrix x
xu = case u of
Nothing -> fromList $ map mean xl
Just m -> m
xm = fromRows $ map (() xu) $ toRows $ fromColumns xl
in ((xm <> s' <> (trans xm)) @@> (0,0))
mode :: Vector Double -> [(Double,Integer)]
mode v = let w = sort v
in DL.sortBy (\(_,n) (_,n') -> compare n' n) $ foldVector freqs [] w
where freqs x [] = [(x,1)]
freqs x ((f,n):fns)
| f == x = ((f,n+1):fns)
| otherwise = ((x,1):(f,n):fns)
moment :: Integral a
=> a
-> Bool
-> Bool
-> Vector Double
-> Double
moment p c a v
| p <= 0 = error "Numeric.Statistics.moment: negative moment requested"
| otherwise = let u = if c then centre v else v
w = if a then abs u else u
x = mapVector (** (fromIntegral p)) w
in mean x
ols :: (Num (Vector t), Field t)
=> Matrix t
-> Matrix t
-> (Matrix t, Matrix t, Matrix t)
ols x y
| rows x /= rows y = error "Numeric.Statistics: ols: incorrect matrix dimensions"
| otherwise = let (xr,xc) = (rows x,cols x)
(yr,yc) = (rows y,cols y)
z = (trans x) <> x
r = rank z
beta = if r == xc
then (inv z) <> (trans x) <> y
else (pinv x) <> y
rr = y x <> beta
sigma = ((trans rr) <> rr) / (fromIntegral $ xr r)
in (beta,rr,sigma)
percentile :: Double
-> Vector Double
-> Double
percentile p d = quantile (0.01*p) d
range :: Container c e => c e -> e
range v = maxElement v minElement v
run_count :: (Num a, Num t, Ord b, Ord a, Storable b)
=> a
-> Vector b
-> [(a, t)]
run_count n v = let w = subVector 1 (dim v 1) v
x = foldVector run_count' [(1,v @> 0)] w
y = map fst x
z = takeWhile (<= n) $ DL.sort y
in foldr count [] z
where run_count' m ((c,g):cs)
| m < g = ((c+1,m):cs)
| otherwise = ((1,m):(c,g):cs)
count x [] = [(x,1)]
count x ((yv,yc):ys)
| x == yv = ((yv,yc+1):ys)
| otherwise = ((x,1):(yv,yc):ys)
spearman :: Vector Double -> Vector Double -> Double
spearman x y = corcoeff (ranks x) (ranks y)
studentize :: Vector Double -> Vector Double
studentize x = (centre x)/(fromList $ [stddev x])
--table