----------------------------------------------------------------------------- -- Module : Math.Statistics -- Copyright : (c) 2007 SFTank -- License : BSD3 -- -- Maintainer : mbeddoe@<nospam>sftank.net -- Stability : experimental -- Portability : portable -- -- Description : -- A collection of commonly used statistical functions. ----------------------------------------------------------------------------- module Math.Statistics where import List -- Arithmetic mean mean :: (Floating a) => [a] -> a mean xs = sum xs / (fromIntegral . length) xs -- Harmonic mean hmean :: (Floating a) => [a] -> a hmean xs = fromIntegral (length xs) / (sum $ map (1/) xs) -- Geometric mean gmean :: (Floating a) => [a] -> a gmean xs = (foldr1 (*) xs)**(1 / fromIntegral (length xs)) -- Median median :: (Floating a, Ord a) => [a] -> a median x | odd n = head $ drop (n `div` 2) x' | even n = mean $ take 2 $ drop i x' where i = (length x' `div` 2) - 1 x' = sort x n = length x -- Modes -- Returns a sorted list of modes in descending order modes :: (Ord a) => [a] -> [(Int, a)] modes xs = sortOn (negate.fst) $ map (\x->(length x, head x)) $ (group.sort) xs where sortOn :: Ord b => (a -> b) -> [a] -> [a] sortOn f = sortBy (\x y -> compare (f x) (f y)) -- Range range :: (Num a, Ord a) => [a] -> a range xs = maximum xs - minimum xs -- Average deviation avgdev :: (Floating a) => [a] -> a avgdev xs = mean $ map (\x -> abs(x - m)) xs where m = mean xs -- Standard Deviation stddev :: (Floating a) => [a] -> a stddev xs = sqrt $ var xs -- Population variance pvar :: (Floating a) => [a] -> a pvar xs = mean $ map (\x -> (x - m)^2) xs where m = mean xs -- Sample variance var :: (Floating a) => [a] -> a var xs = (sum $ map (\x -> (x - m)^2) xs) / (fromIntegral (length xs)-1) where m = mean xs -- Interquartile range -- XXX: Add case that takes into account even vs odd length iqr xs = take (length xs - 2*q) $ drop q xs where q = ((length xs) + 1) `div` 4 -- Kurtosis kurtosis :: (Floating b) => [b] -> b kurtosis xs = sum (map (\x -> ((x - m) / (stddev xs))^4) xs) / n - 3 where m = mean xs n = fromIntegral $ length $ xs -- Skew skew :: (Floating a) => [a] -> a skew xs = mean $ (map (\x -> ((x - (mean xs)) / (stddev xs))^3) xs) -- Covariance cov :: (Floating a) => [a] -> [a] -> a cov xs ys = sum (zipWith (*) (map f1 xs) (map f2 ys)) / (n - 1) where n = fromIntegral $ length $ xs m1 = mean xs m2 = mean ys f1 = \x -> (x - m1) f2 = \x -> (x - m2) -- Covariance matrix covm :: (Floating a) => [[a]] -> [[a]] covm xs = split' (length xs) cs where cs = [ cov a b | a <- xs, b <- xs] split' n = unfoldr (\y -> if null y then Nothing else Just $ splitAt n y) -- Pearson's product-moment correlation coefficient corr :: (Floating a) => [a] -> [a] -> a corr x y = cov x y / (stddev x * stddev y)