--------------------------------------------------------------------------- -- | Module : Math.Statistics.Fusion -- Copyright : (c) 2008 Don Stewart -- License : BSD3 -- -- Maintainer : dons@galois.com -- Stability : experimental -- Portability : portable -- -- Description : -- -- A collection of commonly used statistical functions designed to -- fuse under stream fusion, with attention paid to the generated assembly. -- -- These are high performance replacements for various list functions, -- implemented in pure Haskell using stream fusion for sequences. -- -- To illustrate the performance gap, consider the task of calculating -- the numerically stable mean of a sequence of 1e9 double values. -- -- Using the standard list implementation provided by the hstats -- package, -- -- > $ time ./mean -- > 3.141592653589793 -- > ./mean 26.80s user 0.08s system 99% cpu 26.965 total -- -- And this package, -- -- > $ time ./mean -- > 3.141592653589793 -- > ./mean 6.25s user 0.00s system 99% cpu 6.261 total -- -------------------------------------------------------------------------- module Math.Statistics.Fusion ( mean , harmonic , geometric ) where import Data.Array.Vector -- | A numerically stable mean. mean :: UArr Double -> Double mean = fstT . foldlU k (T 0 1) where k (T b a) n = T b' (a+1) where b' = b + (n - b) / fromIntegral a {-# INLINE mean #-} -- ^ required. -- | The harmonic mean harmonic :: UArr Double -> Double harmonic xs = fromIntegral a / b where T b a = foldlU k (T 0 0) xs k (T b a) n = T (b + (1/n)) (a+1) {-# INLINE harmonic #-} -- | The geometric mean of a non-negative list. geometric :: UArr Double -> Double geometric xs = p ** (1 / fromIntegral n) where T p n = foldlU k (T 1 0) xs k (T p n) a = T (p * a) (n + 1) {-# INLINE geometric #-} ------------------------------------------------------------------------ -- Helper code. Monomorphic unpacked accumulators. -- don't support polymorphism, as we can't get unboxed returns if we use it. data T = T {-# UNPACK #-}!Double {-# UNPACK #-}!Int fstT :: T -> Double fstT (T a _) = a {- Consider this core: with data T a = T !a !Int $wfold :: Double# -> Int# -> Int# -> (# Double, Int# #) and without, $wfold :: Double# -> Int# -> Int# -> (# Double#, Int# #) yielding to boxed returns and heap checks. -}