{-| Module : TimeSeries.Correlation Copyright : (C) 2013 Parallel Scientific Labs, LLC License : GPL-2 Stability : experimental Portability : portable Functions for correlation analysis. -} module TimeSeries.Correlation where import Data.Array.Base import Data.List (sort) import TimeSeries.PRG64 import TimeSeries.Window (Window(..)) import qualified TimeSeries.Window as W -- | Direct computation of correlation of two windows X and Y. -- Resulting value /r/ is: -- -- <<http://bit.ly/15XPwC3>> -- -- See: <http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#Definition> -- -- Where: X̄ is mean of X, Xi is i-th element of X. -- direct :: Window -> Window -> Double direct (Window x) (Window y) = sigma_x_y_minus_means / sqrt_sigma_x_y_squared where sigma_x_y_minus_means = sum [ (xi-x_mean)*(yi-y_mean) | (xi,yi) <- zip (elems x) (elems y)] sqrt_sigma_x_y_squared = sqrt (sum xs' * sum ys') where xs' = [square (xi-x_mean) | xi <- elems x] ys' = [square (yi-y_mean) | yi <- elems y] x_mean = mean (elems x) y_mean = mean (elems y) {- Or: 1 Xi-X̄ Yi-Ȳ r = ————— Σi=1,n( —————— )( –————— ) n-1 sx sy where sx is standard deviation of x. ...r*2? -} -- | Naive correlation from sketch distance, using: -- -- <<http://bit.ly/1fWIkjp>> -- -- sketch distance is computated without incremental update. -- sketch_naive :: [Window] -- ^ Random vectors, length of list is /d/ (the sketch size). -- Length of 'Window' is /sw/ (sliding window size). -> Int -- ^ Size of sliding window. -> Window -- ^ First sliding window. -> Window -- ^ Second sliding window. -> Double sketch_naive rs sz (Window wx) (Window wy) = 1 - (d'/len') where d' = sqrt $ sum ds ds = [ square (xj-yj) | (xi,yi) <- zip skX skY , let xj = (xi-avg_x) / var_x , let yj = (yi-avg_y) / var_y ] skX = [dotp (elems r) (elems wx)|Window r <- rs] skY = [dotp (elems r) (elems wy)|Window r <- rs] len' = 2 * (len x-1) avg_x = mean x avg_y = mean y var_x = (sum [square (xi - avg_x)|xi<-x]) / sz' var_y = (sum [square (yi - avg_y)|yi<-y]) / sz' x = elems wx y = elems wy sz' = fromIntegral sz -- | Sketch distance based correlation. sketch :: Window -- ^ Sketch of sliding window X -> Window -- ^ Sketch of sliding window Y -> Double sketch wx wy = 1 - (d'/len') where d' = sum [square (x-y)|(x,y)<-zip (W.toList wx) (W.toList wy)] len' = 2 * fromIntegral (W.length wx) {- -- | Correlation from sketch distance with ∑i=0,nb-1(sum(X)) and -- ∑i=0,nb-1(sum(X²)). sketch' :: Int -- ^ Sketch size. -> Int -- ^ Sliding window size. -> (Window, Double, Double) -- ^ Triple of window X, ∑i=0,nb-1(sum(X)), and ∑i=0,nb-1(sum(X²)). -> (Window, Double, Double) -- ^ Triple of window Y, ∑i=0,nb-1(sum(Y)), and ∑i=0,nb-1(sum(Y²)). -> Double sketch' d sw (Window wx, sx, sqx) (Window wy, sy, sqy) = 1 - (d'/len') where -- XXX: Not using sumx/sumsqx in sketch_distance. d' = square (sketch_distance d x' y') x' = [(xi-avg_x)/var_x|xi<-x] y' = [(yi-avg_y)/var_y|yi<-y] len' = 2 * sw' avg_x = sx / sw' avg_y = sy / sw' var_x = sqrt ((sqx/sw') - (square avg_x)) var_y = sqrt ((sqy/sw') - (square avg_y)) x = elems wx y = elems wy sw' = fromIntegral sw -} -- | Sketch distance. sketch_distance :: Int -- ^ Sketch size /d/. -> [Double] -- ^ First vector. -> [Double] -- ^ Second vector. -> Double sketch_distance d x w = median norms / sqrt (fromIntegral d) where norms = map norm $ zipWith (zipWith (-)) ys zs median xs = sort xs !! (length xs `div` 2) -- -- d random vectors with m elements: -- -- > ri = {1, -1}ᵐ, i = [1,2,..,2b+1] -- -- Random vectors are whole random vector, which means that not -- separated to pair of unit random vector and control vector. -- b = 8 m = length x prg64s = chunks m $ concat $ prg64Bits (prg64Init 0x9834219) (ys,zs) = foldr f ([],[]) [0..2*b] where f n (yacc,zacc) = let y = [dotp x r | r <- rs] z = [dotp w r | r <- rs] rs = take d $ drop (n*d) prg64s in (y:yacc,z:zacc) -- | Vector norm: -- -- > norm as = sqrt $ sum [a^2|a <- as] -- norm :: Floating a => [a] -> a norm as = sqrt $ sum [square a|a <- as] mean :: Fractional a => [a] -> a mean zs = sum zs / len zs stddev :: [Double] -> Double stddev zs = sqrt (mean [square zi|zi<-zs] - (square (mean zs))) stddev' :: Window -> Double stddev' (Window x) = stddev $ elems x -- XXX: Use i2f from or1200 somewhere to do conversion. -- Length may handled with counting up a state, then used in floating -- point division... len :: Num b => [a] -> b len zs = fromIntegral (length zs) dotp :: Num a => [a] -> [a] -> a dotp a b = sum $ zipWith (*) a b chunks :: Int -> [a] -> [[a]] chunks sz ys | null ys = [] | otherwise = let (pre,post) = splitAt sz ys in pre : chunks sz post square :: Fractional a => a -> a square = (^ (2::Int))