```{-|
Module      : TimeSeries.Correlation
Copyright   : (C) 2013 Parallel Scientific Labs, LLC
License     : GPL-2
Stability   : experimental
Portability : portable

Functions for correlation analysis.
-}

module TimeSeries.Correlation where

import Data.Array.Base
import Data.List (sort)

import TimeSeries.PRG64
import TimeSeries.Window (Window(..))
import qualified TimeSeries.Window as W

-- | Direct computation of correlation of two windows X and Y.
-- Resulting value /r/ is:
--
-- <<http://bit.ly/15XPwC3>>
--
-- See: <http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#Definition>
--
-- Where: X̄ is mean of X, Xi is i-th element of X.
--
direct :: Window -> Window -> Double
direct (Window x) (Window y) = sigma_x_y_minus_means / sqrt_sigma_x_y_squared
where
sigma_x_y_minus_means =
sum [ (xi-x_mean)*(yi-y_mean)
| (xi,yi) <- zip (elems x) (elems y)]
sqrt_sigma_x_y_squared = sqrt (sum xs' * sum ys') where
xs' = [square (xi-x_mean) | xi <- elems x]
ys' = [square (yi-y_mean) | yi <- elems y]
x_mean = mean (elems x)
y_mean = mean (elems y)

{-

Or:
1            Xi-X̄      Yi-Ȳ
r = ————— Σi=1,n( —————— )( –————— )
n-1            sx        sy

where sx is standard deviation of x.
...r*2?

-}

-- | Naive correlation from sketch distance, using:
--
-- <<http://bit.ly/1fWIkjp>>
--
-- sketch distance is computated without incremental update.
--
sketch_naive ::
[Window]
-- ^ Random vectors, length of list is /d/ (the sketch size).
-- Length of 'Window' is /sw/ (sliding window size).
-> Int    -- ^ Size of sliding window.
-> Window -- ^ First sliding window.
-> Window -- ^ Second sliding window.
-> Double
sketch_naive rs sz (Window wx) (Window wy) = 1 - (d'/len')
where
d' = sqrt \$ sum ds
ds = [ square (xj-yj)
| (xi,yi) <- zip skX skY
, let xj = (xi-avg_x) / var_x
, let yj = (yi-avg_y) / var_y
]
skX = [dotp (elems r) (elems wx)|Window r <- rs]
skY = [dotp (elems r) (elems wy)|Window r <- rs]
len' = 2 * (len x-1)
avg_x = mean x
avg_y = mean y
var_x = (sum [square (xi - avg_x)|xi<-x]) / sz'
var_y = (sum [square (yi - avg_y)|yi<-y]) / sz'
x = elems wx
y = elems wy
sz' = fromIntegral sz

-- | Sketch distance based correlation.
sketch ::
Window
-- ^ Sketch of sliding window X
-> Window
-- ^ Sketch of sliding window Y
-> Double
sketch wx wy = 1 - (d'/len') where
d' = sum [square (x-y)|(x,y)<-zip (W.toList wx) (W.toList wy)]
len' = 2 * fromIntegral (W.length wx)

{-
-- | Correlation from sketch distance with ∑i=0,nb-1(sum(X)) and
-- ∑i=0,nb-1(sum(X²)).
sketch' ::
Int
-- ^ Sketch size.
-> Int
-- ^ Sliding window size.
-> (Window, Double, Double)
-- ^ Triple of window X, ∑i=0,nb-1(sum(X)), and ∑i=0,nb-1(sum(X²)).
-> (Window, Double, Double)
-- ^ Triple of window Y, ∑i=0,nb-1(sum(Y)), and ∑i=0,nb-1(sum(Y²)).
-> Double
sketch' d sw (Window wx, sx, sqx) (Window wy, sy, sqy) = 1 - (d'/len') where
-- XXX: Not using sumx/sumsqx in sketch_distance.
d' = square (sketch_distance d x' y')
x' = [(xi-avg_x)/var_x|xi<-x]
y' = [(yi-avg_y)/var_y|yi<-y]
len' = 2 * sw'
avg_x = sx / sw'
avg_y = sy / sw'
var_x = sqrt ((sqx/sw') - (square avg_x))
var_y = sqrt ((sqy/sw') - (square avg_y))
x = elems wx
y = elems wy
sw' = fromIntegral sw
-}

-- | Sketch distance.
sketch_distance ::
Int         -- ^ Sketch size /d/.
-> [Double] -- ^ First vector.
-> [Double] -- ^ Second vector.
-> Double
sketch_distance d x w = median norms / sqrt (fromIntegral d) where
norms = map norm \$ zipWith (zipWith (-)) ys zs
median xs = sort xs !! (length xs `div` 2)
--
-- d random vectors with m elements:
--
-- > ri = {1, -1}ᵐ, i = [1,2,..,2b+1]
--
-- Random vectors are whole random vector, which means that not
-- separated to pair of unit random vector and control vector.
--
b = 8
m = length x
prg64s = chunks m \$ concat \$ prg64Bits (prg64Init 0x9834219)
(ys,zs) = foldr f ([],[]) [0..2*b] where
f n (yacc,zacc) =
let y = [dotp x r | r <- rs]
z = [dotp w r | r <- rs]
rs = take d \$ drop (n*d) prg64s
in  (y:yacc,z:zacc)

-- | Vector norm:
--
-- > norm as = sqrt \$ sum [a^2|a <- as]
--
norm :: Floating a => [a] -> a
norm as = sqrt \$ sum [square a|a <- as]

mean :: Fractional a => [a] -> a
mean zs = sum zs / len zs

stddev :: [Double] -> Double
stddev zs = sqrt (mean [square zi|zi<-zs] - (square (mean zs)))

stddev' :: Window -> Double
stddev' (Window x) = stddev \$ elems x

-- XXX: Use i2f from or1200 somewhere to do conversion.
-- Length may handled with counting up a state, then used in floating
-- point division...
len :: Num b => [a] -> b
len zs = fromIntegral (length zs)

dotp :: Num a => [a] -> [a] -> a
dotp a b = sum \$ zipWith (*) a b

chunks :: Int -> [a] -> [[a]]
chunks sz ys
| null ys   = []
| otherwise = let (pre,post) = splitAt sz ys in pre : chunks sz post

square :: Fractional a => a -> a
square = (^ (2::Int))
```