lib/Numeric/Statistics/ICA.hs

-----------------------------------------------------------------------------
-- |
-- Module      :  Numeric.Statistics.ICA
-- Copyright   :  (c) A. V. H. McPhail 2010
-- License     :  GPL-style
--
-- Maintainer  :  haskell.vivian.mcphail <at> gmail <dot> com
-- Stability   :  provisional
-- Portability :  portable
--
-- Independent Components Analysis
--
--  implements the FastICA algorithm found in:
--
-- * Aapo Hyvärinen and Erkki Oja,
--   Independent Component Analysis: Algorithms and Applications,
--   /Neural Networks/, 13(4-5):411-430, 2000
--
--   <http://www.google.com/url?sa=t&source=web&cd=2&ved=0CBgQFjAB&url=http%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fdownload%3Fdoi%3D10.1.1.79.7003%26rep%3Drep1%26type%3Dpdf&ei=RQozTJb6L4_fcbCV6cMD&usg=AFQjCNGClLIB9MAvbrEj45SyUx9cYubLyA&sig2=hg5Wnfy3dLPkoIc1hqSfjg>
--
-----------------------------------------------------------------------------

module Numeric.Statistics.ICA (
                               sigmoid, sigmoid',
                               demean, whiten,
                               ica, icaDefaults
                          ) where


-----------------------------------------------------------------------------

import qualified Data.Array.IArray as I 

import Numeric.LinearAlgebra

import Numeric.GSL.Statistics

import Numeric.Statistics

import System.Random

-----------------------------------------------------------------------------

-- | sigmoid transfer function
sigmoid :: Double -> Double
sigmoid u = u * exp((-u**2)/2)

-- | derivative of sigmoid transfer function
sigmoid' :: Double -> Double
sigmoid' u = -u**2 * exp((-u**2)/2)

-----------------------------------------------------------------------------

-- preprocessing:
--   demean
--   whiten
--        eigenvalue decomposition of covariance matrix E{xx^T} = EDE^T
--                   E orthogonal matrix of eigenvectors
--                   D diagonal matrix of eigenvalues, D = diag(d_1,...,d_n)
--                   x_white = ED^{-1/2}E^Tx
--                   D^{-1/2} = diag{d_1^{-1/2},...}
--

-----------------------------------------------------------------------------

-- | remove the mean from data
demean :: I.Array Int (Vector Double)                  -- ^ the data
       -> (I.Array Int (Vector Double),Vector Double)  -- ^ (demeaned data,mean)
demean d = let u = I.elems $ fmap mean d
               d' = I.listArray (I.bounds d) (zipWith (-) (I.elems d) (map scalar u))
               u' = fromList u
               in (d',u')

-- | whiten data
whiten :: I.Array Int (Vector Double)                 -- ^ the data
       -> Double                                      -- ^ eigenvalue threshold
       -> (I.Array Int (Vector Double),Matrix Double) -- ^ (whitened data,transform)
whiten d q = let cv = covarianceMatrix d
                 (val',vec') = eigSH cv           -- the covariance matrix is real symmetric
                 val = toList val'
                 vec = toColumns vec'
                 v' = zip val vec
                 v = filter ((> q) . fst) v'        -- keep only eigens > than parameter
                 (dd',e') = unzip v
                 dd = diag $ (** (-0.5)) $ fromList dd'  -- square root of eigenvalues diagonalised
                 e = fromColumns e'
                 x = fromRows $ I.elems d
                 t = e <> dd <> trans e          -- the actual mathematics
                 x' = t <> x                     -- the actual mathematics
                 d' = I.listArray (I.bounds d) (toRows x') 
             in (d',t)
                 
-----------------------------------------------------------------------------

-- assuming that a weight vector is a row

-- algorithm:
-- 1  initial random weight vectors w_i
-- 2  w_i^+ = E{xg(w^Tx)} - E{g'(w^Tx)}w     (newton phase)
-- 3  W = W = (WW^T)^{-1/2)W                 (decorrelation) W = ( ..., w_i, ...)^T
--                                           WW^T = FDF^T (eigenvalue decomposition)
-- 4  w_i = w^+/norm(w^+)                    (normalisation) (almost any norm but not Frobenius)
-- 5  if not converged (dot w w^+ ~ 1 implies convergence) go to step 2
--
-- in matrix form, 2 becomes:
--    W^+ = W + (diag a_i)[(diag b_i) + E{g(y)y^T}]W
--
--      where
--            y = Wx
--            b_i = -E{y_ig(y_i)}
--            a_i = -1/(b_i-E{g'(y_i)})
--
--    g(u) = tanh(au) 0<=a<=2, often a = 1
--    g(u) = u exp(-u^2/2)

-----------------------------------------------------------------------------

unconcat 0 _ _  = []
unconcat r c xs = [take c xs] ++ unconcat (r-1) c (drop c xs)

random_vector :: Int -> (Int,Int) -> Matrix Double
random_vector s (r,c) = fromLists $ unconcat r c $ randomRs (-1,1) (mkStdGen s)

-- g g' w x -> w'
update :: (Double -> Double) -> (Double -> Double) -> Matrix Double -> Matrix Double -> Matrix Double
update g g' w x = let y = w <> x
                      ys = toRows y
                      bis = map (\y' -> - mean (y' * (mapVector g y'))) ys
                      ais = zipWith (\b y' -> -1 / (b - mean (mapVector g y'))) bis ys
                      r = rows y
                      ix = ((1,1),(r,r))
                      cov = fromArray2D $ I.listArray ix $ map (\(m,n) -> covariance (mapVector g' (ys!!(m-1))) (ys!!(n-1))) $ I.range ix
                  in w + (diag $ fromList ais) <> ((diag $ fromList bis) + cov) <> w  

decorrelate :: Matrix Double -> Matrix Double
decorrelate m = let (d',v') = eig m
                    d = fst $ fromComplex d'
                    v = fst $ fromComplex v'
                in v <> (diag (d ** (-0.5))) <> trans v <> m
{-decorrelate n t w = let w' = w / (scalar $ sqrt $ pnorm n (w <> trans w))
                    in decorrelate' t w w'
    where decorrelate' t' m m' 
              | converged t' m m' = m'
              | otherwise         = decorrelate' t' m' ((scale 1.5 m') - (scale 0.5 (m' <> trans m' <> m')))
-}

normalise :: NormType -> Matrix Double -> Matrix Double
normalise t m = fromRows $ map (\v -> v / (scalar $ pnorm t v)) (toRows m)

converged :: Double -> Matrix Double -> Matrix Double -> Bool
converged t m m' = let d' = map ((-) 1) $ zipWith dot (toRows m) (toRows m')
                   in maximum d' <= t

-----------------------------------------------------------------------------

ica' :: (Double -> Double)          -- ^ transfer function (tanh,u exp(u^2/2), etc...)
     -> (Double -> Double)          -- ^ derivative of transfer function
     -> NormType                    -- ^ type of normalisation: Infinity, PNorm1, PNorm2
     -> Double                      -- ^ convergence tolerance for feature vectors
     -> Matrix Double               -- ^ weight matrix
     -> [Matrix Double]             -- ^ input data in chunks
     -> Matrix Double               -- ^ ica transform (weight matrix)
ica' _ _  _ _ _ []     = error "no sample data"
ica' g g' n t w (x:xs) = let w' = normalise n $ decorrelate $ update g g' w x
                             in if converged t w w' 
                                then w'
                                else ica' g g' n t w' (xs ++ [x])

-- | perform an ICA transform
ica :: Int                         -- ^ random seed
    -> (Double -> Double)          -- ^ transfer function (tanh,u exp(u^2/2), etc...)
    -> (Double -> Double)          -- ^ derivative of transfer function
    -> NormType                    -- ^ type of normalisation: Infinity, PNorm1, PNorm2
    -> Double                      -- ^ convergence tolerance for feature vectors
--    -> Int                         -- ^ output dimensions
    -> Int                         -- ^ sampling size (must be smaller than length of data)
    -> I.Array Int (Vector Double) -- ^ data
    -> (I.Array Int (Vector Double),Matrix Double) -- ^ transformed data, ica transform
ica r g g' n t s a = let i = I.rangeSize $ I.bounds a
                         w = random_vector r (i,i)
                         x' = fromRows $ I.elems a
                         -- next line is BAD if distribution not stationary
                         x = concat $ toBlocksEvery i s x'
                         w' = ica' g g' n t w x
                         y = w' <> x'
                     in (I.listArray (1,1) $ toRows y,w') 

-----------------------------------------------------------------------------

-- | ICA with default values: no dimension reduction, euclidean norms, 16 sample groups, sigmoid
icaDefaults :: Int                         -- ^ random seed
            -> I.Array Int (Vector Double) -- ^ data
            -> (I.Array Int (Vector Double),Matrix Double) -- ^ transformed data, ica transform
icaDefaults r a = let c = I.rangeSize $ I.bounds a
                      s = (dim $ (a I.! 1)) `div` 16
                  in ica r sigmoid sigmoid' Infinity 0.0000001 s a

-----------------------------------------------------------------------------