src/Numeric/LinearAlgebra/NIPALS.hs

{-# LANGUAGE ScopedTypeVariables, FlexibleContexts #-}
-- | Nonlinear Iterative Partial Least Squares
module Numeric.LinearAlgebra.NIPALS
    ( -- * Simplified Interface
      firstPC
    , firstPCFromScores
    ) where

import Numeric.LinearAlgebra

-- | Calculate the first principal component of a set of samples.
--
-- Each row in the matrix is one sample. Note that this is transposed
-- compared to the implementation of principal components using 'svd'
-- or 'leftSV'
--
-- Example:
-- 
-- > let (pc,scores,residuals) = firstPC $ fromRows samples
-- 
-- This is calculated by providing a default estimate of the scores to
-- 'firstPCFromScores'
firstPC :: Matrix Double -> (Vector Double, Vector Double, Matrix Double)
firstPC m = firstPCFromScores m t0
    where
      t0 = head $ toColumns m

-- | Calculate the first principal component of a set of samples given
-- a starting estimate of the scores.
--
-- Each row in the matrix is one sample. Note that this is transposed
-- compared to the implementation of principal components using 'svd'
-- or 'leftSV'
--
-- The second argument is a starting guess for the score vector. If
-- this is close to the actual score vector, then this will cause the
-- algorthm to converge much faster.
--
-- Example:
-- 
-- > let (pc,scores,residuals) = firstPCFromScores (fromRows samples) scoresGuess
-- 
firstPCFromScores :: Matrix Double
                  -> Vector Double
                  -> (Vector Double, Vector Double, Matrix Double)
firstPCFromScores m t0 = (p,t,r)
    where
      steps = iterate refine (t0,undefined)
      convergence = let scores = map fst steps
                        dscores = zipWith diffScores scores $ tail scores
                    in smooth dscores
      (t,p) = let steps' = zip convergence $ tail steps
                  steps'' = dropWhile (\(c,_) -> c > threshold) steps'
              in snd $ head steps''
      r = m `sub` (t `outer` p)

      refine (t,_) = (t', p')
          where
            p' = toUnit (trans m <> t)
            t' = m <> p'

      diffScores ta tb = let xa = ta<.>ta
                             xb = tb<.>tb
                             x = abs $ xa - xb
                         in (x / xb) :: Double

      threshold = sqrt (fromIntegral (cols m + rows m)) * eps

toUnit :: Vector Double -> Vector Double
toUnit v = if mag <= 0.0 then dim v |> (1 : repeat 0) else scale (1/mag) v
    where
      mag = norm2 v

smooth :: [Double] -> [Double]
smooth (x0:xs@(x1:_)) = (2*x0+x1)/3 : smooth xs
smooth _ = []

-- | Calculate the first principal component -- calculating the
-- samples fresh on every pass.
--
-- This function calculates the exact same results as 'firstPC', but
-- instead of an input 'Matrix', it takes a monad action that yields
-- the list of samples, and it guarantees that the list returned by
-- the action will be consumed in a single pass. However the action
-- may be demanded many times.
firstPCM :: (Product t, Monad m) => m [Vector t] -> m (Vector t, Vector t, [Vector t])
firstPCM = undefined