-- | -- Module : Statistics.Covariance.LedoitWolf -- Description : Shrinkage based covariance estimator by Ledoit and Wolf -- Copyright : (c) 2021 Dominik Schrempf -- License : GPL-3.0-or-later -- -- Maintainer : dominik.schrempf@gmail.com -- Stability : experimental -- Portability : portable -- -- Creation date: Thu Sep 9 14:08:26 2021. module Statistics.Covariance.LedoitWolf ( ledoitWolf, ) where import Data.Foldable import qualified Numeric.LinearAlgebra as L import Statistics.Covariance.Internal.Tools -- | Shrinkage based covariance estimator by Ledoit and Wolf. -- -- See Ledoit, O., & Wolf, M., A well-conditioned estimator for -- large-dimensional covariance matrices, Journal of Multivariate Analysis, -- 88(2), 365–411 (2004). http://dx.doi.org/10.1016/s0047-259x(03)00096-4. -- -- Return 'Left' if -- -- - only one sample is available. -- -- - no parameters are available. -- -- NOTE: This function may call 'error' due to partial library functions. ledoitWolf :: -- | Sample data matrix of dimension \(n \times p\), where \(n\) is the number -- of samples (rows), and \(p\) is the number of parameters (columns). L.Matrix Double -> Either String (L.Herm Double) ledoitWolf xs | n < 2 = Left "ledoitWolf: Need more than one sample." | p < 1 = Left "ledoitWolf: Need at least one parameter." -- The Ledoit and Wolf shrinkage estimator of the covariance matrix -- (Equation 14). However, a different, more general formula avoiding a2 is -- used. See Equation (4) in Chen2010b. | otherwise = Right $ shrinkWith rho sigma mu im where n = L.rows xs p = L.cols xs (means, sigma) = L.meanCov xs xsCentered = centerWith means xs im = L.trustSym $ L.ident p mu = muE sigma d2 = d2E im sigma mu b2 = b2E xsCentered sigma d2 rho = b2 / d2 -- Inner product for symmetric matrices based on an adjusted Frobenius norm (p -- 376). -- -- NOTE: This function is commutative (and therefor qualified as an inner -- product) for symmetric matrices only, and not in the general case. frobenius :: L.Matrix Double -> L.Matrix Double -> Double frobenius xs ys | xsRows /= xsCols = error "frobenius: Bug! Left matrix is not square." | ysRows /= ysCols = error "frobenius: Bug! Right matrix is not square." | xsRows /= ysRows = error "frobenius: Bug! Matrices have different size." | otherwise = recip (fromIntegral xsRows) * trace (xs L.<> L.tr' ys) where xsRows = L.rows xs xsCols = L.cols xs ysRows = L.rows ys ysCols = L.cols ys -- Estimator of mu (Lemma 3.2). muE :: -- Sample covariance matrix. L.Herm Double -> Double -- Avoid matrix multiplication in Frobenius norm. muE sigma' = recip xsRows * trace sigma where sigma = L.unSym sigma' xsRows = fromIntegral $ L.rows sigma -- Estimator of d2 (Lemma 3.3). d2E :: -- Identity matrix. L.Herm Double -> -- Sample covariance matrix. L.Herm Double -> -- Estimate of mu. Double -> Double d2E im sigma mu = frobenius m m where m = L.unSym sigma - L.scale mu (L.unSym im) -- Estimator of b2 (Lemma 3.4). b2E :: -- Data matrix. L.Matrix Double -> -- Sample covariance matrix. L.Herm Double -> -- Estimate of d2. Double -> Double b2E xs sigma = min b2m where n = fromIntegral $ L.rows xs -- NOTE: The authors use a transposed data matrix. They refer to one out of -- n columns, each with p rows. Here, we have one out of n rows, each with p -- columns. -- -- Long story short. Each y is an observation, a vector of length p. ys = [ frobenius d d | y <- L.toRows xs, let d = (L.asColumn y L.<> L.asRow y) - L.unSym sigma ] b2m = recip (n * n) * foldl' (+) 0 ys