-- |
-- Module      :  Statistics.Covariance
-- Description :  Estimate covariance matrices from sample data
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- Creation date: Tue Sep 14 13:02:15 2021.
module Statistics.Covariance
  ( -- * Empirical estimator
    empiricalCovariance,

    -- * Shrinkage based estimators

    -- | See the overview on shrinkage estimators provided by
    -- [scikit-learn](https://scikit-learn.org/dev/modules/covariance.html#shrunk-covariance).
    module Statistics.Covariance.LedoitWolf,
    module Statistics.Covariance.RaoBlackwellLedoitWolf,
    module Statistics.Covariance.OracleApproximatingShrinkage,

    -- * Gaussian graphical model based estimators
    module Statistics.Covariance.GraphicalLasso,

    -- * Misc
    DoCenter (..),

    -- * Helper functions
    scale,
    rescaleSWith,
    rescalePWith,
  )
where

import qualified Data.Vector.Storable as VS
import qualified Numeric.LinearAlgebra as L
import qualified Numeric.LinearAlgebra.Devel as L
import Statistics.Covariance.GraphicalLasso
import Statistics.Covariance.LedoitWolf
import Statistics.Covariance.OracleApproximatingShrinkage
import Statistics.Covariance.RaoBlackwellLedoitWolf
import Statistics.Covariance.Types
import qualified Statistics.Sample as S

-- | Empirical or sample covariance.
--
-- Classical maximum-likelihood estimator; asymptotically unbiased but sensitive
-- to outliers.
--
-- Re-export of the empirical covariance 'L.meanCov' provided by
-- [hmatrix](https://hackage.haskell.org/package/hmatrix).
--
-- NOTE: This function may call 'error'.
empiricalCovariance ::
  -- | Data matrix of dimension NxP, where N is the number of observations, and
  -- P is the number of parameters.
  L.Matrix Double ->
  L.Herm Double
empiricalCovariance :: Matrix Double -> Herm Double
empiricalCovariance = (Vector Double, Herm Double) -> Herm Double
forall a b. (a, b) -> b
snd ((Vector Double, Herm Double) -> Herm Double)
-> (Matrix Double -> (Vector Double, Herm Double))
-> Matrix Double
-> Herm Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Matrix Double -> (Vector Double, Herm Double)
L.meanCov

scaleWith ::
  -- Vector of means (length P).
  L.Vector Double ->
  -- Vector of standard deviations (length P)
  L.Vector Double ->
  -- Data matrix of dimension N x P.
  L.Matrix Double ->
  -- Data matrix with means 0 and variance 1.0.
  L.Matrix Double
scaleWith :: Vector Double -> Vector Double -> Matrix Double -> Matrix Double
scaleWith Vector Double
ms Vector Double
ss = ((Int, Int) -> Double -> Double) -> Matrix Double -> Matrix Double
forall a b.
(Element a, Storable b) =>
((Int, Int) -> a -> b) -> Matrix a -> Matrix b
L.mapMatrixWithIndex (\(Int
_, Int
j) Double
x -> (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Vector Double
ms Vector Double -> Int -> Double
forall a. Storable a => Vector a -> Int -> a
VS.! Int
j) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Vector Double
ss Vector Double -> Int -> Double
forall a. Storable a => Vector a -> Int -> a
VS.! Int
j))

-- | Center and scales columns.
--
-- Normalize a data matrix to have means 0 and standard deviations/variances
-- 1.0. The estimated covariance matrix of a scaled data matrix is a correlation
-- matrix, which is easier to estimate.
scale ::
  -- | Sample data matrix of dimension \(n \times p\), where \(n\) is the number
  -- of samples (rows), and \(p\) is the number of parameters (columns).
  L.Matrix Double ->
  -- | (Means, Standard deviations, Centered and scaled matrix)
  (L.Vector Double, L.Vector Double, L.Matrix Double)
scale :: Matrix Double -> (Vector Double, Vector Double, Matrix Double)
scale Matrix Double
xs = (Vector Double
ms, Vector Double
ss, Vector Double -> Vector Double -> Matrix Double -> Matrix Double
scaleWith Vector Double
ms Vector Double
ss Matrix Double
xs)
  where
    msVs :: [(Double, Double)]
msVs = (Vector Double -> (Double, Double))
-> [Vector Double] -> [(Double, Double)]
forall a b. (a -> b) -> [a] -> [b]
map Vector Double -> (Double, Double)
forall (v :: * -> *).
Vector v Double =>
v Double -> (Double, Double)
S.meanVariance ([Vector Double] -> [(Double, Double)])
-> [Vector Double] -> [(Double, Double)]
forall a b. (a -> b) -> a -> b
$ Matrix Double -> [Vector Double]
forall t. Element t => Matrix t -> [Vector t]
L.toColumns Matrix Double
xs
    ms :: Vector Double
ms = [Double] -> Vector Double
forall a. Storable a => [a] -> Vector a
L.fromList ([Double] -> Vector Double) -> [Double] -> Vector Double
forall a b. (a -> b) -> a -> b
$ ((Double, Double) -> Double) -> [(Double, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Double, Double) -> Double
forall a b. (a, b) -> a
fst [(Double, Double)]
msVs
    ss :: Vector Double
ss = [Double] -> Vector Double
forall a. Storable a => [a] -> Vector a
L.fromList ([Double] -> Vector Double) -> [Double] -> Vector Double
forall a b. (a -> b) -> a -> b
$ ((Double, Double) -> Double) -> [(Double, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Double -> Double
forall a. Floating a => a -> a
sqrt (Double -> Double)
-> ((Double, Double) -> Double) -> (Double, Double) -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double, Double) -> Double
forall a b. (a, b) -> b
snd) [(Double, Double)]
msVs

-- | Convert a correlation matrix with given standard deviations to original
-- scale.
rescaleSWith ::
  -- | Vector of standard deviations of length \(p\).
  L.Vector Double ->
  -- | Normalized correlation matrix of dimension \(p \times p\).
  L.Matrix Double ->
  -- | Covariance matrix of original scale and of dimension \(p \times p\).
  L.Matrix Double
rescaleSWith :: Vector Double -> Matrix Double -> Matrix Double
rescaleSWith Vector Double
ss = ((Int, Int) -> Double -> Double) -> Matrix Double -> Matrix Double
forall a b.
(Element a, Storable b) =>
((Int, Int) -> a -> b) -> Matrix a -> Matrix b
L.mapMatrixWithIndex (\(Int
i, Int
j) Double
x -> Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Vector Double
ss Vector Double -> Int -> Double
forall a. Storable a => Vector a -> Int -> a
VS.! Int
i) Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Vector Double
ss Vector Double -> Int -> Double
forall a. Storable a => Vector a -> Int -> a
VS.! Int
j))

-- | See 'rescaleSWith' but for precision matrices.
rescalePWith ::
  L.Vector Double ->
  L.Matrix Double ->
  L.Matrix Double
rescalePWith :: Vector Double -> Matrix Double -> Matrix Double
rescalePWith Vector Double
ss = Vector Double -> Matrix Double -> Matrix Double
rescaleSWith ((Double -> Double) -> Vector Double -> Vector Double
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
VS.map Double -> Double
forall a. Fractional a => a -> a
recip Vector Double
ss)