{- | Regression implements basic Gaussian process regression for the
   HasGP Gaussian process library. For the technical details see
   www.gaussianprocesses.org.

   Copyright (C) 2011 Sean Holden. sbh11\@cl.cam.ac.uk.
-}
{- This file is part of HasGP.

   HasGP is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   HasGP is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with HasGP.  If not, see <http://www.gnu.org/licenses/>.
-}
module HasGP.Regression.Regression 
   (
     gpRMain,
     gpRPredict,
     gpRPredict',
     gpRLogEvidence,
     gpRGradLogEvidence,
     gpRLogHyperToEvidence
   ) where 

import Numeric.LinearAlgebra

import HasGP.Types.MainTypes
import HasGP.Covariance.Basic
import HasGP.Support.MatrixFunction
import HasGP.Support.Solve
import HasGP.Support.Linear

-- | Compute the main quantities required to do regression, specifically:
--    the lower triangular Cholesky factor L of the covariance-plus-noise
--    matrix K = L L^T, and the vector \alpha satisfying L L^T \alpha = y,
--    so that \alpha = K^{-1} y.
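--
--    A minimal usage sketch (the covariance value @cov@, the data @inputs@
--    and @targets@, and the noise level are illustrative and assumed to be
--    built elsewhere with the rest of the library):
--
-- > (l, alpha) = gpRMain cov (log 0.01) inputs targets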
gpRMain :: CovarianceFunction cF => cF  
        -> Double                        -- ^ The log noise variance
        -> Inputs                        -- ^ The training inputs
        -> Targets                       -- ^ The training targets
        -> (DMatrix,DVector)             -- ^ L and alpha.
gpRMain cov lognvar d t = (l,alpha)
    where
      n = rows d
      k = covarianceMatrix cov d
      -- Add the noise variance to the diagonal: K = k + exp(lognvar) * I.
      m = (k + (scale (exp lognvar) (ident n)))
      -- chol gives the upper factor, so transpose to obtain lower triangular L.
      l = trans $ chol m
      -- Two triangular solves give alpha = K^{-1} t.
      alpha = upperSolve (trans l) (lowerSolve l t)

-- | Compute the predictive mean and variance for a collection of 
--   new points supplied as the rows of a matrix. Differs from 
--   gpRPredict' in that l and alpha must be computed in advance,
--   for example with gpRMain.
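--
--   A usage sketch, continuing the gpRMain example above (@xStars@ is an
--   illustrative matrix of test points, one per row):
--
-- > (means, vars) = gpRPredict cov l alpha inputs targets xStars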
gpRPredict :: CovarianceFunction cF => cF
           -> DMatrix                    -- ^ l
           -> DVector                    -- ^ alpha
           -> Inputs                     -- ^ The training inputs
           -> Targets                    -- ^ The training targets
           -> Inputs                     -- ^ The new inputs
           -> (DVector, DVector)         -- ^ Mean, variance
gpRPredict cov l alpha d t xStars = 
    ((fromRows kxxStar) <> alpha, 
     fromList $ zipWith (-) kxStarxStar (zipWith dot vs vs))
    where
      new = toRows xStars
      -- Covariances between each new point and the training inputs, and the
      -- prior variances at the new points.
      kxxStar = covarianceWithPoints cov d new
      kxStarxStar = zipWith (covariance cov) new new
      -- v = L^{-1} k*; the predictive variance is k(x*,x*) - v.v.
      vs = map (lowerSolve l) kxxStar

-- | Compute the predictive mean and variance for a collection of 
--   new points supplied as the rows of a matrix. Unlike gpRPredict,
--   l and alpha are computed internally using gpRMain.
gpRPredict' :: CovarianceFunction cF => cF 
            -> Double                      -- ^ The log noise variance
            -> Inputs                      -- ^ The training inputs
            -> Targets                     -- ^ The training targets
            -> Inputs                      -- ^ The new inputs
            -> (DVector, DVector)          -- ^ Mean, variance
gpRPredict' cov lognvar d t xStars = gpRPredict cov l alpha d t xStars
    where
      (l, alpha) = gpRMain cov lognvar d t 

-- | Compute the log of the marginal likelihood.
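--
--   In terms of the Cholesky factor L and alpha = K^{-1} y from gpRMain:
--
-- > log p(y|X) = -1/2 y^T alpha - sum_i log L_ii - (n/2) log(2 pi)
--
--   (see Rasmussen and Williams, chapter 2).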
gpRLogEvidence :: DMatrix    -- ^ l
               -> DVector    -- ^ alpha
               -> Targets    -- ^ The training targets
               -> Double     -- ^ log marginal likelihood
gpRLogEvidence l alpha t = 
    (-0.5) * ((t <.> alpha) + 
              (2.0 * (sum $ map log (toList $ takeDiag l))) + 
              ((fromIntegral n) * (log (2 * pi))))
    where
      n = rows l

-- | Compute the gradient of the log marginal likelihood.
--   The output contains the derivative with respect to the noise variance, 
--   followed by the derivatives with respect to the hyperparameters 
--   of the covariance function.
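--
--   Each component is the standard trace expression
--
-- > d/dtheta_j log p(y|X) = 1/2 tr( (alpha alpha^T - K^{-1}) dK/dtheta_j )
--
--   evaluated using only the diagonal of the matrix product.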
gpRGradLogEvidence :: CovarianceFunction cF => cF
                   -> Double  -- ^ the log noise variance
                   -> DMatrix -- ^ l
                   -> DVector -- ^ alpha 
                   -> Inputs  -- ^ The training inputs
                   -> DVector -- ^ Derivatives
gpRGradLogEvidence cov lognvar l alpha i = 
    join [fromList [dZdtheta $ scale (exp lognvar) $ ident (rows l)], 
          fromList $ map dZdtheta dKdthetaList]
        where
          -- K^{-1}, recovered from the Cholesky factor l.
          invK = HasGP.Support.Solve.cholSolve l
          -- One dK/dtheta matrix for each covariance hyperparameter.
          dKdthetaList = makeMatricesFromPairs (dCovarianceDParameters cov) i
          -- 1/2 tr((alpha alpha^T - K^{-1}) dK/dtheta), using only the
          -- diagonal of the product.
          dZdtheta dKdtheta = 
              (0.5) * (sum $ toList $ abDiagOnly 
                       ((asColumn alpha <> asRow alpha) - invK) dKdtheta)

-- | Given the log parameters and other necessary inputs, compute 
--   the NEGATIVE of the log marginal likelihood and its derivatives with 
--   respect to the LOG hyperparameters. This is the form expected by 
--   gradient-based minimisers.
gpRLogHyperToEvidence :: CovarianceFunction cF => cF 
                      -> Inputs  -- ^ The training inputs
                      -> Targets -- ^ The training targets
                      -> DVector -- ^ The log hyperparameters; log noise variance first
                      -> (Double, DVector)
gpRLogHyperToEvidence cov inputs targets parameters = 
    (-(gpRLogEvidence l alpha targets), 
     zipVectorWith (*) negGradLE (mapVector exp parameters))
        where
          -- The first element is the log noise variance; the remainder are 
          -- the log hyperparameters of the covariance function.
          lognvar = parameters @> 0
          loghyper = subVector 1 ((dim parameters) - 1) parameters 
          newCov = (makeCovarianceFromList cov) $ toList loghyper
          (l, alpha) = gpRMain newCov lognvar inputs targets
          negGradLE = -(gpRGradLogEvidence newCov lognvar l alpha inputs)
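
{- A minimal end-to-end sketch (illustrative only: the covariance value cov,
   its log hyperparameters covLogHypers, and the data inputs and targets are
   assumed to be constructed elsewhere with the rest of the library):

     logParams = join [fromList [log 0.01], covLogHypers]
     (negLogEv, grad) = gpRLogHyperToEvidence cov inputs targets logParams

   The pair (negLogEv, grad) is in the form expected by a gradient-based
   minimiser for tuning the hyperparameters; gpRMain and gpRPredict then
   produce predictions at new inputs using the tuned values.
-}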