{-# LANGUAGE RecordWildCards #-}

-- | Stochastic gradient descent implementation using mutable
-- vectors for efficient updates of the parameter vector.
-- The user is provided with an immutable version of the parameter
-- vector, so the gradient can be computed outside the IO/ST monad.
-- Currently only Gaussian priors are implemented.
--
-- This is a preliminary version of the SGD library and the API may
-- change in future versions.

module Numeric.SGD
( SgdArgs (..)
, sgdArgsDefault
, Dataset
, Para
, sgd
, sgdM
, module Numeric.SGD.Grad
) where

import Control.Monad (forM_)
import Control.Monad.ST (ST, runST)
import qualified System.Random as R
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as UM
import qualified Control.Monad.Primitive as Prim

import Numeric.SGD.Grad

-- | SGD parameters controlling the learning process.
data SgdArgs = SgdArgs
    { -- | Size of the batch
      batchSize :: Int
    -- | Regularization variance
    , regVar    :: Double
    -- | Number of iterations over the entire dataset
    , iterNum   :: Double
    -- | Initial gain parameter
    , gain0     :: Double
    -- | After how many iterations over the entire dataset
    -- the gain parameter is halved
    , tau       :: Double }

-- | Default SGD parameter values.
sgdArgsDefault :: SgdArgs
sgdArgsDefault = SgdArgs
    { batchSize = 30
    , regVar    = 10
    , iterNum   = 10
    , gain0     = 1
    , tau       = 5 }

-- | Dataset with elements of x type.
type Dataset x = V.Vector x

-- | Vector of parameters.
type Para = U.Vector Double

-- | Type synonym for a mutable vector with Double values.
type MVect m = UM.MVector (Prim.PrimState m) Double

-- | Pure version of the stochastic gradient descent method.
sgd :: SgdArgs              -- ^ SGD parameter values
    -> (Para -> x -> Grad)  -- ^ Gradient for dataset element
    -> Dataset x            -- ^ Dataset
    -> Para                 -- ^ Starting point
    -> Para                 -- ^ SGD result
sgd sgdArgs mkGrad dataset x0 =
    let dummy _ _ = return ()
    in  runST $ sgdM sgdArgs dummy mkGrad dataset x0

-- | Monadic version of the stochastic gradient descent method.
-- A notification function can be used to provide the user with
-- information about the progress of the learning.

{-# SPECIALIZE sgdM
    :: SgdArgs -> (Para -> Int -> IO ())
    -> (Para -> x -> Grad) -> Dataset x
    -> Para -> IO Para #-}

{-# SPECIALIZE sgdM
    :: SgdArgs -> (Para -> Int -> ST s ())
    -> (Para -> x -> Grad) -> Dataset x
    -> Para -> ST s Para #-}

sgdM
    :: (Prim.PrimMonad m)
    => SgdArgs                  -- ^ SGD parameter values
    -> (Para -> Int -> m ())    -- ^ Notification run every update
    -> (Para -> x -> Grad)      -- ^ Gradient for dataset element
    -> Dataset x                -- ^ Dataset
    -> Para                     -- ^ Starting point
    -> m Para                   -- ^ SGD result
sgdM SgdArgs{..} notify mkGrad dataset x0 = do
    u <- UM.new (U.length x0)
    doIt u 0 (R.mkStdGen 0) =<< U.thaw x0
  where
    -- | Gain in the k-th iteration.
    gain k = (gain0 * tau) / (tau + done k)

    -- | Number of completed iterations over the full dataset.
    done k
        = fromIntegral (k * batchSize)
        / fromIntegral (V.length dataset)

    doIt u k stdGen x
        | done k > iterNum = do
            frozen <- U.unsafeFreeze x
            notify frozen k
            return frozen
        | otherwise = do
            let (batch, stdGen') = sample stdGen batchSize dataset

            -- Freeze the mutable vector of parameters.  The frozen
            -- version is then supplied to the external mkGrad function
            -- provided by the user.
            frozen <- U.unsafeFreeze x
            notify frozen k

            -- Combine the gradients of the batch in parallel.
            let grad = parUnions (map (mkGrad frozen) batch)
            addUp grad u
            scale (gain k) u

            x' <- U.unsafeThaw frozen
            apply u x'
            doIt u (k+1) stdGen' x'
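-- A minimal usage sketch (not part of the original module).  Given a
-- user-supplied gradient function @grad :: Para -> x -> Grad@, training
-- reduces to a single call; the names @grad@, @trainData@ and @dim@
-- below are hypothetical:
--
-- > para = sgd sgdArgsDefault grad trainData (U.replicate dim 0)
--
-- or, in IO with progress notifications via 'sgdM':
--
-- > para <- sgdM sgdArgsDefault
-- >     (\_ k -> putStrLn ("batch " ++ show k))
-- >     grad trainData (U.replicate dim 0)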
-- | Add up all gradients and store the results in the normal domain.

{-# SPECIALIZE addUp :: Grad -> MVect IO -> IO () #-}
{-# SPECIALIZE addUp :: Grad -> MVect (ST s) -> ST s () #-}
addUp :: Prim.PrimMonad m => Grad -> MVect m -> m ()
addUp grad v = do
    UM.set v 0
    forM_ (toList grad) $ \(i, x) -> do
        y <- UM.unsafeRead v i
        UM.unsafeWrite v i (x + y)

-- | Scale the vector by the given value.
{-# SPECIALIZE scale :: Double -> MVect IO -> IO () #-}
{-# SPECIALIZE scale :: Double -> MVect (ST s) -> ST s () #-}
scale :: Prim.PrimMonad m => Double -> MVect m -> m ()
scale c v = do
    forM_ [0 .. UM.length v - 1] $ \i -> do
        y <- UM.unsafeRead v i
        UM.unsafeWrite v i (c * y)

-- | Apply the gradient to the parameter vector, that is, add the first
-- vector to the second one.
{-# SPECIALIZE apply :: MVect IO -> MVect IO -> IO () #-}
{-# SPECIALIZE apply :: MVect (ST s) -> MVect (ST s) -> ST s () #-}
apply :: Prim.PrimMonad m => MVect m -> MVect m -> m ()
apply w v = do
    forM_ [0 .. UM.length v - 1] $ \i -> do
        x <- UM.unsafeRead v i
        y <- UM.unsafeRead w i
        UM.unsafeWrite v i (x + y)

-- | Draw a batch of @n@ elements from the dataset uniformly at random,
-- with replacement.
sample :: R.RandomGen g => g -> Int -> Dataset x -> ([x], g)
sample g 0 _       = ([], g)
sample g n dataset =
    let (xs, g') = sample g (n-1) dataset
        (i, g'') = R.next g'
        x = dataset V.! (i `mod` V.length dataset)
    in  (x:xs, g'')
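-- An illustrative gradient function for a least-squares objective,
-- showing how a user might plug into 'sgd'.  This is a sketch, not part
-- of the library; it assumes Numeric.SGD.Grad exports a 'fromList'
-- constructor (the counterpart of the 'toList' used in 'addUp' above):
--
-- > grad :: Para -> (U.Vector Double, Double) -> Grad
-- > grad para (xs, y) =
-- >     let err = y - U.sum (U.zipWith (*) para xs)  -- prediction error
-- >     in  fromList [ (i, err * x) | (i, x) <- zip [0..] (U.toList xs) ]
--
-- Since 'apply' adds @gain k@ times the combined gradient to the
-- parameters, the user-supplied gradient should point in the direction
-- of ascent of the objective being maximized.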