{-# LANGUAGE RecordWildCards #-} -- | A version of `Numeric.SGD.Sparse` extended with momentum. module Numeric.SGD.Sparse.Momentum ( SgdArgs (..) , sgdArgsDefault , Para , sgd , module Numeric.SGD.Sparse.Grad , module Numeric.SGD.DataSet ) where import Control.Monad (forM_) -- import qualified System.Random as R import qualified Data.Vector.Unboxed as U import qualified Data.Vector.Unboxed.Mutable as UM import qualified Control.Monad.Primitive as Prim import Numeric.SGD.Sparse.Grad import Numeric.SGD.DataSet -- | SGD parameters controlling the learning process. data SgdArgs = SgdArgs { -- | Size of the batch batchSize :: Int -- | Regularization variance , regVar :: Double -- | Number of iterations , iterNum :: Double -- | Initial gain parameter , gain0 :: Double -- | After how many iterations over the entire dataset -- the gain parameter is halved , tau :: Double } -- | Default SGD parameter values. sgdArgsDefault :: SgdArgs sgdArgsDefault = SgdArgs { batchSize = 50 , regVar = 10 , iterNum = 10 , gain0 = 0.25 -- Without momentum I would rather go for '1', but with momentum the -- gradient gets significantly larger. , tau = 5 } -- | The gamma parameter which drives momentum. -- -- TODO: put in SgdArgs. -- gamma :: Double gamma = 0.9 -- | Vector of parameters. type Para = U.Vector Double -- | Type synonym for mutable vector with Double values. type MVect = UM.MVector (Prim.PrimState IO) Double -- | A stochastic gradient descent method. -- A notification function can be used to provide user with -- information about the progress of the learning. sgd :: SgdArgs -- ^ SGD parameter values -> (Para -> Int -> IO ()) -- ^ Notification run every update -> (Para -> x -> Grad) -- ^ Gradient for dataset element -> DataSet x -- ^ DataSet -> Para -- ^ Starting point -> IO Para -- ^ SGD result sgd SgdArgs{..} notify mkGrad dataset x0 = do putStrLn $ "Running momentum!" -- A vector for the momentum gradient momentum <- UM.new (U.length x0) -- A worker vector for computing the actual gradients u <- UM.new (U.length x0) doIt momentum u 0 =<< U.thaw x0 where -- Gain in k-th iteration. gain k = (gain0 * tau) / (tau + done k) -- Number of completed iterations over the full dataset. done :: Int -> Double done k = fromIntegral (k * batchSize) / fromIntegral (size dataset) -- Regularization (Guassian prior) parameter regularizationParam = regCoef where regCoef = iVar ** coef iVar = 1.0 / regVar coef = fromIntegral (size dataset) / fromIntegral batchSize doIt momentum u k x | done k > iterNum = do frozen <- U.unsafeFreeze x notify frozen k return frozen | otherwise = do -- Sample the dataset batch <- randomSample batchSize dataset -- NEW: comment out -- -- Apply regularization to the parameters vector. -- scale (regularization k) x -- Freeze mutable vector of parameters. The frozen version is -- then supplied to external mkGrad function provided by user. frozen <- U.unsafeFreeze x notify frozen k -- Compute the gradient and put it in `u` let grad = parUnions (map (mkGrad frozen) batch) addUp grad u -- Apply regularization to `u` applyRegularization regularizationParam x u -- Scale the gradient scale (gain k) u -- Compute the new momentum updateMomentum gamma momentum u x' <- U.unsafeThaw frozen momentum `addTo` x' doIt momentum u (k+1) x' -- | Compute the new momentum (gradient) vector. applyRegularization :: Double -- ^ Regularization parameter -> MVect -- ^ The parameters -> MVect -- ^ The current gradient -> IO () applyRegularization regParam params grad = do forM_ [0 .. UM.length grad - 1] $ \i -> do x <- UM.unsafeRead grad i y <- UM.unsafeRead params i UM.unsafeWrite grad i $ x - regParam * y -- | Compute the new momentum (gradient) vector. updateMomentum :: Double -- ^ The gamma parameter -> MVect -- ^ The previous momentum -> MVect -- ^ The scaled current gradient -> IO () updateMomentum gammaCoef momentum grad = do forM_ [0 .. UM.length momentum - 1] $ \i -> do x <- UM.unsafeRead momentum i y <- UM.unsafeRead grad i UM.unsafeWrite momentum i (gammaCoef * x + y) -- | Add up all gradients and store results in normal domain. addUp :: Grad -> MVect -> IO () addUp grad v = do UM.set v 0 forM_ (toList grad) $ \(i, x) -> do y <- UM.unsafeRead v i UM.unsafeWrite v i (x + y) -- | Scale the vector by the given value. scale :: Double -> MVect -> IO () scale c v = do forM_ [0 .. UM.length v - 1] $ \i -> do y <- UM.unsafeRead v i UM.unsafeWrite v i (c * y) -- | Apply gradient to the parameters vector, that is add the first vector to -- the second one. addTo :: MVect -> MVect -> IO () addTo w v = do forM_ [0 .. UM.length v - 1] $ \i -> do x <- UM.unsafeRead v i y <- UM.unsafeRead w i UM.unsafeWrite v i (x + y)