{-# LANGUAGE RecordWildCards #-} import Control.Applicative ((<$>), (<*>)) import Control.Monad (replicateM) import System.IO (hSetBuffering, stdout, BufferMode (NoBuffering)) import qualified System.Random as R import qualified Data.Vector as V import qualified Data.Vector.Unboxed as U import qualified Numeric.SGD as S ------------------------------------------------------------------------------ -- Dataset generation ------------------------------------------------------------------------------ -- | Element of a dataset. type Elem = [(Int, Double)] -- | Random dataset element. elemR :: Int -- ^ Maximum number of element items -> (Int, Int) -- ^ Range for item's first component -> (Double, Double) -- ^ Range for item's second component -> IO Elem -- ^ Result elemR nMax xr yr = do n <- R.randomRIO (0, max 0 nMax) replicateM n ((,) <$> R.randomRIO xr <*> R.randomRIO yr) -- | Random dataset. dataSetR :: Int -- ^ Dataset size -> Int -- ^ Number of model parameters -> Int -- ^ Maximum number of items in data element -> (Double, Double) -- ^ Range for item's second component -> IO (V.Vector Elem) -- ^ Result dataSetR m n k yRan = V.fromList <$> replicateM m (elemR k (0, n-1) yRan) ------------------------------------------------------------------------------ -- Objective function and gradient ------------------------------------------------------------------------------ -- | An objective function. The SGD method can be used when -- the objective function is defined in a form of a sum. goal :: S.Para -> [Elem] -> Double goal para = sum . map perElem where perElem xs = sum [ (para U.! k - x) ^ (2 :: Int) | (k, x) <- xs ] -- | Since the goal function has a form of a sum, it is sufficient to define -- the gradient over one element only. The gradient with respect to the dataset -- is a sum of gradients over its individual elements. grad :: S.Para -> Elem -> S.Grad grad para xs = S.fromList -- [ (k, 2 * (x - para U.! k)) [ (k, 2 * (para U.! k - x)) | (k, x) <- xs ] -- | Negate gradient. We use it to find the minimum of the objective function. negGrad :: (S.Para -> Elem -> S.Grad) -> (S.Para -> Elem -> S.Grad) negGrad g para x = fmap negate (g para x) ------------------------------------------------------------------------------ -- SGD ------------------------------------------------------------------------------ -- | Notification run by the sgdM function every parameters update. notify :: S.SgdArgs -> V.Vector Elem -> S.Para -> Int -> IO () notify S.SgdArgs{..} dataSet para k = if doneTotal k /= doneTotal (k - 1) then do let n = doneTotal k x = goal para (V.toList dataSet) putStrLn ("\n" ++ "[" ++ show n ++ "] f = " ++ show x) else putStr "." where doneTotal :: Int -> Int doneTotal = floor . done done :: Int -> Double done i = fromIntegral (i * batchSize) / fromIntegral (V.length dataSet) -- | Run the monadic version of SGD. runSgdM :: Int -- ^ Dataset size -> Int -- ^ Number of model parameters -> Int -- ^ Maximum number of items in data element -> S.SgdArgs -- ^ SGD parameters -> IO S.Para runSgdM m n k sgdArgs = do dataSet <- dataSetR m n k (-10, 10) let para = U.replicate n 0 hSetBuffering stdout NoBuffering S.sgdM sgdArgs (notify sgdArgs dataSet) (negGrad grad) dataSet para -- | Run the monadic version of SGD with some default parameter values. main = do let sgdArgs = S.sgdArgsDefault { S.iterNum = 50 } runSgdM 1000 1000000 10 sgdArgs