------------------------------------------------------------
-- |
-- Module      : Data.NeuralNetwork.Backend.BLASHS.Layers
-- Description : A backend for neural networks based on blas-hs.
-- Copyright   : (c) 2016 Jiasen Wu
-- License     : BSD-style (see the file LICENSE)
-- Maintainer  : Jiasen Wu
-- Stability   : experimental
-- Portability : portable
--
--
-- This module supplies a high-level abstraction of the rather
-- low-level blas-hs interfaces.
------------------------------------------------------------
{-# LANGUAGE BangPatterns, TypeFamilies, TypeOperators,
             FlexibleInstances, FlexibleContexts, GADTs #-}
module Data.NeuralNetwork.Backend.BLASHS.Layers(
  SinglVec, MultiMat,
  F, C, A, P, T, S,
  RunLayer(..),
  newFLayer, newCLayer
) where

import qualified Data.Vector as V
import System.Random.MWC
import System.Random.MWC.Distributions
import Control.Monad.ST
import Control.Monad (liftM2, forM_, when)
import GHC.Float
import Data.STRef
import Data.NeuralNetwork
import Data.NeuralNetwork.Backend.BLASHS.Utils
import Data.NeuralNetwork.Backend.BLASHS.SIMD

type R = Float
type M = IO

-- | We parameterise the activation layer T, where the parameter indicates how
-- the elements are contained:
data SinglVec
data MultiMat

-- | tag for the fully-connected component
data F
-- | tag for the convolution component
data C
-- | tag for the component that converts 2D to 1D
data A
-- | tag for the max-pooling component
data P
-- | tag for the activation component
data T c
-- | tag for the stacking component
data S a b

-- | basic components of a neural network
data RunLayer :: * -> * where
  -- | Densely connected layer
  --   input:   vector of size m
  --   output:  vector of size n
  --   weights: matrix of size m x n
  --   biases:  vector of size n
  Full :: !(DenseMatrix R) -> !(DenseVector R) -> RunLayer F
  -- | Convolutional layer
  --   input:   channels of 2D floats, of the same size (a x b), # of input channels:  m
  --   output:  channels of 2D floats, of the same size (c x d), # of output channels: n
  --            where c = a + 2*padding + 1 - s
  --                  d = b + 2*padding + 1 - t
  --   feature: matrix of (s x t), # of features: m x n
  --   padding: number of 0s padded at each side of channel
  --   biases:  bias for each output, # of biases: n
  Conv :: !(V.Vector (DenseMatrixArray R)) -> !(V.Vector R) -> Int -> RunLayer C
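  -- For instance (the numbers are illustrative assumptions, not fixed
  -- by this module): with 28 x 28 input channels, 5 x 5 features and
  -- padding 2, each output channel is again 28 x 28, since
  --   c = d = 28 + 2*2 + 1 - 5 = 28.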
  -- | Reshape from channels of matrix to a single vector
  --   input:  m channels of 2D matrices
  --           assuming that all matrices are of the same size a x b
  --   output: 1D vector of the concatenation of all input channels
  --           its size: m x a x b
  As1D :: RunLayer A
  -- | Max-pooling layer
  --   input:  channels of 2D floats, of the same size (a x b), # of input channels:  m
  --           assuming that a and b are both multiples of stride
  --   output: channels of 2D floats, of the same size (c x d), # of output channels: m
  --           where c = a / stride
  --                 d = b / stride
  MaxP :: Int -> RunLayer P
  -- | Activator
  --   the input can be either a 1D vector, a 2D matrix, or channels of either.
  Activation :: (SIMDPACK R -> SIMDPACK R, SIMDPACK R -> SIMDPACK R) -> RunLayer (T c)
  -- | Stacking of two components a and b
  --   the output of a should match the input of b
  Stack :: !(RunLayer a) -> !(RunLayer b) -> RunLayer (S a b)

instance Component (RunLayer F) where
  type Run (RunLayer F) = IO
  type Inp (RunLayer F) = DenseVector R
  type Out (RunLayer F) = DenseVector R
  -- trace is (input, weighted-sum)
  newtype Trace (RunLayer F) = DTrace (DenseVector R, DenseVector R)
  forwardT (Full !w !b) !inp = do
    bv <- newDenseVectorCopy b
    bv <<+ inp :<# w
    return $ DTrace (inp, bv)
  output (DTrace (_,!a)) = a
  backward (Full !w !b) (DTrace (!iv,!bv)) !odelta rate = do
    -- back-propagated error at the input
    idelta <- newDenseVector (fst $ size w)
    idelta <<= w :#> odelta
    -- odelta is not used any more, so we reuse it for an intermediate value.
    odelta <<= Scale (negate rate)
    w <<+ iv :## odelta
    b <<= b :.+ odelta
    return (Full w b, idelta)
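-- In equations (a restatement of the code above, with eta the learning
-- rate, x the input, y the weighted sum, and dy the delta coming from
-- the layer downstream):
--   forward:   y  = x^T W + b
--   backward:  dx = W dy
--              W <- W - eta (x dy^T)
--              b <- b - eta dy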
instance Component (RunLayer A) where
  type Run (RunLayer A) = IO
  type Inp (RunLayer A) = V.Vector (DenseMatrix R)
  type Out (RunLayer A) = DenseVector R
  -- trace keeps information of (m, a, b, output)
  newtype Trace (RunLayer A) = ReshapeTrace (Int, Int, Int, DenseVector R)
  forwardT _ !inp = do
    let !b = V.length inp
        (!r,!c) = size (V.head inp)
    o <- denseVectorConcat $ V.map m2v inp
    return $ ReshapeTrace (b, r, c, o)
  output (ReshapeTrace (_,_,_,a)) = a
  backward a (ReshapeTrace (b,r,c,_)) !odelta _ = do
    let !idelta = V.map (v2m r c) $ denseVectorSplit b (r*c) odelta
    return $ (a, idelta)

instance Component (RunLayer C) where
  type Run (RunLayer C) = IO
  type Inp (RunLayer C) = V.Vector (DenseMatrix R)
  type Out (RunLayer C) = V.Vector (DenseMatrix R)
  -- trace is (input, convoluted output)
  newtype Trace (RunLayer C) = CTrace (Inp (RunLayer C), Out (RunLayer C))
  forwardT (Conv fss bs pd) !inp = do
      ma <- newDenseMatrixArray outn outr outc
      V.zipWithM_ (\fs i -> corr2 pd (denseMatrixArrayToVector fs) i (ma <<+)) fss inp
      let ov = denseMatrixArrayToVector ma
      V.zipWithM_ (\m b -> m <<= Apply (plus (konst b))) ov bs
      return $ CTrace (inp, ov)
    where
      outn = V.length bs
      (outr,outc) = let (x,y)   = size (V.head inp)
                        (_,u,v) = size (V.head fss)
                    in (x+2*pd-u+1, y+2*pd-v+1)
  output (CTrace (_,!a)) = a
  backward (Conv fss bs pd) (CTrace (iv, av)) !odelta rate = do
    let (ir,ic) = size (V.head iv)
    idelta <- newDenseMatrixArray (V.length iv) ir ic
    fss' <- transpose fss
    -- a + 2p - k + 1 = b
    -- b + 2q - k + 1 = a
    -- -------------------
    --   q = k - 1 - p
    -- where
    --   a = |i|, k = |f|, b = |o|
    let qd = let (kr,_) = size (V.head $ V.head fss') in kr-1-pd
    V.zipWithM_ (\fs d -> conv2 qd fs d (idelta <<+)) fss' odelta
    !nb <- V.zipWithM (\b d -> do s <- sumElements d
                                  return $ b + negate rate * s)
                      bs odelta
    -- When updating the kernels, it originally should be
    -- conv2 pd iv od, but we use the equivalent form
    -- conv2 (|od|-|iv|+pd) od iv, because there are typically
    -- more output channels than input channels.
    -- a + 2p - b + 1 = c
    -- b + 2q - a + 1 = c
    -- ------------------
    --   q = a - b + p
    -- where
    --   a = |o|, b = |i|, c = |f|
    let qd = let (or,_) = size (V.head odelta) in or - ir + pd
    V.zipWithM_ (\fs i -> do
                   -- i:  one input channel
                   -- fs: all features applied to that channel
                   corr2 qd odelta i ((fs <<+) . Scale' (negate rate)))
                fss iv
    let !ideltaV = denseMatrixArrayToVector idelta
    return $ (Conv fss nb pd, ideltaV)

instance (Component (RunLayer a), Component (RunLayer b),
          Run (RunLayer a) ~ IO, Run (RunLayer b) ~ IO,
          Out (RunLayer a) ~ Inp (RunLayer b)
         ) => Component (RunLayer (S a b)) where
  type Run (RunLayer (S a b)) = IO
  type Inp (RunLayer (S a b)) = Inp (RunLayer a)
  type Out (RunLayer (S a b)) = Out (RunLayer b)
  newtype Trace (RunLayer (S a b)) = TTrace (Trace (RunLayer b), Trace (RunLayer a))
  forwardT (Stack a b) !i = do
    !tra <- forwardT a i
    !trb <- forwardT b (output tra)
    return $ TTrace (trb, tra)
  output (TTrace !a) = output (fst a)
  backward (Stack a b) (TTrace (!trb,!tra)) !odeltb rate = do
    (b', !odelta) <- backward b trb odeltb rate
    (a', !idelta) <- backward a tra odelta rate
    return (Stack a' b', idelta)

instance Component (RunLayer (T SinglVec)) where
  type Run (RunLayer (T SinglVec)) = IO
  type Inp (RunLayer (T SinglVec)) = DenseVector R
  type Out (RunLayer (T SinglVec)) = DenseVector R
  newtype Trace (RunLayer (T SinglVec)) = TTraceS (DenseVector R, DenseVector R)
  forwardT (Activation (af,_)) !inp = do
    out <- newDenseVectorCopy inp
    out <<= Apply af
    return $ TTraceS (inp, out)
  output (TTraceS (_,!a)) = a
  backward a@(Activation (_,ag)) (TTraceS (!iv,_)) !odelta _ = do
    idelta <- newDenseVectorCopy iv
    idelta <<= Apply ag
    idelta <<= odelta :.* idelta
    return $ (a, idelta)

instance Component (RunLayer (T MultiMat)) where
  type Run (RunLayer (T MultiMat)) = IO
  type Inp (RunLayer (T MultiMat)) = V.Vector (DenseMatrix R)
  type Out (RunLayer (T MultiMat)) = V.Vector (DenseMatrix R)
  newtype Trace (RunLayer (T MultiMat)) = TTraceM (V.Vector (DenseMatrix R), V.Vector (DenseMatrix R))
  forwardT (Activation (af,_)) !inp = do
    out <- V.mapM (\i -> do o <- newDenseMatrixCopy i
                            o <<= Apply af
                            return o)
                  inp
    return $ TTraceM (inp, out)
  output (TTraceM (_,!a)) = a
  backward a@(Activation (_,ag)) (TTraceM (!iv,_)) !odelta _ = do
    idelta <- V.zipWithM (\i d -> do o <- newDenseMatrixCopy i
                                     o <<= Apply ag
                                     o <<= d :.* o
                                     return o)
                         iv odelta
    return $ (a, idelta)

instance Component (RunLayer P) where
  type Run (RunLayer P) = IO
  type Inp (RunLayer P) = V.Vector (DenseMatrix R)
  type Out (RunLayer P) = V.Vector (DenseMatrix R)
  -- trace is (dimension of pools, index of max in each pool, pooled matrix)
  -- for each channel.
  newtype Trace (RunLayer P) = PTrace (V.Vector ((Int,Int), DenseVector Int, DenseMatrix R))
  -- forward divides the input matrix into stride x stride sub-matrices,
  -- and then finds the max element in each sub-matrix.
  forwardT (MaxP stride) !inp = V.mapM mk inp >>= return . PTrace
    where
      mk inp = do
        (!i,!v) <- pool stride inp
        return (size v, i, v)
  output (PTrace a) = V.map (\(_,_,!o) -> o) a
  -- use the saved index-of-max in each pool to propagate the error.
  backward l@(MaxP stride) (PTrace t) odelta _ = do
      !idelta <- V.zipWithM gen t odelta
      return $ (l, idelta)
    where
      gen (!si,!iv,_) od = unpool stride iv od
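-- A worked instance of the pooling above (the numbers are illustrative):
-- with stride 2, the 2 x 4 channel
--   1 3 2 0
--   4 2 1 5
-- is divided into two 2 x 2 pools.  The forward pass yields the 1 x 2
-- matrix [4 5] and records the position of the maximum inside each
-- pool; the backward pass routes each incoming delta back to exactly
-- that position and leaves the rest of the pool zero.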
-- | create a new fully-connected component
newFLayer :: Int               -- ^ number of input values
          -> Int               -- ^ number of neurons (output values)
          -> IO (RunLayer F)   -- ^ the new layer
newFLayer m n =
  withSystemRandom . asGenIO $ \gen -> do
    raw <- newDenseVectorByGen (double2Float <$> normal 0 0.01 gen) (m*n)
    let w = v2m m n raw
    b <- newDenseVectorConst n 1
    return $ Full w b

-- | create a new convolutional component
newCLayer :: Int               -- ^ number of input channels
          -> Int               -- ^ number of output channels
          -> Int               -- ^ size of each feature
          -> Int               -- ^ size of padding
          -> IO (RunLayer C)   -- ^ the new layer
newCLayer inpsize outsize sfilter npadding =
    withSystemRandom . asGenIO $ \gen -> do
      fss <- V.replicateM inpsize $ do
               raw <- newDenseVectorByGen (double2Float <$> truncNormal 0 0.1 gen) (outsize*sfilter*sfilter)
               return $ v2ma outsize sfilter sfilter raw
      let bs = V.replicate outsize 0.1
      return $ Conv fss bs npadding
  where
    -- draw from a normal distribution, rejecting and redrawing any
    -- standard variate that falls outside (-2, 2)
    truncNormal m s g = do
      x <- standard g
      if x >= 2.0 || x <= -2.0
        then truncNormal m s g
        else return $! m + s * x
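-- A minimal usage sketch of the pieces above (an illustration, not part
-- of the original module): stack a convolutional layer, the reshape
-- component, and a fully-connected layer, then run a single input
-- through the stack with 'forwardT'.  The concrete sizes are
-- assumptions: one 28x28 input channel, four 5x5 features with
-- padding 2 (so each output channel stays 28x28), hence a flattened
-- width of 4*28*28.
_exampleNet :: IO (DenseVector R)
_exampleNet = do
  conv <- newCLayer 1 4 5 2          -- 1 input channel, 4 output channels
  full <- newFLayer (4*28*28) 10     -- flattened conv output to 10 values
  let net = Stack conv (Stack As1D full)
  -- a dummy all-zero 28x28 input channel
  img <- v2m 28 28 <$> newDenseVectorConst (28*28) 0
  tr  <- forwardT net (V.singleton img)
  return $ output tr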