// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision: 5633 $
// $Date: 2009-07-01 15:02:51 +1000 (Wed, 01 Jul 2009) $
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt
// in the root directory of this source distribution.
// -------------------------------------------------------------

/**
 * @file
 * sharedmem.h
 *
 * @brief Shared memory declaration struct for templatized types.
 *
 * Because dynamically sized shared memory arrays are declared "extern" in
 * CUDA, we can't templatize their types directly. To get around this, we
 * declare a simple wrapper struct that will declare the extern array with
 * a different name depending on the type. This avoids linker errors about
 * multiple definitions.
 *
 * To use dynamically allocated shared memory in a templatized __global__ or
 * __device__ function, just replace code like this:
 *
 * <pre>
 *  template<class T>
 *  __global__ void
 *  foo( T* d_out, T* d_in)
 *  {
 *      // Shared mem size is determined by the host app at run time
 *      extern __shared__ T sdata[];
 *      ...
 *      doStuff(sdata);
 *      ...
 *  }
 * </pre>
 *
 * With this:
 * <pre>
 *  template<class T>
 *  __global__ void
 *  foo( T* d_out, T* d_in)
 *  {
 *      // Shared mem size is determined by the host app at run time
 *      SharedMemory<T> smem;
 *      T* sdata = smem.getPointer();
 *      ...
 *      doStuff(sdata);
 *      ...
 *  }
 * </pre>
 */

#ifndef __SHARED_MEM_H__
#define __SHARED_MEM_H__

/** @brief Wrapper class for templatized dynamic shared memory arrays.
 *
 * This struct uses template specialization on the type \a T to declare
 * a differently named dynamic shared memory array for each type
 * (\code extern __shared__ T s_type[] \endcode).
 *
 * Currently there are specializations for the following types:
 * \c int, \c uint, \c char, \c uchar, \c short, \c ushort, \c long,
 * \c unsigned long, \c bool, \c float, and \c double. One can also
 * specialize it for user defined types.
 */
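// What follows is a minimal sketch of the wrapper described above, assuming
// the pattern common to cuDPP and the CUDA SDK: the primary template forces a
// link error for unsupported types, and each explicit specialization declares
// its own uniquely named extern shared array. Only a representative subset of
// the specializations listed above is sketched here, and the identifiers
// getPointer, Error_UnsupportedType, s_int, and s_float are conventional
// names that should be checked against the full cuDPP source.
template <typename T>
struct SharedMemory
{
    //! @brief Return a pointer to the runtime-sized shared memory array.
    //! @returns Pointer to the shared memory array
    __device__ T* getPointer()
    {
        // Referencing an undefined device function makes any instantiation
        // for a type without a specialization fail at link time.
        extern __device__ void Error_UnsupportedType();
        Error_UnsupportedType();
        return (T*)0;
    }
};

// Example specializations: each declares a differently named array, which is
// what avoids the multiple-definition linker errors described above.
template <>
struct SharedMemory<int>
{
    __device__ int* getPointer()
    {
        extern __shared__ int s_int[];
        return s_int;
    }
};

template <>
struct SharedMemory<float>
{
    __device__ float* getPointer()
    {
        extern __shared__ float s_float[];
        return s_float;
    }
};

// A user defined type can be supported the same way; Vec2 here is a
// hypothetical example, not part of cuDPP.
struct Vec2 { float x, y; };

template <>
struct SharedMemory<Vec2>
{
    __device__ Vec2* getPointer()
    {
        extern __shared__ Vec2 s_Vec2[];
        return s_Vec2;
    }
};

#endif // __SHARED_MEM_H__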