// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision: 5633 $
// $Date: 2009-07-01 15:02:51 +1000 (Wed, 01 Jul 2009) $
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt
// in the root directory of this source distribution.
// -------------------------------------------------------------

/**
 * @file
 * sharedmem.h
 *
 * @brief Shared memory declaration struct for templatized types.
 *
 * Because dynamically sized shared memory arrays are declared "extern" in CUDA,
 * we can't templatize their types directly.  To get around this, we declare a
 * simple wrapper struct that will declare the extern array with a different
 * name depending on the type.  This avoids linker errors about multiple
 * definitions.
 *
 * To use dynamically allocated shared memory in a templatized __global__ or
 * __device__ function, just replace code like this:
 *
 *  template<class T>
 *  __global__ void
 *  foo( T* d_out, T* d_in)
 *  {
 *      // Shared mem size is determined by the host app at run time
 *      extern __shared__  T sdata[];
 *      ...
 *      doStuff(sdata);
 *      ...
 *  }
 * 
 *
 * With this:
 *
 *  template<class T>
 *  __global__ void
 *  foo( T* d_out, T* d_in)
 *  {
 *      // Shared mem size is determined by the host app at run time
 *      SharedMemory<T> smem;
 *      T* sdata = smem.getPointer();
 *      ...
 *      doStuff(sdata);
 *      ...
 *  }
 * 
*/

#ifndef __SHARED_MEM_H__
#define __SHARED_MEM_H__

/** @brief Wrapper class for templatized dynamic shared memory arrays.
  *
  * This struct uses template specialization on the type \a T to declare
  * a differently named dynamic shared memory array for each type
  * (\code extern __shared__ T s_type[] \endcode).
  *
  * Currently there are specializations for the following types:
  * \c int, \c uint, \c char, \c uchar, \c short, \c ushort, \c long,
  * \c unsigned long, \c bool, \c float, \c double, and \c uchar4.
  * One can also specialize it for user defined types.
  *
  * @note Each specialization declares its own \c extern \c __shared__ array.
  *       Per the CUDA programming guide, all such arrays start at the same
  *       address, so pointers obtained from different specializations inside
  *       one kernel refer to the same dynamic shared memory region.
  */
template <typename T>
struct SharedMemory
{
    /** Return a pointer to the runtime-sized shared memory array.
      *
      * The un-specialized version is a deliberate trap: it calls
      * Error_UnsupportedType(), which is only declared here and never
      * defined in this file, so that using SharedMemory with a type that
      * has no specialization is rejected at build time instead of silently
      * handing back a null pointer.
      *
      * @returns A null \a T pointer (never meant to be reached).
      **/
    __device__ T* getPointer()
    {
        // Ensure that we won't compile any un-specialized types
        extern __device__ void Error_UnsupportedType();
        Error_UnsupportedType();
        return (T*)0;
    }
    // TODO: Use operator overloading to make this class look like a regular array
};

// Following are the specializations for the following types.
// int, uint, char, uchar, short, ushort, long, ulong, bool, float, double,
// and uchar4.
// One could also specialize it for user-defined types.

// SPEC_SHAREDMEM(T, name) defines the explicit specialization SharedMemory<T>
// whose getPointer() returns the dynamic shared memory array s_<name>.
// `name` must be a single identifier-safe token because it is pasted into the
// array name; that is why e.g. `unsigned int` is registered as `uint`.
// The macro intentionally omits the trailing ';' so every use site reads like
// an ordinary statement.
#define SPEC_SHAREDMEM(T, name)                 \
    template <>                                 \
    struct SharedMemory<T>                      \
    {                                           \
        __device__ T* getPointer()              \
        {                                       \
            extern __shared__ T s_##name[];     \
            return s_##name;                    \
        }                                       \
    }

SPEC_SHAREDMEM(int, int);
SPEC_SHAREDMEM(char, char);
SPEC_SHAREDMEM(long, long);
SPEC_SHAREDMEM(short, short);
SPEC_SHAREDMEM(bool, bool);
SPEC_SHAREDMEM(float, float);
SPEC_SHAREDMEM(double, double);
SPEC_SHAREDMEM(unsigned int, uint);
SPEC_SHAREDMEM(unsigned char, uchar);
SPEC_SHAREDMEM(unsigned long, ulong);
SPEC_SHAREDMEM(unsigned short, ushort);
SPEC_SHAREDMEM(uchar4, uchar4);

// The helper macro is an implementation detail of this header; do not leak it.
#undef SPEC_SHAREDMEM

#endif // __SHARED_MEM_H__

// Leave this at the end of the file
// Local Variables:
// mode:c++
// c-file-style: "NVIDIA"
// End: