// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision: 5632 $
// $Date: 2009-07-01 14:36:01 +1000 (Wed, 01 Jul 2009) $
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
// -------------------------------------------------------------

/**
 * @file
 * cudpp_globals.h
 *
 * @brief Global declarations defining machine characteristics of the GPU target.
 *
 * These are currently set for best performance on G8X GPUs.  The optimal
 * parameters may change on future GPUs. In the future, we hope to make
 * CUDPP a self-tuning library.
 */

#ifndef __CUDPP_GLOBALS_H__
#define __CUDPP_GLOBALS_H__

const int NUM_BANKS = 16;                        /**< Number of shared memory banks */
const int LOG_NUM_BANKS = 4;                     /**< log_2(NUM_BANKS) */
const int CTA_SIZE = 128;                        /**< Number of threads in a CTA */
const int WARP_SIZE = 32;                        /**< Number of threads in a warp */
const int LOG_CTA_SIZE = 7;                      /**< log_2(CTA_SIZE) */
const int LOG_WARP_SIZE = 5;                     /**< log_2(WARP_SIZE) */
const int LOG_SIZEOF_FLOAT = 2;                  /**< log_2(sizeof(float)) */
const int SCAN_ELTS_PER_THREAD = 8;              /**< Number of elements per scan thread */
const int SEGSCAN_ELTS_PER_THREAD = 8;     /**< Number of elements per segmented scan thread */

const int maxSharedMemoryPerBlock = 16384; /**< Number of bytes of shared
                                              memory in each block */
const int maxThreadsPerBlock = CTA_SIZE;   /**< Maximum number of
                                             * threads in a CTA */

#define AVOID_BANK_CONFLICTS /**< Set if by default, we want our
                              * shared memory allocation to perform
                              * additional computation to avoid bank
                              * conflicts */

#ifdef AVOID_BANK_CONFLICTS
#define CONFLICT_FREE_OFFSET(index) ((index) >> LOG_NUM_BANKS)
#else
#define CONFLICT_FREE_OFFSET(index) (0)
#endif

#endif // __CUDPP_GLOBALS_H__

// Leave this at the end of the file
// Local Variables:
// mode:c++
// c-file-style: "NVIDIA"
// End: