--------------------------------------------------------------------------------
-- |
-- Module    : Foreign.CUDA.Analysis.Device
-- Copyright : (c) [2009..2012] Trevor L. McDonell
-- License   : BSD
--
-- Common device functions
--
--------------------------------------------------------------------------------

module Foreign.CUDA.Analysis.Device
  (
    Compute(..), ComputeMode(..),
    DeviceProperties(..), DeviceResources(..), Allocation(..), PCI(..),
    deviceResources
  )
  where

#include

import Data.Int
import Debug.Trace


-- |
-- The compute mode the device is currently in
--
{# enum CUcomputemode as ComputeMode
    { underscoreToCase }
    with prefix="CU_COMPUTEMODE" deriving (Eq, Show) #}

-- |
-- GPU compute capability, major and minor revision number respectively.
--
data Compute = Compute !Int !Int
  deriving Eq

instance Show Compute where
  show (Compute major minor) = show major ++ "." ++ show minor

instance Ord Compute where
  compare (Compute m1 n1) (Compute m2 n2) =
    case compare m1 m2 of
      EQ -> compare n1 n2
      x  -> x

{--
cap :: Int -> Int -> Double
cap a 0 = fromIntegral a
cap a b =
  let a' = fromIntegral a in
  let b' = fromIntegral b in
  a' + b' / max 10 (10 ^ ((ceiling . logBase 10) b' :: Int))
--}

-- |
-- The properties of a compute device
--
data DeviceProperties = DeviceProperties
  {
    deviceName                  :: !String,          -- ^ Identifier
    computeCapability           :: !Compute,         -- ^ Supported compute capability
    totalGlobalMem              :: !Int64,           -- ^ Available global memory on the device in bytes
    totalConstMem               :: !Int64,           -- ^ Available constant memory on the device in bytes
    sharedMemPerBlock           :: !Int64,           -- ^ Available shared memory per block in bytes
    regsPerBlock                :: !Int,             -- ^ 32-bit registers per block
    warpSize                    :: !Int,             -- ^ Warp size in threads (SIMD width)
    maxThreadsPerBlock          :: !Int,             -- ^ Max number of threads per block
#if CUDA_VERSION >= 4000
    maxThreadsPerMultiProcessor :: !Int,             -- ^ Max number of threads per multiprocessor
#endif
    maxBlockSize                :: !(Int,Int,Int),   -- ^ Max size of each dimension of a block
    maxGridSize                 :: !(Int,Int,Int),   -- ^ Max size of each dimension of a grid
#if CUDA_VERSION >= 3000
    maxTextureDim1D             :: !Int,             -- ^ Maximum texture dimensions
    maxTextureDim2D             :: !(Int,Int),
    maxTextureDim3D             :: !(Int,Int,Int),
#endif
    clockRate                   :: !Int,             -- ^ Clock frequency in kilohertz
    multiProcessorCount         :: !Int,             -- ^ Number of multiprocessors on the device
    memPitch                    :: !Int64,           -- ^ Max pitch in bytes allowed by memory copies
#if CUDA_VERSION >= 4000
    memBusWidth                 :: !Int,             -- ^ Global memory bus width in bits
    memClockRate                :: !Int,             -- ^ Peak memory clock frequency in kilohertz
#endif
    textureAlignment            :: !Int64,           -- ^ Alignment requirement for textures
    computeMode                 :: !ComputeMode,
    deviceOverlap               :: !Bool,            -- ^ Device can concurrently copy memory and execute a kernel
#if CUDA_VERSION >= 3000
    concurrentKernels           :: !Bool,            -- ^ Device can possibly execute multiple kernels concurrently
    eccEnabled                  :: !Bool,            -- ^ Device supports and has enabled error correction
#endif
#if CUDA_VERSION >= 4000
    asyncEngineCount            :: !Int,             -- ^ Number of asynchronous engines
    cacheMemL2                  :: !Int,             -- ^ Size of the L2 cache in bytes
    tccDriverEnabled            :: !Bool,            -- ^ Whether this is a Tesla device using the TCC driver
    pciInfo                     :: !PCI,             -- ^ PCI device information for the device
#endif
    kernelExecTimeoutEnabled    :: !Bool,            -- ^ Whether there is a runtime limit on kernels
    integrated                  :: !Bool,            -- ^ As opposed to discrete
#if CUDA_VERSION >= 4000
    canMapHostMemory            :: !Bool,            -- ^ Device can use pinned memory
    unifiedAddressing           :: !Bool             -- ^ Device shares a unified address space with the host
#else
    canMapHostMemory            :: !Bool             -- ^ Device can use pinned memory
#endif
  }
  deriving (Show)
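
-- As a usage sketch only (this helper is illustrative and not part of the
-- original module): the 'Ord' instance on 'Compute' makes feature tests on
-- 'computeCapability' straightforward. For example, double precision floating
-- point arithmetic requires compute capability 1.3 or higher.
--
supportsDoublePrecision :: DeviceProperties -> Bool
supportsDoublePrecision dev = computeCapability dev >= Compute 1 3
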
data PCI = PCI
  {
    busID    :: !Int,       -- ^ PCI bus ID of the device
    deviceID :: !Int,       -- ^ PCI device ID
    domainID :: !Int        -- ^ PCI domain ID
  }
  deriving (Show)


-- GPU Hardware Resources
--
data Allocation = Warp | Block

data DeviceResources = DeviceResources
  {
    threadsPerWarp     :: !Int,          -- ^ Warp size
    threadsPerMP       :: !Int,          -- ^ Maximum number of in-flight threads on a multiprocessor
    threadBlocksPerMP  :: !Int,          -- ^ Maximum number of thread blocks resident on a multiprocessor
    warpsPerMP         :: !Int,          -- ^ Maximum number of in-flight warps per multiprocessor
    coresPerMP         :: !Int,          -- ^ Number of SIMD arithmetic units per multiprocessor
    sharedMemPerMP     :: !Int,          -- ^ Total amount of shared memory per multiprocessor (bytes)
    sharedMemAllocUnit :: !Int,          -- ^ Shared memory allocation unit size (bytes)
    regFileSize        :: !Int,          -- ^ Total number of registers in a multiprocessor
    regAllocUnit       :: !Int,          -- ^ Register allocation unit size
    regAllocWarp       :: !Int,          -- ^ Register allocation granularity for warps
    allocation         :: !Allocation    -- ^ How multiprocessor resources are divided
  }

-- |
-- Extract some additional hardware resource limitations for a given device.
--
deviceResources :: DeviceProperties -> DeviceResources
deviceResources = resources . computeCapability
  where
    -- This is mostly extracted from tables in the CUDA occupancy calculator.
    --
    resources compute = case compute of
      Compute 1 0 -> DeviceResources 32  768  8 24   8 16384 512  8192 256 2 Block  -- Tesla G80
      Compute 1 1 -> DeviceResources 32  768  8 24   8 16384 512  8192 256 2 Block  -- Tesla G8x
      Compute 1 2 -> DeviceResources 32 1024  8 32   8 16384 512 16384 512 2 Block  -- Tesla G9x
      Compute 1 3 -> DeviceResources 32 1024  8 32   8 16384 512 16384 512 2 Block  -- Tesla GT200
      Compute 2 0 -> DeviceResources 32 1536  8 48  32 49152 128 32768  64 2 Warp   -- Fermi GF100
      Compute 2 1 -> DeviceResources 32 1536  8 48  48 49152 128 32768  64 2 Warp   -- Fermi GF10x
      Compute 3 0 -> DeviceResources 32 2048 16 64 192 49152 256 65536 256 4 Warp   -- Kepler GK10x
      Compute 3 5 -> DeviceResources 32 2048 16 64 192 49152 256 65536 256 4 Warp   -- Kepler GK11x

      -- Something might have gone wrong, or the library just needs to be
      -- updated for the next generation of hardware, in which case we just
      -- want to pick a sensible default and carry on.
      --
      -- This is slightly dodgy as the warning message is coming from pure
      -- code. However, it should be OK because all library functions run in
      -- IO, so it is likely the user code is as well.
      --
      _           -> trace warning $ resources (Compute 3 0)
        where
          warning = unlines
            [ "*** Warning: unknown CUDA device compute capability: " ++ show compute
            , "*** Please submit a bug report at https://github.com/tmcdonell/cuda/issues" ]
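
-- A minimal occupancy sketch (illustrative only; not part of the original
-- module): given the per-multiprocessor limits returned by 'deviceResources'
-- and a proposed thread block size (assumed > 0), estimate how many blocks
-- can be resident on one multiprocessor at a time, considering only the
-- thread, warp, and block-count limits and ignoring register and shared
-- memory pressure. The name 'residentBlocksPerMP' is our own.
--
residentBlocksPerMP :: DeviceResources -> Int -> Int
residentBlocksPerMP res blockSize =
  minimum [ threadBlocksPerMP res                -- hardware limit on resident blocks
          , threadsPerMP res `div` blockSize     -- limited by resident threads
          , warpsPerMP res `div` warpsPerBlock   -- limited by resident warps
          ]
  where
    -- warps needed to hold one block, rounding up for partially filled warps
    warpsPerBlock = (blockSize + threadsPerWarp res - 1) `div` threadsPerWarp res

-- For example, on a compute 2.0 (Fermi GF100) device with 256-thread blocks
-- this gives minimum [8, 6, 6] = 6 resident blocks per multiprocessor.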