cuda-0.7.0.0: FFI binding to the CUDA interface for programming NVIDIA GPUs

Copyright: [2009..2014] Trevor L. McDonell
License: BSD
Safe Haskell: None
Language: Haskell98

Foreign.CUDA.Runtime.Device

Contents

Description

Device management routines

Synopsis

Device Management

type Device = Int Source

A device identifier

data DeviceProperties Source

The properties of a compute device

Constructors

DeviceProperties 

Fields

deviceName :: !String

Identifier

computeCapability :: !Compute

Supported compute capability

totalGlobalMem :: !Int64

Available global memory on the device in bytes

totalConstMem :: !Int64

Available constant memory on the device in bytes

sharedMemPerBlock :: !Int64

Available shared memory per block in bytes

regsPerBlock :: !Int

32-bit registers per block

warpSize :: !Int

Warp size in threads (SIMD width)

maxThreadsPerBlock :: !Int

Maximum number of threads per block

maxThreadsPerMultiProcessor :: !Int

Maximum number of threads per multiprocessor

maxBlockSize :: !(Int, Int, Int)

Maximum size of each dimension of a block

maxGridSize :: !(Int, Int, Int)

Maximum size of each dimension of a grid

maxTextureDim1D :: !Int

Maximum texture dimensions

maxTextureDim2D :: !(Int, Int)
 
maxTextureDim3D :: !(Int, Int, Int)
 
clockRate :: !Int

Clock frequency in kilohertz

multiProcessorCount :: !Int

Number of multiprocessors on the device

memPitch :: !Int64

Maximum pitch in bytes allowed by memory copies

memBusWidth :: !Int

Global memory bus width in bits

memClockRate :: !Int

Peak memory clock frequency in kilohertz

textureAlignment :: !Int64

Alignment requirement for textures

computeMode :: !ComputeMode
 
deviceOverlap :: !Bool

Device can concurrently copy memory and execute a kernel

concurrentKernels :: !Bool

Device can possibly execute multiple kernels concurrently

eccEnabled :: !Bool

Device supports and has enabled error correction

asyncEngineCount :: !Int

Number of asynchronous engines

cacheMemL2 :: !Int

Size of the L2 cache in bytes

pciInfo :: !PCI

PCI device information for the device

tccDriverEnabled :: !Bool

Whether this is a Tesla device using the TCC driver

kernelExecTimeoutEnabled :: !Bool

Whether there is a runtime limit on kernels

integrated :: !Bool

As opposed to discrete

canMapHostMemory :: !Bool

Device can use pinned memory

unifiedAddressing :: !Bool

Device shares a unified address space with the host

streamPriorities :: !Bool

Device supports stream priorities

globalL1Cache :: !Bool

Device supports caching globals in L1 cache

localL1Cache :: !Bool

Device supports caching locals in L1 cache

managedMemory :: !Bool

Device supports allocating managed memory on this system

multiGPUBoard :: !Bool

Device is on a multi-GPU board

multiGPUBoardGroupID :: !Int

Unique identifier for a group of devices associated with the same board

data ComputeMode Source

The compute mode the device is currently in

Instances

Enum ComputeMode Source

(Note: the following description belongs to the 'Compute' type, used by the computeCapability field above — GPU compute capability, major and minor revision number respectively.)

Eq ComputeMode Source 
Show ComputeMode Source 

choose :: DeviceProperties -> IO Device Source

Select the compute device which best matches the given criteria

get :: IO Device Source

Returns which device is currently being used

count :: IO Int Source

Returns the number of devices available for execution, with compute capability >= 1.0

props :: Device -> IO DeviceProperties Source

Return information about the selected compute device

set :: Device -> IO () Source

Set device to be used for GPU execution

setFlags :: [DeviceFlag] -> IO () Source

Set flags to be used for device executions

setOrder :: [Device] -> IO () Source

Set list of devices for CUDA execution in priority order

reset :: IO () Source

Explicitly destroys and cleans up all runtime resources associated with the current device in the current process. Any subsequent API call will reinitialise the device.

Note that this function will reset the device immediately. It is the caller’s responsibility to ensure that the device is not being accessed by any other host threads from the process when this function is called.

sync :: IO () Source

Block until the device has completed all preceding requested tasks. Returns an error if one of the tasks fails.

Peer Access

data PeerFlag Source

Possible option values for direct peer memory access

Instances

accessible :: Device -> Device -> IO Bool Source

Queries if the first device can directly access the memory of the second. If direct access is possible, it can then be enabled with add. Requires cuda-4.0.

add :: Device -> [PeerFlag] -> IO () Source

If the devices of both the current and supplied contexts support unified addressing, then enable allocations in the supplied context to be accessible by the current context. Requires cuda-4.0.

remove :: Device -> IO () Source

Disable direct memory access from the current context to the supplied context. Requires cuda-4.0.

Cache Configuration

getLimit :: Limit -> IO Int Source

Query compute 2.0 call stack limits. Requires cuda-3.1.

setLimit :: Limit -> Int -> IO () Source

Set compute 2.0 call stack limits. Requires cuda-3.1.