cuda: FFI binding to the CUDA interface for programming NVIDIA GPUs

Copyright: [2009..2014] Trevor L. McDonell
Safe Haskell: None




Device management routines


Device Management

type Device = Int Source

A device identifier

data DeviceProperties Source

The properties of a compute device




deviceName :: !String

ASCII string identifying the device

computeCapability :: !Compute

Supported compute capability

totalGlobalMem :: !Int64

Available global memory on the device in bytes

totalConstMem :: !Int64

Available constant memory on the device in bytes

sharedMemPerBlock :: !Int64

Available shared memory per block in bytes

regsPerBlock :: !Int

32-bit registers per block

warpSize :: !Int

Warp size in threads (SIMD width)

maxThreadsPerBlock :: !Int

Max number of threads per block

maxThreadsPerMultiProcessor :: !Int

Max number of threads per multiprocessor

maxBlockSize :: !(Int, Int, Int)

Max size of each dimension of a block

maxGridSize :: !(Int, Int, Int)

Max size of each dimension of a grid

maxTextureDim1D :: !Int

Maximum texture dimensions

maxTextureDim2D :: !(Int, Int)
maxTextureDim3D :: !(Int, Int, Int)
clockRate :: !Int

Clock frequency in kilohertz

multiProcessorCount :: !Int

Number of multiprocessors on the device

memPitch :: !Int64

Max pitch in bytes allowed by memory copies

memBusWidth :: !Int

Global memory bus width in bits

memClockRate :: !Int

Peak memory clock frequency in kilohertz

textureAlignment :: !Int64

Alignment requirement for textures

computeMode :: !ComputeMode
deviceOverlap :: !Bool

Device can concurrently copy memory and execute a kernel

concurrentKernels :: !Bool

Device can possibly execute multiple kernels concurrently

eccEnabled :: !Bool

Device supports and has enabled error correction

asyncEngineCount :: !Int

Number of asynchronous engines

cacheMemL2 :: !Int

Size of the L2 cache in bytes

tccDriverEnabled :: !Bool

Whether this is a Tesla device using the TCC driver

pciInfo :: !PCI

PCI device information for the device

kernelExecTimeoutEnabled :: !Bool

Whether there is a runtime limit on kernels

integrated :: !Bool

As opposed to discrete

canMapHostMemory :: !Bool

Device can use pinned memory

unifiedAddressing :: !Bool

Device shares a unified address space with the host

data Compute Source

GPU compute capability, major and minor revision number respectively.

Compute !Int !Int 

data ComputeMode Source

The compute mode the device is currently in


Enum ComputeMode 
Eq ComputeMode 
Show ComputeMode 

choose :: DeviceProperties -> IO Device Source

Select the compute device which best matches the given criteria

get :: IO Device Source

Returns which device is currently being used

count :: IO Int Source

Returns the number of devices available for execution, with compute capability >= 1.0

props :: Device -> IO DeviceProperties Source

Return information about the selected compute device

set :: Device -> IO () Source

Set device to be used for GPU execution

setFlags :: [DeviceFlag] -> IO () Source

Set flags to be used for device executions

setOrder :: [Device] -> IO () Source

Set list of devices for CUDA execution in priority order

reset :: IO () Source

Explicitly destroys and cleans up all runtime resources associated with the current device in the current process. Any subsequent API call will reinitialise the device.

Note that this function will reset the device immediately. It is the caller’s responsibility to ensure that the device is not being accessed by any other host threads from the process when this function is called.

sync :: IO () Source

Block until the device has completed all preceding requested tasks. Returns an error if one of the tasks fails.

Peer Access

data PeerFlag Source

Possible option values for direct peer memory access


accessible :: Device -> Device -> IO Bool Source

Queries if the first device can directly access the memory of the second. If direct access is possible, it can then be enabled with add. Requires cuda-4.0.

add :: Device -> [PeerFlag] -> IO () Source

If the devices of both the current and supplied contexts support unified addressing, then enable allocations in the supplied context to be accessible by the current context. Requires cuda-4.0.

remove :: Device -> IO () Source

Disable direct memory access from the current context to the supplied context. Requires cuda-4.0.

Cache Configuration

getLimit :: Limit -> IO Int Source

Query compute 2.0 call stack limits. Requires cuda-3.1.

setLimit :: Limit -> Int -> IO () Source

Set compute 2.0 call stack limits. Requires cuda-3.1.