{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE EmptyDataDecls #-} {-# LANGUAGE ForeignFunctionInterface #-} -------------------------------------------------------------------------------- -- | -- Module : Foreign.CUDA.Driver.Context -- Copyright : (c) [2009..2012] Trevor L. McDonell -- License : BSD -- -- Context management for low-level driver interface -- -------------------------------------------------------------------------------- module Foreign.CUDA.Driver.Context ( -- * Context Management Context(..), ContextFlag(..), create, attach, detach, destroy, device, pop, push, sync, get, set, -- * Peer Access PeerFlag, accessible, add, remove, -- * Cache Configuration Cache(..), Limit(..), getLimit, setLimit, setCacheConfig ) where #include #include "cbits/stubs.h" {# context lib="cuda" #} -- Friends import Foreign.CUDA.Driver.Device (Device(..)) import Foreign.CUDA.Driver.Error import Foreign.CUDA.Internal.C2HS -- System import Foreign import Foreign.C import Control.Monad (liftM) -------------------------------------------------------------------------------- -- Data Types -------------------------------------------------------------------------------- -- | -- A device context -- newtype Context = Context { useContext :: {# type CUcontext #}} deriving (Eq, Show) -- | -- Context creation flags -- {# enum CUctx_flags as ContextFlag { underscoreToCase } with prefix="CU_CTX" deriving (Eq, Show) #} -- | -- Device limits flags -- #if CUDA_VERSION < 3010 data Limit #else {# enum CUlimit_enum as Limit { underscoreToCase } with prefix="CU_LIMIT" deriving (Eq, Show) #} #endif -- | -- Device cache configuration preference -- #if CUDA_VERSION < 3000 data Cache #else {# enum CUfunc_cache_enum as Cache { underscoreToCase } with prefix="CU_FUNC_CACHE" deriving (Eq, Show) #} #endif -- | -- Possible option values for direct peer memory access -- data PeerFlag instance Enum PeerFlag where #if CUDA_VERSION >= 4000 {-# DEPRECATED attach, detach "deprecated as of CUDA-4.0" #-} {-# DEPRECATED BlockingSync "use SchedBlockingSync instead" #-} #endif -------------------------------------------------------------------------------- -- Context management -------------------------------------------------------------------------------- -- | -- Create a new CUDA context and associate it with the calling thread -- {-# INLINEABLE create #-} create :: Device -> [ContextFlag] -> IO Context create !dev !flags = resultIfOk =<< cuCtxCreate flags dev {-# INLINE cuCtxCreate #-} {# fun unsafe cuCtxCreate { alloca- `Context' peekCtx* , combineBitMasks `[ContextFlag]' , useDevice `Device' } -> `Status' cToEnum #} where peekCtx = liftM Context . peek -- | -- Increments the usage count of the context. API: no context flags are -- currently supported, so this parameter must be empty. -- {-# INLINEABLE attach #-} attach :: Context -> [ContextFlag] -> IO () attach !ctx !flags = nothingIfOk =<< cuCtxAttach ctx flags {-# INLINE cuCtxAttach #-} {# fun unsafe cuCtxAttach { withCtx* `Context' , combineBitMasks `[ContextFlag]' } -> `Status' cToEnum #} where withCtx = with . useContext -- | -- Detach the context, and destroy if no longer used -- {-# INLINEABLE detach #-} detach :: Context -> IO () detach !ctx = nothingIfOk =<< cuCtxDetach ctx {-# INLINE cuCtxDetach #-} {# fun unsafe cuCtxDetach { useContext `Context' } -> `Status' cToEnum #} -- | -- Destroy the specified context. This fails if the context is more than a -- single attachment (including that from initial creation). -- {-# INLINEABLE destroy #-} destroy :: Context -> IO () destroy !ctx = nothingIfOk =<< cuCtxDestroy ctx {-# INLINE cuCtxDestroy #-} {# fun unsafe cuCtxDestroy { useContext `Context' } -> `Status' cToEnum #} -- | -- Return the context bound to the calling CPU thread. Requires cuda-4.0. -- {-# INLINEABLE get #-} get :: IO Context #if CUDA_VERSION < 4000 get = requireSDK 4.0 "get" #else get = resultIfOk =<< cuCtxGetCurrent {-# INLINE cuCtxGetCurrent #-} {# fun unsafe cuCtxGetCurrent { alloca- `Context' peekCtx* } -> `Status' cToEnum #} where peekCtx = liftM Context . peek #endif -- | -- Bind the specified context to the calling thread. Requires cuda-4.0. -- {-# INLINEABLE set #-} set :: Context -> IO () #if CUDA_VERSION < 4000 set _ = requireSDK 4.0 "set" #else set !ctx = nothingIfOk =<< cuCtxSetCurrent ctx {-# INLINE cuCtxSetCurrent #-} {# fun unsafe cuCtxSetCurrent { useContext `Context' } -> `Status' cToEnum #} #endif -- | -- Return the device of the currently active context -- {-# INLINEABLE device #-} device :: IO Device device = resultIfOk =<< cuCtxGetDevice {-# INLINE cuCtxGetDevice #-} {# fun unsafe cuCtxGetDevice { alloca- `Device' dev* } -> `Status' cToEnum #} where dev = liftM Device . peekIntConv -- | -- Pop the current CUDA context from the CPU thread. The context must have a -- single usage count (matching calls to 'attach' and 'detach'). If successful, -- the new context is returned, and the old may be attached to a different CPU. -- {-# INLINEABLE pop #-} pop :: IO Context pop = resultIfOk =<< cuCtxPopCurrent {-# INLINE cuCtxPopCurrent #-} {# fun unsafe cuCtxPopCurrent { alloca- `Context' peekCtx* } -> `Status' cToEnum #} where peekCtx = liftM Context . peek -- | -- Push the given context onto the CPU's thread stack of current contexts. The -- context must be floating (via 'pop'), i.e. not attached to any thread. -- {-# INLINEABLE push #-} push :: Context -> IO () push !ctx = nothingIfOk =<< cuCtxPushCurrent ctx {-# INLINE cuCtxPushCurrent #-} {# fun unsafe cuCtxPushCurrent { useContext `Context' } -> `Status' cToEnum #} -- | -- Block until the device has completed all preceding requests -- {-# INLINEABLE sync #-} sync :: IO () sync = nothingIfOk =<< cuCtxSynchronize {-# INLINE cuCtxSynchronize #-} {# fun unsafe cuCtxSynchronize { } -> `Status' cToEnum #} -------------------------------------------------------------------------------- -- Peer access -------------------------------------------------------------------------------- -- | -- Queries if the first device can directly access the memory of the second. If -- direct access is possible, it can then be enabled with 'add'. Requires -- cuda-4.0. -- {-# INLINEABLE accessible #-} accessible :: Device -> Device -> IO Bool #if CUDA_VERSION < 4000 accessible _ _ = requireSDK 4.0 "accessible" #else accessible !dev !peer = resultIfOk =<< cuDeviceCanAccessPeer dev peer {-# INLINE cuDeviceCanAccessPeer #-} {# fun unsafe cuDeviceCanAccessPeer { alloca- `Bool' peekBool* , useDevice `Device' , useDevice `Device' } -> `Status' cToEnum #} #endif -- | -- If the devices of both the current and supplied contexts support unified -- addressing, then enable allocations in the supplied context to be accessible -- by the current context. Requires cuda-4.0. -- {-# INLINEABLE add #-} add :: Context -> [PeerFlag] -> IO () #if CUDA_VERSION < 4000 add _ _ = requireSDK 4.0 "add" #else add !ctx !flags = nothingIfOk =<< cuCtxEnablePeerAccess ctx flags {-# INLINE cuCtxEnablePeerAccess #-} {# fun unsafe cuCtxEnablePeerAccess { useContext `Context' , combineBitMasks `[PeerFlag]' } -> `Status' cToEnum #} #endif -- | -- Disable direct memory access from the current context to the supplied -- context. Requires cuda-4.0. -- {-# INLINEABLE remove #-} remove :: Context -> IO () #if CUDA_VERSION < 4000 remove _ = requireSDK 4.0 "remove" #else remove !ctx = nothingIfOk =<< cuCtxDisablePeerAccess ctx {-# INLINE cuCtxDisablePeerAccess #-} {# fun unsafe cuCtxDisablePeerAccess { useContext `Context' } -> `Status' cToEnum #} #endif -------------------------------------------------------------------------------- -- Cache configuration -------------------------------------------------------------------------------- -- | -- Query compute 2.0 call stack limits. Requires cuda-3.1. -- {-# INLINEABLE getLimit #-} getLimit :: Limit -> IO Int #if CUDA_VERSION < 3010 getLimit _ = requireSDK 3.1 "getLimit" #else getLimit !l = resultIfOk =<< cuCtxGetLimit l {-# INLINE cuCtxGetLimit #-} {# fun unsafe cuCtxGetLimit { alloca- `Int' peekIntConv* , cFromEnum `Limit' } -> `Status' cToEnum #} #endif -- | -- Specify the size of the call stack, for compute 2.0 devices. Requires -- cuda-3.1. -- {-# INLINEABLE setLimit #-} setLimit :: Limit -> Int -> IO () #if CUDA_VERSION < 3010 setLimit _ _ = requireSDK 3.1 "setLimit" #else setLimit !l !n = nothingIfOk =<< cuCtxSetLimit l n {-# INLINE cuCtxSetLimit #-} {# fun unsafe cuCtxSetLimit { cFromEnum `Limit' , cIntConv `Int' } -> `Status' cToEnum #} #endif -- | -- On devices where the L1 cache and shared memory use the same hardware -- resources, this sets the preferred cache configuration for the current -- context. This is only a preference. Requires cuda-3.2. -- {-# INLINEABLE setCacheConfig #-} setCacheConfig :: Cache -> IO () #if CUDA_VERSION < 3020 setCacheConfig _ = requireSDK 3.2 "setCacheConfig" #else setCacheConfig !c = nothingIfOk =<< cuCtxSetCacheConfig c {-# INLINE cuCtxSetCacheConfig #-} {# fun unsafe cuCtxSetCacheConfig { cFromEnum `Cache' } -> `Status' cToEnum #} #endif