{-# LANGUAGE GADTs, CPP, ForeignFunctionInterface, EmptyDataDecls #-} -------------------------------------------------------------------------------- -- | -- Module : Foreign.CUDA.Driver.Exec -- Copyright : (c) [2009..2011] Trevor L. McDonell -- License : BSD -- -- Kernel execution control for low-level driver interface -- -------------------------------------------------------------------------------- module Foreign.CUDA.Driver.Exec ( -- * Kernel Execution Fun(Fun), FunParam(..), FunAttribute(..), CacheConfig(..), requires, setBlockShape, setSharedSize, setParams, setCacheConfigFun, launch, launchKernel ) where #include {# context lib="cuda" #} -- Friends import Foreign.CUDA.Internal.C2HS import Foreign.CUDA.Driver.Error import Foreign.CUDA.Driver.Stream (Stream(..)) import Foreign.CUDA.Driver.Texture (Texture(..)) -- System import Foreign import Foreign.C import Data.Maybe import Control.Monad (zipWithM_) #if CUDA_VERSION >= 3020 {-# DEPRECATED TArg "as of CUDA version 3.2" #-} #endif #if CUDA_VERSION >= 4000 {-# DEPRECATED setBlockShape, setSharedSize, setParams, launch "use launchKernel instead" #-} #endif -------------------------------------------------------------------------------- -- Data Types -------------------------------------------------------------------------------- -- | -- A @__global__@ device function -- newtype Fun = Fun { useFun :: {# type CUfunction #}} -- | -- Function attributes -- {# enum CUfunction_attribute as FunAttribute { underscoreToCase , MAX_THREADS_PER_BLOCK as MaxKernelThreadsPerBlock } with prefix="CU_FUNC_ATTRIBUTE" deriving (Eq, Show) #} -- | -- Cache configuration preference -- #if CUDA_VERSION < 3000 data CacheConfig #else {# enum CUfunc_cache_enum as CacheConfig { underscoreToCase } with prefix="CU_FUNC_CACHE_PREFER" deriving (Eq, Show) #} #endif -- | -- Kernel function parameters -- data FunParam where IArg :: Int -> FunParam FArg :: Float -> FunParam TArg :: Texture -> FunParam VArg :: Storable a => a -> FunParam -------------------------------------------------------------------------------- -- Execution Control -------------------------------------------------------------------------------- -- | -- Returns the value of the selected attribute requirement for the given kernel -- requires :: Fun -> FunAttribute -> IO Int requires fn att = resultIfOk =<< cuFuncGetAttribute att fn {# fun unsafe cuFuncGetAttribute { alloca- `Int' peekIntConv* , cFromEnum `FunAttribute' , useFun `Fun' } -> `Status' cToEnum #} -- | -- Specify the @(x,y,z)@ dimensions of the thread blocks that are created when -- the given kernel function is launched. -- setBlockShape :: Fun -> (Int,Int,Int) -> IO () setBlockShape fn (x,y,z) = nothingIfOk =<< cuFuncSetBlockShape fn x y z {# fun unsafe cuFuncSetBlockShape { useFun `Fun' , `Int' , `Int' , `Int' } -> `Status' cToEnum #} -- | -- Set the number of bytes of dynamic shared memory to be available to each -- thread block when the function is launched -- setSharedSize :: Fun -> Integer -> IO () setSharedSize fn bytes = nothingIfOk =<< cuFuncSetSharedSize fn bytes {# fun unsafe cuFuncSetSharedSize { useFun `Fun' , cIntConv `Integer' } -> `Status' cToEnum #} -- | -- On devices where the L1 cache and shared memory use the same hardware -- resources, this sets the preferred cache configuration for the given device -- function. This is only a preference; the driver is free to choose a different -- configuration as required to execute the function. -- -- Switching between configuration modes may insert a device-side -- synchronisation point for streamed kernel launches. -- setCacheConfigFun :: Fun -> CacheConfig -> IO () #if CUDA_VERSION < 3000 setCacheConfigFun _ _ = requireSDK 3.0 "setCacheConfigFun" #else setCacheConfigFun fn pref = nothingIfOk =<< cuFuncSetCacheConfig fn pref {# fun unsafe cuFuncSetCacheConfig { useFun `Fun' , cFromEnum `CacheConfig' } -> `Status' cToEnum #} #endif -- | -- Invoke the kernel on a size @(w,h)@ grid of blocks. Each block contains the -- number of threads specified by a previous call to 'setBlockShape'. The launch -- may also be associated with a specific 'Stream'. -- launch :: Fun -> (Int,Int) -> Maybe Stream -> IO () launch fn (w,h) mst = nothingIfOk =<< case mst of Nothing -> cuLaunchGridAsync fn w h (Stream nullPtr) Just st -> cuLaunchGridAsync fn w h st {# fun unsafe cuLaunchGridAsync { useFun `Fun' , `Int' , `Int' , useStream `Stream' } -> `Status' cToEnum #} -- | -- Invoke a kernel on a @(gx * gy * gz)@ grid of blocks, where each block -- contains @(tx * ty * tz)@ threads and has access to a given number of bytes -- of shared memory. The launch may also be associated with a specific 'Stream'. -- launchKernel :: Fun -- ^ function to execute -> (Int,Int,Int) -- ^ block grid dimension -> (Int,Int,Int) -- ^ thread block shape -> Int -- ^ shared memory (bytes) -> Maybe Stream -- ^ (optional) stream to execute in -> [FunParam] -- ^ list of function parameters -> IO () #if CUDA_VERSION >= 4000 launchKernel fn (gx,gy,gz) (tx,ty,tz) sm mst args = (=<<) nothingIfOk $ withMany withFP args $ \pa -> withArray pa $ \pp -> cuLaunchKernel fn gx gy gz tx ty tz sm st pp nullPtr where st = fromMaybe (Stream nullPtr) mst withFP :: FunParam -> (Ptr FunParam -> IO b) -> IO b withFP p f = case p of IArg v -> with v (f . castPtr) FArg v -> with v (f . castPtr) VArg v -> with v (f . castPtr) TArg _ -> error "launchKernel: TArg is deprecated" {# fun unsafe cuLaunchKernel { useFun `Fun' , `Int', `Int', `Int' , `Int', `Int', `Int' , `Int' , useStream `Stream' , castPtr `Ptr (Ptr FunParam)' , castPtr `Ptr (Ptr ())' } -> `Status' cToEnum #} #else launchKernel fn (gx,gy,_) (tx,ty,tz) sm mst args = do setParams fn args setSharedSize fn (toInteger sm) setBlockShape fn (tx,ty,tz) launch fn (gx,gy) mst #endif -------------------------------------------------------------------------------- -- Kernel function parameters -------------------------------------------------------------------------------- -- | -- Set the parameters that will specified next time the kernel is invoked -- setParams :: Fun -> [FunParam] -> IO () setParams fn prs = do zipWithM_ (set fn) offsets prs nothingIfOk =<< cuParamSetSize fn (last offsets) where offsets = scanl (\a b -> a + size b) 0 prs size (IArg _) = sizeOf (undefined::CUInt) size (FArg _) = sizeOf (undefined::CFloat) size (TArg _) = 0 size (VArg v) = sizeOf v set f o (IArg v) = nothingIfOk =<< cuParamSeti f o v set f o (FArg v) = nothingIfOk =<< cuParamSetf f o v set f _ (TArg v) = nothingIfOk =<< cuParamSetTexRef f (-1) v set f o (VArg v) = with v $ \p -> (nothingIfOk =<< cuParamSetv f o p (sizeOf v)) {# fun unsafe cuParamSetSize { useFun `Fun' , `Int' } -> `Status' cToEnum #} {# fun unsafe cuParamSeti { useFun `Fun' , `Int' , `Int' } -> `Status' cToEnum #} {# fun unsafe cuParamSetf { useFun `Fun' , `Int' , `Float' } -> `Status' cToEnum #} {# fun unsafe cuParamSetv `Storable a' => { useFun `Fun' , `Int' , castPtr `Ptr a' , `Int' } -> `Status' cToEnum #} {# fun unsafe cuParamSetTexRef { useFun `Fun' , `Int' -- must be CU_PARAM_TR_DEFAULT (-1) , useTexture `Texture' } -> `Status' cToEnum #}