-- GENERATED by C->Haskell Compiler, version 0.28.6 Switcheroo, 25 November 2017 (Haskell)
-- Edit the ORIGNAL .chs file instead!


{-# LINE 1 "src/Foreign/CUDA/Driver/Exec.chs" #-}
{-# LANGUAGE BangPatterns             #-}
{-# LANGUAGE CPP                      #-}
{-# LANGUAGE EmptyDataDecls           #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GADTs                    #-}
{-# LANGUAGE TemplateHaskell          #-}
--------------------------------------------------------------------------------
-- |
-- Module    : Foreign.CUDA.Driver.Exec
-- Copyright : [2009..2018] Trevor L. McDonell
-- License   : BSD
--
-- Kernel execution control for low-level driver interface
--
--------------------------------------------------------------------------------

module Foreign.CUDA.Driver.Exec (

  -- * Kernel Execution
  Fun(..), FunParam(..), FunAttribute(..), SharedMem(..),
  requires,
  setCacheConfigFun,
  setSharedMemConfigFun,
  launchKernel, launchKernel',
  launchKernelCooperative,

  -- Deprecated since CUDA-4.0
  setBlockShape, setSharedSize, setParams, launch,

) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.Ptr as C2HSImp





{-# LINE 33 "src/Foreign/CUDA/Driver/Exec.chs" #-}


-- Friends
import Foreign.CUDA.Internal.C2HS
import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Driver.Context                      ( Cache(..), SharedMem(..) )
import Foreign.CUDA.Driver.Stream                       ( Stream(..), defaultStream )

-- System
import Foreign
import Foreign.C
import Data.Maybe
import Control.Monad                                    ( zipWithM_ )


{-# DEPRECATED setBlockShape, setSharedSize, setParams, launch
      "use launchKernel instead" #-}


--------------------------------------------------------------------------------
-- Data Types
--------------------------------------------------------------------------------

-- |
-- A @\_\_global\_\_@ device function
--
newtype Fun = Fun { Fun -> Ptr ()
useFun :: ((C2HSImp.Ptr ()))}


-- |
-- Function attributes
--
data FunAttribute = MaxKernelThreadsPerBlock
                  | SharedSizeBytes
                  | ConstSizeBytes
                  | LocalSizeBytes
                  | NumRegs
                  | PtxVersion
                  | BinaryVersion
                  | CacheModeCa
                  | MaxDynamicSharedSizeBytes
                  | PreferredSharedMemoryCarveout
                  | CU_FUNC_ATTRIBUTE_MAX
  deriving (FunAttribute -> FunAttribute -> Bool
(FunAttribute -> FunAttribute -> Bool)
-> (FunAttribute -> FunAttribute -> Bool) -> Eq FunAttribute
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: FunAttribute -> FunAttribute -> Bool
$c/= :: FunAttribute -> FunAttribute -> Bool
== :: FunAttribute -> FunAttribute -> Bool
$c== :: FunAttribute -> FunAttribute -> Bool
Eq,Int -> FunAttribute -> ShowS
[FunAttribute] -> ShowS
FunAttribute -> String
(Int -> FunAttribute -> ShowS)
-> (FunAttribute -> String)
-> ([FunAttribute] -> ShowS)
-> Show FunAttribute
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FunAttribute] -> ShowS
$cshowList :: [FunAttribute] -> ShowS
show :: FunAttribute -> String
$cshow :: FunAttribute -> String
showsPrec :: Int -> FunAttribute -> ShowS
$cshowsPrec :: Int -> FunAttribute -> ShowS
Show)
instance Enum FunAttribute where
  succ :: FunAttribute -> FunAttribute
succ FunAttribute
MaxKernelThreadsPerBlock = FunAttribute
SharedSizeBytes
  succ FunAttribute
SharedSizeBytes = FunAttribute
ConstSizeBytes
  succ FunAttribute
ConstSizeBytes = FunAttribute
LocalSizeBytes
  succ FunAttribute
LocalSizeBytes = FunAttribute
NumRegs
  succ FunAttribute
NumRegs = FunAttribute
PtxVersion
  succ FunAttribute
PtxVersion = FunAttribute
BinaryVersion
  succ FunAttribute
BinaryVersion = FunAttribute
CacheModeCa
  succ FunAttribute
CacheModeCa = FunAttribute
MaxDynamicSharedSizeBytes
  succ MaxDynamicSharedSizeBytes = FunAttribute
PreferredSharedMemoryCarveout
  succ FunAttribute
PreferredSharedMemoryCarveout = FunAttribute
CU_FUNC_ATTRIBUTE_MAX
  succ CU_FUNC_ATTRIBUTE_MAX = error String
"FunAttribute.succ: CU_FUNC_ATTRIBUTE_MAX has no successor"

  pred :: FunAttribute -> FunAttribute
poke :: Ptr FunParam -> FunParam -> IO ()
pred FunAttribute
SharedSizeBytes = FunAttribute
MaxKernelThreadsPerBlock
  pred FunAttribute
ConstSizeBytes = FunAttribute
SharedSizeBytes
  pred FunAttribute
LocalSizeBytes = FunAttribute
ConstSizeBytes
  pred FunAttribute
NumRegs = FunAttribute
LocalSizeBytes
  peek :: Ptr FunParam -> IO FunParam
pred FunAttribute
PtxVersion = FunAttribute
NumRegs
  pred FunAttribute
BinaryVersion = FunAttribute
PtxVersion
  pred FunAttribute
CacheModeCa = FunAttribute
BinaryVersion
  pred FunAttribute
MaxDynamicSharedSizeBytes = FunAttribute
CacheModeCa
  pred FunAttribute
PreferredSharedMemoryCarveout = FunAttribute
MaxDynamicSharedSizeBytes
  pred FunAttribute
CU_FUNC_ATTRIBUTE_MAX = FunAttribute
PreferredSharedMemoryCarveout
  pred FunAttribute
MaxKernelThreadsPerBlock = String -> FunAttribute
forall a. HasCallStack => String -> a
error String
"FunAttribute.pred: MaxKernelThreadsPerBlock has no predecessor"

  enumFromTo :: FunAttribute -> FunAttribute -> [FunAttribute]
enumFromTo FunAttribute
from FunAttribute
to = FunAttribute -> [FunAttribute]
go FunAttribute
from
    where
      end :: Int
end = FunAttribute -> Int
forall a. Enum a => a -> Int
fromEnum FunAttribute
to
      go :: FunAttribute -> [FunAttribute]
go FunAttribute
v = case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (FunAttribute -> Int
forall a. Enum a => a -> Int
fromEnum FunAttribute
v) Int
end of
                 LT -> v : go (succ v)
                 EQ -> [v]
                 GT -> []

  enumFrom from = enumFromTo from CU_FUNC_ATTRIBUTE_MAX

  fromEnum MaxKernelThreadsPerBlock = 0
  fromEnum SharedSizeBytes = 1
  fromEnum ConstSizeBytes = 2
  fromEnum LocalSizeBytes = 3
  fromEnum NumRegs = 4
  fromEnum PtxVersion = 5
  fromEnum BinaryVersion = 6
  fromEnum CacheModeCa = 7
  fromEnum MaxDynamicSharedSizeBytes = 8
  fromEnum PreferredSharedMemoryCarveout = 9
  fromEnum CU_FUNC_ATTRIBUTE_MAX = 10

  toEnum 0 = MaxKernelThreadsPerBlock
  toEnum 1 = SharedSizeBytes
  toEnum 2 = ConstSizeBytes
  toEnum 3 = LocalSizeBytes
  toEnum 4 = NumRegs
  toEnum 5 = PtxVersion
  toEnum 6 = BinaryVersion
  toEnum 7 = CacheModeCa
  toEnum 8 = MaxDynamicSharedSizeBytes
  toEnum 9 = PreferredSharedMemoryCarveout
  toEnum 10 = CU_FUNC_ATTRIBUTE_MAX
  toEnum unmatched = error ("FunAttribute.toEnum: Cannot match " ++ show unmatched)

{-# LINE 71 "src/Foreign/CUDA/Driver/Exec.chs" #-}


-- |
-- Kernel function parameters
--
data FunParam where
  IArg :: !Int32           -> FunParam
  FArg :: !Float           -> FunParam
  VArg :: Storable a => !a -> FunParam

instance Storable FunParam where
  sizeOf (IArg _)       = sizeOf (undefined :: CUInt)
  sizeOf (FArg _)       = sizeOf (undefined :: CFloat)
  sizeOf (VArg v)       = sizeOf v

  alignment (IArg _)    = alignment (undefined :: CUInt)
  alignment (FArg _)    = alignment (undefined :: CFloat)
  alignment (VArg v)    = alignment v

  poke p (IArg i)       = poke (castPtr p) i
  poke p (FArg f)       = poke (castPtr p) f
  poke p (VArg v)       = poke (castPtr p) v

  peek _                = error "Can not peek Foreign.CUDA.Driver.FunParam"


--------------------------------------------------------------------------------
-- Execution Control
--------------------------------------------------------------------------------

-- |
-- Returns the value of the selected attribute requirement for the given kernel.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXEC.html#group__CUDA__EXEC_1g5e92a1b0d8d1b82cb00dcfb2de15961b>
--
{-# INLINEABLE requires #-}
requires :: Fun -> FunAttribute -> IO Int
requires !fn !att = resultIfOk =<< cuFuncGetAttribute att fn

{-# INLINE cuFuncGetAttribute #-}
cuFuncGetAttribute :: (FunAttribute) -> (Fun) -> IO ((Status), (Int))
cuFuncGetAttribute a2 a3 =
  alloca $ \a1' -> 
  let {a2' = cFromEnum a2} in 
  let {a3' = useFun a3} in 
  cuFuncGetAttribute'_ a1' a2' a3' >>= \res ->
  let {res' = cToEnum res} in
  peekIntConv  a1'>>= \a1'' -> 
  return (res', a1'')

{-# LINE 114 "src/Foreign/CUDA/Driver/Exec.chs" #-}



-- |
-- On devices where the L1 cache and shared memory use the same hardware
-- resources, this sets the preferred cache configuration for the given device
-- function. This is only a preference; the driver is free to choose a different
-- configuration as required to execute the function.
--
-- Switching between configuration modes may insert a device-side
-- synchronisation point for streamed kernel launches.
--
-- Requires CUDA-3.0.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXEC.html#group__CUDA__EXEC_1g40f8c11e81def95dc0072a375f965681>
--
{-# INLINEABLE setCacheConfigFun #-}
setCacheConfigFun :: Fun -> Cache -> IO ()
setCacheConfigFun !fn !pref = nothingIfOk =<< cuFuncSetCacheConfig fn pref

{-# INLINE cuFuncSetCacheConfig #-}
cuFuncSetCacheConfig :: (Fun) -> (Cache) -> IO ((Status))
cuFuncSetCacheConfig a1 a2 =
  let {a1' = useFun a1} in 
  let {a2' = cFromEnum a2} in 
  cuFuncSetCacheConfig'_ a1' a2' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 141 "src/Foreign/CUDA/Driver/Exec.chs" #-}



-- |
-- Set the shared memory configuration of a device function.
--
-- On devices with configurable shared memory banks, this will force all
-- subsequent launches of the given device function to use the specified
-- shared memory bank size configuration. On launch of the function, the
-- shared memory configuration of the device will be temporarily changed if
-- needed to suit the function configuration. Changes in shared memory
-- configuration may introduction a device side synchronisation between
-- kernel launches.
--
-- Any per-function configuration specified by 'setSharedMemConfig' will
-- override the context-wide configuration set with
-- 'Foreign.CUDA.Driver.Context.Config.setSharedMem'.
--
-- Changing the shared memory bank size will not increase shared memory
-- usage or affect occupancy of kernels, but may have major effects on
-- performance. Larger bank sizes will allow for greater potential
-- bandwidth to shared memory, but will change what kinds of accesses to
-- shared memory will result in bank conflicts.
--
-- This function will do nothing on devices with fixed shared memory bank
-- size.
--
-- Requires CUDA-5.0.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXEC.html#group__CUDA__EXEC_1g430b913f24970e63869635395df6d9f5>
--
{-# INLINEABLE setSharedMemConfigFun #-}
setSharedMemConfigFun :: Fun -> SharedMem -> IO ()
setSharedMemConfigFun :: Fun -> SharedMem -> IO ()
setSharedMemConfigFun !Fun
fun !SharedMem
pref = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Fun -> SharedMem -> IO Status
cuFuncSetSharedMemConfig Fun
fun SharedMem
pref

{-# INLINE cuFuncSetSharedMemConfig #-}
cuFuncSetSharedMemConfig :: (Fun) -> (SharedMem) -> IO ((Status))
cuFuncSetSharedMemConfig :: Fun -> SharedMem -> IO Status
cuFuncSetSharedMemConfig Fun
a1 SharedMem
a2 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CInt
a2' = SharedMem -> CInt
forall e i. (Enum e, Integral i) => e -> i
cFromEnum SharedMem
a2} in 
  Ptr () -> CInt -> IO CInt
cuFuncSetSharedMemConfig'_ Ptr ()
a1' CInt
a2' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 185 "src/Foreign/CUDA/Driver/Exec.chs" #-}



-- |
-- Invoke a kernel on a @(gx * gy * gz)@ grid of blocks, where each block
-- contains @(tx * ty * tz)@ threads and has access to a given number of bytes
-- of shared memory. The launch may also be associated with a specific 'Stream'.
--
-- In 'launchKernel', the number of kernel parameters and their offsets and
-- sizes do not need to be specified, as this information is retrieved directly
-- from the kernel's image. This requires the kernel to have been compiled with
-- toolchain version 3.2 or later.
--
-- The alternative 'launchKernel'' will pass the arguments in directly,
-- requiring the application to know the size and alignment/padding of each
-- kernel parameter.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXEC.html#group__CUDA__EXEC_1gb8f3dc3031b40da29d5f9a7139e52e15>
--
{-# INLINEABLE launchKernel  #-}
{-# INLINEABLE launchKernel' #-}
launchKernel, launchKernel'
    :: Fun                      -- ^ function to execute
    -> (Int,Int,Int)            -- ^ block grid dimension
    -> (Int,Int,Int)            -- ^ thread block shape
    -> Int                      -- ^ shared memory (bytes)
    -> Maybe Stream             -- ^ (optional) stream to execute in
    -> [FunParam]               -- ^ list of function parameters
    -> IO ()
launchKernel :: Fun
-> (Int, Int, Int)
-> (Int, Int, Int)
-> Int
-> Maybe Stream
-> [FunParam]
-> IO ()
launchKernel !Fun
fn (!Int
gx,!Int
gy,!Int
gz) (!Int
tx,!Int
ty,!Int
tz) !Int
sm !Maybe Stream
mst ![FunParam]
args
  = (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
(=<<) Status -> IO ()
nothingIfOk
  (IO Status -> IO ()) -> IO Status -> IO ()
forall a b. (a -> b) -> a -> b
$ (FunParam -> (Ptr FunParam -> IO Status) -> IO Status)
-> [FunParam] -> ([Ptr FunParam] -> IO Status) -> IO Status
forall a b res.
(a -> (b -> res) -> res) -> [a] -> ([b] -> res) -> res
withMany FunParam -> (Ptr FunParam -> IO Status) -> IO Status
forall b. FunParam -> (Ptr FunParam -> IO b) -> IO b
withFP [FunParam]
args
  (([Ptr FunParam] -> IO Status) -> IO Status)
-> ([Ptr FunParam] -> IO Status) -> IO Status
forall a b. (a -> b) -> a -> b
$ \[Ptr FunParam]
pa -> [Ptr FunParam] -> (Ptr (Ptr FunParam) -> IO Status) -> IO Status
forall a b. Storable a => [a] -> (Ptr a -> IO b) -> IO b
withArray [Ptr FunParam]
pa
  ((Ptr (Ptr FunParam) -> IO Status) -> IO Status)
-> (Ptr (Ptr FunParam) -> IO Status) -> IO Status
forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr FunParam)
pp -> Fun
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> Ptr (Ptr FunParam)
-> Ptr (Ptr ())
-> IO Status
cuLaunchKernel Fun
fn Int
gx Int
gy Int
gz Int
tx Int
ty Int
tz Int
sm Stream
st Ptr (Ptr FunParam)
pp Ptr (Ptr ())
forall a. Ptr a
nullPtr
  where
    !st :: Stream
st = Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst

    withFP :: FunParam -> (Ptr FunParam -> IO b) -> IO b
    withFP :: FunParam -> (Ptr FunParam -> IO b) -> IO b
withFP !FunParam
p !Ptr FunParam -> IO b
f = case FunParam
p of
      IArg Int32
v -> Int32 -> (Ptr Int32 -> IO b) -> IO b
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with' Int32
v (Ptr FunParam -> IO b
f (Ptr FunParam -> IO b)
-> (Ptr Int32 -> Ptr FunParam) -> Ptr Int32 -> IO b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr Int32 -> Ptr FunParam
forall a b. Ptr a -> Ptr b
castPtr)
      FArg Float
v -> Float -> (Ptr Float -> IO b) -> IO b
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with' Float
v (Ptr FunParam -> IO b
f (Ptr FunParam -> IO b)
-> (Ptr Float -> Ptr FunParam) -> Ptr Float -> IO b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr Float -> Ptr FunParam
forall a b. Ptr a -> Ptr b
castPtr)
      VArg a
v -> a -> (Ptr a -> IO b) -> IO b
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with' a
v (Ptr FunParam -> IO b
f (Ptr FunParam -> IO b) -> (Ptr a -> Ptr FunParam) -> Ptr a -> IO b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr a -> Ptr FunParam
forall a b. Ptr a -> Ptr b
castPtr)

    -- can't use the standard 'with' because 'alloca' will pass an undefined
    -- dummy argument when determining 'sizeOf' and 'alignment', but sometimes
    -- instances in Accelerate need to evaluate this argument.
    --
    with' :: Storable a => a -> (Ptr a -> IO b) -> IO b
    with' :: a -> (Ptr a -> IO b) -> IO b
with' !a
val !Ptr a -> IO b
f =
      Int -> (Ptr a -> IO b) -> IO b
forall a b. Int -> (Ptr a -> IO b) -> IO b
allocaBytes (a -> Int
forall a. Storable a => a -> Int
sizeOf a
val) ((Ptr a -> IO b) -> IO b) -> (Ptr a -> IO b) -> IO b
forall a b. (a -> b) -> a -> b
$ \Ptr a
ptr -> do
        Ptr a -> a -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr a
ptr a
val
        Ptr a -> IO b
f Ptr a
ptr


launchKernel' :: Fun
-> (Int, Int, Int)
-> (Int, Int, Int)
-> Int
-> Maybe Stream
-> [FunParam]
-> IO ()
launchKernel' !Fun
fn (!Int
gx,!Int
gy,!Int
gz) (!Int
tx,!Int
ty,!Int
tz) !Int
sm !Maybe Stream
mst ![FunParam]
args
  = (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
(=<<) Status -> IO ()
nothingIfOk
  (IO Status -> IO ()) -> IO Status -> IO ()
forall a b. (a -> b) -> a -> b
$ Int -> (Ptr Int -> IO Status) -> IO Status
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with Int
bytes
  ((Ptr Int -> IO Status) -> IO Status)
-> (Ptr Int -> IO Status) -> IO Status
forall a b. (a -> b) -> a -> b
$ \Ptr Int
pb -> [FunParam] -> (Ptr FunParam -> IO Status) -> IO Status
withArray' [FunParam]
args
  ((Ptr FunParam -> IO Status) -> IO Status)
-> (Ptr FunParam -> IO Status) -> IO Status
forall a b. (a -> b) -> a -> b
$ \Ptr FunParam
pa -> Ptr () -> [Ptr ()] -> (Ptr (Ptr ()) -> IO Status) -> IO Status
forall a b. Storable a => a -> [a] -> (Ptr a -> IO b) -> IO b
withArray0 Ptr ()
forall a. Ptr a
nullPtr [Ptr ()
forall a. Ptr a
buffer, Ptr FunParam -> Ptr ()
forall a b. Ptr a -> Ptr b
castPtr Ptr FunParam
pa, Ptr ()
forall a. Ptr a
size, Ptr Int -> Ptr ()
forall a b. Ptr a -> Ptr b
castPtr Ptr Int
pb]
  ((Ptr (Ptr ()) -> IO Status) -> IO Status)
-> (Ptr (Ptr ()) -> IO Status) -> IO Status
forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr ())
pp -> Fun
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> Ptr (Ptr FunParam)
-> Ptr (Ptr ())
-> IO Status
cuLaunchKernel Fun
fn Int
gx Int
gy Int
gz Int
tx Int
ty Int
tz Int
sm Stream
st Ptr (Ptr FunParam)
forall a. Ptr a
nullPtr Ptr (Ptr ())
pp
  where
    buffer :: Ptr a
buffer      = WordPtr -> Ptr a
forall a. WordPtr -> Ptr a
wordPtrToPtr WordPtr
0x01     -- CU_LAUNCH_PARAM_BUFFER_POINTER
    size :: Ptr a
size        = WordPtr -> Ptr a
forall a. WordPtr -> Ptr a
wordPtrToPtr WordPtr
0x02     -- CU_LAUNCH_PARAM_BUFFER_SIZE
    bytes :: Int
bytes       = (Int -> FunParam -> Int) -> Int -> [FunParam] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\Int
a FunParam
x -> Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ FunParam -> Int
forall a. Storable a => a -> Int
sizeOf FunParam
x) Int
0 [FunParam]
args
    st :: Stream
st          = Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst

    -- can't use the standard 'withArray' because 'mallocArray' will pass
    -- 'undefined' to 'sizeOf' when determining how many bytes to allocate, but
    -- our Storable instance for FunParam needs to dispatch on each constructor,
    -- hence evaluating the undefined.
    --
    withArray' :: [FunParam] -> (Ptr FunParam -> IO Status) -> IO Status
withArray' ![FunParam]
vals !Ptr FunParam -> IO Status
f =
      Int -> (Ptr FunParam -> IO Status) -> IO Status
forall a b. Int -> (Ptr a -> IO b) -> IO b
allocaBytes Int
bytes ((Ptr FunParam -> IO Status) -> IO Status)
-> (Ptr FunParam -> IO Status) -> IO Status
forall a b. (a -> b) -> a -> b
$ \Ptr FunParam
ptr -> do
        Ptr FunParam -> [FunParam] -> IO ()
forall a. Storable a => Ptr a -> [a] -> IO ()
pokeArray Ptr FunParam
ptr [FunParam]
vals
        Ptr FunParam -> IO Status
f Ptr FunParam
ptr


{-# INLINE cuLaunchKernel #-}
cuLaunchKernel :: (Fun) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> (Ptr (Ptr FunParam)) -> (Ptr (Ptr ())) -> IO ((Status))
cuLaunchKernel :: Fun
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> Ptr (Ptr FunParam)
-> Ptr (Ptr ())
-> IO Status
cuLaunchKernel Fun
a1 Int
a2 Int
a3 Int
a4 Int
a5 Int
a6 Int
a7 Int
a8 Stream
a9 Ptr (Ptr FunParam)
a10 Ptr (Ptr ())
a11 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CUInt
a4' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  let {a5' :: CUInt
a5' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a5} in 
  let {a6' :: CUInt
a6' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a6} in 
  let {a7' :: CUInt
a7' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a7} in 
  let {a8' :: CUInt
a8' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a8} in 
  let {a9' :: Ptr ()
a9' = Stream -> Ptr ()
useStream Stream
a9} in 
  let {a10' :: Ptr (Ptr ())
a10' = Ptr (Ptr FunParam) -> Ptr (Ptr ())
forall a b. Ptr a -> Ptr b
castPtr Ptr (Ptr FunParam)
a10} in 
  let {a11' :: Ptr (Ptr ())
a11' = Ptr (Ptr ()) -> Ptr (Ptr ())
forall a b. Ptr a -> Ptr b
castPtr Ptr (Ptr ())
a11} in 
  Ptr ()
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> Ptr ()
-> Ptr (Ptr ())
-> Ptr (Ptr ())
-> IO CInt
cuLaunchKernel'_ Ptr ()
a1' CUInt
a2' CUInt
a3' CUInt
a4' CUInt
a5' CUInt
a6' CUInt
a7' CUInt
a8' Ptr ()
a9' Ptr (Ptr ())
a10' Ptr (Ptr ())
a11' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 271 "src/Foreign/CUDA/Driver/Exec.chs" #-}




-- |
-- Invoke a kernel on a @(gx * gy * gz)@ grid of blocks, where each block
-- contains @(tx * ty * tz)@ threads and has access to a given number of bytes
-- of shared memory. The launch may also be associated with a specific stream.
--
-- The thread blocks can cooperate and synchronise as they execute.
--
-- The device on which this kernel is invoked must have
-- 'Foreign.CUDA.Driver.Device.attribute'
-- 'Foreign.CUDA.Driver.Device.CooperativeLaunch'.
--
-- The total number of blocks launched can not exceed the maximum number of
-- active thread blocks per multiprocessor
-- ('Foreign.CUDA.Analysis.Device.threadBlocksPerMP'), multiplied by the number
-- of multiprocessors ('Foreign.CUDA.Analysis.Device.multiProcessorCount').
--
-- The kernel can not make use of dynamic parallelism.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXEC.html#group__CUDA__EXEC_1g06d753134145c4584c0c62525c1894cb>
--
-- Requires CUDA-9.0
--
-- @since 0.9.0.0@
--
{-# INLINEABLE launchKernelCooperative #-}
launchKernelCooperative
    :: Fun                      -- ^ function to execute
    -> (Int,Int,Int)            -- ^ block grid dimension
    -> (Int,Int,Int)            -- ^ thread block shape
    -> Int                      -- ^ shared memory (bytes)
    -> Maybe Stream             -- ^ (optional) stream to execute in
    -> [FunParam]               -- ^ list of function parameters
    -> IO ()
launchKernelCooperative :: Fun
-> (Int, Int, Int)
-> (Int, Int, Int)
-> Int
-> Maybe Stream
-> [FunParam]
-> IO ()
launchKernelCooperative !Fun
fn (!Int
gx,!Int
gy,!Int
gz) (!Int
tx,!Int
ty,!Int
tz) !Int
sm !Maybe Stream
mst ![FunParam]
args
  = (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
(=<<) Status -> IO ()
nothingIfOk
  (IO Status -> IO ()) -> IO Status -> IO ()
forall a b. (a -> b) -> a -> b
$ (FunParam -> (Ptr FunParam -> IO Status) -> IO Status)
-> [FunParam] -> ([Ptr FunParam] -> IO Status) -> IO Status
forall a b res.
(a -> (b -> res) -> res) -> [a] -> ([b] -> res) -> res
withMany FunParam -> (Ptr FunParam -> IO Status) -> IO Status
forall b. FunParam -> (Ptr FunParam -> IO b) -> IO b
withFP [FunParam]
args
  (([Ptr FunParam] -> IO Status) -> IO Status)
-> ([Ptr FunParam] -> IO Status) -> IO Status
forall a b. (a -> b) -> a -> b
$ \[Ptr FunParam]
pa -> [Ptr FunParam] -> (Ptr (Ptr FunParam) -> IO Status) -> IO Status
forall a b. Storable a => [a] -> (Ptr a -> IO b) -> IO b
withArray [Ptr FunParam]
pa
  ((Ptr (Ptr FunParam) -> IO Status) -> IO Status)
-> (Ptr (Ptr FunParam) -> IO Status) -> IO Status
forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr FunParam)
pp -> Fun
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> Ptr (Ptr FunParam)
-> IO Status
cuLaunchCooperativeKernel Fun
fn Int
gx Int
gy Int
gz Int
tx Int
ty Int
tz Int
sm Stream
st Ptr (Ptr FunParam)
pp
  where
    !st :: Stream
st = Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst

    withFP :: FunParam -> (Ptr FunParam -> IO b) -> IO b
    withFP :: FunParam -> (Ptr FunParam -> IO b) -> IO b
withFP !FunParam
p !Ptr FunParam -> IO b
f = case FunParam
p of
      IArg Int32
v -> Int32 -> (Ptr Int32 -> IO b) -> IO b
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with' Int32
v (Ptr FunParam -> IO b
f (Ptr FunParam -> IO b)
-> (Ptr Int32 -> Ptr FunParam) -> Ptr Int32 -> IO b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr Int32 -> Ptr FunParam
forall a b. Ptr a -> Ptr b
castPtr)
      FArg Float
v -> Float -> (Ptr Float -> IO b) -> IO b
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with' Float
v (Ptr FunParam -> IO b
f (Ptr FunParam -> IO b)
-> (Ptr Float -> Ptr FunParam) -> Ptr Float -> IO b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr Float -> Ptr FunParam
forall a b. Ptr a -> Ptr b
castPtr)
      VArg a
v -> a -> (Ptr a -> IO b) -> IO b
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with' a
v (Ptr FunParam -> IO b
f (Ptr FunParam -> IO b) -> (Ptr a -> Ptr FunParam) -> Ptr a -> IO b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr a -> Ptr FunParam
forall a b. Ptr a -> Ptr b
castPtr)

    -- can't use the standard 'with' because 'alloca' will pass an undefined
    -- dummy argument when determining 'sizeOf' and 'alignment', but sometimes
    -- instances in Accelerate need to evaluate this argument.
    --
    with' :: Storable a => a -> (Ptr a -> IO b) -> IO b
    with' :: a -> (Ptr a -> IO b) -> IO b
with' !a
val !Ptr a -> IO b
f =
      Int -> (Ptr a -> IO b) -> IO b
forall a b. Int -> (Ptr a -> IO b) -> IO b
allocaBytes (a -> Int
forall a. Storable a => a -> Int
sizeOf a
val) ((Ptr a -> IO b) -> IO b) -> (Ptr a -> IO b) -> IO b
forall a b. (a -> b) -> a -> b
$ \Ptr a
ptr -> do
        Ptr a -> a -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr a
ptr a
val
        Ptr a -> IO b
f Ptr a
ptr

{-# INLINE cuLaunchCooperativeKernel #-}
cuLaunchCooperativeKernel :: (Fun) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> (Ptr (Ptr FunParam)) -> IO ((Status))
cuLaunchCooperativeKernel :: Fun
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> Ptr (Ptr FunParam)
-> IO Status
cuLaunchCooperativeKernel Fun
a1 Int
a2 Int
a3 Int
a4 Int
a5 Int
a6 Int
a7 Int
a8 Stream
a9 Ptr (Ptr FunParam)
a10 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CUInt
a4' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  let {a5' :: CUInt
a5' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a5} in 
  let {a6' :: CUInt
a6' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a6} in 
  let {a7' :: CUInt
a7' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a7} in 
  let {a8' :: CUInt
a8' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a8} in 
  let {a9' :: Ptr ()
a9' = Stream -> Ptr ()
useStream Stream
a9} in 
  let {a10' :: Ptr (Ptr ())
a10' = Ptr (Ptr FunParam) -> Ptr (Ptr ())
forall a b. Ptr a -> Ptr b
castPtr Ptr (Ptr FunParam)
a10} in 
  Ptr ()
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> Ptr ()
-> Ptr (Ptr ())
-> IO CInt
cuLaunchCooperativeKernel'_ Ptr ()
a1' CUInt
a2' CUInt
a3' CUInt
a4' CUInt
a5' CUInt
a6' CUInt
a7' CUInt
a8' Ptr ()
a9' Ptr (Ptr ())
a10' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 353 "src/Foreign/CUDA/Driver/Exec.chs" #-}



--------------------------------------------------------------------------------
-- Deprecated
--------------------------------------------------------------------------------

-- |
-- Invoke the kernel on a size @(w,h)@ grid of blocks. Each block contains the
-- number of threads specified by a previous call to 'setBlockShape'. The launch
-- may also be associated with a specific 'Stream'.
--
{-# INLINEABLE launch #-}
launch :: Fun -> (Int,Int) -> Maybe Stream -> IO ()
launch :: Fun -> (Int, Int) -> Maybe Stream -> IO ()
launch !Fun
fn (!Int
w,!Int
h) Maybe Stream
mst =
  Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Fun -> Int -> Int -> Stream -> IO Status
cuLaunchGridAsync Fun
fn Int
w Int
h (Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst)

{-# INLINE cuLaunchGridAsync #-}
cuLaunchGridAsync :: (Fun) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuLaunchGridAsync :: Fun -> Int -> Int -> Stream -> IO Status
cuLaunchGridAsync Fun
a1 Int
a2 Int
a3 Stream
a4 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CInt
a2' = Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CInt
a3' = Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: Ptr ()
a4' = Stream -> Ptr ()
useStream Stream
a4} in 
  Ptr () -> CInt -> CInt -> Ptr () -> IO CInt
cuLaunchGridAsync'_ Ptr ()
a1' CInt
a2' CInt
a3' Ptr ()
a4' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 375 "src/Foreign/CUDA/Driver/Exec.chs" #-}



-- |
-- Specify the @(x,y,z)@ dimensions of the thread blocks that are created when
-- the given kernel function is launched.
--
{-# INLINEABLE setBlockShape #-}
setBlockShape :: Fun -> (Int,Int,Int) -> IO ()
setBlockShape :: Fun -> (Int, Int, Int) -> IO ()
setBlockShape !Fun
fn (!Int
x,!Int
y,!Int
z) = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Fun -> Int -> Int -> Int -> IO Status
cuFuncSetBlockShape Fun
fn Int
x Int
y Int
z

{-# INLINE cuFuncSetBlockShape #-}
cuFuncSetBlockShape :: (Fun) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuFuncSetBlockShape :: Fun -> Int -> Int -> Int -> IO Status
cuFuncSetBlockShape Fun
a1 Int
a2 Int
a3 Int
a4 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CInt
a2' = Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CInt
a3' = Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CInt
a4' = Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  Ptr () -> CInt -> CInt -> CInt -> IO CInt
cuFuncSetBlockShape'_ Ptr ()
a1' CInt
a2' CInt
a3' CInt
a4' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 391 "src/Foreign/CUDA/Driver/Exec.chs" #-}



-- |
-- Set the number of bytes of dynamic shared memory to be available to each
-- thread block when the function is launched
--
{-# INLINEABLE setSharedSize #-}
setSharedSize :: Fun -> Integer -> IO ()
setSharedSize :: Fun -> Integer -> IO ()
setSharedSize !Fun
fn !Integer
bytes = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Fun -> Integer -> IO Status
cuFuncSetSharedSize Fun
fn Integer
bytes

{-# INLINE cuFuncSetSharedSize #-}
cuFuncSetSharedSize :: (Fun) -> (Integer) -> IO ((Status))
cuFuncSetSharedSize :: Fun -> Integer -> IO Status
cuFuncSetSharedSize Fun
a1 Integer
a2 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CUInt
a2' = Integer -> CUInt
forall a. Num a => Integer -> a
fromInteger Integer
a2} in 
  Ptr () -> CUInt -> IO CInt
cuFuncSetSharedSize'_ Ptr ()
a1' CUInt
a2' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 405 "src/Foreign/CUDA/Driver/Exec.chs" #-}



-- |
-- Set the parameters that will specified next time the kernel is invoked
--
{-# INLINEABLE setParams #-}
setParams :: Fun -> [FunParam] -> IO ()
setParams :: Fun -> [FunParam] -> IO ()
setParams !Fun
fn ![FunParam]
prs = do
  (Int -> FunParam -> IO ()) -> [Int] -> [FunParam] -> IO ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ (Fun -> Int -> FunParam -> IO ()
set Fun
fn) [Int]
offsets [FunParam]
prs
  Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Fun -> Int -> IO Status
cuParamSetSize Fun
fn ([Int] -> Int
forall a. [a] -> a
last [Int]
offsets)
  where
    offsets :: [Int]
offsets = (Int -> FunParam -> Int) -> Int -> [FunParam] -> [Int]
forall b a. (b -> a -> b) -> b -> [a] -> [b]
scanl (\Int
a FunParam
b -> Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ FunParam -> Int
size FunParam
b) Int
0 [FunParam]
prs

    size :: FunParam -> Int
size (IArg Int32
_)    = CUInt -> Int
forall a. Storable a => a -> Int
sizeOf (CUInt
forall a. HasCallStack => a
undefined :: CUInt)
    size (FArg Float
_)    = CFloat -> Int
forall a. Storable a => a -> Int
sizeOf (CFloat
forall a. HasCallStack => a
undefined :: CFloat)
    size (VArg a
v)    = a -> Int
forall a. Storable a => a -> Int
sizeOf a
v

    set :: Fun -> Int -> FunParam -> IO ()
set Fun
f Int
o (IArg Int32
v) = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Fun -> Int -> Int32 -> IO Status
cuParamSeti Fun
f Int
o Int32
v
    set Fun
f Int
o (FArg Float
v) = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Fun -> Int -> Float -> IO Status
cuParamSetf Fun
f Int
o Float
v
    set Fun
f Int
o (VArg a
v) = a -> (Ptr a -> IO ()) -> IO ()
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with a
v ((Ptr a -> IO ()) -> IO ()) -> (Ptr a -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Ptr a
p -> (Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Fun -> Int -> Ptr a -> Int -> IO Status
forall a. Storable a => Fun -> Int -> Ptr a -> Int -> IO Status
cuParamSetv Fun
f Int
o Ptr a
p (a -> Int
forall a. Storable a => a -> Int
sizeOf a
v))


{-# INLINE cuParamSetSize #-}
cuParamSetSize :: (Fun) -> (Int) -> IO ((Status))
cuParamSetSize :: Fun -> Int -> IO Status
cuParamSetSize Fun
a1 Int
a2 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  Ptr () -> CUInt -> IO CInt
cuParamSetSize'_ Ptr ()
a1' CUInt
a2' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 431 "src/Foreign/CUDA/Driver/Exec.chs" #-}


{-# INLINE cuParamSeti #-}
cuParamSeti :: (Fun) -> (Int) -> (Int32) -> IO ((Status))
cuParamSeti :: Fun -> Int -> Int32 -> IO Status
cuParamSeti Fun
a1 Int
a2 Int32
a3 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CInt
a2' = Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int32 -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
a3} in 
  Ptr () -> CInt -> CUInt -> IO CInt
cuParamSeti'_ Ptr ()
a1' CInt
a2' CUInt
a3' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 437 "src/Foreign/CUDA/Driver/Exec.chs" #-}


{-# INLINE cuParamSetf #-}
cuParamSetf :: (Fun) -> (Int) -> (Float) -> IO ((Status))
cuParamSetf :: Fun -> Int -> Float -> IO Status
cuParamSetf Fun
a1 Int
a2 Float
a3 =
  let {a1' :: Ptr ()
a1' = Fun -> Ptr ()
useFun Fun
a1} in 
  let {a2' :: CInt
a2' = Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CFloat
a3' = Float -> CFloat
CFloat Float
a3} in 
  Ptr () -> CInt -> CFloat -> IO CInt
cuParamSetf'_ Ptr ()
a1' CInt
a2' CFloat
a3' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 443 "src/Foreign/CUDA/Driver/Exec.chs" #-}


{-# INLINE cuParamSetv #-}
cuParamSetv :: Storable a => (Fun) -> (Int) -> (Ptr a) -> (Int) -> IO ((Status))
cuParamSetv a1 a2 a3 a4 =
  let {a1' = useFun a1} in 
  let {a2' = fromIntegral a2} in 
  let {a3' = castPtr a3} in 
  let {a4' = fromIntegral a4} in 
  cuParamSetv'_ a1' a2' a3' a4' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 451 "src/Foreign/CUDA/Driver/Exec.chs" #-}




foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncGetAttribute"
  cuFuncGetAttribute'_ :: ((C2HSImp.Ptr C2HSImp.CInt) -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetCacheConfig"
  cuFuncSetCacheConfig'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetSharedMemConfig"
  cuFuncSetSharedMemConfig'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuLaunchKernel"
  cuLaunchKernel'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr ()) -> ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (IO C2HSImp.CInt))))))))))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuLaunchCooperativeKernel"
  cuLaunchCooperativeKernel'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr ()) -> ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (IO C2HSImp.CInt)))))))))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuLaunchGridAsync"
  cuLaunchGridAsync'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetBlockShape"
  cuFuncSetBlockShape'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CInt -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetSharedSize"
  cuFuncSetSharedSize'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetSize"
  cuParamSetSize'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSeti"
  cuParamSeti'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CUInt -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetf"
  cuParamSetf'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CFloat -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetv"
  cuParamSetv'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))))