{-# LINE 1 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE EmptyDataDecls #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE TemplateHaskell #-}
module Foreign.CUDA.Driver.Exec (
Fun(Fun), FunParam(..), FunAttribute(..), SharedMem(..),
requires,
setCacheConfigFun,
setSharedMemConfigFun,
launchKernel, launchKernel',
setBlockShape, setSharedSize, setParams, launch,
) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.Ptr as C2HSImp
{-# LINE 32 "./Foreign/CUDA/Driver/Exec.chs" #-}
import Foreign.CUDA.Internal.C2HS
import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Driver.Context ( Cache(..), SharedMem(..) )
import Foreign.CUDA.Driver.Stream ( Stream(..), defaultStream )
import Foreign
import Foreign.C
import Data.Maybe
import Control.Monad ( zipWithM_ )
{-# DEPRECATED setBlockShape, setSharedSize, setParams, launch
"use launchKernel instead" #-}
newtype Fun = Fun { useFun :: ((C2HSImp.Ptr ()))}
data FunAttribute = MaxKernelThreadsPerBlock
| SharedSizeBytes
| ConstSizeBytes
| LocalSizeBytes
| NumRegs
| PtxVersion
| BinaryVersion
| CacheModeCa
| CU_FUNC_ATTRIBUTE_MAX
deriving (Eq,Show)
instance Enum FunAttribute where
succ MaxKernelThreadsPerBlock = SharedSizeBytes
succ SharedSizeBytes = ConstSizeBytes
succ ConstSizeBytes = LocalSizeBytes
succ LocalSizeBytes = NumRegs
succ NumRegs = PtxVersion
succ PtxVersion = BinaryVersion
succ BinaryVersion = CacheModeCa
succ CacheModeCa = CU_FUNC_ATTRIBUTE_MAX
succ CU_FUNC_ATTRIBUTE_MAX = error "FunAttribute.succ: CU_FUNC_ATTRIBUTE_MAX has no successor"
pred SharedSizeBytes = MaxKernelThreadsPerBlock
pred ConstSizeBytes = SharedSizeBytes
pred LocalSizeBytes = ConstSizeBytes
pred NumRegs = LocalSizeBytes
pred PtxVersion = NumRegs
pred BinaryVersion = PtxVersion
pred CacheModeCa = BinaryVersion
pred CU_FUNC_ATTRIBUTE_MAX = CacheModeCa
pred MaxKernelThreadsPerBlock = error "FunAttribute.pred: MaxKernelThreadsPerBlock has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from CU_FUNC_ATTRIBUTE_MAX
fromEnum MaxKernelThreadsPerBlock = 0
fromEnum SharedSizeBytes = 1
fromEnum ConstSizeBytes = 2
fromEnum LocalSizeBytes = 3
fromEnum NumRegs = 4
fromEnum PtxVersion = 5
fromEnum BinaryVersion = 6
fromEnum CacheModeCa = 7
fromEnum CU_FUNC_ATTRIBUTE_MAX = 8
toEnum 0 = MaxKernelThreadsPerBlock
toEnum 1 = SharedSizeBytes
toEnum 2 = ConstSizeBytes
toEnum 3 = LocalSizeBytes
toEnum 4 = NumRegs
toEnum 5 = PtxVersion
toEnum 6 = BinaryVersion
toEnum 7 = CacheModeCa
toEnum 8 = CU_FUNC_ATTRIBUTE_MAX
toEnum unmatched = error ("FunAttribute.toEnum: Cannot match " ++ show unmatched)
{-# LINE 70 "./Foreign/CUDA/Driver/Exec.chs" #-}
data FunParam where
IArg :: !Int32 -> FunParam
FArg :: !Float -> FunParam
VArg :: Storable a => !a -> FunParam
instance Storable FunParam where
sizeOf (IArg _) = sizeOf (undefined :: CUInt)
sizeOf (FArg _) = sizeOf (undefined :: CFloat)
sizeOf (VArg v) = sizeOf v
alignment (IArg _) = alignment (undefined :: CUInt)
alignment (FArg _) = alignment (undefined :: CFloat)
alignment (VArg v) = alignment v
poke p (IArg i) = poke (castPtr p) i
poke p (FArg f) = poke (castPtr p) f
poke p (VArg v) = poke (castPtr p) v
peek _ = error "Can not peek Foreign.CUDA.Driver.FunParam"
{-# INLINEABLE requires #-}
requires :: Fun -> FunAttribute -> IO Int
requires !fn !att = resultIfOk =<< cuFuncGetAttribute att fn
{-# INLINE cuFuncGetAttribute #-}
cuFuncGetAttribute :: (FunAttribute) -> (Fun) -> IO ((Status), (Int))
cuFuncGetAttribute a2 a3 =
alloca $ \a1' ->
let {a2' = cFromEnum a2} in
let {a3' = useFun a3} in
cuFuncGetAttribute'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
peekIntConv a1'>>= \a1'' ->
return (res', a1'')
{-# LINE 113 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINEABLE setCacheConfigFun #-}
setCacheConfigFun :: Fun -> Cache -> IO ()
setCacheConfigFun !fn !pref = nothingIfOk =<< cuFuncSetCacheConfig fn pref
{-# INLINE cuFuncSetCacheConfig #-}
cuFuncSetCacheConfig :: (Fun) -> (Cache) -> IO ((Status))
cuFuncSetCacheConfig a1 a2 =
let {a1' = useFun a1} in
let {a2' = cFromEnum a2} in
cuFuncSetCacheConfig'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 140 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINEABLE setSharedMemConfigFun #-}
setSharedMemConfigFun :: Fun -> SharedMem -> IO ()
setSharedMemConfigFun !fun !pref = nothingIfOk =<< cuFuncSetSharedMemConfig fun pref
{-# INLINE cuFuncSetSharedMemConfig #-}
cuFuncSetSharedMemConfig :: (Fun) -> (SharedMem) -> IO ((Status))
cuFuncSetSharedMemConfig a1 a2 =
let {a1' = useFun a1} in
let {a2' = cFromEnum a2} in
cuFuncSetSharedMemConfig'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 184 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINEABLE launchKernel #-}
{-# INLINEABLE launchKernel' #-}
launchKernel, launchKernel'
:: Fun
-> (Int,Int,Int)
-> (Int,Int,Int)
-> Int
-> Maybe Stream
-> [FunParam]
-> IO ()
launchKernel !fn (!gx,!gy,!gz) (!tx,!ty,!tz) !sm !mst !args
= (=<<) nothingIfOk
$ withMany withFP args
$ \pa -> withArray pa
$ \pp -> cuLaunchKernel fn gx gy gz tx ty tz sm st pp nullPtr
where
!st = fromMaybe defaultStream mst
withFP :: FunParam -> (Ptr FunParam -> IO b) -> IO b
withFP !p !f = case p of
IArg v -> with' v (f . castPtr)
FArg v -> with' v (f . castPtr)
VArg v -> with' v (f . castPtr)
with' :: Storable a => a -> (Ptr a -> IO b) -> IO b
with' !val !f =
allocaBytes (sizeOf val) $ \ptr -> do
poke ptr val
f ptr
launchKernel' !fn (!gx,!gy,!gz) (!tx,!ty,!tz) !sm !mst !args
= (=<<) nothingIfOk
$ with bytes
$ \pb -> withArray' args
$ \pa -> withArray0 nullPtr [buffer, castPtr pa, size, castPtr pb]
$ \pp -> cuLaunchKernel fn gx gy gz tx ty tz sm st nullPtr pp
where
buffer = wordPtrToPtr 0x01
size = wordPtrToPtr 0x02
bytes = foldl (\a x -> a + sizeOf x) 0 args
st = fromMaybe defaultStream mst
withArray' !vals !f =
allocaBytes bytes $ \ptr -> do
pokeArray ptr vals
f ptr
{-# INLINE cuLaunchKernel #-}
cuLaunchKernel :: (Fun) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> (Ptr (Ptr FunParam)) -> (Ptr (Ptr ())) -> IO ((Status))
cuLaunchKernel a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = fromIntegral a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = fromIntegral a8} in
let {a9' = useStream a9} in
let {a10' = castPtr a10} in
let {a11' = castPtr a11} in
cuLaunchKernel'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' a11' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 270 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINEABLE launch #-}
launch :: Fun -> (Int,Int) -> Maybe Stream -> IO ()
launch !fn (!w,!h) mst =
nothingIfOk =<< cuLaunchGridAsync fn w h (fromMaybe defaultStream mst)
{-# INLINE cuLaunchGridAsync #-}
cuLaunchGridAsync :: (Fun) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuLaunchGridAsync a1 a2 a3 a4 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = useStream a4} in
cuLaunchGridAsync'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 302 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINEABLE setBlockShape #-}
setBlockShape :: Fun -> (Int,Int,Int) -> IO ()
setBlockShape !fn (!x,!y,!z) = nothingIfOk =<< cuFuncSetBlockShape fn x y z
{-# INLINE cuFuncSetBlockShape #-}
cuFuncSetBlockShape :: (Fun) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuFuncSetBlockShape a1 a2 a3 a4 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
cuFuncSetBlockShape'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 318 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINEABLE setSharedSize #-}
setSharedSize :: Fun -> Integer -> IO ()
setSharedSize !fn !bytes = nothingIfOk =<< cuFuncSetSharedSize fn bytes
{-# INLINE cuFuncSetSharedSize #-}
cuFuncSetSharedSize :: (Fun) -> (Integer) -> IO ((Status))
cuFuncSetSharedSize a1 a2 =
let {a1' = useFun a1} in
let {a2' = cIntConv a2} in
cuFuncSetSharedSize'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 332 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINEABLE setParams #-}
setParams :: Fun -> [FunParam] -> IO ()
setParams !fn !prs = do
zipWithM_ (set fn) offsets prs
nothingIfOk =<< cuParamSetSize fn (last offsets)
where
offsets = scanl (\a b -> a + size b) 0 prs
size (IArg _) = sizeOf (undefined :: CUInt)
size (FArg _) = sizeOf (undefined :: CFloat)
size (VArg v) = sizeOf v
set f o (IArg v) = nothingIfOk =<< cuParamSeti f o v
set f o (FArg v) = nothingIfOk =<< cuParamSetf f o v
set f o (VArg v) = with v $ \p -> (nothingIfOk =<< cuParamSetv f o p (sizeOf v))
{-# INLINE cuParamSetSize #-}
cuParamSetSize :: (Fun) -> (Int) -> IO ((Status))
cuParamSetSize a1 a2 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
cuParamSetSize'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 358 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINE cuParamSeti #-}
cuParamSeti :: (Fun) -> (Int) -> (Int32) -> IO ((Status))
cuParamSeti a1 a2 a3 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
cuParamSeti'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 364 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINE cuParamSetf #-}
cuParamSetf :: (Fun) -> (Int) -> (Float) -> IO ((Status))
cuParamSetf a1 a2 a3 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = realToFrac a3} in
cuParamSetf'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 370 "./Foreign/CUDA/Driver/Exec.chs" #-}
{-# INLINE cuParamSetv #-}
cuParamSetv :: Storable a => (Fun) -> (Int) -> (Ptr a) -> (Int) -> IO ((Status))
cuParamSetv a1 a2 a3 a4 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = castPtr a3} in
let {a4' = fromIntegral a4} in
cuParamSetv'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 378 "./Foreign/CUDA/Driver/Exec.chs" #-}
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncGetAttribute"
cuFuncGetAttribute'_ :: ((C2HSImp.Ptr C2HSImp.CInt) -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetCacheConfig"
cuFuncSetCacheConfig'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetSharedMemConfig"
cuFuncSetSharedMemConfig'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuLaunchKernel"
cuLaunchKernel'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr ()) -> ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (IO C2HSImp.CInt))))))))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuLaunchGridAsync"
cuLaunchGridAsync'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetBlockShape"
cuFuncSetBlockShape'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CInt -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetSharedSize"
cuFuncSetSharedSize'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetSize"
cuParamSetSize'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSeti"
cuParamSeti'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CUInt -> (IO C2HSImp.CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetf"
cuParamSetf'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (C2HSImp.CFloat -> (IO C2HSImp.CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetv"
cuParamSetv'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))))