module Foreign.CUDA.Driver.Exec (
Fun(Fun), FunParam(..), FunAttribute(..),
requires, setBlockShape, setSharedSize, setParams, setCacheConfigFun,
launch, launchKernel, launchKernel'
) where
import Foreign.CUDA.Internal.C2HS
import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Driver.Context ( Cache(..) )
import Foreign.CUDA.Driver.Stream ( Stream(..), defaultStream )
import Foreign
import Foreign.C
import Data.Maybe
import Control.Monad ( zipWithM_ )
newtype Fun = Fun { useFun :: ((Ptr ()))}
data FunAttribute = MaxKernelThreadsPerBlock
| SharedSizeBytes
| ConstSizeBytes
| LocalSizeBytes
| NumRegs
| PtxVersion
| BinaryVersion
| CacheModeCa
| CU_FUNC_ATTRIBUTE_MAX
deriving (Eq,Show)
instance Enum FunAttribute where
succ MaxKernelThreadsPerBlock = SharedSizeBytes
succ SharedSizeBytes = ConstSizeBytes
succ ConstSizeBytes = LocalSizeBytes
succ LocalSizeBytes = NumRegs
succ NumRegs = PtxVersion
succ PtxVersion = BinaryVersion
succ BinaryVersion = CacheModeCa
succ CacheModeCa = CU_FUNC_ATTRIBUTE_MAX
succ CU_FUNC_ATTRIBUTE_MAX = error "FunAttribute.succ: CU_FUNC_ATTRIBUTE_MAX has no successor"
pred SharedSizeBytes = MaxKernelThreadsPerBlock
pred ConstSizeBytes = SharedSizeBytes
pred LocalSizeBytes = ConstSizeBytes
pred NumRegs = LocalSizeBytes
pred PtxVersion = NumRegs
pred BinaryVersion = PtxVersion
pred CacheModeCa = BinaryVersion
pred CU_FUNC_ATTRIBUTE_MAX = CacheModeCa
pred MaxKernelThreadsPerBlock = error "FunAttribute.pred: MaxKernelThreadsPerBlock has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from CU_FUNC_ATTRIBUTE_MAX
fromEnum MaxKernelThreadsPerBlock = 0
fromEnum SharedSizeBytes = 1
fromEnum ConstSizeBytes = 2
fromEnum LocalSizeBytes = 3
fromEnum NumRegs = 4
fromEnum PtxVersion = 5
fromEnum BinaryVersion = 6
fromEnum CacheModeCa = 7
fromEnum CU_FUNC_ATTRIBUTE_MAX = 8
toEnum 0 = MaxKernelThreadsPerBlock
toEnum 1 = SharedSizeBytes
toEnum 2 = ConstSizeBytes
toEnum 3 = LocalSizeBytes
toEnum 4 = NumRegs
toEnum 5 = PtxVersion
toEnum 6 = BinaryVersion
toEnum 7 = CacheModeCa
toEnum 8 = CU_FUNC_ATTRIBUTE_MAX
toEnum unmatched = error ("FunAttribute.toEnum: Cannot match " ++ show unmatched)
data FunParam where
IArg :: !Int32 -> FunParam
FArg :: !Float -> FunParam
VArg :: Storable a => !a -> FunParam
instance Storable FunParam where
sizeOf (IArg _) = sizeOf (undefined :: CUInt)
sizeOf (FArg _) = sizeOf (undefined :: CFloat)
sizeOf (VArg v) = sizeOf v
alignment (IArg _) = alignment (undefined :: CUInt)
alignment (FArg _) = alignment (undefined :: CFloat)
alignment (VArg v) = alignment v
poke p (IArg i) = poke (castPtr p) i
poke p (FArg f) = poke (castPtr p) f
poke p (VArg v) = poke (castPtr p) v
peek _ = error "Can not peek Foreign.CUDA.Driver.FunParam"
requires :: Fun -> FunAttribute -> IO Int
requires !fn !att = resultIfOk =<< cuFuncGetAttribute att fn
cuFuncGetAttribute :: (FunAttribute) -> (Fun) -> IO ((Status), (Int))
cuFuncGetAttribute a2 a3 =
alloca $ \a1' ->
let {a2' = cFromEnum a2} in
let {a3' = useFun a3} in
cuFuncGetAttribute'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
peekIntConv a1'>>= \a1'' ->
return (res', a1'')
setBlockShape :: Fun -> (Int,Int,Int) -> IO ()
setBlockShape !fn (!x,!y,!z) = nothingIfOk =<< cuFuncSetBlockShape fn x y z
cuFuncSetBlockShape :: (Fun) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuFuncSetBlockShape a1 a2 a3 a4 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
cuFuncSetBlockShape'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
setSharedSize :: Fun -> Integer -> IO ()
setSharedSize !fn !bytes = nothingIfOk =<< cuFuncSetSharedSize fn bytes
cuFuncSetSharedSize :: (Fun) -> (Integer) -> IO ((Status))
cuFuncSetSharedSize a1 a2 =
let {a1' = useFun a1} in
let {a2' = cIntConv a2} in
cuFuncSetSharedSize'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
setCacheConfigFun :: Fun -> Cache -> IO ()
setCacheConfigFun !fn !pref = nothingIfOk =<< cuFuncSetCacheConfig fn pref
cuFuncSetCacheConfig :: (Fun) -> (Cache) -> IO ((Status))
cuFuncSetCacheConfig a1 a2 =
let {a1' = useFun a1} in
let {a2' = cFromEnum a2} in
cuFuncSetCacheConfig'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
launch :: Fun -> (Int,Int) -> Maybe Stream -> IO ()
launch !fn (!w,!h) mst =
nothingIfOk =<< cuLaunchGridAsync fn w h (fromMaybe defaultStream mst)
cuLaunchGridAsync :: (Fun) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuLaunchGridAsync a1 a2 a3 a4 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = useStream a4} in
cuLaunchGridAsync'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
launchKernel, launchKernel'
:: Fun
-> (Int,Int,Int)
-> (Int,Int,Int)
-> Int
-> Maybe Stream
-> [FunParam]
-> IO ()
launchKernel !fn (!gx,!gy,!gz) (!tx,!ty,!tz) !sm !mst !args
= (=<<) nothingIfOk
$ withMany withFP args
$ \pa -> withArray pa
$ \pp -> cuLaunchKernel fn gx gy gz tx ty tz sm st pp nullPtr
where
!st = fromMaybe defaultStream mst
withFP :: FunParam -> (Ptr FunParam -> IO b) -> IO b
withFP !p !f = case p of
IArg v -> with' v (f . castPtr)
FArg v -> with' v (f . castPtr)
VArg v -> with' v (f . castPtr)
with' :: Storable a => a -> (Ptr a -> IO b) -> IO b
with' !val !f =
allocaBytes (sizeOf val) $ \ptr -> do
poke ptr val
f ptr
launchKernel' !fn (!gx,!gy,!gz) (!tx,!ty,!tz) !sm !mst !args
= (=<<) nothingIfOk
$ with bytes
$ \pb -> withArray' args
$ \pa -> withArray0 nullPtr [buffer, castPtr pa, size, castPtr pb]
$ \pp -> cuLaunchKernel fn gx gy gz tx ty tz sm st nullPtr pp
where
buffer = wordPtrToPtr 0x01
size = wordPtrToPtr 0x02
bytes = foldl (\a x -> a + sizeOf x) 0 args
st = fromMaybe defaultStream mst
withArray' !vals !f =
allocaBytes bytes $ \ptr -> do
pokeArray ptr vals
f ptr
cuLaunchKernel :: (Fun) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> (Ptr (Ptr FunParam)) -> (Ptr (Ptr ())) -> IO ((Status))
cuLaunchKernel a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = fromIntegral a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = fromIntegral a8} in
let {a9' = useStream a9} in
let {a10' = castPtr a10} in
let {a11' = castPtr a11} in
cuLaunchKernel'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' a11' >>= \res ->
let {res' = cToEnum res} in
return (res')
setParams :: Fun -> [FunParam] -> IO ()
setParams !fn !prs = do
zipWithM_ (set fn) offsets prs
nothingIfOk =<< cuParamSetSize fn (last offsets)
where
offsets = scanl (\a b -> a + size b) 0 prs
size (IArg _) = sizeOf (undefined :: CUInt)
size (FArg _) = sizeOf (undefined :: CFloat)
size (VArg v) = sizeOf v
set f o (IArg v) = nothingIfOk =<< cuParamSeti f o v
set f o (FArg v) = nothingIfOk =<< cuParamSetf f o v
set f o (VArg v) = with v $ \p -> (nothingIfOk =<< cuParamSetv f o p (sizeOf v))
cuParamSetSize :: (Fun) -> (Int) -> IO ((Status))
cuParamSetSize a1 a2 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
cuParamSetSize'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
cuParamSeti :: (Fun) -> (Int) -> (Int32) -> IO ((Status))
cuParamSeti a1 a2 a3 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
cuParamSeti'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
cuParamSetf :: (Fun) -> (Int) -> (Float) -> IO ((Status))
cuParamSetf a1 a2 a3 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = realToFrac a3} in
cuParamSetf'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
cuParamSetv :: Storable a => (Fun) -> (Int) -> (Ptr a) -> (Int) -> IO ((Status))
cuParamSetv a1 a2 a3 a4 =
let {a1' = useFun a1} in
let {a2' = fromIntegral a2} in
let {a3' = castPtr a3} in
let {a4' = fromIntegral a4} in
cuParamSetv'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncGetAttribute"
cuFuncGetAttribute'_ :: ((Ptr CInt) -> (CInt -> ((Ptr ()) -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetBlockShape"
cuFuncSetBlockShape'_ :: ((Ptr ()) -> (CInt -> (CInt -> (CInt -> (IO CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetSharedSize"
cuFuncSetSharedSize'_ :: ((Ptr ()) -> (CUInt -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuFuncSetCacheConfig"
cuFuncSetCacheConfig'_ :: ((Ptr ()) -> (CInt -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuLaunchGridAsync"
cuLaunchGridAsync'_ :: ((Ptr ()) -> (CInt -> (CInt -> ((Ptr ()) -> (IO CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuLaunchKernel"
cuLaunchKernel'_ :: ((Ptr ()) -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> ((Ptr ()) -> ((Ptr (Ptr ())) -> ((Ptr (Ptr ())) -> (IO CInt))))))))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetSize"
cuParamSetSize'_ :: ((Ptr ()) -> (CUInt -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSeti"
cuParamSeti'_ :: ((Ptr ()) -> (CInt -> (CUInt -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetf"
cuParamSetf'_ :: ((Ptr ()) -> (CInt -> (CFloat -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Exec.chs.h cuParamSetv"
cuParamSetv'_ :: ((Ptr ()) -> (CInt -> ((Ptr ()) -> (CUInt -> (IO CInt)))))