module Foreign.CUDA.Runtime.Exec (
Fun, FunAttributes(..), FunParam(..), CacheConfig(..),
attributes, setConfig, setParams, setCacheConfig, launch, launchKernel,
) where
import Foreign.CUDA.Runtime.Stream ( Stream(..), defaultStream )
import Foreign.CUDA.Runtime.Error
import Foreign.CUDA.Internal.C2HS
import Foreign
import Foreign.C
import Control.Monad
type Fun = FunPtr ()
data FunAttributes = FunAttributes
{
constSizeBytes :: !Int64,
localSizeBytes :: !Int64,
sharedSizeBytes :: !Int64,
maxKernelThreadsPerBlock :: !Int,
numRegs :: !Int
}
deriving (Show)
instance Storable FunAttributes where
sizeOf _ = 44
alignment _ = alignment (undefined :: Ptr ())
poke _ _ = error "Can not poke Foreign.CUDA.Runtime.FunAttributes"
peek p = do
cs <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 8 :: IO CULong}) p
ls <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 16 :: IO CULong}) p
ss <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 0 :: IO CULong}) p
tb <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 24 :: IO CInt}) p
nr <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 28 :: IO CInt}) p
return FunAttributes
{
constSizeBytes = cs,
localSizeBytes = ls,
sharedSizeBytes = ss,
maxKernelThreadsPerBlock = tb,
numRegs = nr
}
data CacheConfig = None
| Shared
| L1
| Equal
deriving (Eq,Show)
instance Enum CacheConfig where
succ None = Shared
succ Shared = L1
succ L1 = Equal
succ Equal = error "CacheConfig.succ: Equal has no successor"
pred Shared = None
pred L1 = Shared
pred Equal = L1
pred None = error "CacheConfig.pred: None has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from Equal
fromEnum None = 0
fromEnum Shared = 1
fromEnum L1 = 2
fromEnum Equal = 3
toEnum 0 = None
toEnum 1 = Shared
toEnum 2 = L1
toEnum 3 = Equal
toEnum unmatched = error ("CacheConfig.toEnum: Cannot match " ++ show unmatched)
data FunParam where
IArg :: !Int -> FunParam
FArg :: !Float -> FunParam
DArg :: !Double -> FunParam
VArg :: Storable a => !a -> FunParam
attributes :: Fun -> IO FunAttributes
attributes !fn = resultIfOk =<< cudaFuncGetAttributes fn
cudaFuncGetAttributes :: (Fun) -> IO ((Status), (FunAttributes))
cudaFuncGetAttributes a2 =
alloca $ \a1' ->
withFun a2 $ \a2' ->
cudaFuncGetAttributes'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
peek a1'>>= \a1'' ->
return (res', a1'')
setConfig :: (Int,Int)
-> (Int,Int,Int)
-> Int64
-> Maybe Stream
-> IO ()
setConfig (!gx,!gy) (!bx,!by,!bz) !sharedMem !mst =
nothingIfOk =<<
cudaConfigureCallSimple gx gy bx by bz sharedMem (maybe defaultStream id mst)
cudaConfigureCallSimple :: (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int64) -> (Stream) -> IO ((Status))
cudaConfigureCallSimple a1 a2 a3 a4 a5 a6 a7 =
let {a1' = fromIntegral a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = fromIntegral a5} in
let {a6' = cIntConv a6} in
let {a7' = useStream a7} in
cudaConfigureCallSimple'_ a1' a2' a3' a4' a5' a6' a7' >>= \res ->
let {res' = cToEnum res} in
return (res')
setParams :: [FunParam] -> IO ()
setParams = foldM_ k 0
where
k !offset !arg = do
let s = size arg
set arg s offset >>= nothingIfOk
return (offset + s)
size (IArg _) = sizeOf (undefined :: Int)
size (FArg _) = sizeOf (undefined :: Float)
size (DArg _) = sizeOf (undefined :: Double)
size (VArg a) = sizeOf a
set (IArg v) s o = cudaSetupArgument v s o
set (FArg v) s o = cudaSetupArgument v s o
set (VArg v) s o = cudaSetupArgument v s o
set (DArg v) s o =
cudaSetDoubleForDevice v >>= resultIfOk >>= \d ->
cudaSetupArgument d s o
cudaSetupArgument :: Storable a => (a) -> (Int) -> (Int) -> IO ((Status))
cudaSetupArgument a1 a2 a3 =
with' a1 $ \a1' ->
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
cudaSetupArgument'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
where
with' v a = with v $ \p -> a (castPtr p)
cudaSetDoubleForDevice :: (Double) -> IO ((Status), (Double))
cudaSetDoubleForDevice a1 =
with' a1 $ \a1' ->
cudaSetDoubleForDevice'_ a1' >>= \res ->
let {res' = cToEnum res} in
peek' a1'>>= \a1'' ->
return (res', a1'')
where
with' v a = with v $ \p -> a (castPtr p)
peek' = peek . castPtr
setCacheConfig :: Fun -> CacheConfig -> IO ()
setCacheConfig !fn !pref = nothingIfOk =<< cudaFuncSetCacheConfig fn pref
cudaFuncSetCacheConfig :: (Fun) -> (CacheConfig) -> IO ((Status))
cudaFuncSetCacheConfig a1 a2 =
withFun a1 $ \a1' ->
let {a2' = cFromEnum a2} in
cudaFuncSetCacheConfig'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
launch :: Fun -> IO ()
launch !fn = nothingIfOk =<< cudaLaunch fn
cudaLaunch :: (Fun) -> IO ((Status))
cudaLaunch a1 =
withFun a1 $ \a1' ->
cudaLaunch'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
launchKernel
:: Fun
-> (Int,Int)
-> (Int,Int,Int)
-> Int64
-> Maybe Stream
-> [FunParam]
-> IO ()
launchKernel !fn !grid !block !sm !mst !args = do
setConfig grid block sm mst
setParams args
launch fn
withFun :: Fun -> (Ptr a -> IO b) -> IO b
withFun fn action = action (castFunPtrToPtr fn)
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaFuncGetAttributes"
cudaFuncGetAttributes'_ :: ((Ptr (FunAttributes)) -> ((Ptr ()) -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaConfigureCallSimple"
cudaConfigureCallSimple'_ :: (CInt -> (CInt -> (CInt -> (CInt -> (CInt -> (CULong -> ((Ptr ()) -> (IO CInt))))))))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaSetupArgument"
cudaSetupArgument'_ :: ((Ptr ()) -> (CULong -> (CULong -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaSetDoubleForDevice"
cudaSetDoubleForDevice'_ :: ((Ptr CDouble) -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaFuncSetCacheConfig"
cudaFuncSetCacheConfig'_ :: ((Ptr ()) -> (CInt -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaLaunch"
cudaLaunch'_ :: ((Ptr ()) -> (IO CInt))