{-# LINE 1 "src/Foreign/CUDA/Runtime/Exec.chs" #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE TemplateHaskell #-}
module Foreign.CUDA.Runtime.Exec (
Fun, FunAttributes(..), FunParam(..), CacheConfig(..),
attributes, setCacheConfig, launchKernel,
) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.ForeignPtr as C2HSImp
import qualified Foreign.Ptr as C2HSImp
import qualified Foreign.Storable as C2HSImp
{-# LINE 25 "src/Foreign/CUDA/Runtime/Exec.chs" #-}
import Foreign.CUDA.Runtime.Stream ( Stream(..), defaultStream )
import Foreign.CUDA.Runtime.Error
import Foreign.CUDA.Internal.C2HS
import Foreign
import Foreign.C
import Control.Monad
import Data.Maybe
type Fun = FunPtr ()
{-# LINE 62 "src/Foreign/CUDA/Runtime/Exec.chs" #-}
data FunAttributes = FunAttributes
{
constSizeBytes :: !Int64,
localSizeBytes :: !Int64,
sharedSizeBytes :: !Int64,
maxKernelThreadsPerBlock :: !Int,
numRegs :: !Int
}
deriving (Show)
instance Storable FunAttributes where
sizeOf _ = 56
{-# LINE 75 "src/Foreign/CUDA/Runtime/Exec.chs" #-}
alignment _ = alignment (undefined :: Ptr ())
poke _ _ = error "Can not poke Foreign.CUDA.Runtime.FunAttributes"
peek p = do
cs <- cIntConv `fmap` (\ptr -> do {C2HSImp.peekByteOff ptr 8 :: IO C2HSImp.CULong}) p
ls <- cIntConv `fmap` (\ptr -> do {C2HSImp.peekByteOff ptr 16 :: IO C2HSImp.CULong}) p
ss <- cIntConv `fmap` (\ptr -> do {C2HSImp.peekByteOff ptr 0 :: IO C2HSImp.CULong}) p
tb <- cIntConv `fmap` (\ptr -> do {C2HSImp.peekByteOff ptr 24 :: IO C2HSImp.CInt}) p
nr <- cIntConv `fmap` (\ptr -> do {C2HSImp.peekByteOff ptr 28 :: IO C2HSImp.CInt}) p
return FunAttributes
{
constSizeBytes = cs,
localSizeBytes = ls,
sharedSizeBytes = ss,
maxKernelThreadsPerBlock = tb,
numRegs = nr
}
data CacheConfig = None
| Shared
| L1
| Equal
deriving (Eq,Show)
instance Enum CacheConfig where
succ None = Shared
succ Shared = L1
succ L1 = Equal
succ Equal = error "CacheConfig.succ: Equal has no successor"
pred Shared = None
pred L1 = Shared
pred Equal = L1
pred None = error "CacheConfig.pred: None has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from Equal
fromEnum None = 0
fromEnum Shared = 1
fromEnum L1 = 2
fromEnum Equal = 3
toEnum 0 = None
toEnum 1 = Shared
toEnum 2 = L1
toEnum 3 = Equal
toEnum unmatched = error ("CacheConfig.toEnum: Cannot match " ++ show unmatched)
{-# LINE 104 "src/Foreign/CUDA/Runtime/Exec.chs" #-}
data FunParam where
IArg :: !Int -> FunParam
FArg :: !Float -> FunParam
DArg :: !Double -> FunParam
VArg :: Storable a => !a -> FunParam
{-# INLINEABLE attributes #-}
attributes :: Fun -> IO FunAttributes
attributes !fn = resultIfOk =<< cudaFuncGetAttributes fn
{-# INLINE cudaFuncGetAttributes #-}
cudaFuncGetAttributes :: (Fun) -> IO ((Status), (FunAttributes))
cudaFuncGetAttributes a2 =
alloca $ \a1' ->
withFun a2 $ \a2' ->
cudaFuncGetAttributes'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
peek a1'>>= \a1'' ->
return (res', a1'')
{-# LINE 132 "src/Foreign/CUDA/Runtime/Exec.chs" #-}
{-# INLINEABLE setCacheConfig #-}
setCacheConfig :: Fun -> CacheConfig -> IO ()
setCacheConfig !fn !pref = nothingIfOk =<< cudaFuncSetCacheConfig fn pref
{-# INLINE cudaFuncSetCacheConfig #-}
cudaFuncSetCacheConfig :: (Fun) -> (CacheConfig) -> IO ((Status))
cudaFuncSetCacheConfig a1 a2 =
withFun a1 $ \a1' ->
let {a2' = cFromEnum a2} in
cudaFuncSetCacheConfig'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 155 "src/Foreign/CUDA/Runtime/Exec.chs" #-}
{-# INLINEABLE launchKernel #-}
launchKernel
:: Fun
-> (Int,Int)
-> (Int,Int,Int)
-> Int64
-> Maybe Stream
-> [FunParam]
-> IO ()
launchKernel !fn (!gx,!gy) (!bx,!by,!bz) !sm !mst !args
= (=<<) nothingIfOk
$ withMany withFP args
$ \pa -> withArray pa
$ \pp -> cudaLaunchKernel_simple fn gx gy 1 bx by bz pp sm (fromMaybe defaultStream mst)
where
withFP :: FunParam -> (Ptr FunParam -> IO b) -> IO b
withFP p f = case p of
IArg v -> with' v (f . castPtr)
FArg v -> with' v (f . castPtr)
DArg v -> with' v (f . castPtr)
VArg v -> with' v (f . castPtr)
with' :: Storable a => a -> (Ptr a -> IO b) -> IO b
with' !val !f =
allocaBytes (sizeOf val) $ \ptr -> do
poke ptr val
f ptr
{-# INLINE cudaLaunchKernel_simple #-}
cudaLaunchKernel_simple :: (Fun) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Ptr (Ptr FunParam)) -> (Int64) -> (Stream) -> IO ((Status))
cudaLaunchKernel_simple a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 =
withFun a1 $ \a1' ->
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = fromIntegral a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = castPtr a8} in
let {a9' = fromIntegral a9} in
let {a10' = useStream a10} in
cudaLaunchKernel_simple'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' >>= \res ->
let {res' = cToEnum res} in
return (res')
{-# LINE 201 "src/Foreign/CUDA/Runtime/Exec.chs" #-}
withFun :: Fun -> (Ptr a -> IO b) -> IO b
withFun fn action = action (castFunPtrToPtr fn)
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaFuncGetAttributes"
cudaFuncGetAttributes'_ :: ((C2HSImp.Ptr (FunAttributes)) -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaFuncSetCacheConfig"
cudaFuncSetCacheConfig'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CInt -> (IO C2HSImp.CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Exec.chs.h cudaLaunchKernel_simple"
cudaLaunchKernel_simple'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))))))))