module Foreign.CUDA.Runtime.Device (
Device, DeviceFlag(..), DeviceProperties(..), Compute(..), ComputeMode(..),
choose, get, count, props, set, setFlags, setOrder, reset, sync,
PeerFlag,
accessible, add, remove,
Limit(..),
getLimit, setLimit
) where
import Foreign.CUDA.Analysis.Device
import Foreign.CUDA.Runtime.Error
import Foreign.CUDA.Internal.C2HS
import Foreign.CUDA.Internal.Offsets
import Foreign
import Foreign.C
type Device = Int
data DeviceFlag = ScheduleAuto
| ScheduleSpin
| ScheduleYield
| BlockingSync
| MapHost
| LMemResizeToMax
deriving (Eq,Show)
instance Enum DeviceFlag where
succ ScheduleAuto = ScheduleSpin
succ ScheduleSpin = ScheduleYield
succ ScheduleYield = BlockingSync
succ BlockingSync = MapHost
succ MapHost = LMemResizeToMax
succ LMemResizeToMax = error "DeviceFlag.succ: LMemResizeToMax has no successor"
pred ScheduleSpin = ScheduleAuto
pred ScheduleYield = ScheduleSpin
pred BlockingSync = ScheduleYield
pred MapHost = BlockingSync
pred LMemResizeToMax = MapHost
pred ScheduleAuto = error "DeviceFlag.pred: ScheduleAuto has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from LMemResizeToMax
fromEnum ScheduleAuto = 0
fromEnum ScheduleSpin = 1
fromEnum ScheduleYield = 2
fromEnum BlockingSync = 4
fromEnum MapHost = 8
fromEnum LMemResizeToMax = 16
toEnum 0 = ScheduleAuto
toEnum 1 = ScheduleSpin
toEnum 2 = ScheduleYield
toEnum 4 = BlockingSync
toEnum 8 = MapHost
toEnum 16 = LMemResizeToMax
toEnum unmatched = error ("DeviceFlag.toEnum: Cannot match " ++ show unmatched)
instance Storable DeviceProperties where
sizeOf _ = 632
alignment _ = alignment (undefined :: Ptr ())
poke _ _ = error "no instance for Foreign.Storable.poke DeviceProperties"
peek p = do
gm <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 256 ::IO CULong}) p
sm <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 264 ::IO CULong}) p
rb <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 272 ::IO CInt}) p
ws <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 276 ::IO CInt}) p
mp <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 280 ::IO CULong}) p
tb <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 288 ::IO CInt}) p
cl <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 316 ::IO CInt}) p
cm <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 320 ::IO CULong}) p
v1 <- fromIntegral `fmap` (\ptr -> do {peekByteOff ptr 328 ::IO CInt}) p
v2 <- fromIntegral `fmap` (\ptr -> do {peekByteOff ptr 332 ::IO CInt}) p
ta <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 336 ::IO CULong}) p
ov <- cToBool `fmap` (\ptr -> do {peekByteOff ptr 352 ::IO CInt}) p
pc <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 356 ::IO CInt}) p
ke <- cToBool `fmap` (\ptr -> do {peekByteOff ptr 360 ::IO CInt}) p
tg <- cToBool `fmap` (\ptr -> do {peekByteOff ptr 364 ::IO CInt}) p
hm <- cToBool `fmap` (\ptr -> do {peekByteOff ptr 368 ::IO CInt}) p
md <- cToEnum `fmap` (\ptr -> do {peekByteOff ptr 372 ::IO CInt}) p
ck <- cToBool `fmap` (\ptr -> do {peekByteOff ptr 544 ::IO CInt}) p
u1 <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 376 ::IO CInt}) p
ee <- cToBool `fmap` (\ptr -> do {peekByteOff ptr 548 ::IO CInt}) p
ae <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 568 ::IO CInt}) p
l2 <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 584 ::IO CInt}) p
tm <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 588 ::IO CInt}) p
mw <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 580 ::IO CInt}) p
mc <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 576 ::IO CInt}) p
pb <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 552 ::IO CInt}) p
pd <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 556 ::IO CInt}) p
pm <- cIntConv `fmap` (\ptr -> do {peekByteOff ptr 560 ::IO CInt}) p
tc <- cToBool `fmap` (\ptr -> do {peekByteOff ptr 564 ::IO CInt}) p
ua <- cToBool `fmap` (\ptr -> do {peekByteOff ptr 572 ::IO CInt}) p
n <- peekCString (p `plusPtr` devNameOffset)
(t1:t2:t3:_) <- map cIntConv `fmap` peekArray 3 (p `plusPtr` devMaxThreadDimOffset :: Ptr CInt)
(g1:g2:g3:_) <- map cIntConv `fmap` peekArray 3 (p `plusPtr` devMaxGridSizeOffset :: Ptr CInt)
(u21:u22:_) <- map cIntConv `fmap` peekArray 2 (p `plusPtr` devMaxTexture2DOffset :: Ptr CInt)
(u31:u32:u33:_) <- map cIntConv `fmap` peekArray 3 (p `plusPtr` devMaxTexture3DOffset :: Ptr CInt)
return DeviceProperties
{
deviceName = n,
computeCapability = Compute v1 v2,
totalGlobalMem = gm,
totalConstMem = cm,
sharedMemPerBlock = sm,
regsPerBlock = rb,
warpSize = ws,
maxThreadsPerBlock = tb,
maxBlockSize = (t1,t2,t3),
maxGridSize = (g1,g2,g3),
clockRate = cl,
multiProcessorCount = pc,
memPitch = mp,
textureAlignment = ta,
computeMode = md,
deviceOverlap = ov,
concurrentKernels = ck,
maxTextureDim1D = u1,
maxTextureDim2D = (u21,u22),
maxTextureDim3D = (u31,u32,u33),
eccEnabled = ee,
asyncEngineCount = ae,
cacheMemL2 = l2,
maxThreadsPerMultiProcessor = tm,
memBusWidth = mw,
memClockRate = mc,
tccDriverEnabled = tc,
unifiedAddressing = ua,
pciInfo = PCI pb pd pm,
kernelExecTimeoutEnabled = ke,
integrated = tg,
canMapHostMemory = hm
}
choose :: DeviceProperties -> IO Device
choose !dev = resultIfOk =<< cudaChooseDevice dev
cudaChooseDevice :: (DeviceProperties) -> IO ((Status), (Int))
cudaChooseDevice a2 =
alloca $ \a1' ->
withDevProp a2 $ \a2' ->
cudaChooseDevice'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
peekIntConv a1'>>= \a1'' ->
return (res', a1'')
where
withDevProp = with
get :: IO Device
get = resultIfOk =<< cudaGetDevice
cudaGetDevice :: IO ((Status), (Int))
cudaGetDevice =
alloca $ \a1' ->
cudaGetDevice'_ a1' >>= \res ->
let {res' = cToEnum res} in
peekIntConv a1'>>= \a1'' ->
return (res', a1'')
count :: IO Int
count = resultIfOk =<< cudaGetDeviceCount
cudaGetDeviceCount :: IO ((Status), (Int))
cudaGetDeviceCount =
alloca $ \a1' ->
cudaGetDeviceCount'_ a1' >>= \res ->
let {res' = cToEnum res} in
peekIntConv a1'>>= \a1'' ->
return (res', a1'')
props :: Device -> IO DeviceProperties
props !n = resultIfOk =<< cudaGetDeviceProperties n
cudaGetDeviceProperties :: (Int) -> IO ((Status), (DeviceProperties))
cudaGetDeviceProperties a2 =
alloca $ \a1' ->
let {a2' = fromIntegral a2} in
cudaGetDeviceProperties'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
peek a1'>>= \a1'' ->
return (res', a1'')
set :: Device -> IO ()
set !n = nothingIfOk =<< cudaSetDevice n
cudaSetDevice :: (Int) -> IO ((Status))
cudaSetDevice a1 =
let {a1' = fromIntegral a1} in
cudaSetDevice'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
setFlags :: [DeviceFlag] -> IO ()
setFlags !f = nothingIfOk =<< cudaSetDeviceFlags (combineBitMasks f)
cudaSetDeviceFlags :: (Int) -> IO ((Status))
cudaSetDeviceFlags a1 =
let {a1' = fromIntegral a1} in
cudaSetDeviceFlags'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
setOrder :: [Device] -> IO ()
setOrder !l = nothingIfOk =<< cudaSetValidDevices l (length l)
cudaSetValidDevices :: ([Int]) -> (Int) -> IO ((Status))
cudaSetValidDevices a1 a2 =
withArrayIntConv a1 $ \a1' ->
let {a2' = fromIntegral a2} in
cudaSetValidDevices'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
where
withArrayIntConv = withArray . map cIntConv
sync :: IO ()
sync = nothingIfOk =<< cudaDeviceSynchronize
cudaDeviceSynchronize :: IO ((Status))
cudaDeviceSynchronize =
cudaDeviceSynchronize'_ >>= \res ->
let {res' = cToEnum res} in
return (res')
reset :: IO ()
reset = nothingIfOk =<< cudaDeviceReset
cudaDeviceReset :: IO ((Status))
cudaDeviceReset =
cudaDeviceReset'_ >>= \res ->
let {res' = cToEnum res} in
return (res')
data PeerFlag
instance Enum PeerFlag where
toEnum x = case x of {}
fromEnum x = case x of {}
accessible :: Device -> Device -> IO Bool
accessible !dev !peer = resultIfOk =<< cudaDeviceCanAccessPeer dev peer
cudaDeviceCanAccessPeer :: (Device) -> (Device) -> IO ((Status), (Bool))
cudaDeviceCanAccessPeer a2 a3 =
alloca $ \a1' ->
let {a2' = cIntConv a2} in
let {a3' = cIntConv a3} in
cudaDeviceCanAccessPeer'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
peekBool a1'>>= \a1'' ->
return (res', a1'')
add :: Device -> [PeerFlag] -> IO ()
add !dev !flags = nothingIfOk =<< cudaDeviceEnablePeerAccess dev flags
cudaDeviceEnablePeerAccess :: (Device) -> ([PeerFlag]) -> IO ((Status))
cudaDeviceEnablePeerAccess a1 a2 =
let {a1' = cIntConv a1} in
let {a2' = combineBitMasks a2} in
cudaDeviceEnablePeerAccess'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
remove :: Device -> IO ()
remove !dev = nothingIfOk =<< cudaDeviceDisablePeerAccess dev
cudaDeviceDisablePeerAccess :: (Device) -> IO ((Status))
cudaDeviceDisablePeerAccess a1 =
let {a1' = cIntConv a1} in
cudaDeviceDisablePeerAccess'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
data Limit = Stacksize
| Printffifosize
| Mallocheapsize
| Devruntimesyncdepth
| Devruntimependinglaunchcount
deriving (Eq,Show)
instance Enum Limit where
succ Stacksize = Printffifosize
succ Printffifosize = Mallocheapsize
succ Mallocheapsize = Devruntimesyncdepth
succ Devruntimesyncdepth = Devruntimependinglaunchcount
succ Devruntimependinglaunchcount = error "Limit.succ: Devruntimependinglaunchcount has no successor"
pred Printffifosize = Stacksize
pred Mallocheapsize = Printffifosize
pred Devruntimesyncdepth = Mallocheapsize
pred Devruntimependinglaunchcount = Devruntimesyncdepth
pred Stacksize = error "Limit.pred: Stacksize has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from Devruntimependinglaunchcount
fromEnum Stacksize = 0
fromEnum Printffifosize = 1
fromEnum Mallocheapsize = 2
fromEnum Devruntimesyncdepth = 3
fromEnum Devruntimependinglaunchcount = 4
toEnum 0 = Stacksize
toEnum 1 = Printffifosize
toEnum 2 = Mallocheapsize
toEnum 3 = Devruntimesyncdepth
toEnum 4 = Devruntimependinglaunchcount
toEnum unmatched = error ("Limit.toEnum: Cannot match " ++ show unmatched)
getLimit :: Limit -> IO Int
getLimit !l = resultIfOk =<< cudaDeviceGetLimit l
cudaDeviceGetLimit :: (Limit) -> IO ((Status), (Int))
cudaDeviceGetLimit a2 =
alloca $ \a1' ->
let {a2' = cFromEnum a2} in
cudaDeviceGetLimit'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
peekIntConv a1'>>= \a1'' ->
return (res', a1'')
setLimit :: Limit -> Int -> IO ()
setLimit !l !n = nothingIfOk =<< cudaDeviceSetLimit l n
cudaDeviceSetLimit :: (Limit) -> (Int) -> IO ((Status))
cudaDeviceSetLimit a1 a2 =
let {a1' = cFromEnum a1} in
let {a2' = cIntConv a2} in
cudaDeviceSetLimit'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
return (res')
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaChooseDevice"
cudaChooseDevice'_ :: ((Ptr CInt) -> ((Ptr (DeviceProperties)) -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaGetDevice"
cudaGetDevice'_ :: ((Ptr CInt) -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaGetDeviceCount"
cudaGetDeviceCount'_ :: ((Ptr CInt) -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaGetDeviceProperties"
cudaGetDeviceProperties'_ :: ((Ptr (DeviceProperties)) -> (CInt -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaSetDevice"
cudaSetDevice'_ :: (CInt -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaSetDeviceFlags"
cudaSetDeviceFlags'_ :: (CUInt -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaSetValidDevices"
cudaSetValidDevices'_ :: ((Ptr CInt) -> (CInt -> (IO CInt)))
foreign import ccall safe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceSynchronize"
cudaDeviceSynchronize'_ :: (IO CInt)
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceReset"
cudaDeviceReset'_ :: (IO CInt)
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceCanAccessPeer"
cudaDeviceCanAccessPeer'_ :: ((Ptr CInt) -> (CInt -> (CInt -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceEnablePeerAccess"
cudaDeviceEnablePeerAccess'_ :: (CInt -> (CUInt -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceDisablePeerAccess"
cudaDeviceDisablePeerAccess'_ :: (CInt -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceGetLimit"
cudaDeviceGetLimit'_ :: ((Ptr CULong) -> (CInt -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceSetLimit"
cudaDeviceSetLimit'_ :: (CInt -> (CULong -> (IO CInt)))