module Foreign.CUDA.Driver.Marshal (
AllocFlag(..),
mallocHostArray, freeHost, registerArray, unregisterArray,
mallocArray, allocaArray, free,
AttachFlag(..),
mallocManagedArray,
peekArray, peekArrayAsync, peekArray2D, peekArray2DAsync, peekListArray,
pokeArray, pokeArrayAsync, pokeArray2D, pokeArray2DAsync, pokeListArray,
copyArray, copyArrayAsync, copyArray2D, copyArray2DAsync,
copyArrayPeer, copyArrayPeerAsync,
newListArray, newListArrayLen,
withListArray, withListArrayLen,
memset, memsetAsync,
getDevicePtr, getBasePtr, getMemInfo,
useDeviceHandle, peekDeviceHandle
) where
import Foreign.CUDA.Ptr
import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Driver.Stream (Stream(..))
import Foreign.CUDA.Driver.Context (Context(..))
import Foreign.CUDA.Internal.C2HS
import Data.Int
import Data.Maybe
import Unsafe.Coerce
import Control.Applicative
import Control.Exception
import Foreign.C
import Foreign.Ptr
import Foreign.Storable
import qualified Foreign.Marshal as F
data AllocFlag = Portable
| DeviceMapped
| WriteCombined
deriving (Eq,Show)
instance Enum AllocFlag where
fromEnum Portable = 1
fromEnum DeviceMapped = 2
fromEnum WriteCombined = 4
toEnum 1 = Portable
toEnum 2 = DeviceMapped
toEnum 4 = WriteCombined
toEnum unmatched = error ("AllocFlag.toEnum: Cannot match " ++ show unmatched)
mallocHostArray :: Storable a => [AllocFlag] -> Int -> IO (HostPtr a)
mallocHostArray !flags = doMalloc undefined
where
doMalloc :: Storable a' => a' -> Int -> IO (HostPtr a')
doMalloc x !n = resultIfOk =<< cuMemHostAlloc (n * sizeOf x) flags
cuMemHostAlloc :: (Int) -> ([AllocFlag]) -> IO ((Status), (HostPtr a))
cuMemHostAlloc a2 a3 =
alloca' $ \a1' ->
let {a2' = fromIntegral a2} in
let {a3' = combineBitMasks a3} in
cuMemHostAlloc'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
peekHP a1'>>= \a1'' ->
return (res', a1'')
where
alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)
peekHP !p = HostPtr . castPtr <$> peek p
freeHost :: HostPtr a -> IO ()
freeHost !p = nothingIfOk =<< cuMemFreeHost p
cuMemFreeHost :: (HostPtr a) -> IO ((Status))
cuMemFreeHost a1 =
let {a1' = useHP a1} in
cuMemFreeHost'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
where
useHP = castPtr . useHostPtr
registerArray :: Storable a => [AllocFlag] -> Int -> Ptr a -> IO (HostPtr a)
registerArray !flags !n = go undefined
where
go :: Storable b => b -> Ptr b -> IO (HostPtr b)
go x !p = do
status <- cuMemHostRegister p (n * sizeOf x) flags
resultIfOk (status,HostPtr p)
cuMemHostRegister :: (Ptr a) -> (Int) -> ([AllocFlag]) -> IO ((Status))
cuMemHostRegister a1 a2 a3 =
let {a1' = castPtr a1} in
let {a2' = fromIntegral a2} in
let {a3' = combineBitMasks a3} in
cuMemHostRegister'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
unregisterArray :: HostPtr a -> IO (Ptr a)
unregisterArray (HostPtr !p) = do
status <- cuMemHostUnregister p
resultIfOk (status,p)
cuMemHostUnregister :: (Ptr a) -> IO ((Status))
cuMemHostUnregister a1 =
let {a1' = castPtr a1} in
cuMemHostUnregister'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
mallocArray :: Storable a => Int -> IO (DevicePtr a)
mallocArray = doMalloc undefined
where
doMalloc :: Storable a' => a' -> Int -> IO (DevicePtr a')
doMalloc x !n = resultIfOk =<< cuMemAlloc (n * sizeOf x)
cuMemAlloc :: (Int) -> IO ((Status), (DevicePtr a))
cuMemAlloc a2 =
alloca' $ \a1' ->
let {a2' = fromIntegral a2} in
cuMemAlloc'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
peekDeviceHandle a1'>>= \a1'' ->
return (res', a1'')
where
alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)
allocaArray :: Storable a => Int -> (DevicePtr a -> IO b) -> IO b
allocaArray !n = bracket (mallocArray n) free
free :: DevicePtr a -> IO ()
free !dp = nothingIfOk =<< cuMemFree dp
cuMemFree :: (DevicePtr a) -> IO ((Status))
cuMemFree a1 =
let {a1' = useDeviceHandle a1} in
cuMemFree'_ a1' >>= \res ->
let {res' = cToEnum res} in
return (res')
data AttachFlag = Global
| Host
| Single
deriving (Eq,Show)
instance Enum AttachFlag where
fromEnum Global = 1
fromEnum Host = 2
fromEnum Single = 4
toEnum 1 = Global
toEnum 2 = Host
toEnum 4 = Single
toEnum unmatched = error ("AttachFlag.toEnum: Cannot match " ++ show unmatched)
mallocManagedArray :: Storable a => [AttachFlag] -> Int -> IO (DevicePtr a)
mallocManagedArray !flags = doMalloc undefined
where
doMalloc :: Storable a' => a' -> Int -> IO (DevicePtr a')
doMalloc x !n = resultIfOk =<< cuMemAllocManaged (n * sizeOf x) flags
cuMemAllocManaged :: (Int) -> ([AttachFlag]) -> IO ((Status), (DevicePtr a))
cuMemAllocManaged a2 a3 =
alloca' $ \a1' ->
let {a2' = fromIntegral a2} in
let {a3' = combineBitMasks a3} in
cuMemAllocManaged'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
peekDeviceHandle a1'>>= \a1'' ->
return (res', a1'')
where
alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)
peekArray :: Storable a => Int -> DevicePtr a -> Ptr a -> IO ()
peekArray !n !dptr !hptr = doPeek undefined dptr
where
doPeek :: Storable a' => a' -> DevicePtr a' -> IO ()
doPeek x _ = nothingIfOk =<< cuMemcpyDtoH hptr dptr (n * sizeOf x)
cuMemcpyDtoH :: (Ptr a) -> (DevicePtr a) -> (Int) -> IO ((Status))
cuMemcpyDtoH a1 a2 a3 =
let {a1' = castPtr a1} in
let {a2' = useDeviceHandle a2} in
let {a3' = fromIntegral a3} in
cuMemcpyDtoH'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
peekArrayAsync :: Storable a => Int -> DevicePtr a -> HostPtr a -> Maybe Stream -> IO ()
peekArrayAsync !n !dptr !hptr !mst = doPeek undefined dptr
where
doPeek :: Storable a' => a' -> DevicePtr a' -> IO ()
doPeek x _ = nothingIfOk =<< cuMemcpyDtoHAsync hptr dptr (n * sizeOf x) (fromMaybe (Stream nullPtr) mst)
cuMemcpyDtoHAsync :: (HostPtr a) -> (DevicePtr a) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpyDtoHAsync a1 a2 a3 a4 =
let {a1' = useHP a1} in
let {a2' = useDeviceHandle a2} in
let {a3' = fromIntegral a3} in
let {a4' = useStream a4} in
cuMemcpyDtoHAsync'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
where
useHP = castPtr . useHostPtr
peekArray2D
:: Storable a
=> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> IO ()
peekArray2D !w !h !dptr !dw !dx !dy !hptr !hw !hx !hy = doPeek undefined dptr
where
doPeek :: Storable a' => a' -> DevicePtr a' -> IO ()
doPeek x _ =
let bytes = sizeOf x
w' = w * bytes
hw' = hw * bytes
hx' = hx * bytes
dw' = dw * bytes
dx' = dx * bytes
in
nothingIfOk =<< cuMemcpy2DDtoH hptr hw' hx' hy dptr dw' dx' dy w' h
cuMemcpy2DDtoH :: (Ptr a) -> (Int) -> (Int) -> (Int) -> (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuMemcpy2DDtoH a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 =
let {a1' = castPtr a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = useDeviceHandle a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = fromIntegral a8} in
let {a9' = fromIntegral a9} in
let {a10' = fromIntegral a10} in
cuMemcpy2DDtoH'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' >>= \res ->
let {res' = cToEnum res} in
return (res')
peekArray2DAsync
:: Storable a
=> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> HostPtr a
-> Int
-> Int
-> Int
-> Maybe Stream
-> IO ()
peekArray2DAsync !w !h !dptr !dw !dx !dy !hptr !hw !hx !hy !mst = doPeek undefined dptr
where
doPeek :: Storable a' => a' -> DevicePtr a' -> IO ()
doPeek x _ =
let bytes = sizeOf x
w' = w * bytes
hw' = hw * bytes
hx' = hx * bytes
dw' = dw * bytes
dx' = dx * bytes
st = fromMaybe (Stream nullPtr) mst
in
nothingIfOk =<< cuMemcpy2DDtoHAsync hptr hw' hx' hy dptr dw' dx' dy w' h st
cuMemcpy2DDtoHAsync :: (HostPtr a) -> (Int) -> (Int) -> (Int) -> (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpy2DDtoHAsync a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 =
let {a1' = useHP a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = useDeviceHandle a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = fromIntegral a8} in
let {a9' = fromIntegral a9} in
let {a10' = fromIntegral a10} in
let {a11' = useStream a11} in
cuMemcpy2DDtoHAsync'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' a11' >>= \res ->
let {res' = cToEnum res} in
return (res')
where
useHP = castPtr . useHostPtr
peekListArray :: Storable a => Int -> DevicePtr a -> IO [a]
peekListArray !n !dptr =
F.allocaArray n $ \p -> do
peekArray n dptr p
F.peekArray n p
pokeArray :: Storable a => Int -> Ptr a -> DevicePtr a -> IO ()
pokeArray !n !hptr !dptr = doPoke undefined dptr
where
doPoke :: Storable a' => a' -> DevicePtr a' -> IO ()
doPoke x _ = nothingIfOk =<< cuMemcpyHtoD dptr hptr (n * sizeOf x)
cuMemcpyHtoD :: (DevicePtr a) -> (Ptr a) -> (Int) -> IO ((Status))
cuMemcpyHtoD a1 a2 a3 =
let {a1' = useDeviceHandle a1} in
let {a2' = castPtr a2} in
let {a3' = fromIntegral a3} in
cuMemcpyHtoD'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
pokeArrayAsync :: Storable a => Int -> HostPtr a -> DevicePtr a -> Maybe Stream -> IO ()
pokeArrayAsync !n !hptr !dptr !mst = dopoke undefined dptr
where
dopoke :: Storable a' => a' -> DevicePtr a' -> IO ()
dopoke x _ = nothingIfOk =<< cuMemcpyHtoDAsync dptr hptr (n * sizeOf x) (fromMaybe (Stream nullPtr) mst)
cuMemcpyHtoDAsync :: (DevicePtr a) -> (HostPtr a) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpyHtoDAsync a1 a2 a3 a4 =
let {a1' = useDeviceHandle a1} in
let {a2' = useHP a2} in
let {a3' = fromIntegral a3} in
let {a4' = useStream a4} in
cuMemcpyHtoDAsync'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
where
useHP = castPtr . useHostPtr
pokeArray2D
:: Storable a
=> Int
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> IO ()
pokeArray2D !w !h !hptr !hw !hx !hy !dptr !dw !dx !dy = doPoke undefined dptr
where
doPoke :: Storable a' => a' -> DevicePtr a' -> IO ()
doPoke x _ =
let bytes = sizeOf x
w' = w * bytes
hw' = hw * bytes
hx' = hx * bytes
dw' = dw * bytes
dx' = dx * bytes
in
nothingIfOk =<< cuMemcpy2DHtoD dptr dw' dx' dy hptr hw' hx' hy w' h
cuMemcpy2DHtoD :: (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Ptr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuMemcpy2DHtoD a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 =
let {a1' = useDeviceHandle a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = castPtr a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = fromIntegral a8} in
let {a9' = fromIntegral a9} in
let {a10' = fromIntegral a10} in
cuMemcpy2DHtoD'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' >>= \res ->
let {res' = cToEnum res} in
return (res')
pokeArray2DAsync
:: Storable a
=> Int
-> Int
-> HostPtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Maybe Stream
-> IO ()
pokeArray2DAsync !w !h !hptr !hw !hx !hy !dptr !dw !dx !dy !mst = doPoke undefined dptr
where
doPoke :: Storable a' => a' -> DevicePtr a' -> IO ()
doPoke x _ =
let bytes = sizeOf x
w' = w * bytes
hw' = hw * bytes
hx' = hx * bytes
dw' = dw * bytes
dx' = dx * bytes
st = fromMaybe (Stream nullPtr) mst
in
nothingIfOk =<< cuMemcpy2DHtoDAsync dptr dw' dx' dy hptr hw' hx' hy w' h st
cuMemcpy2DHtoDAsync :: (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (HostPtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpy2DHtoDAsync a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 =
let {a1' = useDeviceHandle a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = useHP a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = fromIntegral a8} in
let {a9' = fromIntegral a9} in
let {a10' = fromIntegral a10} in
let {a11' = useStream a11} in
cuMemcpy2DHtoDAsync'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' a11' >>= \res ->
let {res' = cToEnum res} in
return (res')
where
useHP = castPtr . useHostPtr
pokeListArray :: Storable a => [a] -> DevicePtr a -> IO ()
pokeListArray !xs !dptr = F.withArrayLen xs $ \ !len !p -> pokeArray len p dptr
copyArray :: Storable a => Int -> DevicePtr a -> DevicePtr a -> IO ()
copyArray !n = docopy undefined
where
docopy :: Storable a' => a' -> DevicePtr a' -> DevicePtr a' -> IO ()
docopy x src dst = nothingIfOk =<< cuMemcpyDtoD dst src (n * sizeOf x)
cuMemcpyDtoD :: (DevicePtr a) -> (DevicePtr a) -> (Int) -> IO ((Status))
cuMemcpyDtoD a1 a2 a3 =
let {a1' = useDeviceHandle a1} in
let {a2' = useDeviceHandle a2} in
let {a3' = fromIntegral a3} in
cuMemcpyDtoD'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
copyArrayAsync :: Storable a => Int -> DevicePtr a -> DevicePtr a -> Maybe Stream -> IO ()
copyArrayAsync !n !src !dst !mst = docopy undefined src
where
docopy :: Storable a' => a' -> DevicePtr a' -> IO ()
docopy x _ = nothingIfOk =<< cuMemcpyDtoDAsync dst src (n * sizeOf x) (fromMaybe (Stream nullPtr) mst)
cuMemcpyDtoDAsync :: (DevicePtr a) -> (DevicePtr a) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpyDtoDAsync a1 a2 a3 a4 =
let {a1' = useDeviceHandle a1} in
let {a2' = useDeviceHandle a2} in
let {a3' = fromIntegral a3} in
let {a4' = useStream a4} in
cuMemcpyDtoDAsync'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
copyArray2D
:: Storable a
=> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> IO ()
copyArray2D !w !h !src !hw !hx !hy !dst !dw !dx !dy = doCopy undefined dst
where
doCopy :: Storable a' => a' -> DevicePtr a' -> IO ()
doCopy x _ =
let bytes = sizeOf x
w' = w * bytes
hw' = hw * bytes
hx' = hx * bytes
dw' = dw * bytes
dx' = dx * bytes
in
nothingIfOk =<< cuMemcpy2DDtoD dst dw' dx' dy src hw' hx' hy w' h
cuMemcpy2DDtoD :: (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuMemcpy2DDtoD a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 =
let {a1' = useDeviceHandle a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = useDeviceHandle a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = fromIntegral a8} in
let {a9' = fromIntegral a9} in
let {a10' = fromIntegral a10} in
cuMemcpy2DDtoD'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' >>= \res ->
let {res' = cToEnum res} in
return (res')
copyArray2DAsync
:: Storable a
=> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Maybe Stream
-> IO ()
copyArray2DAsync !w !h !src !hw !hx !hy !dst !dw !dx !dy !mst = doCopy undefined dst
where
doCopy :: Storable a' => a' -> DevicePtr a' -> IO ()
doCopy x _ =
let bytes = sizeOf x
w' = w * bytes
hw' = hw * bytes
hx' = hx * bytes
dw' = dw * bytes
dx' = dx * bytes
st = fromMaybe (Stream nullPtr) mst
in
nothingIfOk =<< cuMemcpy2DDtoDAsync dst dw' dx' dy src hw' hx' hy w' h st
cuMemcpy2DDtoDAsync :: (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpy2DDtoDAsync a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 =
let {a1' = useDeviceHandle a1} in
let {a2' = fromIntegral a2} in
let {a3' = fromIntegral a3} in
let {a4' = fromIntegral a4} in
let {a5' = useDeviceHandle a5} in
let {a6' = fromIntegral a6} in
let {a7' = fromIntegral a7} in
let {a8' = fromIntegral a8} in
let {a9' = fromIntegral a9} in
let {a10' = fromIntegral a10} in
let {a11' = useStream a11} in
cuMemcpy2DDtoDAsync'_ a1' a2' a3' a4' a5' a6' a7' a8' a9' a10' a11' >>= \res ->
let {res' = cToEnum res} in
return (res')
copyArrayPeer :: Storable a
=> Int
-> DevicePtr a -> Context
-> DevicePtr a -> Context
-> IO ()
copyArrayPeer !n !src !srcCtx !dst !dstCtx = go undefined src dst
where
go :: Storable b => b -> DevicePtr b -> DevicePtr b -> IO ()
go x _ _ = nothingIfOk =<< cuMemcpyPeer dst dstCtx src srcCtx (n * sizeOf x)
cuMemcpyPeer :: (DevicePtr a) -> (Context) -> (DevicePtr a) -> (Context) -> (Int) -> IO ((Status))
cuMemcpyPeer a1 a2 a3 a4 a5 =
let {a1' = useDeviceHandle a1} in
let {a2' = useContext a2} in
let {a3' = useDeviceHandle a3} in
let {a4' = useContext a4} in
let {a5' = fromIntegral a5} in
cuMemcpyPeer'_ a1' a2' a3' a4' a5' >>= \res ->
let {res' = cToEnum res} in
return (res')
copyArrayPeerAsync :: Storable a
=> Int
-> DevicePtr a -> Context
-> DevicePtr a -> Context
-> Maybe Stream
-> IO ()
copyArrayPeerAsync !n !src !srcCtx !dst !dstCtx !st = go undefined src dst
where
go :: Storable b => b -> DevicePtr b -> DevicePtr b -> IO ()
go x _ _ = nothingIfOk =<< cuMemcpyPeerAsync dst dstCtx src srcCtx (n * sizeOf x) stream
stream = fromMaybe (Stream nullPtr) st
cuMemcpyPeerAsync :: (DevicePtr a) -> (Context) -> (DevicePtr a) -> (Context) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpyPeerAsync a1 a2 a3 a4 a5 a6 =
let {a1' = useDeviceHandle a1} in
let {a2' = useContext a2} in
let {a3' = useDeviceHandle a3} in
let {a4' = useContext a4} in
let {a5' = fromIntegral a5} in
let {a6' = useStream a6} in
cuMemcpyPeerAsync'_ a1' a2' a3' a4' a5' a6' >>= \res ->
let {res' = cToEnum res} in
return (res')
newListArrayLen :: Storable a => [a] -> IO (DevicePtr a, Int)
newListArrayLen xs =
F.withArrayLen xs $ \len p ->
bracketOnError (mallocArray len) free $ \d_xs -> do
pokeArray len p d_xs
return (d_xs, len)
newListArray :: Storable a => [a] -> IO (DevicePtr a)
newListArray xs = fst `fmap` newListArrayLen xs
withListArray :: Storable a => [a] -> (DevicePtr a -> IO b) -> IO b
withListArray xs = withListArrayLen xs . const
withListArrayLen :: Storable a => [a] -> (Int -> DevicePtr a -> IO b) -> IO b
withListArrayLen xs f =
bracket (newListArrayLen xs) (free . fst) (uncurry . flip $ f)
memset :: Storable a => DevicePtr a -> Int -> a -> IO ()
memset !dptr !n !val = case sizeOf val of
1 -> nothingIfOk =<< cuMemsetD8 dptr val n
2 -> nothingIfOk =<< cuMemsetD16 dptr val n
4 -> nothingIfOk =<< cuMemsetD32 dptr val n
_ -> cudaError "can only memset 8-, 16-, and 32-bit values"
cuMemsetD8 :: (DevicePtr a) -> (a) -> (Int) -> IO ((Status))
cuMemsetD8 a1 a2 a3 =
let {a1' = useDeviceHandle a1} in
let {a2' = unsafeCoerce a2} in
let {a3' = fromIntegral a3} in
cuMemsetD8'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
cuMemsetD16 :: (DevicePtr a) -> (a) -> (Int) -> IO ((Status))
cuMemsetD16 a1 a2 a3 =
let {a1' = useDeviceHandle a1} in
let {a2' = unsafeCoerce a2} in
let {a3' = fromIntegral a3} in
cuMemsetD16'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
cuMemsetD32 :: (DevicePtr a) -> (a) -> (Int) -> IO ((Status))
cuMemsetD32 a1 a2 a3 =
let {a1' = useDeviceHandle a1} in
let {a2' = unsafeCoerce a2} in
let {a3' = fromIntegral a3} in
cuMemsetD32'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
return (res')
memsetAsync :: Storable a => DevicePtr a -> Int -> a -> Maybe Stream -> IO ()
memsetAsync !dptr !n !val !mst = case sizeOf val of
1 -> nothingIfOk =<< cuMemsetD8Async dptr val n stream
2 -> nothingIfOk =<< cuMemsetD16Async dptr val n stream
4 -> nothingIfOk =<< cuMemsetD32Async dptr val n stream
_ -> cudaError "can only memset 8-, 16-, and 32-bit values"
where
stream = fromMaybe (Stream nullPtr) mst
cuMemsetD8Async :: (DevicePtr a) -> (a) -> (Int) -> (Stream) -> IO ((Status))
cuMemsetD8Async a1 a2 a3 a4 =
let {a1' = useDeviceHandle a1} in
let {a2' = unsafeCoerce a2} in
let {a3' = fromIntegral a3} in
let {a4' = useStream a4} in
cuMemsetD8Async'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
cuMemsetD16Async :: (DevicePtr a) -> (a) -> (Int) -> (Stream) -> IO ((Status))
cuMemsetD16Async a1 a2 a3 a4 =
let {a1' = useDeviceHandle a1} in
let {a2' = unsafeCoerce a2} in
let {a3' = fromIntegral a3} in
let {a4' = useStream a4} in
cuMemsetD16Async'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
cuMemsetD32Async :: (DevicePtr a) -> (a) -> (Int) -> (Stream) -> IO ((Status))
cuMemsetD32Async a1 a2 a3 a4 =
let {a1' = useDeviceHandle a1} in
let {a2' = unsafeCoerce a2} in
let {a3' = fromIntegral a3} in
let {a4' = useStream a4} in
cuMemsetD32Async'_ a1' a2' a3' a4' >>= \res ->
let {res' = cToEnum res} in
return (res')
getDevicePtr :: [AllocFlag] -> HostPtr a -> IO (DevicePtr a)
getDevicePtr !flags !hp = resultIfOk =<< cuMemHostGetDevicePointer hp flags
cuMemHostGetDevicePointer :: (HostPtr a) -> ([AllocFlag]) -> IO ((Status), (DevicePtr a))
cuMemHostGetDevicePointer a2 a3 =
alloca' $ \a1' ->
let {a2' = useHP a2} in
let {a3' = combineBitMasks a3} in
cuMemHostGetDevicePointer'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
peekDeviceHandle a1'>>= \a1'' ->
return (res', a1'')
where
alloca' = F.alloca
useHP = castPtr . useHostPtr
getBasePtr :: DevicePtr a -> IO (DevicePtr a, Int64)
getBasePtr !dptr = do
(status,base,size) <- cuMemGetAddressRange dptr
resultIfOk (status, (base,size))
cuMemGetAddressRange :: (DevicePtr a) -> IO ((Status), (DevicePtr a), (Int64))
cuMemGetAddressRange a3 =
alloca' $ \a1' ->
alloca' $ \a2' ->
let {a3' = useDeviceHandle a3} in
cuMemGetAddressRange'_ a1' a2' a3' >>= \res ->
let {res' = cToEnum res} in
peekDeviceHandle a1'>>= \a1'' ->
peekIntConv a2'>>= \a2'' ->
return (res', a1'', a2'')
where
alloca' :: Storable a => (Ptr a -> IO b) -> IO b
alloca' = F.alloca
getMemInfo :: IO (Int64, Int64)
getMemInfo = do
(!status,!f,!t) <- cuMemGetInfo
resultIfOk (status,(f,t))
cuMemGetInfo :: IO ((Status), (Int64), (Int64))
cuMemGetInfo =
alloca' $ \a1' ->
alloca' $ \a2' ->
cuMemGetInfo'_ a1' a2' >>= \res ->
let {res' = cToEnum res} in
peekIntConv a1'>>= \a1'' ->
peekIntConv a2'>>= \a2'' ->
return (res', a1'', a2'')
where
alloca' = F.alloca
type DeviceHandle = (CULLong)
peekDeviceHandle :: Ptr DeviceHandle -> IO (DevicePtr a)
peekDeviceHandle !p = DevicePtr . intPtrToPtr . fromIntegral <$> peek p
useDeviceHandle :: DevicePtr a -> DeviceHandle
useDeviceHandle = fromIntegral . ptrToIntPtr . useDevicePtr
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemHostAlloc"
cuMemHostAlloc'_ :: ((Ptr (Ptr ())) -> (CULong -> (CUInt -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemFreeHost"
cuMemFreeHost'_ :: ((Ptr ()) -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemHostRegister"
cuMemHostRegister'_ :: ((Ptr ()) -> (CULong -> (CUInt -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemHostUnregister"
cuMemHostUnregister'_ :: ((Ptr ()) -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemAlloc"
cuMemAlloc'_ :: ((Ptr CULLong) -> (CULong -> (IO CInt)))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemFree"
cuMemFree'_ :: (CULLong -> (IO CInt))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemAllocManaged"
cuMemAllocManaged'_ :: ((Ptr CULLong) -> (CULong -> (CUInt -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyDtoH"
cuMemcpyDtoH'_ :: ((Ptr ()) -> (CULLong -> (CULong -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyDtoHAsync"
cuMemcpyDtoHAsync'_ :: ((Ptr ()) -> (CULLong -> (CULong -> ((Ptr ()) -> (IO CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DDtoH"
cuMemcpy2DDtoH'_ :: ((Ptr ()) -> (CUInt -> (CUInt -> (CUInt -> (CULLong -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (IO CInt)))))))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DDtoHAsync"
cuMemcpy2DDtoHAsync'_ :: ((Ptr ()) -> (CUInt -> (CUInt -> (CUInt -> (CULLong -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> ((Ptr ()) -> (IO CInt))))))))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyHtoD"
cuMemcpyHtoD'_ :: (CULLong -> ((Ptr ()) -> (CULong -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyHtoDAsync"
cuMemcpyHtoDAsync'_ :: (CULLong -> ((Ptr ()) -> (CULong -> ((Ptr ()) -> (IO CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DHtoD"
cuMemcpy2DHtoD'_ :: (CULLong -> (CUInt -> (CUInt -> (CUInt -> ((Ptr ()) -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (IO CInt)))))))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DHtoDAsync"
cuMemcpy2DHtoDAsync'_ :: (CULLong -> (CUInt -> (CUInt -> (CUInt -> ((Ptr ()) -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> ((Ptr ()) -> (IO CInt))))))))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyDtoD"
cuMemcpyDtoD'_ :: (CULLong -> (CULLong -> (CULong -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyDtoDAsync"
cuMemcpyDtoDAsync'_ :: (CULLong -> (CULLong -> (CULong -> ((Ptr ()) -> (IO CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DDtoD"
cuMemcpy2DDtoD'_ :: (CULLong -> (CUInt -> (CUInt -> (CUInt -> (CULLong -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (IO CInt)))))))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DDtoDAsync"
cuMemcpy2DDtoDAsync'_ :: (CULLong -> (CUInt -> (CUInt -> (CUInt -> (CULLong -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> (CUInt -> ((Ptr ()) -> (IO CInt))))))))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyPeer"
cuMemcpyPeer'_ :: (CULLong -> ((Ptr ()) -> (CULLong -> ((Ptr ()) -> (CULong -> (IO CInt))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyPeerAsync"
cuMemcpyPeerAsync'_ :: (CULLong -> ((Ptr ()) -> (CULLong -> ((Ptr ()) -> (CULong -> ((Ptr ()) -> (IO CInt)))))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD8"
cuMemsetD8'_ :: (CULLong -> (CUChar -> (CULong -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD16"
cuMemsetD16'_ :: (CULLong -> (CUShort -> (CULong -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD32"
cuMemsetD32'_ :: (CULLong -> (CUInt -> (CULong -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD8Async"
cuMemsetD8Async'_ :: (CULLong -> (CUChar -> (CULong -> ((Ptr ()) -> (IO CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD16Async"
cuMemsetD16Async'_ :: (CULLong -> (CUShort -> (CULong -> ((Ptr ()) -> (IO CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD32Async"
cuMemsetD32Async'_ :: (CULLong -> (CUInt -> (CULong -> ((Ptr ()) -> (IO CInt)))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemHostGetDevicePointer"
cuMemHostGetDevicePointer'_ :: ((Ptr CULLong) -> ((Ptr ()) -> (CUInt -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemGetAddressRange"
cuMemGetAddressRange'_ :: ((Ptr CULLong) -> ((Ptr CULong) -> (CULLong -> (IO CInt))))
foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemGetInfo"
cuMemGetInfo'_ :: ((Ptr CULong) -> ((Ptr CULong) -> (IO CInt)))