-- GENERATED by C->Haskell Compiler, version 0.28.6 Switcheroo, 25 November 2017 (Haskell)
-- Edit the ORIGNAL .chs file instead!


{-# LINE 1 "src/Foreign/CUDA/Driver/Marshal.chs" #-}
{-# LANGUAGE BangPatterns             #-}
{-# LANGUAGE CPP                      #-}
{-# LANGUAGE EmptyDataDecls           #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE MagicHash                #-}
{-# LANGUAGE ScopedTypeVariables      #-}
{-# LANGUAGE TemplateHaskell          #-}
{-# OPTIONS_HADDOCK prune #-}
--------------------------------------------------------------------------------
-- |
-- Module    : Foreign.CUDA.Driver.Marshal
-- Copyright : [2009..2018] Trevor L. McDonell
-- License   : BSD
--
-- Memory management for low-level driver interface
--
--------------------------------------------------------------------------------

module Foreign.CUDA.Driver.Marshal (

  -- * Host Allocation
  AllocFlag(..),
  mallocHostArray, mallocHostForeignPtr, freeHost,
  registerArray, unregisterArray,

  -- * Device Allocation
  mallocArray, allocaArray, free,

  -- * Unified Memory Allocation
  AttachFlag(..),
  mallocManagedArray,
  prefetchArrayAsync,
  attachArrayAsync,

  -- * Marshalling
  peekArray, peekArrayAsync, peekArray2D, peekArray2DAsync, peekListArray,
  pokeArray, pokeArrayAsync, pokeArray2D, pokeArray2DAsync, pokeListArray,
  copyArray, copyArrayAsync, copyArray2D, copyArray2DAsync,
  copyArrayPeer, copyArrayPeerAsync,

  -- * Combined Allocation and Marshalling
  newListArray,  newListArrayLen,
  withListArray, withListArrayLen,

  -- * Utility
  memset, memsetAsync,
  getDevicePtr, getBasePtr, getMemInfo,

  -- Internal
  useDeviceHandle, peekDeviceHandle

) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.Ptr as C2HSImp





{-# LINE 55 "src/Foreign/CUDA/Driver/Marshal.chs" #-}


-- Friends
import Foreign.CUDA.Ptr
import Foreign.CUDA.Driver.Device
import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Driver.Stream                       ( Stream(..), defaultStream )
import Foreign.CUDA.Driver.Context.Base                 ( Context(..) )
import Foreign.CUDA.Internal.C2HS

-- System
import Data.Int
import Data.Maybe
import Data.Word
import Unsafe.Coerce
import Control.Applicative
import Control.Exception
import Prelude

import Foreign.C
import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.Storable
import qualified Foreign.Marshal                        as F

import GHC.Ptr
import GHC.Word
import GHC.Base

--------------------------------------------------------------------------------
-- Host Allocation
--------------------------------------------------------------------------------

-- |
-- Options for host allocation
--
data AllocFlag = Portable
               | DeviceMapped
               | WriteCombined
  deriving (AllocFlag -> AllocFlag -> Bool
(AllocFlag -> AllocFlag -> Bool)
-> (AllocFlag -> AllocFlag -> Bool) -> Eq AllocFlag
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: AllocFlag -> AllocFlag -> Bool
$c/= :: AllocFlag -> AllocFlag -> Bool
== :: AllocFlag -> AllocFlag -> Bool
$c== :: AllocFlag -> AllocFlag -> Bool
Eq,Int -> AllocFlag -> ShowS
[AllocFlag] -> ShowS
AllocFlag -> String
(Int -> AllocFlag -> ShowS)
-> (AllocFlag -> String)
-> ([AllocFlag] -> ShowS)
-> Show AllocFlag
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AllocFlag] -> ShowS
$cshowList :: [AllocFlag] -> ShowS
show :: AllocFlag -> String
$cshow :: AllocFlag -> String
showsPrec :: Int -> AllocFlag -> ShowS
$cshowsPrec :: Int -> AllocFlag -> ShowS
Show,AllocFlag
AllocFlag -> AllocFlag -> Bounded AllocFlag
forall a. a -> a -> Bounded a
maxBound :: AllocFlag
$cmaxBound :: AllocFlag
minBound :: AllocFlag
$cminBound :: AllocFlag
Bounded)
instance Enum AllocFlag where
  succ :: AllocFlag -> AllocFlag
succ AllocFlag
Portable = AllocFlag
DeviceMapped
  succ AllocFlag
DeviceMapped = AllocFlag
WriteCombined
  succ AllocFlag
WriteCombined = String -> AllocFlag
forall a. HasCallStack => String -> a
error String
"AllocFlag.succ: WriteCombined has no successor"

  pred :: AllocFlag -> AllocFlag
pred AllocFlag
DeviceMapped = AllocFlag
Portable
  pred AllocFlag
WriteCombined = AllocFlag
DeviceMapped
  pred AllocFlag
Portable = String -> AllocFlag
forall a. HasCallStack => String -> a
error String
"AllocFlag.pred: Portable has no predecessor"

  enumFromTo :: AllocFlag -> AllocFlag -> [AllocFlag]
enumFromTo AllocFlag
from AllocFlag
to = AllocFlag -> [AllocFlag]
forall t. Enum t => t -> [t]
go AllocFlag
from
    where
      end :: Int
end = AllocFlag -> Int
forall a. Enum a => a -> Int
fromEnum AllocFlag
to
      go :: t -> [t]
go t
v = case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (t -> Int
forall a. Enum a => a -> Int
fromEnum t
v) Int
end of
                 Ordering
LT -> t
v t -> [t] -> [t]
forall a. a -> [a] -> [a]
: t -> [t]
go (t -> t
forall a. Enum a => a -> a
succ t
v)
                 Ordering
EQ -> [t
v]
                 Ordering
GT -> []

  enumFrom :: AllocFlag -> [AllocFlag]
enumFrom AllocFlag
from = AllocFlag -> AllocFlag -> [AllocFlag]
forall a. Enum a => a -> a -> [a]
enumFromTo AllocFlag
from AllocFlag
WriteCombined

  fromEnum :: AllocFlag -> Int
fromEnum AllocFlag
Portable = Int
1
  fromEnum AllocFlag
DeviceMapped = Int
2
  fromEnum AllocFlag
WriteCombined = Int
4

  toEnum 1 = Portable
  toEnum 2 = DeviceMapped
  toEnum 4 = WriteCombined
  toEnum unmatched = error ("AllocFlag.toEnum: Cannot match " ++ show unmatched)

{-# LINE 101 "src/Foreign/CUDA/Driver/Marshal.chs" #-}


-- |
-- Allocate a section of linear memory on the host which is page-locked and
-- directly accessible from the device. The storage is sufficient to hold the
-- given number of elements of a storable type.
--
-- Note that since the amount of pageable memory is thusly reduced, overall
-- system performance may suffer. This is best used sparingly to allocate
-- staging areas for data exchange.
--
-- Host memory allocated in this way is automatically and immediately
-- accessible to all contexts on all devices which support unified
-- addressing.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gdd8311286d2c2691605362c689bc64e0>
--
{-# INLINEABLE mallocHostArray #-}
mallocHostArray :: Storable a => [AllocFlag] -> Int -> IO (HostPtr a)
mallocHostArray !flags = doMalloc undefined
  where
    doMalloc :: Storable a' => a' -> Int -> IO (HostPtr a')
    doMalloc x !n = resultIfOk =<< cuMemHostAlloc (n * sizeOf x) flags

-- |
-- As 'mallocHostArray', but return a 'ForeignPtr' instead. The array will be
-- deallocated automatically once the last reference to the 'ForeignPtr' is
-- dropped.
--
{-# INLINEABLE mallocHostForeignPtr #-}
{-# SPECIALISE mallocHostForeignPtr :: [AllocFlag] -> Int -> IO (ForeignPtr Word8) #-}
mallocHostForeignPtr :: Storable a => [AllocFlag] -> Int -> IO (ForeignPtr a)
mallocHostForeignPtr :: [AllocFlag] -> Int -> IO (ForeignPtr a)
mallocHostForeignPtr ![AllocFlag]
flags !Int
size = do
  HostPtr Ptr a
ptr <- [AllocFlag] -> Int -> IO (HostPtr a)
forall a. Storable a => [AllocFlag] -> Int -> IO (HostPtr a)
mallocHostArray [AllocFlag]
flags Int
size
  FinalizerPtr a -> Ptr a -> IO (ForeignPtr a)
forall a. FinalizerPtr a -> Ptr a -> IO (ForeignPtr a)
newForeignPtr FinalizerPtr a
forall a. FinalizerPtr a
finalizerMemFreeHost Ptr a
ptr

{-# INLINE cuMemHostAlloc #-}
cuMemHostAlloc :: (Int) -> ([AllocFlag]) -> IO ((Status), (HostPtr a))
cuMemHostAlloc :: Int -> [AllocFlag] -> IO (Status, HostPtr a)
cuMemHostAlloc Int
a2 [AllocFlag]
a3 =
  (Ptr (Ptr ()) -> IO (Status, HostPtr a)) -> IO (Status, HostPtr a)
forall b b. (Ptr b -> IO b) -> IO b
alloca' ((Ptr (Ptr ()) -> IO (Status, HostPtr a))
 -> IO (Status, HostPtr a))
-> (Ptr (Ptr ()) -> IO (Status, HostPtr a))
-> IO (Status, HostPtr a)
forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr ())
a1' -> 
  let {a2' :: CULong
a2' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = [AllocFlag] -> CUInt
forall a b. (Enum a, Num b, Bits b) => [a] -> b
combineBitMasks [AllocFlag]
a3} in 
  Ptr (Ptr ()) -> CULong -> CUInt -> IO CInt
cuMemHostAlloc'_ Ptr (Ptr ())
a1' CULong
a2' CUInt
a3' IO CInt
-> (CInt -> IO (Status, HostPtr a)) -> IO (Status, HostPtr a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Ptr (Ptr ()) -> IO (HostPtr a)
forall a a. Ptr (Ptr a) -> IO (HostPtr a)
peekHP  Ptr (Ptr ())
a1'IO (HostPtr a)
-> (HostPtr a -> IO (Status, HostPtr a)) -> IO (Status, HostPtr a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \HostPtr a
a1'' -> 
  (Status, HostPtr a) -> IO (Status, HostPtr a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res', HostPtr a
a1'')

{-# LINE 141 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)
    peekHP !p  = HostPtr . castPtr <$> peek p

-- Pointer to the foreign function to release host arrays, which may be used as
-- a finalizer. Technically this function has a non-void return type, but I am
-- hoping that that doesn't matter...
--
foreign import ccall "&cuMemFreeHost" finalizerMemFreeHost :: FinalizerPtr a

-- |
-- Free a section of page-locked host memory.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g62e0fdbe181dab6b1c90fa1a51c7b92c>
--
{-# INLINEABLE freeHost #-}
freeHost :: HostPtr a -> IO ()
freeHost :: HostPtr a -> IO ()
freeHost !HostPtr a
p = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< HostPtr a -> IO Status
forall a. HostPtr a -> IO Status
cuMemFreeHost HostPtr a
p

{-# INLINE cuMemFreeHost #-}
cuMemFreeHost :: (HostPtr a) -> IO ((Status))
cuMemFreeHost :: HostPtr a -> IO Status
cuMemFreeHost HostPtr a
a1 =
  let {a1' :: Ptr b
a1' = HostPtr a -> Ptr b
forall a b. HostPtr a -> Ptr b
useHP HostPtr a
a1} in 
  Ptr () -> IO CInt
cuMemFreeHost'_ Ptr ()
forall b. Ptr b
a1' IO CInt -> (CInt -> IO Status) -> IO Status
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 163 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    useHP = castPtr . useHostPtr


-- |
-- Page-locks the specified array (on the host) and maps it for the device(s) as
-- specified by the given allocation flags. Subsequently, the memory is accessed
-- directly by the device so can be read and written with much higher bandwidth
-- than pageable memory that has not been registered. The memory range is added
-- to the same tracking mechanism as 'mallocHostArray' to automatically
-- accelerate calls to functions such as 'pokeArray'.
--
-- Note that page-locking excessive amounts of memory may degrade system
-- performance, since it reduces the amount of pageable memory available. This
-- is best used sparingly to allocate staging areas for data exchange.
--
-- This function has limited support on Mac OS X. OS 10.7 or later is
-- required.
--
-- Requires CUDA-4.0.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gf0a9fe11544326dabd743b7aa6b54223>
--
{-# INLINEABLE registerArray #-}
registerArray :: Storable a => [AllocFlag] -> Int -> Ptr a -> IO (HostPtr a)
registerArray :: [AllocFlag] -> Int -> Ptr a -> IO (HostPtr a)
registerArray ![AllocFlag]
flags !Int
n = a -> Ptr a -> IO (HostPtr a)
forall b. Storable b => b -> Ptr b -> IO (HostPtr b)
go a
forall a. HasCallStack => a
undefined
  where
    go :: Storable b => b -> Ptr b -> IO (HostPtr b)
    go :: b -> Ptr b -> IO (HostPtr b)
go b
x !Ptr b
p = do
      Status
status <- Ptr b -> Int -> [AllocFlag] -> IO Status
forall a. Ptr a -> Int -> [AllocFlag] -> IO Status
cuMemHostRegister Ptr b
p (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* b -> Int
forall a. Storable a => a -> Int
sizeOf b
x) [AllocFlag]
flags
      (Status, HostPtr b) -> IO (HostPtr b)
forall a. (Status, a) -> IO a
resultIfOk (Status
status,Ptr b -> HostPtr b
forall a. Ptr a -> HostPtr a
HostPtr Ptr b
p)

{-# INLINE cuMemHostRegister #-}
cuMemHostRegister :: (Ptr a) -> (Int) -> ([AllocFlag]) -> IO ((Status))
cuMemHostRegister :: Ptr a -> Int -> [AllocFlag] -> IO Status
cuMemHostRegister Ptr a
a1 Int
a2 [AllocFlag]
a3 =
  let {a1' :: Ptr b
a1' = Ptr a -> Ptr b
forall a b. Ptr a -> Ptr b
castPtr Ptr a
a1} in 
  let {a2' :: CULong
a2' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = [AllocFlag] -> CUInt
forall a b. (Enum a, Num b, Bits b) => [a] -> b
combineBitMasks [AllocFlag]
a3} in 
  Ptr () -> CULong -> CUInt -> IO CInt
cuMemHostRegister'_ Ptr ()
forall b. Ptr b
a1' CULong
a2' CUInt
a3' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 204 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Unmaps the memory from the given pointer, and makes it pageable again.
--
-- Requires CUDA-4.0.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g63f450c8125359be87b7623b1c0b2a14>
--
{-# INLINEABLE unregisterArray #-}
unregisterArray :: HostPtr a -> IO (Ptr a)
unregisterArray :: HostPtr a -> IO (Ptr a)
unregisterArray (HostPtr !Ptr a
p) = do
  Status
status <- Ptr a -> IO Status
forall a. Ptr a -> IO Status
cuMemHostUnregister Ptr a
p
  (Status, Ptr a) -> IO (Ptr a)
forall a. (Status, a) -> IO a
resultIfOk (Status
status,Ptr a
p)

{-# INLINE cuMemHostUnregister #-}
cuMemHostUnregister :: (Ptr a) -> IO ((Status))
cuMemHostUnregister :: Ptr a -> IO Status
cuMemHostUnregister Ptr a
a1 =
  let {a1' :: Ptr b
a1' = Ptr a -> Ptr b
forall a b. Ptr a -> Ptr b
castPtr Ptr a
a1} in 
  Ptr () -> IO CInt
cuMemHostUnregister'_ Ptr ()
forall b. Ptr b
a1' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 226 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



--------------------------------------------------------------------------------
-- Device Allocation
--------------------------------------------------------------------------------

-- |
-- Allocate a section of linear memory on the device, and return a reference to
-- it. The memory is sufficient to hold the given number of elements of storable
-- type. It is suitably aligned for any type, and is not cleared.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gb82d2a09844a58dd9e744dc31e8aa467>
--
{-# INLINEABLE mallocArray #-}
mallocArray :: Storable a => Int -> IO (DevicePtr a)
mallocArray :: Int -> IO (DevicePtr a)
mallocArray = a -> Int -> IO (DevicePtr a)
forall a'. Storable a' => a' -> Int -> IO (DevicePtr a')
doMalloc a
forall a. HasCallStack => a
undefined
  where
    doMalloc :: Storable a' => a' -> Int -> IO (DevicePtr a')
    doMalloc :: a' -> Int -> IO (DevicePtr a')
doMalloc a'
x !Int
n = (Status, DevicePtr a') -> IO (DevicePtr a')
forall a. (Status, a) -> IO a
resultIfOk ((Status, DevicePtr a') -> IO (DevicePtr a'))
-> IO (Status, DevicePtr a') -> IO (DevicePtr a')
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Int -> IO (Status, DevicePtr a')
forall a. Int -> IO (Status, DevicePtr a)
cuMemAlloc (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x)

{-# INLINE cuMemAlloc #-}
cuMemAlloc :: (Int) -> IO ((Status), (DevicePtr a))
cuMemAlloc :: Int -> IO (Status, DevicePtr a)
cuMemAlloc Int
a2 =
  (Ptr CULLong -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall b b. (Ptr b -> IO b) -> IO b
alloca' ((Ptr CULLong -> IO (Status, DevicePtr a))
 -> IO (Status, DevicePtr a))
-> (Ptr CULLong -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall a b. (a -> b) -> a -> b
$ \Ptr CULLong
a1' -> 
  let {a2' :: CULong
a2' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  Ptr CULLong -> CULong -> IO CInt
cuMemAlloc'_ Ptr CULLong
a1' CULong
a2' IO CInt
-> (CInt -> IO (Status, DevicePtr a)) -> IO (Status, DevicePtr a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Ptr CULLong -> IO (DevicePtr a)
forall a. Ptr CULLong -> IO (DevicePtr a)
peekDeviceHandle  Ptr CULLong
a1'IO (DevicePtr a)
-> (DevicePtr a -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \DevicePtr a
a1'' -> 
  (Status, DevicePtr a) -> IO (Status, DevicePtr a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res', DevicePtr a
a1'')

{-# LINE 250 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)


-- |
-- Execute a computation on the device, passing a pointer to a temporarily
-- allocated block of memory sufficient to hold the given number of elements of
-- storable type. The memory is freed when the computation terminates (normally
-- or via an exception), so the pointer must not be used after this.
--
-- Note that kernel launches can be asynchronous, so you may want to add a
-- synchronisation point using 'Foreign.CUDA.Driver.Context.sync' as part
-- of the continuation.
--
{-# INLINEABLE allocaArray #-}
allocaArray :: Storable a => Int -> (DevicePtr a -> IO b) -> IO b
allocaArray :: Int -> (DevicePtr a -> IO b) -> IO b
allocaArray !Int
n = IO (DevicePtr a)
-> (DevicePtr a -> IO ()) -> (DevicePtr a -> IO b) -> IO b
forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracket (Int -> IO (DevicePtr a)
forall a. Storable a => Int -> IO (DevicePtr a)
mallocArray Int
n) DevicePtr a -> IO ()
forall a. DevicePtr a -> IO ()
free


-- |
-- Release a section of device memory.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g89b3f154e17cc89b6eea277dbdf5c93a>
--
{-# INLINEABLE free #-}
free :: DevicePtr a -> IO ()
free :: DevicePtr a -> IO ()
free !DevicePtr a
dp = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> IO Status
forall a. DevicePtr a -> IO Status
cuMemFree DevicePtr a
dp

{-# INLINE cuMemFree #-}
cuMemFree :: (DevicePtr a) -> IO ((Status))
cuMemFree :: DevicePtr a -> IO Status
cuMemFree DevicePtr a
a1 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  CULLong -> IO CInt
cuMemFree'_ CULLong
a1' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 281 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



--------------------------------------------------------------------------------
-- Unified memory allocations
--------------------------------------------------------------------------------

-- |
-- Options for unified memory allocations
--
data AttachFlag = CuMemAttachGlobal
                | CuMemAttachHost
                | CuMemAttachSingle
  deriving (AttachFlag -> AttachFlag -> Bool
(AttachFlag -> AttachFlag -> Bool)
-> (AttachFlag -> AttachFlag -> Bool) -> Eq AttachFlag
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: AttachFlag -> AttachFlag -> Bool
$c/= :: AttachFlag -> AttachFlag -> Bool
== :: AttachFlag -> AttachFlag -> Bool
$c== :: AttachFlag -> AttachFlag -> Bool
Eq,Int -> AttachFlag -> ShowS
[AttachFlag] -> ShowS
AttachFlag -> String
(Int -> AttachFlag -> ShowS)
-> (AttachFlag -> String)
-> ([AttachFlag] -> ShowS)
-> Show AttachFlag
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AttachFlag] -> ShowS
$cshowList :: [AttachFlag] -> ShowS
show :: AttachFlag -> String
$cshow :: AttachFlag -> String
showsPrec :: Int -> AttachFlag -> ShowS
$cshowsPrec :: Int -> AttachFlag -> ShowS
Show,AttachFlag
AttachFlag -> AttachFlag -> Bounded AttachFlag
forall a. a -> a -> Bounded a
maxBound :: AttachFlag
$cmaxBound :: AttachFlag
minBound :: AttachFlag
$cminBound :: AttachFlag
Bounded)
instance Enum AttachFlag where
  succ :: AttachFlag -> AttachFlag
succ AttachFlag
CuMemAttachGlobal = AttachFlag
CuMemAttachHost
  succ AttachFlag
CuMemAttachHost = AttachFlag
CuMemAttachSingle
  succ AttachFlag
CuMemAttachSingle = String -> AttachFlag
forall a. HasCallStack => String -> a
error String
"AttachFlag.succ: CuMemAttachSingle has no successor"

  pred :: AttachFlag -> AttachFlag
pred AttachFlag
CuMemAttachHost = AttachFlag
CuMemAttachGlobal
  pred AttachFlag
CuMemAttachSingle = AttachFlag
CuMemAttachHost
  pred AttachFlag
CuMemAttachGlobal = String -> AttachFlag
forall a. HasCallStack => String -> a
error String
"AttachFlag.pred: CuMemAttachGlobal has no predecessor"

  enumFromTo :: AttachFlag -> AttachFlag -> [AttachFlag]
enumFromTo AttachFlag
from AttachFlag
to = AttachFlag -> [AttachFlag]
forall t. Enum t => t -> [t]
go AttachFlag
from
    where
      end :: Int
end = AttachFlag -> Int
forall a. Enum a => a -> Int
fromEnum AttachFlag
to
      go :: t -> [t]
go t
v = case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (t -> Int
forall a. Enum a => a -> Int
fromEnum t
v) Int
end of
                 Ordering
LT -> t
v t -> [t] -> [t]
forall a. a -> [a] -> [a]
: t -> [t]
go (t -> t
forall a. Enum a => a -> a
succ t
v)
                 Ordering
EQ -> [t
v]
                 Ordering
GT -> []

  enumFrom :: AttachFlag -> [AttachFlag]
enumFrom AttachFlag
from = AttachFlag -> AttachFlag -> [AttachFlag]
forall a. Enum a => a -> a -> [a]
enumFromTo AttachFlag
from AttachFlag
CuMemAttachSingle

  fromEnum :: AttachFlag -> Int
fromEnum AttachFlag
CuMemAttachGlobal = Int
1
  fromEnum AttachFlag
CuMemAttachHost = Int
2
  fromEnum AttachFlag
CuMemAttachSingle = Int
4

  toEnum :: Int -> AttachFlag
toEnum Int
1 = AttachFlag
CuMemAttachGlobal
  toEnum Int
2 = AttachFlag
CuMemAttachHost
  toEnum Int
4 = AttachFlag
CuMemAttachSingle
  toEnum unmatched = error ("AttachFlag.toEnum: Cannot match " ++ show unmatched)

{-# LINE 297 "src/Foreign/CUDA/Driver/Marshal.chs" #-}


-- |
-- Allocates memory that will be automatically managed by the Unified Memory
-- system. The returned pointer is valid on the CPU and on all GPUs which
-- supported managed memory. All accesses to this pointer must obey the
-- Unified Memory programming model.
--
-- On a multi-GPU system with peer-to-peer support, where multiple GPUs
-- support managed memory, the physical storage is created on the GPU which
-- is active at the time 'mallocManagedArray' is called. All other GPUs
-- will access the array at reduced bandwidth via peer mapping over the
-- PCIe bus. The Unified Memory system does not migrate memory between
-- GPUs.
--
-- On a multi-GPU system where multiple GPUs support managed memory, but
-- not all pairs of such GPUs have peer-to-peer support between them, the
-- physical storage is allocated in system memory (zero-copy memory) and
-- all GPUs will access the data at reduced bandwidth over the PCIe bus.
--
-- Requires CUDA-6.0
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gb347ded34dc326af404aa02af5388a32>
--
{-# INLINEABLE mallocManagedArray #-}
mallocManagedArray :: Storable a => [AttachFlag] -> Int -> IO (DevicePtr a)
mallocManagedArray !flags = doMalloc undefined
  where
    doMalloc :: Storable a' => a' -> Int -> IO (DevicePtr a')
    doMalloc x !n = resultIfOk =<< cuMemAllocManaged (n * sizeOf x) flags

{-# INLINE cuMemAllocManaged #-}
cuMemAllocManaged :: (Int) -> ([AttachFlag]) -> IO ((Status), (DevicePtr a))
cuMemAllocManaged :: Int -> [AttachFlag] -> IO (Status, DevicePtr a)
cuMemAllocManaged Int
a2 [AttachFlag]
a3 =
  (Ptr CULLong -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall b b. (Ptr b -> IO b) -> IO b
alloca' ((Ptr CULLong -> IO (Status, DevicePtr a))
 -> IO (Status, DevicePtr a))
-> (Ptr CULLong -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall a b. (a -> b) -> a -> b
$ \Ptr CULLong
a1' -> 
  let {a2' :: CULong
a2' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = [AttachFlag] -> CUInt
forall a b. (Enum a, Num b, Bits b) => [a] -> b
combineBitMasks [AttachFlag]
a3} in 
  Ptr CULLong -> CULong -> CUInt -> IO CInt
cuMemAllocManaged'_ Ptr CULLong
a1' CULong
a2' CUInt
a3' IO CInt
-> (CInt -> IO (Status, DevicePtr a)) -> IO (Status, DevicePtr a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Ptr CULLong -> IO (DevicePtr a)
forall a. Ptr CULLong -> IO (DevicePtr a)
peekDeviceHandle  Ptr CULLong
a1'IO (DevicePtr a)
-> (DevicePtr a -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \DevicePtr a
a1'' -> 
  (Status, DevicePtr a) -> IO (Status, DevicePtr a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res', a1'')

{-# LINE 335 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    alloca' !f = F.alloca $ \ !p -> poke p nullPtr >> f (castPtr p)


-- | Pre-fetches the given number of elements to the specified destination
-- device. If the specified device is Nothing, the data is pre-fetched to host
-- memory. The pointer must refer to a memory range allocated with
-- 'mallocManagedArray'.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__UNIFIED.html#group__CUDA__UNIFIED_1gfe94f8b7fb56291ebcea44261aa4cb84>
--
-- Requires CUDA-8.0.
--
prefetchArrayAsync :: Storable a => DevicePtr a -> Int -> Maybe Device -> Maybe Stream -> IO ()
prefetchArrayAsync :: DevicePtr a -> Int -> Maybe Device -> Maybe Stream -> IO ()
prefetchArrayAsync DevicePtr a
ptr Int
n Maybe Device
mdev Maybe Stream
mst = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
go a
forall a. HasCallStack => a
undefined DevicePtr a
ptr
  where
    go :: Storable a' => a' -> DevicePtr a' -> IO ()
    go :: a' -> DevicePtr a' -> IO ()
go a'
x DevicePtr a'
_ = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> Int -> CInt -> Stream -> IO Status
forall a. DevicePtr a -> Int -> CInt -> Stream -> IO Status
cuMemPrefetchAsync DevicePtr a
ptr (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x) (CInt -> (Device -> CInt) -> Maybe Device -> CInt
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (-CInt
1) Device -> CInt
useDevice Maybe Device
mdev) (Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst)

{-# INLINE cuMemPrefetchAsync #-}
cuMemPrefetchAsync :: (DevicePtr a) -> (Int) -> (CInt) -> (Stream) -> IO ((Status))
cuMemPrefetchAsync :: DevicePtr a -> Int -> CInt -> Stream -> IO Status
cuMemPrefetchAsync DevicePtr a
a1 Int
a2 CInt
a3 Stream
a4 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: CULong
a2' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CInt
a3' = CInt -> CInt
forall a. a -> a
id CInt
a3} in 
  let {a4' :: Ptr ()
a4' = Stream -> Ptr ()
useStream Stream
a4} in 
  CULLong -> CULong -> CInt -> Ptr () -> IO CInt
cuMemPrefetchAsync'_ CULLong
a1' CULong
a2' CInt
a3' Ptr ()
a4' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 367 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- | Attach an array of the given number of elements to a stream asynchronously
--
-- <https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__STREAM.html#group__CUDA__STREAM_1g6e468d680e263e7eba02a56643c50533>
--
-- @since 0.10.0.0
--
{-# INLINEABLE attachArrayAsync #-}
attachArrayAsync :: forall a. Storable a => [AttachFlag] -> Stream -> DevicePtr a -> Int -> IO ()
attachArrayAsync :: [AttachFlag] -> Stream -> DevicePtr a -> Int -> IO ()
attachArrayAsync ![AttachFlag]
flags !Stream
stream !DevicePtr a
ptr !Int
n = Stream -> DevicePtr a -> Int -> [AttachFlag] -> IO ()
cuStreamAttachMemAsync Stream
stream DevicePtr a
ptr (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a -> Int
forall a. Storable a => a -> Int
sizeOf (a
forall a. HasCallStack => a
undefined::a)) [AttachFlag]
flags
  where
    cuStreamAttachMemAsync :: (Stream) -> (DevicePtr a) -> (Int) -> ([AttachFlag]) -> IO ()
    cuStreamAttachMemAsync :: Stream -> DevicePtr a -> Int -> [AttachFlag] -> IO ()
cuStreamAttachMemAsync Stream
a1 DevicePtr a
a2 Int
a3 [AttachFlag]
a4 =
      let {a1' :: Ptr ()
a1' = Stream -> Ptr ()
useStream Stream
a1} in 
      let {a2' :: CULLong
a2' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a2} in 
      let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
      let {a4' :: CUInt
a4' = [AttachFlag] -> CUInt
forall a b. (Enum a, Num b, Bits b) => [a] -> b
combineBitMasks [AttachFlag]
a4} in 
      Ptr () -> CULLong -> CULong -> CUInt -> IO CInt
cuStreamAttachMemAsync'_ Ptr ()
a1' CULLong
a2' CULong
a3' CUInt
a4' IO CInt -> (CInt -> IO ()) -> IO ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
      CInt -> IO ()
checkStatus CInt
res IO () -> IO () -> IO ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> 
      () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

{-# LINE 386 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



--------------------------------------------------------------------------------
-- Marshalling
--------------------------------------------------------------------------------

-- Device -> Host
-- --------------

-- |
-- Copy a number of elements from the device to host memory. This is a
-- synchronous operation.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g3480368ee0208a98f75019c9a8450893>
--
{-# INLINEABLE peekArray #-}
peekArray :: Storable a => Int -> DevicePtr a -> Ptr a -> IO ()
peekArray :: Int -> DevicePtr a -> Ptr a -> IO ()
peekArray !Int
n !DevicePtr a
dptr !Ptr a
hptr = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doPeek a
forall a. HasCallStack => a
undefined DevicePtr a
dptr
  where
    doPeek :: Storable a' => a' -> DevicePtr a' -> IO ()
    doPeek :: a' -> DevicePtr a' -> IO ()
doPeek a'
x DevicePtr a'
_ = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Ptr a -> DevicePtr a -> Int -> IO Status
forall a. Ptr a -> DevicePtr a -> Int -> IO Status
cuMemcpyDtoH Ptr a
hptr DevicePtr a
dptr (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x)

{-# INLINE cuMemcpyDtoH #-}
cuMemcpyDtoH :: (Ptr a) -> (DevicePtr a) -> (Int) -> IO ((Status))
cuMemcpyDtoH :: Ptr a -> DevicePtr a -> Int -> IO Status
cuMemcpyDtoH Ptr a
a1 DevicePtr a
a2 Int
a3 =
  let {a1' :: Ptr b
a1' = Ptr a -> Ptr b
forall a b. Ptr a -> Ptr b
castPtr Ptr a
a1} in 
  let {a2' :: CULLong
a2' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  Ptr () -> CULLong -> CULong -> IO CInt
cuMemcpyDtoH'_ Ptr ()
forall b. Ptr b
a1' CULLong
a2' CULong
a3' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 413 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Copy memory from the device asynchronously, possibly associated with a
-- particular stream. The destination host memory must be page-locked.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g56f30236c7c5247f8e061b59d3268362>
--
{-# INLINEABLE peekArrayAsync #-}
peekArrayAsync :: Storable a => Int -> DevicePtr a -> HostPtr a -> Maybe Stream -> IO ()
peekArrayAsync :: Int -> DevicePtr a -> HostPtr a -> Maybe Stream -> IO ()
peekArrayAsync !Int
n !DevicePtr a
dptr !HostPtr a
hptr !Maybe Stream
mst = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doPeek a
forall a. HasCallStack => a
undefined DevicePtr a
dptr
  where
    doPeek :: Storable a' => a' -> DevicePtr a' -> IO ()
    doPeek :: a' -> DevicePtr a' -> IO ()
doPeek a'
x DevicePtr a'
_ = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< HostPtr a -> DevicePtr a -> Int -> Stream -> IO Status
forall a. HostPtr a -> DevicePtr a -> Int -> Stream -> IO Status
cuMemcpyDtoHAsync HostPtr a
hptr DevicePtr a
dptr (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x) (Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst)

{-# INLINE cuMemcpyDtoHAsync #-}
cuMemcpyDtoHAsync :: (HostPtr a) -> (DevicePtr a) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpyDtoHAsync :: HostPtr a -> DevicePtr a -> Int -> Stream -> IO Status
cuMemcpyDtoHAsync HostPtr a
a1 DevicePtr a
a2 Int
a3 Stream
a4 =
  let {a1' :: Ptr b
a1' = HostPtr a -> Ptr b
forall a b. HostPtr a -> Ptr b
useHP HostPtr a
a1} in 
  let {a2' :: CULLong
a2' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: Ptr ()
a4' = Stream -> Ptr ()
useStream Stream
a4} in 
  Ptr () -> CULLong -> CULong -> Ptr () -> IO CInt
cuMemcpyDtoHAsync'_ Ptr ()
forall b. Ptr b
a1' CULLong
a2' CULong
a3' Ptr ()
a4' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 434 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    useHP = castPtr . useHostPtr


-- |
-- Copy a 2D array from the device to the host.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g27f885b30c34cc20a663a671dbf6fc27>
--
{-# INLINEABLE peekArray2D #-}
peekArray2D
    :: Storable a
    => Int                      -- ^ width to copy (elements)
    -> Int                      -- ^ height to copy (elements)
    -> DevicePtr a              -- ^ source array
    -> Int                      -- ^ source array width
    -> Int                      -- ^ source x-coordinate
    -> Int                      -- ^ source y-coordinate
    -> Ptr a                    -- ^ destination array
    -> Int                      -- ^ destination array width
    -> Int                      -- ^ destination x-coordinate
    -> Int                      -- ^ destination y-coordinate
    -> IO ()
peekArray2D :: Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> IO ()
peekArray2D !Int
w !Int
h !DevicePtr a
dptr !Int
dw !Int
dx !Int
dy !Ptr a
hptr !Int
hw !Int
hx !Int
hy = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doPeek a
forall a. HasCallStack => a
undefined DevicePtr a
dptr
  where
    doPeek :: Storable a' => a' -> DevicePtr a' -> IO ()
    doPeek :: a' -> DevicePtr a' -> IO ()
doPeek a'
x DevicePtr a'
_ =
      let bytes :: Int
bytes = a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x
          w' :: Int
w'    = Int
w  Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hw' :: Int
hw'   = Int
hw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hx' :: Int
hx'   = Int
hx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dw' :: Int
dw'   = Int
dw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dx' :: Int
dx'   = Int
dx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
      in
      Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Ptr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
forall a.
Ptr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
cuMemcpy2DDtoH Ptr a
hptr Int
hw' Int
hx' Int
hy DevicePtr a
dptr Int
dw' Int
dx' Int
dy Int
w' Int
h

{-# INLINE cuMemcpy2DDtoH #-}
cuMemcpy2DDtoH :: (Ptr a) -> (Int) -> (Int) -> (Int) -> (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuMemcpy2DDtoH :: Ptr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
cuMemcpy2DDtoH Ptr a
a1 Int
a2 Int
a3 Int
a4 DevicePtr a
a5 Int
a6 Int
a7 Int
a8 Int
a9 Int
a10 =
  let {a1' :: Ptr b
a1' = Ptr a -> Ptr b
forall a b. Ptr a -> Ptr b
castPtr Ptr a
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CUInt
a4' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  let {a5' :: CULLong
a5' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a5} in 
  let {a6' :: CUInt
a6' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a6} in 
  let {a7' :: CUInt
a7' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a7} in 
  let {a8' :: CUInt
a8' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a8} in 
  let {a9' :: CUInt
a9' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a9} in 
  let {a10' :: CUInt
a10' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a10} in 
  Ptr ()
-> CUInt
-> CUInt
-> CUInt
-> CULLong
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> IO CInt
cuMemcpy2DDtoH'_ Ptr ()
forall b. Ptr b
a1' CUInt
a2' CUInt
a3' CUInt
a4' CULLong
a5' CUInt
a6' CUInt
a7' CUInt
a8' CUInt
a9' CUInt
a10' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 484 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Copy a 2D array from the device to the host asynchronously, possibly
-- associated with a particular execution stream. The destination host memory
-- must be page-locked.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g4acf155faeb969d9d21f5433d3d0f274>
--
{-# INLINEABLE peekArray2DAsync #-}
peekArray2DAsync
    :: Storable a
    => Int                      -- ^ width to copy (elements)
    -> Int                      -- ^ height to copy (elements)
    -> DevicePtr a              -- ^ source array
    -> Int                      -- ^ source array width
    -> Int                      -- ^ source x-coordinate
    -> Int                      -- ^ source y-coordinate
    -> HostPtr a                -- ^ destination array
    -> Int                      -- ^ destination array width
    -> Int                      -- ^ destination x-coordinate
    -> Int                      -- ^ destination y-coordinate
    -> Maybe Stream             -- ^ stream to associate to
    -> IO ()
peekArray2DAsync :: Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> HostPtr a
-> Int
-> Int
-> Int
-> Maybe Stream
-> IO ()
peekArray2DAsync !Int
w !Int
h !DevicePtr a
dptr !Int
dw !Int
dx !Int
dy !HostPtr a
hptr !Int
hw !Int
hx !Int
hy !Maybe Stream
mst = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doPeek a
forall a. HasCallStack => a
undefined DevicePtr a
dptr
  where
    doPeek :: Storable a' => a' -> DevicePtr a' -> IO ()
    doPeek :: a' -> DevicePtr a' -> IO ()
doPeek a'
x DevicePtr a'
_ =
      let bytes :: Int
bytes = a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x
          w' :: Int
w'    = Int
w  Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hw' :: Int
hw'   = Int
hw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hx' :: Int
hx'   = Int
hx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dw' :: Int
dw'   = Int
dw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dx' :: Int
dx'   = Int
dx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          st :: Stream
st    = Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst
      in
      Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< HostPtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
forall a.
HostPtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
cuMemcpy2DDtoHAsync HostPtr a
hptr Int
hw' Int
hx' Int
hy DevicePtr a
dptr Int
dw' Int
dx' Int
dy Int
w' Int
h Stream
st

{-# INLINE cuMemcpy2DDtoHAsync #-}
cuMemcpy2DDtoHAsync :: (HostPtr a) -> (Int) -> (Int) -> (Int) -> (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpy2DDtoHAsync :: HostPtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
cuMemcpy2DDtoHAsync HostPtr a
a1 Int
a2 Int
a3 Int
a4 DevicePtr a
a5 Int
a6 Int
a7 Int
a8 Int
a9 Int
a10 Stream
a11 =
  let {a1' :: Ptr b
a1' = HostPtr a -> Ptr b
forall a b. HostPtr a -> Ptr b
useHP HostPtr a
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CUInt
a4' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  let {a5' :: CULLong
a5' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a5} in 
  let {a6' :: CUInt
a6' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a6} in 
  let {a7' :: CUInt
a7' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a7} in 
  let {a8' :: CUInt
a8' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a8} in 
  let {a9' :: CUInt
a9' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a9} in 
  let {a10' :: CUInt
a10' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a10} in 
  let {a11' :: Ptr ()
a11' = Stream -> Ptr ()
useStream Stream
a11} in 
  Ptr ()
-> CUInt
-> CUInt
-> CUInt
-> CULLong
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> Ptr ()
-> IO CInt
cuMemcpy2DDtoHAsync'_ Ptr ()
forall b. Ptr b
a1' CUInt
a2' CUInt
a3' CUInt
a4' CULLong
a5' CUInt
a6' CUInt
a7' CUInt
a8' CUInt
a9' CUInt
a10' Ptr ()
a11' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 537 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    useHP = castPtr . useHostPtr


-- |
-- Copy a number of elements from the device into a new Haskell list. Note that
-- this requires two memory copies: firstly from the device into a heap
-- allocated array, and from there marshalled into a list.
--
{-# INLINEABLE peekListArray #-}
peekListArray :: Storable a => Int -> DevicePtr a -> IO [a]
peekListArray :: Int -> DevicePtr a -> IO [a]
peekListArray !Int
n !DevicePtr a
dptr =
  Int -> (Ptr a -> IO [a]) -> IO [a]
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
F.allocaArray Int
n ((Ptr a -> IO [a]) -> IO [a]) -> (Ptr a -> IO [a]) -> IO [a]
forall a b. (a -> b) -> a -> b
$ \Ptr a
p -> do
    Int -> DevicePtr a -> Ptr a -> IO ()
forall a. Storable a => Int -> DevicePtr a -> Ptr a -> IO ()
peekArray   Int
n DevicePtr a
dptr Ptr a
p
    Int -> Ptr a -> IO [a]
forall a. Storable a => Int -> Ptr a -> IO [a]
F.peekArray Int
n Ptr a
p


-- Host -> Device
-- --------------

-- |
-- Copy a number of elements onto the device. This is a synchronous operation.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g4d32266788c440b0220b1a9ba5795169>
--
{-# INLINEABLE pokeArray #-}
pokeArray :: Storable a => Int -> Ptr a -> DevicePtr a -> IO ()
pokeArray :: Int -> Ptr a -> DevicePtr a -> IO ()
pokeArray !Int
n !Ptr a
hptr !DevicePtr a
dptr = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doPoke a
forall a. HasCallStack => a
undefined DevicePtr a
dptr
  where
    doPoke :: Storable a' => a' -> DevicePtr a' -> IO ()
    doPoke :: a' -> DevicePtr a' -> IO ()
doPoke a'
x DevicePtr a'
_ = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> Ptr a -> Int -> IO Status
forall a. DevicePtr a -> Ptr a -> Int -> IO Status
cuMemcpyHtoD DevicePtr a
dptr Ptr a
hptr (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x)

{-# INLINE cuMemcpyHtoD #-}
cuMemcpyHtoD :: (DevicePtr a) -> (Ptr a) -> (Int) -> IO ((Status))
cuMemcpyHtoD :: DevicePtr a -> Ptr a -> Int -> IO Status
cuMemcpyHtoD DevicePtr a
a1 Ptr a
a2 Int
a3 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: Ptr b
a2' = Ptr a -> Ptr b
forall a b. Ptr a -> Ptr b
castPtr Ptr a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  CULLong -> Ptr () -> CULong -> IO CInt
cuMemcpyHtoD'_ CULLong
a1' Ptr ()
forall b. Ptr b
a2' CULong
a3' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 574 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Copy memory onto the device asynchronously, possibly associated with a
-- particular stream. The source host memory must be page-locked.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g1572263fe2597d7ba4f6964597a354a3>
--
{-# INLINEABLE pokeArrayAsync #-}
pokeArrayAsync :: Storable a => Int -> HostPtr a -> DevicePtr a -> Maybe Stream -> IO ()
pokeArrayAsync :: Int -> HostPtr a -> DevicePtr a -> Maybe Stream -> IO ()
pokeArrayAsync !Int
n !HostPtr a
hptr !DevicePtr a
dptr !Maybe Stream
mst = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
dopoke a
forall a. HasCallStack => a
undefined DevicePtr a
dptr
  where
    dopoke :: Storable a' => a' -> DevicePtr a' -> IO ()
    dopoke :: a' -> DevicePtr a' -> IO ()
dopoke a'
x DevicePtr a'
_ = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> HostPtr a -> Int -> Stream -> IO Status
forall a. DevicePtr a -> HostPtr a -> Int -> Stream -> IO Status
cuMemcpyHtoDAsync DevicePtr a
dptr HostPtr a
hptr (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x) (Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst)

{-# INLINE cuMemcpyHtoDAsync #-}
cuMemcpyHtoDAsync :: (DevicePtr a) -> (HostPtr a) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpyHtoDAsync :: DevicePtr a -> HostPtr a -> Int -> Stream -> IO Status
cuMemcpyHtoDAsync DevicePtr a
a1 HostPtr a
a2 Int
a3 Stream
a4 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: Ptr b
a2' = HostPtr a -> Ptr b
forall a b. HostPtr a -> Ptr b
useHP HostPtr a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: Ptr ()
a4' = Stream -> Ptr ()
useStream Stream
a4} in 
  CULLong -> Ptr () -> CULong -> Ptr () -> IO CInt
cuMemcpyHtoDAsync'_ CULLong
a1' Ptr ()
forall b. Ptr b
a2' CULong
a3' Ptr ()
a4' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 595 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    useHP = castPtr . useHostPtr


-- |
-- Copy a 2D array from the host to the device.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g27f885b30c34cc20a663a671dbf6fc27>
--
{-# INLINEABLE pokeArray2D #-}
pokeArray2D
    :: Storable a
    => Int                      -- ^ width to copy (elements)
    -> Int                      -- ^ height to copy (elements)
    -> Ptr a                    -- ^ source array
    -> Int                      -- ^ source array width
    -> Int                      -- ^ source x-coordinate
    -> Int                      -- ^ source y-coordinate
    -> DevicePtr a              -- ^ destination array
    -> Int                      -- ^ destination array width
    -> Int                      -- ^ destination x-coordinate
    -> Int                      -- ^ destination y-coordinate
    -> IO ()
pokeArray2D :: Int
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> IO ()
pokeArray2D !Int
w !Int
h !Ptr a
hptr !Int
hw !Int
hx !Int
hy !DevicePtr a
dptr !Int
dw !Int
dx !Int
dy = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doPoke a
forall a. HasCallStack => a
undefined DevicePtr a
dptr
  where
    doPoke :: Storable a' => a' -> DevicePtr a' -> IO ()
    doPoke :: a' -> DevicePtr a' -> IO ()
doPoke a'
x DevicePtr a'
_ =
      let bytes :: Int
bytes = a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x
          w' :: Int
w'    = Int
w  Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hw' :: Int
hw'   = Int
hw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hx' :: Int
hx'   = Int
hx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dw' :: Int
dw'   = Int
dw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dx' :: Int
dx'   = Int
dx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
      in
      Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a
-> Int
-> Int
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
forall a.
DevicePtr a
-> Int
-> Int
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
cuMemcpy2DHtoD DevicePtr a
dptr Int
dw' Int
dx' Int
dy Ptr a
hptr Int
hw' Int
hx' Int
hy Int
w' Int
h

{-# INLINE cuMemcpy2DHtoD #-}
cuMemcpy2DHtoD :: (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Ptr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuMemcpy2DHtoD :: DevicePtr a
-> Int
-> Int
-> Int
-> Ptr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
cuMemcpy2DHtoD DevicePtr a
a1 Int
a2 Int
a3 Int
a4 Ptr a
a5 Int
a6 Int
a7 Int
a8 Int
a9 Int
a10 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CUInt
a4' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  let {a5' :: Ptr b
a5' = Ptr a -> Ptr b
forall a b. Ptr a -> Ptr b
castPtr Ptr a
a5} in 
  let {a6' :: CUInt
a6' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a6} in 
  let {a7' :: CUInt
a7' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a7} in 
  let {a8' :: CUInt
a8' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a8} in 
  let {a9' :: CUInt
a9' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a9} in 
  let {a10' :: CUInt
a10' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a10} in 
  CULLong
-> CUInt
-> CUInt
-> CUInt
-> Ptr ()
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> IO CInt
cuMemcpy2DHtoD'_ CULLong
a1' CUInt
a2' CUInt
a3' CUInt
a4' Ptr ()
forall b. Ptr b
a5' CUInt
a6' CUInt
a7' CUInt
a8' CUInt
a9' CUInt
a10' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 645 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Copy a 2D array from the host to the device asynchronously, possibly
-- associated with a particular execution stream. The source host memory must be
-- page-locked.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g4acf155faeb969d9d21f5433d3d0f274>
--
{-# INLINEABLE pokeArray2DAsync #-}
pokeArray2DAsync
    :: Storable a
    => Int                      -- ^ width to copy (elements)
    -> Int                      -- ^ height to copy (elements)
    -> HostPtr a                -- ^ source array
    -> Int                      -- ^ source array width
    -> Int                      -- ^ source x-coordinate
    -> Int                      -- ^ source y-coordinate
    -> DevicePtr a              -- ^ destination array
    -> Int                      -- ^ destination array width
    -> Int                      -- ^ destination x-coordinate
    -> Int                      -- ^ destination y-coordinate
    -> Maybe Stream             -- ^ stream to associate to
    -> IO ()
pokeArray2DAsync :: Int
-> Int
-> HostPtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Maybe Stream
-> IO ()
pokeArray2DAsync !Int
w !Int
h !HostPtr a
hptr !Int
hw !Int
hx !Int
hy !DevicePtr a
dptr !Int
dw !Int
dx !Int
dy !Maybe Stream
mst = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doPoke a
forall a. HasCallStack => a
undefined DevicePtr a
dptr
  where
    doPoke :: Storable a' => a' -> DevicePtr a' -> IO ()
    doPoke :: a' -> DevicePtr a' -> IO ()
doPoke a'
x DevicePtr a'
_ =
      let bytes :: Int
bytes = a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x
          w' :: Int
w'    = Int
w  Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hw' :: Int
hw'   = Int
hw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hx' :: Int
hx'   = Int
hx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dw' :: Int
dw'   = Int
dw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dx' :: Int
dx'   = Int
dx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          st :: Stream
st    = Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst
      in
      Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a
-> Int
-> Int
-> Int
-> HostPtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
forall a.
DevicePtr a
-> Int
-> Int
-> Int
-> HostPtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
cuMemcpy2DHtoDAsync DevicePtr a
dptr Int
dw' Int
dx' Int
dy HostPtr a
hptr Int
hw' Int
hx' Int
hy Int
w' Int
h Stream
st

{-# INLINE cuMemcpy2DHtoDAsync #-}
cuMemcpy2DHtoDAsync :: (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (HostPtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpy2DHtoDAsync :: DevicePtr a
-> Int
-> Int
-> Int
-> HostPtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
cuMemcpy2DHtoDAsync DevicePtr a
a1 Int
a2 Int
a3 Int
a4 HostPtr a
a5 Int
a6 Int
a7 Int
a8 Int
a9 Int
a10 Stream
a11 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CUInt
a4' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  let {a5' :: Ptr b
a5' = HostPtr a -> Ptr b
forall a b. HostPtr a -> Ptr b
useHP HostPtr a
a5} in 
  let {a6' :: CUInt
a6' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a6} in 
  let {a7' :: CUInt
a7' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a7} in 
  let {a8' :: CUInt
a8' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a8} in 
  let {a9' :: CUInt
a9' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a9} in 
  let {a10' :: CUInt
a10' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a10} in 
  let {a11' :: Ptr ()
a11' = Stream -> Ptr ()
useStream Stream
a11} in 
  CULLong
-> CUInt
-> CUInt
-> CUInt
-> Ptr ()
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> Ptr ()
-> IO CInt
cuMemcpy2DHtoDAsync'_ CULLong
a1' CUInt
a2' CUInt
a3' CUInt
a4' Ptr ()
forall b. Ptr b
a5' CUInt
a6' CUInt
a7' CUInt
a8' CUInt
a9' CUInt
a10' Ptr ()
a11' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 698 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    useHP = castPtr . useHostPtr


-- |
-- Write a list of storable elements into a device array. The device array must
-- be sufficiently large to hold the entire list. This requires two marshalling
-- operations.
--
{-# INLINEABLE pokeListArray #-}
pokeListArray :: Storable a => [a] -> DevicePtr a -> IO ()
pokeListArray :: [a] -> DevicePtr a -> IO ()
pokeListArray ![a]
xs !DevicePtr a
dptr = [a] -> (Int -> Ptr a -> IO ()) -> IO ()
forall a b. Storable a => [a] -> (Int -> Ptr a -> IO b) -> IO b
F.withArrayLen [a]
xs ((Int -> Ptr a -> IO ()) -> IO ())
-> (Int -> Ptr a -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \ !Int
len !Ptr a
p -> Int -> Ptr a -> DevicePtr a -> IO ()
forall a. Storable a => Int -> Ptr a -> DevicePtr a -> IO ()
pokeArray Int
len Ptr a
p DevicePtr a
dptr


-- Device -> Device
-- ----------------

-- |
-- Copy the given number of elements from the first device array (source) to the
-- second device (destination). The copied areas may not overlap. This operation
-- is asynchronous with respect to the host, but will never overlap with kernel
-- execution.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g1725774abf8b51b91945f3336b778c8b>
--
{-# INLINEABLE copyArray #-}
copyArray :: Storable a => Int -> DevicePtr a -> DevicePtr a -> IO ()
copyArray :: Int -> DevicePtr a -> DevicePtr a -> IO ()
copyArray !Int
n = a -> DevicePtr a -> DevicePtr a -> IO ()
forall a'.
Storable a' =>
a' -> DevicePtr a' -> DevicePtr a' -> IO ()
docopy a
forall a. HasCallStack => a
undefined
  where
    docopy :: Storable a' => a' -> DevicePtr a' -> DevicePtr a' -> IO ()
    docopy :: a' -> DevicePtr a' -> DevicePtr a' -> IO ()
docopy a'
x DevicePtr a'
src DevicePtr a'
dst = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a' -> DevicePtr a' -> Int -> IO Status
forall a. DevicePtr a -> DevicePtr a -> Int -> IO Status
cuMemcpyDtoD DevicePtr a'
dst DevicePtr a'
src (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x)

{-# INLINE cuMemcpyDtoD #-}
cuMemcpyDtoD :: (DevicePtr a) -> (DevicePtr a) -> (Int) -> IO ((Status))
cuMemcpyDtoD :: DevicePtr a -> DevicePtr a -> Int -> IO Status
cuMemcpyDtoD DevicePtr a
a1 DevicePtr a
a2 Int
a3 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: CULLong
a2' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  CULLong -> CULLong -> CULong -> IO CInt
cuMemcpyDtoD'_ CULLong
a1' CULLong
a2' CULong
a3' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 735 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Copy the given number of elements from the first device array (source) to the
-- second device array (destination). The copied areas may not overlap. The
-- operation is asynchronous with respect to the host, and can be asynchronous
-- to other device operations by associating it with a particular stream.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g39ea09ba682b8eccc9c3e0c04319b5c8>
--
{-# INLINEABLE copyArrayAsync #-}
copyArrayAsync :: Storable a => Int -> DevicePtr a -> DevicePtr a -> Maybe Stream -> IO ()
copyArrayAsync :: Int -> DevicePtr a -> DevicePtr a -> Maybe Stream -> IO ()
copyArrayAsync !Int
n !DevicePtr a
src !DevicePtr a
dst !Maybe Stream
mst = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
docopy a
forall a. HasCallStack => a
undefined DevicePtr a
src
  where
    docopy :: Storable a' => a' -> DevicePtr a' -> IO ()
    docopy :: a' -> DevicePtr a' -> IO ()
docopy a'
x DevicePtr a'
_ = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> DevicePtr a -> Int -> Stream -> IO Status
forall a. DevicePtr a -> DevicePtr a -> Int -> Stream -> IO Status
cuMemcpyDtoDAsync DevicePtr a
dst DevicePtr a
src (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x) (Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst)

{-# INLINE cuMemcpyDtoDAsync #-}
cuMemcpyDtoDAsync :: (DevicePtr a) -> (DevicePtr a) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpyDtoDAsync :: DevicePtr a -> DevicePtr a -> Int -> Stream -> IO Status
cuMemcpyDtoDAsync DevicePtr a
a1 DevicePtr a
a2 Int
a3 Stream
a4 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: CULLong
a2' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: Ptr ()
a4' = Stream -> Ptr ()
useStream Stream
a4} in 
  CULLong -> CULLong -> CULong -> Ptr () -> IO CInt
cuMemcpyDtoDAsync'_ CULLong
a1' CULLong
a2' CULong
a3' Ptr ()
a4' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 758 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Copy a 2D array from the first device array (source) to the second device
-- array (destination). The copied areas must not overlap. This operation is
-- asynchronous with respect to the host, but will never overlap with kernel
-- execution.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g27f885b30c34cc20a663a671dbf6fc27>
--
{-# INLINEABLE copyArray2D #-}
copyArray2D
    :: Storable a
    => Int                      -- ^ width to copy (elements)
    -> Int                      -- ^ height to copy (elements)
    -> DevicePtr a              -- ^ source array
    -> Int                      -- ^ source array width
    -> Int                      -- ^ source x-coordinate
    -> Int                      -- ^ source y-coordinate
    -> DevicePtr a              -- ^ destination array
    -> Int                      -- ^ destination array width
    -> Int                      -- ^ destination x-coordinate
    -> Int                      -- ^ destination y-coordinate
    -> IO ()
copyArray2D :: Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> IO ()
copyArray2D !Int
w !Int
h !DevicePtr a
src !Int
hw !Int
hx !Int
hy !DevicePtr a
dst !Int
dw !Int
dx !Int
dy = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doCopy a
forall a. HasCallStack => a
undefined DevicePtr a
dst
  where
    doCopy :: Storable a' => a' -> DevicePtr a' -> IO ()
    doCopy :: a' -> DevicePtr a' -> IO ()
doCopy a'
x DevicePtr a'
_ =
      let bytes :: Int
bytes = a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x
          w' :: Int
w'    = Int
w  Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hw' :: Int
hw'   = Int
hw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hx' :: Int
hx'   = Int
hx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dw' :: Int
dw'   = Int
dw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dx' :: Int
dx'   = Int
dx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
      in
      Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
forall a.
DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
cuMemcpy2DDtoD DevicePtr a
dst Int
dw' Int
dx' Int
dy DevicePtr a
src Int
hw' Int
hx' Int
hy Int
w' Int
h

{-# INLINE cuMemcpy2DDtoD #-}
cuMemcpy2DDtoD :: (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> IO ((Status))
cuMemcpy2DDtoD :: DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO Status
cuMemcpy2DDtoD DevicePtr a
a1 Int
a2 Int
a3 Int
a4 DevicePtr a
a5 Int
a6 Int
a7 Int
a8 Int
a9 Int
a10 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CUInt
a4' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  let {a5' :: CULLong
a5' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a5} in 
  let {a6' :: CUInt
a6' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a6} in 
  let {a7' :: CUInt
a7' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a7} in 
  let {a8' :: CUInt
a8' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a8} in 
  let {a9' :: CUInt
a9' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a9} in 
  let {a10' :: CUInt
a10' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a10} in 
  CULLong
-> CUInt
-> CUInt
-> CUInt
-> CULLong
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> IO CInt
cuMemcpy2DDtoD'_ CULLong
a1' CUInt
a2' CUInt
a3' CUInt
a4' CULLong
a5' CUInt
a6' CUInt
a7' CUInt
a8' CUInt
a9' CUInt
a10' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 809 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Copy a 2D array from the first device array (source) to the second device
-- array (destination). The copied areas may not overlap. The operation is
-- asynchronous with respect to the host, and can be asynchronous to other
-- device operations by associating it with a particular execution stream.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g4acf155faeb969d9d21f5433d3d0f274>
--
{-# INLINEABLE copyArray2DAsync #-}
copyArray2DAsync
    :: Storable a
    => Int                      -- ^ width to copy (elements)
    -> Int                      -- ^ height to copy (elements)
    -> DevicePtr a              -- ^ source array
    -> Int                      -- ^ source array width
    -> Int                      -- ^ source x-coordinate
    -> Int                      -- ^ source y-coordinate
    -> DevicePtr a              -- ^ destination array
    -> Int                      -- ^ destination array width
    -> Int                      -- ^ destination x-coordinate
    -> Int                      -- ^ destination y-coordinate
    -> Maybe Stream             -- ^ stream to associate to
    -> IO ()
copyArray2DAsync :: Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Maybe Stream
-> IO ()
copyArray2DAsync !Int
w !Int
h !DevicePtr a
src !Int
hw !Int
hx !Int
hy !DevicePtr a
dst !Int
dw !Int
dx !Int
dy !Maybe Stream
mst = a -> DevicePtr a -> IO ()
forall a'. Storable a' => a' -> DevicePtr a' -> IO ()
doCopy a
forall a. HasCallStack => a
undefined DevicePtr a
dst
  where
    doCopy :: Storable a' => a' -> DevicePtr a' -> IO ()
    doCopy :: a' -> DevicePtr a' -> IO ()
doCopy a'
x DevicePtr a'
_ =
      let bytes :: Int
bytes = a' -> Int
forall a. Storable a => a -> Int
sizeOf a'
x
          w' :: Int
w'    = Int
w  Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hw' :: Int
hw'   = Int
hw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          hx' :: Int
hx'   = Int
hx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dw' :: Int
dw'   = Int
dw Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          dx' :: Int
dx'   = Int
dx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bytes
          st :: Stream
st    = Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst
      in
      Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
forall a.
DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
cuMemcpy2DDtoDAsync DevicePtr a
dst Int
dw' Int
dx' Int
dy DevicePtr a
src Int
hw' Int
hx' Int
hy Int
w' Int
h Stream
st

{-# INLINE cuMemcpy2DDtoDAsync #-}
cuMemcpy2DDtoDAsync :: (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (DevicePtr a) -> (Int) -> (Int) -> (Int) -> (Int) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpy2DDtoDAsync :: DevicePtr a
-> Int
-> Int
-> Int
-> DevicePtr a
-> Int
-> Int
-> Int
-> Int
-> Int
-> Stream
-> IO Status
cuMemcpy2DDtoDAsync DevicePtr a
a1 Int
a2 Int
a3 Int
a4 DevicePtr a
a5 Int
a6 Int
a7 Int
a8 Int
a9 Int
a10 Stream
a11 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: CUInt
a2' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a2} in 
  let {a3' :: CUInt
a3' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: CUInt
a4' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a4} in 
  let {a5' :: CULLong
a5' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a5} in 
  let {a6' :: CUInt
a6' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a6} in 
  let {a7' :: CUInt
a7' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a7} in 
  let {a8' :: CUInt
a8' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a8} in 
  let {a9' :: CUInt
a9' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a9} in 
  let {a10' :: CUInt
a10' = Int -> CUInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a10} in 
  let {a11' :: Ptr ()
a11' = Stream -> Ptr ()
useStream Stream
a11} in 
  CULLong
-> CUInt
-> CUInt
-> CUInt
-> CULLong
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> CUInt
-> Ptr ()
-> IO CInt
cuMemcpy2DDtoDAsync'_ CULLong
a1' CUInt
a2' CUInt
a3' CUInt
a4' CULLong
a5' CUInt
a6' CUInt
a7' CUInt
a8' CUInt
a9' CUInt
a10' Ptr ()
a11' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 863 "src/Foreign/CUDA/Driver/Marshal.chs" #-}




-- Context -> Context
-- ------------------

-- |
-- Copies an array from device memory in one context to device memory in another
-- context. Note that this function is asynchronous with respect to the host,
-- but serialised with respect to all pending and future asynchronous work in
-- the source and destination contexts. To avoid this synchronisation, use
-- 'copyArrayPeerAsync' instead.
--
-- Requires CUDA-4.0.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1ge1f5c7771544fee150ada8853c7cbf4a>
--
{-# INLINEABLE copyArrayPeer #-}
copyArrayPeer :: Storable a
              => Int                            -- ^ number of array elements
              -> DevicePtr a -> Context         -- ^ source array and context
              -> DevicePtr a -> Context         -- ^ destination array and context
              -> IO ()
copyArrayPeer :: Int -> DevicePtr a -> Context -> DevicePtr a -> Context -> IO ()
copyArrayPeer !Int
n !DevicePtr a
src !Context
srcCtx !DevicePtr a
dst !Context
dstCtx = a -> DevicePtr a -> DevicePtr a -> IO ()
forall a'.
Storable a' =>
a' -> DevicePtr a' -> DevicePtr a' -> IO ()
go a
forall a. HasCallStack => a
undefined DevicePtr a
src DevicePtr a
dst
  where
    go :: Storable b => b -> DevicePtr b -> DevicePtr b -> IO ()
    go :: b -> DevicePtr b -> DevicePtr b -> IO ()
go b
x DevicePtr b
_ DevicePtr b
_ = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a
-> Context -> DevicePtr a -> Context -> Int -> IO Status
forall a.
DevicePtr a
-> Context -> DevicePtr a -> Context -> Int -> IO Status
cuMemcpyPeer DevicePtr a
dst Context
dstCtx DevicePtr a
src Context
srcCtx (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* b -> Int
forall a. Storable a => a -> Int
sizeOf b
x)

{-# INLINE cuMemcpyPeer #-}
cuMemcpyPeer :: (DevicePtr a) -> (Context) -> (DevicePtr a) -> (Context) -> (Int) -> IO ((Status))
cuMemcpyPeer :: DevicePtr a
-> Context -> DevicePtr a -> Context -> Int -> IO Status
cuMemcpyPeer DevicePtr a
a1 Context
a2 DevicePtr a
a3 Context
a4 Int
a5 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: Ptr ()
a2' = Context -> Ptr ()
useContext Context
a2} in 
  let {a3' :: CULLong
a3' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a3} in 
  let {a4' :: Ptr ()
a4' = Context -> Ptr ()
useContext Context
a4} in 
  let {a5' :: CULong
a5' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a5} in 
  CULLong -> Ptr () -> CULLong -> Ptr () -> CULong -> IO CInt
cuMemcpyPeer'_ CULLong
a1' Ptr ()
a2' CULLong
a3' Ptr ()
a4' CULong
a5' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 902 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Copies from device memory in one context to device memory in another context.
-- Note that this function is asynchronous with respect to the host and all work
-- in other streams and devices.
--
-- Requires CUDA-4.0.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g82fcecb38018e64b98616a8ac30112f2>
--
{-# INLINEABLE copyArrayPeerAsync #-}
copyArrayPeerAsync :: Storable a
                   => Int                       -- ^ number of array elements
                   -> DevicePtr a -> Context    -- ^ source array and context
                   -> DevicePtr a -> Context    -- ^ destination array and device context
                   -> Maybe Stream              -- ^ stream to associate with
                   -> IO ()
copyArrayPeerAsync :: Int
-> DevicePtr a
-> Context
-> DevicePtr a
-> Context
-> Maybe Stream
-> IO ()
copyArrayPeerAsync !Int
n !DevicePtr a
src !Context
srcCtx !DevicePtr a
dst !Context
dstCtx !Maybe Stream
st = a -> DevicePtr a -> DevicePtr a -> IO ()
forall a'.
Storable a' =>
a' -> DevicePtr a' -> DevicePtr a' -> IO ()
go a
forall a. HasCallStack => a
undefined DevicePtr a
src DevicePtr a
dst
  where
    go :: Storable b => b -> DevicePtr b -> DevicePtr b -> IO ()
    go :: b -> DevicePtr b -> DevicePtr b -> IO ()
go b
x DevicePtr b
_ DevicePtr b
_ = Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a
-> Context -> DevicePtr a -> Context -> Int -> Stream -> IO Status
forall a.
DevicePtr a
-> Context -> DevicePtr a -> Context -> Int -> Stream -> IO Status
cuMemcpyPeerAsync DevicePtr a
dst Context
dstCtx DevicePtr a
src Context
srcCtx (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* b -> Int
forall a. Storable a => a -> Int
sizeOf b
x) Stream
stream
    stream :: Stream
stream   = Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
st

{-# INLINE cuMemcpyPeerAsync #-}
cuMemcpyPeerAsync :: (DevicePtr a) -> (Context) -> (DevicePtr a) -> (Context) -> (Int) -> (Stream) -> IO ((Status))
cuMemcpyPeerAsync :: DevicePtr a
-> Context -> DevicePtr a -> Context -> Int -> Stream -> IO Status
cuMemcpyPeerAsync DevicePtr a
a1 Context
a2 DevicePtr a
a3 Context
a4 Int
a5 Stream
a6 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: Ptr ()
a2' = Context -> Ptr ()
useContext Context
a2} in 
  let {a3' :: CULLong
a3' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a3} in 
  let {a4' :: Ptr ()
a4' = Context -> Ptr ()
useContext Context
a4} in 
  let {a5' :: CULong
a5' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a5} in 
  let {a6' :: Ptr ()
a6' = Stream -> Ptr ()
useStream Stream
a6} in 
  CULLong
-> Ptr () -> CULLong -> Ptr () -> CULong -> Ptr () -> IO CInt
cuMemcpyPeerAsync'_ CULLong
a1' Ptr ()
a2' CULLong
a3' Ptr ()
a4' CULong
a5' Ptr ()
a6' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 938 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



--------------------------------------------------------------------------------
-- Combined Allocation and Marshalling
--------------------------------------------------------------------------------

-- |
-- Write a list of storable elements into a newly allocated device array,
-- returning the device pointer together with the number of elements that were
-- written. Note that this requires two memory copies: firstly from a Haskell
-- list to a heap allocated array, and from there onto the graphics device. The
-- memory should be 'free'd when no longer required.
--
{-# INLINEABLE newListArrayLen #-}
newListArrayLen :: Storable a => [a] -> IO (DevicePtr a, Int)
newListArrayLen :: [a] -> IO (DevicePtr a, Int)
newListArrayLen [a]
xs =
  [a]
-> (Int -> Ptr a -> IO (DevicePtr a, Int)) -> IO (DevicePtr a, Int)
forall a b. Storable a => [a] -> (Int -> Ptr a -> IO b) -> IO b
F.withArrayLen [a]
xs                     ((Int -> Ptr a -> IO (DevicePtr a, Int)) -> IO (DevicePtr a, Int))
-> (Int -> Ptr a -> IO (DevicePtr a, Int)) -> IO (DevicePtr a, Int)
forall a b. (a -> b) -> a -> b
$ \Int
len Ptr a
p ->
  IO (DevicePtr a)
-> (DevicePtr a -> IO ())
-> (DevicePtr a -> IO (DevicePtr a, Int))
-> IO (DevicePtr a, Int)
forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracketOnError (Int -> IO (DevicePtr a)
forall a. Storable a => Int -> IO (DevicePtr a)
mallocArray Int
len) DevicePtr a -> IO ()
forall a. DevicePtr a -> IO ()
free ((DevicePtr a -> IO (DevicePtr a, Int)) -> IO (DevicePtr a, Int))
-> (DevicePtr a -> IO (DevicePtr a, Int)) -> IO (DevicePtr a, Int)
forall a b. (a -> b) -> a -> b
$ \DevicePtr a
d_xs  -> do
    Int -> Ptr a -> DevicePtr a -> IO ()
forall a. Storable a => Int -> Ptr a -> DevicePtr a -> IO ()
pokeArray Int
len Ptr a
p DevicePtr a
d_xs
    (DevicePtr a, Int) -> IO (DevicePtr a, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (DevicePtr a
d_xs, Int
len)


-- |
-- Write a list of storable elements into a newly allocated device array. This
-- is 'newListArrayLen' composed with 'fst'.
--
{-# INLINEABLE newListArray #-}
newListArray :: Storable a => [a] -> IO (DevicePtr a)
newListArray :: [a] -> IO (DevicePtr a)
newListArray [a]
xs = (DevicePtr a, Int) -> DevicePtr a
forall a b. (a, b) -> a
fst ((DevicePtr a, Int) -> DevicePtr a)
-> IO (DevicePtr a, Int) -> IO (DevicePtr a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [a] -> IO (DevicePtr a, Int)
forall a. Storable a => [a] -> IO (DevicePtr a, Int)
newListArrayLen [a]
xs


-- |
-- Temporarily store a list of elements into a newly allocated device array. An
-- IO action is applied to to the array, the result of which is returned.
-- Similar to 'newListArray', this requires copying the data twice.
--
-- As with 'allocaArray', the memory is freed once the action completes, so you
-- should not return the pointer from the action, and be wary of asynchronous
-- kernel execution.
--
{-# INLINEABLE withListArray #-}
withListArray :: Storable a => [a] -> (DevicePtr a -> IO b) -> IO b
withListArray :: [a] -> (DevicePtr a -> IO b) -> IO b
withListArray [a]
xs = [a] -> (Int -> DevicePtr a -> IO b) -> IO b
forall a b.
Storable a =>
[a] -> (Int -> DevicePtr a -> IO b) -> IO b
withListArrayLen [a]
xs ((Int -> DevicePtr a -> IO b) -> IO b)
-> ((DevicePtr a -> IO b) -> Int -> DevicePtr a -> IO b)
-> (DevicePtr a -> IO b)
-> IO b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DevicePtr a -> IO b) -> Int -> DevicePtr a -> IO b
forall a b. a -> b -> a
const


-- |
-- A variant of 'withListArray' which also supplies the number of elements in
-- the array to the applied function
--
{-# INLINEABLE withListArrayLen #-}
withListArrayLen :: Storable a => [a] -> (Int -> DevicePtr a -> IO b) -> IO b
withListArrayLen :: [a] -> (Int -> DevicePtr a -> IO b) -> IO b
withListArrayLen [a]
xs Int -> DevicePtr a -> IO b
f =
  IO (DevicePtr a, Int)
-> ((DevicePtr a, Int) -> IO ())
-> ((DevicePtr a, Int) -> IO b)
-> IO b
forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracket ([a] -> IO (DevicePtr a, Int)
forall a. Storable a => [a] -> IO (DevicePtr a, Int)
newListArrayLen [a]
xs) (DevicePtr a -> IO ()
forall a. DevicePtr a -> IO ()
free (DevicePtr a -> IO ())
-> ((DevicePtr a, Int) -> DevicePtr a)
-> (DevicePtr a, Int)
-> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DevicePtr a, Int) -> DevicePtr a
forall a b. (a, b) -> a
fst) ((DevicePtr a -> Int -> IO b) -> (DevicePtr a, Int) -> IO b
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry ((DevicePtr a -> Int -> IO b) -> (DevicePtr a, Int) -> IO b)
-> ((Int -> DevicePtr a -> IO b) -> DevicePtr a -> Int -> IO b)
-> (Int -> DevicePtr a -> IO b)
-> (DevicePtr a, Int)
-> IO b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> DevicePtr a -> IO b) -> DevicePtr a -> Int -> IO b
forall a b c. (a -> b -> c) -> b -> a -> c
flip ((Int -> DevicePtr a -> IO b) -> (DevicePtr a, Int) -> IO b)
-> (Int -> DevicePtr a -> IO b) -> (DevicePtr a, Int) -> IO b
forall a b. (a -> b) -> a -> b
$ Int -> DevicePtr a -> IO b
f)
--
-- XXX: Will this attempt to double-free the device array on error (together
-- with newListArrayLen)?
--


--------------------------------------------------------------------------------
-- Utility
--------------------------------------------------------------------------------

-- |
-- Set a number of data elements to the specified value, which may be either 8-,
-- 16-, or 32-bits wide.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g6e582bf866e9e2fb014297bfaf354d7b>
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g7d805e610054392a4d11e8a8bf5eb35c>
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g983e8d8759acd1b64326317481fbf132>
--
{-# INLINEABLE memset #-}
memset :: Storable a => DevicePtr a -> Int -> a -> IO ()
memset :: DevicePtr a -> Int -> a -> IO ()
memset !DevicePtr a
dptr !Int
n !a
val = case a -> Int
forall a. Storable a => a -> Int
sizeOf a
val of
    Int
1 -> Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> a -> Int -> IO Status
forall a. DevicePtr a -> a -> Int -> IO Status
cuMemsetD8  DevicePtr a
dptr a
val Int
n
    Int
2 -> Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> a -> Int -> IO Status
forall a. DevicePtr a -> a -> Int -> IO Status
cuMemsetD16 DevicePtr a
dptr a
val Int
n
    Int
4 -> Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> a -> Int -> IO Status
forall a. DevicePtr a -> a -> Int -> IO Status
cuMemsetD32 DevicePtr a
dptr a
val Int
n
    Int
_ -> String -> IO ()
forall a. String -> IO a
cudaErrorIO String
"can only memset 8-, 16-, and 32-bit values"

--
-- We use unsafe coerce below to reinterpret the bits of the value to memset as,
-- into the integer type required by the setting functions.
--
{-# INLINE cuMemsetD8 #-}
cuMemsetD8 :: (DevicePtr a) -> (a) -> (Int) -> IO ((Status))
cuMemsetD8 :: DevicePtr a -> a -> Int -> IO Status
cuMemsetD8 DevicePtr a
a1 a
a2 Int
a3 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: b
a2' = a -> b
forall a b. a -> b
unsafeCoerce a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  CULLong -> CUChar -> CULong -> IO CInt
cuMemsetD8'_ CULLong
a1' CUChar
forall b. b
a2' CULong
a3' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 1028 "src/Foreign/CUDA/Driver/Marshal.chs" #-}


{-# INLINE cuMemsetD16 #-}
cuMemsetD16 :: (DevicePtr a) -> (a) -> (Int) -> IO ((Status))
cuMemsetD16 a1 a2 a3 =
  let {a1' = useDeviceHandle a1} in 
  let {a2' = unsafeCoerce a2} in 
  let {a3' = fromIntegral a3} in 
  cuMemsetD16'_ a1' a2' a3' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 1034 "src/Foreign/CUDA/Driver/Marshal.chs" #-}


{-# INLINE cuMemsetD32 #-}
cuMemsetD32 :: (DevicePtr a) -> (a) -> (Int) -> IO ((Status))
cuMemsetD32 a1 a2 a3 =
  let {a1' = useDeviceHandle a1} in 
  let {a2' = unsafeCoerce a2} in 
  let {a3' = fromIntegral a3} in 
  cuMemsetD32'_ a1' a2' a3' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 1040 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Set the number of data elements to the specified value, which may be either
-- 8-, 16-, or 32-bits wide. The operation is asynchronous and may optionally be
-- associated with a stream.
--
-- Requires CUDA-3.2.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gaef08a7ccd61112f94e82f2b30d43627>
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gf731438877dd8ec875e4c43d848c878c>
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g58229da5d30f1c0cdf667b320ec2c0f5>
--
{-# INLINEABLE memsetAsync #-}
memsetAsync :: Storable a => DevicePtr a -> Int -> a -> Maybe Stream -> IO ()
memsetAsync :: DevicePtr a -> Int -> a -> Maybe Stream -> IO ()
memsetAsync !DevicePtr a
dptr !Int
n !a
val !Maybe Stream
mst = case a -> Int
forall a. Storable a => a -> Int
sizeOf a
val of
    Int
1 -> Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> a -> Int -> Stream -> IO Status
forall a. DevicePtr a -> a -> Int -> Stream -> IO Status
cuMemsetD8Async  DevicePtr a
dptr a
val Int
n Stream
stream
    Int
2 -> Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> a -> Int -> Stream -> IO Status
forall a. DevicePtr a -> a -> Int -> Stream -> IO Status
cuMemsetD16Async DevicePtr a
dptr a
val Int
n Stream
stream
    Int
4 -> Status -> IO ()
nothingIfOk (Status -> IO ()) -> IO Status -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DevicePtr a -> a -> Int -> Stream -> IO Status
forall a. DevicePtr a -> a -> Int -> Stream -> IO Status
cuMemsetD32Async DevicePtr a
dptr a
val Int
n Stream
stream
    Int
_ -> String -> IO ()
forall a. String -> IO a
cudaErrorIO String
"can only memset 8-, 16-, and 32-bit values"
    where
      stream :: Stream
stream = Stream -> Maybe Stream -> Stream
forall a. a -> Maybe a -> a
fromMaybe Stream
defaultStream Maybe Stream
mst

{-# INLINE cuMemsetD8Async #-}
cuMemsetD8Async :: (DevicePtr a) -> (a) -> (Int) -> (Stream) -> IO ((Status))
cuMemsetD8Async :: DevicePtr a -> a -> Int -> Stream -> IO Status
cuMemsetD8Async DevicePtr a
a1 a
a2 Int
a3 Stream
a4 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: b
a2' = a -> b
forall a b. a -> b
unsafeCoerce a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: Ptr ()
a4' = Stream -> Ptr ()
useStream Stream
a4} in 
  CULLong -> CUChar -> CULong -> Ptr () -> IO CInt
cuMemsetD8Async'_ CULLong
a1' CUChar
forall b. b
a2' CULong
a3' Ptr ()
a4' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 1074 "src/Foreign/CUDA/Driver/Marshal.chs" #-}


{-# INLINE cuMemsetD16Async #-}
cuMemsetD16Async :: (DevicePtr a) -> (a) -> (Int) -> (Stream) -> IO ((Status))
cuMemsetD16Async :: DevicePtr a -> a -> Int -> Stream -> IO Status
cuMemsetD16Async DevicePtr a
a1 a
a2 Int
a3 Stream
a4 =
  let {a1' :: CULLong
a1' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a1} in 
  let {a2' :: b
a2' = a -> b
forall a b. a -> b
unsafeCoerce a
a2} in 
  let {a3' :: CULong
a3' = Int -> CULong
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
a3} in 
  let {a4' :: Ptr ()
a4' = Stream -> Ptr ()
useStream Stream
a4} in 
  CULLong -> CUShort -> CULong -> Ptr () -> IO CInt
cuMemsetD16Async'_ CULLong
a1' CUShort
forall b. b
a2' CULong
a3' Ptr ()
a4' IO CInt -> (CInt -> IO Status) -> IO Status
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Status -> IO Status
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res')

{-# LINE 1081 "src/Foreign/CUDA/Driver/Marshal.chs" #-}


{-# INLINE cuMemsetD32Async #-}
cuMemsetD32Async :: (DevicePtr a) -> (a) -> (Int) -> (Stream) -> IO ((Status))
cuMemsetD32Async a1 a2 a3 a4 =
  let {a1' = useDeviceHandle a1} in 
  let {a2' = unsafeCoerce a2} in 
  let {a3' = fromIntegral a3} in 
  let {a4' = useStream a4} in 
  cuMemsetD32Async'_ a1' a2' a3' a4' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 1089 "src/Foreign/CUDA/Driver/Marshal.chs" #-}



-- |
-- Return the device pointer associated with a mapped, pinned host buffer, which
-- was allocated with the 'DeviceMapped' option by 'mallocHostArray'.
--
-- Currently, no options are supported and this must be empty.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g57a39e5cba26af4d06be67fc77cc62f0>
--
{-# INLINEABLE getDevicePtr #-}
getDevicePtr :: [AllocFlag] -> HostPtr a -> IO (DevicePtr a)
getDevicePtr :: [AllocFlag] -> HostPtr a -> IO (DevicePtr a)
getDevicePtr ![AllocFlag]
flags !HostPtr a
hp = (Status, DevicePtr a) -> IO (DevicePtr a)
forall a. (Status, a) -> IO a
resultIfOk ((Status, DevicePtr a) -> IO (DevicePtr a))
-> IO (Status, DevicePtr a) -> IO (DevicePtr a)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< HostPtr a -> [AllocFlag] -> IO (Status, DevicePtr a)
forall a. HostPtr a -> [AllocFlag] -> IO (Status, DevicePtr a)
cuMemHostGetDevicePointer HostPtr a
hp [AllocFlag]
flags

{-# INLINE cuMemHostGetDevicePointer #-}
cuMemHostGetDevicePointer :: (HostPtr a) -> ([AllocFlag]) -> IO ((Status), (DevicePtr a))
cuMemHostGetDevicePointer :: HostPtr a -> [AllocFlag] -> IO (Status, DevicePtr a)
cuMemHostGetDevicePointer HostPtr a
a2 [AllocFlag]
a3 =
  (Ptr CULLong -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall b. (Ptr CULLong -> IO b) -> IO b
alloca' ((Ptr CULLong -> IO (Status, DevicePtr a))
 -> IO (Status, DevicePtr a))
-> (Ptr CULLong -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall a b. (a -> b) -> a -> b
$ \Ptr CULLong
a1' -> 
  let {a2' :: Ptr b
a2' = HostPtr a -> Ptr b
forall a b. HostPtr a -> Ptr b
useHP HostPtr a
a2} in 
  let {a3' :: CUInt
a3' = [AllocFlag] -> CUInt
forall a b. (Enum a, Num b, Bits b) => [a] -> b
combineBitMasks [AllocFlag]
a3} in 
  Ptr CULLong -> Ptr () -> CUInt -> IO CInt
cuMemHostGetDevicePointer'_ Ptr CULLong
a1' Ptr ()
forall b. Ptr b
a2' CUInt
a3' IO CInt
-> (CInt -> IO (Status, DevicePtr a)) -> IO (Status, DevicePtr a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Ptr CULLong -> IO (DevicePtr a)
forall a. Ptr CULLong -> IO (DevicePtr a)
peekDeviceHandle  Ptr CULLong
a1'IO (DevicePtr a)
-> (DevicePtr a -> IO (Status, DevicePtr a))
-> IO (Status, DevicePtr a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \DevicePtr a
a1'' -> 
  (Status, DevicePtr a) -> IO (Status, DevicePtr a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res', DevicePtr a
a1'')

{-# LINE 1108 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    alloca'  = F.alloca
    useHP    = castPtr . useHostPtr


-- |
-- Return the base address and allocation size of the given device pointer.
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g64fee5711274a2a0573a789c94d8299b>
--
{-# INLINEABLE getBasePtr #-}
getBasePtr :: DevicePtr a -> IO (DevicePtr a, Int64)
getBasePtr :: DevicePtr a -> IO (DevicePtr a, Int64)
getBasePtr !DevicePtr a
dptr = do
  (Status
status,DevicePtr a
base,Int64
size) <- DevicePtr a -> IO (Status, DevicePtr a, Int64)
forall a. DevicePtr a -> IO (Status, DevicePtr a, Int64)
cuMemGetAddressRange DevicePtr a
dptr
  (Status, (DevicePtr a, Int64)) -> IO (DevicePtr a, Int64)
forall a. (Status, a) -> IO a
resultIfOk (Status
status, (DevicePtr a
base,Int64
size))

{-# INLINE cuMemGetAddressRange #-}
cuMemGetAddressRange :: (DevicePtr a) -> IO ((Status), (DevicePtr a), (Int64))
cuMemGetAddressRange :: DevicePtr a -> IO (Status, DevicePtr a, Int64)
cuMemGetAddressRange DevicePtr a
a3 =
  (Ptr CULLong -> IO (Status, DevicePtr a, Int64))
-> IO (Status, DevicePtr a, Int64)
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca' ((Ptr CULLong -> IO (Status, DevicePtr a, Int64))
 -> IO (Status, DevicePtr a, Int64))
-> (Ptr CULLong -> IO (Status, DevicePtr a, Int64))
-> IO (Status, DevicePtr a, Int64)
forall a b. (a -> b) -> a -> b
$ \Ptr CULLong
a1' -> 
  (Ptr CULong -> IO (Status, DevicePtr a, Int64))
-> IO (Status, DevicePtr a, Int64)
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca' ((Ptr CULong -> IO (Status, DevicePtr a, Int64))
 -> IO (Status, DevicePtr a, Int64))
-> (Ptr CULong -> IO (Status, DevicePtr a, Int64))
-> IO (Status, DevicePtr a, Int64)
forall a b. (a -> b) -> a -> b
$ \Ptr CULong
a2' -> 
  let {a3' :: CULLong
a3' = DevicePtr a -> CULLong
forall a. DevicePtr a -> CULLong
useDeviceHandle DevicePtr a
a3} in 
  Ptr CULLong -> Ptr CULong -> CULLong -> IO CInt
cuMemGetAddressRange'_ Ptr CULLong
a1' Ptr CULong
a2' CULLong
a3' IO CInt
-> (CInt -> IO (Status, DevicePtr a, Int64))
-> IO (Status, DevicePtr a, Int64)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = cToEnum CInt
res} in
  Ptr CULLong -> IO (DevicePtr a)
forall a. Ptr CULLong -> IO (DevicePtr a)
peekDeviceHandle  Ptr CULLong
a1'IO (DevicePtr a)
-> (DevicePtr a -> IO (Status, DevicePtr a, Int64))
-> IO (Status, DevicePtr a, Int64)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \DevicePtr a
a1'' -> 
  Ptr CULong -> IO Int64
forall a b. (Storable a, Integral a, Integral b) => Ptr a -> IO b
peekIntConv  Ptr CULong
a2'IO Int64
-> (Int64 -> IO (Status, DevicePtr a, Int64))
-> IO (Status, DevicePtr a, Int64)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Int64
a2'' -> 
  (Status, DevicePtr a, Int64) -> IO (Status, DevicePtr a, Int64)
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res', DevicePtr a
a1'', Int64
a2'')

{-# LINE 1129 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    alloca' :: Storable a => (Ptr a -> IO b) -> IO b
    alloca' = F.alloca

-- |
-- Return the amount of free and total memory respectively available to the
-- current context (bytes).
--
-- <http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g808f555540d0143a331cc42aa98835c0>
--
{-# INLINEABLE getMemInfo #-}
getMemInfo :: IO (Int64, Int64)
getMemInfo :: IO (Int64, Int64)
getMemInfo = do
  (!Status
status,!Int64
f,!Int64
t) <- IO (Status, Int64, Int64)
cuMemGetInfo
  (Status, (Int64, Int64)) -> IO (Int64, Int64)
forall a. (Status, a) -> IO a
resultIfOk (Status
status,(Int64
f,Int64
t))

{-# INLINE cuMemGetInfo #-}
cuMemGetInfo :: IO ((Status), (Int64), (Int64))
cuMemGetInfo :: IO (Status, Int64, Int64)
cuMemGetInfo =
  (Ptr CULong -> IO (Status, Int64, Int64))
-> IO (Status, Int64, Int64)
forall b. (Ptr CULong -> IO b) -> IO b
alloca' ((Ptr CULong -> IO (Status, Int64, Int64))
 -> IO (Status, Int64, Int64))
-> (Ptr CULong -> IO (Status, Int64, Int64))
-> IO (Status, Int64, Int64)
forall a b. (a -> b) -> a -> b
$ \Ptr CULong
a1' -> 
  (Ptr CULong -> IO (Status, Int64, Int64))
-> IO (Status, Int64, Int64)
forall b. (Ptr CULong -> IO b) -> IO b
alloca' ((Ptr CULong -> IO (Status, Int64, Int64))
 -> IO (Status, Int64, Int64))
-> (Ptr CULong -> IO (Status, Int64, Int64))
-> IO (Status, Int64, Int64)
forall a b. (a -> b) -> a -> b
$ \Ptr CULong
a2' -> 
  Ptr CULong -> Ptr CULong -> IO CInt
cuMemGetInfo'_ Ptr CULong
a1' Ptr CULong
a2' IO CInt
-> (CInt -> IO (Status, Int64, Int64)) -> IO (Status, Int64, Int64)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \CInt
res ->
  let {res' :: Status
res' = CInt -> Status
forall i e. (Integral i, Enum e) => i -> e
cToEnum CInt
res} in
  Ptr CULong -> IO Int64
forall a b. (Storable a, Integral a, Integral b) => Ptr a -> IO b
peekIntConv  Ptr CULong
a1'IO Int64
-> (Int64 -> IO (Status, Int64, Int64))
-> IO (Status, Int64, Int64)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Int64
a1'' -> 
  Ptr CULong -> IO Int64
forall a b. (Storable a, Integral a, Integral b) => Ptr a -> IO b
peekIntConv  Ptr CULong
a2'IO Int64
-> (Int64 -> IO (Status, Int64, Int64))
-> IO (Status, Int64, Int64)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Int64
a2'' -> 
  (Status, Int64, Int64) -> IO (Status, Int64, Int64)
forall (m :: * -> *) a. Monad m => a -> m a
return (Status
res', Int64
a1'', Int64
a2'')

{-# LINE 1149 "src/Foreign/CUDA/Driver/Marshal.chs" #-}

  where
    alloca' = F.alloca


--------------------------------------------------------------------------------
-- Internal
--------------------------------------------------------------------------------

type DeviceHandle = (C2HSImp.CULLong)
{-# LINE 1158 "src/Foreign/CUDA/Driver/Marshal.chs" #-}


-- Lift an opaque handle to a typed DevicePtr representation. This occasions
-- arcane distinctions for the different driver versions and Tesla (compute 1.x)
-- and Fermi (compute 2.x) class architectures on 32- and 64-bit hosts.
--
{-# INLINE peekDeviceHandle #-}
peekDeviceHandle :: Ptr DeviceHandle -> IO (DevicePtr a)
peekDeviceHandle :: Ptr CULLong -> IO (DevicePtr a)
peekDeviceHandle !Ptr CULLong
p = do
  CULLong (W64# Word#
w#) <- Ptr CULLong -> IO CULLong
forall a. Storable a => Ptr a -> IO a
peek Ptr CULLong
p
  DevicePtr a -> IO (DevicePtr a)
forall (m :: * -> *) a. Monad m => a -> m a
return (DevicePtr a -> IO (DevicePtr a))
-> DevicePtr a -> IO (DevicePtr a)
forall a b. (a -> b) -> a -> b
$! Ptr a -> DevicePtr a
forall a. Ptr a -> DevicePtr a
DevicePtr (Addr# -> Ptr a
forall a. Addr# -> Ptr a
Ptr (Int# -> Addr#
int2Addr# (Word# -> Int#
word2Int# Word#
w#)))

-- Use a device pointer as an opaque handle type
--
{-# INLINE useDeviceHandle #-}
useDeviceHandle :: DevicePtr a -> DeviceHandle
useDeviceHandle :: DevicePtr a -> CULLong
useDeviceHandle (DevicePtr (Ptr Addr#
addr#)) =
  Word64 -> CULLong
CULLong (Word# -> Word64
W64# (Int# -> Word#
int2Word# (Addr# -> Int#
addr2Int# Addr#
addr#)))


foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemHostAlloc"
  cuMemHostAlloc'_ :: ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> (C2HSImp.CULong -> (C2HSImp.CUInt -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemFreeHost"
  cuMemFreeHost'_ :: ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemHostRegister"
  cuMemHostRegister'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> (C2HSImp.CUInt -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemHostUnregister"
  cuMemHostUnregister'_ :: ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemAlloc"
  cuMemAlloc'_ :: ((C2HSImp.Ptr C2HSImp.CULLong) -> (C2HSImp.CULong -> (IO C2HSImp.CInt)))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemFree"
  cuMemFree'_ :: (C2HSImp.CULLong -> (IO C2HSImp.CInt))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemAllocManaged"
  cuMemAllocManaged'_ :: ((C2HSImp.Ptr C2HSImp.CULLong) -> (C2HSImp.CULong -> (C2HSImp.CUInt -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemPrefetchAsync"
  cuMemPrefetchAsync'_ :: (C2HSImp.CULLong -> (C2HSImp.CULong -> (C2HSImp.CInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuStreamAttachMemAsync"
  cuStreamAttachMemAsync'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CULLong -> (C2HSImp.CULong -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))))

foreign import ccall safe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyDtoH"
  cuMemcpyDtoH'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CULLong -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))

foreign import ccall safe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyDtoHAsync"
  cuMemcpyDtoHAsync'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CULLong -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))

foreign import ccall safe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DDtoH"
  cuMemcpy2DDtoH'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))))))))))

foreign import ccall safe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DDtoHAsync"
  cuMemcpy2DDtoHAsync'_ :: ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))))))))))))

foreign import ccall safe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyHtoD"
  cuMemcpyHtoD'_ :: (C2HSImp.CULLong -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))

foreign import ccall safe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyHtoDAsync"
  cuMemcpyHtoDAsync'_ :: (C2HSImp.CULLong -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))

foreign import ccall safe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DHtoD"
  cuMemcpy2DHtoD'_ :: (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))))))))))

foreign import ccall safe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DHtoDAsync"
  cuMemcpy2DHtoDAsync'_ :: (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))))))))))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyDtoD"
  cuMemcpyDtoD'_ :: (C2HSImp.CULLong -> (C2HSImp.CULLong -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyDtoDAsync"
  cuMemcpyDtoDAsync'_ :: (C2HSImp.CULLong -> (C2HSImp.CULLong -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DDtoD"
  cuMemcpy2DDtoD'_ :: (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (IO C2HSImp.CInt)))))))))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpy2DDtoDAsync"
  cuMemcpy2DDtoDAsync'_ :: (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> (C2HSImp.CUInt -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))))))))))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyPeer"
  cuMemcpyPeer'_ :: (C2HSImp.CULLong -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULLong -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemcpyPeerAsync"
  cuMemcpyPeerAsync'_ :: (C2HSImp.CULLong -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULLong -> ((C2HSImp.Ptr ()) -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD8"
  cuMemsetD8'_ :: (C2HSImp.CULLong -> (C2HSImp.CUChar -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD16"
  cuMemsetD16'_ :: (C2HSImp.CULLong -> (C2HSImp.CUShort -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD32"
  cuMemsetD32'_ :: (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD8Async"
  cuMemsetD8Async'_ :: (C2HSImp.CULLong -> (C2HSImp.CUChar -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD16Async"
  cuMemsetD16Async'_ :: (C2HSImp.CULLong -> (C2HSImp.CUShort -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemsetD32Async"
  cuMemsetD32Async'_ :: (C2HSImp.CULLong -> (C2HSImp.CUInt -> (C2HSImp.CULong -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemHostGetDevicePointer"
  cuMemHostGetDevicePointer'_ :: ((C2HSImp.Ptr C2HSImp.CULLong) -> ((C2HSImp.Ptr ()) -> (C2HSImp.CUInt -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemGetAddressRange"
  cuMemGetAddressRange'_ :: ((C2HSImp.Ptr C2HSImp.CULLong) -> ((C2HSImp.Ptr C2HSImp.CULong) -> (C2HSImp.CULLong -> (IO C2HSImp.CInt))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Marshal.chs.h cuMemGetInfo"
  cuMemGetInfo'_ :: ((C2HSImp.Ptr C2HSImp.CULong) -> ((C2HSImp.Ptr C2HSImp.CULong) -> (IO C2HSImp.CInt)))