-- GENERATED by C->Haskell Compiler, version 0.20.1 The shapeless maps, 31 Oct 2014 (Haskell)
-- Edit the ORIGNAL .chs file instead!


{-# LINE 1 "./Foreign/CUDA/Runtime/Device.chs" #-}
{-# LANGUAGE BangPatterns             #-}
{-# LANGUAGE CPP                      #-}
{-# LANGUAGE EmptyCase                #-}
{-# LANGUAGE EmptyDataDecls           #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
--------------------------------------------------------------------------------
-- |
-- Module    : Foreign.CUDA.Runtime.Device
-- Copyright : [2009..2014] Trevor L. McDonell
-- License   : BSD
--
-- Device management routines
--
--------------------------------------------------------------------------------

module Foreign.CUDA.Runtime.Device (

  -- * Device Management
  Device, DeviceFlag(..), DeviceProperties(..), Compute(..), ComputeMode(..),
  choose, get, count, props, set, setFlags, setOrder, reset, sync,

  -- * Peer Access
  PeerFlag,
  accessible, add, remove,

  -- * Cache Configuration
  Limit(..),
  getLimit, setLimit

) where



{-# LINE 34 "./Foreign/CUDA/Runtime/Device.chs" #-}


-- Friends
import Foreign.CUDA.Analysis.Device
import Foreign.CUDA.Runtime.Error
import Foreign.CUDA.Internal.C2HS
import Foreign.CUDA.Internal.Offsets

-- System
import Foreign
import Foreign.C

--------------------------------------------------------------------------------
-- Data Types
--------------------------------------------------------------------------------

-- |
-- A device identifier
--
type Device = Int


{-# LINE 71 "./Foreign/CUDA/Runtime/Device.chs" #-}


-- |
-- Device execution flags
--
data DeviceFlag = ScheduleAuto
                | ScheduleSpin
                | ScheduleYield
                | BlockingSync
                | MapHost
                | LMemResizeToMax
  deriving (Eq,Show)
instance Enum DeviceFlag where
  succ ScheduleAuto = ScheduleSpin
  succ ScheduleSpin = ScheduleYield
  succ ScheduleYield = BlockingSync
  succ BlockingSync = MapHost
  succ MapHost = LMemResizeToMax
  succ LMemResizeToMax = error "DeviceFlag.succ: LMemResizeToMax has no successor"

  pred ScheduleSpin = ScheduleAuto
  pred ScheduleYield = ScheduleSpin
  pred BlockingSync = ScheduleYield
  pred MapHost = BlockingSync
  pred LMemResizeToMax = MapHost
  pred ScheduleAuto = error "DeviceFlag.pred: ScheduleAuto has no predecessor"

  enumFromTo from to = go from
    where
      end = fromEnum to
      go v = case compare (fromEnum v) end of
                 LT -> v : go (succ v)
                 EQ -> [v]
                 GT -> []

  enumFrom from = enumFromTo from LMemResizeToMax

  fromEnum ScheduleAuto = 0
  fromEnum ScheduleSpin = 1
  fromEnum ScheduleYield = 2
  fromEnum BlockingSync = 4
  fromEnum MapHost = 8
  fromEnum LMemResizeToMax = 16

  toEnum 0 = ScheduleAuto
  toEnum 1 = ScheduleSpin
  toEnum 2 = ScheduleYield
  toEnum 4 = BlockingSync
  toEnum 8 = MapHost
  toEnum 16 = LMemResizeToMax
  toEnum unmatched = error ("DeviceFlag.toEnum: Cannot match " ++ show unmatched)

{-# LINE 77 "./Foreign/CUDA/Runtime/Device.chs" #-}



instance Storable DeviceProperties where
  sizeOf _    = 632
{-# LINE 81 "./Foreign/CUDA/Runtime/Device.chs" #-}

  alignment _ = alignment (undefined :: Ptr ())

  poke _ _    = error "no instance for Foreign.Storable.poke DeviceProperties"
  peek p      = do
    gm <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 256 ::IO CULong}) p
    sm <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 264 ::IO CULong}) p
    rb <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 272 ::IO CInt}) p
    ws <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 276 ::IO CInt}) p
    mp <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 280 ::IO CULong}) p
    tb <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 288 ::IO CInt}) p
    cl <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 316 ::IO CInt}) p
    cm <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 320 ::IO CULong}) p
    v1 <- fromIntegral `fmap` (\ptr -> do {peekByteOff ptr 328 ::IO CInt}) p
    v2 <- fromIntegral `fmap` (\ptr -> do {peekByteOff ptr 332 ::IO CInt}) p
    ta <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 336 ::IO CULong}) p
    ov <- cToBool      `fmap` (\ptr -> do {peekByteOff ptr 352 ::IO CInt}) p
    pc <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 356 ::IO CInt}) p
    ke <- cToBool      `fmap` (\ptr -> do {peekByteOff ptr 360 ::IO CInt}) p
    tg <- cToBool      `fmap` (\ptr -> do {peekByteOff ptr 364 ::IO CInt}) p
    hm <- cToBool      `fmap` (\ptr -> do {peekByteOff ptr 368 ::IO CInt}) p
    md <- cToEnum      `fmap` (\ptr -> do {peekByteOff ptr 372 ::IO CInt}) p
    ck <- cToBool      `fmap` (\ptr -> do {peekByteOff ptr 544 ::IO CInt}) p
    u1 <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 376 ::IO CInt}) p
    ee <- cToBool      `fmap` (\ptr -> do {peekByteOff ptr 548 ::IO CInt}) p
    ae <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 568 ::IO CInt}) p
    l2 <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 584 ::IO CInt}) p
    tm <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 588 ::IO CInt}) p
    mw <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 580 ::IO CInt}) p
    mc <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 576 ::IO CInt}) p
    pb <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 552 ::IO CInt}) p
    pd <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 556 ::IO CInt}) p
    pm <- cIntConv     `fmap` (\ptr -> do {peekByteOff ptr 560 ::IO CInt}) p
    tc <- cToBool      `fmap` (\ptr -> do {peekByteOff ptr 564 ::IO CInt}) p
    ua <- cToBool      `fmap` (\ptr -> do {peekByteOff ptr 572 ::IO CInt}) p

    --
    -- C->Haskell returns the wrong type when accessing static arrays in
    -- structs, returning the dereferenced element but with a Ptr type. Work
    -- around this with manual pointer arithmetic...
    --
    n            <- peekCString (p `plusPtr` devNameOffset)
    (t1:t2:t3:_) <- map cIntConv `fmap` peekArray 3 (p `plusPtr` devMaxThreadDimOffset :: Ptr CInt)
    (g1:g2:g3:_) <- map cIntConv `fmap` peekArray 3 (p `plusPtr` devMaxGridSizeOffset  :: Ptr CInt)
    (u21:u22:_)     <- map cIntConv `fmap` peekArray 2 (p `plusPtr` devMaxTexture2DOffset :: Ptr CInt)
    (u31:u32:u33:_) <- map cIntConv `fmap` peekArray 3 (p `plusPtr` devMaxTexture3DOffset :: Ptr CInt)

    return DeviceProperties
      {
        deviceName                      = n,
        computeCapability               = Compute v1 v2,
        totalGlobalMem                  = gm,
        totalConstMem                   = cm,
        sharedMemPerBlock               = sm,
        regsPerBlock                    = rb,
        warpSize                        = ws,
        maxThreadsPerBlock              = tb,
        maxBlockSize                    = (t1,t2,t3),
        maxGridSize                     = (g1,g2,g3),
        clockRate                       = cl,
        multiProcessorCount             = pc,
        memPitch                        = mp,
        textureAlignment                = ta,
        computeMode                     = md,
        deviceOverlap                   = ov,
        concurrentKernels               = ck,
        maxTextureDim1D                 = u1,
        maxTextureDim2D                 = (u21,u22),
        maxTextureDim3D                 = (u31,u32,u33),
        eccEnabled                      = ee,
        asyncEngineCount                = ae,
        cacheMemL2                      = l2,
        maxThreadsPerMultiProcessor     = tm,
        memBusWidth                     = mw,
        memClockRate                    = mc,
        tccDriverEnabled                = tc,
        unifiedAddressing               = ua,
        pciInfo                         = PCI pb pd pm,
        kernelExecTimeoutEnabled        = ke,
        integrated                      = tg,
        canMapHostMemory                = hm
      }


--------------------------------------------------------------------------------
-- Device Management
--------------------------------------------------------------------------------

-- |
-- Select the compute device which best matches the given criteria
--
{-# INLINEABLE choose #-}
choose :: DeviceProperties -> IO Device
choose !dev = resultIfOk =<< cudaChooseDevice dev

{-# INLINE cudaChooseDevice #-}
cudaChooseDevice :: (DeviceProperties) -> IO ((Status), (Int))
cudaChooseDevice a2 =
  alloca $ \a1' -> 
  withDevProp a2 $ \a2' -> 
  cudaChooseDevice'_ a1' a2' >>= \res ->
  let {res' = cToEnum res} in
  peekIntConv  a1'>>= \a1'' -> 
  return (res', a1'')

{-# LINE 197 "./Foreign/CUDA/Runtime/Device.chs" #-}

  where
      withDevProp = with


-- |
-- Returns which device is currently being used
--
{-# INLINEABLE get #-}
get :: IO Device
get = resultIfOk =<< cudaGetDevice

{-# INLINE cudaGetDevice #-}
cudaGetDevice :: IO ((Status), (Int))
cudaGetDevice =
  alloca $ \a1' -> 
  cudaGetDevice'_ a1' >>= \res ->
  let {res' = cToEnum res} in
  peekIntConv  a1'>>= \a1'' -> 
  return (res', a1'')

{-# LINE 211 "./Foreign/CUDA/Runtime/Device.chs" #-}



-- |
-- Returns the number of devices available for execution, with compute
-- capability >= 1.0
--
{-# INLINEABLE count #-}
count :: IO Int
count = resultIfOk =<< cudaGetDeviceCount

{-# INLINE cudaGetDeviceCount #-}
cudaGetDeviceCount :: IO ((Status), (Int))
cudaGetDeviceCount =
  alloca $ \a1' -> 
  cudaGetDeviceCount'_ a1' >>= \res ->
  let {res' = cToEnum res} in
  peekIntConv  a1'>>= \a1'' -> 
  return (res', a1'')

{-# LINE 224 "./Foreign/CUDA/Runtime/Device.chs" #-}



-- |
-- Return information about the selected compute device
--
{-# INLINEABLE props #-}
props :: Device -> IO DeviceProperties
props !n = resultIfOk =<< cudaGetDeviceProperties n

{-# INLINE cudaGetDeviceProperties #-}
cudaGetDeviceProperties :: (Int) -> IO ((Status), (DeviceProperties))
cudaGetDeviceProperties a2 =
  alloca $ \a1' -> 
  let {a2' = fromIntegral a2} in 
  cudaGetDeviceProperties'_ a1' a2' >>= \res ->
  let {res' = cToEnum res} in
  peek  a1'>>= \a1'' -> 
  return (res', a1'')

{-# LINE 237 "./Foreign/CUDA/Runtime/Device.chs" #-}



-- |
-- Set device to be used for GPU execution
--
{-# INLINEABLE set #-}
set :: Device -> IO ()
set !n = nothingIfOk =<< cudaSetDevice n

{-# INLINE cudaSetDevice #-}
cudaSetDevice :: (Int) -> IO ((Status))
cudaSetDevice a1 =
  let {a1' = fromIntegral a1} in 
  cudaSetDevice'_ a1' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 249 "./Foreign/CUDA/Runtime/Device.chs" #-}



-- |
-- Set flags to be used for device executions
--
{-# INLINEABLE setFlags #-}
setFlags :: [DeviceFlag] -> IO ()
setFlags !f = nothingIfOk =<< cudaSetDeviceFlags (combineBitMasks f)

{-# INLINE cudaSetDeviceFlags #-}
cudaSetDeviceFlags :: (Int) -> IO ((Status))
cudaSetDeviceFlags a1 =
  let {a1' = fromIntegral a1} in 
  cudaSetDeviceFlags'_ a1' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 261 "./Foreign/CUDA/Runtime/Device.chs" #-}



-- |
-- Set list of devices for CUDA execution in priority order
--
{-# INLINEABLE setOrder #-}
setOrder :: [Device] -> IO ()
setOrder !l = nothingIfOk =<< cudaSetValidDevices l (length l)

{-# INLINE cudaSetValidDevices #-}
cudaSetValidDevices :: ([Int]) -> (Int) -> IO ((Status))
cudaSetValidDevices a1 a2 =
  withArrayIntConv a1 $ \a1' -> 
  let {a2' = fromIntegral a2} in 
  cudaSetValidDevices'_ a1' a2' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 274 "./Foreign/CUDA/Runtime/Device.chs" #-}

  where
      withArrayIntConv = withArray . map cIntConv

-- |
-- Block until the device has completed all preceding requested tasks. Returns
-- an error if one of the tasks fails.
--
{-# INLINEABLE sync #-}
sync :: IO ()
{-# INLINE cudaDeviceSynchronize #-}
sync = nothingIfOk =<< cudaDeviceSynchronize
cudaDeviceSynchronize :: IO ((Status))
cudaDeviceSynchronize =
  cudaDeviceSynchronize'_ >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 292 "./Foreign/CUDA/Runtime/Device.chs" #-}


-- |
-- Explicitly destroys and cleans up all runtime resources associated with the
-- current device in the current process. Any subsequent API call will
-- reinitialise the device.
--
-- Note that this function will reset the device immediately. It is the caller’s
-- responsibility to ensure that the device is not being accessed by any other
-- host threads from the process when this function is called.
--
{-# INLINEABLE reset #-}
reset :: IO ()
{-# INLINE cudaDeviceReset #-}
reset = nothingIfOk =<< cudaDeviceReset
cudaDeviceReset :: IO ((Status))
cudaDeviceReset =
  cudaDeviceReset'_ >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 313 "./Foreign/CUDA/Runtime/Device.chs" #-}



--------------------------------------------------------------------------------
-- Peer Access
--------------------------------------------------------------------------------

-- |
-- Possible option values for direct peer memory access
--
data PeerFlag
instance Enum PeerFlag where
  toEnum   x = case x of {}
  fromEnum x = case x of {}

-- |
-- Queries if the first device can directly access the memory of the second. If
-- direct access is possible, it can then be enabled with 'add'. Requires
-- cuda-4.0.
--
{-# INLINEABLE accessible #-}
accessible :: Device -> Device -> IO Bool
accessible !dev !peer = resultIfOk =<< cudaDeviceCanAccessPeer dev peer

{-# INLINE cudaDeviceCanAccessPeer #-}
cudaDeviceCanAccessPeer :: (Device) -> (Device) -> IO ((Status), (Bool))
cudaDeviceCanAccessPeer a2 a3 =
  alloca $ \a1' -> 
  let {a2' = cIntConv a2} in 
  let {a3' = cIntConv a3} in 
  cudaDeviceCanAccessPeer'_ a1' a2' a3' >>= \res ->
  let {res' = cToEnum res} in
  peekBool  a1'>>= \a1'' -> 
  return (res', a1'')

{-# LINE 345 "./Foreign/CUDA/Runtime/Device.chs" #-}


-- |
-- If the devices of both the current and supplied contexts support unified
-- addressing, then enable allocations in the supplied context to be accessible
-- by the current context. Requires cuda-4.0.
--
{-# INLINEABLE add #-}
add :: Device -> [PeerFlag] -> IO ()
add !dev !flags = nothingIfOk =<< cudaDeviceEnablePeerAccess dev flags

{-# INLINE cudaDeviceEnablePeerAccess #-}
cudaDeviceEnablePeerAccess :: (Device) -> ([PeerFlag]) -> IO ((Status))
cudaDeviceEnablePeerAccess a1 a2 =
  let {a1' = cIntConv a1} in 
  let {a2' = combineBitMasks a2} in 
  cudaDeviceEnablePeerAccess'_ a1' a2' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 363 "./Foreign/CUDA/Runtime/Device.chs" #-}



-- |
-- Disable direct memory access from the current context to the supplied
-- context. Requires cuda-4.0.
--
{-# INLINEABLE remove #-}
remove :: Device -> IO ()
remove !dev = nothingIfOk =<< cudaDeviceDisablePeerAccess dev

{-# INLINE cudaDeviceDisablePeerAccess #-}
cudaDeviceDisablePeerAccess :: (Device) -> IO ((Status))
cudaDeviceDisablePeerAccess a1 =
  let {a1' = cIntConv a1} in 
  cudaDeviceDisablePeerAccess'_ a1' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 380 "./Foreign/CUDA/Runtime/Device.chs" #-}



--------------------------------------------------------------------------------
-- Cache Configuration
--------------------------------------------------------------------------------

-- |
-- Device limit flags
--
data Limit = Stacksize
           | Printffifosize
           | Mallocheapsize
           | Devruntimesyncdepth
           | Devruntimependinglaunchcount
  deriving (Eq,Show)
instance Enum Limit where
  succ Stacksize = Printffifosize
  succ Printffifosize = Mallocheapsize
  succ Mallocheapsize = Devruntimesyncdepth
  succ Devruntimesyncdepth = Devruntimependinglaunchcount
  succ Devruntimependinglaunchcount = error "Limit.succ: Devruntimependinglaunchcount has no successor"

  pred Printffifosize = Stacksize
  pred Mallocheapsize = Printffifosize
  pred Devruntimesyncdepth = Mallocheapsize
  pred Devruntimependinglaunchcount = Devruntimesyncdepth
  pred Stacksize = error "Limit.pred: Stacksize has no predecessor"

  enumFromTo from to = go from
    where
      end = fromEnum to
      go v = case compare (fromEnum v) end of
                 LT -> v : go (succ v)
                 EQ -> [v]
                 GT -> []

  enumFrom from = enumFromTo from Devruntimependinglaunchcount

  fromEnum Stacksize = 0
  fromEnum Printffifosize = 1
  fromEnum Mallocheapsize = 2
  fromEnum Devruntimesyncdepth = 3
  fromEnum Devruntimependinglaunchcount = 4

  toEnum 0 = Stacksize
  toEnum 1 = Printffifosize
  toEnum 2 = Mallocheapsize
  toEnum 3 = Devruntimesyncdepth
  toEnum 4 = Devruntimependinglaunchcount
  toEnum unmatched = error ("Limit.toEnum: Cannot match " ++ show unmatched)

{-# LINE 396 "./Foreign/CUDA/Runtime/Device.chs" #-}



-- |
-- Query compute 2.0 call stack limits. Requires cuda-3.1.
--
{-# INLINEABLE getLimit #-}
getLimit :: Limit -> IO Int
getLimit !l = resultIfOk =<< cudaDeviceGetLimit l

{-# INLINE cudaDeviceGetLimit #-}
cudaDeviceGetLimit :: (Limit) -> IO ((Status), (Int))
cudaDeviceGetLimit a2 =
  alloca $ \a1' -> 
  let {a2' = cFromEnum a2} in 
  cudaDeviceGetLimit'_ a1' a2' >>= \res ->
  let {res' = cToEnum res} in
  peekIntConv  a1'>>= \a1'' -> 
  return (res', a1'')

{-# LINE 420 "./Foreign/CUDA/Runtime/Device.chs" #-}



-- |
-- Set compute 2.0 call stack limits. Requires cuda-3.1.
--
{-# INLINEABLE setLimit #-}
setLimit :: Limit -> Int -> IO ()
setLimit !l !n = nothingIfOk =<< cudaDeviceSetLimit l n

{-# INLINE cudaDeviceSetLimit #-}
cudaDeviceSetLimit :: (Limit) -> (Int) -> IO ((Status))
cudaDeviceSetLimit a1 a2 =
  let {a1' = cFromEnum a1} in 
  let {a2' = cIntConv a2} in 
  cudaDeviceSetLimit'_ a1' a2' >>= \res ->
  let {res' = cToEnum res} in
  return (res')

{-# LINE 444 "./Foreign/CUDA/Runtime/Device.chs" #-}



foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaChooseDevice"
  cudaChooseDevice'_ :: ((Ptr CInt) -> ((Ptr (DeviceProperties)) -> (IO CInt)))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaGetDevice"
  cudaGetDevice'_ :: ((Ptr CInt) -> (IO CInt))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaGetDeviceCount"
  cudaGetDeviceCount'_ :: ((Ptr CInt) -> (IO CInt))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaGetDeviceProperties"
  cudaGetDeviceProperties'_ :: ((Ptr (DeviceProperties)) -> (CInt -> (IO CInt)))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaSetDevice"
  cudaSetDevice'_ :: (CInt -> (IO CInt))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaSetDeviceFlags"
  cudaSetDeviceFlags'_ :: (CUInt -> (IO CInt))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaSetValidDevices"
  cudaSetValidDevices'_ :: ((Ptr CInt) -> (CInt -> (IO CInt)))

foreign import ccall safe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceSynchronize"
  cudaDeviceSynchronize'_ :: (IO CInt)

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceReset"
  cudaDeviceReset'_ :: (IO CInt)

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceCanAccessPeer"
  cudaDeviceCanAccessPeer'_ :: ((Ptr CInt) -> (CInt -> (CInt -> (IO CInt))))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceEnablePeerAccess"
  cudaDeviceEnablePeerAccess'_ :: (CInt -> (CUInt -> (IO CInt)))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceDisablePeerAccess"
  cudaDeviceDisablePeerAccess'_ :: (CInt -> (IO CInt))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceGetLimit"
  cudaDeviceGetLimit'_ :: ((Ptr CULong) -> (CInt -> (IO CInt)))

foreign import ccall unsafe "Foreign/CUDA/Runtime/Device.chs.h cudaDeviceSetLimit"
  cudaDeviceSetLimit'_ :: (CInt -> (CULong -> (IO CInt)))