-- GENERATED by C->Haskell Compiler, version 0.28.6 Switcheroo, 25 November 2017 (Haskell)
-- Edit the ORIGNAL .chs file instead!


{-# LINE 1 "src/Foreign/CUDA/Driver/Graph/Exec.chs" #-}
{-# LANGUAGE BangPatterns             #-}
{-# LANGUAGE CPP                      #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE TemplateHaskell          #-}
--------------------------------------------------------------------------------
-- |
-- Module    : Foreign.CUDA.Driver.Graph.Exec
-- Copyright : [2018] Trevor L. McDonell
-- License   : BSD
--
-- Graph execution functions for the low-level driver interface
--
-- Requires CUDA-10
--
--------------------------------------------------------------------------------

module Foreign.CUDA.Driver.Graph.Exec (

  Executable(..),

  -- ** Execution
  launch,
  instantiate,
  destroy,
  setKernel,

) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.Ptr as C2HSImp





{-# LINE 30 "src/Foreign/CUDA/Driver/Graph/Exec.chs" #-}


import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Driver.Exec                           ( Fun(..), FunParam(..) )
import Foreign.CUDA.Driver.Graph.Base
import Foreign.CUDA.Driver.Stream                         ( Stream(..) )
import Foreign.CUDA.Internal.C2HS

import Foreign
import Foreign.C

import Control.Monad                                      ( liftM )
import Data.ByteString.Char8                              ( ByteString )
import qualified Data.ByteString.Char8                    as B
import qualified Data.ByteString.Internal                 as B


--------------------------------------------------------------------------------
-- Graph execution
--------------------------------------------------------------------------------

-- | Execute a graph in the given stream. Only one instance may execute at
-- a time; to execute a graph concurrently, it must be 'instantiate'd into
-- multiple executables.
--
-- Requires CUDA-10.0
--
-- <https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__GRAPH.html#group__CUDA__GRAPH_1g6b2dceb3901e71a390d2bd8b0491e471>
--
-- @since 0.10.0.0
--
{-# INLINEABLE launch #-}
launch :: (Executable) -> (Stream) -> IO ()
launch a1 a2 =
  let {a1' = useExecutable a1} in
  let {a2' = useStream a2} in
  launch'_ a1' a2' >>= \res ->
  checkStatus res >>
  return ()

{-# LINE 71 "src/Foreign/CUDA/Driver/Graph/Exec.chs" #-}



-- | Instantiate the task graph description of a program into an executable
-- graph.
--
-- Requires CUDA-10.0
--
-- <https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__GRAPH.html#group__CUDA__GRAPH_1g433ae118a751c9f2087f53d7add7bc2c>
--
-- @since 0.10.0.0
--
{-# INLINEABLE instantiate #-}
instantiate :: Graph -> IO Executable
instantiate !g = do
  let logSize = 2048
  allocaArray logSize $ \p_elog -> do
    (s, e, n) <- cuGraphInstantiate g p_elog logSize
    --
    case s of
      Success -> return e
      _       -> do
        errLog <- peekCStringLen (p_elog, logSize)
        cudaErrorIO (unlines [describe s, "phErrorNode = " ++ show n, errLog])

{-# INLINE cuGraphInstantiate #-}
cuGraphInstantiate :: (Graph) -> (CString) -> (Int) -> IO ((Status), (Executable), (Maybe Node))
cuGraphInstantiate a2 a4 a5 =
  alloca $ \a1' ->
  let {a2' = useGraph a2} in
  alloca $ \a3' ->
  let {a4' = castPtr a4} in
  let {a5' = fromIntegral a5} in
  cuGraphInstantiate'_ a1' a2' a3' a4' a5' >>= \res ->
  let {res' = cToEnum res} in
  peekExecutable  a1'>>= \a1'' ->
  peekErrNode  a3'>>= \a3'' ->
  return (res', a1'', a3'')

{-# LINE 107 "src/Foreign/CUDA/Driver/Graph/Exec.chs" #-}

  where
    peekExecutable  = liftM Executable . peek
    peekErrNode p   = if p == nullPtr
                        then return Nothing
                        else liftM (Just . Node) (peek p)


-- | Update the parameters for a kernel node in the given executable graph
--
-- Requires CUDA-10.1
--
-- <https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__GRAPH.html#group__CUDA__GRAPH_1gd84243569e4c3d6356b9f2eea20ed48c>
--
-- @since 0.10.1.0
--
setKernel
    :: Executable
    -> Node
    -> Fun
    -> (Int, Int, Int)  -- ^ grid dimension
    -> (Int, Int, Int)  -- ^ thread block dimensions
    -> Int              -- ^ shared memory (bytes)
    -> [FunParam]
    -> IO ()
setKernel = requireSDK 'setKernel 10.1


-- | Destroy an executable graph
--
-- Requires CUDA-10.0
--
-- <https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__GRAPH.html#group__CUDA__GRAPH_1ga32ad4944cc5d408158207c978bc43a7>
--
-- @since 0.10.0.0
--
{-# INLINEABLE destroy #-}
destroy :: (Executable) -> IO ()
destroy a1 =
  let {a1' = useExecutable a1} in
  destroy'_ a1' >>= \res ->
  checkStatus res >>
  return ()

{-# LINE 191 "src/Foreign/CUDA/Driver/Graph/Exec.chs" #-}



foreign import ccall unsafe "Foreign/CUDA/Driver/Graph/Exec.chs.h cuGraphLaunch"
  launch'_ :: ((C2HSImp.Ptr ()) -> ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt)))

foreign import ccall unsafe "Foreign/CUDA/Driver/Graph/Exec.chs.h cuGraphInstantiate"
  cuGraphInstantiate'_ :: ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> ((C2HSImp.Ptr ()) -> ((C2HSImp.Ptr (C2HSImp.Ptr ())) -> ((C2HSImp.Ptr C2HSImp.CChar) -> (C2HSImp.CULong -> (IO C2HSImp.CInt))))))

foreign import ccall unsafe "Foreign/CUDA/Driver/Graph/Exec.chs.h cuGraphExecDestroy"
  destroy'_ :: ((C2HSImp.Ptr ()) -> (IO C2HSImp.CInt))