module Foreign.CUDA.Driver.Module.Base (
Module(..),
JITOption(..), JITTarget(..), JITResult(..), JITFallback(..), JITInputType(..),
JITOptionInternal(..),
loadFile,
loadData, loadDataFromPtr,
loadDataEx, loadDataFromPtrEx,
unload,
jitOptionUnpack, jitTargetOfCompute,
) where
import Foreign.CUDA.Analysis.Device
import Foreign.CUDA.Driver.Error
import Foreign.CUDA.Internal.C2HS
import Foreign
import Foreign.C
import Unsafe.Coerce
import Control.Monad ( liftM )
import Data.ByteString.Char8 ( ByteString )
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Internal as B
-- | Handle to a module that has been loaded through the driver API. It wraps
-- the raw pointer that the @cuModuleLoad*@ bindings in this file read back
-- from their out-parameter (see 'peekMod').
newtype Module = Module
  { useModule :: Ptr ()   -- ^ the underlying raw handle
  }
  deriving (Eq, Show)
-- | User-facing options for the just-in-time compiler, accepted by
-- 'loadDataEx' and 'loadDataFromPtrEx'. Each constructor is translated by
-- 'jitOptionUnpack' into one 'JITOptionInternal' tag and an 'Int' payload.
data JITOption
  = MaxRegisters      !Int          -- ^ sent as @JIT_MAX_REGISTERS@ with the given value
  | ThreadsPerBlock   !Int          -- ^ sent as @JIT_THREADS_PER_BLOCK@ with the given value
  | OptimisationLevel !Int          -- ^ sent as @JIT_OPTIMIZATION_LEVEL@ with the given value
  | Target            !Compute      -- ^ sent as @JIT_TARGET@, converted via 'jitTargetOfCompute'
  | FallbackStrategy  !JITFallback  -- ^ sent as @JIT_FALLBACK_STRATEGY@
  | GenerateDebugInfo               -- ^ switches on @JIT_GENERATE_DEBUG_INFO@
  | GenerateLineInfo                -- ^ switches on @JIT_GENERATE_LINE_INFO@
  | Verbose                         -- ^ switches on @JIT_LOG_VERBOSE@
  deriving (Show)
-- | What 'loadDataFromPtrEx' hands back after a successful JIT load.
data JITResult = JITResult
  { jitTime    :: !Float       -- ^ value the driver stored in the @JIT_WALL_TIME@ option slot
  , jitInfoLog :: !ByteString  -- ^ contents of the information log buffer, up to the first NUL
  , jitModule  :: !Module      -- ^ handle of the freshly loaded module
  }
  deriving (Show)
-- | Compilation targets that can be requested from the JIT compiler. The
-- constructor names mirror the compute capabilities handled by
-- 'jitTargetOfCompute' (e.g. @Compute 3 5@ becomes 'Compute35').
data JITTarget
  = Compute10
  | Compute11
  | Compute12
  | Compute13
  | Compute20
  | Compute21
  | Compute30
  | Compute32
  | Compute35
  | Compute37
  | Compute50
  | Compute52
  deriving (Eq, Show)

-- | The numeric code of a target is the two-digit number in its name, so the
-- codes are not contiguous. 'succ', 'pred', 'enumFrom' and 'enumFromTo' step
-- through the constructors in declaration order rather than through the
-- integers; 'toEnum' rejects any integer that is not one of the listed codes.
instance Enum JITTarget where
  succ target = case target of
    Compute10 -> Compute11
    Compute11 -> Compute12
    Compute12 -> Compute13
    Compute13 -> Compute20
    Compute20 -> Compute21
    Compute21 -> Compute30
    Compute30 -> Compute32
    Compute32 -> Compute35
    Compute35 -> Compute37
    Compute37 -> Compute50
    Compute50 -> Compute52
    Compute52 -> error "JITTarget.succ: Compute52 has no successor"

  pred target = case target of
    Compute10 -> error "JITTarget.pred: Compute10 has no predecessor"
    Compute11 -> Compute10
    Compute12 -> Compute11
    Compute13 -> Compute12
    Compute20 -> Compute13
    Compute21 -> Compute20
    Compute30 -> Compute21
    Compute32 -> Compute30
    Compute35 -> Compute32
    Compute37 -> Compute35
    Compute50 -> Compute37
    Compute52 -> Compute50

  -- Walk forward with 'succ' until the code of the upper bound is reached;
  -- an upper bound below the start yields the empty list.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk cur
        | code <  limit = cur : walk (succ cur)
        | code == limit = [cur]
        | otherwise     = []
        where
          code = fromEnum cur

  enumFrom lo = enumFromTo lo Compute52

  fromEnum target = case target of
    Compute10 -> 10
    Compute11 -> 11
    Compute12 -> 12
    Compute13 -> 13
    Compute20 -> 20
    Compute21 -> 21
    Compute30 -> 30
    Compute32 -> 32
    Compute35 -> 35
    Compute37 -> 37
    Compute50 -> 50
    Compute52 -> 52

  toEnum code = case code of
    10 -> Compute10
    11 -> Compute11
    12 -> Compute12
    13 -> Compute13
    20 -> Compute20
    21 -> Compute21
    30 -> Compute30
    32 -> Compute32
    35 -> Compute35
    37 -> Compute37
    50 -> Compute50
    52 -> Compute52
    _  -> error ("JITTarget.toEnum: Cannot match " ++ show code)
-- | Payload of the 'FallbackStrategy' option, which 'jitOptionUnpack' sends to
-- the driver as @JIT_FALLBACK_STRATEGY@ using the numeric codes below.
data JITFallback
  = PreferPTX
  | PreferBinary
  deriving (Eq, Show)

-- | 'PreferPTX' is encoded as 0 and 'PreferBinary' as 1; any other integer is
-- rejected by 'toEnum'.
instance Enum JITFallback where
  succ strategy = case strategy of
    PreferPTX    -> PreferBinary
    PreferBinary -> error "JITFallback.succ: PreferBinary has no successor"

  pred strategy = case strategy of
    PreferPTX    -> error "JITFallback.pred: PreferPTX has no predecessor"
    PreferBinary -> PreferPTX

  -- Walk forward with 'succ' until the code of the upper bound is reached;
  -- an upper bound below the start yields the empty list.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk cur
        | code <  limit = cur : walk (succ cur)
        | code == limit = [cur]
        | otherwise     = []
        where
          code = fromEnum cur

  enumFrom lo = enumFromTo lo PreferBinary

  fromEnum strategy = case strategy of
    PreferPTX    -> 0
    PreferBinary -> 1

  toEnum code = case code of
    0 -> PreferPTX
    1 -> PreferBinary
    _ -> error ("JITFallback.toEnum: Cannot match " ++ show code)
-- | Kinds of input image, numbered consecutively from 0. The final
-- constructor, 'CuJitNumInputTypes', carries the code one past 'Library'
-- (NOTE(review): presumably the driver's "number of input types" sentinel
-- rather than a real input kind -- confirm against the CUDA headers).
data JITInputType
  = Cubin
  | PTX
  | Fatbinary
  | Object
  | Library
  | CuJitNumInputTypes
  deriving (Eq, Show)

-- | Codes run from 0 ('Cubin') to 5 ('CuJitNumInputTypes'); any other integer
-- is rejected by 'toEnum'.
instance Enum JITInputType where
  succ input = case input of
    Cubin              -> PTX
    PTX                -> Fatbinary
    Fatbinary          -> Object
    Object             -> Library
    Library            -> CuJitNumInputTypes
    CuJitNumInputTypes -> error "JITInputType.succ: CuJitNumInputTypes has no successor"

  pred input = case input of
    Cubin              -> error "JITInputType.pred: Cubin has no predecessor"
    PTX                -> Cubin
    Fatbinary          -> PTX
    Object             -> Fatbinary
    Library            -> Object
    CuJitNumInputTypes -> Library

  -- Walk forward with 'succ' until the code of the upper bound is reached;
  -- an upper bound below the start yields the empty list.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk cur
        | code <  limit = cur : walk (succ cur)
        | code == limit = [cur]
        | otherwise     = []
        where
          code = fromEnum cur

  enumFrom lo = enumFromTo lo CuJitNumInputTypes

  fromEnum input = case input of
    Cubin              -> 0
    PTX                -> 1
    Fatbinary          -> 2
    Object             -> 3
    Library            -> 4
    CuJitNumInputTypes -> 5

  toEnum code = case code of
    0 -> Cubin
    1 -> PTX
    2 -> Fatbinary
    3 -> Object
    4 -> Library
    5 -> CuJitNumInputTypes
    _ -> error ("JITInputType.toEnum: Cannot match " ++ show code)
-- | Raw option tags handed to the driver in the options array of
-- @cuModuleLoadDataEx@. Users normally work with 'JITOption', which
-- 'jitOptionUnpack' converts into one of these tags plus an integer payload.
data JITOptionInternal
  = JIT_MAX_REGISTERS
  | JIT_THREADS_PER_BLOCK
  | JIT_WALL_TIME
  | JIT_INFO_LOG_BUFFER
  | JIT_INFO_LOG_BUFFER_SIZE_BYTES
  | JIT_ERROR_LOG_BUFFER
  | JIT_ERROR_LOG_BUFFER_SIZE_BYTES
  | JIT_OPTIMIZATION_LEVEL
  | JIT_TARGET_FROM_CUCONTEXT
  | JIT_TARGET
  | JIT_FALLBACK_STRATEGY
  | JIT_GENERATE_DEBUG_INFO
  | JIT_LOG_VERBOSE
  | JIT_GENERATE_LINE_INFO
  | JIT_CACHE_MODE
  | JIT_NUM_OPTIONS
  deriving (Eq, Show)

-- | Tags are numbered consecutively from 0 ('JIT_MAX_REGISTERS') to 15
-- ('JIT_NUM_OPTIONS'); any other integer is rejected by 'toEnum'.
instance Enum JITOptionInternal where
  succ tag = case tag of
    JIT_MAX_REGISTERS               -> JIT_THREADS_PER_BLOCK
    JIT_THREADS_PER_BLOCK           -> JIT_WALL_TIME
    JIT_WALL_TIME                   -> JIT_INFO_LOG_BUFFER
    JIT_INFO_LOG_BUFFER             -> JIT_INFO_LOG_BUFFER_SIZE_BYTES
    JIT_INFO_LOG_BUFFER_SIZE_BYTES  -> JIT_ERROR_LOG_BUFFER
    JIT_ERROR_LOG_BUFFER            -> JIT_ERROR_LOG_BUFFER_SIZE_BYTES
    JIT_ERROR_LOG_BUFFER_SIZE_BYTES -> JIT_OPTIMIZATION_LEVEL
    JIT_OPTIMIZATION_LEVEL          -> JIT_TARGET_FROM_CUCONTEXT
    JIT_TARGET_FROM_CUCONTEXT       -> JIT_TARGET
    JIT_TARGET                      -> JIT_FALLBACK_STRATEGY
    JIT_FALLBACK_STRATEGY           -> JIT_GENERATE_DEBUG_INFO
    JIT_GENERATE_DEBUG_INFO         -> JIT_LOG_VERBOSE
    JIT_LOG_VERBOSE                 -> JIT_GENERATE_LINE_INFO
    JIT_GENERATE_LINE_INFO          -> JIT_CACHE_MODE
    JIT_CACHE_MODE                  -> JIT_NUM_OPTIONS
    JIT_NUM_OPTIONS                 -> error "JITOptionInternal.succ: JIT_NUM_OPTIONS has no successor"

  pred tag = case tag of
    JIT_MAX_REGISTERS               -> error "JITOptionInternal.pred: JIT_MAX_REGISTERS has no predecessor"
    JIT_THREADS_PER_BLOCK           -> JIT_MAX_REGISTERS
    JIT_WALL_TIME                   -> JIT_THREADS_PER_BLOCK
    JIT_INFO_LOG_BUFFER             -> JIT_WALL_TIME
    JIT_INFO_LOG_BUFFER_SIZE_BYTES  -> JIT_INFO_LOG_BUFFER
    JIT_ERROR_LOG_BUFFER            -> JIT_INFO_LOG_BUFFER_SIZE_BYTES
    JIT_ERROR_LOG_BUFFER_SIZE_BYTES -> JIT_ERROR_LOG_BUFFER
    JIT_OPTIMIZATION_LEVEL          -> JIT_ERROR_LOG_BUFFER_SIZE_BYTES
    JIT_TARGET_FROM_CUCONTEXT       -> JIT_OPTIMIZATION_LEVEL
    JIT_TARGET                      -> JIT_TARGET_FROM_CUCONTEXT
    JIT_FALLBACK_STRATEGY           -> JIT_TARGET
    JIT_GENERATE_DEBUG_INFO         -> JIT_FALLBACK_STRATEGY
    JIT_LOG_VERBOSE                 -> JIT_GENERATE_DEBUG_INFO
    JIT_GENERATE_LINE_INFO          -> JIT_LOG_VERBOSE
    JIT_CACHE_MODE                  -> JIT_GENERATE_LINE_INFO
    JIT_NUM_OPTIONS                 -> JIT_CACHE_MODE

  -- Walk forward with 'succ' until the code of the upper bound is reached;
  -- an upper bound below the start yields the empty list.
  enumFromTo lo hi = walk lo
    where
      limit = fromEnum hi
      walk cur
        | code <  limit = cur : walk (succ cur)
        | code == limit = [cur]
        | otherwise     = []
        where
          code = fromEnum cur

  enumFrom lo = enumFromTo lo JIT_NUM_OPTIONS

  fromEnum tag = case tag of
    JIT_MAX_REGISTERS               -> 0
    JIT_THREADS_PER_BLOCK           -> 1
    JIT_WALL_TIME                   -> 2
    JIT_INFO_LOG_BUFFER             -> 3
    JIT_INFO_LOG_BUFFER_SIZE_BYTES  -> 4
    JIT_ERROR_LOG_BUFFER            -> 5
    JIT_ERROR_LOG_BUFFER_SIZE_BYTES -> 6
    JIT_OPTIMIZATION_LEVEL          -> 7
    JIT_TARGET_FROM_CUCONTEXT       -> 8
    JIT_TARGET                      -> 9
    JIT_FALLBACK_STRATEGY           -> 10
    JIT_GENERATE_DEBUG_INFO         -> 11
    JIT_LOG_VERBOSE                 -> 12
    JIT_GENERATE_LINE_INFO          -> 13
    JIT_CACHE_MODE                  -> 14
    JIT_NUM_OPTIONS                 -> 15

  toEnum code = case code of
    0  -> JIT_MAX_REGISTERS
    1  -> JIT_THREADS_PER_BLOCK
    2  -> JIT_WALL_TIME
    3  -> JIT_INFO_LOG_BUFFER
    4  -> JIT_INFO_LOG_BUFFER_SIZE_BYTES
    5  -> JIT_ERROR_LOG_BUFFER
    6  -> JIT_ERROR_LOG_BUFFER_SIZE_BYTES
    7  -> JIT_OPTIMIZATION_LEVEL
    8  -> JIT_TARGET_FROM_CUCONTEXT
    9  -> JIT_TARGET
    10 -> JIT_FALLBACK_STRATEGY
    11 -> JIT_GENERATE_DEBUG_INFO
    12 -> JIT_LOG_VERBOSE
    13 -> JIT_GENERATE_LINE_INFO
    14 -> JIT_CACHE_MODE
    15 -> JIT_NUM_OPTIONS
    _  -> error ("JITOptionInternal.toEnum: Cannot match " ++ show code)
-- | Load a module from the file at the given path. The path is passed to
-- @cuModuleLoad@ and the resulting (status, module) pair is handed to
-- 'resultIfOk', which decides between returning the module and reporting the
-- failure.
loadFile :: FilePath -> IO Module
loadFile !ptx = cuModuleLoad ptx >>= resultIfOk
-- | Marshalling wrapper around the raw @cuModuleLoad@ import: allocates a
-- temporary slot for the module handle, passes the path as a C string, and
-- returns the decoded status together with whatever handle the slot holds
-- after the call.
cuModuleLoad :: FilePath -> IO (Status, Module)
cuModuleLoad path =
  alloca $ \p_mdl ->
    withCString path $ \c_path -> do
      rc  <- cuModuleLoad'_ p_mdl c_path
      mdl <- peekMod p_mdl
      return (cToEnum rc, mdl)
-- | Load a module from an image held in a 'ByteString'. The bytes are exposed
-- to the driver through 'B.useAsCString', so the pointer is only used for the
-- duration of the load.
loadData :: ByteString -> IO Module
loadData !img = B.useAsCString img (loadDataFromPtr . castPtr)
-- | Load a module from an image that is already in memory at the given
-- address. The (status, module) pair produced by @cuModuleLoadData@ is handed
-- to 'resultIfOk'.
loadDataFromPtr :: Ptr Word8 -> IO Module
loadDataFromPtr !img = cuModuleLoadData img >>= resultIfOk
-- | Marshalling wrapper around the raw @cuModuleLoadData@ import: allocates a
-- temporary slot for the module handle, passes the image pointer untyped, and
-- returns the decoded status together with whatever handle the slot holds
-- after the call.
cuModuleLoadData :: Ptr Word8 -> IO (Status, Module)
cuModuleLoadData image =
  alloca $ \p_mdl -> do
    rc  <- cuModuleLoadData'_ p_mdl (castPtr image)
    mdl <- peekMod p_mdl
    return (cToEnum rc, mdl)
-- | Like 'loadData', but with explicit JIT options; the work is delegated to
-- 'loadDataFromPtrEx' while the image bytes are exposed via 'B.useAsCString'.
loadDataEx :: ByteString -> [JITOption] -> IO JITResult
loadDataEx !img !options = B.useAsCString img withImage
  where
    withImage p_img = loadDataFromPtrEx (castPtr p_img) options
-- | Load a module from an in-memory image, passing the given options to the
-- JIT compiler.
--
-- Besides the caller's options, five bookkeeping options are always sent: a
-- wall-time slot and an information and an error log buffer of 2048 bytes
-- each, together with their sizes.
--
-- On 'Success' the result carries the wall-time slot's value, the information
-- log up to its first NUL, and the module handle. On any other status the
-- error is reported through 'cudaError', with the status description and the
-- contents of the error log.
loadDataFromPtrEx :: Ptr Word8 -> [JITOption] -> IO JITResult
loadDataFromPtrEx !img !options = do
  let logSize = 2048

  fp_ilog <- B.mallocByteString logSize

  allocaArray logSize $ \p_elog ->
    withForeignPtr fp_ilog $ \p_ilog -> do
      -- Neither allocator initialises its memory. Start both logs out as the
      -- empty string so that a call which fails before the driver writes
      -- anything reports an empty log instead of whatever bytes happened to
      -- be there.
      poke (p_ilog :: CString) 0
      poke (p_elog :: CString) 0

      let -- The driver takes every option value in a pointer-sized slot, so
          -- integers and addresses travel through the same 'Int' list.
          ptrToInt :: Ptr a -> Int
          ptrToInt = fromIntegral . ptrToIntPtr

          intToPtr :: Int -> Ptr ()
          intToPtr = intPtrToPtr . fromIntegral

          (opt, val) = unzip $
            [ (JIT_WALL_TIME, 0)  -- must stay first: read back from slot 0 below
            , (JIT_INFO_LOG_BUFFER_SIZE_BYTES,  logSize)
            , (JIT_ERROR_LOG_BUFFER_SIZE_BYTES, logSize)
            , (JIT_INFO_LOG_BUFFER,  ptrToInt p_ilog)
            , (JIT_ERROR_LOG_BUFFER, ptrToInt p_elog)
            ]
            ++ map jitOptionUnpack options

      withArrayLen (map cFromEnum opt) $ \i p_opts ->
        withArray (map intToPtr val) $ \p_vals -> do
          (s, mdl) <- cuModuleLoadDataEx img i p_opts p_vals

          case s of
            Success -> do
              -- the wall-time value lives in the first value slot
              time    <- peek (castPtr p_vals)
              infoLog <- B.fromForeignPtr (castForeignPtr fp_ilog) 0 `fmap` c_strnlen p_ilog logSize
              return $! JITResult time infoLog mdl

            _ -> do
              -- Never read past the end of the buffer, even if the log
              -- filled it completely without a terminating NUL.
              errLen <- c_strnlen p_elog logSize
              errLog <- peekCStringLen (p_elog, errLen)
              cudaError (unlines [describe s, errLog])
-- | Marshalling wrapper around the raw @cuModuleLoadDataEx@ import. Takes the
-- image pointer, the number of options, the array of option tags and the
-- array of option values; returns the decoded status together with whatever
-- module handle the temporary out-slot holds after the call.
cuModuleLoadDataEx :: Ptr Word8 -> Int -> Ptr CInt -> Ptr (Ptr ()) -> IO (Status, Module)
cuModuleLoadDataEx image numOptions p_opts p_vals =
  alloca $ \p_mdl -> do
    rc  <- cuModuleLoadDataEx'_ p_mdl (castPtr image) (fromIntegral numOptions) p_opts p_vals
    mdl <- peekMod p_mdl
    return (cToEnum rc, mdl)
-- | Unload a module. The status returned by @cuModuleUnload@ is handed to
-- 'nothingIfOk'.
unload :: Module -> IO ()
unload !m = cuModuleUnload m >>= nothingIfOk
-- | Marshalling wrapper around the raw @cuModuleUnload@ import: unwraps the
-- module handle and decodes the returned status code.
cuModuleUnload :: Module -> IO Status
cuModuleUnload mdl = cToEnum `fmap` cuModuleUnload'_ (useModule mdl)
-- | Read a raw module handle out of the given slot and wrap it in 'Module'.
peekMod :: Ptr (Ptr ()) -> IO Module
peekMod slot = Module `fmap` peek slot
-- | Translate a user-facing option into the raw tag understood by the driver
-- and the integer payload that accompanies it. Flag-like options carry
-- @fromEnum True@ as their payload.
jitOptionUnpack :: JITOption -> (JITOptionInternal, Int)
jitOptionUnpack option = case option of
  MaxRegisters x      -> (JIT_MAX_REGISTERS,       x)
  ThreadsPerBlock x   -> (JIT_THREADS_PER_BLOCK,   x)
  OptimisationLevel x -> (JIT_OPTIMIZATION_LEVEL,  x)
  Target x            -> (JIT_TARGET,              fromEnum (jitTargetOfCompute x))
  FallbackStrategy x  -> (JIT_FALLBACK_STRATEGY,   fromEnum x)
  GenerateDebugInfo   -> (JIT_GENERATE_DEBUG_INFO, enabled)
  GenerateLineInfo    -> (JIT_GENERATE_LINE_INFO,  enabled)
  Verbose             -> (JIT_LOG_VERBOSE,         enabled)
  where
    enabled = fromEnum True
-- | Map a compute capability onto the matching JIT target. Capabilities
-- without a corresponding 'JITTarget' constructor are a programming error and
-- abort with a message naming the offending value.
jitTargetOfCompute :: Compute -> JITTarget
jitTargetOfCompute compute = case compute of
  Compute 1 0 -> Compute10
  Compute 1 1 -> Compute11
  Compute 1 2 -> Compute12
  Compute 1 3 -> Compute13
  Compute 2 0 -> Compute20
  Compute 2 1 -> Compute21
  Compute 3 0 -> Compute30
  Compute 3 2 -> Compute32
  Compute 3 5 -> Compute35
  Compute 3 7 -> Compute37
  Compute 5 0 -> Compute50
  Compute 5 2 -> Compute52
  _           -> error ("Unknown JIT Target for Compute " ++ show compute)
-- Raw binding to the C library's @strnlen@.
foreign import ccall unsafe "string.h strnlen" c_strnlen'
  :: CString -> CSize -> IO CSize

-- | Length of the C string at the given address, looking at no more than
-- @maxlen@ bytes. Thin wrapper that converts between 'Int' and 'CSize' with
-- 'cIntConv'.
c_strnlen :: CString -> Int -> IO Int
c_strnlen str maxlen = fmap cIntConv (c_strnlen' str (cIntConv maxlen))
-- Raw driver entry points used by the marshalling wrappers in this file. Each
-- returns the driver's status code as a 'CInt'; the loaders additionally
-- write the module handle through their first argument.

foreign import ccall unsafe "Foreign/CUDA/Driver/Module/Base.chs.h cuModuleLoad"
  cuModuleLoad'_ :: Ptr (Ptr ()) -> CString -> IO CInt

foreign import ccall unsafe "Foreign/CUDA/Driver/Module/Base.chs.h cuModuleLoadData"
  cuModuleLoadData'_ :: Ptr (Ptr ()) -> Ptr () -> IO CInt

foreign import ccall unsafe "Foreign/CUDA/Driver/Module/Base.chs.h cuModuleLoadDataEx"
  cuModuleLoadDataEx'_ :: Ptr (Ptr ()) -> Ptr () -> CUInt -> Ptr CInt -> Ptr (Ptr ()) -> IO CInt

foreign import ccall unsafe "Foreign/CUDA/Driver/Module/Base.chs.h cuModuleUnload"
  cuModuleUnload'_ :: Ptr () -> IO CInt