module Foreign.CUDA.Analysis.Device
(
Compute(..), ComputeMode(..),
DeviceProperties(..), DeviceResources(..), Allocation(..), PCI(..),
deviceResources
)
where
import Data.Int
import Debug.Trace
data ComputeMode = Default
| Exclusive
| Prohibited
| ExclusiveProcess
deriving (Eq,Show)
instance Enum ComputeMode where
succ Default = Exclusive
succ Exclusive = Prohibited
succ Prohibited = ExclusiveProcess
succ ExclusiveProcess = error "ComputeMode.succ: ExclusiveProcess has no successor"
pred Exclusive = Default
pred Prohibited = Exclusive
pred ExclusiveProcess = Prohibited
pred Default = error "ComputeMode.pred: Default has no predecessor"
enumFromTo from to = go from
where
end = fromEnum to
go v = case compare (fromEnum v) end of
LT -> v : go (succ v)
EQ -> [v]
GT -> []
enumFrom from = enumFromTo from ExclusiveProcess
fromEnum Default = 0
fromEnum Exclusive = 1
fromEnum Prohibited = 2
fromEnum ExclusiveProcess = 3
toEnum 0 = Default
toEnum 1 = Exclusive
toEnum 2 = Prohibited
toEnum 3 = ExclusiveProcess
toEnum unmatched = error ("ComputeMode.toEnum: Cannot match " ++ show unmatched)
data Compute = Compute !Int !Int
deriving Eq
instance Show Compute where
show (Compute major minor) = show major ++ "." ++ show minor
instance Ord Compute where
compare (Compute m1 n1) (Compute m2 n2) =
case compare m1 m2 of
EQ -> compare n1 n2
x -> x
data DeviceProperties = DeviceProperties
{
deviceName :: !String,
computeCapability :: !Compute,
totalGlobalMem :: !Int64,
totalConstMem :: !Int64,
sharedMemPerBlock :: !Int64,
regsPerBlock :: !Int,
warpSize :: !Int,
maxThreadsPerBlock :: !Int,
maxThreadsPerMultiProcessor :: !Int,
maxBlockSize :: !(Int,Int,Int),
maxGridSize :: !(Int,Int,Int),
maxTextureDim1D :: !Int,
maxTextureDim2D :: !(Int,Int),
maxTextureDim3D :: !(Int,Int,Int),
clockRate :: !Int,
multiProcessorCount :: !Int,
memPitch :: !Int64,
memBusWidth :: !Int,
memClockRate :: !Int,
textureAlignment :: !Int64,
computeMode :: !ComputeMode,
deviceOverlap :: !Bool,
concurrentKernels :: !Bool,
eccEnabled :: !Bool,
asyncEngineCount :: !Int,
cacheMemL2 :: !Int,
tccDriverEnabled :: !Bool,
pciInfo :: !PCI,
kernelExecTimeoutEnabled :: !Bool,
integrated :: !Bool,
canMapHostMemory :: !Bool,
unifiedAddressing :: !Bool
}
deriving (Show)
data PCI = PCI
{
busID :: !Int,
deviceID :: !Int,
domainID :: !Int
}
deriving (Show)
data Allocation = Warp | Block
data DeviceResources = DeviceResources
{
threadsPerWarp :: !Int,
threadsPerMP :: !Int,
threadBlocksPerMP :: !Int,
warpsPerMP :: !Int,
coresPerMP :: !Int,
sharedMemPerMP :: !Int,
sharedMemAllocUnit :: !Int,
regFileSize :: !Int,
regAllocUnit :: !Int,
regAllocWarp :: !Int,
regPerThread :: !Int,
allocation :: !Allocation
}
deviceResources :: DeviceProperties -> DeviceResources
deviceResources = resources . computeCapability
where
resources compute = case compute of
Compute 1 0 -> DeviceResources 32 768 8 24 8 16384 512 8192 256 2 124 Block
Compute 1 1 -> DeviceResources 32 768 8 24 8 16384 512 8192 256 2 124 Block
Compute 1 2 -> DeviceResources 32 1024 8 32 8 16384 512 16384 512 2 124 Block
Compute 1 3 -> DeviceResources 32 1024 8 32 8 16384 512 16384 512 2 124 Block
Compute 2 0 -> DeviceResources 32 1536 8 48 32 49152 128 32768 64 2 63 Warp
Compute 2 1 -> DeviceResources 32 1536 8 48 48 49152 128 32768 64 2 63 Warp
Compute 3 0 -> DeviceResources 32 2048 16 64 192 49152 256 65536 256 4 63 Warp
Compute 3 2 -> DeviceResources 32 2048 16 64 192 49152 256 65536 256 4 255 Warp
Compute 3 5 -> DeviceResources 32 2048 16 64 192 49152 256 65536 256 4 255 Warp
Compute 3 7 -> DeviceResources 32 2048 16 64 192 114688 256 131072 256 4 266 Warp
Compute 5 0 -> DeviceResources 32 2048 32 64 128 65536 256 65536 256 4 255 Warp
Compute 5 2 -> DeviceResources 32 2048 32 64 128 98304 256 65536 256 4 255 Warp
_ -> trace warning $ resources (Compute 3 0)
where warning = unlines [ "*** Warning: Unknown CUDA device compute capability: " ++ show compute
, "*** Please submit a bug report at https://github.com/tmcdonell/cuda/issues" ]