{-# LINE 1 "src/Foreign/CUDA/Analysis/Device.chs" #-}
module Foreign.CUDA.Analysis.Device (
Compute(..), ComputeMode(..),
DeviceProperties(..), DeviceResources(..), Allocation(..), PCI(..),
deviceResources,
describe
) where
import Data.Int
import Text.Show.Describe
import Debug.Trace
-- | The compute mode a device is currently operating in.
--
-- The human-readable meaning of each mode is given by the 'Describe'
-- instance. The numeric encoding used when converting to and from 'Int' is
-- fixed by the hand-written 'Enum' instance and is /not/ contiguous.
data ComputeMode
  = Default
  | Prohibited
  | ExclusiveProcess
  deriving (Eq, Show)
-- | Enumeration over 'ComputeMode' with a non-contiguous numeric encoding:
-- 'Default' is 0, 'Prohibited' is 2 and 'ExclusiveProcess' is 3. There is no
-- constructor for 1, so @toEnum 1@ raises an error like any other unmatched
-- value. (Presumably these codes mirror the CUDA driver's compute-mode
-- enumeration -- confirm against the CUDA headers.)
--
-- 'succ' and 'pred' walk the constructors in declaration order and call
-- 'error' at either end. 'enumFromThen' and 'enumFromThenTo' are left at the
-- class defaults.
instance Enum ComputeMode where
  succ Default          = Prohibited
  succ Prohibited       = ExclusiveProcess
  succ ExclusiveProcess = error "ComputeMode.succ: ExclusiveProcess has no successor"

  pred Prohibited       = Default
  pred ExclusiveProcess = Prohibited
  pred Default          = error "ComputeMode.pred: Default has no predecessor"

  -- Step with 'succ' rather than with integer arithmetic, because the numeric
  -- codes have a gap; stop once the numeric code of the upper bound is
  -- reached or passed.
  enumFromTo from to = go from
    where
      end = fromEnum to

      go v = case compare (fromEnum v) end of
        LT -> v : go (succ v)
        EQ -> [v]
        GT -> []

  -- An open-ended range runs up to the last constructor.
  enumFrom from = enumFromTo from ExclusiveProcess

  fromEnum Default          = 0
  fromEnum Prohibited       = 2
  fromEnum ExclusiveProcess = 3

  toEnum 0         = Default
  toEnum 2         = Prohibited
  toEnum 3         = ExclusiveProcess
  toEnum unmatched = error ("ComputeMode.toEnum: Cannot match " ++ show unmatched)
{-# LINE 33 "src/Foreign/CUDA/Analysis/Device.chs" #-}
-- | One-sentence, human-readable explanation of each compute mode.
instance Describe ComputeMode where
  describe mode = case mode of
    Default          -> "Multiple contexts are allowed on the device simultaneously"
    Prohibited       -> "No contexts can be created on this device at this time"
    ExclusiveProcess -> "Only one context used by a single process can be present on this device at a time"
-- | A compute capability, stored as two strict integers: the major revision
-- followed by the minor revision (the 'Show' instance renders them as
-- @major.minor@).
data Compute
  = Compute !Int !Int
  deriving (Eq)
-- | Renders a compute capability in dotted form, e.g. @Compute 3 5@ is shown
-- as @3.5@.
instance Show Compute where
  show (Compute major minor) = concat [show major, ".", show minor]
-- | Lexicographic ordering: the major revision is compared first and the
-- minor revision only breaks ties.
instance Ord Compute where
  compare (Compute m1 n1) (Compute m2 n2) = compare (m1, n1) (m2, n2)
-- | The properties of a compute device.
--
-- All fields are strict. The field comments below are derived from the field
-- names only; units (bytes, kilohertz, ...) are not visible in this module
-- and should be confirmed against the code that populates the record.
data DeviceProperties = DeviceProperties
  {
    deviceName                    :: !String          -- ^ Name identifying the device
  , computeCapability             :: !Compute         -- ^ Compute capability; selects the entry used by 'deviceResources'
    -- Memory and register capacities
  , totalGlobalMem                :: !Int64           -- ^ Total global memory
  , totalConstMem                 :: !Int64           -- ^ Total constant memory
  , sharedMemPerBlock             :: !Int64           -- ^ Shared memory available per block
  , regsPerBlock                  :: !Int             -- ^ Registers available per block
    -- Execution configuration limits
  , warpSize                      :: !Int             -- ^ Warp size
  , maxThreadsPerBlock            :: !Int             -- ^ Maximum threads per block
  , maxThreadsPerMultiProcessor   :: !Int             -- ^ Maximum threads per multiprocessor
  , maxBlockSize                  :: !(Int,Int,Int)   -- ^ Maximum block size, one component per dimension
  , maxGridSize                   :: !(Int,Int,Int)   -- ^ Maximum grid size, one component per dimension
    -- Texture limits
  , maxTextureDim1D               :: !Int             -- ^ Maximum 1D texture dimension
  , maxTextureDim2D               :: !(Int,Int)       -- ^ Maximum 2D texture dimensions
  , maxTextureDim3D               :: !(Int,Int,Int)   -- ^ Maximum 3D texture dimensions
    -- Clocks, buses and alignment
  , clockRate                     :: !Int             -- ^ Device clock rate
  , multiProcessorCount           :: !Int             -- ^ Number of multiprocessors
  , memPitch                      :: !Int64           -- ^ Memory pitch limit
  , memBusWidth                   :: !Int             -- ^ Memory bus width
  , memClockRate                  :: !Int             -- ^ Memory clock rate
  , textureAlignment              :: !Int64           -- ^ Texture alignment requirement
    -- Operating mode and capability flags
  , computeMode                   :: !ComputeMode     -- ^ Current compute mode
  , deviceOverlap                 :: !Bool
  , concurrentKernels             :: !Bool
  , eccEnabled                    :: !Bool
  , asyncEngineCount              :: !Int
  , cacheMemL2                    :: !Int             -- ^ L2 cache size
  , pciInfo                       :: !PCI             -- ^ PCI location of the device
  , tccDriverEnabled              :: !Bool
  , kernelExecTimeoutEnabled      :: !Bool
  , integrated                    :: !Bool
  , canMapHostMemory              :: !Bool
  , unifiedAddressing             :: !Bool
  , streamPriorities              :: !Bool
  , globalL1Cache                 :: !Bool
  , localL1Cache                  :: !Bool
  , managedMemory                 :: !Bool
  , multiGPUBoard                 :: !Bool
  , multiGPUBoardGroupID          :: !Int
  , preemption                    :: !Bool
  , singleToDoublePerfRatio       :: !Int
  , cooperativeLaunch             :: !Bool
  , cooperativeLaunchMultiDevice  :: !Bool
  }
  deriving (Show)
-- | PCI identifiers of a device: bus, device and domain ID, each held as a
-- strict 'Int'.
data PCI = PCI
  {
    busID       :: !Int,    -- ^ PCI bus ID
    deviceID    :: !Int,    -- ^ PCI device ID
    domainID    :: !Int     -- ^ PCI domain ID
  }
  deriving (Show)
-- | Granularity tag stored in the 'regAllocationStyle' field of
-- 'DeviceResources': either per 'Warp' or per 'Block'.
data Allocation = Warp | Block
  deriving Show
-- | Static hardware limits of a device, keyed by compute capability and
-- produced by 'deviceResources'.
--
-- All fields are strict. "MP" abbreviates multiprocessor. The field comments
-- are derived from the field names; units for the memory sizes are not shown
-- in this module (presumably bytes -- confirm).
data DeviceResources = DeviceResources
  { threadsPerWarp        :: !Int         -- ^ Threads in a warp
  , coresPerMP            :: !Int         -- ^ Cores per multiprocessor
  , warpsPerMP            :: !Int         -- ^ Warps per multiprocessor
  , threadsPerMP          :: !Int         -- ^ Threads per multiprocessor
  , threadBlocksPerMP     :: !Int         -- ^ Thread blocks per multiprocessor
  , sharedMemPerMP        :: !Int         -- ^ Shared memory per multiprocessor
  , maxSharedMemPerBlock  :: !Int         -- ^ Maximum shared memory per thread block
  , regFileSizePerMP      :: !Int         -- ^ Register file size per multiprocessor
  , maxRegPerBlock        :: !Int         -- ^ Maximum registers per thread block
  , regAllocUnit          :: !Int         -- ^ Register allocation unit size
  , regAllocationStyle    :: !Allocation  -- ^ Whether registers are allocated per 'Warp' or per 'Block'
  , maxRegPerThread       :: !Int         -- ^ Maximum registers per thread
  , sharedMemAllocUnit    :: !Int         -- ^ Shared memory allocation unit size
  , warpAllocUnit         :: !Int         -- ^ Warp allocation granularity
  , warpRegAllocUnit      :: !Int         -- ^ Warp register allocation granularity
  , maxGridsPerDevice     :: !Int         -- ^ Maximum grids per device
  }
  deriving Show
-- | Look up the static hardware limits for a device from its compute
-- capability.
--
-- Only the 'computeCapability' field of the argument is inspected. Each
-- known capability either spells out a complete 'DeviceResources' record or
-- is expressed as a record update on top of another capability's entry.
--
-- For a capability that is not listed, a two-line warning is emitted through
-- 'trace' (when the result is forced) and the entry for compute capability
-- 6.0 is returned instead.
deviceResources :: DeviceProperties -> DeviceResources
deviceResources = resources . computeCapability
  where
    resources :: Compute -> DeviceResources
    resources compute = case compute of
      -- Compute capability 1.x ------------------------------------------------
      Compute 1 0 -> resources (Compute 1 1)
      Compute 1 1 -> DeviceResources
        { threadsPerWarp        = 32
        , coresPerMP            = 8
        , warpsPerMP            = 24
        , threadsPerMP          = 768
        , threadBlocksPerMP     = 8
        , sharedMemPerMP        = 16384
        , maxSharedMemPerBlock  = 16384
        , regFileSizePerMP      = 8192
        , maxRegPerBlock        = 8192
        , regAllocUnit          = 256
        , regAllocationStyle    = Block
        , maxRegPerThread       = 124
        , sharedMemAllocUnit    = 512
        , warpAllocUnit         = 2
        , warpRegAllocUnit      = 256
        , maxGridsPerDevice     = 1
        }
      Compute 1 2 -> resources (Compute 1 3)
      Compute 1 3 -> (resources (Compute 1 1))
        { threadsPerMP          = 1024
        , warpsPerMP            = 32
        , regFileSizePerMP      = 16384
        , maxRegPerBlock        = 16384
        , regAllocUnit          = 512
        }

      -- Compute capability 2.x ------------------------------------------------
      Compute 2 0 -> DeviceResources
        { threadsPerWarp        = 32
        , coresPerMP            = 32
        , warpsPerMP            = 48
        , threadsPerMP          = 1536
        , threadBlocksPerMP     = 8
        , sharedMemPerMP        = 49152
        , maxSharedMemPerBlock  = 49152
        , regFileSizePerMP      = 32768
        , maxRegPerBlock        = 32768
        , regAllocUnit          = 64
        , regAllocationStyle    = Warp
        , maxRegPerThread       = 63
        , sharedMemAllocUnit    = 128
        , warpAllocUnit         = 2
        , warpRegAllocUnit      = 64
        , maxGridsPerDevice     = 16
        }
      Compute 2 1 -> (resources (Compute 2 0))
        { coresPerMP            = 48
        }

      -- Compute capability 3.x ------------------------------------------------
      Compute 3 0 -> DeviceResources
        { threadsPerWarp        = 32
        , coresPerMP            = 192
        , warpsPerMP            = 64
        , threadsPerMP          = 2048
        , threadBlocksPerMP     = 16
        , sharedMemPerMP        = 49152
        , maxSharedMemPerBlock  = 49152
        , regFileSizePerMP      = 65536
        , maxRegPerBlock        = 65536
        , regAllocUnit          = 256
        , regAllocationStyle    = Warp
        , maxRegPerThread       = 63
        , sharedMemAllocUnit    = 256
        , warpAllocUnit         = 4
        , warpRegAllocUnit      = 256
        , maxGridsPerDevice     = 16
        }
      Compute 3 2 -> (resources (Compute 3 5))
        { maxRegPerBlock        = 32768
        , maxGridsPerDevice     = 4
        }
      Compute 3 5 -> (resources (Compute 3 0))
        { maxRegPerThread       = 255
        , maxGridsPerDevice     = 32
        }
      Compute 3 7 -> (resources (Compute 3 5))
        { sharedMemPerMP        = 114688
        , regFileSizePerMP      = 131072
        }

      -- Compute capability 5.x ------------------------------------------------
      Compute 5 0 -> DeviceResources
        { threadsPerWarp        = 32
        , coresPerMP            = 128
        , warpsPerMP            = 64
        , threadsPerMP          = 2048
        , threadBlocksPerMP     = 32
        , sharedMemPerMP        = 65536
        , maxSharedMemPerBlock  = 49152
        , regFileSizePerMP      = 65536
        , maxRegPerBlock        = 65536
        , regAllocUnit          = 256
        , regAllocationStyle    = Warp
        , maxRegPerThread       = 255
        , sharedMemAllocUnit    = 256
        , warpAllocUnit         = 4
        , warpRegAllocUnit      = 256
        , maxGridsPerDevice     = 32
        }
      Compute 5 2 -> (resources (Compute 5 0))
        { sharedMemPerMP        = 98304
        , maxRegPerBlock        = 32768
        , warpAllocUnit         = 2
        }
      Compute 5 3 -> (resources (Compute 5 0))
        { maxRegPerBlock        = 32768
        , warpAllocUnit         = 2
        , maxGridsPerDevice     = 16
        }

      -- Compute capability 6.x ------------------------------------------------
      Compute 6 0 -> DeviceResources
        { threadsPerWarp        = 32
        , coresPerMP            = 64
        , warpsPerMP            = 64
        , threadsPerMP          = 2048
        , threadBlocksPerMP     = 32
        , sharedMemPerMP        = 65536
        , maxSharedMemPerBlock  = 49152
        , regFileSizePerMP      = 65536
        , maxRegPerBlock        = 65536
        , regAllocUnit          = 256
        , regAllocationStyle    = Warp
        , maxRegPerThread       = 255
        , sharedMemAllocUnit    = 256
        , warpAllocUnit         = 2
        , warpRegAllocUnit      = 256
        , maxGridsPerDevice     = 128
        }
      Compute 6 1 -> (resources (Compute 6 0))
        { coresPerMP            = 128
        , sharedMemPerMP        = 98304
        , warpAllocUnit         = 4
        , maxGridsPerDevice     = 32
        }
      Compute 6 2 -> (resources (Compute 6 0))
        { coresPerMP            = 128
        , warpsPerMP            = 128
          -- The 4096 belongs to the thread limit, not the block limit: every
          -- other entry satisfies threadsPerMP == warpsPerMP * threadsPerWarp
          -- (here 128 * 32), and no entry has a block limit above 32. The
          -- block limit is therefore inherited from the 6.0 entry.
          -- NOTE(review): confirm 128 warps / 4096 threads against the
          -- current vendor tables for compute capability 6.2.
        , threadsPerMP          = 4096
        , maxRegPerBlock        = 32768
        , warpAllocUnit         = 4
        , maxGridsPerDevice     = 16
        }

      -- Compute capability 7.x ------------------------------------------------
      Compute 7 0 -> DeviceResources
        { threadsPerWarp        = 32
        , coresPerMP            = 64
        , warpsPerMP            = 64
        , threadsPerMP          = 2048
        , threadBlocksPerMP     = 32
        , sharedMemPerMP        = 98304
        , maxSharedMemPerBlock  = 98304
        , regFileSizePerMP      = 65536
        , maxRegPerBlock        = 65536
        , regAllocUnit          = 256
        , regAllocationStyle    = Warp
        , maxRegPerThread       = 255
        , sharedMemAllocUnit    = 256
        , warpAllocUnit         = 4
        , warpRegAllocUnit      = 256
        , maxGridsPerDevice     = 128
        }
      Compute 7 2 -> (resources (Compute 7 0))
        { maxGridsPerDevice     = 16
        , maxSharedMemPerBlock  = 49152
        }
      Compute 7 5 -> (resources (Compute 7 0))
        { warpsPerMP            = 32
        , threadBlocksPerMP     = 16
        , threadsPerMP          = 1024
        , maxGridsPerDevice     = 128
        , sharedMemPerMP        = 65536
        , maxSharedMemPerBlock  = 65536
        }

      -- Compute capability 8.x ------------------------------------------------
      Compute 8 0 -> DeviceResources
        { threadsPerWarp        = 32
        , coresPerMP            = 64
        , warpsPerMP            = 64
        , threadsPerMP          = 2048
        , threadBlocksPerMP     = 32
        , sharedMemPerMP        = 167936
        , maxSharedMemPerBlock  = 167936
        , regFileSizePerMP      = 65536
        , maxRegPerBlock        = 65536
        , regAllocUnit          = 256
        , regAllocationStyle    = Warp
        , maxRegPerThread       = 255
        , sharedMemAllocUnit    = 128
        , warpAllocUnit         = 4
        , warpRegAllocUnit      = 256
        , maxGridsPerDevice     = 128
        }
      Compute 8 6 -> (resources (Compute 8 0))
        { warpsPerMP            = 48
        , threadsPerMP          = 1536
        , threadBlocksPerMP     = 16
        , sharedMemPerMP        = 102400
        , maxSharedMemPerBlock  = 102400
        }

      -- Anything else: warn via 'trace' and fall back to the 6.0 entry rather
      -- than failing.
      _ -> trace warning $ resources (Compute 6 0)
        where
          warning = unlines
            [ "*** Warning: Unknown CUDA device compute capability: " ++ show compute
            , "*** Please submit a bug report at https://github.com/tmcdonell/cuda/issues" ]