{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Data.Array.Accelerate.LLVM.PTX.Analysis.Launch (
DeviceProperties, Occupancy, LaunchConfig,
simpleLaunchConfig, launchConfig,
multipleOf, multipleOfQ,
) where
import Foreign.CUDA.Analysis as CUDA
import Language.Haskell.TH
-- | A function from the resource usage of a compiled kernel to the parameters
-- used to launch it.
--
-- The three arguments are the ones 'launchConfig' binds as @maxThreads@,
-- @registers@ and @static_smem@. The result components are, in order, what
-- 'launchConfig' returns.
--
type LaunchConfig
  =  Int                        -- maximum number of threads per block
  -> Int                        -- registers used per thread
  -> Int                        -- static shared memory used by the kernel (presumably bytes; confirm)
  -> ( Occupancy                -- occupancy achieved by the chosen block size
     , Int                      -- chosen thread block size
     , Int -> Int               -- grid size as a function of the input size
     , Int                      -- dynamic shared memory for the chosen block size
     , Q (TExp (Int -> Int))    -- typed quotation of the grid-size function
     )
-- | Launch configuration for a kernel that needs no dynamic shared memory.
--
-- This is 'launchConfig' specialised so that:
--
--   * the candidate thread block sizes are @decWarp dev@ (see the @cuda@
--     package documentation for the exact sequence it generates);
--
--   * the dynamic shared memory requirement is always zero; and
--
--   * the grid size for an input is computed with 'multipleOf' (and its
--     quoted counterpart 'multipleOfQ').
--
simpleLaunchConfig :: DeviceProperties -> LaunchConfig
simpleLaunchConfig dev =
  launchConfig dev (decWarp dev) (const 0) multipleOf multipleOfQ
-- | Choose a thread block size and grid size for a kernel.
--
-- The candidate block sizes that do not exceed @maxThreads@ are handed to
-- 'optimalBlockSizeOf', together with the (constant) per-thread register
-- count and the total shared memory requirement as a function of block size.
-- Which candidate wins among equally good ones is decided by
-- 'optimalBlockSizeOf' itself; consult the @cuda@ package documentation for
-- the ordering it expects of the candidate list.
--
-- The grid size returned for an input of size @n@ is the caller-supplied
-- @grid_size n cta@, capped at the number of thread blocks that can be
-- active across the device at once (multiprocessor count times the active
-- thread blocks reported in the occupancy result).
--
launchConfig
    :: DeviceProperties                 -- ^ Device to compute the configuration for
    -> [Int]                            -- ^ Candidate thread block sizes
    -> (Int -> Int)                     -- ^ Dynamic shared memory as a function of thread block size
    -> (Int -> Int -> Int)              -- ^ Grid size for an input size (first argument) and thread block size (second argument)
    -> Q (TExp (Int -> Int -> Int))     -- ^ Typed quotation of the previous argument; the two should agree
    -> LaunchConfig
launchConfig dev candidates dynamic_smem grid_size grid_sizeQ maxThreads registers static_smem =
  let
      -- Chosen thread block size and the occupancy it achieves. Only
      -- candidates that fit within the kernel's thread limit are considered.
      (cta, occ) = optimalBlockSizeOf dev (filter (<= maxThreads) candidates) (const registers) smem

      -- Number of thread blocks that can be active on the whole device.
      maxGrid :: Int
      maxGrid = multiProcessorCount dev * activeThreadBlocks occ

      -- Grid size for an input of size n, capped at 'maxGrid'.
      grid :: Int -> Int
      grid n = maxGrid `min` grid_size n cta

      -- Total shared memory (static + dynamic) for a block of n threads.
      smem :: Int -> Int
      smem n = static_smem + dynamic_smem n

      -- Same computation as 'grid', as a typed quotation. The local values
      -- 'maxGrid' and 'cta' are lifted into the quotation, hence the explicit
      -- type annotations.
      gridQ :: Q (TExp (Int -> Int))
      gridQ = [|| \n -> (maxGrid::Int) `min` $$grid_sizeQ (n::Int) (cta::Int) ||]
  in
  ( occ, cta, grid, dynamic_smem cta, gridQ )
-- | The number of multiples of @y@ needed to cover @x@, i.e. @x / y@ rounded
-- up, for non-negative @x@ and positive @y@.
--
-- Uses 'quot', so the result truncates towards zero when @x + y - 1@ is
-- negative, and @y == 0@ raises a divide-by-zero exception.
--
multipleOf :: Int -> Int -> Int
multipleOf x y = (x + y - 1) `quot` y
-- | Typed Template Haskell quotation of 'multipleOf', for splicing the same
-- computation into generated code.
--
multipleOfQ :: Q (TExp (Int -> Int -> Int))
multipleOfQ = [|| multipleOf ||]