Safe Haskell | None |
---|---|
Language | Haskell2010 |
Synopsis
- data KernelConstants = KernelConstants {
- kernelGlobalThreadId :: TExp Int32
- kernelLocalThreadId :: TExp Int32
- kernelGroupId :: TExp Int32
- kernelGlobalThreadIdVar :: VName
- kernelLocalThreadIdVar :: VName
- kernelGroupIdVar :: VName
- kernelNumGroups :: TExp Int32
- kernelGroupSize :: TExp Int32
- kernelNumThreads :: TExp Int32
- kernelWaveSize :: TExp Int32
- kernelThreadActive :: TExp Bool
- kernelLocalIdMap :: Map [SubExp] [TExp Int32]
- keyWithEntryPoint :: Maybe Name -> Name -> Name
- type CallKernelGen = ImpM KernelsMem HostEnv HostOp
- type InKernelGen = ImpM KernelsMem KernelEnv KernelOp
- newtype HostEnv = HostEnv {}
- data KernelEnv = KernelEnv {}
- computeThreadChunkSize :: SplitOrdering -> TExp Int32 -> Count Elements (TExp Int32) -> Count Elements (TExp Int32) -> TV Int32 -> ImpM lore r op ()
- groupReduce :: TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen ()
- groupScan :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool) -> TExp Int32 -> TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen ()
- isActive :: [(VName, SubExp)] -> TExp Bool
- sKernelThread :: String -> Count NumGroups (TExp Int32) -> Count GroupSize (TExp Int32) -> VName -> InKernelGen () -> CallKernelGen ()
- sKernelGroup :: String -> Count NumGroups (TExp Int32) -> Count GroupSize (TExp Int32) -> VName -> InKernelGen () -> CallKernelGen ()
- sReplicate :: VName -> SubExp -> CallKernelGen ()
- sIota :: VName -> TExp Int32 -> Exp -> Exp -> IntType -> CallKernelGen ()
- sCopy :: CopyCompiler KernelsMem HostEnv HostOp
- compileThreadResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen ()
- compileGroupResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen ()
- virtualiseGroups :: SegVirt -> TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen ()
- groupLoop :: TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen ()
- kernelLoop :: IntExp t => TExp t -> TExp t -> TExp t -> (TExp t -> InKernelGen ()) -> InKernelGen ()
- groupCoverSpace :: [TExp Int32] -> ([TExp Int32] -> InKernelGen ()) -> InKernelGen ()
- precomputeSegOpIDs :: Stms KernelsMem -> InKernelGen a -> InKernelGen a
- atomicUpdateLocking :: AtomicBinOp -> Lambda KernelsMem -> AtomicUpdate KernelsMem KernelEnv
- type AtomicBinOp = BinOp -> Maybe (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)
- data Locking = Locking {
- lockingArray :: VName
- lockingIsUnlocked :: TExp Int32
- lockingToLock :: TExp Int32
- lockingToUnlock :: TExp Int32
- lockingMapping :: [TExp Int32] -> [TExp Int32]
- data AtomicUpdate lore r
- = AtomicPrim (DoAtomicUpdate lore r)
- | AtomicCAS (DoAtomicUpdate lore r)
- | AtomicLocking (Locking -> DoAtomicUpdate lore r)
- type DoAtomicUpdate lore r = Space -> [VName] -> [TExp Int32] -> ImpM lore r KernelOp ()
Documentation
data KernelConstants Source #
KernelConstants | |
|
type CallKernelGen = ImpM KernelsMem HostEnv HostOp Source #
type InKernelGen = ImpM KernelsMem KernelEnv KernelOp Source #
computeThreadChunkSize :: SplitOrdering -> TExp Int32 -> Count Elements (TExp Int32) -> Count Elements (TExp Int32) -> TV Int32 -> ImpM lore r op () Source #
groupReduce :: TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen () Source #
groupScan :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool) -> TExp Int32 -> TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen () Source #
sKernelThread :: String -> Count NumGroups (TExp Int32) -> Count GroupSize (TExp Int32) -> VName -> InKernelGen () -> CallKernelGen () Source #
sKernelGroup :: String -> Count NumGroups (TExp Int32) -> Count GroupSize (TExp Int32) -> VName -> InKernelGen () -> CallKernelGen () Source #
sReplicate :: VName -> SubExp -> CallKernelGen () Source #
Perform a Replicate with a kernel.
sIota :: VName -> TExp Int32 -> Exp -> Exp -> IntType -> CallKernelGen () Source #
Perform an Iota with a kernel.
compileThreadResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen () Source #
compileGroupResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen () Source #
virtualiseGroups :: SegVirt -> TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen () Source #
For many kernels, we may not have enough physical groups to cover the logical iteration space. Some groups thus have to perform double duty; we put an outer loop to accomplish this. The advantage over just launching a bazillion threads is that the cost of memory expansion should be proportional to the number of *physical* threads (hardware parallelism), not the amount of application parallelism.
groupLoop :: TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen () Source #
Assign iterations of a for-loop to threads in the workgroup. The
passed-in function is invoked with the (symbolic) iteration. For
multidimensional loops, use groupCoverSpace.
kernelLoop :: IntExp t => TExp t -> TExp t -> TExp t -> (TExp t -> InKernelGen ()) -> InKernelGen () Source #
Assign iterations of a for-loop to all threads in the kernel.
The passed-in function is invoked with the (symbolic) iteration.
threadOperations will be in effect in the body. For
multidimensional loops, use groupCoverSpace.
groupCoverSpace :: [TExp Int32] -> ([TExp Int32] -> InKernelGen ()) -> InKernelGen () Source #
Iterate collectively through a multidimensional space, such that all threads in the group participate. The passed-in function is invoked with a (symbolic) point in the index space.
precomputeSegOpIDs :: Stms KernelsMem -> InKernelGen a -> InKernelGen a Source #
atomicUpdateLocking :: AtomicBinOp -> Lambda KernelsMem -> AtomicUpdate KernelsMem KernelEnv Source #
Do an atomic update corresponding to a binary operator lambda.
type AtomicBinOp = BinOp -> Maybe (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp) Source #
data Locking Source #
Locking strategy used for an atomic update.
Locking | |
|
data AtomicUpdate lore r Source #
The mechanism that will be used for performing the atomic update. Approximates how efficient it will be. Ordered from most to least efficient.
AtomicPrim (DoAtomicUpdate lore r) | Supported directly by primitive. |
AtomicCAS (DoAtomicUpdate lore r) | Can be done by efficient swaps. |
AtomicLocking (Locking -> DoAtomicUpdate lore r) | Requires explicit locking. |