Safe Haskell | None |
---|---|
Language | Haskell2010 |
Synopsis
- data KernelConstants = KernelConstants {
- kernelGlobalThreadId :: Exp
- kernelLocalThreadId :: Exp
- kernelGroupId :: Exp
- kernelGlobalThreadIdVar :: VName
- kernelLocalThreadIdVar :: VName
- kernelGroupIdVar :: VName
- kernelNumGroups :: Exp
- kernelGroupSize :: Exp
- kernelNumThreads :: Exp
- kernelWaveSize :: Exp
- kernelThreadActive :: Exp
- kernelLocalIdMap :: Map [SubExp] [Exp]
- keyWithEntryPoint :: Maybe Name -> Name -> Name
- type CallKernelGen = ImpM KernelsMem HostEnv HostOp
- type InKernelGen = ImpM KernelsMem KernelEnv KernelOp
- newtype HostEnv = HostEnv {}
- data KernelEnv = KernelEnv {}
- computeThreadChunkSize :: SplitOrdering -> Exp -> Count Elements Exp -> Count Elements Exp -> VName -> ImpM lore r op ()
- groupReduce :: Exp -> Lambda KernelsMem -> [VName] -> InKernelGen ()
- groupScan :: Maybe (Exp -> Exp -> Exp) -> Exp -> Exp -> Lambda KernelsMem -> [VName] -> InKernelGen ()
- isActive :: [(VName, SubExp)] -> Exp
- sKernelThread :: String -> Count NumGroups Exp -> Count GroupSize Exp -> VName -> InKernelGen () -> CallKernelGen ()
- sKernelGroup :: String -> Count NumGroups Exp -> Count GroupSize Exp -> VName -> InKernelGen () -> CallKernelGen ()
- sReplicate :: VName -> SubExp -> CallKernelGen ()
- sIota :: VName -> Exp -> Exp -> Exp -> IntType -> CallKernelGen ()
- sCopy :: CopyCompiler KernelsMem HostEnv HostOp
- compileThreadResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen ()
- compileGroupResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen ()
- virtualiseGroups :: SegVirt -> Exp -> (VName -> InKernelGen ()) -> InKernelGen ()
- groupLoop :: Exp -> (Exp -> InKernelGen ()) -> InKernelGen ()
- kernelLoop :: Exp -> Exp -> Exp -> (Exp -> InKernelGen ()) -> InKernelGen ()
- groupCoverSpace :: [Exp] -> ([Exp] -> InKernelGen ()) -> InKernelGen ()
- precomputeSegOpIDs :: Stms KernelsMem -> InKernelGen a -> InKernelGen a
- atomicUpdateLocking :: AtomicBinOp -> Lambda KernelsMem -> AtomicUpdate KernelsMem KernelEnv
- type AtomicBinOp = BinOp -> Maybe (VName -> VName -> Count Elements Exp -> Exp -> AtomicOp)
- data Locking = Locking {
- lockingArray :: VName
- lockingIsUnlocked :: Exp
- lockingToLock :: Exp
- lockingToUnlock :: Exp
- lockingMapping :: [Exp] -> [Exp]
- data AtomicUpdate lore r
- = AtomicPrim (DoAtomicUpdate lore r)
- | AtomicCAS (DoAtomicUpdate lore r)
- | AtomicLocking (Locking -> DoAtomicUpdate lore r)
- type DoAtomicUpdate lore r = Space -> [VName] -> [Exp] -> ImpM lore r KernelOp ()
Documentation
data KernelConstants Source #
KernelConstants | |
|
type CallKernelGen = ImpM KernelsMem HostEnv HostOp Source #
type InKernelGen = ImpM KernelsMem KernelEnv KernelOp Source #
computeThreadChunkSize :: SplitOrdering -> Exp -> Count Elements Exp -> Count Elements Exp -> VName -> ImpM lore r op () Source #
groupReduce :: Exp -> Lambda KernelsMem -> [VName] -> InKernelGen () Source #
groupScan :: Maybe (Exp -> Exp -> Exp) -> Exp -> Exp -> Lambda KernelsMem -> [VName] -> InKernelGen () Source #
sKernelThread :: String -> Count NumGroups Exp -> Count GroupSize Exp -> VName -> InKernelGen () -> CallKernelGen () Source #
sKernelGroup :: String -> Count NumGroups Exp -> Count GroupSize Exp -> VName -> InKernelGen () -> CallKernelGen () Source #
sReplicate :: VName -> SubExp -> CallKernelGen () Source #
Perform a Replicate with a kernel.
sIota :: VName -> Exp -> Exp -> Exp -> IntType -> CallKernelGen () Source #
Perform an Iota with a kernel.
compileThreadResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen () Source #
compileGroupResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen () Source #
virtualiseGroups :: SegVirt -> Exp -> (VName -> InKernelGen ()) -> InKernelGen () Source #
For many kernels, we may not have enough physical groups to cover the logical iteration space. Some groups thus have to perform double duty; we put an outer loop to accomplish this. The advantage over just launching a bazillion threads is that the cost of memory expansion should be proportional to the number of *physical* threads (hardware parallelism), not the amount of application parallelism.
groupLoop :: Exp -> (Exp -> InKernelGen ()) -> InKernelGen () Source #
Assign iterations of a for-loop to threads in the workgroup. The
passed-in function is invoked with the (symbolic) iteration. For
multidimensional loops, use groupCoverSpace.
kernelLoop :: Exp -> Exp -> Exp -> (Exp -> InKernelGen ()) -> InKernelGen () Source #
Assign iterations of a for-loop to all threads in the kernel.
The passed-in function is invoked with the (symbolic) iteration.
threadOperations will be in effect in the body. For
multidimensional loops, use groupCoverSpace.
groupCoverSpace :: [Exp] -> ([Exp] -> InKernelGen ()) -> InKernelGen () Source #
Iterate collectively through a multidimensional space, such that all threads in the group participate. The passed-in function is invoked with a (symbolic) point in the index space.
precomputeSegOpIDs :: Stms KernelsMem -> InKernelGen a -> InKernelGen a Source #
atomicUpdateLocking :: AtomicBinOp -> Lambda KernelsMem -> AtomicUpdate KernelsMem KernelEnv Source #
Do an atomic update corresponding to a binary operator lambda.
type AtomicBinOp = BinOp -> Maybe (VName -> VName -> Count Elements Exp -> Exp -> AtomicOp) Source #
data Locking Source #
Locking strategy used for an atomic update.
Locking | |
|
data AtomicUpdate lore r Source #
The mechanism that will be used for performing the atomic update. Approximates how efficient it will be. Ordered from most to least efficient.
AtomicPrim (DoAtomicUpdate lore r) | Supported directly by primitive. |
AtomicCAS (DoAtomicUpdate lore r) | Can be done by efficient swaps. |
AtomicLocking (Locking -> DoAtomicUpdate lore r) | Requires explicit locking. |