futhark-0.17.3: An optimising compiler for a functional, array-oriented language.

Safe Haskell	None
Language	Haskell2010

Futhark.CodeGen.ImpGen.Kernels.Base

Synopsis

data KernelConstants = KernelConstants {
- kernelGlobalThreadId :: TExp Int32
- kernelLocalThreadId :: TExp Int32
- kernelGroupId :: TExp Int32
- kernelGlobalThreadIdVar :: VName
- kernelLocalThreadIdVar :: VName
- kernelGroupIdVar :: VName
- kernelNumGroups :: TExp Int32
- kernelGroupSize :: TExp Int32
- kernelNumThreads :: TExp Int32
- kernelWaveSize :: TExp Int32
- kernelThreadActive :: TExp Bool
- kernelLocalIdMap :: Map [SubExp] [TExp Int32]
}
keyWithEntryPoint :: Maybe Name -> Name -> Name
type CallKernelGen = ImpM KernelsMem HostEnv HostOp
type InKernelGen = ImpM KernelsMem KernelEnv KernelOp
newtype HostEnv = HostEnv {
- hostAtomics :: AtomicBinOp
}
data KernelEnv = KernelEnv {
- kernelAtomics :: AtomicBinOp
- kernelConstants :: KernelConstants
}
computeThreadChunkSize :: SplitOrdering -> TExp Int32 -> Count Elements (TExp Int32) -> Count Elements (TExp Int32) -> TV Int32 -> ImpM lore r op ()
groupReduce :: TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen ()
groupScan :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool) -> TExp Int32 -> TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen ()
isActive :: [(VName, SubExp)] -> TExp Bool
sKernelThread :: String -> Count NumGroups (TExp Int32) -> Count GroupSize (TExp Int32) -> VName -> InKernelGen () -> CallKernelGen ()
sKernelGroup :: String -> Count NumGroups (TExp Int32) -> Count GroupSize (TExp Int32) -> VName -> InKernelGen () -> CallKernelGen ()
sReplicate :: VName -> SubExp -> CallKernelGen ()
sIota :: VName -> TExp Int32 -> Exp -> Exp -> IntType -> CallKernelGen ()
sCopy :: CopyCompiler KernelsMem HostEnv HostOp
compileThreadResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen ()
compileGroupResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen ()
virtualiseGroups :: SegVirt -> TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen ()
groupLoop :: TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen ()
kernelLoop :: IntExp t => TExp t -> TExp t -> TExp t -> (TExp t -> InKernelGen ()) -> InKernelGen ()
groupCoverSpace :: [TExp Int32] -> ([TExp Int32] -> InKernelGen ()) -> InKernelGen ()
precomputeSegOpIDs :: Stms KernelsMem -> InKernelGen a -> InKernelGen a
atomicUpdateLocking :: AtomicBinOp -> Lambda KernelsMem -> AtomicUpdate KernelsMem KernelEnv
type AtomicBinOp = BinOp -> Maybe (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)
data Locking = Locking {
- lockingArray :: VName
- lockingIsUnlocked :: TExp Int32
- lockingToLock :: TExp Int32
- lockingToUnlock :: TExp Int32
- lockingMapping :: [TExp Int32] -> [TExp Int32]
}
data AtomicUpdate lore r
- = AtomicPrim (DoAtomicUpdate lore r)
- | AtomicCAS (DoAtomicUpdate lore r)
- | AtomicLocking (Locking -> DoAtomicUpdate lore r)
type DoAtomicUpdate lore r = Space -> [VName] -> [TExp Int32] -> ImpM lore r KernelOp ()

Documentation

data KernelConstants Source #

Constructors

KernelConstants

Fields

kernelGlobalThreadId :: TExp Int32
kernelLocalThreadId :: TExp Int32
kernelGroupId :: TExp Int32
kernelGlobalThreadIdVar :: VName
kernelLocalThreadIdVar :: VName
kernelGroupIdVar :: VName
kernelNumGroups :: TExp Int32
kernelGroupSize :: TExp Int32
kernelNumThreads :: TExp Int32
kernelWaveSize :: TExp Int32
kernelThreadActive :: TExp Bool
kernelLocalIdMap :: Map [SubExp] [TExp Int32]
A mapping from dimensions of nested SegOps to already computed local thread IDs.

keyWithEntryPoint :: Maybe Name -> Name -> Name Source #

type CallKernelGen = ImpM KernelsMem HostEnv HostOp Source #

type InKernelGen = ImpM KernelsMem KernelEnv KernelOp Source #

newtype HostEnv Source #

Constructors

HostEnv
Fields

hostAtomics :: AtomicBinOp

data KernelEnv Source #

Constructors

KernelEnv
Fields

kernelAtomics :: AtomicBinOp
kernelConstants :: KernelConstants

computeThreadChunkSize :: SplitOrdering -> TExp Int32 -> Count Elements (TExp Int32) -> Count Elements (TExp Int32) -> TV Int32 -> ImpM lore r op () Source #

groupReduce :: TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen () Source #

groupScan :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool) -> TExp Int32 -> TExp Int32 -> Lambda KernelsMem -> [VName] -> InKernelGen () Source #

isActive :: [(VName, SubExp)] -> TExp Bool Source #

sKernelThread :: String -> Count NumGroups (TExp Int32) -> Count GroupSize (TExp Int32) -> VName -> InKernelGen () -> CallKernelGen () Source #

sKernelGroup :: String -> Count NumGroups (TExp Int32) -> Count GroupSize (TExp Int32) -> VName -> InKernelGen () -> CallKernelGen () Source #

sReplicate :: VName -> SubExp -> CallKernelGen () Source #

Perform a Replicate with a kernel.

sIota :: VName -> TExp Int32 -> Exp -> Exp -> IntType -> CallKernelGen () Source #

Perform an Iota with a kernel.

sCopy :: CopyCompiler KernelsMem HostEnv HostOp Source #

compileThreadResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen () Source #

compileGroupResult :: SegSpace -> PatElem KernelsMem -> KernelResult -> InKernelGen () Source #

virtualiseGroups :: SegVirt -> TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen () Source #

For many kernels, we may not have enough physical groups to cover the logical iteration space. Some groups thus have to perform double duty; we put an outer loop to accomplish this. The advantage over just launching a bazillion threads is that the cost of memory expansion should be proportional to the number of *physical* threads (hardware parallelism), not the amount of application parallelism.

groupLoop :: TExp Int32 -> (TExp Int32 -> InKernelGen ()) -> InKernelGen () Source #

Assign iterations of a for-loop to threads in the workgroup. The passed-in function is invoked with the (symbolic) iteration. For multidimensional loops, use groupCoverSpace.

kernelLoop :: IntExp t => TExp t -> TExp t -> TExp t -> (TExp t -> InKernelGen ()) -> InKernelGen () Source #

Assign iterations of a for-loop to all threads in the kernel. The passed-in function is invoked with the (symbolic) iteration. threadOperations will be in effect in the body. For multidimensional loops, use groupCoverSpace.

groupCoverSpace :: [TExp Int32] -> ([TExp Int32] -> InKernelGen ()) -> InKernelGen () Source #

Iterate collectively through a multidimensional space, such that all threads in the group participate. The passed-in function is invoked with a (symbolic) point in the index space.

precomputeSegOpIDs :: Stms KernelsMem -> InKernelGen a -> InKernelGen a Source #

atomicUpdateLocking :: AtomicBinOp -> Lambda KernelsMem -> AtomicUpdate KernelsMem KernelEnv Source #

Do an atomic update corresponding to a binary operator lambda.

type AtomicBinOp = BinOp -> Maybe (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp) Source #

Is there an atomic BinOp corresponding to this BinOp?

data Locking Source #

Locking strategy used for an atomic update.

Constructors

Locking

Fields

lockingArray :: VName
Array containing the lock.
lockingIsUnlocked :: TExp Int32
Value for us to consider the lock free.
lockingToLock :: TExp Int32
What to write when we lock it.
lockingToUnlock :: TExp Int32
What to write when we unlock it.
lockingMapping :: [TExp Int32] -> [TExp Int32]
A transformation from the logical lock index to the physical position in the array. This can also be used to make the lock array smaller.

data AtomicUpdate lore r Source #

The mechanism that will be used for performing the atomic update. Approximates how efficient it will be. Ordered from most to least efficient.

Constructors

AtomicPrim (DoAtomicUpdate lore r)	Supported directly by primitive.
AtomicCAS (DoAtomicUpdate lore r)	Can be done by efficient swaps.
AtomicLocking (Locking -> DoAtomicUpdate lore r)	Requires explicit locking.

type DoAtomicUpdate lore r = Space -> [VName] -> [TExp Int32] -> ImpM lore r KernelOp () Source #

A function for generating code for an atomic update. Assumes that the bucket is in-bounds.