{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TypeFamilies #-}
module Futhark.CodeGen.ImpGen.GPU.Base
( KernelConstants (..),
keyWithEntryPoint,
CallKernelGen,
InKernelGen,
Locks (..),
HostEnv (..),
Target (..),
KernelEnv (..),
computeThreadChunkSize,
groupReduce,
groupScan,
isActive,
sKernelThread,
sKernelGroup,
sReplicate,
sIota,
sCopy,
compileThreadResult,
compileGroupResult,
virtualiseGroups,
groupLoop,
kernelLoop,
groupCoverSpace,
precomputeSegOpIDs,
atomicUpdateLocking,
AtomicBinOp,
Locking (..),
AtomicUpdate (..),
DoAtomicUpdate,
)
where
import Control.Monad.Except
import Data.List (zip4)
import qualified Data.Map.Strict as M
import Data.Maybe
import qualified Data.Set as S
import qualified Futhark.CodeGen.ImpCode.GPU as Imp
import Futhark.CodeGen.ImpGen
import Futhark.Error
import Futhark.IR.GPUMem
import qualified Futhark.IR.Mem.IxFun as IxFun
import Futhark.MonadFreshNames
import Futhark.Transform.Rename
import Futhark.Util (chunks, dropLast, mapAccumLM, nubOrd, takeLast)
import Futhark.Util.IntegralExp (divUp, quot, rem)
import Prelude hiding (quot, rem)
-- | The backend a host environment generates code for (see
-- 'hostTarget').  How the choice influences code generation is not
-- visible in this part of the module.
data Target = CUDA | OpenCL
-- | A collection of locks, as consulted by 'updateAcc' when an update
-- has to be performed under a lock.
data Locks = Locks
  { -- | Name of the lock array; 'updateAcc' passes it as the first
    -- field of the 'Locking' it constructs.
    locksArray :: VName,
    -- | Number of locks.  'updateAcc' reduces the flattened index
    -- modulo this count to select a lock.
    locksCount :: Int
  }
-- | The environment carried by 'CallKernelGen'.
data HostEnv = HostEnv
  { -- | The atomic operations that are available.
    hostAtomics :: AtomicBinOp,
    -- | Which backend is being targeted.
    hostTarget :: Target,
    -- | Available locks, keyed by name.
    -- NOTE(review): presumably keyed the same way as 'kernelLocks'
    -- (by the name 'lookupAcc' returns) - confirm against the code
    -- that populates this map.
    hostLocks :: M.Map VName Locks
  }
-- | The environment carried by 'InKernelGen'.
data KernelEnv = KernelEnv
  { -- | The atomic operations that are available; 'updateAcc' hands
    -- these to 'atomicUpdateLocking'.
    kernelAtomics :: AtomicBinOp,
    -- | Symbolic facts about the kernel being generated.
    kernelConstants :: KernelConstants,
    -- | Available locks.  'updateAcc' looks these up using the first
    -- component of the tuple returned by 'lookupAcc'.
    kernelLocks :: M.Map VName Locks
  }
-- | 'ImpM' specialised to the 'GPUMem' representation, with a
-- 'HostEnv' environment and 'Imp.HostOp' as the operation type.
type CallKernelGen = ImpM GPUMem HostEnv Imp.HostOp
-- | 'ImpM' specialised to the 'GPUMem' representation, with a
-- 'KernelEnv' environment and 'Imp.KernelOp' as the operation type.
type InKernelGen = ImpM GPUMem KernelEnv Imp.KernelOp
-- | Symbolic information about the kernel currently being generated,
-- available through 'kernelConstants' in the 'KernelEnv'.
data KernelConstants = KernelConstants
  { -- | Expression for the global thread ID.
    kernelGlobalThreadId :: Imp.TExp Int32,
    -- | Expression for the local thread ID.  'groupLoop' uses this
    -- (sign-extended to 64 bits) as the thread index it distributes
    -- iterations over.
    kernelLocalThreadId :: Imp.TExp Int32,
    -- | Expression for the group ID.
    kernelGroupId :: Imp.TExp Int32,
    -- | NOTE(review): presumably the variable backing
    -- 'kernelGlobalThreadId' - confirm where the constants are built.
    kernelGlobalThreadIdVar :: VName,
    -- | NOTE(review): presumably the variable backing
    -- 'kernelLocalThreadId' - confirm where the constants are built.
    kernelLocalThreadIdVar :: VName,
    -- | NOTE(review): presumably the variable backing
    -- 'kernelGroupId' - confirm where the constants are built.
    kernelGroupIdVar :: VName,
    -- | Expression for the number of groups.
    kernelNumGroups :: Imp.TExp Int64,
    -- | Expression for the group size.  'groupLoop' uses this as the
    -- number of threads that iterations are distributed over.
    kernelGroupSize :: Imp.TExp Int64,
    -- | Expression for the total number of threads.
    kernelNumThreads :: Imp.TExp Int32,
    -- | Expression for the wave size.
    kernelWaveSize :: Imp.TExp Int32,
    -- | Boolean expression for whether the thread is active.
    kernelThreadActive :: Imp.TExp Bool,
    -- | Maps the dimensions of a 'SegOp' iteration space to
    -- already-declared per-dimension local IDs; filled in by
    -- 'precomputeSegOpIDs'.
    kernelLocalIdMap :: M.Map [SubExp] [Imp.TExp Int32]
  }
-- | Collect the iteration-space dimensions of the 'SegOp's occurring
-- in the given statements.
--
-- The traversal descends into both branches of an 'If' and into the
-- body of a 'DoLoop'.  It does not look inside the body of a 'SegOp'
-- itself, and every other kind of expression contributes nothing.
segOpSizes :: Stms GPUMem -> S.Set [SubExp]
segOpSizes = onStms
  where
    onStms = foldMap (onExp . stmExp)

    -- The dimensions are the second components of the segment space.
    onExp (Op (Inner (SegOp op))) =
      S.singleton $ map snd $ unSegSpace $ segSpace op
    onExp (If _ tbranch fbranch _) =
      onStms (bodyStms tbranch) <> onStms (bodyStms fbranch)
    onExp (DoLoop _ _ body) =
      onStms (bodyStms body)
    onExp _ = mempty
-- | Run an action with 'kernelLocalIdMap' populated for the 'SegOp's
-- found (by 'segOpSizes') in the given statements.
--
-- For each distinct list of dimensions, the local thread ID is
-- unflattened into that index space via @dIndexSpace' "ltid_pre"@, and
-- the resulting indexes are narrowed to 32 bits.  The map built this
-- way /replaces/ any previous 'kernelLocalIdMap' for the duration of
-- the action; it is not merged with it.
precomputeSegOpIDs :: Stms GPUMem -> InKernelGen a -> InKernelGen a
precomputeSegOpIDs stms m = do
  ltid <- kernelLocalThreadId . kernelConstants <$> askEnv
  new_ids <- M.fromList <$> mapM (mkMap ltid) (S.toList (segOpSizes stms))
  let f env =
        env
          { kernelConstants =
              (kernelConstants env) {kernelLocalIdMap = new_ids}
          }
  localEnv f m
  where
    -- Produce one map entry: the dimensions themselves are the key.
    mkMap ltid dims = do
      let dims' = map toInt64Exp dims
      ids' <- dIndexSpace' "ltid_pre" dims' (sExt64 ltid)
      return (dims, map sExt32 ids')
-- | Qualify a key with an optional entry point name.
--
-- With @Just f@ the result is the name @f.key@ (the entry point, a
-- dot, then the key); with 'Nothing' the key is returned unprefixed.
keyWithEntryPoint :: Maybe Name -> Name -> Name
keyWithEntryPoint fname key =
  nameFromString $ maybe "" ((++ ".") . nameToString) fname ++ nameToString key
-- | Allocation compiler that emits an 'Imp.LocalAlloc' operation for
-- the given memory block and size.
allocLocal :: AllocCompiler GPUMem r Imp.KernelOp
allocLocal mem size =
  sOp $ Imp.LocalAlloc mem size
-- | Compile an allocation that occurs inside a kernel.
--
-- * A single-element pattern in a 'ScalarSpace' emits no code.
--
-- * A single-element pattern in the @"local"@ space becomes a local
--   allocation (see 'allocLocal') of the requested number of bytes.
--
-- * A single-element pattern in any other space is reported as a
--   compiler limitation.
--
-- * Any other pattern shape is an internal error.
kernelAlloc ::
  Pat GPUMem ->
  SubExp ->
  Space ->
  InKernelGen ()
kernelAlloc (Pat [_]) _ ScalarSpace {} =
  -- NOTE(review): nothing is emitted here; presumably declaring the
  -- memory block is sufficient for scalar space - confirm.
  return ()
kernelAlloc (Pat [mem]) size (Space "local") =
  allocLocal (patElemName mem) $ Imp.bytes $ toInt64Exp size
kernelAlloc (Pat [mem]) _ _ =
  compilerLimitationS $ "Cannot allocate memory block " ++ pretty mem ++ " in kernel."
kernelAlloc dest _ _ =
  error $ "Invalid target for in-kernel allocation: " ++ show dest
-- | Compile a split of an iteration space: compute the chunk size for
-- index @i@ out of @w@ total elements, given @elems_per_thread@, and
-- store it in the single 64-bit variable bound by the pattern.  The
-- actual computation is delegated to 'computeThreadChunkSize'.
--
-- A pattern that does not bind exactly one name is an internal error.
splitSpace ::
  (ToExp w, ToExp i, ToExp elems_per_thread) =>
  Pat GPUMem ->
  SplitOrdering ->
  w ->
  i ->
  elems_per_thread ->
  ImpM rep r op ()
splitSpace (Pat [size]) o w i elems_per_thread = do
  num_elements <- Imp.elements . TPrimExp <$> toExp w
  let i' = toInt64Exp i
  elems_per_thread' <- Imp.elements . TPrimExp <$> toExp elems_per_thread
  computeThreadChunkSize
    o
    i'
    elems_per_thread'
    num_elements
    (mkTV (patElemName size) int64)
splitSpace pat _ _ _ _ =
  error $ "Invalid target for splitSpace: " ++ pretty pat
-- | Generate code for updating the accumulator @acc@ at indexes @is@
-- with the values @vs@.
--
-- The generated code is wrapped in an @"UpdateAcc"@ comment and is
-- guarded by a bounds check: an out-of-bounds index makes the update a
-- no-op.  If the accumulator has no combining operator, the values are
-- simply written to the underlying arrays.  Otherwise the operator is
-- applied using whichever strategy 'atomicUpdateLocking' selects.
--
-- Calls 'error' if the locking strategy is selected but no 'Locks' are
-- registered in 'kernelLocks' for the accumulator.
updateAcc :: VName -> [SubExp] -> [SubExp] -> InKernelGen ()
updateAcc acc is vs = sComment "UpdateAcc" $ do
  let is' = map toInt64Exp is
  (c, space, arrs, dims, op) <- lookupAcc acc is'
  sWhen (inBounds (Slice (map DimFix is')) dims) $
    case op of
      Nothing ->
        -- No operator: plain write of each value to its array.
        forM_ (zip arrs vs) $ \(arr, v) -> copyDWIMFix arr is' v []
      Just lam -> do
        dLParams $ lambdaParams lam
        -- The values to combine go into the second group of lambda
        -- parameters; the first @length vs@ parameters are left for
        -- the update strategy to fill in.
        let (_x_params, y_params) =
              splitAt (length vs) $ map paramName $ lambdaParams lam
        forM_ (zip y_params vs) $ \(yp, v) -> copyDWIM yp [] v []
        atomics <- kernelAtomics <$> askEnv
        case atomicUpdateLocking atomics lam of
          AtomicPrim f -> f space arrs is'
          AtomicCAS f -> f space arrs is'
          AtomicLocking f -> do
            c_locks <- M.lookup c . kernelLocks <$> askEnv
            case c_locks of
              Just (Locks locks num_locks) -> do
                -- The lock to take is the flattened index modulo the
                -- number of locks.
                -- NOTE(review): the meaning of the literals 0, 1, 0
                -- depends on the field order of 'Locking' - confirm
                -- against its definition.
                let locking =
                      Locking locks 0 1 0 $
                        pure . (`rem` fromIntegral num_locks) . flattenIndex dims
                f locking space arrs is'
              Nothing ->
                error $ "Missing locks for " ++ pretty acc
-- | Expression compiler used at thread level.  A few 'BasicOp's get
-- special treatment; everything else is handed to 'defCompileExp'.
--
-- * 'Opaque' is compiled as a plain copy of its operand.
--
-- * 'ArrayLit' is compiled as one write per element.
--
-- * 'UpdateAcc' is delegated to 'updateAcc'.
compileThreadExp :: ExpCompiler GPUMem KernelEnv Imp.KernelOp
compileThreadExp (Pat [pe]) (BasicOp (Opaque _ se)) =
  copyDWIM (patElemName pe) [] se []
compileThreadExp (Pat [dest]) (BasicOp (ArrayLit es _)) =
  -- Write element number @i@ of the literal to index @i@ of the
  -- destination.
  forM_ (zip [0 ..] es) $ \(i, e) ->
    copyDWIMFix (patElemName dest) [fromIntegral (i :: Int64)] e []
compileThreadExp _ (BasicOp (UpdateAcc acc is vs)) =
  updateAcc acc is vs
compileThreadExp dest e =
  defCompileExp dest e
-- | Distribute @n@ iterations over @num_threads@ threads, where the
-- current thread has index @tid@.  The supplied function is called
-- with the (symbolic) iteration number, and 'threadOperations' are in
-- effect while it runs.
--
-- If @n@ and @num_threads@ are the same expression (compared with
-- Haskell-level '==' while generating code, not at run time), no loop
-- is generated and the function is applied directly to @tid@.
-- Otherwise the thread executes iterations @tid@, @tid + num_threads@,
-- @tid + 2 * num_threads@, and so on.
kernelLoop ::
  IntExp t =>
  Imp.TExp t ->
  Imp.TExp t ->
  Imp.TExp t ->
  (Imp.TExp t -> InKernelGen ()) ->
  InKernelGen ()
kernelLoop tid num_threads n f =
  localOps threadOperations $
    if n == num_threads
      then f tid
      else do
        -- Number of iterations assigned to this thread:
        -- (n - tid) / num_threads, rounded up.
        let elems_for_this = (n - tid) `divUp` num_threads

        sFor "i" elems_for_this $ \i -> f $ i * num_threads + tid
-- | Distribute @n@ iterations over the threads of a group.  This is
-- 'kernelLoop' with the local thread ID (sign-extended to 64 bits) as
-- the thread index and the group size as the number of threads, both
-- taken from the 'KernelConstants' in the environment.
groupLoop ::
  Imp.TExp Int64 ->
  (Imp.TExp Int64 -> InKernelGen ()) ->
  InKernelGen ()
groupLoop n f = do
  constants <- kernelConstants <$> askEnv
  kernelLoop
    (sExt64 $ kernelLocalThreadId constants)
    (kernelGroupSize constants)
    n
    f
-- | Iterate over a multidimensional index space using 'groupLoop'.
-- The loop runs over the product of the dimensions, and each flat
-- iteration number is unflattened into a point of the space before
-- being passed to the supplied function.
groupCoverSpace ::
  [Imp.TExp Int64] ->
  ([Imp.TExp Int64] -> InKernelGen ()) ->
  InKernelGen ()
groupCoverSpace ds f =
  groupLoop (product ds) $ f . unflattenIndex ds
compileGroupExp :: ExpCompiler GPUMem KernelEnv Imp.KernelOp
compileGroupExp :: ExpCompiler GPUMem KernelEnv KernelOp
compileGroupExp (Pat [PatElemT (LetDec GPUMem)
pe]) (BasicOp (Opaque OpaqueOp
_ SubExp
se)) =
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM (PatElemT LetDecMem -> VName
forall dec. PatElemT dec -> VName
patElemName PatElemT (LetDec GPUMem)
PatElemT LetDecMem
pe) [] SubExp
se []
compileGroupExp (Pat [PatElemT (LetDec GPUMem)
dest]) (BasicOp (ArrayLit [SubExp]
es Type
_)) =
[(Int64, SubExp)]
-> ((Int64, SubExp) -> InKernelGen ()) -> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Int64] -> [SubExp] -> [(Int64, SubExp)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int64
0 ..] [SubExp]
es) (((Int64, SubExp) -> InKernelGen ()) -> InKernelGen ())
-> ((Int64, SubExp) -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(Int64
i, SubExp
e) ->
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> InKernelGen ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix (PatElemT LetDecMem -> VName
forall dec. PatElemT dec -> VName
patElemName PatElemT (LetDec GPUMem)
PatElemT LetDecMem
dest) [Int64 -> TExp Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64
i :: Int64)] SubExp
e []
compileGroupExp Pat GPUMem
_ (BasicOp (UpdateAcc VName
acc [SubExp]
is [SubExp]
vs)) =
VName -> [SubExp] -> [SubExp] -> InKernelGen ()
updateAcc VName
acc [SubExp]
is [SubExp]
vs
compileGroupExp (Pat [PatElemT (LetDec GPUMem)
dest]) (BasicOp (Replicate Shape
ds SubExp
se)) = do
let ds' :: [TExp Int64]
ds' = (SubExp -> TExp Int64) -> [SubExp] -> [TExp Int64]
forall a b. (a -> b) -> [a] -> [b]
map SubExp -> TExp Int64
forall a. ToExp a => a -> TExp Int64
toInt64Exp ([SubExp] -> [TExp Int64]) -> [SubExp] -> [TExp Int64]
forall a b. (a -> b) -> a -> b
$ Shape -> [SubExp]
forall d. ShapeBase d -> [d]
shapeDims Shape
ds
[TExp Int64] -> ([TExp Int64] -> InKernelGen ()) -> InKernelGen ()
groupCoverSpace [TExp Int64]
ds' (([TExp Int64] -> InKernelGen ()) -> InKernelGen ())
-> ([TExp Int64] -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \[TExp Int64]
is ->
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> InKernelGen ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix (PatElemT LetDecMem -> VName
forall dec. PatElemT dec -> VName
patElemName PatElemT (LetDec GPUMem)
PatElemT LetDecMem
dest) [TExp Int64]
is SubExp
se (Int -> [TExp Int64] -> [TExp Int64]
forall a. Int -> [a] -> [a]
drop (Shape -> Int
forall a. ArrayShape a => a -> Int
shapeRank Shape
ds) [TExp Int64]
is)
KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Fence -> KernelOp
Imp.Barrier Fence
Imp.FenceLocal
compileGroupExp (Pat [PatElemT (LetDec GPUMem)
dest]) (BasicOp (Iota SubExp
n SubExp
e SubExp
s IntType
it)) = do
PrimExp ExpLeaf
n' <- SubExp -> ImpM GPUMem KernelEnv KernelOp (PrimExp ExpLeaf)
forall a rep r op. ToExp a => a -> ImpM rep r op (PrimExp ExpLeaf)
toExp SubExp
n
PrimExp ExpLeaf
e' <- SubExp -> ImpM GPUMem KernelEnv KernelOp (PrimExp ExpLeaf)
forall a rep r op. ToExp a => a -> ImpM rep r op (PrimExp ExpLeaf)
toExp SubExp
e
PrimExp ExpLeaf
s' <- SubExp -> ImpM GPUMem KernelEnv KernelOp (PrimExp ExpLeaf)
forall a rep r op. ToExp a => a -> ImpM rep r op (PrimExp ExpLeaf)
toExp SubExp
s
TExp Int64 -> (TExp Int64 -> InKernelGen ()) -> InKernelGen ()
groupLoop (PrimExp ExpLeaf -> TExp Int64
forall t v. PrimExp v -> TPrimExp t v
TPrimExp PrimExp ExpLeaf
n') ((TExp Int64 -> InKernelGen ()) -> InKernelGen ())
-> (TExp Int64 -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \TExp Int64
i' -> do
TV Any
x <-
String -> TExp Any -> ImpM GPUMem KernelEnv KernelOp (TV Any)
forall t rep r op. String -> TExp t -> ImpM rep r op (TV t)
dPrimV String
"x" (TExp Any -> ImpM GPUMem KernelEnv KernelOp (TV Any))
-> TExp Any -> ImpM GPUMem KernelEnv KernelOp (TV Any)
forall a b. (a -> b) -> a -> b
$
PrimExp ExpLeaf -> TExp Any
forall t v. PrimExp v -> TPrimExp t v
TPrimExp (PrimExp ExpLeaf -> TExp Any) -> PrimExp ExpLeaf -> TExp Any
forall a b. (a -> b) -> a -> b
$
BinOp -> PrimExp ExpLeaf -> PrimExp ExpLeaf -> PrimExp ExpLeaf
forall v. BinOp -> PrimExp v -> PrimExp v -> PrimExp v
BinOpExp (IntType -> Overflow -> BinOp
Add IntType
it Overflow
OverflowUndef) PrimExp ExpLeaf
e' (PrimExp ExpLeaf -> PrimExp ExpLeaf)
-> PrimExp ExpLeaf -> PrimExp ExpLeaf
forall a b. (a -> b) -> a -> b
$
BinOp -> PrimExp ExpLeaf -> PrimExp ExpLeaf -> PrimExp ExpLeaf
forall v. BinOp -> PrimExp v -> PrimExp v -> PrimExp v
BinOpExp (IntType -> Overflow -> BinOp
Mul IntType
it Overflow
OverflowUndef) (TExp Int64 -> PrimExp ExpLeaf
forall t v. TPrimExp t v -> PrimExp v
untyped TExp Int64
i') PrimExp ExpLeaf
s'
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> InKernelGen ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix (PatElemT LetDecMem -> VName
forall dec. PatElemT dec -> VName
patElemName PatElemT (LetDec GPUMem)
PatElemT LetDecMem
dest) [TExp Int64
i'] (VName -> SubExp
Var (TV Any -> VName
forall t. TV t -> VName
tvVar TV Any
x)) []
KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Fence -> KernelOp
Imp.Barrier Fence
Imp.FenceLocal
-- Group-level in-place update whose slice leaves no dimensions
-- (@sliceDims slice@ is empty).  Only the thread whose local id is 0
-- performs the write, and a barrier with a local fence is emitted
-- both before and after it.  For 'Safe' updates the write is further
-- guarded by an 'inBounds' check of the slice against the dimensions
-- of the pattern element.  When the slice still has dimensions the
-- guard fails and the next equation is tried.
compileGroupExp (Pat [pe]) (BasicOp (Update safety _ slice se))
  | null (sliceDims slice) = do
    sOp localBarrier
    env <- askEnv
    let ltid = kernelLocalThreadId (kernelConstants env)
    sWhen (ltid .==. 0) guardedWrite
    sOp localBarrier
  where
    localBarrier = Imp.Barrier Imp.FenceLocal

    -- The slice and the destination dimensions as 64-bit expressions.
    slice' = fmap toInt64Exp slice
    dims = map toInt64Exp (arrayDims (patElemType pe))

    -- The actual store of @se@ into the sliced destination.
    write = copyDWIM (patElemName pe) (unSlice slice') se []

    -- The store, bounds-checked only when the update is 'Safe'.
    guardedWrite =
      case safety of
        Unsafe -> write
        Safe -> sWhen (inBounds slice' dims) write
-- Every expression not handled by an earlier equation is delegated to
-- the default expression compiler.
compileGroupExp pat expr = defCompileExp pat expr
-- | Accept thread-level SegOps and abort (via 'error') on group-level
-- ones.
sanityCheckLevel :: SegLevel -> InKernelGen ()
sanityCheckLevel = \case
  SegThread {} -> pure ()
  SegGroup {} -> error "compileGroupOp: unexpected group-level SegOp."
-- | Per-dimension local thread indices for a space with the given
-- dimensions.  If 'kernelLocalIdMap' has an entry for exactly these
-- dimensions, its indices are used (sign-extended to 64 bits);
-- otherwise the flat local thread id is unflattened over the
-- dimensions.
localThreadIDs :: [SubExp] -> InKernelGen [Imp.TExp Int64]
localThreadIDs dims = do
  constants <- kernelConstants <$> askEnv
  let flat_ltid = sExt64 (kernelLocalThreadId constants)
      unflattened = unflattenIndex (map toInt64Exp dims) flat_ltid
  pure $
    case M.lookup dims (kernelLocalIdMap constants) of
      Just precomputed -> map sExt64 precomputed
      Nothing -> unflattened
-- | Declare the index variables of a SegOp space nested in a group:
-- each per-dimension variable is bound to the corresponding result of
-- 'localThreadIDs', and the flat index variable ('segFlat') is bound
-- to the local thread id.  The level is checked with
-- 'sanityCheckLevel' first.
compileGroupSpace :: SegLevel -> SegSpace -> InKernelGen ()
compileGroupSpace lvl space = do
  sanityCheckLevel lvl
  let (idx_names, idx_bounds) = unzip (unSegSpace space)
  idx_exps <- localThreadIDs idx_bounds
  zipWithM_ dPrimV_ idx_names idx_exps
  flat_ltid <- kernelLocalThreadId . kernelConstants <$> askEnv
  dPrimV_ (segFlat space) flat_ltid
-- | For every histogram operation, produce the function that performs
-- its update on the destinations ('histDest') in the @"local@" space.
--
-- Operators that 'atomicUpdateLocking' classifies as 'AtomicPrim' or
-- 'AtomicCAS' need no further setup.  The first operator classified
-- as 'AtomicLocking' causes a lock array to be allocated and zeroed;
-- the resulting 'Locking' is threaded through the traversal and
-- reused by any later 'AtomicLocking' operator.
prepareIntraGroupSegHist ::
  Count GroupSize SubExp ->
  [HistOp GPUMem] ->
  InKernelGen [[Imp.TExp Int64] -> InKernelGen ()]
prepareIntraGroupSegHist group_size hist_ops =
  snd <$> mapAccumLM onOp Nothing hist_ops
  where
    local_space = Space "local"

    -- @l@ is the locking set up by an earlier operator, if any.
    onOp l op = do
      env <- askEnv
      let constants = kernelConstants env
          atomicBinOp = kernelAtomics env
          local_subhistos = histDest op

      case atomicUpdateLocking atomicBinOp (histOp op) of
        AtomicPrim f -> pure (l, f local_space local_subhistos)
        AtomicCAS f -> pure (l, f local_space local_subhistos)
        AtomicLocking f ->
          case l of
            Just l' -> pure (l, f l' local_space local_subhistos)
            Nothing -> do
              locks <- newVName "locks"

              let num_locks = toInt64Exp (unCount group_size)
                  dims =
                    map toInt64Exp (shapeDims (histShape op) ++ [histWidth op])
                  -- A bucket index is flattened over the histogram
                  -- shape and reduced modulo the number of locks.
                  toLockIndex is = [flattenIndex dims is `rem` num_locks]
                  -- NOTE(review): the literals 0, 1, 0 are presumably
                  -- the unlocked / to-lock / to-unlock values of
                  -- 'Locking' -- confirm against its definition.
                  l' = Locking locks 0 1 0 toLockIndex
                  -- One int32 lock per element of the group size.
                  locks_t =
                    Array int32 (Shape [unCount group_size]) NoUniqueness

              locks_mem <- sAlloc "locks_mem" (typeSize locks_t) local_space
              dArray locks int32 (arrayShape locks_t) $
                ArrayIn locks_mem $
                  IxFun.iota (map pe64 (arrayDims locks_t))

              sComment "All locks start out unlocked" $
                groupCoverSpace [kernelGroupSize constants] $ \is ->
                  copyDWIMFix locks is (intConst Int32 0) []

              pure (Just l', f l' local_space local_subhistos)
-- | Run the given code generator, wrapping it in an 'isActive' check
-- on the space unless the check can be skipped.  It is skipped when
-- the level's virtualisation is 'SegNoVirtFull', or when the space is
-- one-dimensional with a size syntactically equal to the group size.
whenActive :: SegLevel -> SegSpace -> InKernelGen () -> InKernelGen ()
whenActive lvl space m =
  case segVirt lvl of
    SegNoVirtFull -> m
    _ -> do
      group_size <- kernelGroupSize . kernelConstants <$> askEnv
      let space_dims = unSegSpace space
          -- Only the single-dimension case is recognised here.
          fills_group = [group_size] == map (toInt64Exp . snd) space_dims
      if fills_group
        then m
        else sWhen (isActive space_dims) m
-- | Compile an operation occurring at group level inside a kernel.
compileGroupOp :: OpCompiler GPUMem KernelEnv Imp.KernelOp
-- Allocations are handled by 'kernelAlloc'.
compileGroupOp pat (Alloc bytes mem_space) =
  kernelAlloc pat bytes mem_space
-- Space splitting is handled by 'splitSpace'.
compileGroupOp pat (Inner (SizeOp (SplitSpace ordering width idx per_thread))) =
  splitSpace pat ordering width idx per_thread
-- SegMap nested in a group: bind the space's index variables, then
-- (for active threads only, see 'whenActive') compile the body with
-- 'threadOperations' installed and write each result with
-- 'compileThreadResult'.  An 'Imp.ErrorSync' with a local fence is
-- emitted afterwards, outside the activity check.
compileGroupOp pat (Inner (SegOp (SegMap lvl space _ body))) = do
  compileGroupSpace lvl space

  let writeResults =
        zipWithM_
          (compileThreadResult space)
          (patElems pat)
          (kernelBodyResult body)
      threadBody = compileStms mempty (kernelBodyStms body) writeResults

  whenActive lvl space (localOps threadOperations threadBody)

  sOp (Imp.ErrorSync Imp.FenceLocal)
-- SegScan nested in a group.  Active threads first compile the body
-- and copy every body result into the pattern's arrays at their own
-- index; after an 'Imp.ErrorSync' the scan itself is carried out by
-- 'groupScan' on flattened views of the scan result arrays.
compileGroupOp pat (Inner (SegOp (SegScan lvl space scans _ body))) = do
  compileGroupSpace lvl space
  let (ltids, dims) = unzip (unSegSpace space)
      dims' = map toInt64Exp dims
      flat_size = product dims'
      -- Store one body result at this thread's position in @dest@.
      writeResult dest res =
        copyDWIMFix dest (map Imp.vi64 ltids) (kernelResultSubExp res) []

  whenActive lvl space $
    compileStms mempty (kernelBodyStms body) $
      zipWithM_ writeResult (patNames pat) (kernelBodyResult body)

  sOp (Imp.ErrorSync Imp.FenceLocal)

  -- The innermost dimension is the segment size.  NOTE(review): 'last'
  -- is partial; this presumably relies on the space being non-empty.
  let segment_size = last dims'
      crossesSegment from to =
        (sExt64 to - sExt64 from) .>. (sExt64 to `rem` segment_size)

  -- 'groupScan' is handed arrays whose space dimensions have been
  -- collapsed into one.  The flattened array lives in the same memory
  -- block as the original but gets a fresh row-major ('IxFun.iota')
  -- index function; the original index function is discarded without
  -- being inspected.
  dims_flat <- dPrimV "dims_flat" flat_size
  let flattened pe = do
        MemLoc mem _ _ <- entryArrayLoc <$> lookupArray (patElemName pe)
        let pe_t = typeOf pe
            inner_dims = drop (length dims') (arrayDims pe_t)
            arr_dims = Var (tvVar dims_flat) : inner_dims
        sArray
          (baseString (patElemName pe) ++ "_flat")
          (elemType pe_t)
          (Shape arr_dims)
          (ArrayIn mem (IxFun.iota (map pe64 arr_dims)))

      -- Only the leading pattern elements hold scan results.
      num_scan_results = sum [length (segBinOpNeutral op) | op <- scans]

  arrs_flat <- mapM flattened (take num_scan_results (patElems pat))

  mapM_
    ( \scan ->
        groupScan
          (Just crossesSegment)
          flat_size
          flat_size
          (segBinOpLambda scan)
          arrs_flat
    )
    scans
compileGroupOp Pat GPUMem
pat (Inner (SegOp (SegRed lvl space ops _ body))) = do
SegLevel -> SegSpace -> InKernelGen ()
compileGroupSpace SegLevel
lvl SegSpace
space
let ([VName]
ltids, [SubExp]
dims) = [(VName, SubExp)] -> ([VName], [SubExp])
forall a b. [(a, b)] -> ([a], [b])
unzip ([(VName, SubExp)] -> ([VName], [SubExp]))
-> [(VName, SubExp)] -> ([VName], [SubExp])
forall a b. (a -> b) -> a -> b
$ SegSpace -> [(VName, SubExp)]
unSegSpace SegSpace
space
([PatElemT LetDecMem]
red_pes, [PatElemT LetDecMem]
map_pes) =
Int
-> [PatElemT LetDecMem]
-> ([PatElemT LetDecMem], [PatElemT LetDecMem])
forall a. Int -> [a] -> ([a], [a])
splitAt ([SegBinOp GPUMem] -> Int
forall rep. [SegBinOp rep] -> Int
segBinOpResults [SegBinOp GPUMem]
ops) ([PatElemT LetDecMem]
-> ([PatElemT LetDecMem], [PatElemT LetDecMem]))
-> [PatElemT LetDecMem]
-> ([PatElemT LetDecMem], [PatElemT LetDecMem])
forall a b. (a -> b) -> a -> b
$ PatT LetDecMem -> [PatElemT LetDecMem]
forall dec. PatT dec -> [PatElemT dec]
patElems Pat GPUMem
PatT LetDecMem
pat
dims' :: [TExp Int64]
dims' = (SubExp -> TExp Int64) -> [SubExp] -> [TExp Int64]
forall a b. (a -> b) -> [a] -> [b]
map SubExp -> TExp Int64
forall a. ToExp a => a -> TExp Int64
toInt64Exp [SubExp]
dims
mkTempArr :: Type -> ImpM GPUMem KernelEnv KernelOp VName
mkTempArr Type
t =
String
-> PrimType
-> Shape
-> Space
-> ImpM GPUMem KernelEnv KernelOp VName
forall rep r op.
String -> PrimType -> Shape -> Space -> ImpM rep r op VName
sAllocArray String
"red_arr" (Type -> PrimType
forall shape u. TypeBase shape u -> PrimType
elemType Type
t) ([SubExp] -> Shape
forall d. [d] -> ShapeBase d
Shape [SubExp]
dims Shape -> Shape -> Shape
forall a. Semigroup a => a -> a -> a
<> Type -> Shape
forall shape u. ArrayShape shape => TypeBase shape u -> shape
arrayShape Type
t) (Space -> ImpM GPUMem KernelEnv KernelOp VName)
-> Space -> ImpM GPUMem KernelEnv KernelOp VName
forall a b. (a -> b) -> a -> b
$ String -> Space
Space String
"local"
[VName]
tmp_arrs <- (Type -> ImpM GPUMem KernelEnv KernelOp VName)
-> [Type] -> ImpM GPUMem KernelEnv KernelOp [VName]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Type -> ImpM GPUMem KernelEnv KernelOp VName
mkTempArr ([Type] -> ImpM GPUMem KernelEnv KernelOp [VName])
-> [Type] -> ImpM GPUMem KernelEnv KernelOp [VName]
forall a b. (a -> b) -> a -> b
$ (SegBinOp GPUMem -> [Type]) -> [SegBinOp GPUMem] -> [Type]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Lambda GPUMem -> [Type]
forall rep. LambdaT rep -> [Type]
lambdaReturnType (Lambda GPUMem -> [Type])
-> (SegBinOp GPUMem -> Lambda GPUMem) -> SegBinOp GPUMem -> [Type]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SegBinOp GPUMem -> Lambda GPUMem
forall rep. SegBinOp rep -> Lambda rep
segBinOpLambda) [SegBinOp GPUMem]
ops
let tmps_for_ops :: [[VName]]
tmps_for_ops = [Int] -> [VName] -> [[VName]]
forall a. [Int] -> [a] -> [[a]]
chunks ((SegBinOp GPUMem -> Int) -> [SegBinOp GPUMem] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map ([SubExp] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([SubExp] -> Int)
-> (SegBinOp GPUMem -> [SubExp]) -> SegBinOp GPUMem -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SegBinOp GPUMem -> [SubExp]
forall rep. SegBinOp rep -> [SubExp]
segBinOpNeutral) [SegBinOp GPUMem]
ops) [VName]
tmp_arrs
SegLevel -> SegSpace -> InKernelGen () -> InKernelGen ()
whenActive SegLevel
lvl SegSpace
space (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
Names -> Stms GPUMem -> InKernelGen () -> InKernelGen ()
forall rep r op.
Names -> Stms rep -> ImpM rep r op () -> ImpM rep r op ()
compileStms Names
forall a. Monoid a => a
mempty (KernelBody GPUMem -> Stms GPUMem
forall rep. KernelBody rep -> Stms rep
kernelBodyStms KernelBody GPUMem
body) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ do
let ([KernelResult]
red_res, [KernelResult]
map_res) =
Int -> [KernelResult] -> ([KernelResult], [KernelResult])
forall a. Int -> [a] -> ([a], [a])
splitAt ([SegBinOp GPUMem] -> Int
forall rep. [SegBinOp rep] -> Int
segBinOpResults [SegBinOp GPUMem]
ops) ([KernelResult] -> ([KernelResult], [KernelResult]))
-> [KernelResult] -> ([KernelResult], [KernelResult])
forall a b. (a -> b) -> a -> b
$ KernelBody GPUMem -> [KernelResult]
forall rep. KernelBody rep -> [KernelResult]
kernelBodyResult KernelBody GPUMem
body
[(VName, KernelResult)]
-> ((VName, KernelResult) -> InKernelGen ()) -> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([VName] -> [KernelResult] -> [(VName, KernelResult)]
forall a b. [a] -> [b] -> [(a, b)]
zip [VName]
tmp_arrs [KernelResult]
red_res) (((VName, KernelResult) -> InKernelGen ()) -> InKernelGen ())
-> ((VName, KernelResult) -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(VName
dest, KernelResult
res) ->
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> InKernelGen ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix VName
dest ((VName -> TExp Int64) -> [VName] -> [TExp Int64]
forall a b. (a -> b) -> [a] -> [b]
map VName -> TExp Int64
Imp.vi64 [VName]
ltids) (KernelResult -> SubExp
kernelResultSubExp KernelResult
res) []
(PatElemT LetDecMem -> KernelResult -> InKernelGen ())
-> [PatElemT LetDecMem] -> [KernelResult] -> InKernelGen ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ (SegSpace
-> PatElemT (LetDec GPUMem) -> KernelResult -> InKernelGen ()
compileThreadResult SegSpace
space) [PatElemT LetDecMem]
map_pes [KernelResult]
map_res
KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Fence -> KernelOp
Imp.ErrorSync Fence
Imp.FenceLocal
case [TExp Int64]
dims' of
[TExp Int64
dim'] -> do
[(SegBinOp GPUMem, [VName])]
-> ((SegBinOp GPUMem, [VName]) -> InKernelGen ()) -> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([SegBinOp GPUMem] -> [[VName]] -> [(SegBinOp GPUMem, [VName])]
forall a b. [a] -> [b] -> [(a, b)]
zip [SegBinOp GPUMem]
ops [[VName]]
tmps_for_ops) (((SegBinOp GPUMem, [VName]) -> InKernelGen ()) -> InKernelGen ())
-> ((SegBinOp GPUMem, [VName]) -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(SegBinOp GPUMem
op, [VName]
tmps) ->
TExp Int32 -> Lambda GPUMem -> [VName] -> InKernelGen ()
groupReduce (TExp Int64 -> TExp Int32
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int32 v
sExt32 TExp Int64
dim') (SegBinOp GPUMem -> Lambda GPUMem
forall rep. SegBinOp rep -> Lambda rep
segBinOpLambda SegBinOp GPUMem
op) [VName]
tmps
KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Fence -> KernelOp
Imp.ErrorSync Fence
Imp.FenceLocal
[(PatElemT LetDecMem, VName)]
-> ((PatElemT LetDecMem, VName) -> InKernelGen ())
-> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([PatElemT LetDecMem] -> [VName] -> [(PatElemT LetDecMem, VName)]
forall a b. [a] -> [b] -> [(a, b)]
zip [PatElemT LetDecMem]
red_pes [VName]
tmp_arrs) (((PatElemT LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ())
-> ((PatElemT LetDecMem, VName) -> InKernelGen ())
-> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(PatElemT LetDecMem
pe, VName
arr) ->
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> InKernelGen ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix (PatElemT LetDecMem -> VName
forall dec. PatElemT dec -> VName
patElemName PatElemT LetDecMem
pe) [] (VName -> SubExp
Var VName
arr) [TExp Int64
0]
[TExp Int64]
_ -> do
TV Int64
dims_flat <- String -> TExp Int64 -> ImpM GPUMem KernelEnv KernelOp (TV Int64)
forall t rep r op. String -> TExp t -> ImpM rep r op (TV t)
dPrimV String
"dims_flat" (TExp Int64 -> ImpM GPUMem KernelEnv KernelOp (TV Int64))
-> TExp Int64 -> ImpM GPUMem KernelEnv KernelOp (TV Int64)
forall a b. (a -> b) -> a -> b
$ [TExp Int64] -> TExp Int64
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [TExp Int64]
dims'
let flatten :: VName -> ImpM GPUMem KernelEnv KernelOp VName
flatten VName
arr = do
ArrayEntry MemLoc
arr_loc PrimType
pt <- VName -> ImpM GPUMem KernelEnv KernelOp ArrayEntry
forall rep r op. VName -> ImpM rep r op ArrayEntry
lookupArray VName
arr
let flat_shape :: Shape
flat_shape =
[SubExp] -> Shape
forall d. [d] -> ShapeBase d
Shape ([SubExp] -> Shape) -> [SubExp] -> Shape
forall a b. (a -> b) -> a -> b
$
VName -> SubExp
Var (TV Int64 -> VName
forall t. TV t -> VName
tvVar TV Int64
dims_flat) SubExp -> [SubExp] -> [SubExp]
forall a. a -> [a] -> [a]
:
Int -> [SubExp] -> [SubExp]
forall a. Int -> [a] -> [a]
drop ([VName] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [VName]
ltids) (MemLoc -> [SubExp]
memLocShape MemLoc
arr_loc)
String
-> PrimType
-> Shape
-> MemBind
-> ImpM GPUMem KernelEnv KernelOp VName
forall rep r op.
String -> PrimType -> Shape -> MemBind -> ImpM rep r op VName
sArray String
"red_arr_flat" PrimType
pt Shape
flat_shape (MemBind -> ImpM GPUMem KernelEnv KernelOp VName)
-> MemBind -> ImpM GPUMem KernelEnv KernelOp VName
forall a b. (a -> b) -> a -> b
$
VName -> IxFun -> MemBind
ArrayIn (MemLoc -> VName
memLocName MemLoc
arr_loc) (IxFun -> MemBind) -> IxFun -> MemBind
forall a b. (a -> b) -> a -> b
$
Shape (TPrimExp Int64 VName) -> IxFun
forall num. IntegralExp num => Shape num -> IxFun num
IxFun.iota (Shape (TPrimExp Int64 VName) -> IxFun)
-> Shape (TPrimExp Int64 VName) -> IxFun
forall a b. (a -> b) -> a -> b
$ (SubExp -> TPrimExp Int64 VName)
-> [SubExp] -> Shape (TPrimExp Int64 VName)
forall a b. (a -> b) -> [a] -> [b]
map SubExp -> TPrimExp Int64 VName
pe64 ([SubExp] -> Shape (TPrimExp Int64 VName))
-> [SubExp] -> Shape (TPrimExp Int64 VName)
forall a b. (a -> b) -> a -> b
$ Shape -> [SubExp]
forall d. ShapeBase d -> [d]
shapeDims Shape
flat_shape
let segment_size :: TExp Int64
segment_size = [TExp Int64] -> TExp Int64
forall a. [a] -> a
last [TExp Int64]
dims'
crossesSegment :: TExp Int32 -> TExp Int32 -> TExp Bool
crossesSegment TExp Int32
from TExp Int32
to =
(TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
to TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
- TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
from) TExp Int64 -> TExp Int64 -> TExp Bool
forall t v. TPrimExp t v -> TPrimExp t v -> TPrimExp Bool v
.>. (TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
to TExp Int64 -> TExp Int64 -> TExp Int64
forall e. IntegralExp e => e -> e -> e
`rem` TExp Int64 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int64
segment_size)
[(SegBinOp GPUMem, [VName])]
-> ((SegBinOp GPUMem, [VName]) -> InKernelGen ()) -> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([SegBinOp GPUMem] -> [[VName]] -> [(SegBinOp GPUMem, [VName])]
forall a b. [a] -> [b] -> [(a, b)]
zip [SegBinOp GPUMem]
ops [[VName]]
tmps_for_ops) (((SegBinOp GPUMem, [VName]) -> InKernelGen ()) -> InKernelGen ())
-> ((SegBinOp GPUMem, [VName]) -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(SegBinOp GPUMem
op, [VName]
tmps) -> do
[VName]
tmps_flat <- (VName -> ImpM GPUMem KernelEnv KernelOp VName)
-> [VName] -> ImpM GPUMem KernelEnv KernelOp [VName]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM VName -> ImpM GPUMem KernelEnv KernelOp VName
flatten [VName]
tmps
Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
-> TExp Int64
-> TExp Int64
-> Lambda GPUMem
-> [VName]
-> InKernelGen ()
groupScan
((TExp Int32 -> TExp Int32 -> TExp Bool)
-> Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
forall a. a -> Maybe a
Just TExp Int32 -> TExp Int32 -> TExp Bool
crossesSegment)
([TExp Int64] -> TExp Int64
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [TExp Int64]
dims')
([TExp Int64] -> TExp Int64
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [TExp Int64]
dims')
(SegBinOp GPUMem -> Lambda GPUMem
forall rep. SegBinOp rep -> Lambda rep
segBinOpLambda SegBinOp GPUMem
op)
[VName]
tmps_flat
KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Fence -> KernelOp
Imp.ErrorSync Fence
Imp.FenceLocal
[(PatElemT LetDecMem, VName)]
-> ((PatElemT LetDecMem, VName) -> InKernelGen ())
-> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([PatElemT LetDecMem] -> [VName] -> [(PatElemT LetDecMem, VName)]
forall a b. [a] -> [b] -> [(a, b)]
zip [PatElemT LetDecMem]
red_pes [VName]
tmp_arrs) (((PatElemT LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ())
-> ((PatElemT LetDecMem, VName) -> InKernelGen ())
-> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(PatElemT LetDecMem
pe, VName
arr) ->
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM
(PatElemT LetDecMem -> VName
forall dec. PatElemT dec -> VName
patElemName PatElemT LetDecMem
pe)
[]
(VName -> SubExp
Var VName
arr)
((TExp Int64 -> DimIndex (TExp Int64))
-> [TExp Int64] -> [DimIndex (TExp Int64)]
forall a b. (a -> b) -> [a] -> [b]
map (TExp Int64 -> TExp Int64 -> DimIndex (TExp Int64)
forall d. Num d => d -> d -> DimIndex d
unitSlice TExp Int64
0) ([TExp Int64] -> [TExp Int64]
forall a. [a] -> [a]
init [TExp Int64]
dims') [DimIndex (TExp Int64)]
-> [DimIndex (TExp Int64)] -> [DimIndex (TExp Int64)]
forall a. [a] -> [a] -> [a]
++ [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ [TExp Int64] -> TExp Int64
forall a. [a] -> a
last [TExp Int64]
dims' TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
-TExp Int64
1])
KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Fence -> KernelOp
Imp.Barrier Fence
Imp.FenceLocal
-- Group-level compilation of a segmented histogram ('SegHist').
--
-- The results of the kernel body are laid out as: one bin index per
-- histogram operator, then the values to combine for every operator,
-- then the ordinary map results.  Map results are written via
-- 'compileThreadResult'; the histogram updates are applied through the
-- per-operator update functions returned by 'prepareIntraGroupSegHist'.
compileGroupOp pat (Inner (SegOp (SegHist lvl space ops _ kbody))) = do
  compileGroupSpace lvl space
  let ltids = map fst $ unSegSpace space

  -- The leading pattern elements correspond to the histogram results
  -- (one index plus the neutral-element-many values per operator).
  -- They are not written here; only the trailing map results are.
  let num_red_res = length ops + sum (map (length . histNeutral) ops)
      (_red_pes, map_pes) =
        splitAt num_red_res $ patElems pat

  ops' <- prepareIntraGroupSegHist (segGroupSize lvl) ops

  -- Barrier after preparing the histogram operators and before any
  -- thread starts updating.
  -- NOTE(review): presumably this makes whatever local state
  -- 'prepareIntraGroupSegHist' set up visible to the whole group -
  -- confirm against that function.
  sOp $ Imp.Barrier Imp.FenceLocal

  whenActive lvl space $
    compileStms mempty (kernelBodyStms kbody) $ do
      let (red_res, map_res) = splitAt num_red_res $ kernelBodyResult kbody
          (red_is, red_vs) = splitAt (length ops) $ map kernelResultSubExp red_res
      zipWithM_ (compileThreadResult space) map_pes map_res

      -- Split the flat list of values into one chunk per operator,
      -- sized by the number of destination arrays of that operator.
      let vs_per_op = chunks (map (length . histDest) ops) red_vs

      forM_ (zip4 red_is vs_per_op ops' ops) $
        \(bin, op_vs, do_op, HistOp dest_w _ _ _ shape lam) -> do
          let bin' = toInt64Exp bin
              dest_w' = toInt64Exp dest_w
              -- Updates to bins outside [0, dest_w) are dropped.
              bin_in_bounds = 0 .<=. bin' .&&. bin' .<. dest_w'
              -- Replace the innermost thread index by the bin index.
              bin_is = map Imp.vi64 (init ltids) ++ [bin']
              -- The last parameters of the operator lambda receive the
              -- new values; the preceding ones are left to 'do_op'.
              vs_params = takeLast (length op_vs) $ lambdaParams lam

          sComment "perform atomic updates" $
            sWhen bin_in_bounds $ do
              dLParams $ lambdaParams lam
              sLoopNest shape $ \is -> do
                forM_ (zip vs_params op_vs) $ \(p, v) ->
                  copyDWIMFix (paramName p) [] v is
                do_op (bin_is ++ is)

  -- NOTE(review): placed outside 'whenActive' so that every thread of
  -- the group reaches the synchronisation point; the flattened source
  -- carries no indentation, so confirm this nesting against upstream.
  sOp $ Imp.ErrorSync Imp.FenceLocal
-- Any operation not handled by an earlier equation cannot be compiled
-- at group level; report it as a compiler bug, naming the pattern.
compileGroupOp pat _ =
  compilerBugS $ "compileGroupOp: cannot compile rhs of binding " ++ pretty pat
-- | Compile an operation occurring at thread level.
--
-- Allocations are delegated to 'kernelAlloc' and 'SplitSpace' size
-- operations to 'splitSpace'.  Any other operation is reported as a
-- compiler bug, naming the pattern being bound.
compileThreadOp :: OpCompiler GPUMem KernelEnv Imp.KernelOp
compileThreadOp pat (Alloc size space) =
  kernelAlloc pat size space
compileThreadOp pat (Inner (SizeOp (SplitSpace o w i elems_per_thread))) =
  splitSpace pat o w i elems_per_thread
compileThreadOp pat _ =
  compilerBugS $ "compileThreadOp: cannot compile rhs of binding " ++ pretty pat
-- | Description of the lock array used when an atomic update has to be
-- protected by explicit locking (see 'AtomicLocking').
data Locking = Locking
  { -- | The array holding the locks.
    lockingArray :: VName,
    -- | The value a lock holds when it is free; a compare-and-exchange
    -- that observes this value has acquired the lock.
    lockingIsUnlocked :: Imp.TExp Int32,
    -- | The value written into a lock in order to acquire it.
    lockingToLock :: Imp.TExp Int32,
    -- | The value written into a lock in order to release it.
    -- NOTE(review): inferred from the field name; confirm against the
    -- release code in 'atomicUpdateLocking'.
    lockingToUnlock :: Imp.TExp Int32,
    -- | Maps the index of the bucket being updated to the index of
    -- the lock in 'lockingArray' that guards it.
    lockingMapping :: [Imp.TExp Int64] -> [Imp.TExp Int64]
  }
-- | A function that generates kernel code for an atomic update.  Its
-- arguments are the memory space of the arrays, the arrays being
-- updated, and the index of the bucket to update.
type DoAtomicUpdate rep r =
  Space -> [VName] -> [Imp.TExp Int64] -> ImpM rep r Imp.KernelOp ()
-- | The mechanism by which an atomic update is carried out, as chosen
-- by 'atomicUpdateLocking'.
data AtomicUpdate rep r
  = -- | Every operator involved has a directly supported atomic
    -- primitive.
    AtomicPrim (DoAtomicUpdate rep r)
  | -- | Implemented with compare-and-swap.
    AtomicCAS (DoAtomicUpdate rep r)
  | -- | Requires explicit locking; the caller supplies the 'Locking'
    -- to use.
    AtomicLocking (Locking -> DoAtomicUpdate rep r)
-- | Look up the atomic operation corresponding to a 'BinOp', if the
-- target has one.  The resulting function takes the variable that
-- receives the old value, the array, the element offset into it, and
-- the operand expression.
type AtomicBinOp =
  BinOp ->
  Maybe (VName -> VName -> Count Imp.Elements (Imp.TExp Int64) -> Imp.Exp -> Imp.AtomicOp)
atomicUpdateLocking ::
AtomicBinOp ->
Lambda GPUMem ->
AtomicUpdate GPUMem KernelEnv
atomicUpdateLocking :: AtomicBinOp -> Lambda GPUMem -> AtomicUpdate GPUMem KernelEnv
atomicUpdateLocking AtomicBinOp
atomicBinOp Lambda GPUMem
lam
| Just [(BinOp, PrimType, VName, VName)]
ops_and_ts <- Lambda GPUMem -> Maybe [(BinOp, PrimType, VName, VName)]
forall rep.
ASTRep rep =>
Lambda rep -> Maybe [(BinOp, PrimType, VName, VName)]
lamIsBinOp Lambda GPUMem
lam,
((BinOp, PrimType, VName, VName) -> Bool)
-> [(BinOp, PrimType, VName, VName)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (\(BinOp
_, PrimType
t, VName
_, VName
_) -> PrimType -> Int
primBitSize PrimType
t Int -> [Int] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int
32, Int
64]) [(BinOp, PrimType, VName, VName)]
ops_and_ts =
[(BinOp, PrimType, VName, VName)]
-> DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv
primOrCas [(BinOp, PrimType, VName, VName)]
ops_and_ts (DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv)
-> DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv
forall a b. (a -> b) -> a -> b
$ \Space
space [VName]
arrs [TExp Int64]
bucket ->
[(VName, (BinOp, PrimType, VName, VName))]
-> ((VName, (BinOp, PrimType, VName, VName)) -> InKernelGen ())
-> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([VName]
-> [(BinOp, PrimType, VName, VName)]
-> [(VName, (BinOp, PrimType, VName, VName))]
forall a b. [a] -> [b] -> [(a, b)]
zip [VName]
arrs [(BinOp, PrimType, VName, VName)]
ops_and_ts) (((VName, (BinOp, PrimType, VName, VName)) -> InKernelGen ())
-> InKernelGen ())
-> ((VName, (BinOp, PrimType, VName, VName)) -> InKernelGen ())
-> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(VName
a, (BinOp
op, PrimType
t, VName
x, VName
y)) -> do
TV Any
old <- String -> PrimType -> ImpM GPUMem KernelEnv KernelOp (TV Any)
forall rep r op t. String -> PrimType -> ImpM rep r op (TV t)
dPrim String
"old" PrimType
t
(VName
arr', Space
_a_space, Count Elements (TExp Int64)
bucket_offset) <- VName
-> [TExp Int64]
-> ImpM
GPUMem
KernelEnv
KernelOp
(VName, Space, Count Elements (TExp Int64))
forall rep r op.
VName
-> [TExp Int64]
-> ImpM rep r op (VName, Space, Count Elements (TExp Int64))
fullyIndexArray VName
a [TExp Int64]
bucket
case Space
-> VName
-> VName
-> Count Elements (TExp Int64)
-> BinOp
-> Maybe (PrimExp ExpLeaf -> KernelOp)
opHasAtomicSupport Space
space (TV Any -> VName
forall t. TV t -> VName
tvVar TV Any
old) VName
arr' Count Elements (TExp Int64)
bucket_offset BinOp
op of
Just PrimExp ExpLeaf -> KernelOp
f -> KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ PrimExp ExpLeaf -> KernelOp
f (PrimExp ExpLeaf -> KernelOp) -> PrimExp ExpLeaf -> KernelOp
forall a b. (a -> b) -> a -> b
$ VName -> PrimType -> PrimExp ExpLeaf
Imp.var VName
y PrimType
t
Maybe (PrimExp ExpLeaf -> KernelOp)
Nothing ->
Space
-> PrimType
-> VName
-> VName
-> [TExp Int64]
-> VName
-> InKernelGen ()
-> InKernelGen ()
atomicUpdateCAS Space
space PrimType
t VName
a (TV Any -> VName
forall t. TV t -> VName
tvVar TV Any
old) [TExp Int64]
bucket VName
x (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
VName
x VName -> PrimExp ExpLeaf -> InKernelGen ()
forall rep r op. VName -> PrimExp ExpLeaf -> ImpM rep r op ()
<~~ BinOp -> PrimExp ExpLeaf -> PrimExp ExpLeaf -> PrimExp ExpLeaf
forall v. BinOp -> PrimExp v -> PrimExp v -> PrimExp v
Imp.BinOpExp BinOp
op (VName -> PrimType -> PrimExp ExpLeaf
Imp.var VName
x PrimType
t) (VName -> PrimType -> PrimExp ExpLeaf
Imp.var VName
y PrimType
t)
where
opHasAtomicSupport :: Space
-> VName
-> VName
-> Count Elements (TExp Int64)
-> BinOp
-> Maybe (PrimExp ExpLeaf -> KernelOp)
opHasAtomicSupport Space
space VName
old VName
arr' Count Elements (TExp Int64)
bucket' BinOp
bop = do
let atomic :: (VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp)
-> PrimExp ExpLeaf -> KernelOp
atomic VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp
f = Space -> AtomicOp -> KernelOp
Imp.Atomic Space
space (AtomicOp -> KernelOp)
-> (PrimExp ExpLeaf -> AtomicOp) -> PrimExp ExpLeaf -> KernelOp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp
f VName
old VName
arr' Count Elements (TExp Int64)
bucket'
(VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp)
-> PrimExp ExpLeaf -> KernelOp
atomic ((VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp)
-> PrimExp ExpLeaf -> KernelOp)
-> Maybe
(VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp)
-> Maybe (PrimExp ExpLeaf -> KernelOp)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> AtomicBinOp
atomicBinOp BinOp
bop
primOrCas :: [(BinOp, PrimType, VName, VName)]
-> DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv
primOrCas [(BinOp, PrimType, VName, VName)]
ops
| ((BinOp, PrimType, VName, VName) -> Bool)
-> [(BinOp, PrimType, VName, VName)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (BinOp, PrimType, VName, VName) -> Bool
isPrim [(BinOp, PrimType, VName, VName)]
ops = DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv
forall rep r. DoAtomicUpdate rep r -> AtomicUpdate rep r
AtomicPrim
| Bool
otherwise = DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv
forall rep r. DoAtomicUpdate rep r -> AtomicUpdate rep r
AtomicCAS
isPrim :: (BinOp, PrimType, VName, VName) -> Bool
isPrim (BinOp
op, PrimType
_, VName
_, VName
_) = Maybe
(VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp)
-> Bool
forall a. Maybe a -> Bool
isJust (Maybe
(VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp)
-> Bool)
-> Maybe
(VName
-> VName
-> Count Elements (TExp Int64)
-> PrimExp ExpLeaf
-> AtomicOp)
-> Bool
forall a b. (a -> b) -> a -> b
$ AtomicBinOp
atomicBinOp BinOp
op
atomicUpdateLocking AtomicBinOp
_ Lambda GPUMem
op
| [Prim PrimType
t] <- Lambda GPUMem -> [Type]
forall rep. LambdaT rep -> [Type]
lambdaReturnType Lambda GPUMem
op,
[LParam GPUMem
xp, LParam GPUMem
_] <- Lambda GPUMem -> [LParam GPUMem]
forall rep. LambdaT rep -> [LParam rep]
lambdaParams Lambda GPUMem
op,
PrimType -> Int
primBitSize PrimType
t Int -> [Int] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int
32, Int
64] = DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv
forall rep r. DoAtomicUpdate rep r -> AtomicUpdate rep r
AtomicCAS (DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv)
-> DoAtomicUpdate GPUMem KernelEnv -> AtomicUpdate GPUMem KernelEnv
forall a b. (a -> b) -> a -> b
$ \Space
space [VName
arr] [TExp Int64]
bucket -> do
TV Any
old <- String -> PrimType -> ImpM GPUMem KernelEnv KernelOp (TV Any)
forall rep r op t. String -> PrimType -> ImpM rep r op (TV t)
dPrim String
"old" PrimType
t
Space
-> PrimType
-> VName
-> VName
-> [TExp Int64]
-> VName
-> InKernelGen ()
-> InKernelGen ()
atomicUpdateCAS Space
space PrimType
t VName
arr (TV Any -> VName
forall t. TV t -> VName
tvVar TV Any
old) [TExp Int64]
bucket (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName LParam GPUMem
Param LetDecMem
xp) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
[Param LetDecMem] -> BodyT GPUMem -> InKernelGen ()
forall dec rep r op. [Param dec] -> Body rep -> ImpM rep r op ()
compileBody' [LParam GPUMem
Param LetDecMem
xp] (BodyT GPUMem -> InKernelGen ()) -> BodyT GPUMem -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Lambda GPUMem -> BodyT GPUMem
forall rep. LambdaT rep -> BodyT rep
lambdaBody Lambda GPUMem
op
-- Fallback clause: regardless of what the operator looks like, perform
-- the update inside a critical section guarded by a lock taken from the
-- array described by the 'Locking' argument.
atomicUpdateLocking _ op = AtomicLocking $ \locking space arrs bucket -> do
  -- Receives the value the lock held before each compare-and-exchange.
  old <- dPrim "old" int32
  -- Loop flag, declared through 'dPrimVol'; cleared once the update has
  -- been carried out.
  continue <- dPrimVol "continue" Bool true

  -- Find the lock that guards this bucket.
  (locks', _locks_space, locks_offset) <-
    fullyIndexArray (lockingArray locking) $ lockingMapping locking bucket

  let -- Atomic compare-and-exchange on the lock: if it holds
      -- @expected@ it becomes @replacement@; the previous value is
      -- stored in @old@ either way.
      lockCmpXchg :: Imp.TExp Int32 -> Imp.TExp Int32 -> InKernelGen ()
      lockCmpXchg expected replacement =
        sOp $
          Imp.Atomic space $
            Imp.AtomicCmpXchg
              int32
              (tvVar old)
              locks'
              locks_offset
              (untyped expected)
              (untyped replacement)

      try_acquire_lock =
        lockCmpXchg (lockingIsUnlocked locking) (lockingToLock locking)
      -- We own the lock exactly when the exchange saw the unlocked value.
      lock_acquired = tvExp old .==. lockingIsUnlocked locking
      -- Releasing is also done with an atomic exchange rather than a
      -- plain write (presumably for memory coherency; the reason is not
      -- visible here).
      release_lock =
        lockCmpXchg (lockingToLock locking) (lockingToUnlock locking)
      break_loop = continue <-- false

  -- The first @length arrs@ lambda parameters are the accumulators; they
  -- are loaded from the bucket below.  The remaining parameters are not
  -- touched here (presumably the caller has already bound them).
  let (acc_params, _arr_params) = splitAt (length arrs) $ lambdaParams op

      -- Bucket reads and writes are wrapped in 'everythingVolatile'.
      bind_acc_params =
        everythingVolatile $
          sComment "bind lhs" $
            forM_ (zip acc_params arrs) $ \(acc_p, arr) ->
              copyDWIMFix (paramName acc_p) [] (Var arr) bucket

      op_body =
        sComment "execute operation" $
          compileBody' acc_params $ lambdaBody op

      do_hist =
        everythingVolatile $
          sComment "update global result" $
            zipWithM_ (writeArray bucket) arrs $
              map (Var . paramName) acc_params

      -- Memory fence matching the space the buckets live in.
      fence = case space of
        Space "local" -> sOp $ Imp.MemFence Imp.FenceLocal
        _ -> sOp $ Imp.MemFence Imp.FenceGlobal

  -- Spin until this thread has taken the lock and applied its update.
  sWhile (tvExp continue) $ do
    try_acquire_lock
    sWhen lock_acquired $ do
      dLParams acc_params
      bind_acc_params
      op_body
      do_hist
      fence
      release_lock
      break_loop
    fence
  where
    -- Store a scalar into @arr@ at the given bucket index.
    writeArray bucket arr val = copyDWIMFix arr bucket val []
-- | Generate an atomic update of @arr[bucket]@ built from a
-- compare-and-swap loop.  The generated code has this shape:
--
-- @
-- old = arr[bucket];
-- while (run_loop) {
--   assumed = old;
--   x = assumed;
--   do_op;                       // expected to leave the new value in x
--   old_bits = CAS(&arr[bucket], to_bits(assumed), to_bits(x));
--   old = from_bits(old_bits);
--   if (to_bits(assumed) == old_bits) run_loop = false;
-- }
-- @
--
-- Arguments: the memory space of the atomic, the element type @t@, the
-- array, the variable @old@ that tracks the last value seen, the bucket
-- index, the variable @x@ that @do_op@ operates on, and @do_op@ itself.
atomicUpdateCAS ::
  Space ->
  PrimType ->
  VName ->
  VName ->
  [Imp.TExp Int64] ->
  VName ->
  InKernelGen () ->
  InKernelGen ()
atomicUpdateCAS space t arr old bucket x do_op = do
  assumed <- tvVar <$> dPrim "assumed" t
  run_loop <- dPrimV "run_loop" true

  -- The initial read of the bucket is done under 'everythingVolatile'.
  everythingVolatile $ copyDWIMFix old [] (Var arr) bucket

  (arr', _a_space, bucket_offset) <- fullyIndexArray arr bucket

  let -- The exchange is performed on integers, so floats are converted
      -- to and from their bit patterns; other types pass through as-is.
      (toBits, fromBits) =
        case t of
          FloatType Float32 ->
            ( \v -> Imp.FunExp "to_bits32" [v] int32,
              \v -> Imp.FunExp "from_bits32" [v] t
            )
          FloatType Float64 ->
            ( \v -> Imp.FunExp "to_bits64" [v] int64,
              \v -> Imp.FunExp "from_bits64" [v] t
            )
          _ -> (id, id)

      -- Integer type used for the exchange.  Anything that is not 32
      -- bits wide is treated as 64 bits (NOTE(review): presumably
      -- callers only pass 32- or 64-bit types -- confirm).
      int
        | primBitSize t == 32 = int32
        | otherwise = int64

      assumed_bits = toBits (Imp.var assumed t)

  sWhile (tvExp run_loop) $ do
    assumed <~~ Imp.var old t
    x <~~ Imp.var assumed t
    do_op
    old_bits_v <- newVName "old_bits"
    dPrim_ old_bits_v int
    let old_bits = Imp.var old_bits_v int
    sOp $
      Imp.Atomic space $
        Imp.AtomicCmpXchg
          int
          old_bits_v
          arr'
          bucket_offset
          assumed_bits
          (toBits (Imp.var x t))
    old <~~ fromBits old_bits
    -- The swap took effect iff memory still held what we assumed.
    let won = CmpOpExp (CmpEq int) assumed_bits old_bits
    sWhen (isBool won) (run_loop <-- false)
-- | Compute the 'Imp.KernelUse's for a kernel body: the names free in
-- @kernel_body@, minus those in @bound_in_kernel@, are classified by
-- 'readsFromSet', and duplicates are removed from the result.
computeKernelUses ::
  FreeIn a =>
  a ->
  [VName] ->
  CallKernelGen [Imp.KernelUse]
computeKernelUses kernel_body bound_in_kernel =
  nubOrd <$> readsFromSet actually_free
  where
    actually_free =
      freeIn kernel_body `namesSubtract` namesFromList bound_in_kernel
-- | Classify each name in the set by its type:
--
-- * arrays, accumulators and memory blocks in space @"local"@ produce
--   no use;
--
-- * any other memory block produces an 'Imp.MemoryUse';
--
-- * a primitive produces an 'Imp.ConstUse' when 'isConstExp' can express
--   it as a kernel constant, and an 'Imp.ScalarUse' otherwise.
readsFromSet :: Names -> CallKernelGen [Imp.KernelUse]
readsFromSet free = do
  -- Nothing in the loop below modifies the symbol table, so fetch it once.
  vtable <- getVTable
  fmap catMaybes $
    forM (namesToList free) $ \var -> do
      t <- lookupType var
      case t of
        Array {} -> return Nothing
        Acc {} -> return Nothing
        -- Must come before the general 'Mem' case.
        Mem (Space "local") -> return Nothing
        Mem {} -> return $ Just $ Imp.MemoryUse var
        Prim bt ->
          Just . maybe (Imp.ScalarUse var bt) (Imp.ConstUse var)
            <$> isConstExp vtable (Imp.var var bt)
-- | Try to rewrite an expression so that every leaf is a kernel
-- constant.  A scalar variable is replaced by the constant form of the
-- expression recorded for it in the symbol table, if there is one; the
-- result is 'Nothing' as soon as a leaf is an array index, a variable
-- with no recorded expression, or an expression that cannot be converted.
isConstExp ::
  VTable GPUMem ->
  Imp.Exp ->
  ImpM rep r op (Maybe Imp.KernelConstExp)
isConstExp vtable size = do
  fname <- askFunction
  let onLeaf (Imp.ScalarVar name) _ = lookupConstExp name
      onLeaf Imp.Index {} _ = Nothing

      -- Variable -> bound expression -> constant expression.
      lookupConstExp name =
        constExp =<< hasExp =<< M.lookup name vtable

      -- A size lookup becomes a size constant, keyed together with the
      -- current function name; anything else is converted recursively,
      -- resolving its variables through 'lookupConstExp'.
      constExp (Op (Inner (SizeOp (GetSize key _)))) =
        Just $ LeafExp (Imp.SizeConst $ keyWithEntryPoint fname key) int32
      constExp e = primExpFromExp lookupConstExp e
  return $ replaceInPrimExpM onLeaf size
  where
    -- The expression (if any) recorded for a symbol table entry.
    hasExp (ArrayVar e _) = e
    hasExp (AccVar e _) = e
    hasExp (ScalarVar e _) = e
    hasExp (MemVar e _) = e
-- | Emit code that stores in @chunk_var@ the number of elements the
-- thread with index @thread_index@ should process.
--
-- * 'SplitStrided': the smaller of @elements_per_thread@ and
--   @(num_elements - thread_index)@ divided by the stride, rounded up.
--
-- * 'SplitContiguous': the thread starts at
--   @thread_index * elements_per_thread@.  The chunk is @0@ when nothing
--   remains at or after that point, the leftover elements when fewer
--   than a full chunk remain, and @elements_per_thread@ otherwise.
computeThreadChunkSize ::
  SplitOrdering ->
  Imp.TExp Int64 ->
  Imp.Count Imp.Elements (Imp.TExp Int64) ->
  Imp.Count Imp.Elements (Imp.TExp Int64) ->
  TV Int64 ->
  ImpM rep r op ()
computeThreadChunkSize (SplitStrided stride) thread_index elements_per_thread num_elements chunk_var =
  chunk_var
    <-- sMin64
      (Imp.unCount elements_per_thread)
      ((Imp.unCount num_elements - thread_index) `divUp` toInt64Exp stride)
computeThreadChunkSize SplitContiguous thread_index elements_per_thread num_elements chunk_var = do
  starting_point <-
    dPrimV "starting_point" $
      thread_index * Imp.unCount elements_per_thread
  remaining_elements <-
    dPrimV "remaining_elements" $
      Imp.unCount num_elements - tvExp starting_point

  let no_remaining_elements = tvExp remaining_elements .<=. 0
      beyond_bounds = Imp.unCount num_elements .<=. tvExp starting_point

  sIf
    (no_remaining_elements .||. beyond_bounds)
    (chunk_var <-- 0)
    ( sIf
        is_last_thread
        (chunk_var <-- Imp.unCount last_thread_elements)
        (chunk_var <-- Imp.unCount elements_per_thread)
    )
  where
    -- What is left for a thread that cannot fill a whole chunk.
    last_thread_elements =
      num_elements - Imp.elements thread_index * elements_per_thread
    -- True when a full chunk for this thread would run past the end.
    is_last_thread =
      Imp.unCount num_elements
        .<. (thread_index + 1) * Imp.unCount elements_per_thread
-- | Build the 'KernelConstants' for a kernel launched with the given
-- number of groups and group size, along with the in-kernel action that
-- declares the underlying variables and fills them in from the
-- corresponding 'Imp.KernelOp' queries (all along dimension 0).
kernelInitialisationSimple ::
  Count NumGroups (Imp.TExp Int64) ->
  Count GroupSize (Imp.TExp Int64) ->
  CallKernelGen (KernelConstants, InKernelGen ())
kernelInitialisationSimple (Count num_groups) (Count group_size) = do
  global_tid <- newVName "global_tid"
  local_tid <- newVName "local_tid"
  -- NOTE(review): the name hint really is "group_tid" although the
  -- variable holds the group id; kept so generated names do not change.
  group_id <- newVName "group_tid"
  wave_size <- newVName "wave_size"
  inner_group_size <- newVName "group_size"

  let constants =
        KernelConstants
          (Imp.vi32 global_tid)
          (Imp.vi32 local_tid)
          (Imp.vi32 group_id)
          global_tid
          local_tid
          group_id
          num_groups
          group_size
          -- Total thread count, converted to 32 bits.
          (sExt32 (group_size * num_groups))
          (Imp.vi32 wave_size)
          true
          mempty

      set_constants = do
        dPrim_ global_tid int32
        dPrim_ local_tid int32
        dPrim_ inner_group_size int64
        dPrim_ wave_size int32
        dPrim_ group_id int32

        sOp (Imp.GetGlobalId global_tid 0)
        sOp (Imp.GetLocalId local_tid 0)
        sOp (Imp.GetLocalSize inner_group_size 0)
        sOp (Imp.GetLockstepWidth wave_size)
        sOp (Imp.GetGroupId group_id 0)

  return (constants, set_constants)
-- | Given @(index variable, bound)@ pairs, produce the predicate that
-- holds when every index is strictly below its bound.  An empty list
-- gives @true@.
isActive :: [(VName, SubExp)] -> Imp.TExp Bool
isActive limit = case actives of
  [] -> true
  -- Left-nested conjunction seeded with the first test, so no
  -- redundant @true@ operand ends up in the expression.
  x : xs -> foldl (.&&.) x xs
  where
    actives = [Imp.vi64 i .<. toInt64Exp w | (i, w) <- limit]
-- | Run an action with the default space set to @"global"@ and with
-- every memory block in the symbol table that is not in space @"local"@
-- re-registered as living in space @"global"@.  The expression recorded
-- for such a block is dropped.
makeAllMemoryGlobal :: CallKernelGen a -> CallKernelGen a
makeAllMemoryGlobal =
  localDefaultSpace (Imp.Space "global") . localVTable (M.map globalMemory)
  where
    globalMemory (MemVar _ entry)
      | entryMemSpace entry /= Space "local" =
        MemVar Nothing entry {entryMemSpace = Imp.Space "global"}
    -- Local memory and all non-memory entries are left untouched.
    globalMemory entry = entry
-- | Group-level reduction of the given arrays with the given operator;
-- the first argument is the width passed on unchanged.  This declares a
-- fresh @int32@ variable named @"offset"@ and leaves the rest to
-- 'groupReduceWithOffset'.
groupReduce ::
  Imp.TExp Int32 ->
  Lambda GPUMem ->
  [VName] ->
  InKernelGen ()
groupReduce w lam arrs = do
  offset <- dPrim "offset" int32
  groupReduceWithOffset offset w lam arrs
-- | Generate code for a reduction performed by the threads of one group,
-- using the caller-supplied variable @offset@ as the distance between a
-- thread's own slot and the slot it combines with.
--
-- The generated code runs in two phases:
--
-- 1. /In-wave/: @offset@ starts at 1 and doubles while it is below the
--    wave size.  In each step, a thread whose in-wave id is a multiple of
--    @2 * offset@ (and whose partner slot is below @w@) applies the
--    operator.  This phase issues no barriers and runs with
--    'everythingVolatile'.
--
-- 2. /Cross-wave/: @skip_waves@ starts at 1 and doubles while it is below
--    the number of waves.  Each step starts with a barrier, sets @offset@
--    to @skip_waves * wave_size@, and lets the first thread of every
--    wave that is not skipped apply the operator.
--
-- Arguments:
--
-- * @offset@: variable overwritten throughout; its incoming value is not
--   read.
--
-- * @w@: threads whose local id is below @w@ take part.
--
-- * @lam@: the reduction operator; its first @length arrs@ parameters are
--   used as accumulators and the remainder as the incoming elements.
--
-- * @arrs@: the arrays operands are read from and results written to.
groupReduceWithOffset ::
  TV Int32 ->
  Imp.TExp Int32 ->
  Lambda GPUMem ->
  [VName] ->
  InKernelGen ()
groupReduceWithOffset offset w lam arrs = do
  constants <- kernelConstants <$> askEnv

  let local_tid = kernelLocalThreadId constants
      global_tid = kernelGlobalThreadId constants

      -- A local fence is used when every result of the operator is
      -- primitive; otherwise a global fence is used.
      barrier
        | all primType $ lambdaReturnType lam =
          sOp $ Imp.Barrier Imp.FenceLocal
        | otherwise =
          sOp $ Imp.Barrier Imp.FenceGlobal

      -- Load the element at (thread id + offset) of 'arr' into 'param'.
      -- Primitive parameters are indexed by the local thread id, all
      -- others by the global thread id.
      -- NOTE(review): presumably non-primitive operands live in storage
      -- indexed per global thread -- confirm against the callers.
      readReduceArgument param arr = do
        let tid = case paramType param of
              Prim _ -> local_tid
              _ -> global_tid
            i = tid + tvExp offset
        copyDWIMFix (paramName param) [] (Var arr) [sExt64 i]

      -- Store a primitive accumulator back to this thread's slot.  For
      -- non-primitive accumulators nothing is written here.
      writeReduceOpResult param arr
        | Prim _ <- paramType param =
          copyDWIMFix arr [sExt64 local_tid] (Var $ paramName param) []
        | otherwise =
          return ()

  let (reduce_acc_params, reduce_arr_params) =
        splitAt (length arrs) $ lambdaParams lam

  skip_waves <- dPrimV "skip_waves" (1 :: Imp.TExp Int32)
  dLParams $ lambdaParams lam

  -- With an offset of zero each participating thread reads its own slot.
  offset <-- (0 :: Imp.TExp Int32)

  comment "participating threads read initial accumulator" $
    sWhen (local_tid .<. w) $
      zipWithM_ readReduceArgument reduce_acc_params arrs

  let do_reduce = do
        comment "read array element" $
          zipWithM_ readReduceArgument reduce_arr_params arrs
        comment "apply reduction operation" $
          compileBody' reduce_acc_params $ lambdaBody lam
        comment "write result of operation" $
          zipWithM_ writeReduceOpResult reduce_acc_params arrs
      in_wave_reduce = everythingVolatile do_reduce

      wave_size = kernelWaveSize constants
      group_size = kernelGroupSize constants
      wave_id = local_tid `quot` wave_size
      in_wave_id = local_tid - wave_id * wave_size
      -- Number of waves in the group, rounded up.
      num_waves = (sExt32 group_size + wave_size - 1) `quot` wave_size
      -- The partner slot must lie below w.
      arg_in_bounds = (local_tid + tvExp offset) .<. w

      -- Phase 1: reduction inside each wave.
      doing_in_wave_reductions =
        tvExp offset .<. wave_size
      apply_in_in_wave_iteration =
        (in_wave_id .&. (2 * tvExp offset - 1)) .==. 0
      in_wave_reductions = do
        offset <-- (1 :: Imp.TExp Int32)
        sWhile doing_in_wave_reductions $ do
          sWhen
            (arg_in_bounds .&&. apply_in_in_wave_iteration)
            in_wave_reduce
          offset <-- tvExp offset * 2

      -- Phase 2: combine the per-wave results.
      doing_cross_wave_reductions =
        tvExp skip_waves .<. num_waves
      is_first_thread_in_wave =
        in_wave_id .==. 0
      wave_not_skipped =
        (wave_id .&. (2 * tvExp skip_waves - 1)) .==. 0
      apply_in_cross_wave_iteration =
        arg_in_bounds .&&. is_first_thread_in_wave .&&. wave_not_skipped
      cross_wave_reductions =
        sWhile doing_cross_wave_reductions $ do
          barrier
          offset <-- tvExp skip_waves * wave_size
          sWhen
            apply_in_cross_wave_iteration
            do_reduce
          skip_waves <-- tvExp skip_waves * 2

  in_wave_reductions
  cross_wave_reductions
groupScan ::
Maybe (Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool) ->
Imp.TExp Int64 ->
Imp.TExp Int64 ->
Lambda GPUMem ->
[VName] ->
InKernelGen ()
groupScan :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
-> TExp Int64
-> TExp Int64
-> Lambda GPUMem
-> [VName]
-> InKernelGen ()
groupScan Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
seg_flag TExp Int64
arrs_full_size TExp Int64
w Lambda GPUMem
lam [VName]
arrs = do
KernelConstants
constants <- KernelEnv -> KernelConstants
kernelConstants (KernelEnv -> KernelConstants)
-> ImpM GPUMem KernelEnv KernelOp KernelEnv
-> ImpM GPUMem KernelEnv KernelOp KernelConstants
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ImpM GPUMem KernelEnv KernelOp KernelEnv
forall rep r op. ImpM rep r op r
askEnv
Lambda GPUMem
renamed_lam <- Lambda GPUMem -> ImpM GPUMem KernelEnv KernelOp (Lambda GPUMem)
forall rep (m :: * -> *).
(Renameable rep, MonadFreshNames m) =>
Lambda rep -> m (Lambda rep)
renameLambda Lambda GPUMem
lam
let ltid32 :: TExp Int32
ltid32 = KernelConstants -> TExp Int32
kernelLocalThreadId KernelConstants
constants
ltid :: TExp Int64
ltid = TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
ltid32
([Param LetDecMem]
x_params, [Param LetDecMem]
y_params) = Int -> [Param LetDecMem] -> ([Param LetDecMem], [Param LetDecMem])
forall a. Int -> [a] -> ([a], [a])
splitAt ([VName] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [VName]
arrs) ([Param LetDecMem] -> ([Param LetDecMem], [Param LetDecMem]))
-> [Param LetDecMem] -> ([Param LetDecMem], [Param LetDecMem])
forall a b. (a -> b) -> a -> b
$ Lambda GPUMem -> [LParam GPUMem]
forall rep. LambdaT rep -> [LParam rep]
lambdaParams Lambda GPUMem
lam
[LParam GPUMem] -> InKernelGen ()
forall rep inner r op.
Mem rep inner =>
[LParam rep] -> ImpM rep r op ()
dLParams (Lambda GPUMem -> [LParam GPUMem]
forall rep. LambdaT rep -> [LParam rep]
lambdaParams Lambda GPUMem
lam [Param LetDecMem] -> [Param LetDecMem] -> [Param LetDecMem]
forall a. [a] -> [a] -> [a]
++ Lambda GPUMem -> [LParam GPUMem]
forall rep. LambdaT rep -> [LParam rep]
lambdaParams Lambda GPUMem
renamed_lam)
TExp Bool
ltid_in_bounds <- String -> TExp Bool -> ImpM GPUMem KernelEnv KernelOp (TExp Bool)
forall t rep r op. String -> TExp t -> ImpM rep r op (TExp t)
dPrimVE String
"ltid_in_bounds" (TExp Bool -> ImpM GPUMem KernelEnv KernelOp (TExp Bool))
-> TExp Bool -> ImpM GPUMem KernelEnv KernelOp (TExp Bool)
forall a b. (a -> b) -> a -> b
$ TExp Int64
ltid TExp Int64 -> TExp Int64 -> TExp Bool
forall t v. TPrimExp t v -> TPrimExp t v -> TPrimExp Bool v
.<. TExp Int64
w
let block_size :: TExp Int32
block_size = TExp Int32
32
simd_width :: TExp Int32
simd_width = KernelConstants -> TExp Int32
kernelWaveSize KernelConstants
constants
block_id :: TExp Int32
block_id = TExp Int32
ltid32 TExp Int32 -> TExp Int32 -> TExp Int32
forall e. IntegralExp e => e -> e -> e
`quot` TExp Int32
block_size
in_block_id :: TExp Int32
in_block_id = TExp Int32
ltid32 TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
- TExp Int32
block_id TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
* TExp Int32
block_size
doInBlockScan :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
-> TExp Bool -> Lambda GPUMem -> InKernelGen ()
doInBlockScan Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
seg_flag' TExp Bool
active =
KernelConstants
-> Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
-> TExp Int64
-> TExp Int32
-> TExp Int32
-> TExp Bool
-> [VName]
-> InKernelGen ()
-> Lambda GPUMem
-> InKernelGen ()
inBlockScan
KernelConstants
constants
Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
seg_flag'
TExp Int64
arrs_full_size
TExp Int32
simd_width
TExp Int32
block_size
TExp Bool
active
[VName]
arrs
InKernelGen ()
barrier
array_scan :: Bool
array_scan = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ (Type -> Bool) -> [Type] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Type -> Bool
forall shape u. TypeBase shape u -> Bool
primType ([Type] -> Bool) -> [Type] -> Bool
forall a b. (a -> b) -> a -> b
$ Lambda GPUMem -> [Type]
forall rep. LambdaT rep -> [Type]
lambdaReturnType Lambda GPUMem
lam
barrier :: InKernelGen ()
barrier
| Bool
array_scan =
KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Fence -> KernelOp
Imp.Barrier Fence
Imp.FenceGlobal
| Bool
otherwise =
KernelOp -> InKernelGen ()
forall op rep r. op -> ImpM rep r op ()
sOp (KernelOp -> InKernelGen ()) -> KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Fence -> KernelOp
Imp.Barrier Fence
Imp.FenceLocal
group_offset :: TExp Int64
group_offset = TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 (KernelConstants -> TExp Int32
kernelGroupId KernelConstants
constants) TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
* KernelConstants -> TExp Int64
kernelGroupSize KernelConstants
constants
writeBlockResult :: Param LetDecMem -> VName -> InKernelGen ()
writeBlockResult Param LetDecMem
p VName
arr
| Type -> Bool
forall shape u. TypeBase shape u -> Bool
primType (Type -> Bool) -> Type -> Bool
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> Type
forall dec. Typed dec => Param dec -> Type
paramType Param LetDecMem
p =
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM VName
arr [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
block_id] (VName -> SubExp
Var (VName -> SubExp) -> VName -> SubExp
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
p) []
| Bool
otherwise =
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM VName
arr [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int64
group_offset TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
block_id] (VName -> SubExp
Var (VName -> SubExp) -> VName -> SubExp
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
p) []
readPrevBlockResult :: Param LetDecMem -> VName -> InKernelGen ()
readPrevBlockResult Param LetDecMem
p VName
arr
| Type -> Bool
forall shape u. TypeBase shape u -> Bool
primType (Type -> Bool) -> Type -> Bool
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> Type
forall dec. Typed dec => Param dec -> Type
paramType Param LetDecMem
p =
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
p) [] (VName -> SubExp
Var VName
arr) [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
block_id TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
- TExp Int64
1]
| Bool
otherwise =
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
p) [] (VName -> SubExp
Var VName
arr) [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int64
group_offset TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
block_id TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
- TExp Int64
1]
Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
-> TExp Bool -> Lambda GPUMem -> InKernelGen ()
doInBlockScan Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
seg_flag TExp Bool
ltid_in_bounds Lambda GPUMem
lam
InKernelGen ()
barrier
let is_first_block :: TExp Bool
is_first_block = TExp Int32
block_id TExp Int32 -> TExp Int32 -> TExp Bool
forall t v. TPrimExp t v -> TPrimExp t v -> TPrimExp Bool v
.==. TExp Int32
0
Bool -> InKernelGen () -> InKernelGen ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
array_scan (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ do
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
sComment String
"save correct values for first block" (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
TExp Bool -> InKernelGen () -> InKernelGen ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhen TExp Bool
is_first_block (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
[(Param LetDecMem, VName)]
-> ((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Param LetDecMem] -> [VName] -> [(Param LetDecMem, VName)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Param LetDecMem]
x_params [VName]
arrs) (((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ())
-> ((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(Param LetDecMem
x, VName
arr) ->
Bool -> InKernelGen () -> InKernelGen ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Type -> Bool
forall shape u. TypeBase shape u -> Bool
primType (Type -> Bool) -> Type -> Bool
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> Type
forall dec. Typed dec => Param dec -> Type
paramType Param LetDecMem
x) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM VName
arr [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int64
arrs_full_size TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
group_offset TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
block_size TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
ltid] (VName -> SubExp
Var (VName -> SubExp) -> VName -> SubExp
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
x) []
InKernelGen ()
barrier
let last_in_block :: TExp Bool
last_in_block = TExp Int32
in_block_id TExp Int32 -> TExp Int32 -> TExp Bool
forall t v. TPrimExp t v -> TPrimExp t v -> TPrimExp Bool v
.==. TExp Int32
block_size TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
- TExp Int32
1
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
sComment String
"last thread of block 'i' writes its result to offset 'i'" (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
TExp Bool -> InKernelGen () -> InKernelGen ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhen (TExp Bool
last_in_block TExp Bool -> TExp Bool -> TExp Bool
forall v. TPrimExp Bool v -> TPrimExp Bool v -> TPrimExp Bool v
.&&. TExp Bool
ltid_in_bounds) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
InKernelGen () -> InKernelGen ()
forall rep r op a. ImpM rep r op a -> ImpM rep r op a
everythingVolatile (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
(Param LetDecMem -> VName -> InKernelGen ())
-> [Param LetDecMem] -> [VName] -> InKernelGen ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ Param LetDecMem -> VName -> InKernelGen ()
writeBlockResult [Param LetDecMem]
x_params [VName]
arrs
InKernelGen ()
barrier
let first_block_seg_flag :: Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
first_block_seg_flag = do
TExp Int32 -> TExp Int32 -> TExp Bool
flag_true <- Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
seg_flag
(TExp Int32 -> TExp Int32 -> TExp Bool)
-> Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
forall a. a -> Maybe a
Just ((TExp Int32 -> TExp Int32 -> TExp Bool)
-> Maybe (TExp Int32 -> TExp Int32 -> TExp Bool))
-> (TExp Int32 -> TExp Int32 -> TExp Bool)
-> Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
forall a b. (a -> b) -> a -> b
$ \TExp Int32
from TExp Int32
to ->
TExp Int32 -> TExp Int32 -> TExp Bool
flag_true (TExp Int32
from TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
* TExp Int32
block_size TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
+ TExp Int32
block_size TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
-TExp Int32
1) (TExp Int32
to TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
* TExp Int32
block_size TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
+ TExp Int32
block_size TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
-TExp Int32
1)
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
comment
String
"scan the first block, after which offset 'i' contains carry-in for block 'i+1'"
(InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
-> TExp Bool -> Lambda GPUMem -> InKernelGen ()
doInBlockScan Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
first_block_seg_flag (TExp Bool
is_first_block TExp Bool -> TExp Bool -> TExp Bool
forall v. TPrimExp Bool v -> TPrimExp Bool v -> TPrimExp Bool v
.&&. TExp Bool
ltid_in_bounds) Lambda GPUMem
renamed_lam
InKernelGen ()
barrier
Bool -> InKernelGen () -> InKernelGen ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
array_scan (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ do
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
sComment String
"move correct values for first block back a block" (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
TExp Bool -> InKernelGen () -> InKernelGen ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhen TExp Bool
is_first_block (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
[(Param LetDecMem, VName)]
-> ((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Param LetDecMem] -> [VName] -> [(Param LetDecMem, VName)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Param LetDecMem]
x_params [VName]
arrs) (((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ())
-> ((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(Param LetDecMem
x, VName
arr) ->
Bool -> InKernelGen () -> InKernelGen ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Type -> Bool
forall shape u. TypeBase shape u -> Bool
primType (Type -> Bool) -> Type -> Bool
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> Type
forall dec. Typed dec => Param dec -> Type
paramType Param LetDecMem
x) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM
VName
arr
[TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int64
arrs_full_size TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
group_offset TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
ltid]
(VName -> SubExp
Var VName
arr)
[TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int64
arrs_full_size TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
group_offset TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 TExp Int32
block_size TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
ltid]
InKernelGen ()
barrier
let read_carry_in :: InKernelGen ()
read_carry_in = do
[(Param LetDecMem, Param LetDecMem)]
-> ((Param LetDecMem, Param LetDecMem) -> InKernelGen ())
-> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Param LetDecMem]
-> [Param LetDecMem] -> [(Param LetDecMem, Param LetDecMem)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Param LetDecMem]
x_params [Param LetDecMem]
y_params) (((Param LetDecMem, Param LetDecMem) -> InKernelGen ())
-> InKernelGen ())
-> ((Param LetDecMem, Param LetDecMem) -> InKernelGen ())
-> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(Param LetDecMem
x, Param LetDecMem
y) ->
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
y) [] (VName -> SubExp
Var (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
x)) []
(Param LetDecMem -> VName -> InKernelGen ())
-> [Param LetDecMem] -> [VName] -> InKernelGen ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ Param LetDecMem -> VName -> InKernelGen ()
readPrevBlockResult [Param LetDecMem]
x_params [VName]
arrs
y_to_x :: InKernelGen ()
y_to_x = [(Param LetDecMem, Param LetDecMem)]
-> ((Param LetDecMem, Param LetDecMem) -> InKernelGen ())
-> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Param LetDecMem]
-> [Param LetDecMem] -> [(Param LetDecMem, Param LetDecMem)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Param LetDecMem]
x_params [Param LetDecMem]
y_params) (((Param LetDecMem, Param LetDecMem) -> InKernelGen ())
-> InKernelGen ())
-> ((Param LetDecMem, Param LetDecMem) -> InKernelGen ())
-> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(Param LetDecMem
x, Param LetDecMem
y) ->
Bool -> InKernelGen () -> InKernelGen ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Type -> Bool
forall shape u. TypeBase shape u -> Bool
primType (Param LetDecMem -> Type
forall dec. Typed dec => Param dec -> Type
paramType Param LetDecMem
x)) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
x) [] (VName -> SubExp
Var (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
y)) []
op_to_x :: InKernelGen ()
op_to_x
| Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
Nothing <- Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
seg_flag =
[Param LetDecMem] -> BodyT GPUMem -> InKernelGen ()
forall dec rep r op. [Param dec] -> Body rep -> ImpM rep r op ()
compileBody' [Param LetDecMem]
x_params (BodyT GPUMem -> InKernelGen ()) -> BodyT GPUMem -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Lambda GPUMem -> BodyT GPUMem
forall rep. LambdaT rep -> BodyT rep
lambdaBody Lambda GPUMem
lam
| Just TExp Int32 -> TExp Int32 -> TExp Bool
flag_true <- Maybe (TExp Int32 -> TExp Int32 -> TExp Bool)
seg_flag = do
TExp Bool
inactive <-
String -> TExp Bool -> ImpM GPUMem KernelEnv KernelOp (TExp Bool)
forall t rep r op. String -> TExp t -> ImpM rep r op (TExp t)
dPrimVE String
"inactive" (TExp Bool -> ImpM GPUMem KernelEnv KernelOp (TExp Bool))
-> TExp Bool -> ImpM GPUMem KernelEnv KernelOp (TExp Bool)
forall a b. (a -> b) -> a -> b
$ TExp Int32 -> TExp Int32 -> TExp Bool
flag_true (TExp Int32
block_id TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
* TExp Int32
block_size TExp Int32 -> TExp Int32 -> TExp Int32
forall a. Num a => a -> a -> a
-TExp Int32
1) TExp Int32
ltid32
TExp Bool -> InKernelGen () -> InKernelGen ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhen TExp Bool
inactive InKernelGen ()
y_to_x
Bool -> InKernelGen () -> InKernelGen ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
array_scan InKernelGen ()
barrier
TExp Bool -> InKernelGen () -> InKernelGen ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sUnless TExp Bool
inactive (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ [Param LetDecMem] -> BodyT GPUMem -> InKernelGen ()
forall dec rep r op. [Param dec] -> Body rep -> ImpM rep r op ()
compileBody' [Param LetDecMem]
x_params (BodyT GPUMem -> InKernelGen ()) -> BodyT GPUMem -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ Lambda GPUMem -> BodyT GPUMem
forall rep. LambdaT rep -> BodyT rep
lambdaBody Lambda GPUMem
lam
write_final_result :: InKernelGen ()
write_final_result =
[(Param LetDecMem, VName)]
-> ((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Param LetDecMem] -> [VName] -> [(Param LetDecMem, VName)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Param LetDecMem]
x_params [VName]
arrs) (((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ())
-> ((Param LetDecMem, VName) -> InKernelGen ()) -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(Param LetDecMem
p, VName
arr) ->
Bool -> InKernelGen () -> InKernelGen ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Type -> Bool
forall shape u. TypeBase shape u -> Bool
primType (Type -> Bool) -> Type -> Bool
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> Type
forall dec. Typed dec => Param dec -> Type
paramType Param LetDecMem
p) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM VName
arr [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix TExp Int64
ltid] (VName -> SubExp
Var (VName -> SubExp) -> VName -> SubExp
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
p) []
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
sComment String
"carry-in for every block except the first" (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
TExp Bool -> InKernelGen () -> InKernelGen ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sUnless (TExp Bool
is_first_block TExp Bool -> TExp Bool -> TExp Bool
forall v. TPrimExp Bool v -> TPrimExp Bool v -> TPrimExp Bool v
.||. TExp Bool -> TExp Bool
forall v. TPrimExp Bool v -> TPrimExp Bool v
bNot TExp Bool
ltid_in_bounds) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ do
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
sComment String
"read operands" InKernelGen ()
read_carry_in
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
sComment String
"perform operation" InKernelGen ()
op_to_x
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
sComment String
"write final result" InKernelGen ()
write_final_result
InKernelGen ()
barrier
String -> InKernelGen () -> InKernelGen ()
forall rep r op. String -> ImpM rep r op () -> ImpM rep r op ()
sComment String
"restore correct values for first block" (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
TExp Bool -> InKernelGen () -> InKernelGen ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhen TExp Bool
is_first_block (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
[(Param LetDecMem, Param LetDecMem, VName)]
-> ((Param LetDecMem, Param LetDecMem, VName) -> InKernelGen ())
-> InKernelGen ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Param LetDecMem]
-> [Param LetDecMem]
-> [VName]
-> [(Param LetDecMem, Param LetDecMem, VName)]
forall a b c. [a] -> [b] -> [c] -> [(a, b, c)]
zip3 [Param LetDecMem]
x_params [Param LetDecMem]
y_params [VName]
arrs) (((Param LetDecMem, Param LetDecMem, VName) -> InKernelGen ())
-> InKernelGen ())
-> ((Param LetDecMem, Param LetDecMem, VName) -> InKernelGen ())
-> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ \(Param LetDecMem
x, Param LetDecMem
y, VName
arr) ->
if Type -> Bool
forall shape u. TypeBase shape u -> Bool
primType (Param LetDecMem -> Type
forall dec. Typed dec => Param dec -> Type
paramType Param LetDecMem
y)
then VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM VName
arr [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix TExp Int64
ltid] (VName -> SubExp
Var (VName -> SubExp) -> VName -> SubExp
forall a b. (a -> b) -> a -> b
$ Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
y) []
else VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> InKernelGen ()
forall rep r op.
VName
-> [DimIndex (TExp Int64)]
-> SubExp
-> [DimIndex (TExp Int64)]
-> ImpM rep r op ()
copyDWIM (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
x) [] (VName -> SubExp
Var VName
arr) [TExp Int64 -> DimIndex (TExp Int64)
forall d. d -> DimIndex d
DimFix (TExp Int64 -> DimIndex (TExp Int64))
-> TExp Int64 -> DimIndex (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int64
arrs_full_size TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
group_offset TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
ltid]
InKernelGen ()
barrier
-- | Generate code for a scan performed independently inside each block
-- of @block_size@ consecutive local thread IDs.
--
-- The parameters of @scan_lam@ are split in half into @x_params@ and
-- @y_params@.  Each active thread first loads its own element into the
-- @y@ parameters.  Then, with @skip_threads@ taking the values 1, 2, 4,
-- ... for as long as it is below @block_size@, every thread whose
-- position within its block is at least @skip_threads@ loads the element
-- @skip_threads@ positions behind it into the @x@ parameters, runs the
-- body of @scan_lam@ through 'compileBody'' with @x_params@, and writes
-- the @x@ values back.
--
-- Arguments, in order:
--
-- * @constants@: supplies the local and global thread IDs.
--
-- * @seg_flag@: when @Just f@, @f@ is applied to
--   @ltid32 - skip_threads@ and @ltid32@; where it holds, the thread
--   copies @y@ to @x@ instead of running the operator.
--
-- * @arrs_full_size@: offset added to the index when a non-primitive
--   operand is read from @arrs@.
--
-- * @lockstep_width@: the barrier between the read/compute and write
--   phases is only run once @skip_threads@ has reached this value.
--
-- * @block_size@: number of threads per block; also the loop bound.
--
-- * @active@: guards every read and write done on behalf of a thread.
--
-- * @arrs@: the arrays being scanned, one per @x@/@y@ parameter pair.
--
-- * @barrier@: the action used wherever a barrier is needed.
--
-- * @scan_lam@: the scan operator.
--
-- The whole generated body is wrapped in 'everythingVolatile'.
inBlockScan ::
  KernelConstants ->
  Maybe (Imp.TExp Int32 -> Imp.TExp Int32 -> Imp.TExp Bool) ->
  Imp.TExp Int64 ->
  Imp.TExp Int32 ->
  Imp.TExp Int32 ->
  Imp.TExp Bool ->
  [VName] ->
  InKernelGen () ->
  Lambda GPUMem ->
  InKernelGen ()
inBlockScan constants seg_flag arrs_full_size lockstep_width block_size active arrs barrier scan_lam = everythingVolatile $ do
  skip_threads <- dPrim "skip_threads" int32
  let -- Only threads at least skip_threads positions into their block
      -- have an element skip_threads places behind them to combine with.
      in_block_thread_active =
        tvExp skip_threads .<=. in_block_id
      actual_params = lambdaParams scan_lam
      (x_params, y_params) =
        splitAt (length actual_params `div` 2) actual_params
      -- Copy y into x, but only for parameters of primitive type.
      y_to_x =
        forM_ (zip x_params y_params) $ \(x, y) ->
          when (primType (paramType x)) $
            copyDWIM (paramName x) [] (Var (paramName y)) []

  -- Load the initial y values.
  sComment "read input for in-block scan" $
    sWhen active $ do
      zipWithM_ readInitial y_params arrs
      -- The thread at position 0 of a block never passes the
      -- in_block_thread_active test in the loop below, so its x values
      -- are filled from y here.
      sWhen (in_block_id .==. 0) y_to_x

  when array_scan barrier

  let op_to_x
        | Nothing <- seg_flag =
            compileBody' x_params $ lambdaBody scan_lam
        | Just flag_true <- seg_flag = do
            inactive <-
              dPrimVE "inactive" $
                flag_true (ltid32 - tvExp skip_threads) ltid32
            sWhen inactive y_to_x
            when array_scan barrier
            sUnless inactive $ compileBody' x_params $ lambdaBody scan_lam

      -- The barrier is skipped while skip_threads is still smaller than
      -- lockstep_width.
      maybeBarrier =
        sWhen
          (lockstep_width .<=. tvExp skip_threads)
          barrier

  sComment "in-block scan (hopefully no barriers needed)" $ do
    skip_threads <-- 1
    sWhile (tvExp skip_threads .<. block_size) $ do
      sWhen (in_block_thread_active .&&. active) $ do
        sComment "read operands" $
          zipWithM_ (readParam (sExt64 $ tvExp skip_threads)) x_params arrs
        sComment "perform operation" op_to_x

      maybeBarrier

      sWhen (in_block_thread_active .&&. active) $
        sComment "write result" $
          sequence_ $ zipWith3 writeResult x_params y_params arrs

      maybeBarrier

      skip_threads <-- tvExp skip_threads * 2
  where
    block_id = ltid32 `quot` block_size
    in_block_id = ltid32 - block_id * block_size
    ltid32 = kernelLocalThreadId constants
    ltid = sExt64 ltid32
    gtid = sExt64 $ kernelGlobalThreadId constants
    -- True when at least one result of the operator is not primitive.
    array_scan = not $ all primType $ lambdaReturnType scan_lam

    -- Load this thread's own element: primitive parameters are indexed
    -- by local thread ID, all others by global thread ID.
    readInitial p arr
      | primType $ paramType p =
          copyDWIM (paramName p) [] (Var arr) [DimFix ltid]
      | otherwise =
          copyDWIM (paramName p) [] (Var arr) [DimFix gtid]

    -- Load the element 'behind' positions earlier.  Non-primitive
    -- parameters are indexed by global thread ID and additionally
    -- offset by arrs_full_size.
    readParam behind p arr
      | primType $ paramType p =
          copyDWIM (paramName p) [] (Var arr) [DimFix $ ltid - behind]
      | otherwise =
          copyDWIM (paramName p) [] (Var arr) [DimFix $ gtid - behind + arrs_full_size]

    -- Store x: primitive values go both to the array slot of this thread
    -- and to y; non-primitive values are only copied to y.
    writeResult x y arr
      | primType $ paramType x = do
          copyDWIM arr [DimFix ltid] (Var $ paramName x) []
          copyDWIM (paramName y) [] (Var $ paramName x) []
      | otherwise =
          copyDWIM (paramName y) [] (Var $ paramName x) []
-- | Compute the number of groups and the group size for a kernel that
-- covers @kernel_size@ elements.
--
-- A @group_size@ variable is declared and filled by an 'Imp.GetSize'
-- host operation of class 'Imp.SizeGroup'.  The size key is the
-- pretty-printed name of that variable, passed through
-- 'keyWithEntryPoint' together with the result of 'askFunction'.  The
-- number of groups is @kernel_size@ divided by the group size, rounded
-- up.
--
-- Returns @(num_groups, group_size)@.
computeMapKernelGroups :: Imp.TExp Int64 -> CallKernelGen (Imp.TExp Int64, Imp.TExp Int64)
computeMapKernelGroups kernel_size = do
  group_size <- dPrim "group_size" int64
  fname <- askFunction
  let group_size_key = keyWithEntryPoint fname $ nameFromString $ pretty $ tvVar group_size
  sOp $ Imp.GetSize (tvVar group_size) group_size_key Imp.SizeGroup
  num_groups <- dPrimV "num_groups" $ kernel_size `divUp` tvExp group_size
  return (tvExp num_groups, tvExp group_size)
-- | Build the 'KernelConstants' for a kernel covering @kernel_size@
-- elements, together with the in-kernel action that initialises the
-- thread and group ID variables those constants refer to.
--
-- Three names are created from @desc@ with the suffixes @_gtid@,
-- @_ltid@ and @_gid@.  The group count and group size come from
-- 'computeMapKernelGroups'.  The returned action declares the three
-- variables as @int32@ and sets them with 'Imp.GetGlobalId',
-- 'Imp.GetLocalId' and 'Imp.GetGroupId', each called with argument 0.
simpleKernelConstants ::
  Imp.TExp Int64 ->
  String ->
  CallKernelGen (KernelConstants, InKernelGen ())
simpleKernelConstants kernel_size desc = do
  thread_gtid <- newVName $ desc ++ "_gtid"
  thread_ltid <- newVName $ desc ++ "_ltid"
  group_id <- newVName $ desc ++ "_gid"
  (num_groups, group_size) <- computeMapKernelGroups kernel_size
  let set_constants = do
        dPrim_ thread_gtid int32
        dPrim_ thread_ltid int32
        dPrim_ group_id int32
        sOp (Imp.GetGlobalId thread_gtid 0)
        sOp (Imp.GetLocalId thread_ltid 0)
        sOp (Imp.GetGroupId group_id 0)

  return
    ( KernelConstants
        -- The three IDs as expressions, then the same three as names.
        (Imp.vi32 thread_gtid)
        (Imp.vi32 thread_ltid)
        (Imp.vi32 group_id)
        thread_gtid
        thread_ltid
        group_id
        num_groups
        group_size
        -- Product of group size and group count, narrowed to 32 bits.
        (sExt32 (group_size * num_groups))
        0
        -- NOTE(review): thread_gtid is declared as int32 in
        -- set_constants but is read here through Imp.vi64; confirm
        -- that this is intended.
        (Imp.vi64 thread_gtid .<. kernel_size)
        mempty,
      set_constants
    )
-- | Run a per-group action under the requested virtualisation scheme.
--
-- With 'SegVirt', the physical group id is fetched with
-- 'Imp.GetGroupId' and the action is run in a loop, once for every
-- virtual group id of the form @phys_group_id + i * num_groups@, for
-- @i@ below @(required_groups - phys_group_id) `divUp` num_groups@.  A
-- global-fence barrier is emitted at the end of every iteration.
--
-- With any other 'SegVirt' value the action is run exactly once and is
-- handed the kernel's own group id variable.
virtualiseGroups ::
  SegVirt ->
  Imp.TExp Int32 ->
  (Imp.TExp Int32 -> InKernelGen ()) ->
  InKernelGen ()
virtualiseGroups SegVirt required_groups m = do
  constants <- kernelConstants <$> askEnv
  phys_group_id <- dPrim "phys_group_id" int32
  sOp $ Imp.GetGroupId (tvVar phys_group_id) 0

  let phys_id = tvExp phys_group_id
      -- Distance between consecutive virtual groups handled by the
      -- same physical group.
      stride = sExt32 (kernelNumGroups constants)
      iterations = (required_groups - phys_id) `divUp` stride

  sFor "i" iterations $ \i -> do
    virt_group_id <- dPrimV "virt_group_id" (phys_id + i * stride)
    m (tvExp virt_group_id)
    -- NOTE(review): presumably this keeps one virtual group from
    -- starting before the previous one has finished -- confirm.
    sOp $ Imp.Barrier Imp.FenceGlobal
virtualiseGroups _ _ m = do
  constants <- kernelConstants <$> askEnv
  m (Imp.vi32 (kernelGroupIdVar constants))
-- | Emit a kernel whose body is compiled with 'threadOperations'.  The
-- given 'VName' is bound to the kernel's global thread id (see
-- 'sKernel' for the meaning of the remaining arguments).
sKernelThread ::
  String ->
  Count NumGroups (Imp.TExp Int64) ->
  Count GroupSize (Imp.TExp Int64) ->
  VName ->
  InKernelGen () ->
  CallKernelGen ()
sKernelThread = sKernel threadOperations kernelGlobalThreadId
-- | Emit a kernel whose body is compiled with 'groupOperations'.  The
-- given 'VName' is bound to the kernel's group id (see 'sKernel' for
-- the meaning of the remaining arguments).
sKernelGroup ::
  String ->
  Count NumGroups (Imp.TExp Int64) ->
  Count GroupSize (Imp.TExp Int64) ->
  VName ->
  InKernelGen () ->
  CallKernelGen ()
sKernelGroup = sKernel groupOperations kernelGroupId
-- | Compile a kernel body and emit the corresponding 'Imp.CallKernel'
-- host operation.
--
-- The body is compiled (inside 'makeAllMemoryGlobal') with the given
-- operations in a 'KernelEnv' that takes its atomics and locks from
-- the surrounding 'HostEnv' and its constants from the argument.  The
-- kernel's uses are computed from the compiled body, and the number of
-- groups and the group size are read off the constants.  The 'Bool' is
-- stored unchanged in the 'Imp.kernelFailureTolerant' field.
sKernelFailureTolerant ::
  Bool ->
  Operations GPUMem KernelEnv Imp.KernelOp ->
  KernelConstants ->
  Name ->
  InKernelGen () ->
  CallKernelGen ()
sKernelFailureTolerant tol ops constants name m = do
  host_env <- askEnv
  let kernel_env =
        KernelEnv
          { kernelAtomics = hostAtomics host_env,
            kernelConstants = constants,
            kernelLocks = hostLocks host_env
          }

  body <- makeAllMemoryGlobal (subImpM_ kernel_env ops m)
  uses <- computeKernelUses body mempty

  let kernel =
        Imp.Kernel
          { Imp.kernelBody = body,
            Imp.kernelUses = uses,
            Imp.kernelNumGroups = [untyped (kernelNumGroups constants)],
            Imp.kernelGroupSize = [untyped (kernelGroupSize constants)],
            Imp.kernelName = name,
            Imp.kernelFailureTolerant = tol
          }
  emit (Imp.Op (Imp.CallKernel kernel))
-- | Shared implementation of 'sKernelThread' and 'sKernelGroup'.
--
-- Kernel constants are obtained from 'kernelInitialisationSimple' for
-- the given number of groups and group size.  The kernel is named by
-- passing @name ++ "_" ++ show (baseTag v)@ to 'nameForFun', and is
-- emitted as not failure tolerant.  Inside the kernel, the constants
-- are set up first, then @v@ is bound to the expression that the
-- selector function picks out of the constants, and finally the
-- supplied body is run.
sKernel ::
  Operations GPUMem KernelEnv Imp.KernelOp ->
  (KernelConstants -> Imp.TExp Int32) ->
  String ->
  Count NumGroups (Imp.TExp Int64) ->
  Count GroupSize (Imp.TExp Int64) ->
  VName ->
  InKernelGen () ->
  CallKernelGen ()
sKernel ops flatf name num_groups group_size v f = do
  (constants, set_constants) <-
    kernelInitialisationSimple num_groups group_size

  let tagged_name = name ++ "_" ++ show (baseTag v)
  kernel_name <- nameForFun tagged_name

  sKernelFailureTolerant False ops constants kernel_name $ do
    set_constants
    dPrimV_ v (flatf constants)
    f
-- | Copy compiler installed in 'groupOperations'.
--
-- The shape to copy is taken from the source index function.  When
-- both memory blocks live in a 'ScalarSpace', each side is sliced so
-- that its leading dimensions are fixed at index 0 and only as many
-- trailing dimensions as its scalar space has are kept in full; the
-- copy is then a single 'copyElementWise'.  In every other case the
-- index space is traversed with 'groupCoverSpace', one element is
-- copied per index, and a local-fence barrier is emitted afterwards.
copyInGroup :: CopyCompiler GPUMem KernelEnv Imp.KernelOp
copyInGroup pt destloc srcloc = do
  dest_space <- entryMemSpace <$> lookupMemory (memLocName destloc)
  src_space <- entryMemSpace <$> lookupMemory (memLocName srcloc)

  let dims = IxFun.shape (memLocIxFun srcloc)
      rank = length dims

      -- Slice for one side of a scalar-space copy: fix the leading
      -- dimensions at 0 and take the trailing ones whole.
      scalarSlice :: [SubExp] -> Slice (Imp.TExp Int64)
      scalarSlice space_dims =
        let kept = length space_dims
         in Slice $
              replicate (rank - kept) (DimFix 0)
                ++ takeLast kept (map (\d -> DimSlice 0 d 1) dims)

  case (dest_space, src_space) of
    (ScalarSpace destds _, ScalarSpace srcds _) ->
      copyElementWise
        pt
        (sliceMemLoc destloc (scalarSlice destds))
        (sliceMemLoc srcloc (scalarSlice srcds))
    _ -> do
      groupCoverSpace dims $ \is ->
        let point = Slice (map DimFix is)
         in copyElementWise
              pt
              (sliceMemLoc destloc point)
              (sliceMemLoc srcloc point)
      sOp $ Imp.Barrier Imp.FenceLocal
-- | Operation tables used when compiling kernel bodies at thread level
-- ('threadOperations') and at group level ('groupOperations').  Both
-- start from 'defaultOperations', compile statements with
-- 'defCompileStms' (ignoring the first argument and passing 'mempty'
-- instead), and register 'allocLocal' as the allocator for the
-- @"local"@ space.  They differ in the op compiler, the copy compiler
-- and the expression compiler.
threadOperations, groupOperations :: Operations GPUMem KernelEnv Imp.KernelOp
threadOperations =
  (defaultOperations compileThreadOp)
    { opsCopyCompiler = copyElementWise,
      opsExpCompiler = compileThreadExp,
      opsStmsCompiler = const (defCompileStms mempty),
      opsAllocCompilers = M.singleton (Space "local") allocLocal
    }
groupOperations =
  (defaultOperations compileGroupOp)
    { opsCopyCompiler = copyInGroup,
      opsExpCompiler = compileGroupExp,
      opsStmsCompiler = const (defCompileStms mempty),
      opsAllocCompilers = M.singleton (Space "local") allocLocal
    }
-- | Emit a failure-tolerant, thread-level kernel that writes the value
-- @se@ to every position of @arr@.
--
-- The index space is made up of the outer dimensions of @arr@ (its
-- dimensions with the rank of @se@ dropped from the end) followed by
-- the dimensions of @se@ itself.  Each active thread unflattens its
-- global thread id over that space and copies one element, indexing
-- @se@ with the trailing part of the index.  The kernel name is
-- @"replicate_"@ followed by the tag of the global thread id variable,
-- keyed by the current entry point.
sReplicateKernel :: VName -> SubExp -> CallKernelGen ()
sReplicateKernel arr se = do
  se_t <- subExpType se
  arr_t <- lookupType arr

  let outer_dims = dropLast (arrayRank se_t) (arrayDims arr_t)
      dims = map toInt64Exp (outer_dims ++ arrayDims se_t)
      total_elems = product (map sExt64 dims)

  (constants, set_constants) <-
    simpleKernelConstants total_elems "replicate"

  fname <- askFunction
  let tag = baseTag (kernelGlobalThreadIdVar constants)
      name =
        keyWithEntryPoint fname $
          nameFromString ("replicate_" ++ show tag)

  sKernelFailureTolerant True threadOperations constants name $ do
    set_constants
    is' <- dIndexSpace' "rep_i" dims (sExt64 (kernelGlobalThreadId constants))
    sWhen (kernelThreadActive constants) $
      copyDWIMFix arr is' se (drop (length outer_dims) is')
-- | Base name of the fill function for a primitive type: the prefix
-- @"replicate_"@ followed by the pretty-printed type.
replicateName :: PrimType -> String
replicateName = ("replicate_" ++) . pretty
-- | Return the name of the builtin host function that fills a block of
-- device memory with a single value of primitive type @bt@, defining
-- that function first if no function of that name exists yet.
--
-- The function is called @"builtin#"@ followed by 'replicateName' and
-- takes three parameters: the memory block (in the @"device"@ space),
-- the number of elements, and the value.  Its body views the memory as
-- a one-dimensional array of that many elements and fills it with
-- 'sReplicateKernel'.
replicateForType :: PrimType -> CallKernelGen Name
replicateForType bt = do
  let fname = nameFromString $ "builtin#" <> replicateName bt

  exists <- hasFunction fname
  unless exists $ do
    mem <- newVName "mem"
    num_elems <- newVName "num_elems"
    val <- newVName "val"

    let params =
          [ Imp.MemParam mem (Space "device"),
            -- The element count is a 64-bit quantity: it is read back
            -- below through 'pe64', and 'replicateIsFill' passes an
            -- Int64 expression for it.  Declaring the parameter as
            -- 32-bit would disagree with both.
            Imp.ScalarParam num_elems int64,
            Imp.ScalarParam val bt
          ]
        shape = Shape [Var num_elems]

    function fname [] params $ do
      arr <-
        sArray "arr" bt shape $
          ArrayIn mem $
            IxFun.iota $
              map pe64 $ shapeDims shape
      sReplicateKernel arr $ Var val

  return fname
-- | Decide whether replicating @v@ into @arr@ can be done by the
-- builtin fill function.
--
-- That is the case when @v@ has primitive type and the index function
-- of @arr@ is linear.  The result is then an action that obtains the
-- fill function from 'replicateForType' and emits a call to it with
-- the array's memory block, the product of the array's dimensions, and
-- the value.  Otherwise the result is 'Nothing'.
replicateIsFill :: VName -> SubExp -> CallKernelGen (Maybe (CallKernelGen ()))
replicateIsFill arr v = do
  arr_entry <- lookupArray arr
  v_t <- subExpType v

  let MemLoc arr_mem arr_shape arr_ixfun = entryArrayLoc arr_entry
      num_elems = product (map toInt64Exp arr_shape)

  pure $ case v_t of
    Prim v_t'
      | IxFun.isLinear arr_ixfun -> Just $ do
        fname <- replicateForType v_t'
        emit $
          Imp.Call
            []
            fname
            [ Imp.MemArg arr_mem,
              Imp.ExpArg (untyped num_elems),
              Imp.ExpArg (toExp' v_t' v)
            ]
    _ -> Nothing
-- | Fill @arr@ with copies of @se@.  If 'replicateIsFill' produces a
-- fill action, that action is run; otherwise a dedicated kernel is
-- emitted with 'sReplicateKernel'.
sReplicate :: VName -> SubExp -> CallKernelGen ()
sReplicate arr se =
  replicateIsFill arr se >>= fromMaybe (sReplicateKernel arr se)
sIotaKernel ::
VName ->
Imp.TExp Int64 ->
Imp.Exp ->
Imp.Exp ->
IntType ->
CallKernelGen ()
sIotaKernel :: VName
-> TExp Int64
-> PrimExp ExpLeaf
-> PrimExp ExpLeaf
-> IntType
-> ImpM GPUMem HostEnv HostOp ()
sIotaKernel VName
arr TExp Int64
n PrimExp ExpLeaf
x PrimExp ExpLeaf
s IntType
et = do
MemLoc
destloc <- ArrayEntry -> MemLoc
entryArrayLoc (ArrayEntry -> MemLoc)
-> ImpM GPUMem HostEnv HostOp ArrayEntry
-> ImpM GPUMem HostEnv HostOp MemLoc
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> VName -> ImpM GPUMem HostEnv HostOp ArrayEntry
forall rep r op. VName -> ImpM rep r op ArrayEntry
lookupArray VName
arr
(KernelConstants
constants, InKernelGen ()
set_constants) <- TExp Int64
-> String -> CallKernelGen (KernelConstants, InKernelGen ())
simpleKernelConstants TExp Int64
n String
"iota"
Maybe Name
fname <- ImpM GPUMem HostEnv HostOp (Maybe Name)
forall rep r op. ImpM rep r op (Maybe Name)
askFunction
let name :: Name
name =
Maybe Name -> Name -> Name
keyWithEntryPoint Maybe Name
fname (Name -> Name) -> Name -> Name
forall a b. (a -> b) -> a -> b
$
String -> Name
nameFromString (String -> Name) -> String -> Name
forall a b. (a -> b) -> a -> b
$
String
"iota_" String -> String -> String
forall a. [a] -> [a] -> [a]
++ IntType -> String
forall a. Pretty a => a -> String
pretty IntType
et String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"_"
String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (VName -> Int
baseTag (VName -> Int) -> VName -> Int
forall a b. (a -> b) -> a -> b
$ KernelConstants -> VName
kernelGlobalThreadIdVar KernelConstants
constants)
Bool
-> Operations GPUMem KernelEnv KernelOp
-> KernelConstants
-> Name
-> InKernelGen ()
-> ImpM GPUMem HostEnv HostOp ()
sKernelFailureTolerant Bool
True Operations GPUMem KernelEnv KernelOp
threadOperations KernelConstants
constants Name
name (InKernelGen () -> ImpM GPUMem HostEnv HostOp ())
-> InKernelGen () -> ImpM GPUMem HostEnv HostOp ()
forall a b. (a -> b) -> a -> b
$ do
InKernelGen ()
set_constants
let gtid :: TExp Int64
gtid = TExp Int32 -> TExp Int64
forall t v. IntExp t => TPrimExp t v -> TPrimExp Int64 v
sExt64 (TExp Int32 -> TExp Int64) -> TExp Int32 -> TExp Int64
forall a b. (a -> b) -> a -> b
$ KernelConstants -> TExp Int32
kernelGlobalThreadId KernelConstants
constants
TExp Bool -> InKernelGen () -> InKernelGen ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhen (KernelConstants -> TExp Bool
kernelThreadActive KernelConstants
constants) (InKernelGen () -> InKernelGen ())
-> InKernelGen () -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$ do
(VName
destmem, Space
destspace, Count Elements (TExp Int64)
destidx) <- MemLoc
-> [TExp Int64]
-> ImpM
GPUMem
KernelEnv
KernelOp
(VName, Space, Count Elements (TExp Int64))
forall rep r op.
MemLoc
-> [TExp Int64]
-> ImpM rep r op (VName, Space, Count Elements (TExp Int64))
fullyIndexArray' MemLoc
destloc [TExp Int64
gtid]
Code KernelOp -> InKernelGen ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code KernelOp -> InKernelGen ())
-> Code KernelOp -> InKernelGen ()
forall a b. (a -> b) -> a -> b
$
VName
-> Count Elements (TExp Int64)
-> PrimType
-> Space
-> Volatility
-> PrimExp ExpLeaf
-> Code KernelOp
forall a.
VName
-> Count Elements (TExp Int64)
-> PrimType
-> Space
-> Volatility
-> PrimExp ExpLeaf
-> Code a
Imp.Write VName
destmem Count Elements (TExp Int64)
destidx (IntType -> PrimType
IntType IntType
et) Space
destspace Volatility
Imp.Nonvolatile (PrimExp ExpLeaf -> Code KernelOp)
-> PrimExp ExpLeaf -> Code KernelOp
forall a b. (a -> b) -> a -> b
$
BinOp -> PrimExp ExpLeaf -> PrimExp ExpLeaf -> PrimExp ExpLeaf
forall v. BinOp -> PrimExp v -> PrimExp v -> PrimExp v
BinOpExp
(IntType -> Overflow -> BinOp
Add IntType
et Overflow
OverflowWrap)
(BinOp -> PrimExp ExpLeaf -> PrimExp ExpLeaf -> PrimExp ExpLeaf
forall v. BinOp -> PrimExp v -> PrimExp v -> PrimExp v
BinOpExp (IntType -> Overflow -> BinOp
Mul IntType
et Overflow
OverflowWrap) (IntType -> PrimExp ExpLeaf -> PrimExp ExpLeaf
forall v. IntType -> PrimExp v -> PrimExp v
Imp.sExt IntType
et (PrimExp ExpLeaf -> PrimExp ExpLeaf)
-> PrimExp ExpLeaf -> PrimExp ExpLeaf
forall a b. (a -> b) -> a -> b
$ TExp Int64 -> PrimExp ExpLeaf
forall t v. TPrimExp t v -> PrimExp v
untyped TExp Int64
gtid) PrimExp ExpLeaf
s)
PrimExp ExpLeaf
x
-- | The name stem used for iota operations over elements of the given
-- integer type: the string @"iota_"@ followed by the pretty-printed
-- type.
iotaName :: IntType -> String
iotaName bt = "iota_" ++ pretty bt
-- | Make sure that the builtin host-level iota function for elements
-- of integer type @bt@ has been defined, and return its name.
--
-- The function is named @"builtin#"@ followed by 'iotaName' @bt@.  It
-- is generated only when 'hasFunction' reports that no function of
-- that name exists yet, so repeated calls for the same type define it
-- at most once.  Its parameters are, in order: the destination memory
-- block (in space @"device"@), the number of elements, the start
-- value, and the stride.  The body views the memory block as a
-- one-dimensional array with an 'IxFun.iota' index function and fills
-- it by way of 'sIotaKernel'.
iotaForType :: IntType -> CallKernelGen Name
iotaForType bt = do
  let fname = nameFromString $ "builtin#" <> iotaName bt

  exists <- hasFunction fname
  unless exists $ do
    mem <- newVName "mem"
    n <- newVName "n"
    x <- newVName "x"
    s <- newVName "s"

    let params =
          [ Imp.MemParam mem (Space "device"),
            -- The element count is 64-bit: it is read back with
            -- 'Imp.vi64' below, used as an array dimension, and
            -- 'sIota' passes a 'TExp Int64' for it.  Declaring it as
            -- a narrower type would disagree with all of those uses.
            Imp.ScalarParam n int64,
            Imp.ScalarParam x $ IntType bt,
            Imp.ScalarParam s $ IntType bt
          ]
        shape = Shape [Var n]
        n' = Imp.vi64 n
        x' = Imp.var x $ IntType bt
        s' = Imp.var s $ IntType bt

    function fname [] params $ do
      arr <-
        sArray "arr" (IntType bt) shape $
          ArrayIn mem $
            IxFun.iota $
              map pe64 $ shapeDims shape
      sIotaKernel arr (sExt64 n') x' s' bt

  return fname
-- | Generate host-level code that fills the array @arr@ with an iota:
-- @n@ elements of integer type @et@, with start value @x@ and stride
-- @s@.
--
-- When the index function of @arr@ is linear (per 'IxFun.isLinear'),
-- this emits a call to the builtin function obtained from
-- 'iotaForType', passing the memory block of the array, the element
-- count, the start value and the stride.  Otherwise the work is
-- delegated to 'sIotaKernel' on the array itself.
sIota ::
  VName ->
  Imp.TExp Int64 ->
  Imp.Exp ->
  Imp.Exp ->
  IntType ->
  CallKernelGen ()
sIota arr n x s et = do
  ArrayEntry (MemLoc arr_mem _ arr_ixfun) _ <- lookupArray arr
  if IxFun.isLinear arr_ixfun
    then do
      fname <- iotaForType et
      emit $
        Imp.Call
          []
          fname
          [Imp.MemArg arr_mem, Imp.ExpArg $ untyped n, Imp.ExpArg x, Imp.ExpArg s]
    else sIotaKernel arr n x s et
-- | Copy compiler for the host: copies elements of primitive type
-- @bt@ from the source location to the destination location by
-- launching a kernel.
--
-- The kernel is sized by the product of the source dimensions.  Each
-- thread unflattens its global thread ID into a multi-dimensional
-- index over that shape, computes the corresponding offsets in the
-- destination and in the source, and copies a single element if its
-- global thread ID is within bounds.
sCopy :: CopyCompiler GPUMem HostEnv Imp.HostOp
sCopy
  bt
  destloc@(MemLoc destmem _ _)
  srcloc@(MemLoc srcmem srcdims _) = do
    -- Only the source dimensions are consulted for the iteration
    -- space; presumably the destination has the same shape (TODO
    -- confirm against callers).
    let shape = map toInt64Exp srcdims
        kernel_size = product shape

    (constants, set_constants) <- simpleKernelConstants kernel_size "copy"

    -- The kernel name is made unique by the tag of the variable
    -- holding the global thread ID.
    fname <- askFunction
    let name =
          keyWithEntryPoint fname $
            nameFromString $
              "copy_" ++ show (baseTag $ kernelGlobalThreadIdVar constants)

    sKernelFailureTolerant True threadOperations constants name $ do
      set_constants

      let gtid = sExt64 $ kernelGlobalThreadId constants
      is <- dIndexSpace' "copy_i" shape gtid

      (_, destspace, destidx) <- fullyIndexArray' destloc is
      (_, srcspace, srcidx) <- fullyIndexArray' srcloc is

      -- Threads whose ID falls outside the array do nothing.
      sWhen (gtid .<. kernel_size) $
        emit $
          Imp.Write destmem destidx bt destspace Imp.Nonvolatile $
            Imp.index srcmem srcidx bt srcspace Imp.Nonvolatile
-- | Generate code that writes one result of a group-level kernel body
-- to the pattern element @pe@.
--
-- The kinds of result are handled as follows:
--
-- * One-dimensional 'TileReturns': each group writes its tile at
--   offset @per_group_elems * group_id@, guarded by the total width.
--
-- * Multi-dimensional 'TileReturns': each thread computes one output
--   index per dimension from the group indices of the 'SegSpace', the
--   tile sizes, and its local thread IDs.
--
-- * 'RegTileReturns': each thread copies out its own register tile
--   with a loop nest over the register tile shape.
--
-- * 'Returns': the value is copied to the index given by the group
--   indices of the 'SegSpace'.
--
-- * 'WriteReturns' and 'ConcatReturns' are not supported here and are
--   reported through 'compilerLimitationS'.
compileGroupResult ::
  SegSpace ->
  PatElem GPUMem ->
  KernelResult ->
  InKernelGen ()
compileGroupResult _ pe (TileReturns _ [(w, per_group_elems)] what) = do
  n <- toInt64Exp . arraySize 0 <$> lookupType what

  constants <- kernelConstants <$> askEnv
  let ltid = sExt64 $ kernelLocalThreadId constants
      offset =
        toInt64Exp per_group_elems
          * sExt64 (kernelGroupId constants)

  -- When the tile size is syntactically equal to the group size, every
  -- thread writes at most one element and no loop is needed.
  localOps threadOperations $
    if toInt64Exp per_group_elems == kernelGroupSize constants
      then
        sWhen (ltid + offset .<. toInt64Exp w) $
          copyDWIMFix (patElemName pe) [ltid + offset] (Var what) [ltid]
      else sFor "i" (n `divUp` kernelGroupSize constants) $ \i -> do
        j <- dPrimVE "j" $ kernelGroupSize constants * i + ltid
        sWhen (j + offset .<. toInt64Exp w) $
          copyDWIMFix (patElemName pe) [j + offset] (Var what) [j]
compileGroupResult space pe (TileReturns _ dims what) = do
  let gids = map fst $ unSegSpace space
      out_tile_sizes = map (toInt64Exp . snd) dims
      group_is = zipWith (*) (map Imp.vi64 gids) out_tile_sizes
  local_is <- localThreadIDs $ map snd dims
  is_for_thread <-
    mapM (dPrimV "thread_out_index") $
      zipWith (+) group_is local_is

  -- Only write when the computed output index is within the full
  -- dimensions of the result.
  localOps threadOperations $
    sWhen (isActive $ zip (map tvVar is_for_thread) $ map fst dims) $
      copyDWIMFix (patElemName pe) (map tvExp is_for_thread) (Var what) local_is
compileGroupResult space pe (RegTileReturns _ dims_n_tiles what) = do
  constants <- kernelConstants <$> askEnv

  let gids = map fst $ unSegSpace space
      (dims, group_tiles, reg_tiles) = unzip3 dims_n_tiles
      group_tiles' = map toInt64Exp group_tiles
      reg_tiles' = map toInt64Exp reg_tiles

  -- The group tile index in each dimension is the group index of the
  -- segment space.
  let group_tile_is = map Imp.vi64 gids

  -- The register tile index of this thread within the group tile is
  -- obtained by unflattening the local thread ID over the group tile
  -- sizes.
  reg_tile_is <-
    dIndexSpace' "reg_tile_i" group_tiles' $ sExt64 $ kernelLocalThreadId constants

  -- Per dimension, the slice of the output array covered by the
  -- register tile of this thread.
  let regTileSliceDim (group_tile, group_tile_i) (reg_tile, reg_tile_i) = do
        tile_dim_start <-
          dPrimVE "tile_dim_start" $
            reg_tile * (group_tile * group_tile_i + reg_tile_i)
        return $ DimSlice tile_dim_start reg_tile 1
  reg_tile_slices <-
    Slice
      <$> zipWithM
        regTileSliceDim
        (zip group_tiles' group_tile_is)
        (zip reg_tiles' reg_tile_is)

  localOps threadOperations $
    sLoopNest (Shape reg_tiles) $ \is_in_reg_tile -> do
      let dest_is = fixSlice reg_tile_slices is_in_reg_tile
          src_is = reg_tile_is ++ is_in_reg_tile
      -- Skip elements that fall outside the bounds of the result.
      sWhen (foldl1 (.&&.) $ zipWith (.<.) dest_is $ map toInt64Exp dims) $
        copyDWIMFix (patElemName pe) dest_is (Var what) src_is
compileGroupResult space pe (Returns _ _ what) = do
  constants <- kernelConstants <$> askEnv
  in_local_memory <- arrayInLocalMemory what
  let gids = map (Imp.vi64 . fst) $ unSegSpace space

  if not in_local_memory
    then
      -- Only the thread with local ID zero performs the write, using
      -- thread-level operations.
      localOps threadOperations $
        sWhen (kernelLocalThreadId constants .==. 0) $
          copyDWIMFix (patElemName pe) gids what []
    else
      -- The result is an array in local memory: the copy is issued
      -- without a thread guard and with the operations already in
      -- effect, rather than thread-level ones.
      copyDWIMFix (patElemName pe) gids what []
compileGroupResult _ _ WriteReturns {} =
  compilerLimitationS "compileGroupResult: WriteReturns not handled yet."
compileGroupResult _ _ ConcatReturns {} =
  compilerLimitationS "compileGroupResult: ConcatReturns not handled yet."
-- | Generate the code with which a single thread writes one of its
-- kernel results into the destination named by the pattern element.
--
-- The behaviour depends on the kind of 'KernelResult':
--
-- * 'Returns': the value is copied to the position given by the
--   index variables of the segment space.
--
-- * 'ConcatReturns': the array @what@ is copied into a slice of the
--   destination whose offset is derived from the global thread id.
--
-- * 'WriteReturns': every @(slice, value)@ pair is written, guarded
--   by the thread being active and the slice being in bounds.
--
-- * 'RegTileReturns' and 'TileReturns' are not supported at thread
--   level and abort compilation.
compileThreadResult ::
  SegSpace ->
  PatElem GPUMem ->
  KernelResult ->
  InKernelGen ()
compileThreadResult _ _ RegTileReturns {} =
  compilerLimitationS "compileThreadResult: RegTileReturns not yet handled."
compileThreadResult space pe (Returns _ _ what) = do
  -- One 64-bit index expression per dimension of the segment space.
  let is = map (Imp.vi64 . fst) $ unSegSpace space
  copyDWIMFix (patElemName pe) is what []
compileThreadResult _ pe (ConcatReturns _ SplitContiguous _ per_thread_elems what) = do
  constants <- kernelConstants <$> askEnv
  -- Start offset: 'per_thread_elems' multiplied by the global thread
  -- id (sign-extended to 64 bits).
  let offset =
        toInt64Exp per_thread_elems
          * sExt64 (kernelGlobalThreadId constants)
  -- Number of elements to copy: the size of dimension 0 of 'what'.
  n <- toInt64Exp . arraySize 0 <$> lookupType what
  copyDWIM (patElemName pe) [DimSlice offset n 1] (Var what) []
compileThreadResult _ pe (ConcatReturns _ (SplitStrided stride) _ _ what) = do
  constants <- kernelConstants <$> askEnv
  -- Start offset is the global thread id itself; consecutive elements
  -- are 'stride' apart in the destination.
  let offset = sExt64 (kernelGlobalThreadId constants)
  n <- toInt64Exp . arraySize 0 <$> lookupType what
  copyDWIM
    (patElemName pe)
    [DimSlice offset n $ toInt64Exp stride]
    (Var what)
    []
compileThreadResult _ pe (WriteReturns _ (Shape rws) _arr dests) = do
  constants <- kernelConstants <$> askEnv
  let rws' = map toInt64Exp rws
  forM_ dests $ \(slice, e) -> do
    let slice' = fmap toInt64Exp slice
        -- Only write when this thread is active and the target slice
        -- passes the 'inBounds' check against 'rws''.
        write = kernelThreadActive constants .&&. inBounds slice' rws'
    sWhen write $ copyDWIM (patElemName pe) (unSlice slice') e []
compileThreadResult _ _ TileReturns {} =
  compilerBugS "compileThreadResult: TileReturns unhandled."
-- | Does the given operand name an array whose memory block is in
-- the @"local"@ space?
--
-- Yields 'False' for constants and for variables that are not bound
-- to an array.
arrayInLocalMemory :: SubExp -> InKernelGen Bool
arrayInLocalMemory (Var name) = do
  res <- lookupVar name
  case res of
    ArrayVar _ entry ->
      -- Look up the memory block backing the array and compare its
      -- space against "local".
      (Space "local" ==) . entryMemSpace
        <$> lookupMemory (memLocName (entryArrayLoc entry))
    _ -> return False
arrayInLocalMemory Constant {} = return False