module Futhark.CodeGen.ImpGen.Multicore.SegScan
( compileSegScan,
)
where
import Control.Monad
import Data.List (zip4)
import Futhark.CodeGen.ImpCode.Multicore qualified as Imp
import Futhark.CodeGen.ImpGen
import Futhark.CodeGen.ImpGen.Multicore.Base
import Futhark.IR.MCMem
import Futhark.Util.IntegralExp (quot, rem)
import Prelude hiding (quot, rem)
-- | Compile a SegScan construct.
--
-- Dispatches on the number of dimensions in the segment space: when
-- 'unSegSpace' yields exactly one (index, size) pair the scan is handled
-- by 'nonsegmentedScan', which also receives the subtask count;
-- otherwise it is handled by 'segmentedScan', which does not.
compileSegScan ::
  Pat LetDecMem ->
  SegSpace ->
  [SegBinOp MCMem] ->
  KernelBody MCMem ->
  TV Int32 ->
  MulticoreGen Imp.MCCode
compileSegScan pat space reds kbody nsubtasks
  | [_] <- unSegSpace space =
      nonsegmentedScan pat space reds kbody nsubtasks
  | otherwise =
      segmentedScan pat space reds kbody
-- | Split the parameters of a scan operator's lambda in two.
--
-- 'xParams' is the leading group, with as many parameters as the
-- operator has neutral elements; 'yParams' is everything after that.
xParams, yParams :: SegBinOp MCMem -> [LParam MCMem]
xParams scan =
  take (length (segBinOpNeutral scan)) (lambdaParams (segBinOpLambda scan))
yParams scan =
  drop (length (segBinOpNeutral scan)) (lambdaParams (segBinOpLambda scan))
-- | The body of the lambda belonging to a scan operator.
lamBody :: SegBinOp MCMem -> Body MCMem
lamBody = lambdaBody . segBinOpLambda
-- | Allocate one array per result of each scan operator.
--
-- For every operator, and for every return type @t@ of its lambda, an
-- array named @s@ is allocated in 'DefaultSpace' with element type
-- @elemType t@.  Its shape is the subtask count, followed by the
-- operator's own shape, followed by the array shape of @t@.  The result
-- is grouped per operator, in the order of @segops@.
carryArrays :: String -> TV Int32 -> [SegBinOp MCMem] -> MulticoreGen [[VName]]
carryArrays s nsubtasks segops =
  forM segops $ \(SegBinOp _ lam _ shape) ->
    forM (lambdaReturnType lam) $ \t -> do
      let pt = elemType t
          -- Outermost dimension: one slot per subtask.
          full_shape =
            Shape [Var (tvVar nsubtasks)]
              <> shape
              <> arrayShape t
      sAllocArray s pt full_shape DefaultSpace
-- | Generate code for a scan over a one-dimensional segment space.
--
-- A debug print is emitted directly; everything else is generated inside
-- 'collect' and returned as code.  Stage 1 always runs.  Stages 2 and 3
-- are guarded at run time by @nsubtasks > 1@, and each of them works on
-- a freshly renamed copy of the scan operators.
--
-- The stage 1 / stage 3 implementations are picked as follows:
--
-- * the scalar variants when every lambda parameter has primitive type
--   and every operator shape is empty;
-- * otherwise the nested variants when no operator shape is empty and
--   every lambda parameter has primitive type;
-- * otherwise the fallback variants.
nonsegmentedScan ::
  Pat LetDecMem ->
  SegSpace ->
  [SegBinOp MCMem] ->
  KernelBody MCMem ->
  TV Int32 ->
  MulticoreGen Imp.MCCode
nonsegmentedScan pat space scan_ops kbody nsubtasks = do
  emit $ Imp.DebugPrint "nonsegmented segScan" Nothing
  collect $ do
    -- The dimensions of each operator's shape (empty for non-vectorised
    -- operators).
    let dims = map (shapeDims . segBinOpShape) scan_ops
    -- True when all parameters are primitive and no operator has a shape.
    let scalars =
          all (all (primType . typeOf . paramDec) . (lambdaParams . segBinOpLambda)) scan_ops
            && all null dims
    -- True when every operator has a non-empty shape.
    let vectorize = [] `notElem` dims

    let param_types = concatMap (map paramType . (lambdaParams . segBinOpLambda)) scan_ops
    let no_array_param = all primType param_types

    let (scanStage1, scanStage3)
          | scalars = (scanStage1Scalar, scanStage3Scalar)
          | vectorize && no_array_param = (scanStage1Nested, scanStage3Nested)
          | otherwise = (scanStage1Fallback, scanStage3Fallback)

    emit $ Imp.DebugPrint "Scan stage 1" Nothing
    scanStage1 pat space kbody scan_ops

    let nsubtasks' = tvExp nsubtasks
    sWhen (nsubtasks' .>. 1) $ do
      scan_ops2 <- renameSegBinOp scan_ops
      emit $ Imp.DebugPrint "Scan stage 2" Nothing
      carries <- scanStage2 pat nsubtasks space scan_ops2
      scan_ops3 <- renameSegBinOp scan_ops
      emit $ Imp.DebugPrint "Scan stage 3" Nothing
      scanStage3 pat space scan_ops3 carries
-- | Selects which looping and extraction helpers 'getScanLoop',
-- 'getNestLoop' and 'getExtract' hand out.
data ScanLoopType
  = -- | No uniformize loop, plain 'sLoopNest', and values are read
    -- without lane extraction.
    ScanSeq
  | -- | No uniformize loop, 'sLoopNestVectorized' for the operator shape,
    -- and values are read through 'extractVectorLane'.
    ScanNested
  | -- | Body wrapped in 'generateUniformizeLoop', plain 'sLoopNest', and
    -- values are read through 'extractVectorLane'.
    ScanScalar
-- | Pick the outer loop wrapper for applying the scan operators.
--
-- For 'ScanScalar' the body is handed to 'generateUniformizeLoop'; for
-- every other loop type the body is simply run once with index @0@.
getScanLoop ::
  ScanLoopType ->
  (Imp.TExp Int64 -> MulticoreGen ()) ->
  MulticoreGen ()
getScanLoop ScanScalar = generateUniformizeLoop
getScanLoop _ = \body -> body 0
-- | Pick how the code that reads the next input value is emitted.
--
-- For 'ScanSeq' the index is ignored: the given action is run and the
-- code it returns is emitted as-is.  For every other loop type the index
-- and the action are passed on to 'extractVectorLane'.
getExtract :: ScanLoopType -> Imp.TExp Int64 -> MulticoreGen Imp.MCCode -> MulticoreGen ()
getExtract ScanSeq = \_ body -> body >>= emit
getExtract _ = extractVectorLane
-- | Declare, via 'dScope', all lambda parameters of the given scan
-- operators.
genBinOpParams :: [SegBinOp MCMem] -> MulticoreGen ()
genBinOpParams scan_ops =
  dScope Nothing $
    scopeOfLParams $
      concatMap (lambdaParams . segBinOpLambda) scan_ops
-- | Create the local accumulators for each scan operator and initialise
-- them with the operator's neutral elements.
--
-- When an operator's shape is empty, the accumulator is the operator's
-- x-parameter itself.  Otherwise a fresh @"local_acc"@ array is allocated
-- in 'DefaultSpace', shaped as the operator shape followed by the array
-- shape of the corresponding lambda return type.  Either way the
-- accumulator is then filled with the neutral element by looping over
-- the operator shape.  Results are grouped per operator.
genLocalAccsStage1 :: [SegBinOp MCMem] -> MulticoreGen [[VName]]
genLocalAccsStage1 scan_ops =
  forM scan_ops $ \scan_op -> do
    let shape = segBinOpShape scan_op
        ts = lambdaReturnType $ segBinOpLambda scan_op
    forM (zip3 (xParams scan_op) (segBinOpNeutral scan_op) ts) $ \(p, ne, t) -> do
      acc <-
        case shapeDims shape of
          [] -> pure $ paramName p
          _ -> do
            let pt = elemType t
            sAllocArray "local_acc" pt (shape <> arrayShape t) DefaultSpace

      -- Initialise the accumulator with the neutral element.
      sLoopNest shape $ \vec_is ->
        copyDWIMFix acc vec_is ne []

      pure acc
-- | Pick the loop-nest generator used to iterate over an operator's
-- shape: 'sLoopNestVectorized' for 'ScanNested', 'sLoopNest' otherwise.
getNestLoop ::
  ScanLoopType ->
  Shape ->
  ([Imp.TExp Int64] -> MulticoreGen ()) ->
  MulticoreGen ()
getNestLoop ScanNested = sLoopNestVectorized
getNestLoop _ = sLoopNest
-- | Apply every scan operator once to its accumulator and next input.
--
-- @all_scan_res@ and the pattern elements of @pat@ are chunked per
-- operator with 'segBinOpChunks'.  Inside the loop chosen by
-- 'getScanLoop', and for each index of the operator's shape (iterated
-- with the loop nest chosen by 'getNestLoop'), the generated code:
--
-- 1. copies the accumulators into the operator's x-parameters;
-- 2. copies the next input values into the y-parameters, emitted through
--    'getExtract';
-- 3. compiles the lambda body, then writes each result both to the
--    output pattern element (indexed by the segment-space indices
--    followed by the shape indices) and back to the accumulator.
applyScanOps ::
  ScanLoopType ->
  Pat LetDecMem ->
  SegSpace ->
  [SubExp] ->
  [SegBinOp MCMem] ->
  [[VName]] ->
  ImpM MCMem HostEnv Imp.Multicore ()
applyScanOps typ pat space all_scan_res scan_ops local_accs = do
  let per_scan_res = segBinOpChunks scan_ops all_scan_res
      per_scan_pes = segBinOpChunks scan_ops $ patElems pat
  -- Only the index variables of the segment space are needed here.
  let (is, _) = unzip $ unSegSpace space

  getScanLoop typ $ \j ->
    forM_ (zip4 per_scan_pes scan_ops per_scan_res local_accs) $ \(pes, scan_op, scan_res, acc) ->
      getNestLoop typ (segBinOpShape scan_op) $ \vec_is -> do
        sComment "Read accumulator" $
          forM_ (zip (xParams scan_op) acc) $ \(p, acc') ->
            copyDWIMFix (paramName p) [] (Var acc') vec_is

        sComment "Read next values" $
          forM_ (zip (yParams scan_op) scan_res) $ \(p, se) ->
            getExtract typ j $
              collect $
                copyDWIMFix (paramName p) [] se vec_is

        sComment "Scan op body" $
          compileStms mempty (bodyStms $ lamBody scan_op) $
            forM_ (zip3 acc pes $ map resSubExp $ bodyResult $ lamBody scan_op) $
              \(acc', pe, se) -> do
                -- Write the result to the output array ...
                copyDWIMFix (patElemName pe) (map Imp.le64 is ++ vec_is) se []
                -- ... and carry it forward in the accumulator.
                copyDWIMFix acc' vec_is se []
-- | Generate the code for a single iteration @i@ of the scan.
--
-- The flat index @i@ is unflattened over the segment-space dimensions
-- and bound to the segment-space index variables.  The kernel body
-- statements are then compiled.  The kernel results are split into the
-- first @segBinOpResults scan_ops@ results, which are fed to
-- 'applyScanOps', and the rest, which are written to the remaining
-- pattern elements with 'compileThreadResult'.
genScanLoop ::
  ScanLoopType ->
  Pat LetDecMem ->
  SegSpace ->
  KernelBody MCMem ->
  [SegBinOp MCMem] ->
  [[VName]] ->
  Imp.TExp Int64 ->
  ImpM MCMem HostEnv Imp.Multicore ()
genScanLoop typ pat space kbody scan_ops local_accs i = do
  let (all_scan_res, map_res) =
        splitAt (segBinOpResults scan_ops) $ kernelBodyResult kbody
  let (is, ns) = unzip $ unSegSpace space
      ns' = map pe64 ns

  -- Bind the segment-space indices corresponding to the flat index.
  zipWithM_ dPrimV_ is $ unflattenIndex ns' i
  compileStms mempty (kernelBodyStms kbody) $ do
    let map_arrs = drop (segBinOpResults scan_ops) $ patElems pat
    sComment "write mapped values results to memory" $
      zipWithM_ (compileThreadResult space) map_arrs map_res

    sComment "Apply scan op" $
      applyScanOps typ pat space (map kernelResultSubExp all_scan_res) scan_ops local_accs
-- | Emit stage one of the scan using the 'ScanScalar' loop variant.
--
-- The task body is collected into a separate piece of code which:
--
--   1. declares the flat index variable of @space@ as an @int64@ and
--      stores the task id in it ('Imp.GetTaskId');
--   2. calls 'genBinOpParams' and 'genLocalAccsStage1' for @scan_ops@
--      (presumably declaring the operator parameters and the per-task
--      accumulators -- confirm against their definitions);
--   3. runs a 'Vectorized' chunk loop, wrapped in 'inISPC', whose body is
--      'genScanLoop' 'ScanScalar'.
--
-- The collected code is then emitted as an 'Imp.ParLoop' named
-- @"scan_stage_1"@ together with the free parameters of that code.
scanStage1Scalar ::
  Pat LetDecMem ->
  SegSpace ->
  KernelBody MCMem ->
  [SegBinOp MCMem] ->
  MulticoreGen ()
scanStage1Scalar pat space kbody scan_ops = do
  fbody <- collect $ do
    dPrim_ (segFlat space) int64
    sOp $ Imp.GetTaskId (segFlat space)
    -- Unlike 'scanStage1Nested', the operator parameters are set up
    -- before entering the ISPC context.
    genBinOpParams scan_ops
    local_accs <- genLocalAccsStage1 scan_ops
    inISPC $
      generateChunkLoop "SegScan" Vectorized $
        genScanLoop ScanScalar pat space kbody scan_ops local_accs
  free_params <- freeParams fbody
  emit $ Imp.Op $ Imp.ParLoop "scan_stage_1" fbody free_params
-- | Emit stage one of the scan using the 'ScanNested' loop variant.
--
-- The task body declares the flat index variable of @space@ as an
-- @int64@, stores the task id in it, and obtains the accumulator names
-- from 'genLocalAccsStage1'.  Inside 'inISPC' it then calls
-- 'genBinOpParams' and runs a 'Scalar' chunk loop whose body is
-- 'genScanLoop' 'ScanNested' applied to the loop index.
--
-- The collected code is emitted as an 'Imp.ParLoop' named
-- @"scan_stage_1"@ together with the free parameters of that code.
scanStage1Nested ::
  Pat LetDecMem ->
  SegSpace ->
  KernelBody MCMem ->
  [SegBinOp MCMem] ->
  MulticoreGen ()
scanStage1Nested pat space kbody scan_ops = do
  fbody <- collect $ do
    dPrim_ (segFlat space) int64
    sOp $ Imp.GetTaskId (segFlat space)
    local_accs <- genLocalAccsStage1 scan_ops
    inISPC $ do
      -- Here 'genBinOpParams' runs inside the ISPC context, after the
      -- accumulators have been created; keep this ordering.
      genBinOpParams scan_ops
      generateChunkLoop "SegScan" Scalar $ \i ->
        genScanLoop ScanNested pat space kbody scan_ops local_accs i
  free_params <- freeParams fbody
  emit $ Imp.Op $ Imp.ParLoop "scan_stage_1" fbody free_params
-- | Emit stage one of the scan using the 'ScanSeq' loop variant.
--
-- The task body declares the flat index variable of @space@ as an
-- @int64@, stores the task id in it, calls 'genBinOpParams' and
-- 'genLocalAccsStage1' for @scan_ops@, and runs a 'Scalar' chunk loop
-- whose body is 'genScanLoop' 'ScanSeq'.  In contrast to
-- 'scanStage1Scalar' and 'scanStage1Nested', nothing is wrapped in
-- 'inISPC'.
--
-- The collected code is emitted as an 'Imp.ParLoop' named
-- @"scan_stage_1"@ together with the free parameters of that code.
scanStage1Fallback ::
  Pat LetDecMem ->
  SegSpace ->
  KernelBody MCMem ->
  [SegBinOp MCMem] ->
  MulticoreGen ()
scanStage1Fallback pat space kbody scan_ops = do
  fbody <- collect $ do
    dPrim_ (segFlat space) int64
    sOp $ Imp.GetTaskId (segFlat space)
    genBinOpParams scan_ops
    local_accs <- genLocalAccsStage1 scan_ops
    generateChunkLoop "SegScan" Scalar $
      genScanLoop ScanSeq pat space kbody scan_ops local_accs
  free_params <- freeParams fbody
  emit $ Imp.Op $ Imp.ParLoop "scan_stage_1" fbody free_params
-- | Emit stage two of the scan: a sequential loop that combines one
-- element per chunk into per-chunk carries.
--
-- For every operator in @scan_ops@ a carry array is obtained from
-- 'carryArrays' (named @"scan_stage_2_carry"@).  Slot @0@ of each carry
-- array is initialised with the operator's neutral element.  A loop of
-- @nsubtasks - 1@ iterations then, for iteration @i@:
--
--   * computes the size of chunk @i@ as
--     @product ns `quot` nsubtasks@, plus one when @i@ is smaller than
--     @product ns `rem` nsubtasks@, and adds it to a running
--     @offset_index@;
--   * binds the index variables of @space@ to the unflattened
--     @offset_index@;
--   * reads carry slot @i@ into the operator's x-parameters and element
--     @offset_index - 1@ of the corresponding pattern arrays into its
--     y-parameters;
--   * compiles the operator body and writes its results to carry slot
--     @i + 1@.
--
-- Returns the names of the carry arrays, grouped per scan operator.
scanStage2 ::
  Pat LetDecMem ->
  TV Int32 ->
  SegSpace ->
  [SegBinOp MCMem] ->
  MulticoreGen [[VName]]
scanStage2 pat nsubtasks space scan_ops = do
  let (is, ns) = unzip $ unSegSpace space
      ns_64 = map pe64 ns
      per_scan_pes = segBinOpChunks scan_ops $ patElems pat
      nsubtasks' = sExt64 $ tvExp nsubtasks

  -- Bring all lambda parameters of all operators into scope up front.
  dScope Nothing $
    scopeOfLParams $
      concatMap (lambdaParams . segBinOpLambda) scan_ops

  offset <- dPrimV "offset" (0 :: Imp.TExp Int64)
  let offset' = tvExp offset
  offset_index <- dPrimV "offset_index" (0 :: Imp.TExp Int64)
  let offset_index' = tvExp offset_index

  -- Quantities used to recompute the size of each chunk: the first
  -- @remainder@ chunks get one extra iteration.
  let iter_pr_subtask = product ns_64 `quot` nsubtasks'
      remainder = product ns_64 `rem` nsubtasks'

  carries <- carryArrays "scan_stage_2_carry" nsubtasks scan_ops

  sComment "carry-in for first chunk is neutral" $
    forM_ (zip scan_ops carries) $ \(scan_op, carry) ->
      sLoopNest (segBinOpShape scan_op) $ \vec_is ->
        forM_ (zip carry $ segBinOpNeutral scan_op) $ \(carry', ne) ->
          copyDWIMFix carry' (0 : vec_is) ne []

  sComment "scan carries" $
    sFor "i" (nsubtasks' - 1) $ \i -> do
      -- Size of chunk @i@, then the flat index one past its end.
      offset <-- iter_pr_subtask
      sWhen (sExt64 i .<. remainder) (offset <-- offset' + 1)
      offset_index <-- offset_index' + offset'
      zipWithM_ dPrimV_ is $ unflattenIndex ns_64 $ sExt64 offset_index'

      forM_ (zip3 per_scan_pes scan_ops carries) $ \(pes, scan_op, carry) ->
        sLoopNest (segBinOpShape scan_op) $ \vec_is -> do
          sComment "Read carry" $
            forM_ (zip (xParams scan_op) carry) $ \(p, carry') ->
              copyDWIMFix (paramName p) [] (Var carry') (i : vec_is)

          -- The last element of chunk @i@ sits just before
          -- @offset_index@.
          sComment "Read next values" $
            forM_ (zip (yParams scan_op) pes) $ \(p, pe) ->
              copyDWIMFix
                (paramName p)
                []
                (Var $ patElemName pe)
                ((offset_index' - 1) : vec_is)

          -- Apply the operator and store the result as the carry for
          -- the next chunk.
          compileStms mempty (bodyStms $ lamBody scan_op) $
            forM_ (zip carry $ map resSubExp $ bodyResult $ lamBody scan_op) $
              \(carry', se) ->
                copyDWIMFix carry' ((i + 1) : vec_is) se []

  pure carries
-- | Emit the @"scan_stage_3"@ parallel loop for the given scan operators.
--
-- The generated loop body:
--
--   1. declares the flat index of the segment space as an @int64@ and
--      fills it with the current task id ('Imp.GetTaskId');
--
--   2. inside 'inISPC', declares the operator parameters
--      ('genBinOpParams') and copies each carry array, indexed by the
--      task id only, into the corresponding x-parameter of its operator;
--
--   3. in a 'Vectorized' chunk loop, unflattens the loop index into the
--      segment-space indices, loads the current pattern element into the
--      y-parameters, compiles the operator body, and writes its results
--      back to the same position of the pattern elements.
--
-- Unlike 'scanStage3Nested', no loop nest over 'segBinOpShape' is
-- generated here.
--
-- NOTE(review): @per_scan_carries@ is presumably the per-task carry
-- arrays produced by the preceding stage, one list per operator -- confirm
-- against the caller.
scanStage3Scalar ::
  Pat LetDecMem ->
  SegSpace ->
  [SegBinOp MCMem] ->
  [[VName]] ->
  MulticoreGen ()
scanStage3Scalar pat space scan_ops per_scan_carries = do
  let per_scan_pes = segBinOpChunks scan_ops $ patElems pat
      (is, ns) = unzip $ unSegSpace space
      ns' = map pe64 ns
      flat_id = segFlat space

  body <- collect $ do
    dPrim_ flat_id int64
    sOp $ Imp.GetTaskId flat_id

    inISPC $ do
      genBinOpParams scan_ops

      -- The carry is loaded once, before the chunk loop; it is indexed
      -- by the task id alone.
      sComment "load carry-in" $
        forM_ (zip per_scan_carries scan_ops) $ \(op_carries, scan_op) ->
          forM_ (zip (xParams scan_op) op_carries) $ \(p, carries) ->
            copyDWIMFix (paramName p) [] (Var carries) [le64 flat_id]

      generateChunkLoop "SegScan" Vectorized $ \i -> do
        zipWithM_ dPrimV_ is $ unflattenIndex ns' i

        sComment "load partial result" $
          forM_ (zip per_scan_pes scan_ops) $ \(scan_pes, scan_op) ->
            forM_ (zip (yParams scan_op) scan_pes) $ \(p, pe) ->
              copyDWIMFix (paramName p) [] (Var (patElemName pe)) (map le64 is)

        sComment "combine carry with partial result" $
          forM_ (zip per_scan_pes scan_ops) $ \(scan_pes, scan_op) ->
            compileStms mempty (bodyStms $ lamBody scan_op) $
              forM_ (zip scan_pes $ map resSubExp $ bodyResult $ lamBody scan_op) $ \(pe, se) ->
                copyDWIMFix (patElemName pe) (map le64 is) se []

  free_params <- freeParams body
  emit $ Imp.Op $ Imp.ParLoop "scan_stage_3" body free_params
-- | Emit the @"scan_stage_3"@ parallel loop, iterating over the shape of
-- each scan operator.
--
-- The generated loop body declares the flat index of the segment space
-- as an @int64@ and fills it with the current task id
-- ('Imp.GetTaskId').  It then runs a 'Scalar' chunk loop; for every
-- iteration it
--
--   * declares the operator parameters ('genBinOpParams') -- note that
--     this happens inside the chunk loop here;
--
--   * unflattens the loop index into the segment-space indices;
--
--   * for every operator, generates a loop nest over 'segBinOpShape'
--     that loads the carry (indexed by task id followed by the nest
--     indices) into the x-parameters, loads the pattern element
--     (indexed by the segment indices followed by the nest indices) into
--     the y-parameters, compiles the operator body, and writes its
--     results back to that same pattern-element position.
--
-- NOTE(review): @per_scan_carries@ is presumably the per-task carry
-- arrays produced by the preceding stage, one list per operator -- confirm
-- against the caller.
scanStage3Nested ::
  Pat LetDecMem ->
  SegSpace ->
  [SegBinOp MCMem] ->
  [[VName]] ->
  MulticoreGen ()
scanStage3Nested pat space scan_ops per_scan_carries = do
  let per_scan_pes = segBinOpChunks scan_ops $ patElems pat
      (is, ns) = unzip $ unSegSpace space
      ns' = map pe64 ns
      flat_id = segFlat space

  body <- collect $ do
    dPrim_ flat_id int64
    sOp $ Imp.GetTaskId flat_id

    generateChunkLoop "SegScan" Scalar $ \i -> do
      genBinOpParams scan_ops
      zipWithM_ dPrimV_ is $ unflattenIndex ns' i

      forM_ (zip3 per_scan_pes per_scan_carries scan_ops) $ \(scan_pes, op_carries, scan_op) ->
        sLoopNest (segBinOpShape scan_op) $ \vec_is -> do
          -- Position of the current element inside the result arrays.
          let elem_is = map le64 is ++ vec_is

          sComment "load carry-in" $
            forM_ (zip (xParams scan_op) op_carries) $ \(p, carries) ->
              copyDWIMFix (paramName p) [] (Var carries) (le64 flat_id : vec_is)

          sComment "load partial result" $
            forM_ (zip (yParams scan_op) scan_pes) $ \(p, pe) ->
              copyDWIMFix (paramName p) [] (Var (patElemName pe)) elem_is

          sComment "combine carry with partial result" $
            compileStms mempty (bodyStms $ lamBody scan_op) $
              forM_ (zip scan_pes $ map resSubExp $ bodyResult $ lamBody scan_op) $ \(pe, se) ->
                copyDWIMFix (patElemName pe) elem_is se []

  free_params <- freeParams body
  emit $ Imp.Op $ Imp.ParLoop "scan_stage_3" body free_params
-- | Emit the @"scan_stage_3"@ parallel loop using a non-vectorised
-- ('Scalar') chunk loop.
--
-- The generated loop body declares the flat index of the segment space
-- as an @int64@, fills it with the current task id ('Imp.GetTaskId'),
-- and declares the operator parameters ('genBinOpParams') once, before
-- the chunk loop.  Each chunk-loop iteration then
--
--   * unflattens the loop index into the segment-space indices;
--
--   * for every operator, generates a loop nest over 'segBinOpShape'
--     that loads the carry (indexed by task id followed by the nest
--     indices) into the x-parameters, loads the pattern element
--     (indexed by the segment indices followed by the nest indices) into
--     the y-parameters, compiles the operator body, and writes its
--     results back to that same pattern-element position.
--
-- NOTE(review): @per_scan_carries@ is presumably the per-task carry
-- arrays produced by the preceding stage, one list per operator -- confirm
-- against the caller.
scanStage3Fallback ::
  Pat LetDecMem ->
  SegSpace ->
  [SegBinOp MCMem] ->
  [[VName]] ->
  MulticoreGen ()
scanStage3Fallback pat space scan_ops per_scan_carries = do
  let per_scan_pes = segBinOpChunks scan_ops $ patElems pat
      (is, ns) = unzip $ unSegSpace space
      ns' = map pe64 ns
      flat_id = segFlat space

  body <- collect $ do
    dPrim_ flat_id int64
    sOp $ Imp.GetTaskId flat_id

    genBinOpParams scan_ops
    generateChunkLoop "SegScan" Scalar $ \i -> do
      zipWithM_ dPrimV_ is $ unflattenIndex ns' i

      forM_ (zip3 per_scan_pes per_scan_carries scan_ops) $ \(scan_pes, op_carries, scan_op) ->
        sLoopNest (segBinOpShape scan_op) $ \vec_is -> do
          -- Position of the current element inside the result arrays.
          let elem_is = map le64 is ++ vec_is

          sComment "load carry-in" $
            forM_ (zip (xParams scan_op) op_carries) $ \(p, carries) ->
              copyDWIMFix (paramName p) [] (Var carries) (le64 flat_id : vec_is)

          sComment "load partial result" $
            forM_ (zip (yParams scan_op) scan_pes) $ \(p, pe) ->
              copyDWIMFix (paramName p) [] (Var (patElemName pe)) elem_is

          sComment "combine carry with partial result" $
            compileStms mempty (bodyStms $ lamBody scan_op) $
              forM_ (zip scan_pes $ map resSubExp $ bodyResult $ lamBody scan_op) $ \(pe, se) ->
                copyDWIMFix (patElemName pe) elem_is se []

  free_params <- freeParams body
  emit $ Imp.Op $ Imp.ParLoop "scan_stage_3" body free_params
-- | Generate code for a segmented scan.
--
-- A @"segmented segScan"@ debug print is emitted directly into the
-- enclosing code.  The returned code is whatever is collected while
-- compiling the scan body with 'compileSegScanBody', computing its free
-- parameters, and wrapping it in a parallel loop named @"seg_scan"@.
segmentedScan ::
  Pat LetDecMem ->
  SegSpace ->
  [SegBinOp MCMem] ->
  KernelBody MCMem ->
  MulticoreGen Imp.MCCode
segmentedScan pat space scan_ops kbody = do
  -- Emitted outside 'collect', so it is not part of the returned code.
  emit $ Imp.DebugPrint "segmented segScan" Nothing
  collect $ do
    body <- compileSegScanBody pat space scan_ops kbody
    free_params <- freeParams body
    emit $ Imp.Op $ Imp.ParLoop "seg_scan" body free_params
-- | Generate the code that is run by each task of a segmented scan.
--
-- The generated code fetches the task id into the flat index of the
-- 'SegSpace' and then runs a chunk loop.  For every index handed out by the
-- chunk loop (treated as a flat segment index) and for every scan operator,
-- the operator's accumulator parameters are reset to the neutral element and
-- the innermost dimension is scanned sequentially: the kernel body is
-- executed, its to-scan results are written to the operator's second set of
-- parameters, the map results are written to the output arrays, and the
-- operator body is run to produce the new carry, which is written both to the
-- output array and back to the accumulator parameters.
--
-- The kernel results and the pattern elements are both laid out as "all scan
-- results (grouped per operator), followed by the map results".  Each operator
-- therefore gets its own chunk of both lists, and the map results start after
-- the scan results of /all/ operators.
compileSegScanBody ::
  Pat LetDecMem ->
  SegSpace ->
  [SegBinOp MCMem] ->
  KernelBody MCMem ->
  MulticoreGen Imp.MCCode
compileSegScanBody pat space scan_ops kbody = collect $ do
  let (is, ns) = unzip $ unSegSpace space
      ns_64 = map pe64 ns

  dPrim_ (segFlat space) int64
  sOp $ Imp.GetTaskId (segFlat space)

  let -- Total number of values produced by all scan operators together.
      num_scan_res = sum $ map (length . segBinOpNeutral) scan_ops
      -- Per-operator slices of the output pattern and of the kernel results.
      -- These must be chunked the same way so that operator k reads the
      -- inputs that belong to operator k and not those of the first one.
      per_scan_pes = segBinOpChunks scan_ops $ patElems pat
      per_scan_res = segBinOpChunks scan_ops $ kernelBodyResult kbody
      -- Whatever follows the scan results belongs to the fused map.
      map_pes = drop num_scan_res $ patElems pat
      map_res = drop num_scan_res $ kernelBodyResult kbody

  generateChunkLoop "SegScan" Scalar $ \segment_i ->
    forM_ (zip3 scan_ops per_scan_pes per_scan_res) $
      \(scan_op, scan_pes, scan_res) -> do
        let scan_lam = segBinOpLambda scan_op
            nes = segBinOpNeutral scan_op
            -- The first half of the lambda parameters carries the running
            -- result, the second half receives the next element.
            (scan_x_params, scan_y_params) =
              splitAt (length nes) $ lambdaParams scan_lam

        dScope Nothing $ scopeOfLParams $ lambdaParams scan_lam

        -- Start every segment from the neutral element.
        forM_ (zip scan_x_params nes) $ \(p, ne) ->
          copyDWIMFix (paramName p) [] ne []

        -- NOTE(review): 'last' and 'init' assume the space has at least one
        -- dimension; confirm that an empty SegSpace cannot reach this point.
        let inner_bound = last ns_64
        -- Sequential scan over the innermost dimension of this segment.
        sFor "i" inner_bound $ \i -> do
          zipWithM_ dPrimV_ (init is) $ unflattenIndex (init ns_64) segment_i
          dPrimV_ (last is) i
          compileStms mempty (kernelBodyStms kbody) $ do
            sComment "write to-scan values to parameters" $
              forM_ (zip scan_y_params scan_res) $ \(p, se) ->
                copyDWIMFix (paramName p) [] (kernelResultSubExp se) []

            sComment "write mapped values results to memory" $
              forM_ (zip map_pes map_res) $ \(pe, se) ->
                copyDWIMFix
                  (patElemName pe)
                  (map Imp.le64 is)
                  (kernelResultSubExp se)
                  []

            sComment "combine with carry and write to memory" $
              compileStms mempty (bodyStms $ lambdaBody scan_lam) $
                forM_
                  ( zip3 scan_x_params scan_pes $
                      map resSubExp $
                        bodyResult $
                          lambdaBody scan_lam
                  )
                  $ \(p, pe, se) -> do
                    copyDWIMFix (patElemName pe) (map Imp.le64 is) se []
                    copyDWIMFix (paramName p) [] se []