module Futhark.CodeGen.ImpGen.Multicore.Base
  ( extractAllocations,
    compileThreadResult,
    Locks (..),
    HostEnv (..),
    AtomicBinOp,
    MulticoreGen,
    decideScheduling,
    decideScheduling',
    renameSegBinOp,
    freeParams,
    renameHistOpLambda,
    atomicUpdateLocking,
    AtomicUpdate (..),
    DoAtomicUpdate,
    Locking (..),
    getSpace,
    getLoopBounds,
    getIterationDomain,
    getReturnParams,
    segOpString,
    ChunkLoopVectorization (..),
    generateChunkLoop,
    generateUniformizeLoop,
    extractVectorLane,
    inISPC,
    toParam,
    sLoopNestVectorized,
  )
where

import Control.Monad
import Data.Bifunctor
import Data.Map qualified as M
import Data.Maybe
import Futhark.CodeGen.ImpCode.Multicore qualified as Imp
import Futhark.CodeGen.ImpGen
import Futhark.Error
import Futhark.IR.MCMem
import Futhark.MonadFreshNames
import Futhark.Transform.Rename
import Prelude hiding (quot, rem)

-- | Is there an atomic t'BinOp' corresponding to this t'BinOp'?
type AtomicBinOp =
  BinOp ->
  Maybe (VName -> VName -> Imp.Count Imp.Elements (Imp.TExp Int32) -> Imp.Exp -> Imp.AtomicOp)

-- | Information about the locks available for accumulators.
data Locks = Locks
  { Locks -> VName
locksArray :: VName,
    Locks -> Int
locksCount :: Int
  }

data HostEnv = HostEnv
  { HostEnv -> AtomicBinOp
hostAtomics :: AtomicBinOp,
    HostEnv -> Map VName Locks
hostLocks :: M.Map VName Locks
  }

type MulticoreGen = ImpM MCMem HostEnv Imp.Multicore

segOpString :: SegOp () MCMem -> MulticoreGen String
segOpString :: SegOp () MCMem -> MulticoreGen [Char]
segOpString SegMap {} = [Char] -> MulticoreGen [Char]
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Char]
"segmap"
segOpString SegRed {} = [Char] -> MulticoreGen [Char]
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Char]
"segred"
segOpString SegScan {} = [Char] -> MulticoreGen [Char]
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Char]
"segscan"
segOpString SegHist {} = [Char] -> MulticoreGen [Char]
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Char]
"seghist"

arrParam :: VName -> MulticoreGen Imp.Param
arrParam :: VName -> MulticoreGen Param
arrParam VName
arr = do
  VarEntry MCMem
name_entry <- VName -> ImpM MCMem HostEnv Multicore (VarEntry MCMem)
forall rep r op. VName -> ImpM rep r op (VarEntry rep)
lookupVar VName
arr
  case VarEntry MCMem
name_entry of
    ArrayVar Maybe (Exp MCMem)
_ (ArrayEntry (MemLoc VName
mem [SubExp]
_ LMAD (TExp Int64)
_) PrimType
_) ->
      Param -> MulticoreGen Param
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Param -> MulticoreGen Param) -> Param -> MulticoreGen Param
forall a b. (a -> b) -> a -> b
$ VName -> Space -> Param
Imp.MemParam VName
mem Space
DefaultSpace
    VarEntry MCMem
_ -> [Char] -> MulticoreGen Param
forall a. HasCallStack => [Char] -> a
error ([Char] -> MulticoreGen Param) -> [Char] -> MulticoreGen Param
forall a b. (a -> b) -> a -> b
$ [Char]
"arrParam: could not handle array " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ VName -> [Char]
forall a. Show a => a -> [Char]
show VName
arr

toParam :: VName -> TypeBase shape u -> MulticoreGen [Imp.Param]
toParam :: forall shape u. VName -> TypeBase shape u -> MulticoreGen [Param]
toParam VName
name (Prim PrimType
pt) = [Param] -> MulticoreGen [Param]
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [VName -> PrimType -> Param
Imp.ScalarParam VName
name PrimType
pt]
toParam VName
name (Mem Space
space) = [Param] -> MulticoreGen [Param]
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [VName -> Space -> Param
Imp.MemParam VName
name Space
space]
toParam VName
name Array {} = Param -> [Param]
forall a. a -> [a]
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Param -> [Param]) -> MulticoreGen Param -> MulticoreGen [Param]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> VName -> MulticoreGen Param
arrParam VName
name
toParam VName
_name Acc {} = [Param] -> MulticoreGen [Param]
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [] -- FIXME?  Are we sure this works?

getSpace :: SegOp () MCMem -> SegSpace
getSpace :: SegOp () MCMem -> SegSpace
getSpace (SegHist ()
_ SegSpace
space [HistOp MCMem]
_ [Type]
_ KernelBody MCMem
_) = SegSpace
space
getSpace (SegRed ()
_ SegSpace
space [SegBinOp MCMem]
_ [Type]
_ KernelBody MCMem
_) = SegSpace
space
getSpace (SegScan ()
_ SegSpace
space [SegBinOp MCMem]
_ [Type]
_ KernelBody MCMem
_) = SegSpace
space
getSpace (SegMap ()
_ SegSpace
space [Type]
_ KernelBody MCMem
_) = SegSpace
space

getLoopBounds :: MulticoreGen (Imp.TExp Int64, Imp.TExp Int64)
getLoopBounds :: MulticoreGen (TExp Int64, TExp Int64)
getLoopBounds = do
  TV Int64
start <- [Char] -> PrimType -> ImpM MCMem HostEnv Multicore (TV Int64)
forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"start" PrimType
int64
  TV Int64
end <- [Char] -> PrimType -> ImpM MCMem HostEnv Multicore (TV Int64)
forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"end" PrimType
int64
  Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ Multicore -> Code Multicore
forall a. a -> Code a
Imp.Op (Multicore -> Code Multicore) -> Multicore -> Code Multicore
forall a b. (a -> b) -> a -> b
$ VName -> VName -> Multicore
Imp.GetLoopBounds (TV Int64 -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Int64
start) (TV Int64 -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Int64
end)
  (TExp Int64, TExp Int64) -> MulticoreGen (TExp Int64, TExp Int64)
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (TV Int64 -> TExp Int64
forall {k} (t :: k). TV t -> TExp t
tvExp TV Int64
start, TV Int64 -> TExp Int64
forall {k} (t :: k). TV t -> TExp t
tvExp TV Int64
end)

getIterationDomain :: SegOp () MCMem -> SegSpace -> MulticoreGen (Imp.TExp Int64)
getIterationDomain :: SegOp () MCMem -> SegSpace -> MulticoreGen (TExp Int64)
getIterationDomain SegMap {} SegSpace
space = do
  let ns :: [SubExp]
ns = ((VName, SubExp) -> SubExp) -> [(VName, SubExp)] -> [SubExp]
forall a b. (a -> b) -> [a] -> [b]
map (VName, SubExp) -> SubExp
forall a b. (a, b) -> b
snd ([(VName, SubExp)] -> [SubExp]) -> [(VName, SubExp)] -> [SubExp]
forall a b. (a -> b) -> a -> b
$ SegSpace -> [(VName, SubExp)]
unSegSpace SegSpace
space
      ns_64 :: [TExp Int64]
ns_64 = (SubExp -> TExp Int64) -> [SubExp] -> [TExp Int64]
forall a b. (a -> b) -> [a] -> [b]
map SubExp -> TExp Int64
pe64 [SubExp]
ns
  TExp Int64 -> MulticoreGen (TExp Int64)
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (TExp Int64 -> MulticoreGen (TExp Int64))
-> TExp Int64 -> MulticoreGen (TExp Int64)
forall a b. (a -> b) -> a -> b
$ [TExp Int64] -> TExp Int64
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [TExp Int64]
ns_64
getIterationDomain SegOp () MCMem
_ SegSpace
space = do
  let ns :: [SubExp]
ns = ((VName, SubExp) -> SubExp) -> [(VName, SubExp)] -> [SubExp]
forall a b. (a -> b) -> [a] -> [b]
map (VName, SubExp) -> SubExp
forall a b. (a, b) -> b
snd ([(VName, SubExp)] -> [SubExp]) -> [(VName, SubExp)] -> [SubExp]
forall a b. (a -> b) -> a -> b
$ SegSpace -> [(VName, SubExp)]
unSegSpace SegSpace
space
      ns_64 :: [TExp Int64]
ns_64 = (SubExp -> TExp Int64) -> [SubExp] -> [TExp Int64]
forall a b. (a -> b) -> [a] -> [b]
map SubExp -> TExp Int64
pe64 [SubExp]
ns
  case SegSpace -> [(VName, SubExp)]
unSegSpace SegSpace
space of
    [(VName, SubExp)
_] -> TExp Int64 -> MulticoreGen (TExp Int64)
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (TExp Int64 -> MulticoreGen (TExp Int64))
-> TExp Int64 -> MulticoreGen (TExp Int64)
forall a b. (a -> b) -> a -> b
$ [TExp Int64] -> TExp Int64
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [TExp Int64]
ns_64
    -- A segmented SegOp is over the segments
    -- so we drop the last dimension, which is
    -- executed sequentially
    [(VName, SubExp)]
_ -> TExp Int64 -> MulticoreGen (TExp Int64)
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (TExp Int64 -> MulticoreGen (TExp Int64))
-> TExp Int64 -> MulticoreGen (TExp Int64)
forall a b. (a -> b) -> a -> b
$ [TExp Int64] -> TExp Int64
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product ([TExp Int64] -> TExp Int64) -> [TExp Int64] -> TExp Int64
forall a b. (a -> b) -> a -> b
$ [TExp Int64] -> [TExp Int64]
forall a. HasCallStack => [a] -> [a]
init [TExp Int64]
ns_64

-- When the SegRed's return value is a scalar
-- we perform a call by value-result in the segop function
getReturnParams :: Pat LetDecMem -> SegOp () MCMem -> MulticoreGen [Imp.Param]
getReturnParams :: Pat LetDecMem -> SegOp () MCMem -> MulticoreGen [Param]
getReturnParams Pat LetDecMem
pat SegRed {} =
  -- It's a good idea to make sure any prim values are initialised, as
  -- we will load them (redundantly) in the task code, and
  -- uninitialised values are UB.
  ([[Param]] -> [Param])
-> ImpM MCMem HostEnv Multicore [[Param]] -> MulticoreGen [Param]
forall a b.
(a -> b)
-> ImpM MCMem HostEnv Multicore a -> ImpM MCMem HostEnv Multicore b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[Param]] -> [Param]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (ImpM MCMem HostEnv Multicore [[Param]] -> MulticoreGen [Param])
-> ((PatElem LetDecMem -> MulticoreGen [Param])
    -> ImpM MCMem HostEnv Multicore [[Param]])
-> (PatElem LetDecMem -> MulticoreGen [Param])
-> MulticoreGen [Param]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PatElem LetDecMem]
-> (PatElem LetDecMem -> MulticoreGen [Param])
-> ImpM MCMem HostEnv Multicore [[Param]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM (Pat LetDecMem -> [PatElem LetDecMem]
forall dec. Pat dec -> [PatElem dec]
patElems Pat LetDecMem
pat) ((PatElem LetDecMem -> MulticoreGen [Param])
 -> MulticoreGen [Param])
-> (PatElem LetDecMem -> MulticoreGen [Param])
-> MulticoreGen [Param]
forall a b. (a -> b) -> a -> b
$ \PatElem LetDecMem
pe -> do
    case PatElem LetDecMem -> Type
forall dec. Typed dec => PatElem dec -> Type
patElemType PatElem LetDecMem
pe of
      Prim PrimType
pt -> PatElem LetDecMem -> VName
forall dec. PatElem dec -> VName
patElemName PatElem LetDecMem
pe VName -> Exp -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> Exp -> ImpM rep r op ()
<~~ PrimValue -> Exp
forall v. PrimValue -> PrimExp v
ValueExp (PrimType -> PrimValue
blankPrimValue PrimType
pt)
      Type
_ -> () -> ImpM MCMem HostEnv Multicore ()
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
    VName -> Type -> MulticoreGen [Param]
forall shape u. VName -> TypeBase shape u -> MulticoreGen [Param]
toParam (PatElem LetDecMem -> VName
forall dec. PatElem dec -> VName
patElemName PatElem LetDecMem
pe) (PatElem LetDecMem -> Type
forall dec. Typed dec => PatElem dec -> Type
patElemType PatElem LetDecMem
pe)
getReturnParams Pat LetDecMem
_ SegOp () MCMem
_ = [Param] -> MulticoreGen [Param]
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Param]
forall a. Monoid a => a
mempty

renameSegBinOp :: [SegBinOp MCMem] -> MulticoreGen [SegBinOp MCMem]
renameSegBinOp :: [SegBinOp MCMem] -> MulticoreGen [SegBinOp MCMem]
renameSegBinOp [SegBinOp MCMem]
segbinops =
  [SegBinOp MCMem]
-> (SegBinOp MCMem
    -> ImpM MCMem HostEnv Multicore (SegBinOp MCMem))
-> MulticoreGen [SegBinOp MCMem]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [SegBinOp MCMem]
segbinops ((SegBinOp MCMem -> ImpM MCMem HostEnv Multicore (SegBinOp MCMem))
 -> MulticoreGen [SegBinOp MCMem])
-> (SegBinOp MCMem
    -> ImpM MCMem HostEnv Multicore (SegBinOp MCMem))
-> MulticoreGen [SegBinOp MCMem]
forall a b. (a -> b) -> a -> b
$ \(SegBinOp Commutativity
comm Lambda MCMem
lam [SubExp]
ne Shape
shape) -> do
    Lambda MCMem
lam' <- Lambda MCMem -> ImpM MCMem HostEnv Multicore (Lambda MCMem)
forall rep (m :: * -> *).
(Renameable rep, MonadFreshNames m) =>
Lambda rep -> m (Lambda rep)
renameLambda Lambda MCMem
lam
    SegBinOp MCMem -> ImpM MCMem HostEnv Multicore (SegBinOp MCMem)
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (SegBinOp MCMem -> ImpM MCMem HostEnv Multicore (SegBinOp MCMem))
-> SegBinOp MCMem -> ImpM MCMem HostEnv Multicore (SegBinOp MCMem)
forall a b. (a -> b) -> a -> b
$ Commutativity
-> Lambda MCMem -> [SubExp] -> Shape -> SegBinOp MCMem
forall rep.
Commutativity -> Lambda rep -> [SubExp] -> Shape -> SegBinOp rep
SegBinOp Commutativity
comm Lambda MCMem
lam' [SubExp]
ne Shape
shape

compileThreadResult ::
  SegSpace ->
  PatElem LetDecMem ->
  KernelResult ->
  MulticoreGen ()
compileThreadResult :: SegSpace
-> PatElem LetDecMem
-> KernelResult
-> ImpM MCMem HostEnv Multicore ()
compileThreadResult SegSpace
space PatElem LetDecMem
pe (Returns ResultManifest
_ Certs
_ SubExp
what) = do
  let is :: [TExp Int64]
is = ((VName, SubExp) -> TExp Int64)
-> [(VName, SubExp)] -> [TExp Int64]
forall a b. (a -> b) -> [a] -> [b]
map (VName -> TExp Int64
forall a. a -> TPrimExp Int64 a
Imp.le64 (VName -> TExp Int64)
-> ((VName, SubExp) -> VName) -> (VName, SubExp) -> TExp Int64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (VName, SubExp) -> VName
forall a b. (a, b) -> a
fst) ([(VName, SubExp)] -> [TExp Int64])
-> [(VName, SubExp)] -> [TExp Int64]
forall a b. (a -> b) -> a -> b
$ SegSpace -> [(VName, SubExp)]
unSegSpace SegSpace
space
  VName
-> [TExp Int64]
-> SubExp
-> [TExp Int64]
-> ImpM MCMem HostEnv Multicore ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix (PatElem LetDecMem -> VName
forall dec. PatElem dec -> VName
patElemName PatElem LetDecMem
pe) [TExp Int64]
is SubExp
what []
compileThreadResult SegSpace
_ PatElem LetDecMem
_ WriteReturns {} =
  [Char] -> ImpM MCMem HostEnv Multicore ()
forall a. [Char] -> a
compilerBugS [Char]
"compileThreadResult: WriteReturns unhandled."
compileThreadResult SegSpace
_ PatElem LetDecMem
_ TileReturns {} =
  [Char] -> ImpM MCMem HostEnv Multicore ()
forall a. [Char] -> a
compilerBugS [Char]
"compileThreadResult: TileReturns unhandled."
compileThreadResult SegSpace
_ PatElem LetDecMem
_ RegTileReturns {} =
  [Char] -> ImpM MCMem HostEnv Multicore ()
forall a. [Char] -> a
compilerBugS [Char]
"compileThreadResult: RegTileReturns unhandled."

freeParams :: (FreeIn a) => a -> MulticoreGen [Imp.Param]
freeParams :: forall a. FreeIn a => a -> MulticoreGen [Param]
freeParams a
code = do
  let free :: [VName]
free = Names -> [VName]
namesToList (Names -> [VName]) -> Names -> [VName]
forall a b. (a -> b) -> a -> b
$ a -> Names
forall a. FreeIn a => a -> Names
freeIn a
code
  [Type]
ts <- (VName -> ImpM MCMem HostEnv Multicore Type)
-> [VName] -> ImpM MCMem HostEnv Multicore [Type]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM VName -> ImpM MCMem HostEnv Multicore Type
forall rep (m :: * -> *). HasScope rep m => VName -> m Type
lookupType [VName]
free
  [[Param]] -> [Param]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Param]] -> [Param])
-> ImpM MCMem HostEnv Multicore [[Param]] -> MulticoreGen [Param]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (VName -> Type -> MulticoreGen [Param])
-> [VName] -> [Type] -> ImpM MCMem HostEnv Multicore [[Param]]
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m [c]
zipWithM VName -> Type -> MulticoreGen [Param]
forall shape u. VName -> TypeBase shape u -> MulticoreGen [Param]
toParam [VName]
free [Type]
ts

isLoadBalanced :: Imp.MCCode -> Bool
isLoadBalanced :: Code Multicore -> Bool
isLoadBalanced (Code Multicore
a Imp.:>>: Code Multicore
b) = Code Multicore -> Bool
isLoadBalanced Code Multicore
a Bool -> Bool -> Bool
&& Code Multicore -> Bool
isLoadBalanced Code Multicore
b
isLoadBalanced (Imp.For VName
_ Exp
_ Code Multicore
a) = Code Multicore -> Bool
isLoadBalanced Code Multicore
a
isLoadBalanced (Imp.If TExp Bool
_ Code Multicore
a Code Multicore
b) = Code Multicore -> Bool
isLoadBalanced Code Multicore
a Bool -> Bool -> Bool
&& Code Multicore -> Bool
isLoadBalanced Code Multicore
b
isLoadBalanced (Imp.Comment Text
_ Code Multicore
a) = Code Multicore -> Bool
isLoadBalanced Code Multicore
a
isLoadBalanced Imp.While {} = Bool
False
isLoadBalanced (Imp.Op (Imp.ParLoop [Char]
_ Code Multicore
code [Param]
_)) = Code Multicore -> Bool
isLoadBalanced Code Multicore
code
isLoadBalanced (Imp.Op (Imp.ForEachActive VName
_ Code Multicore
a)) = Code Multicore -> Bool
isLoadBalanced Code Multicore
a
isLoadBalanced (Imp.Op (Imp.ForEach VName
_ Exp
_ Exp
_ Code Multicore
a)) = Code Multicore -> Bool
isLoadBalanced Code Multicore
a
isLoadBalanced (Imp.Op (Imp.ISPCKernel Code Multicore
a [Param]
_)) = Code Multicore -> Bool
isLoadBalanced Code Multicore
a
isLoadBalanced Code Multicore
_ = Bool
True

decideScheduling' :: SegOp () rep -> Imp.MCCode -> Imp.Scheduling
decideScheduling' :: forall rep. SegOp () rep -> Code Multicore -> Scheduling
decideScheduling' SegHist {} Code Multicore
_ = Scheduling
Imp.Static
decideScheduling' SegScan {} Code Multicore
_ = Scheduling
Imp.Static
decideScheduling' SegRed {} Code Multicore
_ = Scheduling
Imp.Static
decideScheduling' SegMap {} Code Multicore
code = Code Multicore -> Scheduling
decideScheduling Code Multicore
code

decideScheduling :: Imp.MCCode -> Imp.Scheduling
decideScheduling :: Code Multicore -> Scheduling
decideScheduling Code Multicore
code =
  if Code Multicore -> Bool
isLoadBalanced Code Multicore
code
    then Scheduling
Imp.Static
    else Scheduling
Imp.Dynamic

-- | Try to extract invariant allocations.  If we assume that the
-- given 'Imp.MCCode' is the body of a 'SegOp', then it is always safe
-- to move the immediate allocations to the prebody.
extractAllocations :: Imp.MCCode -> (Imp.MCCode, Imp.MCCode)
extractAllocations :: Code Multicore -> (Code Multicore, Code Multicore)
extractAllocations Code Multicore
segop_code = Code Multicore -> (Code Multicore, Code Multicore)
forall {a}. Code Multicore -> (Code a, Code Multicore)
f Code Multicore
segop_code
  where
    declared :: Names
declared = Code Multicore -> Names
forall a. Code a -> Names
Imp.declaredIn Code Multicore
segop_code
    f :: Code Multicore -> (Code a, Code Multicore)
f (Imp.DeclareMem VName
name Space
space) =
      -- Hoisting declarations out is always safe.
      (VName -> Space -> Code a
forall a. VName -> Space -> Code a
Imp.DeclareMem VName
name Space
space, Code Multicore
forall a. Monoid a => a
mempty)
    f (Imp.Allocate VName
name Count Bytes (TExp Int64)
size Space
space)
      | Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Count Bytes (TExp Int64) -> Names
forall a. FreeIn a => a -> Names
freeIn Count Bytes (TExp Int64)
size Names -> Names -> Bool
`namesIntersect` Names
declared =
          (VName -> Count Bytes (TExp Int64) -> Space -> Code a
forall a. VName -> Count Bytes (TExp Int64) -> Space -> Code a
Imp.Allocate VName
name Count Bytes (TExp Int64)
size Space
space, Code Multicore
forall a. Monoid a => a
mempty)
    f (Code Multicore
x Imp.:>>: Code Multicore
y) = Code Multicore -> (Code a, Code Multicore)
f Code Multicore
x (Code a, Code Multicore)
-> (Code a, Code Multicore) -> (Code a, Code Multicore)
forall a. Semigroup a => a -> a -> a
<> Code Multicore -> (Code a, Code Multicore)
f Code Multicore
y
    f (Imp.While TExp Bool
cond Code Multicore
body) =
      (Code a
forall a. Monoid a => a
mempty, TExp Bool -> Code Multicore -> Code Multicore
forall a. TExp Bool -> Code a -> Code a
Imp.While TExp Bool
cond Code Multicore
body)
    f (Imp.For VName
i Exp
bound Code Multicore
body) =
      (Code a
forall a. Monoid a => a
mempty, VName -> Exp -> Code Multicore -> Code Multicore
forall a. VName -> Exp -> Code a -> Code a
Imp.For VName
i Exp
bound Code Multicore
body)
    f (Imp.Comment Text
s Code Multicore
code) =
      (Code Multicore -> Code Multicore)
-> (Code a, Code Multicore) -> (Code a, Code Multicore)
forall b c a. (b -> c) -> (a, b) -> (a, c)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (Text -> Code Multicore -> Code Multicore
forall a. Text -> Code a -> Code a
Imp.Comment Text
s) (Code Multicore -> (Code a, Code Multicore)
f Code Multicore
code)
    f Imp.Free {} =
      (Code a, Code Multicore)
forall a. Monoid a => a
mempty
    f (Imp.If TExp Bool
cond Code Multicore
tcode Code Multicore
fcode) =
      let (Code a
ta, Code Multicore
tcode') = Code Multicore -> (Code a, Code Multicore)
f Code Multicore
tcode
          (Code a
fa, Code Multicore
fcode') = Code Multicore -> (Code a, Code Multicore)
f Code Multicore
fcode
       in (Code a
ta Code a -> Code a -> Code a
forall a. Semigroup a => a -> a -> a
<> Code a
fa, TExp Bool -> Code Multicore -> Code Multicore -> Code Multicore
forall a. TExp Bool -> Code a -> Code a -> Code a
Imp.If TExp Bool
cond Code Multicore
tcode' Code Multicore
fcode')
    f (Imp.Op (Imp.ParLoop [Char]
s Code Multicore
body [Param]
free)) =
      let (Code Multicore
body_allocs, Code Multicore
body') = Code Multicore -> (Code Multicore, Code Multicore)
extractAllocations Code Multicore
body
          (Code a
free_allocs, Code Multicore
here_allocs) = Code Multicore -> (Code a, Code Multicore)
f Code Multicore
body_allocs
          free' :: [Param]
free' =
            (Param -> Bool) -> [Param] -> [Param]
forall a. (a -> Bool) -> [a] -> [a]
filter
              ( (VName -> Names -> Bool
`notNameIn` Code Multicore -> Names
forall a. Code a -> Names
Imp.declaredIn Code Multicore
body_allocs) (VName -> Bool) -> (Param -> VName) -> Param -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Param -> VName
Imp.paramName
              )
              [Param]
free
       in ( Code a
free_allocs,
            Code Multicore
here_allocs Code Multicore -> Code Multicore -> Code Multicore
forall a. Semigroup a => a -> a -> a
<> Multicore -> Code Multicore
forall a. a -> Code a
Imp.Op ([Char] -> Code Multicore -> [Param] -> Multicore
Imp.ParLoop [Char]
s Code Multicore
body' [Param]
free')
          )
    f Code Multicore
code =
      (Code a
forall a. Monoid a => a
mempty, Code Multicore
code)

-- | Indicates whether to vectorize a chunk loop or keep it sequential.
-- We use this to allow falling back to sequential chunk loops in cases
-- we don't care about trying to vectorize.
data ChunkLoopVectorization = Vectorized | Scalar

-- | Emit code for the chunk loop, given an action that generates code
-- for a single iteration.
--
-- The action is called with the (symbolic) index of the current
-- iteration.
generateChunkLoop ::
  String ->
  ChunkLoopVectorization ->
  (Imp.TExp Int64 -> MulticoreGen ()) ->
  MulticoreGen ()
generateChunkLoop :: [Char]
-> ChunkLoopVectorization
-> (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
generateChunkLoop [Char]
desc ChunkLoopVectorization
Scalar TExp Int64 -> ImpM MCMem HostEnv Multicore ()
m = do
  (TExp Int64
start, TExp Int64
end) <- MulticoreGen (TExp Int64, TExp Int64)
getLoopBounds
  TExp Int64
n <- [Char] -> TExp Int64 -> MulticoreGen (TExp Int64)
forall {k} (t :: k) rep r op.
[Char] -> TExp t -> ImpM rep r op (TExp t)
dPrimVE [Char]
"n" (TExp Int64 -> MulticoreGen (TExp Int64))
-> TExp Int64 -> MulticoreGen (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int64
end TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
- TExp Int64
start
  VName
i <- [Char] -> ImpM MCMem HostEnv Multicore VName
forall (m :: * -> *). MonadFreshNames m => [Char] -> m VName
newVName ([Char]
desc [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
"_i")
  (Code Multicore
body_allocs, Code Multicore
body) <- (Code Multicore -> (Code Multicore, Code Multicore))
-> ImpM MCMem HostEnv Multicore (Code Multicore)
-> ImpM MCMem HostEnv Multicore (Code Multicore, Code Multicore)
forall a b.
(a -> b)
-> ImpM MCMem HostEnv Multicore a -> ImpM MCMem HostEnv Multicore b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Code Multicore -> (Code Multicore, Code Multicore)
extractAllocations (ImpM MCMem HostEnv Multicore (Code Multicore)
 -> ImpM MCMem HostEnv Multicore (Code Multicore, Code Multicore))
-> ImpM MCMem HostEnv Multicore (Code Multicore)
-> ImpM MCMem HostEnv Multicore (Code Multicore, Code Multicore)
forall a b. (a -> b) -> a -> b
$
    ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore (Code Multicore)
forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore (Code Multicore))
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore (Code Multicore)
forall a b. (a -> b) -> a -> b
$ do
      VName -> IntType -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> IntType -> ImpM rep r op ()
addLoopVar VName
i IntType
Int64
      TExp Int64 -> ImpM MCMem HostEnv Multicore ()
m (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> TExp Int64 -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ TExp Int64
start TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ VName -> TExp Int64
forall a. a -> TPrimExp Int64 a
Imp.le64 VName
i
  Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit Code Multicore
body_allocs
  -- Emit either foreach or normal for loop
  let bound :: Exp
bound = TExp Int64 -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped TExp Int64
n
  Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ VName -> Exp -> Code Multicore -> Code Multicore
forall a. VName -> Exp -> Code a -> Code a
Imp.For VName
i Exp
bound Code Multicore
body
generateChunkLoop [Char]
desc ChunkLoopVectorization
Vectorized TExp Int64 -> ImpM MCMem HostEnv Multicore ()
m = do
  (TExp Int64
start, TExp Int64
end) <- MulticoreGen (TExp Int64, TExp Int64)
getLoopBounds
  TExp Int64
n <- [Char] -> TExp Int64 -> MulticoreGen (TExp Int64)
forall {k} (t :: k) rep r op.
[Char] -> TExp t -> ImpM rep r op (TExp t)
dPrimVE [Char]
"n" (TExp Int64 -> MulticoreGen (TExp Int64))
-> TExp Int64 -> MulticoreGen (TExp Int64)
forall a b. (a -> b) -> a -> b
$ TExp Int64
end TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
- TExp Int64
start
  VName
i <- [Char] -> ImpM MCMem HostEnv Multicore VName
forall (m :: * -> *). MonadFreshNames m => [Char] -> m VName
newVName ([Char]
desc [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
"_i")
  (Code Multicore
body_allocs, Code Multicore
body) <- (Code Multicore -> (Code Multicore, Code Multicore))
-> ImpM MCMem HostEnv Multicore (Code Multicore)
-> ImpM MCMem HostEnv Multicore (Code Multicore, Code Multicore)
forall a b.
(a -> b)
-> ImpM MCMem HostEnv Multicore a -> ImpM MCMem HostEnv Multicore b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Code Multicore -> (Code Multicore, Code Multicore)
extractAllocations (ImpM MCMem HostEnv Multicore (Code Multicore)
 -> ImpM MCMem HostEnv Multicore (Code Multicore, Code Multicore))
-> ImpM MCMem HostEnv Multicore (Code Multicore)
-> ImpM MCMem HostEnv Multicore (Code Multicore, Code Multicore)
forall a b. (a -> b) -> a -> b
$
    ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore (Code Multicore)
forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore (Code Multicore))
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore (Code Multicore)
forall a b. (a -> b) -> a -> b
$ do
      VName -> IntType -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> IntType -> ImpM rep r op ()
addLoopVar VName
i IntType
Int64
      TExp Int64 -> ImpM MCMem HostEnv Multicore ()
m (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> TExp Int64 -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ VName -> TExp Int64
forall a. a -> TPrimExp Int64 a
Imp.le64 VName
i
  Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit Code Multicore
body_allocs
  -- Emit either foreach or normal for loop
  let from :: Exp
from = TExp Int64 -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped TExp Int64
start
  let bound :: Exp
bound = TExp Int64 -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped (TExp Int64
start TExp Int64 -> TExp Int64 -> TExp Int64
forall a. Num a => a -> a -> a
+ TExp Int64
n)
  Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ Multicore -> Code Multicore
forall a. a -> Code a
Imp.Op (Multicore -> Code Multicore) -> Multicore -> Code Multicore
forall a b. (a -> b) -> a -> b
$ VName -> Exp -> Exp -> Code Multicore -> Multicore
Imp.ForEach VName
i Exp
from Exp
bound Code Multicore
body

-- | Emit code for a sequential loop over each vector lane, given
-- and action that generates code for a single iteration. The action
-- is called with the symbolic index of the current iteration.
generateUniformizeLoop :: (Imp.TExp Int64 -> MulticoreGen ()) -> MulticoreGen ()
generateUniformizeLoop :: (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
generateUniformizeLoop TExp Int64 -> ImpM MCMem HostEnv Multicore ()
m = do
  VName
i <- [Char] -> ImpM MCMem HostEnv Multicore VName
forall (m :: * -> *). MonadFreshNames m => [Char] -> m VName
newVName [Char]
"uni_i"
  Code Multicore
body <- ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore (Code Multicore)
forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore (Code Multicore))
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore (Code Multicore)
forall a b. (a -> b) -> a -> b
$ do
    VName -> IntType -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> IntType -> ImpM rep r op ()
addLoopVar VName
i IntType
Int64
    TExp Int64 -> ImpM MCMem HostEnv Multicore ()
m (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> TExp Int64 -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ VName -> TExp Int64
forall a. a -> TPrimExp Int64 a
Imp.le64 VName
i
  Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ Multicore -> Code Multicore
forall a. a -> Code a
Imp.Op (Multicore -> Code Multicore) -> Multicore -> Code Multicore
forall a b. (a -> b) -> a -> b
$ VName -> Code Multicore -> Multicore
Imp.ForEachActive VName
i Code Multicore
body

-- | Given a piece of code, if that code performs an assignment, turn
-- that assignment into an extraction of element from a vector on the
-- right hand side, using a passed index for the extraction. Other code
-- is left as is.
extractVectorLane :: Imp.TExp Int64 -> MulticoreGen Imp.MCCode -> MulticoreGen ()
extractVectorLane :: TExp Int64
-> ImpM MCMem HostEnv Multicore (Code Multicore)
-> ImpM MCMem HostEnv Multicore ()
extractVectorLane TExp Int64
j ImpM MCMem HostEnv Multicore (Code Multicore)
code = do
  let ut_exp :: Exp
ut_exp = TExp Int64 -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped TExp Int64
j
  Code Multicore
code' <- ImpM MCMem HostEnv Multicore (Code Multicore)
code
  case Code Multicore
code' of
    Imp.SetScalar VName
vname Exp
e -> do
      Type
typ <- VName -> ImpM MCMem HostEnv Multicore Type
forall rep (m :: * -> *). HasScope rep m => VName -> m Type
lookupType VName
vname
      case Type
typ of
        -- ISPC v1.17 does not support extract on f16 yet..
        -- Thus we do this stupid conversion to f32
        Prim (FloatType FloatType
Float16) -> do
          TV Any
tv <- [Char] -> PrimType -> ImpM MCMem HostEnv Multicore (TV Any)
forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"hack_extract_f16" (FloatType -> PrimType
FloatType FloatType
Float32)
          Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ VName -> Exp -> Code Multicore
forall a. VName -> Exp -> Code a
Imp.SetScalar (TV Any -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Any
tv) Exp
e
          Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ Multicore -> Code Multicore
forall a. a -> Code a
Imp.Op (Multicore -> Code Multicore) -> Multicore -> Code Multicore
forall a b. (a -> b) -> a -> b
$ VName -> Exp -> Exp -> Multicore
Imp.ExtractLane VName
vname (TPrimExp Any VName -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped (TPrimExp Any VName -> Exp) -> TPrimExp Any VName -> Exp
forall a b. (a -> b) -> a -> b
$ TV Any -> TPrimExp Any VName
forall {k} (t :: k). TV t -> TExp t
tvExp TV Any
tv) Exp
ut_exp
        Type
_ -> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ Multicore -> Code Multicore
forall a. a -> Code a
Imp.Op (Multicore -> Code Multicore) -> Multicore -> Code Multicore
forall a b. (a -> b) -> a -> b
$ VName -> Exp -> Exp -> Multicore
Imp.ExtractLane VName
vname Exp
e Exp
ut_exp
    Code Multicore
_ ->
      Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit Code Multicore
code'

-- | Given an action that may generate some code, put that code
-- into an ISPC kernel.
inISPC :: MulticoreGen () -> MulticoreGen ()
inISPC :: ImpM MCMem HostEnv Multicore () -> ImpM MCMem HostEnv Multicore ()
inISPC ImpM MCMem HostEnv Multicore ()
code = do
  Code Multicore
code' <- ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore (Code Multicore)
forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect ImpM MCMem HostEnv Multicore ()
code
  [Param]
free <- Code Multicore -> MulticoreGen [Param]
forall a. FreeIn a => a -> MulticoreGen [Param]
freeParams Code Multicore
code'
  Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ Multicore -> Code Multicore
forall a. a -> Code a
Imp.Op (Multicore -> Code Multicore) -> Multicore -> Code Multicore
forall a b. (a -> b) -> a -> b
$ Code Multicore -> [Param] -> Multicore
Imp.ISPCKernel Code Multicore
code' [Param]
free

-------------------------------
------- SegRed helpers  -------
-------------------------------
sForVectorized' :: VName -> Imp.Exp -> MulticoreGen () -> MulticoreGen ()
sForVectorized' :: VName
-> Exp
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
sForVectorized' VName
i Exp
bound ImpM MCMem HostEnv Multicore ()
body = do
  let it :: IntType
it = case Exp -> PrimType
forall v. PrimExp v -> PrimType
primExpType Exp
bound of
        IntType IntType
bound_t -> IntType
bound_t
        PrimType
t -> [Char] -> IntType
forall a. HasCallStack => [Char] -> a
error ([Char] -> IntType) -> [Char] -> IntType
forall a b. (a -> b) -> a -> b
$ [Char]
"sFor': bound " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Exp -> [Char]
forall a. Pretty a => a -> [Char]
prettyString Exp
bound [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" is of type " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ PrimType -> [Char]
forall a. Pretty a => a -> [Char]
prettyString PrimType
t
  VName -> IntType -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> IntType -> ImpM rep r op ()
addLoopVar VName
i IntType
it
  Code Multicore
body' <- ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore (Code Multicore)
forall rep r op. ImpM rep r op () -> ImpM rep r op (Code op)
collect ImpM MCMem HostEnv Multicore ()
body
  Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. Code op -> ImpM rep r op ()
emit (Code Multicore -> ImpM MCMem HostEnv Multicore ())
-> Code Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ Multicore -> Code Multicore
forall a. a -> Code a
Imp.Op (Multicore -> Code Multicore) -> Multicore -> Code Multicore
forall a b. (a -> b) -> a -> b
$ VName -> Exp -> Exp -> Code Multicore -> Multicore
Imp.ForEach VName
i (PrimValue -> Exp
forall v. PrimValue -> PrimExp v
Imp.ValueExp (PrimValue -> Exp) -> PrimValue -> Exp
forall a b. (a -> b) -> a -> b
$ PrimType -> PrimValue
blankPrimValue (PrimType -> PrimValue) -> PrimType -> PrimValue
forall a b. (a -> b) -> a -> b
$ IntType -> PrimType
Imp.IntType IntType
Imp.Int64) Exp
bound Code Multicore
body'

sForVectorized :: String -> Imp.TExp t -> (Imp.TExp t -> MulticoreGen ()) -> MulticoreGen ()
sForVectorized :: forall {k} (t :: k).
[Char]
-> TExp t
-> (TExp t -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
sForVectorized [Char]
i TExp t
bound TExp t -> ImpM MCMem HostEnv Multicore ()
body = do
  VName
i' <- [Char] -> ImpM MCMem HostEnv Multicore VName
forall (m :: * -> *). MonadFreshNames m => [Char] -> m VName
newVName [Char]
i
  VName
-> Exp
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
sForVectorized' VName
i' (TExp t -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped TExp t
bound) (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$
    TExp t -> ImpM MCMem HostEnv Multicore ()
body (TExp t -> ImpM MCMem HostEnv Multicore ())
-> TExp t -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$
      Exp -> TExp t
forall {k} (t :: k) v. PrimExp v -> TPrimExp t v
TPrimExp (Exp -> TExp t) -> Exp -> TExp t
forall a b. (a -> b) -> a -> b
$
        VName -> PrimType -> Exp
Imp.var VName
i' (PrimType -> Exp) -> PrimType -> Exp
forall a b. (a -> b) -> a -> b
$
          Exp -> PrimType
forall v. PrimExp v -> PrimType
primExpType (Exp -> PrimType) -> Exp -> PrimType
forall a b. (a -> b) -> a -> b
$
            TExp t -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped TExp t
bound

-- | Like sLoopNest, but puts a vectorized loop at the innermost layer.
sLoopNestVectorized ::
  Shape ->
  ([Imp.TExp Int64] -> MulticoreGen ()) ->
  MulticoreGen ()
sLoopNestVectorized :: Shape
-> ([TExp Int64] -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
sLoopNestVectorized = [TExp Int64]
-> [SubExp]
-> ([TExp Int64] -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
sLoopNest' [] ([SubExp]
 -> ([TExp Int64] -> ImpM MCMem HostEnv Multicore ())
 -> ImpM MCMem HostEnv Multicore ())
-> (Shape -> [SubExp])
-> Shape
-> ([TExp Int64] -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Shape -> [SubExp]
forall d. ShapeBase d -> [d]
shapeDims
  where
    sLoopNest' :: [TExp Int64]
-> [SubExp]
-> ([TExp Int64] -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
sLoopNest' [TExp Int64]
is [] [TExp Int64] -> ImpM MCMem HostEnv Multicore ()
f = [TExp Int64] -> ImpM MCMem HostEnv Multicore ()
f ([TExp Int64] -> ImpM MCMem HostEnv Multicore ())
-> [TExp Int64] -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ [TExp Int64] -> [TExp Int64]
forall a. [a] -> [a]
reverse [TExp Int64]
is
    sLoopNest' [TExp Int64]
is [SubExp
d] [TExp Int64] -> ImpM MCMem HostEnv Multicore ()
f =
      [Char]
-> TExp Int64
-> (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
forall {k} (t :: k).
[Char]
-> TExp t
-> (TExp t -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
sForVectorized [Char]
"nest_i" (SubExp -> TExp Int64
pe64 SubExp
d) ((TExp Int64 -> ImpM MCMem HostEnv Multicore ())
 -> ImpM MCMem HostEnv Multicore ())
-> (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ \TExp Int64
i -> [TExp Int64]
-> [SubExp]
-> ([TExp Int64] -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
sLoopNest' (TExp Int64
i TExp Int64 -> [TExp Int64] -> [TExp Int64]
forall a. a -> [a] -> [a]
: [TExp Int64]
is) [] [TExp Int64] -> ImpM MCMem HostEnv Multicore ()
f
    sLoopNest' [TExp Int64]
is (SubExp
d : [SubExp]
ds) [TExp Int64] -> ImpM MCMem HostEnv Multicore ()
f =
      [Char]
-> TExp Int64
-> (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
forall {k} (t :: k) rep r op.
[Char]
-> TExp t -> (TExp t -> ImpM rep r op ()) -> ImpM rep r op ()
sFor [Char]
"nest_i" (SubExp -> TExp Int64
pe64 SubExp
d) ((TExp Int64 -> ImpM MCMem HostEnv Multicore ())
 -> ImpM MCMem HostEnv Multicore ())
-> (TExp Int64 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ \TExp Int64
i -> [TExp Int64]
-> [SubExp]
-> ([TExp Int64] -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
sLoopNest' (TExp Int64
i TExp Int64 -> [TExp Int64] -> [TExp Int64]
forall a. a -> [a] -> [a]
: [TExp Int64]
is) [SubExp]
ds [TExp Int64] -> ImpM MCMem HostEnv Multicore ()
f

-------------------------------
------- SegHist helpers -------
-------------------------------
renameHistOpLambda :: [HistOp MCMem] -> MulticoreGen [HistOp MCMem]
renameHistOpLambda :: [HistOp MCMem] -> MulticoreGen [HistOp MCMem]
renameHistOpLambda [HistOp MCMem]
hist_ops =
  [HistOp MCMem]
-> (HistOp MCMem -> ImpM MCMem HostEnv Multicore (HistOp MCMem))
-> MulticoreGen [HistOp MCMem]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [HistOp MCMem]
hist_ops ((HistOp MCMem -> ImpM MCMem HostEnv Multicore (HistOp MCMem))
 -> MulticoreGen [HistOp MCMem])
-> (HistOp MCMem -> ImpM MCMem HostEnv Multicore (HistOp MCMem))
-> MulticoreGen [HistOp MCMem]
forall a b. (a -> b) -> a -> b
$ \(HistOp Shape
w SubExp
rf [VName]
dest [SubExp]
neutral Shape
shape Lambda MCMem
lam) -> do
    Lambda MCMem
lam' <- Lambda MCMem -> ImpM MCMem HostEnv Multicore (Lambda MCMem)
forall rep (m :: * -> *).
(Renameable rep, MonadFreshNames m) =>
Lambda rep -> m (Lambda rep)
renameLambda Lambda MCMem
lam
    HistOp MCMem -> ImpM MCMem HostEnv Multicore (HistOp MCMem)
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (HistOp MCMem -> ImpM MCMem HostEnv Multicore (HistOp MCMem))
-> HistOp MCMem -> ImpM MCMem HostEnv Multicore (HistOp MCMem)
forall a b. (a -> b) -> a -> b
$ Shape
-> SubExp
-> [VName]
-> [SubExp]
-> Shape
-> Lambda MCMem
-> HistOp MCMem
forall rep.
Shape
-> SubExp
-> [VName]
-> [SubExp]
-> Shape
-> Lambda rep
-> HistOp rep
HistOp Shape
w SubExp
rf [VName]
dest [SubExp]
neutral Shape
shape Lambda MCMem
lam'

-- | Locking strategy used for an atomic update.
data Locking = Locking
  { -- | Array containing the lock.
    Locking -> VName
lockingArray :: VName,
    -- | Value for us to consider the lock free.
    Locking -> TExp Int32
lockingIsUnlocked :: Imp.TExp Int32,
    -- | What to write when we lock it.
    Locking -> TExp Int32
lockingToLock :: Imp.TExp Int32,
    -- | What to write when we unlock it.
    Locking -> TExp Int32
lockingToUnlock :: Imp.TExp Int32,
    -- | A transformation from the logical lock index to the
    -- physical position in the array.  This can also be used
    -- to make the lock array smaller.
    Locking -> [TExp Int64] -> [TExp Int64]
lockingMapping :: [Imp.TExp Int64] -> [Imp.TExp Int64]
  }

-- | A function for generating code for an atomic update.  Assumes
-- that the bucket is in-bounds.
type DoAtomicUpdate rep r =
  [VName] -> [Imp.TExp Int64] -> MulticoreGen ()

-- | The mechanism that will be used for performing the atomic update.
-- Approximates how efficient it will be.  Ordered from most to least
-- efficient.
data AtomicUpdate rep r
  = AtomicPrim (DoAtomicUpdate rep r)
  | -- | Can be done by efficient swaps.
    AtomicCAS (DoAtomicUpdate rep r)
  | -- | Requires explicit locking.
    AtomicLocking (Locking -> DoAtomicUpdate rep r)

atomicUpdateLocking ::
  AtomicBinOp ->
  Lambda MCMem ->
  AtomicUpdate MCMem ()
atomicUpdateLocking :: AtomicBinOp -> Lambda MCMem -> AtomicUpdate MCMem ()
atomicUpdateLocking AtomicBinOp
atomicBinOp Lambda MCMem
lam
  | Just [(BinOp, PrimType, VName, VName)]
ops_and_ts <- Lambda MCMem -> Maybe [(BinOp, PrimType, VName, VName)]
forall rep.
ASTRep rep =>
Lambda rep -> Maybe [(BinOp, PrimType, VName, VName)]
lamIsBinOp Lambda MCMem
lam,
    ((BinOp, PrimType, VName, VName) -> Bool)
-> [(BinOp, PrimType, VName, VName)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (\(BinOp
_, PrimType
t, VName
_, VName
_) -> Int -> Bool
supportedPrims (Int -> Bool) -> Int -> Bool
forall a b. (a -> b) -> a -> b
$ PrimType -> Int
primBitSize PrimType
t) [(BinOp, PrimType, VName, VName)]
ops_and_ts =
      [(BinOp, PrimType, VName, VName)]
-> DoAtomicUpdate MCMem () -> AtomicUpdate MCMem ()
forall {k} {k} {t :: * -> *} {b} {c} {d} {rep :: k} {r :: k}.
Foldable t =>
t (BinOp, b, c, d) -> DoAtomicUpdate MCMem () -> AtomicUpdate rep r
primOrCas [(BinOp, PrimType, VName, VName)]
ops_and_ts (DoAtomicUpdate MCMem () -> AtomicUpdate MCMem ())
-> DoAtomicUpdate MCMem () -> AtomicUpdate MCMem ()
forall a b. (a -> b) -> a -> b
$ \[VName]
arrs [TExp Int64]
bucket ->
        -- If the operator is a vectorised binary operator on 32-bit values,
        -- we can use a particularly efficient implementation. If the
        -- operator has an atomic implementation we use that, otherwise it
        -- is still a binary operator which can be implemented by atomic
        -- compare-and-swap if 32 bits.
        [(VName, (BinOp, PrimType, VName, VName))]
-> ((VName, (BinOp, PrimType, VName, VName))
    -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([VName]
-> [(BinOp, PrimType, VName, VName)]
-> [(VName, (BinOp, PrimType, VName, VName))]
forall a b. [a] -> [b] -> [(a, b)]
zip [VName]
arrs [(BinOp, PrimType, VName, VName)]
ops_and_ts) (((VName, (BinOp, PrimType, VName, VName))
  -> ImpM MCMem HostEnv Multicore ())
 -> ImpM MCMem HostEnv Multicore ())
-> ((VName, (BinOp, PrimType, VName, VName))
    -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ \(VName
a, (BinOp
op, PrimType
t, VName
x, VName
y)) -> do
          -- Common variables.
          TV Any
old <- [Char] -> PrimType -> ImpM MCMem HostEnv Multicore (TV Any)
forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"old" PrimType
t

          (VName
arr', Space
_a_space, Count Elements (TExp Int64)
bucket_offset) <- VName
-> [TExp Int64]
-> ImpM
     MCMem HostEnv Multicore (VName, Space, Count Elements (TExp Int64))
forall rep r op.
VName
-> [TExp Int64]
-> ImpM rep r op (VName, Space, Count Elements (TExp Int64))
fullyIndexArray VName
a [TExp Int64]
bucket

          case VName
-> VName
-> Count Elements (TExp Int32)
-> BinOp
-> Maybe (Exp -> Multicore)
opHasAtomicSupport (TV Any -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Any
old) VName
arr' (TExp Int64 -> TExp Int32
forall {k} (t :: k) v. IntExp t => TPrimExp t v -> TPrimExp Int32 v
sExt32 (TExp Int64 -> TExp Int32)
-> Count Elements (TExp Int64) -> Count Elements (TExp Int32)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Count Elements (TExp Int64)
bucket_offset) BinOp
op of
            Just Exp -> Multicore
f -> Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. op -> ImpM rep r op ()
sOp (Multicore -> ImpM MCMem HostEnv Multicore ())
-> Multicore -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ Exp -> Multicore
f (Exp -> Multicore) -> Exp -> Multicore
forall a b. (a -> b) -> a -> b
$ VName -> PrimType -> Exp
Imp.var VName
y PrimType
t
            Maybe (Exp -> Multicore)
Nothing ->
              PrimType
-> VName
-> VName
-> [TExp Int64]
-> VName
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
atomicUpdateCAS PrimType
t VName
a (TV Any -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Any
old) [TExp Int64]
bucket VName
x (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$
                VName
x VName -> Exp -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> Exp -> ImpM rep r op ()
<~~ BinOp -> Exp -> Exp -> Exp
forall v. BinOp -> PrimExp v -> PrimExp v -> PrimExp v
Imp.BinOpExp BinOp
op (VName -> PrimType -> Exp
Imp.var VName
x PrimType
t) (VName -> PrimType -> Exp
Imp.var VName
y PrimType
t)
  where
    opHasAtomicSupport :: VName
-> VName
-> Count Elements (TExp Int32)
-> BinOp
-> Maybe (Exp -> Multicore)
opHasAtomicSupport VName
old VName
arr' Count Elements (TExp Int32)
bucket' BinOp
bop = do
      let atomic :: (VName -> VName -> Count Elements (TExp Int32) -> a -> AtomicOp)
-> a -> Multicore
atomic VName -> VName -> Count Elements (TExp Int32) -> a -> AtomicOp
f = AtomicOp -> Multicore
Imp.Atomic (AtomicOp -> Multicore) -> (a -> AtomicOp) -> a -> Multicore
forall b c a. (b -> c) -> (a -> b) -> a -> c
. VName -> VName -> Count Elements (TExp Int32) -> a -> AtomicOp
f VName
old VName
arr' Count Elements (TExp Int32)
bucket'
      (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)
-> Exp -> Multicore
forall {a}.
(VName -> VName -> Count Elements (TExp Int32) -> a -> AtomicOp)
-> a -> Multicore
atomic ((VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)
 -> Exp -> Multicore)
-> Maybe
     (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)
-> Maybe (Exp -> Multicore)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> AtomicBinOp
atomicBinOp BinOp
bop

    primOrCas :: t (BinOp, b, c, d) -> DoAtomicUpdate MCMem () -> AtomicUpdate rep r
primOrCas t (BinOp, b, c, d)
ops
      | ((BinOp, b, c, d) -> Bool) -> t (BinOp, b, c, d) -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (BinOp, b, c, d) -> Bool
forall {b} {c} {d}. (BinOp, b, c, d) -> Bool
isPrim t (BinOp, b, c, d)
ops = DoAtomicUpdate MCMem () -> AtomicUpdate rep r
forall {k} {k} (rep :: k) (r :: k).
DoAtomicUpdate MCMem () -> AtomicUpdate rep r
AtomicPrim
      | Bool
otherwise = DoAtomicUpdate MCMem () -> AtomicUpdate rep r
forall {k} {k} (rep :: k) (r :: k).
DoAtomicUpdate MCMem () -> AtomicUpdate rep r
AtomicCAS

    isPrim :: (BinOp, b, c, d) -> Bool
isPrim (BinOp
op, b
_, c
_, d
_) = Maybe
  (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)
-> Bool
forall a. Maybe a -> Bool
isJust (Maybe
   (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)
 -> Bool)
-> Maybe
     (VName -> VName -> Count Elements (TExp Int32) -> Exp -> AtomicOp)
-> Bool
forall a b. (a -> b) -> a -> b
$ AtomicBinOp
atomicBinOp BinOp
op
atomicUpdateLocking AtomicBinOp
_ Lambda MCMem
op
  | [Prim PrimType
t] <- Lambda MCMem -> [Type]
forall rep. Lambda rep -> [Type]
lambdaReturnType Lambda MCMem
op,
    [LParam MCMem
xp, LParam MCMem
_] <- Lambda MCMem -> [LParam MCMem]
forall rep. Lambda rep -> [LParam rep]
lambdaParams Lambda MCMem
op,
    Int -> Bool
supportedPrims (PrimType -> Int
primBitSize PrimType
t) = DoAtomicUpdate MCMem () -> AtomicUpdate MCMem ()
forall {k} {k} (rep :: k) (r :: k).
DoAtomicUpdate MCMem () -> AtomicUpdate rep r
AtomicCAS (DoAtomicUpdate MCMem () -> AtomicUpdate MCMem ())
-> DoAtomicUpdate MCMem () -> AtomicUpdate MCMem ()
forall a b. (a -> b) -> a -> b
$ \[VName
arr] [TExp Int64]
bucket -> do
      TV Any
old <- [Char] -> PrimType -> ImpM MCMem HostEnv Multicore (TV Any)
forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"old" PrimType
t
      PrimType
-> VName
-> VName
-> [TExp Int64]
-> VName
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
atomicUpdateCAS PrimType
t VName
arr (TV Any -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Any
old) [TExp Int64]
bucket (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName LParam MCMem
Param LetDecMem
xp) (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$
        [Param LetDecMem] -> Body MCMem -> ImpM MCMem HostEnv Multicore ()
forall dec rep r op. [Param dec] -> Body rep -> ImpM rep r op ()
compileBody' [LParam MCMem
Param LetDecMem
xp] (Body MCMem -> ImpM MCMem HostEnv Multicore ())
-> Body MCMem -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$
          Lambda MCMem -> Body MCMem
forall rep. Lambda rep -> Body rep
lambdaBody Lambda MCMem
op
atomicUpdateLocking AtomicBinOp
_ Lambda MCMem
op = (Locking -> DoAtomicUpdate MCMem ()) -> AtomicUpdate MCMem ()
forall {k} {k} (rep :: k) (r :: k).
(Locking -> DoAtomicUpdate MCMem ()) -> AtomicUpdate rep r
AtomicLocking ((Locking -> DoAtomicUpdate MCMem ()) -> AtomicUpdate MCMem ())
-> (Locking -> DoAtomicUpdate MCMem ()) -> AtomicUpdate MCMem ()
forall a b. (a -> b) -> a -> b
$ \Locking
locking [VName]
arrs [TExp Int64]
bucket -> do
  TV Int32
old <- [Char] -> PrimType -> ImpM MCMem HostEnv Multicore (TV Int32)
forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"old" PrimType
int32
  TV Int32
continue <- [Char]
-> PrimType
-> TExp Int32
-> ImpM MCMem HostEnv Multicore (TV Int32)
forall {k} (t :: k) rep r op.
[Char] -> PrimType -> TExp t -> ImpM rep r op (TV t)
dPrimVol [Char]
"continue" PrimType
int32 (TExp Int32
0 :: Imp.TExp Int32)

  -- Correctly index into locks.
  (VName
locks', Space
_locks_space, Count Elements (TExp Int64)
locks_offset) <-
    VName
-> [TExp Int64]
-> ImpM
     MCMem HostEnv Multicore (VName, Space, Count Elements (TExp Int64))
forall rep r op.
VName
-> [TExp Int64]
-> ImpM rep r op (VName, Space, Count Elements (TExp Int64))
fullyIndexArray (Locking -> VName
lockingArray Locking
locking) ([TExp Int64]
 -> ImpM
      MCMem
      HostEnv
      Multicore
      (VName, Space, Count Elements (TExp Int64)))
-> [TExp Int64]
-> ImpM
     MCMem HostEnv Multicore (VName, Space, Count Elements (TExp Int64))
forall a b. (a -> b) -> a -> b
$ Locking -> [TExp Int64] -> [TExp Int64]
lockingMapping Locking
locking [TExp Int64]
bucket

  -- Critical section
  let try_acquire_lock :: ImpM rep r Multicore ()
try_acquire_lock = do
        TV Int32
old TV Int32 -> TExp Int32 -> ImpM rep r Multicore ()
forall {k} (t :: k) rep r op. TV t -> TExp t -> ImpM rep r op ()
<-- (TExp Int32
0 :: Imp.TExp Int32)
        Multicore -> ImpM rep r Multicore ()
forall op rep r. op -> ImpM rep r op ()
sOp (Multicore -> ImpM rep r Multicore ())
-> (AtomicOp -> Multicore) -> AtomicOp -> ImpM rep r Multicore ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AtomicOp -> Multicore
Imp.Atomic (AtomicOp -> ImpM rep r Multicore ())
-> AtomicOp -> ImpM rep r Multicore ()
forall a b. (a -> b) -> a -> b
$
          PrimType
-> VName
-> VName
-> Count Elements (TExp Int32)
-> VName
-> Exp
-> AtomicOp
Imp.AtomicCmpXchg
            PrimType
int32
            (TV Int32 -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Int32
old)
            VName
locks'
            (TExp Int64 -> TExp Int32
forall {k} (t :: k) v. IntExp t => TPrimExp t v -> TPrimExp Int32 v
sExt32 (TExp Int64 -> TExp Int32)
-> Count Elements (TExp Int64) -> Count Elements (TExp Int32)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Count Elements (TExp Int64)
locks_offset)
            (TV Int32 -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Int32
continue)
            (TExp Int32 -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped (Locking -> TExp Int32
lockingToLock Locking
locking))
      lock_acquired :: TExp Int32
lock_acquired = TV Int32 -> TExp Int32
forall {k} (t :: k). TV t -> TExp t
tvExp TV Int32
continue
      -- Even the releasing is done with an atomic rather than a
      -- simple write, for memory coherency reasons.
      release_lock :: ImpM rep r Multicore ()
release_lock = do
        TV Int32
old TV Int32 -> TExp Int32 -> ImpM rep r Multicore ()
forall {k} (t :: k) rep r op. TV t -> TExp t -> ImpM rep r op ()
<-- Locking -> TExp Int32
lockingToLock Locking
locking
        Multicore -> ImpM rep r Multicore ()
forall op rep r. op -> ImpM rep r op ()
sOp (Multicore -> ImpM rep r Multicore ())
-> (AtomicOp -> Multicore) -> AtomicOp -> ImpM rep r Multicore ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AtomicOp -> Multicore
Imp.Atomic (AtomicOp -> ImpM rep r Multicore ())
-> AtomicOp -> ImpM rep r Multicore ()
forall a b. (a -> b) -> a -> b
$
          PrimType
-> VName
-> VName
-> Count Elements (TExp Int32)
-> VName
-> Exp
-> AtomicOp
Imp.AtomicCmpXchg
            PrimType
int32
            (TV Int32 -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Int32
old)
            VName
locks'
            (TExp Int64 -> TExp Int32
forall {k} (t :: k) v. IntExp t => TPrimExp t v -> TPrimExp Int32 v
sExt32 (TExp Int64 -> TExp Int32)
-> Count Elements (TExp Int64) -> Count Elements (TExp Int32)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Count Elements (TExp Int64)
locks_offset)
            (TV Int32 -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Int32
continue)
            (TExp Int32 -> Exp
forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped (Locking -> TExp Int32
lockingToUnlock Locking
locking))

  -- Preparing parameters. It is assumed that the caller has already
  -- filled the arr_params. We copy the current value to the
  -- accumulator parameters.
  let ([Param LetDecMem]
acc_params, [Param LetDecMem]
_arr_params) = Int -> [Param LetDecMem] -> ([Param LetDecMem], [Param LetDecMem])
forall a. Int -> [a] -> ([a], [a])
splitAt ([VName] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [VName]
arrs) ([Param LetDecMem] -> ([Param LetDecMem], [Param LetDecMem]))
-> [Param LetDecMem] -> ([Param LetDecMem], [Param LetDecMem])
forall a b. (a -> b) -> a -> b
$ Lambda MCMem -> [LParam MCMem]
forall rep. Lambda rep -> [LParam rep]
lambdaParams Lambda MCMem
op
      bind_acc_params :: ImpM rep r op ()
bind_acc_params =
        ImpM rep r op () -> ImpM rep r op ()
forall rep r op a. ImpM rep r op a -> ImpM rep r op a
everythingVolatile (ImpM rep r op () -> ImpM rep r op ())
-> ImpM rep r op () -> ImpM rep r op ()
forall a b. (a -> b) -> a -> b
$
          Text -> ImpM rep r op () -> ImpM rep r op ()
forall rep r op. Text -> ImpM rep r op () -> ImpM rep r op ()
sComment Text
"bind lhs" (ImpM rep r op () -> ImpM rep r op ())
-> ImpM rep r op () -> ImpM rep r op ()
forall a b. (a -> b) -> a -> b
$
            [(Param LetDecMem, VName)]
-> ((Param LetDecMem, VName) -> ImpM rep r op ())
-> ImpM rep r op ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Param LetDecMem] -> [VName] -> [(Param LetDecMem, VName)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Param LetDecMem]
acc_params [VName]
arrs) (((Param LetDecMem, VName) -> ImpM rep r op ())
 -> ImpM rep r op ())
-> ((Param LetDecMem, VName) -> ImpM rep r op ())
-> ImpM rep r op ()
forall a b. (a -> b) -> a -> b
$ \(Param LetDecMem
acc_p, VName
arr) ->
              VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix (Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName Param LetDecMem
acc_p) [] (VName -> SubExp
Var VName
arr) [TExp Int64]
bucket

  let op_body :: ImpM MCMem r op ()
op_body =
        Text -> ImpM MCMem r op () -> ImpM MCMem r op ()
forall rep r op. Text -> ImpM rep r op () -> ImpM rep r op ()
sComment Text
"execute operation" (ImpM MCMem r op () -> ImpM MCMem r op ())
-> ImpM MCMem r op () -> ImpM MCMem r op ()
forall a b. (a -> b) -> a -> b
$
          [Param LetDecMem] -> Body MCMem -> ImpM MCMem r op ()
forall dec rep r op. [Param dec] -> Body rep -> ImpM rep r op ()
compileBody' [Param LetDecMem]
acc_params (Body MCMem -> ImpM MCMem r op ())
-> Body MCMem -> ImpM MCMem r op ()
forall a b. (a -> b) -> a -> b
$
            Lambda MCMem -> Body MCMem
forall rep. Lambda rep -> Body rep
lambdaBody Lambda MCMem
op

      do_hist :: ImpM rep r op ()
do_hist =
        ImpM rep r op () -> ImpM rep r op ()
forall rep r op a. ImpM rep r op a -> ImpM rep r op a
everythingVolatile (ImpM rep r op () -> ImpM rep r op ())
-> ImpM rep r op () -> ImpM rep r op ()
forall a b. (a -> b) -> a -> b
$
          Text -> ImpM rep r op () -> ImpM rep r op ()
forall rep r op. Text -> ImpM rep r op () -> ImpM rep r op ()
sComment Text
"update global result" (ImpM rep r op () -> ImpM rep r op ())
-> ImpM rep r op () -> ImpM rep r op ()
forall a b. (a -> b) -> a -> b
$
            (VName -> SubExp -> ImpM rep r op ())
-> [VName] -> [SubExp] -> ImpM rep r op ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ ([TExp Int64] -> VName -> SubExp -> ImpM rep r op ()
forall {rep} {r} {op}.
[TExp Int64] -> VName -> SubExp -> ImpM rep r op ()
writeArray [TExp Int64]
bucket) [VName]
arrs ([SubExp] -> ImpM rep r op ()) -> [SubExp] -> ImpM rep r op ()
forall a b. (a -> b) -> a -> b
$
              (Param LetDecMem -> SubExp) -> [Param LetDecMem] -> [SubExp]
forall a b. (a -> b) -> [a] -> [b]
map (VName -> SubExp
Var (VName -> SubExp)
-> (Param LetDecMem -> VName) -> Param LetDecMem -> SubExp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Param LetDecMem -> VName
forall dec. Param dec -> VName
paramName) [Param LetDecMem]
acc_params

  -- While-loop: Try to insert your value
  TExp Bool
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhile (TV Int32 -> TExp Int32
forall {k} (t :: k). TV t -> TExp t
tvExp TV Int32
continue TExp Int32 -> TExp Int32 -> TExp Bool
forall {k} v (t :: k).
Eq v =>
TPrimExp t v -> TPrimExp t v -> TPrimExp Bool v
.==. TExp Int32
0) (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ do
    ImpM MCMem HostEnv Multicore ()
forall {rep} {r}. ImpM rep r Multicore ()
try_acquire_lock
    TExp Bool
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sUnless (TExp Int32
lock_acquired TExp Int32 -> TExp Int32 -> TExp Bool
forall {k} v (t :: k).
Eq v =>
TPrimExp t v -> TPrimExp t v -> TPrimExp Bool v
.==. TExp Int32
0) (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ do
      [LParam MCMem] -> ImpM MCMem HostEnv Multicore ()
forall rep (inner :: * -> *) r op.
Mem rep inner =>
[LParam rep] -> ImpM rep r op ()
dLParams [LParam MCMem]
[Param LetDecMem]
acc_params
      ImpM MCMem HostEnv Multicore ()
forall {rep} {r} {op}. ImpM rep r op ()
bind_acc_params
      ImpM MCMem HostEnv Multicore ()
forall {r} {op}. ImpM MCMem r op ()
op_body
      ImpM MCMem HostEnv Multicore ()
forall {rep} {r} {op}. ImpM rep r op ()
do_hist
      ImpM MCMem HostEnv Multicore ()
forall {rep} {r}. ImpM rep r Multicore ()
release_lock
  where
    writeArray :: [TExp Int64] -> VName -> SubExp -> ImpM rep r op ()
writeArray [TExp Int64]
bucket VName
arr SubExp
val = VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix VName
arr [TExp Int64]
bucket SubExp
val []

atomicUpdateCAS ::
  PrimType ->
  VName ->
  VName ->
  [Imp.TExp Int64] ->
  VName ->
  MulticoreGen () ->
  MulticoreGen ()
atomicUpdateCAS :: PrimType
-> VName
-> VName
-> [TExp Int64]
-> VName
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
atomicUpdateCAS PrimType
t VName
arr VName
old [TExp Int64]
bucket VName
x ImpM MCMem HostEnv Multicore ()
do_op = do
  TV Int32
run_loop <- [Char] -> TExp Int32 -> ImpM MCMem HostEnv Multicore (TV Int32)
forall {k} (t :: k) rep r op.
[Char] -> TExp t -> ImpM rep r op (TV t)
dPrimV [Char]
"run_loop" (TExp Int32
0 :: Imp.TExp Int32)
  (VName
arr', Space
_a_space, Count Elements (TExp Int64)
bucket_offset) <- VName
-> [TExp Int64]
-> ImpM
     MCMem HostEnv Multicore (VName, Space, Count Elements (TExp Int64))
forall rep r op.
VName
-> [TExp Int64]
-> ImpM rep r op (VName, Space, Count Elements (TExp Int64))
fullyIndexArray VName
arr [TExp Int64]
bucket

  PrimType
bytes <- Int -> MulticoreGen PrimType
toIntegral (Int -> MulticoreGen PrimType) -> Int -> MulticoreGen PrimType
forall a b. (a -> b) -> a -> b
$ PrimType -> Int
primBitSize PrimType
t
  let (PrimExp v -> PrimExp v
toBits, PrimExp v -> PrimExp v
fromBits) =
        case PrimType
t of
          FloatType FloatType
Float16 ->
            ( \PrimExp v
v -> [Char] -> [PrimExp v] -> PrimType -> PrimExp v
forall v. [Char] -> [PrimExp v] -> PrimType -> PrimExp v
Imp.FunExp [Char]
"to_bits16" [PrimExp v
v] PrimType
int16,
              \PrimExp v
v -> [Char] -> [PrimExp v] -> PrimType -> PrimExp v
forall v. [Char] -> [PrimExp v] -> PrimType -> PrimExp v
Imp.FunExp [Char]
"from_bits16" [PrimExp v
v] PrimType
t
            )
          FloatType FloatType
Float32 ->
            ( \PrimExp v
v -> [Char] -> [PrimExp v] -> PrimType -> PrimExp v
forall v. [Char] -> [PrimExp v] -> PrimType -> PrimExp v
Imp.FunExp [Char]
"to_bits32" [PrimExp v
v] PrimType
int32,
              \PrimExp v
v -> [Char] -> [PrimExp v] -> PrimType -> PrimExp v
forall v. [Char] -> [PrimExp v] -> PrimType -> PrimExp v
Imp.FunExp [Char]
"from_bits32" [PrimExp v
v] PrimType
t
            )
          FloatType FloatType
Float64 ->
            ( \PrimExp v
v -> [Char] -> [PrimExp v] -> PrimType -> PrimExp v
forall v. [Char] -> [PrimExp v] -> PrimType -> PrimExp v
Imp.FunExp [Char]
"to_bits64" [PrimExp v
v] PrimType
int64,
              \PrimExp v
v -> [Char] -> [PrimExp v] -> PrimType -> PrimExp v
forall v. [Char] -> [PrimExp v] -> PrimType -> PrimExp v
Imp.FunExp [Char]
"from_bits64" [PrimExp v
v] PrimType
t
            )
          PrimType
_ -> (PrimExp v -> PrimExp v
forall a. a -> a
id, PrimExp v -> PrimExp v
forall a. a -> a
id)

      int :: PrimType
int
        | PrimType -> Int
primBitSize PrimType
t Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
16 = PrimType
int16
        | PrimType -> Int
primBitSize PrimType
t Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
32 = PrimType
int32
        | Bool
otherwise = PrimType
int64

  ImpM MCMem HostEnv Multicore () -> ImpM MCMem HostEnv Multicore ()
forall rep r op a. ImpM rep r op a -> ImpM rep r op a
everythingVolatile (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ VName
-> [TExp Int64]
-> SubExp
-> [TExp Int64]
-> ImpM MCMem HostEnv Multicore ()
forall rep r op.
VName -> [TExp Int64] -> SubExp -> [TExp Int64] -> ImpM rep r op ()
copyDWIMFix VName
old [] (VName -> SubExp
Var VName
arr) [TExp Int64]
bucket

  VName
old_bits_v <- TV Any -> VName
forall {k} (t :: k). TV t -> VName
tvVar (TV Any -> VName)
-> ImpM MCMem HostEnv Multicore (TV Any)
-> ImpM MCMem HostEnv Multicore VName
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> PrimType -> ImpM MCMem HostEnv Multicore (TV Any)
forall {k} rep r op (t :: k).
[Char] -> PrimType -> ImpM rep r op (TV t)
dPrim [Char]
"old_bits" PrimType
int
  VName
old_bits_v VName -> Exp -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> Exp -> ImpM rep r op ()
<~~ Exp -> Exp
forall {v}. PrimExp v -> PrimExp v
toBits (VName -> PrimType -> Exp
Imp.var VName
old PrimType
t)
  let old_bits :: Exp
old_bits = VName -> PrimType -> Exp
Imp.var VName
old_bits_v PrimType
int

  -- While-loop: Try to insert your value
  TExp Bool
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall rep r op. TExp Bool -> ImpM rep r op () -> ImpM rep r op ()
sWhile (TV Int32 -> TExp Int32
forall {k} (t :: k). TV t -> TExp t
tvExp TV Int32
run_loop TExp Int32 -> TExp Int32 -> TExp Bool
forall {k} v (t :: k).
Eq v =>
TPrimExp t v -> TPrimExp t v -> TPrimExp Bool v
.==. TExp Int32
0) (ImpM MCMem HostEnv Multicore ()
 -> ImpM MCMem HostEnv Multicore ())
-> ImpM MCMem HostEnv Multicore ()
-> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$ do
    VName
x VName -> Exp -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> Exp -> ImpM rep r op ()
<~~ VName -> PrimType -> Exp
Imp.var VName
old PrimType
t
    ImpM MCMem HostEnv Multicore ()
do_op -- Writes result into x
    Multicore -> ImpM MCMem HostEnv Multicore ()
forall op rep r. op -> ImpM rep r op ()
sOp (Multicore -> ImpM MCMem HostEnv Multicore ())
-> (AtomicOp -> Multicore)
-> AtomicOp
-> ImpM MCMem HostEnv Multicore ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AtomicOp -> Multicore
Imp.Atomic (AtomicOp -> ImpM MCMem HostEnv Multicore ())
-> AtomicOp -> ImpM MCMem HostEnv Multicore ()
forall a b. (a -> b) -> a -> b
$
      PrimType
-> VName
-> VName
-> Count Elements (TExp Int32)
-> VName
-> Exp
-> AtomicOp
Imp.AtomicCmpXchg
        PrimType
bytes
        VName
old_bits_v
        VName
arr'
        (TExp Int64 -> TExp Int32
forall {k} (t :: k) v. IntExp t => TPrimExp t v -> TPrimExp Int32 v
sExt32 (TExp Int64 -> TExp Int32)
-> Count Elements (TExp Int64) -> Count Elements (TExp Int32)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Count Elements (TExp Int64)
bucket_offset)
        (TV Int32 -> VName
forall {k} (t :: k). TV t -> VName
tvVar TV Int32
run_loop)
        (Exp -> Exp
forall {v}. PrimExp v -> PrimExp v
toBits (VName -> PrimType -> Exp
Imp.var VName
x PrimType
t))
    VName
old VName -> Exp -> ImpM MCMem HostEnv Multicore ()
forall rep r op. VName -> Exp -> ImpM rep r op ()
<~~ Exp -> Exp
forall {v}. PrimExp v -> PrimExp v
fromBits Exp
old_bits

supportedPrims :: Int -> Bool
supportedPrims :: Int -> Bool
supportedPrims Int
8 = Bool
True
supportedPrims Int
16 = Bool
True
supportedPrims Int
32 = Bool
True
supportedPrims Int
64 = Bool
True
supportedPrims Int
_ = Bool
False

-- Supported bytes lengths by GCC (and clang) compiler
toIntegral :: Int -> MulticoreGen PrimType
toIntegral :: Int -> MulticoreGen PrimType
toIntegral Int
8 = PrimType -> MulticoreGen PrimType
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure PrimType
int8
toIntegral Int
16 = PrimType -> MulticoreGen PrimType
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure PrimType
int16
toIntegral Int
32 = PrimType -> MulticoreGen PrimType
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure PrimType
int32
toIntegral Int
64 = PrimType -> MulticoreGen PrimType
forall a. a -> ImpM MCMem HostEnv Multicore a
forall (f :: * -> *) a. Applicative f => a -> f a
pure PrimType
int64
toIntegral Int
b = [Char] -> MulticoreGen PrimType
forall a. HasCallStack => [Char] -> a
error ([Char] -> MulticoreGen PrimType)
-> [Char] -> MulticoreGen PrimType
forall a b. (a -> b) -> a -> b
$ [Char]
"number of bytes is not supported for CAS - " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Pretty a => a -> [Char]
prettyString Int
b