{-# LANGUAGE QuasiQuotes #-}

-- | C code generator.  This module can convert a correct ImpCode
-- program to an equivalent C program.
module Futhark.CodeGen.Backends.MulticoreC
  ( compileProg,
    generateContext,
    GC.CParts (..),
    GC.asLibrary,
    GC.asExecutable,
    GC.asServer,
    operations,
    cliOptions,
    compileOp,
    ValueType (..),
    paramToCType,
    prepareTaskStruct,
    closureFreeStructField,
    generateParLoopFn,
    addTimingFields,
    functionTiming,
    functionIterations,
    multiCoreReport,
    multicoreDef,
    multicoreName,
    DefSpecifier,
    atomicOps,
  )
where

import Control.Monad
import Data.Loc
import Data.Map qualified as M
import Data.Maybe
import Data.Text qualified as T
import Futhark.CodeGen.Backends.GenericC qualified as GC
import Futhark.CodeGen.Backends.GenericC.Options
import Futhark.CodeGen.Backends.SimpleRep
import Futhark.CodeGen.ImpCode.Multicore hiding (ValueType)
import Futhark.CodeGen.ImpGen.Multicore qualified as ImpGen
import Futhark.CodeGen.RTS.C (schedulerH)
import Futhark.IR.MCMem (MCMem, Prog)
import Futhark.MonadFreshNames
import Language.C.Quote.OpenCL qualified as C
import Language.C.Syntax qualified as C

-- | Compile the program to ImpCode with multicore operations.
compileProg ::
  MonadFreshNames m => T.Text -> Prog MCMem -> m (ImpGen.Warnings, GC.CParts)
compileProg :: forall (m :: * -> *).
MonadFreshNames m =>
Text -> Prog MCMem -> m (Warnings, CParts)
compileProg Text
version =
  forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse
    ( forall (m :: * -> *) op.
MonadFreshNames m =>
Text
-> Text
-> Operations op ()
-> CompilerM op () ()
-> Text
-> (Space, [Space])
-> [Option]
-> Definitions op
-> m CParts
GC.compileProg
        Text
"multicore"
        Text
version
        forall s. Operations Multicore s
operations
        forall op s. CompilerM op s ()
generateContext
        Text
""
        (Space
DefaultSpace, [Space
DefaultSpace])
        [Option]
cliOptions
    )
    forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< forall (m :: * -> *).
MonadFreshNames m =>
Prog MCMem -> m (Warnings, Definitions Multicore)
ImpGen.compileProg

-- | Generate the multicore context definitions.  This is exported
-- because the WASM backend needs it.
generateContext :: GC.CompilerM op s ()
generateContext :: forall op s. CompilerM op s ()
generateContext = do
  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cunit|$esc:(T.unpack schedulerH)|]

  String
cfg <- forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s String
GC.publicDef String
"context_config" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|struct $id:s;|],
      [C.cedecl|struct $id:s { int in_use;
                               int debugging;
                               int profiling;
                               int num_threads;
                               const char *cache_fname;
                             };|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_new" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|struct $id:cfg* $id:s(void);|],
      [C.cedecl|struct $id:cfg* $id:s(void) {
                             struct $id:cfg *cfg = (struct $id:cfg*) malloc(sizeof(struct $id:cfg));
                             if (cfg == NULL) {
                               return NULL;
                             }
                             cfg->in_use = 0;
                             cfg->debugging = 0;
                             cfg->profiling = 0;
                             cfg->cache_fname = NULL;
                             cfg->num_threads = 0;
                             return cfg;
                           }|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_free" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg) {
                             assert(!cfg->in_use);
                             free(cfg);
                           }|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_debugging" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int detail) {
                      cfg->debugging = detail;
                    }|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_profiling" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag) {
                      cfg->profiling = flag;
                    }|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_logging" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int detail) {
                             // Does nothing for this backend.
                             (void)cfg; (void)detail;
                           }|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_num_threads" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg *cfg, int n);|],
      [C.cedecl|void $id:s(struct $id:cfg *cfg, int n) {
                             cfg->num_threads = n;
                           }|]
    )

  ([FieldGroup]
fields, [Stm]
init_fields, [Stm]
free_fields) <- forall op s. CompilerM op s ([FieldGroup], [Stm], [Stm])
GC.contextContents

  String
ctx <- forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s String
GC.publicDef String
"context" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|struct $id:s;|],
      [C.cedecl|struct $id:s {
                      struct $id:cfg* cfg;
                      struct scheduler scheduler;
                      int detail_memory;
                      int debugging;
                      int profiling;
                      int profiling_paused;
                      int logging;
                      typename lock_t lock;
                      char *error;
                      typename lock_t error_lock;
                      typename FILE *log;
                      int total_runs;
                      long int total_runtime;
                      $sdecls:fields

                      // Tuning parameters
                      typename int64_t tuning_timing;
                      typename int64_t tuning_iter;
                    };|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_new" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg);|],
      [C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg) {
             assert(!cfg->in_use);
             struct $id:ctx* ctx = (struct $id:ctx*) malloc(sizeof(struct $id:ctx));
             if (ctx == NULL) {
               return NULL;
             }
             ctx->cfg = cfg;
             ctx->cfg->in_use = 1;

             // Initialize rand()
             fast_srand(time(0));
             ctx->detail_memory = cfg->debugging;
             ctx->debugging = cfg->debugging;
             ctx->profiling = cfg->profiling;
             ctx->profiling_paused = 0;
             ctx->logging = 0;
             ctx->error = NULL;
             create_lock(&ctx->error_lock);
             ctx->log = stderr;
             create_lock(&ctx->lock);

             int tune_kappa = 0;
             double kappa = 5.1f * 1000;

             if (tune_kappa) {
               if (determine_kappa(&kappa) != 0) {
                 return NULL;
               }
             }

             if (scheduler_init(&ctx->scheduler,
                                cfg->num_threads > 0 ?
                                cfg->num_threads : num_processors(),
                                kappa) != 0) {
               return NULL;
             }

             $stms:init_fields

             init_constants(ctx);

             return ctx;
          }|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_free" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|void $id:s(struct $id:ctx* ctx);|],
      [C.cedecl|void $id:s(struct $id:ctx* ctx) {
             $stms:free_fields
             free_constants(ctx);
             (void)scheduler_destroy(&ctx->scheduler);
             free_lock(&ctx->lock);
             ctx->cfg->in_use = 0;
             free(ctx);
           }|]
    )

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_sync" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|int $id:s(struct $id:ctx* ctx);|],
      [C.cedecl|int $id:s(struct $id:ctx* ctx) {
                             (void)ctx;
                             return 0;
                           }|]
    )

  forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_names[0];|]
  forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_vars[0];|]
  forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_classes[0];|]

  forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_tuning_param" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
    ( [C.cedecl|int $id:s(struct $id:cfg* cfg, const char *param_name, size_t param_value);|],
      [C.cedecl|int $id:s(struct $id:cfg* cfg, const char *param_name, size_t param_value) {
                     (void)cfg; (void)param_name; (void)param_value;
                     return 1;
                   }|]
    )

-- | Multicore-related command line options.
cliOptions :: [Option]
cliOptions :: [Option]
cliOptions =
  [ Option
      { optionLongName :: String
optionLongName = String
"profile",
        optionShortName :: Maybe Char
optionShortName = forall a. a -> Maybe a
Just Char
'P',
        optionArgument :: OptionArgument
optionArgument = OptionArgument
NoArgument,
        optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_profiling(cfg, 1);|],
        optionDescription :: String
optionDescription = String
"Gather profiling information."
      },
    Option
      { optionLongName :: String
optionLongName = String
"num-threads",
        optionShortName :: Maybe Char
optionShortName = forall a. Maybe a
Nothing,
        optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"INT",
        optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_num_threads(cfg, atoi(optarg));|],
        optionDescription :: String
optionDescription = String
"Set number of threads used for execution."
      }
  ]

-- | Operations for generating multicore code.
operations :: GC.Operations Multicore s
operations :: forall s. Operations Multicore s
operations =
  forall op s. Operations op s
GC.defaultOperations
    { opsCompiler :: OpCompiler Multicore s
GC.opsCompiler = forall s. OpCompiler Multicore s
compileOp,
      opsCritical :: ([BlockItem], [BlockItem])
GC.opsCritical =
        -- The thread entering an API function is always considered
        -- the "first worker" - note that this might differ from the
        -- thread that created the context!  This likely only matters
        -- for entry points, since they are the only API functions
        -- that contain parallel operations.
        ( [C.citems|worker_local = &ctx->scheduler.workers[0];|],
          []
        )
    }

closureFreeStructField :: VName -> Name
closureFreeStructField :: VName -> Name
closureFreeStructField VName
v =
  String -> Name
nameFromString String
"free_" forall a. Semigroup a => a -> a -> a
<> String -> Name
nameFromString (forall a. Pretty a => a -> String
prettyString VName
v)

closureRetvalStructField :: VName -> Name
closureRetvalStructField :: VName -> Name
closureRetvalStructField VName
v =
  String -> Name
nameFromString String
"retval_" forall a. Semigroup a => a -> a -> a
<> String -> Name
nameFromString (forall a. Pretty a => a -> String
prettyString VName
v)

data ValueType = Prim PrimType | MemBlock | RawMem

compileFreeStructFields :: [VName] -> [(C.Type, ValueType)] -> [C.FieldGroup]
compileFreeStructFields :: [VName] -> [(Type, ValueType)] -> [FieldGroup]
compileFreeStructFields = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith VName -> (Type, ValueType) -> FieldGroup
field
  where
    field :: VName -> (Type, ValueType) -> FieldGroup
field VName
name (Type
ty, Prim PrimType
_) =
      [C.csdecl|$ty:ty $id:(closureFreeStructField name);|]
    field VName
name (Type
_, ValueType
_) =
      [C.csdecl|$ty:defaultMemBlockType $id:(closureFreeStructField name);|]

compileRetvalStructFields :: [VName] -> [(C.Type, ValueType)] -> [C.FieldGroup]
compileRetvalStructFields :: [VName] -> [(Type, ValueType)] -> [FieldGroup]
compileRetvalStructFields = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith VName -> (Type, ValueType) -> FieldGroup
field
  where
    field :: VName -> (Type, ValueType) -> FieldGroup
field VName
name (Type
ty, Prim PrimType
_) =
      [C.csdecl|$ty:ty *$id:(closureRetvalStructField name);|]
    field VName
name (Type
_, ValueType
_) =
      [C.csdecl|$ty:defaultMemBlockType $id:(closureRetvalStructField name);|]

compileSetStructValues ::
  C.ToIdent a =>
  a ->
  [VName] ->
  [(C.Type, ValueType)] ->
  [C.Stm]
compileSetStructValues :: forall a. ToIdent a => a -> [VName] -> [(Type, ValueType)] -> [Stm]
compileSetStructValues a
struct = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith forall {a}. VName -> (a, ValueType) -> Stm
field
  where
    field :: VName -> (a, ValueType) -> Stm
field VName
name (a
_, Prim PrimType
pt) =
      [C.cstm|$id:struct.$id:(closureFreeStructField name)=$exp:(toStorage pt (C.toExp name noLoc));|]
    field VName
name (a
_, ValueType
MemBlock) =
      [C.cstm|$id:struct.$id:(closureFreeStructField name)=$id:name.mem;|]
    field VName
name (a
_, ValueType
RawMem) =
      [C.cstm|$id:struct.$id:(closureFreeStructField name)=$id:name;|]

compileSetRetvalStructValues ::
  C.ToIdent a =>
  a ->
  [VName] ->
  [(C.Type, ValueType)] ->
  [C.Stm]
compileSetRetvalStructValues :: forall a. ToIdent a => a -> [VName] -> [(Type, ValueType)] -> [Stm]
compileSetRetvalStructValues a
struct [VName]
vnames [(Type, ValueType)]
we = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith VName -> (Type, ValueType) -> [Stm]
field [VName]
vnames [(Type, ValueType)]
we
  where
    field :: VName -> (Type, ValueType) -> [Stm]
field VName
name (Type
ct, Prim PrimType
_) =
      [C.cstms|$id:struct.$id:(closureRetvalStructField name)=(($ty:ct*)&$id:name);
               $escstm:("#if ISPC")
               $id:struct.$id:(closureRetvalStructField name)+= programIndex;
               $escstm:("#endif")|]
    field VName
name (Type
_, ValueType
MemBlock) =
      [C.cstms|$id:struct.$id:(closureRetvalStructField name)=$id:name.mem;|]
    field VName
name (Type
_, ValueType
RawMem) =
      [C.cstms|$id:struct.$id:(closureRetvalStructField name)=$id:name;|]

compileGetRetvalStructVals :: C.ToIdent a => a -> [VName] -> [(C.Type, ValueType)] -> [C.InitGroup]
compileGetRetvalStructVals :: forall a.
ToIdent a =>
a -> [VName] -> [(Type, ValueType)] -> [InitGroup]
compileGetRetvalStructVals a
struct = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith VName -> (Type, ValueType) -> InitGroup
field
  where
    field :: VName -> (Type, ValueType) -> InitGroup
field VName
name (Type
ty, Prim PrimType
pt) =
      let inner :: Exp
inner = [C.cexp|*$id:struct->$id:(closureRetvalStructField name)|]
       in [C.cdecl|$ty:ty $id:name = $exp:(fromStorage pt inner);|]
    field VName
name (Type
ty, ValueType
_) =
      [C.cdecl|$ty:ty $id:name =
                 {.desc = $string:(prettyString name),
                 .mem = $id:struct->$id:(closureRetvalStructField name),
                 .size = 0, .references = NULL};|]

compileGetStructVals ::
  C.ToIdent a =>
  a ->
  [VName] ->
  [(C.Type, ValueType)] ->
  [C.InitGroup]
compileGetStructVals :: forall a.
ToIdent a =>
a -> [VName] -> [(Type, ValueType)] -> [InitGroup]
compileGetStructVals a
struct = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith VName -> (Type, ValueType) -> InitGroup
field
  where
    field :: VName -> (Type, ValueType) -> InitGroup
field VName
name (Type
ty, Prim PrimType
pt) =
      let inner :: Exp
inner = [C.cexp|$id:struct->$id:(closureFreeStructField name)|]
       in [C.cdecl|$ty:ty $id:name = $exp:(fromStorage pt inner);|]
    field VName
name (Type
ty, ValueType
_) =
      [C.cdecl|$ty:ty $id:name =
                 {.desc = $string:(prettyString name),
                  .mem = $id:struct->$id:(closureFreeStructField name),
                  .size = 0, .references = NULL};|]

compileWriteBackResVals :: C.ToIdent a => a -> [VName] -> [(C.Type, ValueType)] -> [C.Stm]
compileWriteBackResVals :: forall a. ToIdent a => a -> [VName] -> [(Type, ValueType)] -> [Stm]
compileWriteBackResVals a
struct = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith forall {a}. VName -> (a, ValueType) -> Stm
field
  where
    field :: VName -> (a, ValueType) -> Stm
field VName
name (a
_, Prim PrimType
pt) =
      [C.cstm|*$id:struct->$id:(closureRetvalStructField name) = $exp:(toStorage pt (C.toExp name noLoc));|]
    field VName
name (a
_, ValueType
_) =
      [C.cstm|$id:struct->$id:(closureRetvalStructField name) = $id:name.mem;|]

paramToCType :: Param -> GC.CompilerM op s (C.Type, ValueType)
paramToCType :: forall op s. Param -> CompilerM op s (Type, ValueType)
paramToCType (ScalarParam VName
_ PrimType
pt) = do
  let t :: Type
t = PrimType -> Type
primStorageType PrimType
pt
  forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type
t, PrimType -> ValueType
Prim PrimType
pt)
paramToCType (MemParam VName
name Space
space') = forall op s. VName -> Space -> CompilerM op s (Type, ValueType)
mcMemToCType VName
name Space
space'

mcMemToCType :: VName -> Space -> GC.CompilerM op s (C.Type, ValueType)
mcMemToCType :: forall op s. VName -> Space -> CompilerM op s (Type, ValueType)
mcMemToCType VName
v Space
space = do
  Bool
refcount <- forall op s. Space -> CompilerM op s Bool
GC.fatMemory Space
space
  Bool
cached <- forall a. Maybe a -> Bool
isJust forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a op s. ToExp a => a -> CompilerM op s (Maybe VName)
GC.cacheMem VName
v
  forall (f :: * -> *) a. Applicative f => a -> f a
pure
    ( Space -> Type
GC.fatMemType Space
space,
      if Bool
refcount Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
cached
        then ValueType
MemBlock
        else ValueType
RawMem
    )

functionRuntime :: Name -> C.Id
functionRuntime :: Name -> Id
functionRuntime = (forall a. ToIdent a => a -> SrcLoc -> Id
`C.toIdent` forall a. Monoid a => a
mempty) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Semigroup a => a -> a -> a
<> Name
"_total_runtime")

functionRuns :: Name -> C.Id
functionRuns :: Name -> Id
functionRuns = (forall a. ToIdent a => a -> SrcLoc -> Id
`C.toIdent` forall a. Monoid a => a
mempty) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Semigroup a => a -> a -> a
<> Name
"_runs")

functionIter :: Name -> C.Id
functionIter :: Name -> Id
functionIter = (forall a. ToIdent a => a -> SrcLoc -> Id
`C.toIdent` forall a. Monoid a => a
mempty) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Semigroup a => a -> a -> a
<> Name
"_iter")

multiCoreReport :: [(Name, Bool)] -> [C.BlockItem]
multiCoreReport :: [(Name, Bool)] -> [BlockItem]
multiCoreReport [(Name, Bool)]
names = [BlockItem]
report_kernels
  where
    report_kernels :: [BlockItem]
report_kernels = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Name, Bool) -> [BlockItem]
reportKernel [(Name, Bool)]
names
    max_name_len_pad :: Int
max_name_len_pad = Int
40
    format_string :: Name -> Bool -> String
format_string Name
name Bool
True =
      let name_s :: String
name_s = Name -> String
nameToString Name
name
          padding :: String
padding = forall a. Int -> a -> [a]
replicate (Int
max_name_len_pad forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. Foldable t => t a -> Int
length String
name_s) Char
' '
       in [String] -> String
unwords [String
"tid %2d -", String
name_s forall a. [a] -> [a] -> [a]
++ String
padding, String
"ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n"]
    format_string Name
name Bool
False =
      let name_s :: String
name_s = Name -> String
nameToString Name
name
          padding :: String
padding = forall a. Int -> a -> [a]
replicate (Int
max_name_len_pad forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. Foldable t => t a -> Int
length String
name_s) Char
' '
       in [String] -> String
unwords [String
"        ", String
name_s forall a. [a] -> [a] -> [a]
++ String
padding, String
"ran %10d times; avg: %10ldus; total: %10ldus; time pr. iter %9.6f; iters %9ld; avg %ld\n"]
    reportKernel :: (Name, Bool) -> [BlockItem]
reportKernel (Name
name, Bool
is_array) =
      let runs :: Id
runs = Name -> Id
functionRuns Name
name
          total_runtime :: Id
total_runtime = Name -> Id
functionRuntime Name
name
          iters :: Id
iters = Name -> Id
functionIter Name
name
       in if Bool
is_array
            then
              [ [C.citem|
                     for (int i = 0; i < ctx->scheduler.num_threads; i++) {
                       fprintf(ctx->log,
                         $string:(format_string name is_array),
                         i,
                         ctx->$id:runs[i],
                         (long int) ctx->$id:total_runtime[i] / (ctx->$id:runs[i] != 0 ? ctx->$id:runs[i] : 1),
                         (long int) ctx->$id:total_runtime[i],
                         (double) ctx->$id:total_runtime[i] /  (ctx->$id:iters[i] == 0 ? 1 : (double)ctx->$id:iters[i]),
                         (long int) (ctx->$id:iters[i]),
                         (long int) (ctx->$id:iters[i]) / (ctx->$id:runs[i] != 0 ? ctx->$id:runs[i] : 1)
                         );
                     }
                   |]
              ]
            else
              [ [C.citem|
                    fprintf(ctx->log,
                       $string:(format_string name is_array),
                       ctx->$id:runs,
                       (long int) ctx->$id:total_runtime / (ctx->$id:runs != 0 ? ctx->$id:runs : 1),
                       (long int) ctx->$id:total_runtime,
                       (double) ctx->$id:total_runtime /  (ctx->$id:iters == 0 ? 1 : (double)ctx->$id:iters),
                       (long int) (ctx->$id:iters),
                       (long int) (ctx->$id:iters) / (ctx->$id:runs != 0 ? ctx->$id:runs : 1));
                   |],
                [C.citem|ctx->total_runtime += ctx->$id:total_runtime;|],
                [C.citem|ctx->total_runs += ctx->$id:runs;|]
              ]

addBenchmarkFields :: Name -> Maybe C.Id -> GC.CompilerM op s ()
addBenchmarkFields :: forall op s. Name -> Maybe Id -> CompilerM op s ()
addBenchmarkFields Name
name (Just Id
_) = do
  forall op s. Id -> Type -> Maybe Exp -> Stm -> CompilerM op s ()
GC.contextFieldDyn
    (Name -> Id
functionRuntime Name
name)
    [C.cty|typename int64_t*|]
    (forall a. a -> Maybe a
Just [C.cexp|calloc(sizeof(typename int64_t), ctx->scheduler.num_threads)|])
    [C.cstm|free(ctx->$id:(functionRuntime name));|]
  forall op s. Id -> Type -> Maybe Exp -> Stm -> CompilerM op s ()
GC.contextFieldDyn
    (Name -> Id
functionRuns Name
name)
    [C.cty|int*|]
    (forall a. a -> Maybe a
Just [C.cexp|calloc(sizeof(int), ctx->scheduler.num_threads)|])
    [C.cstm|free(ctx->$id:(functionRuns name));|]
  forall op s. Id -> Type -> Maybe Exp -> Stm -> CompilerM op s ()
GC.contextFieldDyn
    (Name -> Id
functionIter Name
name)
    [C.cty|typename int64_t*|]
    (forall a. a -> Maybe a
Just [C.cexp|calloc(sizeof(sizeof(typename int64_t)), ctx->scheduler.num_threads)|])
    [C.cstm|free(ctx->$id:(functionIter name));|]
addBenchmarkFields Name
name Maybe Id
Nothing = do
  forall op s. Id -> Type -> Maybe Exp -> CompilerM op s ()
GC.contextField (Name -> Id
functionRuntime Name
name) [C.cty|typename int64_t|] forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just [C.cexp|0|]
  forall op s. Id -> Type -> Maybe Exp -> CompilerM op s ()
GC.contextField (Name -> Id
functionRuns Name
name) [C.cty|int|] forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just [C.cexp|0|]
  forall op s. Id -> Type -> Maybe Exp -> CompilerM op s ()
GC.contextField (Name -> Id
functionIter Name
name) [C.cty|typename int64_t|] forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just [C.cexp|0|]

benchmarkCode :: Name -> Maybe C.Id -> [C.BlockItem] -> GC.CompilerM op s [C.BlockItem]
benchmarkCode :: forall op s.
Name -> Maybe Id -> [BlockItem] -> CompilerM op s [BlockItem]
benchmarkCode Name
name Maybe Id
tid [BlockItem]
code = do
  forall op s. Name -> Maybe Id -> CompilerM op s ()
addBenchmarkFields Name
name Maybe Id
tid
  forall (f :: * -> *) a. Applicative f => a -> f a
pure
    [C.citems|
     typename uint64_t $id:start = 0;
     if (ctx->profiling && !ctx->profiling_paused) {
       $id:start = get_wall_time();
     }
     $items:code
     if (ctx->profiling && !ctx->profiling_paused) {
       typename uint64_t $id:end = get_wall_time();
       typename uint64_t elapsed = $id:end - $id:start;
       $items:(updateFields tid)
     }
     |]
  where
    start :: Name
start = Name
name forall a. Semigroup a => a -> a -> a
<> Name
"_start"
    end :: Name
end = Name
name forall a. Semigroup a => a -> a -> a
<> Name
"_end"
    updateFields :: Maybe a -> [BlockItem]
updateFields Maybe a
Nothing =
      [C.citems|__atomic_fetch_add(&ctx->$id:(functionRuns name), 1, __ATOMIC_RELAXED);
                                            __atomic_fetch_add(&ctx->$id:(functionRuntime name), elapsed, __ATOMIC_RELAXED);
                                            __atomic_fetch_add(&ctx->$id:(functionIter name), iterations, __ATOMIC_RELAXED);|]
    updateFields (Just a
_tid') =
      [C.citems|ctx->$id:(functionRuns name)[tid]++;
                                            ctx->$id:(functionRuntime name)[tid] += elapsed;
                                            ctx->$id:(functionIter name)[tid] += iterations;|]

functionTiming :: Name -> C.Id
functionTiming :: Name -> Id
functionTiming = (forall a. ToIdent a => a -> SrcLoc -> Id
`C.toIdent` forall a. Monoid a => a
mempty) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Semigroup a => a -> a -> a
<> Name
"_total_time")

functionIterations :: Name -> C.Id
functionIterations :: Name -> Id
functionIterations = (forall a. ToIdent a => a -> SrcLoc -> Id
`C.toIdent` forall a. Monoid a => a
mempty) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Semigroup a => a -> a -> a
<> Name
"_total_iter")

addTimingFields :: Name -> GC.CompilerM op s ()
addTimingFields :: forall op s. Name -> CompilerM op s ()
addTimingFields Name
name = do
  forall op s. Id -> Type -> Maybe Exp -> CompilerM op s ()
GC.contextField (Name -> Id
functionTiming Name
name) [C.cty|typename int64_t|] forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just [C.cexp|0|]
  forall op s. Id -> Type -> Maybe Exp -> CompilerM op s ()
GC.contextField (Name -> Id
functionIterations Name
name) [C.cty|typename int64_t|] forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just [C.cexp|0|]

multicoreName :: String -> GC.CompilerM op s Name
multicoreName :: forall op s. String -> CompilerM op s Name
multicoreName String
s = do
  VName
s' <- forall (m :: * -> *). MonadFreshNames m => String -> m VName
newVName (String
"futhark_mc_" forall a. [a] -> [a] -> [a]
++ String
s)
  forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ String -> Name
nameFromString forall a b. (a -> b) -> a -> b
$ VName -> String
baseString VName
s' forall a. [a] -> [a] -> [a]
++ String
"_" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (VName -> Int
baseTag VName
s')

type DefSpecifier s = String -> (Name -> GC.CompilerM Multicore s C.Definition) -> GC.CompilerM Multicore s Name

multicoreDef :: DefSpecifier s
multicoreDef :: forall s. DefSpecifier s
multicoreDef String
s Name -> CompilerM Multicore s Definition
f = do
  Name
s' <- forall op s. String -> CompilerM op s Name
multicoreName String
s
  forall op s. Definition -> CompilerM op s ()
GC.libDecl forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Name -> CompilerM Multicore s Definition
f Name
s'
  forall (f :: * -> *) a. Applicative f => a -> f a
pure Name
s'

generateParLoopFn ::
  C.ToIdent a =>
  M.Map VName Space ->
  String ->
  MCCode ->
  a ->
  [(VName, (C.Type, ValueType))] ->
  [(VName, (C.Type, ValueType))] ->
  GC.CompilerM Multicore s Name
generateParLoopFn :: forall a s.
ToIdent a =>
Map VName Space
-> String
-> MCCode
-> a
-> [(VName, (Type, ValueType))]
-> [(VName, (Type, ValueType))]
-> CompilerM Multicore s Name
generateParLoopFn Map VName Space
lexical String
basename MCCode
code a
fstruct [(VName, (Type, ValueType))]
free [(VName, (Type, ValueType))]
retval = do
  let ([VName]
fargs, [(Type, ValueType)]
fctypes) = forall a b. [(a, b)] -> ([a], [b])
unzip [(VName, (Type, ValueType))]
free
  let ([VName]
retval_args, [(Type, ValueType)]
retval_ctypes) = forall a b. [(a, b)] -> ([a], [b])
unzip [(VName, (Type, ValueType))]
retval
  forall s. DefSpecifier s
multicoreDef String
basename forall a b. (a -> b) -> a -> b
$ \Name
s -> do
    [BlockItem]
fbody <- forall op s.
Name -> Maybe Id -> [BlockItem] -> CompilerM op s [BlockItem]
benchmarkCode Name
s (forall a. a -> Maybe a
Just Id
"tid") forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< forall op s a. CompilerM op s a -> CompilerM op s a
GC.inNewFunction forall a b. (a -> b) -> a -> b
$
      forall op s a.
Map VName Space
-> ([BlockItem] -> [Stm] -> CompilerM op s a) -> CompilerM op s a
GC.cachingMemory Map VName Space
lexical forall a b. (a -> b) -> a -> b
$ \[BlockItem]
decl_cached [Stm]
free_cached -> forall op s. CompilerM op s () -> CompilerM op s [BlockItem]
GC.collect forall a b. (a -> b) -> a -> b
$ do
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item [C.citems|$decls:(compileGetStructVals fstruct fargs fctypes)|]
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item [C.citems|$decls:(compileGetRetvalStructVals fstruct retval_args retval_ctypes)|]
        [BlockItem]
code' <- forall op s. CompilerM op s () -> CompilerM op s [BlockItem]
GC.collect forall a b. (a -> b) -> a -> b
$ forall op s. Code op -> CompilerM op s ()
GC.compileCode MCCode
code
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item [BlockItem]
decl_cached
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall op s. CompilerM op s [BlockItem]
GC.declAllocatedMem
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item [BlockItem]
code'
        [BlockItem]
free_mem <- forall op s. CompilerM op s [BlockItem]
GC.freeAllocatedMem
        forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|cleanup: {$stms:free_cached $items:free_mem}|]
    forall (f :: * -> *) a. Applicative f => a -> f a
pure
      [C.cedecl|int $id:s(void *args, typename int64_t iterations, int tid, struct scheduler_info info) {
                           int err = 0;
                           int subtask_id = tid;
                           struct $id:fstruct *$id:fstruct = (struct $id:fstruct*) args;
                           struct futhark_context *ctx = $id:fstruct->ctx;
                           $items:fbody
                           if (err == 0) {
                             $stms:(compileWriteBackResVals fstruct retval_args retval_ctypes)
                           }
                           return err;
                      }|]

prepareTaskStruct ::
  DefSpecifier s ->
  String ->
  [VName] ->
  [(C.Type, ValueType)] ->
  [VName] ->
  [(C.Type, ValueType)] ->
  GC.CompilerM Multicore s Name
prepareTaskStruct :: forall s.
DefSpecifier s
-> String
-> [VName]
-> [(Type, ValueType)]
-> [VName]
-> [(Type, ValueType)]
-> CompilerM Multicore s Name
prepareTaskStruct DefSpecifier s
def String
name [VName]
free_args [(Type, ValueType)]
free_ctypes [VName]
retval_args [(Type, ValueType)]
retval_ctypes = do
  let makeStruct :: a -> f Definition
makeStruct a
s =
        forall (f :: * -> *) a. Applicative f => a -> f a
pure
          [C.cedecl|struct $id:s {
                       struct futhark_context *ctx;
                       $sdecls:(compileFreeStructFields free_args free_ctypes)
                       $sdecls:(compileRetvalStructFields retval_args retval_ctypes)
                     };|]
  Name
fstruct <- DefSpecifier s
def String
name forall {f :: * -> *} {a}.
(Applicative f, ToIdent a) =>
a -> f Definition
makeStruct
  let fstruct' :: Name
fstruct' = Name
fstruct forall a. Semigroup a => a -> a -> a
<> Name
"_"
  forall op s. InitGroup -> CompilerM op s ()
GC.decl [C.cdecl|struct $id:fstruct $id:fstruct';|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:fstruct'.ctx = ctx;|]
  forall op s. [Stm] -> CompilerM op s ()
GC.stms [C.cstms|$stms:(compileSetStructValues fstruct' free_args free_ctypes)|]
  forall op s. [Stm] -> CompilerM op s ()
GC.stms [C.cstms|$stms:(compileSetRetvalStructValues fstruct' retval_args retval_ctypes)|]
  forall (f :: * -> *) a. Applicative f => a -> f a
pure Name
fstruct

-- Generate a segop function for top_level and potentially nested SegOp code
compileOp :: GC.OpCompiler Multicore s
compileOp :: forall s. OpCompiler Multicore s
compileOp (GetLoopBounds VName
start VName
end) = do
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:start = start;|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:end = end;|]
compileOp (GetTaskId VName
v) =
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:v = subtask_id;|]
compileOp (GetNumTasks VName
v) =
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:v = info.nsubtasks;|]
compileOp (SegOp String
name [Param]
params ParallelTask
seq_task Maybe ParallelTask
par_task [Param]
retvals (SchedulerInfo Exp
e Scheduling
sched)) = do
  let (ParallelTask MCCode
seq_code) = ParallelTask
seq_task
  [(Type, ValueType)]
free_ctypes <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall op s. Param -> CompilerM op s (Type, ValueType)
paramToCType [Param]
params
  [(Type, ValueType)]
retval_ctypes <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall op s. Param -> CompilerM op s (Type, ValueType)
paramToCType [Param]
retvals
  let free_args :: [VName]
free_args = forall a b. (a -> b) -> [a] -> [b]
map Param -> VName
paramName [Param]
params
      retval_args :: [VName]
retval_args = forall a b. (a -> b) -> [a] -> [b]
map Param -> VName
paramName [Param]
retvals
      free :: [(VName, (Type, ValueType))]
free = forall a b. [a] -> [b] -> [(a, b)]
zip [VName]
free_args [(Type, ValueType)]
free_ctypes
      retval :: [(VName, (Type, ValueType))]
retval = forall a b. [a] -> [b] -> [(a, b)]
zip [VName]
retval_args [(Type, ValueType)]
retval_ctypes

  Exp
e' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
e

  let lexical :: Map VName Space
lexical = KernelHandling -> Function Multicore -> Map VName Space
lexicalMemoryUsageMC KernelHandling
TraverseKernels forall a b. (a -> b) -> a -> b
$ forall a.
Maybe EntryPoint -> [Param] -> [Param] -> Code a -> FunctionT a
Function forall a. Maybe a
Nothing [] [Param]
params MCCode
seq_code

  Name
fstruct <-
    forall s.
DefSpecifier s
-> String
-> [VName]
-> [(Type, ValueType)]
-> [VName]
-> [(Type, ValueType)]
-> CompilerM Multicore s Name
prepareTaskStruct forall s. DefSpecifier s
multicoreDef String
"task" [VName]
free_args [(Type, ValueType)]
free_ctypes [VName]
retval_args [(Type, ValueType)]
retval_ctypes

  Name
fpar_task <- forall a s.
ToIdent a =>
Map VName Space
-> String
-> MCCode
-> a
-> [(VName, (Type, ValueType))]
-> [(VName, (Type, ValueType))]
-> CompilerM Multicore s Name
generateParLoopFn Map VName Space
lexical (String
name forall a. [a] -> [a] -> [a]
++ String
"_task") MCCode
seq_code Name
fstruct [(VName, (Type, ValueType))]
free [(VName, (Type, ValueType))]
retval
  forall op s. Name -> CompilerM op s ()
addTimingFields Name
fpar_task

  let ftask_name :: Name
ftask_name = Name
fstruct forall a. Semigroup a => a -> a -> a
<> Name
"_task"
  forall op s. InitGroup -> CompilerM op s ()
GC.decl [C.cdecl|struct scheduler_segop $id:ftask_name;|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.args = &$id:(fstruct <> "_");|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.top_level_fn = $id:fpar_task;|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.name = $string:(nameToString fpar_task);|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.iterations = $exp:e';|]
  -- Create the timing fields for the task
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.task_time = &ctx->$id:(functionTiming fpar_task);|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.task_iter = &ctx->$id:(functionIterations fpar_task);|]

  case Scheduling
sched of
    Scheduling
Dynamic -> forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.sched = DYNAMIC;|]
    Scheduling
Static -> forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.sched = STATIC;|]

  -- Generate the nested segop function if available
  [(Name, Bool)]
fnpar_task <- case Maybe ParallelTask
par_task of
    Just (ParallelTask MCCode
nested_code) -> do
      let lexical_nested :: Map VName Space
lexical_nested = KernelHandling -> Function Multicore -> Map VName Space
lexicalMemoryUsageMC KernelHandling
TraverseKernels forall a b. (a -> b) -> a -> b
$ forall a.
Maybe EntryPoint -> [Param] -> [Param] -> Code a -> FunctionT a
Function forall a. Maybe a
Nothing [] [Param]
params MCCode
nested_code
      Name
fnpar_task <- forall a s.
ToIdent a =>
Map VName Space
-> String
-> MCCode
-> a
-> [(VName, (Type, ValueType))]
-> [(VName, (Type, ValueType))]
-> CompilerM Multicore s Name
generateParLoopFn Map VName Space
lexical_nested (String
name forall a. [a] -> [a] -> [a]
++ String
"_nested_task") MCCode
nested_code Name
fstruct [(VName, (Type, ValueType))]
free [(VName, (Type, ValueType))]
retval
      forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.nested_fn = $id:fnpar_task;|]
      forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a b. [a] -> [b] -> [(a, b)]
zip [Name
fnpar_task] [Bool
True]
    Maybe ParallelTask
Nothing -> do
      forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.nested_fn=NULL;|]
      forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Monoid a => a
mempty

  [BlockItem]
free_all_mem <- forall op s. CompilerM op s [BlockItem]
GC.freeAllocatedMem
  let ftask_err :: Name
ftask_err = Name
fpar_task forall a. Semigroup a => a -> a -> a
<> Name
"_err"
      code :: [BlockItem]
code =
        [C.citems|int $id:ftask_err = scheduler_prepare_task(&ctx->scheduler, &$id:ftask_name);
                  if ($id:ftask_err != 0) {
                    $items:free_all_mem;
                    err = $id:ftask_err;
                    goto cleanup;
                  }|]

  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item [BlockItem]
code

  -- Add profile fields for -P option
  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.profileReport forall a b. (a -> b) -> a -> b
$ [(Name, Bool)] -> [BlockItem]
multiCoreReport forall a b. (a -> b) -> a -> b
$ (Name
fpar_task, Bool
True) forall a. a -> [a] -> [a]
: [(Name, Bool)]
fnpar_task
compileOp (ParLoop String
s' MCCode
body [Param]
free) = do
  [(Type, ValueType)]
free_ctypes <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall op s. Param -> CompilerM op s (Type, ValueType)
paramToCType [Param]
free
  let free_args :: [VName]
free_args = forall a b. (a -> b) -> [a] -> [b]
map Param -> VName
paramName [Param]
free

  let lexical :: Map VName Space
lexical = KernelHandling -> Function Multicore -> Map VName Space
lexicalMemoryUsageMC KernelHandling
TraverseKernels forall a b. (a -> b) -> a -> b
$ forall a.
Maybe EntryPoint -> [Param] -> [Param] -> Code a -> FunctionT a
Function forall a. Maybe a
Nothing [] [Param]
free MCCode
body

  Name
fstruct <-
    forall s.
DefSpecifier s
-> String
-> [VName]
-> [(Type, ValueType)]
-> [VName]
-> [(Type, ValueType)]
-> CompilerM Multicore s Name
prepareTaskStruct forall s. DefSpecifier s
multicoreDef (String
s' forall a. [a] -> [a] -> [a]
++ String
"_parloop_struct") [VName]
free_args [(Type, ValueType)]
free_ctypes forall a. Monoid a => a
mempty forall a. Monoid a => a
mempty

  Name
ftask <- forall s. DefSpecifier s
multicoreDef (String
s' forall a. [a] -> [a] -> [a]
++ String
"_parloop") forall a b. (a -> b) -> a -> b
$ \Name
s -> do
    [BlockItem]
fbody <- forall op s.
Name -> Maybe Id -> [BlockItem] -> CompilerM op s [BlockItem]
benchmarkCode Name
s (forall a. a -> Maybe a
Just Id
"tid") forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< forall op s a. CompilerM op s a -> CompilerM op s a
GC.inNewFunction forall a b. (a -> b) -> a -> b
$
      forall op s a.
Map VName Space
-> ([BlockItem] -> [Stm] -> CompilerM op s a) -> CompilerM op s a
GC.cachingMemory Map VName Space
lexical forall a b. (a -> b) -> a -> b
$ \[BlockItem]
decl_cached [Stm]
free_cached -> forall op s. CompilerM op s () -> CompilerM op s [BlockItem]
GC.collect forall a b. (a -> b) -> a -> b
$ do
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_
          forall op s. BlockItem -> CompilerM op s ()
GC.item
          [C.citems|$decls:(compileGetStructVals fstruct free_args free_ctypes)|]

        forall op s. InitGroup -> CompilerM op s ()
GC.decl [C.cdecl|typename int64_t iterations = end-start;|]

        [BlockItem]
body' <- forall op s. CompilerM op s () -> CompilerM op s [BlockItem]
GC.collect forall a b. (a -> b) -> a -> b
$ forall op s. Code op -> CompilerM op s ()
GC.compileCode MCCode
body

        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item [BlockItem]
decl_cached
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall op s. CompilerM op s [BlockItem]
GC.declAllocatedMem
        [BlockItem]
free_mem <- forall op s. CompilerM op s [BlockItem]
GC.freeAllocatedMem
        forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item [BlockItem]
body'
        forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|cleanup: {$stms:free_cached $items:free_mem}|]
    forall (f :: * -> *) a. Applicative f => a -> f a
pure
      [C.cedecl|static int $id:s(void *args,
                                 typename int64_t start,
                                 typename int64_t end,
                                 int subtask_id,
                                 int tid) {
                       int err = 0;
                       struct $id:fstruct *$id:fstruct = (struct $id:fstruct*) args;
                       struct futhark_context *ctx = $id:fstruct->ctx;
                       $items:fbody
                       return err;
                }|]

  let ftask_name :: Name
ftask_name = Name
ftask forall a. Semigroup a => a -> a -> a
<> Name
"_task"
  forall op s. InitGroup -> CompilerM op s ()
GC.decl [C.cdecl|struct scheduler_parloop $id:ftask_name;|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.name = $string:(nameToString ftask);|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.fn = $id:ftask;|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.args = &$id:(fstruct <> "_");|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.iterations = iterations;|]
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:ftask_name.info = info;|]

  let ftask_err :: Name
ftask_err = Name
ftask forall a. Semigroup a => a -> a -> a
<> Name
"_err"
      ftask_total :: Name
ftask_total = Name
ftask forall a. Semigroup a => a -> a -> a
<> Name
"_total"
  [BlockItem]
code' <-
    forall op s.
Name -> Maybe Id -> [BlockItem] -> CompilerM op s [BlockItem]
benchmarkCode
      Name
ftask_total
      forall a. Maybe a
Nothing
      [C.citems|int $id:ftask_err = scheduler_execute_task(&ctx->scheduler,
                                                           &$id:ftask_name);
               if ($id:ftask_err != 0) {
                 err = $id:ftask_err;
                 goto cleanup;
               }|]

  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.item [BlockItem]
code'
  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.profileReport forall a b. (a -> b) -> a -> b
$ [(Name, Bool)] -> [BlockItem]
multiCoreReport forall a b. (a -> b) -> a -> b
$ forall a b. [a] -> [b] -> [(a, b)]
zip [Name
ftask, Name
ftask_total] [Bool
True, Bool
False]
compileOp (Atomic AtomicOp
aop) =
  forall op s.
AtomicOp
-> (Type -> VName -> CompilerM op s Type) -> CompilerM op s ()
atomicOps AtomicOp
aop (\Type
ty VName
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure [C.cty|$ty:ty*|])
compileOp (ISPCKernel MCCode
body [Param]
_) =
  forall s. MCCode -> CompilerM Multicore s ()
scopedBlock MCCode
body
compileOp (ForEach VName
i Exp
from Exp
bound MCCode
body) = do
  let i' :: SrcLoc -> Id
i' = forall a. ToIdent a => a -> SrcLoc -> Id
C.toIdent VName
i
      t :: Type
t = PrimType -> Type
primTypeToCType forall a b. (a -> b) -> a -> b
$ forall v. PrimExp v -> PrimType
primExpType Exp
bound
  Exp
from' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
from
  Exp
bound' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
bound
  [BlockItem]
body' <- forall op s. CompilerM op s () -> CompilerM op s [BlockItem]
GC.collect forall a b. (a -> b) -> a -> b
$ forall op s. Code op -> CompilerM op s ()
GC.compileCode MCCode
body
  forall op s. Stm -> CompilerM op s ()
GC.stm
    [C.cstm|for ($ty:t $id:i' = $exp:from'; $id:i' < $exp:bound'; $id:i'++) {
            $items:body'
          }|]
compileOp (ForEachActive VName
i MCCode
body) = do
  forall op s. InitGroup -> CompilerM op s ()
GC.decl [C.cdecl|typename int64_t $id:i = 0;|]
  forall s. MCCode -> CompilerM Multicore s ()
scopedBlock MCCode
body
compileOp (ExtractLane VName
dest Exp
tar Exp
_) = do
  Exp
tar' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
tar
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:dest = $exp:tar';|]

scopedBlock :: MCCode -> GC.CompilerM Multicore s ()
scopedBlock :: forall s. MCCode -> CompilerM Multicore s ()
scopedBlock MCCode
code = do
  [BlockItem]
inner <- forall op s. CompilerM op s () -> CompilerM op s [BlockItem]
GC.collect forall a b. (a -> b) -> a -> b
$ forall op s. Code op -> CompilerM op s ()
GC.compileCode MCCode
code
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|{$items:inner}|]

doAtomic ::
  (C.ToIdent a1) =>
  a1 ->
  VName ->
  Count u (TExp Int32) ->
  Exp ->
  String ->
  C.Type ->
  (C.Type -> VName -> GC.CompilerM op s C.Type) ->
  GC.CompilerM op s ()
doAtomic :: forall {k} a1 (u :: k) op s.
ToIdent a1 =>
a1
-> VName
-> Count u (TExp Int32)
-> Exp
-> String
-> Type
-> (Type -> VName -> CompilerM op s Type)
-> CompilerM op s ()
doAtomic a1
old VName
arr Count u (TExp Int32)
ind Exp
val String
op Type
ty Type -> VName -> CompilerM op s Type
castf = do
  Exp
ind' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp forall a b. (a -> b) -> a -> b
$ forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped forall a b. (a -> b) -> a -> b
$ forall {k} (u :: k) e. Count u e -> e
unCount Count u (TExp Int32)
ind
  Exp
val' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
val
  Type
cast <- Type -> VName -> CompilerM op s Type
castf Type
ty VName
arr
  Exp
arr' <- forall op s. VName -> CompilerM op s Exp
GC.rawMem VName
arr
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:old = $id:op(&(($ty:cast)$exp:arr')[$exp:ind'], ($ty:ty) $exp:val', __ATOMIC_RELAXED);|]

atomicOps :: AtomicOp -> (C.Type -> VName -> GC.CompilerM op s C.Type) -> GC.CompilerM op s ()
atomicOps :: forall op s.
AtomicOp
-> (Type -> VName -> CompilerM op s Type) -> CompilerM op s ()
atomicOps (AtomicCmpXchg PrimType
t VName
old VName
arr Count Elements (TExp Int32)
ind VName
res Exp
val) Type -> VName -> CompilerM op s Type
castf = do
  Exp
ind' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp forall a b. (a -> b) -> a -> b
$ forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped forall a b. (a -> b) -> a -> b
$ forall {k} (u :: k) e. Count u e -> e
unCount Count Elements (TExp Int32)
ind
  Exp
new_val' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
val
  Type
cast <- Type -> VName -> CompilerM op s Type
castf [C.cty|$ty:(GC.primTypeToCType t)|] VName
arr
  Exp
arr' <- forall op s. VName -> CompilerM op s Exp
GC.rawMem VName
arr
  forall op s. Stm -> CompilerM op s ()
GC.stm
    [C.cstm|$id:res = $id:op(&(($ty:cast)$exp:arr')[$exp:ind'],
                 &$id:old,
                 $exp:new_val',
                 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);|]
  where
    op :: String
    op :: String
op = String
"__atomic_compare_exchange_n"
atomicOps (AtomicXchg PrimType
t VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val) Type -> VName -> CompilerM op s Type
castf = do
  Exp
ind' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp forall a b. (a -> b) -> a -> b
$ forall {k} (t :: k) v. TPrimExp t v -> PrimExp v
untyped forall a b. (a -> b) -> a -> b
$ forall {k} (u :: k) e. Count u e -> e
unCount Count Elements (TExp Int32)
ind
  Exp
val' <- forall op s. Exp -> CompilerM op s Exp
GC.compileExp Exp
val
  Type
cast <- Type -> VName -> CompilerM op s Type
castf [C.cty|$ty:(GC.primTypeToCType t)|] VName
arr
  forall op s. Stm -> CompilerM op s ()
GC.stm [C.cstm|$id:old = $id:op(&(($ty:cast)$id:arr.mem)[$exp:ind'], $exp:val', __ATOMIC_SEQ_CST);|]
  where
    op :: String
    op :: String
op = String
"__atomic_exchange_n"
atomicOps (AtomicAdd IntType
t VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val) Type -> VName -> CompilerM op s Type
castf =
  forall {k} a1 (u :: k) op s.
ToIdent a1 =>
a1
-> VName
-> Count u (TExp Int32)
-> Exp
-> String
-> Type
-> (Type -> VName -> CompilerM op s Type)
-> CompilerM op s ()
doAtomic VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val String
"__atomic_fetch_add" [C.cty|$ty:(GC.intTypeToCType t)|] Type -> VName -> CompilerM op s Type
castf
atomicOps (AtomicSub IntType
t VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val) Type -> VName -> CompilerM op s Type
castf =
  forall {k} a1 (u :: k) op s.
ToIdent a1 =>
a1
-> VName
-> Count u (TExp Int32)
-> Exp
-> String
-> Type
-> (Type -> VName -> CompilerM op s Type)
-> CompilerM op s ()
doAtomic VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val String
"__atomic_fetch_sub" [C.cty|$ty:(GC.intTypeToCType t)|] Type -> VName -> CompilerM op s Type
castf
atomicOps (AtomicAnd IntType
t VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val) Type -> VName -> CompilerM op s Type
castf =
  forall {k} a1 (u :: k) op s.
ToIdent a1 =>
a1
-> VName
-> Count u (TExp Int32)
-> Exp
-> String
-> Type
-> (Type -> VName -> CompilerM op s Type)
-> CompilerM op s ()
doAtomic VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val String
"__atomic_fetch_and" [C.cty|$ty:(GC.intTypeToCType t)|] Type -> VName -> CompilerM op s Type
castf
atomicOps (AtomicOr IntType
t VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val) Type -> VName -> CompilerM op s Type
castf =
  forall {k} a1 (u :: k) op s.
ToIdent a1 =>
a1
-> VName
-> Count u (TExp Int32)
-> Exp
-> String
-> Type
-> (Type -> VName -> CompilerM op s Type)
-> CompilerM op s ()
doAtomic VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val String
"__atomic_fetch_or" [C.cty|$ty:(GC.intTypeToCType t)|] Type -> VName -> CompilerM op s Type
castf
atomicOps (AtomicXor IntType
t VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val) Type -> VName -> CompilerM op s Type
castf =
  forall {k} a1 (u :: k) op s.
ToIdent a1 =>
a1
-> VName
-> Count u (TExp Int32)
-> Exp
-> String
-> Type
-> (Type -> VName -> CompilerM op s Type)
-> CompilerM op s ()
doAtomic VName
old VName
arr Count Elements (TExp Int32)
ind Exp
val String
"__atomic_fetch_xor" [C.cty|$ty:(GC.intTypeToCType t)|] Type -> VName -> CompilerM op s Type
castf