{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}

module Futhark.CodeGen.Backends.COpenCL.Boilerplate
  ( generateBoilerplate,
    profilingEvent,
    copyDevToDev,
    copyDevToHost,
    copyHostToDev,
    copyScalarToDev,
    copyScalarFromDev,
    commonOptions,
    failureSwitch,
    costCentreReport,
    kernelRuntime,
    kernelRuns,
    sizeLoggingCode,
  )
where

import Control.Monad.State
import qualified Data.Map as M
import Data.Maybe
import qualified Data.Text as T
import qualified Futhark.CodeGen.Backends.GenericC as GC
import Futhark.CodeGen.Backends.GenericC.Options
import Futhark.CodeGen.ImpCode.OpenCL
import Futhark.CodeGen.OpenCL.Heuristics
import Futhark.CodeGen.RTS.C (freeListH, openclH)
import Futhark.Util (chunk, zEncodeString)
import Futhark.Util.Pretty (prettyOneLine)
import qualified Language.C.Quote.OpenCL as C
import qualified Language.C.Syntax as C

errorMsgNumArgs :: ErrorMsg a -> Int
errorMsgNumArgs :: ErrorMsg a -> Int
errorMsgNumArgs = [PrimType] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([PrimType] -> Int)
-> (ErrorMsg a -> [PrimType]) -> ErrorMsg a -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorMsg a -> [PrimType]
forall a. ErrorMsg a -> [PrimType]
errorMsgArgTypes

failureSwitch :: [FailureMsg] -> C.Stm
failureSwitch :: [FailureMsg] -> Stm
failureSwitch [FailureMsg]
failures =
  let printfEscape :: [Char] -> [Char]
printfEscape =
        let escapeChar :: Char -> [Char]
escapeChar Char
'%' = [Char]
"%%"
            escapeChar Char
c = [Char
c]
         in (Char -> [Char]) -> [Char] -> [Char]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> [Char]
escapeChar
      onPart :: ErrorMsgPart a -> [Char]
onPart (ErrorString [Char]
s) = [Char] -> [Char]
printfEscape [Char]
s
      -- FIXME: bogus for non-ints.
      onPart ErrorVal {} = [Char]
"%lld"
      onFailure :: a -> FailureMsg -> Stm
onFailure a
i (FailureMsg emsg :: ErrorMsg Exp
emsg@(ErrorMsg [ErrorMsgPart Exp]
parts) [Char]
backtrace) =
        let msg :: [Char]
msg = (ErrorMsgPart Exp -> [Char]) -> [ErrorMsgPart Exp] -> [Char]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ErrorMsgPart Exp -> [Char]
forall a. ErrorMsgPart a -> [Char]
onPart [ErrorMsgPart Exp]
parts [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\n" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char] -> [Char]
printfEscape [Char]
backtrace
            msgargs :: [Exp]
msgargs = [[C.cexp|args[$int:j]|] | Int
j <- [Int
0 .. ErrorMsg Exp -> Int
forall a. ErrorMsg a -> Int
errorMsgNumArgs ErrorMsg Exp
emsg Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]]
         in [C.cstm|case $int:i: {ctx->error = msgprintf($string:msg, $args:msgargs); break;}|]
      failure_cases :: [Stm]
failure_cases =
        (Int -> FailureMsg -> Stm) -> [Int] -> [FailureMsg] -> [Stm]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> FailureMsg -> Stm
forall a. (Show a, Integral a) => a -> FailureMsg -> Stm
onFailure [(Int
0 :: Int) ..] [FailureMsg]
failures
   in [C.cstm|switch (failure_idx) { $stms:failure_cases }|]

copyDevToDev, copyDevToHost, copyHostToDev, copyScalarToDev, copyScalarFromDev :: Name
copyDevToDev :: Name
copyDevToDev = Name
"copy_dev_to_dev"
copyDevToHost :: Name
copyDevToHost = Name
"copy_dev_to_host"
copyHostToDev :: Name
copyHostToDev = Name
"copy_host_to_dev"
copyScalarToDev :: Name
copyScalarToDev = Name
"copy_scalar_to_dev"
copyScalarFromDev :: Name
copyScalarFromDev = Name
"copy_scalar_from_dev"

profilingEvent :: Name -> C.Exp
profilingEvent :: Name -> Exp
profilingEvent Name
name =
  [C.cexp|(ctx->profiling_paused || !ctx->profiling) ? NULL
          : opencl_get_event(&ctx->opencl,
                             &ctx->$id:(kernelRuns name),
                             &ctx->$id:(kernelRuntime name))|]

-- | Called after most code has been generated to generate the bulk of
-- the boilerplate.
generateBoilerplate ::
  T.Text ->
  T.Text ->
  [Name] ->
  M.Map KernelName KernelSafety ->
  [PrimType] ->
  M.Map Name SizeClass ->
  [FailureMsg] ->
  GC.CompilerM OpenCL () ()
generateBoilerplate :: Text
-> Text
-> [Name]
-> Map Name KernelSafety
-> [PrimType]
-> Map Name SizeClass
-> [FailureMsg]
-> CompilerM OpenCL () ()
generateBoilerplate Text
opencl_code Text
opencl_prelude [Name]
cost_centres Map Name KernelSafety
kernels [PrimType]
types Map Name SizeClass
sizes [FailureMsg]
failures = do
  [Stm]
final_inits <- CompilerM OpenCL () [Stm]
forall op s. CompilerM op s [Stm]
GC.contextFinalInits

  let ([FieldGroup]
ctx_opencl_fields, [Stm]
ctx_opencl_inits, [Definition]
top_decls, [Definition]
later_top_decls) =
        [Name]
-> Map Name KernelSafety
-> Text
-> ([FieldGroup], [Stm], [Definition], [Definition])
openClDecls [Name]
cost_centres Map Name KernelSafety
kernels (Text
opencl_prelude Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
opencl_code)

  (Definition -> CompilerM OpenCL () ())
-> [Definition] -> CompilerM OpenCL () ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [Definition]
top_decls

  let size_name_inits :: [Initializer]
size_name_inits = (Name -> Initializer) -> [Name] -> [Initializer]
forall a b. (a -> b) -> [a] -> [b]
map (\Name
k -> [C.cinit|$string:(pretty k)|]) ([Name] -> [Initializer]) -> [Name] -> [Initializer]
forall a b. (a -> b) -> a -> b
$ Map Name SizeClass -> [Name]
forall k a. Map k a -> [k]
M.keys Map Name SizeClass
sizes
      size_var_inits :: [Initializer]
size_var_inits = (Name -> Initializer) -> [Name] -> [Initializer]
forall a b. (a -> b) -> [a] -> [b]
map (\Name
k -> [C.cinit|$string:(zEncodeString (pretty k))|]) ([Name] -> [Initializer]) -> [Name] -> [Initializer]
forall a b. (a -> b) -> a -> b
$ Map Name SizeClass -> [Name]
forall k a. Map k a -> [k]
M.keys Map Name SizeClass
sizes
      size_class_inits :: [Initializer]
size_class_inits = (SizeClass -> Initializer) -> [SizeClass] -> [Initializer]
forall a b. (a -> b) -> [a] -> [b]
map (\SizeClass
c -> [C.cinit|$string:(pretty c)|]) ([SizeClass] -> [Initializer]) -> [SizeClass] -> [Initializer]
forall a b. (a -> b) -> a -> b
$ Map Name SizeClass -> [SizeClass]
forall k a. Map k a -> [a]
M.elems Map Name SizeClass
sizes
      num_sizes :: Int
num_sizes = Map Name SizeClass -> Int
forall k a. Map k a -> Int
M.size Map Name SizeClass
sizes

  Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_names[] = { $inits:size_name_inits };|]
  Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_vars[] = { $inits:size_var_inits };|]
  Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_classes[] = { $inits:size_class_inits };|]

  let size_decls :: [FieldGroup]
size_decls = (Name -> FieldGroup) -> [Name] -> [FieldGroup]
forall a b. (a -> b) -> [a] -> [b]
map (\Name
k -> [C.csdecl|typename int64_t *$id:k;|]) ([Name] -> [FieldGroup]) -> [Name] -> [FieldGroup]
forall a b. (a -> b) -> a -> b
$ Map Name SizeClass -> [Name]
forall k a. Map k a -> [k]
M.keys Map Name SizeClass
sizes
  Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|struct tuning_params { $sdecls:size_decls };|]
  [Char]
cfg <- [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () [Char]
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s [Char]
GC.publicDef [Char]
"context_config" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition))
 -> CompilerM OpenCL () [Char])
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () [Char]
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|struct $id:s;|],
      [C.cedecl|struct $id:s { int in_use;
                               struct opencl_config opencl;
                               typename int64_t tuning_params[$int:num_sizes];
                               int num_build_opts;
                               const char **build_opts;
                            };|]
    )

  let size_value_inits :: [Stm]
size_value_inits = (Int -> SizeClass -> Stm) -> [Int] -> [SizeClass] -> [Stm]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> SizeClass -> Stm
forall a. (Show a, Integral a) => a -> SizeClass -> Stm
sizeInit [Int
0 .. Map Name SizeClass -> Int
forall k a. Map k a -> Int
M.size Map Name SizeClass
sizes Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1] (Map Name SizeClass -> [SizeClass]
forall k a. Map k a -> [a]
M.elems Map Name SizeClass
sizes)
      sizeInit :: a -> SizeClass -> Stm
sizeInit a
i SizeClass
size = [C.cstm|cfg->tuning_params[$int:i] = $int:val;|]
        where
          val :: Int64
val = Int64 -> Maybe Int64 -> Int64
forall a. a -> Maybe a -> a
fromMaybe Int64
0 (Maybe Int64 -> Int64) -> Maybe Int64 -> Int64
forall a b. (a -> b) -> a -> b
$ SizeClass -> Maybe Int64
sizeDefault SizeClass
size
  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_new" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|struct $id:cfg* $id:s(void);|],
      [C.cedecl|struct $id:cfg* $id:s(void) {
                         struct $id:cfg *cfg = (struct $id:cfg*) malloc(sizeof(struct $id:cfg));
                         if (cfg == NULL) {
                           return NULL;
                         }

                         cfg->in_use = 0;
                         cfg->num_build_opts = 0;
                         cfg->build_opts = (const char**) malloc(sizeof(const char*));
                         cfg->build_opts[0] = NULL;
                         $stms:size_value_inits
                         opencl_config_init(&cfg->opencl, $int:num_sizes,
                                            tuning_param_names, tuning_param_vars,
                                            cfg->tuning_params, tuning_param_classes);
                         return cfg;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_free" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg) {
                         assert(!cfg->in_use);
                         free(cfg->build_opts);
                         free(cfg);
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_add_build_option" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *opt);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *opt) {
                         cfg->build_opts[cfg->num_build_opts] = opt;
                         cfg->num_build_opts++;
                         cfg->build_opts = (const char**) realloc(cfg->build_opts, (cfg->num_build_opts+1) * sizeof(const char*));
                         cfg->build_opts[cfg->num_build_opts] = NULL;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_debugging" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag) {
                         cfg->opencl.profiling = cfg->opencl.logging = cfg->opencl.debugging = flag;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_profiling" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag) {
                         cfg->opencl.profiling = flag;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_logging" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag) {
                         cfg->opencl.logging = flag;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_device" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *s);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *s) {
                         set_preferred_device(&cfg->opencl, s);
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_platform" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *s);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *s) {
                         set_preferred_platform(&cfg->opencl, s);
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_select_device_interactively" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg) {
                         select_device_interactively(&cfg->opencl);
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_list_devices" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg) {
                         (void)cfg;
                         list_devices();
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_dump_program_to" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path) {
                         cfg->opencl.dump_program_to = path;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_load_program_from" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path) {
                         cfg->opencl.load_program_from = path;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_dump_binary_to" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path) {
                         cfg->opencl.dump_binary_to = path;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_load_binary_from" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path) {
                         cfg->opencl.load_binary_from = path;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_default_group_size" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int size);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int size) {
                         cfg->opencl.default_group_size = size;
                         cfg->opencl.default_group_size_changed = 1;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_default_num_groups" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int num);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int num) {
                         cfg->opencl.default_num_groups = num;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_default_tile_size" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int num);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int size) {
                         cfg->opencl.default_tile_size = size;
                         cfg->opencl.default_tile_size_changed = 1;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_default_reg_tile_size" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int num);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int size) {
                         cfg->opencl.default_reg_tile_size = size;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_default_threshold" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:cfg* cfg, int num);|],
      [C.cedecl|void $id:s(struct $id:cfg* cfg, int size) {
                         cfg->opencl.default_threshold = size;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_config_set_tuning_param" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|int $id:s(struct $id:cfg* cfg, const char *param_name, size_t new_value);|],
      [C.cedecl|int $id:s(struct $id:cfg* cfg, const char *param_name, size_t new_value) {

                         for (int i = 0; i < $int:num_sizes; i++) {
                           if (strcmp(param_name, tuning_param_names[i]) == 0) {
                             cfg->tuning_params[i] = new_value;
                             return 0;
                           }
                         }

                         if (strcmp(param_name, "default_group_size") == 0) {
                           cfg->opencl.default_group_size = new_value;
                           return 0;
                         }

                         if (strcmp(param_name, "default_num_groups") == 0) {
                           cfg->opencl.default_num_groups = new_value;
                           return 0;
                         }

                         if (strcmp(param_name, "default_threshold") == 0) {
                           cfg->opencl.default_threshold = new_value;
                           return 0;
                         }

                         if (strcmp(param_name, "default_tile_size") == 0) {
                           cfg->opencl.default_tile_size = new_value;
                           return 0;
                         }

                         if (strcmp(param_name, "default_reg_tile_size") == 0) {
                           cfg->opencl.default_reg_tile_size = new_value;
                           return 0;
                         }

                         return 1;
                       }|]
    )

  ([FieldGroup]
fields, [Stm]
init_fields, [Stm]
free_fields) <- CompilerM OpenCL () ([FieldGroup], [Stm], [Stm])
forall op s. CompilerM op s ([FieldGroup], [Stm], [Stm])
GC.contextContents
  [Char]
ctx <- [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () [Char]
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s [Char]
GC.publicDef [Char]
"context" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition))
 -> CompilerM OpenCL () [Char])
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () [Char]
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|struct $id:s;|],
      [C.cedecl|struct $id:s {
                         struct $id:cfg* cfg;
                         int detail_memory;
                         int debugging;
                         int profiling;
                         int profiling_paused;
                         int logging;
                         typename lock_t lock;
                         char *error;
                         typename FILE *log;
                         $sdecls:fields
                         $sdecls:ctx_opencl_fields
                         typename cl_mem global_failure;
                         typename cl_mem global_failure_args;
                         struct opencl_context opencl;
                         struct tuning_params tuning_params;
                         // True if a potentially failing kernel has been enqueued.
                         typename cl_int failure_is_an_option;
                       };|]
    )

  (Definition -> CompilerM OpenCL () ())
-> [Definition] -> CompilerM OpenCL () ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [Definition]
later_top_decls

  Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl
    [C.cedecl|static void init_context_early(struct $id:cfg *cfg, struct $id:ctx* ctx) {
                     ctx->opencl.cfg = cfg->opencl;
                     ctx->detail_memory = cfg->opencl.debugging;
                     ctx->debugging = cfg->opencl.debugging;
                     ctx->profiling = cfg->opencl.profiling;
                     ctx->profiling_paused = 0;
                     ctx->logging = cfg->opencl.logging;
                     ctx->error = NULL;
                     ctx->log = stderr;
                     ctx->opencl.profiling_records_capacity = 200;
                     ctx->opencl.profiling_records_used = 0;
                     ctx->opencl.profiling_records =
                       malloc(ctx->opencl.profiling_records_capacity *
                              sizeof(struct profiling_record));
                     create_lock(&ctx->lock);

                     ctx->failure_is_an_option = 0;
                     $stms:init_fields
                     $stms:ctx_opencl_inits
  }|]

  let set_tuning_params :: [Stm]
set_tuning_params =
        (Int -> Name -> Stm) -> [Int] -> [Name] -> [Stm]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith
          (\Int
i Name
k -> [C.cstm|ctx->tuning_params.$id:k = &cfg->tuning_params[$int:i];|])
          [(Int
0 :: Int) ..]
          ([Name] -> [Stm]) -> [Name] -> [Stm]
forall a b. (a -> b) -> a -> b
$ Map Name SizeClass -> [Name]
forall k a. Map k a -> [k]
M.keys Map Name SizeClass
sizes
      max_failure_args :: Int
max_failure_args =
        (Int -> Int -> Int) -> Int -> [Int] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
0 ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (FailureMsg -> Int) -> [FailureMsg] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (ErrorMsg Exp -> Int
forall a. ErrorMsg a -> Int
errorMsgNumArgs (ErrorMsg Exp -> Int)
-> (FailureMsg -> ErrorMsg Exp) -> FailureMsg -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FailureMsg -> ErrorMsg Exp
failureError) [FailureMsg]
failures

  Definition -> CompilerM OpenCL () ()
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl
    [C.cedecl|static int init_context_late(struct $id:cfg *cfg, struct $id:ctx* ctx, typename cl_program prog) {
                     typename cl_int error;

                     typename cl_int no_error = -1;
                     ctx->global_failure =
                       clCreateBuffer(ctx->opencl.ctx,
                                      CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                      sizeof(cl_int), &no_error, &error);
                     OPENCL_SUCCEED_OR_RETURN(error);

                     // The +1 is to avoid zero-byte allocations.
                     ctx->global_failure_args =
                       clCreateBuffer(ctx->opencl.ctx,
                                      CL_MEM_READ_WRITE,
                                      sizeof(int64_t)*($int:max_failure_args+1), NULL, &error);
                     OPENCL_SUCCEED_OR_RETURN(error);

                     // Load all the kernels.
                     $stms:(map loadKernel (M.toList kernels))

                     $stms:final_inits
                     $stms:set_tuning_params

                     init_constants(ctx);
                     // Clear the free list of any deallocations that occurred while initialising constants.
                     OPENCL_SUCCEED_OR_RETURN(opencl_free_all(&ctx->opencl));

                     // The program will be properly freed after all the kernels have also been freed.
                     OPENCL_SUCCEED_OR_RETURN(clReleaseProgram(prog));

                     return futhark_context_sync(ctx);
  }|]

  let set_required_types :: [Stm]
set_required_types =
        [ [C.cstm|required_types |= OPENCL_F64; |]
          | FloatType -> PrimType
FloatType FloatType
Float64 PrimType -> [PrimType] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [PrimType]
types
        ]

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_new" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg);|],
      [C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg) {
                          assert(!cfg->in_use);
                          struct $id:ctx* ctx = (struct $id:ctx*) malloc(sizeof(struct $id:ctx));
                          if (ctx == NULL) {
                            return NULL;
                          }
                          ctx->cfg = cfg;
                          ctx->cfg->in_use = 1;

                          int required_types = 0;
                          $stms:set_required_types

                          init_context_early(cfg, ctx);
                          typename cl_program prog = setup_opencl(&ctx->opencl, opencl_program, required_types, cfg->build_opts);
                          init_context_late(cfg, ctx, prog);
                          return ctx;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_new_with_command_queue" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg, typename cl_command_queue queue);|],
      [C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg, typename cl_command_queue queue) {
                          assert(!cfg->in_use);
                          struct $id:ctx* ctx = (struct $id:ctx*) malloc(sizeof(struct $id:ctx));
                          if (ctx == NULL) {
                            return NULL;
                          }
                          ctx->cfg = cfg;
                          ctx->cfg->in_use = 1;

                          int required_types = 0;
                          $stms:set_required_types

                          init_context_early(cfg, ctx);
                          typename cl_program prog = setup_opencl_with_command_queue(&ctx->opencl, queue, opencl_program, required_types, cfg->build_opts);
                          init_context_late(cfg, ctx, prog);
                          return ctx;
                       }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_free" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|void $id:s(struct $id:ctx* ctx);|],
      [C.cedecl|void $id:s(struct $id:ctx* ctx) {
                                 $stms:free_fields
                                 free_constants(ctx);
                                 free_lock(&ctx->lock);
                                 $stms:(map releaseKernel (M.toList kernels))
                                 teardown_opencl(&ctx->opencl);
                                 ctx->cfg->in_use = 0;
                                 free(ctx);
                               }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_sync" HeaderSection
GC.MiscDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|int $id:s(struct $id:ctx* ctx);|],
      [C.cedecl|int $id:s(struct $id:ctx* ctx) {
                 // Check for any delayed error.
                 typename cl_int failure_idx = -1;
                 if (ctx->failure_is_an_option) {
                   OPENCL_SUCCEED_OR_RETURN(
                     clEnqueueReadBuffer(ctx->opencl.queue,
                                         ctx->global_failure,
                                         CL_FALSE,
                                         0, sizeof(typename cl_int), &failure_idx,
                                         0, NULL, $exp:(profilingEvent copyScalarFromDev)));
                   ctx->failure_is_an_option = 0;
                 }

                 OPENCL_SUCCEED_OR_RETURN(clFinish(ctx->opencl.queue));

                 if (failure_idx >= 0) {
                   // We have to clear global_failure so that the next entry point
                   // is not considered a failure from the start.
                   typename cl_int no_failure = -1;
                   OPENCL_SUCCEED_OR_RETURN(
                    clEnqueueWriteBuffer(ctx->opencl.queue, ctx->global_failure, CL_TRUE,
                                         0, sizeof(cl_int), &no_failure,
                                         0, NULL, NULL));

                   typename int64_t args[$int:max_failure_args+1];
                   OPENCL_SUCCEED_OR_RETURN(
                     clEnqueueReadBuffer(ctx->opencl.queue,
                                         ctx->global_failure_args,
                                         CL_TRUE,
                                         0, sizeof(args), &args,
                                         0, NULL, $exp:(profilingEvent copyDevToHost)));

                   $stm:(failureSwitch failures)

                   return 1;
                 }
                 return 0;
               }|]
    )

  [Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM OpenCL () ()
forall op s.
[Char]
-> HeaderSection
-> ([Char] -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ [Char]
"context_get_command_queue" HeaderSection
GC.InitDecl (([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ())
-> ([Char] -> (Definition, Definition)) -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$ \[Char]
s ->
    ( [C.cedecl|typename cl_command_queue $id:s(struct $id:ctx* ctx);|],
      [C.cedecl|typename cl_command_queue $id:s(struct $id:ctx* ctx) {
                 return ctx->opencl.queue;
               }|]
    )

  BlockItem -> CompilerM OpenCL () ()
forall op s. BlockItem -> CompilerM op s ()
GC.onClear
    [C.citem|if (ctx->error == NULL) {
                        ctx->error = OPENCL_SUCCEED_NONFATAL(opencl_free_all(&ctx->opencl));
                      }|]

  BlockItem -> CompilerM OpenCL () ()
forall op s. BlockItem -> CompilerM op s ()
GC.profileReport [C.citem|OPENCL_SUCCEED_FATAL(opencl_tally_profiling_records(&ctx->opencl));|]
  (BlockItem -> CompilerM OpenCL () ())
-> [BlockItem] -> CompilerM OpenCL () ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ BlockItem -> CompilerM OpenCL () ()
forall op s. BlockItem -> CompilerM op s ()
GC.profileReport ([BlockItem] -> CompilerM OpenCL () ())
-> [BlockItem] -> CompilerM OpenCL () ()
forall a b. (a -> b) -> a -> b
$
    [Name] -> [BlockItem]
costCentreReport ([Name] -> [BlockItem]) -> [Name] -> [BlockItem]
forall a b. (a -> b) -> a -> b
$
      [Name]
cost_centres [Name] -> [Name] -> [Name]
forall a. [a] -> [a] -> [a]
++ Map Name KernelSafety -> [Name]
forall k a. Map k a -> [k]
M.keys Map Name KernelSafety
kernels

openClDecls ::
  [Name] ->
  M.Map KernelName KernelSafety ->
  T.Text ->
  ([C.FieldGroup], [C.Stm], [C.Definition], [C.Definition])
openClDecls :: [Name]
-> Map Name KernelSafety
-> Text
-> ([FieldGroup], [Stm], [Definition], [Definition])
openClDecls [Name]
cost_centres Map Name KernelSafety
kernels Text
opencl_program =
  ([FieldGroup]
ctx_fields, [Stm]
ctx_inits, [Definition]
openCL_boilerplate, [Definition]
openCL_load)
  where
    opencl_program_fragments :: [Initializer]
opencl_program_fragments =
      -- Some C compilers limit the size of literal strings, so
      -- chunk the entire program into small bits here, and
      -- concatenate it again at runtime.
      [ [C.cinit|$string:s|]
        | [Char]
s <- Int -> [Char] -> [[Char]]
forall a. Int -> [a] -> [[a]]
chunk Int
2000 ([Char] -> [[Char]]) -> [Char] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
opencl_program
      ]

    ctx_fields :: [FieldGroup]
ctx_fields =
      [ [C.csdecl|int total_runs;|],
        [C.csdecl|long int total_runtime;|]
      ]
        [FieldGroup] -> [FieldGroup] -> [FieldGroup]
forall a. [a] -> [a] -> [a]
++ [ [C.csdecl|typename cl_kernel $id:name;|]
             | Name
name <- Map Name KernelSafety -> [Name]
forall k a. Map k a -> [k]
M.keys Map Name KernelSafety
kernels
           ]
        [FieldGroup] -> [FieldGroup] -> [FieldGroup]
forall a. [a] -> [a] -> [a]
++ [[FieldGroup]] -> [FieldGroup]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
          [ [ [C.csdecl|typename int64_t $id:(kernelRuntime name);|],
              [C.csdecl|int $id:(kernelRuns name);|]
            ]
            | Name
name <- [Name]
cost_centres [Name] -> [Name] -> [Name]
forall a. [a] -> [a] -> [a]
++ Map Name KernelSafety -> [Name]
forall k a. Map k a -> [k]
M.keys Map Name KernelSafety
kernels
          ]

    ctx_inits :: [Stm]
ctx_inits =
      [ [C.cstm|ctx->total_runs = 0;|],
        [C.cstm|ctx->total_runtime = 0;|]
      ]
        [Stm] -> [Stm] -> [Stm]
forall a. [a] -> [a] -> [a]
++ [[Stm]] -> [Stm]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
          [ [ [C.cstm|ctx->$id:(kernelRuntime name) = 0;|],
              [C.cstm|ctx->$id:(kernelRuns name) = 0;|]
            ]
            | Name
name <- [Name]
cost_centres [Name] -> [Name] -> [Name]
forall a. [a] -> [a] -> [a]
++ Map Name KernelSafety -> [Name]
forall k a. Map k a -> [k]
M.keys Map Name KernelSafety
kernels
          ]

    openCL_load :: [Definition]
openCL_load =
      [ [C.cedecl|
void post_opencl_setup(struct opencl_context *ctx, struct opencl_device_option *option) {
  $stms:(map sizeHeuristicsCode sizeHeuristicsTable)
}|]
      ]

    program_fragments :: [Initializer]
program_fragments = [Initializer]
opencl_program_fragments [Initializer] -> [Initializer] -> [Initializer]
forall a. [a] -> [a] -> [a]
++ [[C.cinit|NULL|]]
    openCL_boilerplate :: [Definition]
openCL_boilerplate =
      [C.cunit|
          $esc:("typedef cl_mem fl_mem_t;")
          $esc:(T.unpack freeListH)
          $esc:(T.unpack openclH)
          static const char *opencl_program[] = {$inits:program_fragments};|]

loadKernel :: (KernelName, KernelSafety) -> C.Stm
loadKernel :: (Name, KernelSafety) -> Stm
loadKernel (Name
name, KernelSafety
safety) =
  [C.cstm|{
  ctx->$id:name = clCreateKernel(prog, $string:(pretty (C.toIdent name mempty)), &error);
  OPENCL_SUCCEED_FATAL(error);
  $items:set_args
  if (ctx->debugging) {
    fprintf(ctx->log, "Created kernel %s.\n", $string:(pretty name));
  }
  }|]
  where
    set_global_failure :: BlockItem
set_global_failure =
      [C.citem|OPENCL_SUCCEED_FATAL(
                     clSetKernelArg(ctx->$id:name, 0, sizeof(typename cl_mem),
                                    &ctx->global_failure));|]
    set_global_failure_args :: BlockItem
set_global_failure_args =
      [C.citem|OPENCL_SUCCEED_FATAL(
                     clSetKernelArg(ctx->$id:name, 2, sizeof(typename cl_mem),
                                    &ctx->global_failure_args));|]
    set_args :: [BlockItem]
set_args = case KernelSafety
safety of
      KernelSafety
SafetyNone -> []
      KernelSafety
SafetyCheap -> [BlockItem
set_global_failure]
      KernelSafety
SafetyFull -> [BlockItem
set_global_failure, BlockItem
set_global_failure_args]

releaseKernel :: (KernelName, KernelSafety) -> C.Stm
releaseKernel :: (Name, KernelSafety) -> Stm
releaseKernel (Name
name, KernelSafety
_) = [C.cstm|OPENCL_SUCCEED_FATAL(clReleaseKernel(ctx->$id:name));|]

kernelRuntime :: KernelName -> Name
kernelRuntime :: Name -> Name
kernelRuntime = (Name -> Name -> Name
forall a. Semigroup a => a -> a -> a
<> Name
"_total_runtime")

kernelRuns :: KernelName -> Name
kernelRuns :: Name -> Name
kernelRuns = (Name -> Name -> Name
forall a. Semigroup a => a -> a -> a
<> Name
"_runs")

costCentreReport :: [Name] -> [C.BlockItem]
costCentreReport :: [Name] -> [BlockItem]
costCentreReport [Name]
names = [BlockItem]
report_kernels [BlockItem] -> [BlockItem] -> [BlockItem]
forall a. [a] -> [a] -> [a]
++ [BlockItem
report_total]
  where
    longest_name :: Int
longest_name = (Int -> Int -> Int) -> Int -> [Int] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
0 ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (Name -> Int) -> [Name] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map ([Char] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([Char] -> Int) -> (Name -> [Char]) -> Name -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Name -> [Char]
forall a. Pretty a => a -> [Char]
pretty) [Name]
names
    report_kernels :: [BlockItem]
report_kernels = (Name -> [BlockItem]) -> [Name] -> [BlockItem]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Name -> [BlockItem]
reportKernel [Name]
names
    format_string :: [Char] -> [Char]
format_string [Char]
name =
      let padding :: [Char]
padding = Int -> Char -> [Char]
forall a. Int -> a -> [a]
replicate (Int
longest_name Int -> Int -> Int
forall a. Num a => a -> a -> a
- [Char] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
name) Char
' '
       in [[Char]] -> [Char]
unwords
            [ [Char]
name [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
padding,
              [Char]
"ran %5d times; avg: %8ldus; total: %8ldus\n"
            ]
    reportKernel :: Name -> [BlockItem]
reportKernel Name
name =
      let runs :: Name
runs = Name -> Name
kernelRuns Name
name
          total_runtime :: Name
total_runtime = Name -> Name
kernelRuntime Name
name
       in [ [C.citem|
               str_builder(&builder,
                           $string:(format_string (pretty name)),
                           ctx->$id:runs,
                           (long int) ctx->$id:total_runtime / (ctx->$id:runs != 0 ? ctx->$id:runs : 1),
                           (long int) ctx->$id:total_runtime);
              |],
            [C.citem|ctx->total_runtime += ctx->$id:total_runtime;|],
            [C.citem|ctx->total_runs += ctx->$id:runs;|]
          ]

    report_total :: BlockItem
report_total =
      [C.citem|
                          str_builder(&builder, "%d operations with cumulative runtime: %6ldus\n",
                                      ctx->total_runs, ctx->total_runtime);
                        |]

sizeHeuristicsCode :: SizeHeuristic -> C.Stm
sizeHeuristicsCode :: SizeHeuristic -> Stm
sizeHeuristicsCode (SizeHeuristic [Char]
platform_name DeviceType
device_type WhichSize
which (TPrimExp PrimExp DeviceInfo
what)) =
  [C.cstm|
   if ($exp:which' == 0 &&
       strstr(option->platform_name, $string:platform_name) != NULL &&
       (option->device_type & $exp:(clDeviceType device_type)) == $exp:(clDeviceType device_type)) {
     $items:get_size
   }|]
  where
    clDeviceType :: DeviceType -> Exp
clDeviceType DeviceType
DeviceGPU = [C.cexp|CL_DEVICE_TYPE_GPU|]
    clDeviceType DeviceType
DeviceCPU = [C.cexp|CL_DEVICE_TYPE_CPU|]

    which' :: Exp
which' = case WhichSize
which of
      WhichSize
LockstepWidth -> [C.cexp|ctx->lockstep_width|]
      WhichSize
NumGroups -> [C.cexp|ctx->cfg.default_num_groups|]
      WhichSize
GroupSize -> [C.cexp|ctx->cfg.default_group_size|]
      WhichSize
TileSize -> [C.cexp|ctx->cfg.default_tile_size|]
      WhichSize
RegTileSize -> [C.cexp|ctx->cfg.default_reg_tile_size|]
      WhichSize
Threshold -> [C.cexp|ctx->cfg.default_threshold|]

    get_size :: [BlockItem]
get_size =
      let (Exp
e, Map [Char] [BlockItem]
m) = State (Map [Char] [BlockItem]) Exp
-> Map [Char] [BlockItem] -> (Exp, Map [Char] [BlockItem])
forall s a. State s a -> s -> (a, s)
runState ((DeviceInfo -> State (Map [Char] [BlockItem]) Exp)
-> PrimExp DeviceInfo -> State (Map [Char] [BlockItem]) Exp
forall (m :: * -> *) v.
Monad m =>
(v -> m Exp) -> PrimExp v -> m Exp
GC.compilePrimExp DeviceInfo -> State (Map [Char] [BlockItem]) Exp
forall (m :: * -> *).
MonadState (Map [Char] [BlockItem]) m =>
DeviceInfo -> m Exp
onLeaf PrimExp DeviceInfo
what) Map [Char] [BlockItem]
forall a. Monoid a => a
mempty
       in [[BlockItem]] -> [BlockItem]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (Map [Char] [BlockItem] -> [[BlockItem]]
forall k a. Map k a -> [a]
M.elems Map [Char] [BlockItem]
m) [BlockItem] -> [BlockItem] -> [BlockItem]
forall a. [a] -> [a] -> [a]
++ [[C.citem|$exp:which' = $exp:e;|]]

    onLeaf :: DeviceInfo -> m Exp
onLeaf (DeviceInfo [Char]
s) = do
      let s' :: [Char]
s' = [Char]
"CL_DEVICE_" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
s
          v :: [Char]
v = [Char]
s [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"_val"
      Map [Char] [BlockItem]
m <- m (Map [Char] [BlockItem])
forall s (m :: * -> *). MonadState s m => m s
get
      case [Char] -> Map [Char] [BlockItem] -> Maybe [BlockItem]
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup [Char]
s Map [Char] [BlockItem]
m of
        Maybe [BlockItem]
Nothing ->
          -- XXX: Cheating with the type here; works for the infos we
          -- currently use because we zero-initialise and assume a
          -- little-endian platform, but should be made more
          -- size-aware in the future.
          (Map [Char] [BlockItem] -> Map [Char] [BlockItem]) -> m ()
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify ((Map [Char] [BlockItem] -> Map [Char] [BlockItem]) -> m ())
-> (Map [Char] [BlockItem] -> Map [Char] [BlockItem]) -> m ()
forall a b. (a -> b) -> a -> b
$
            [Char]
-> [BlockItem] -> Map [Char] [BlockItem] -> Map [Char] [BlockItem]
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert
              [Char]
s'
              [C.citems|size_t $id:v = 0;
                        clGetDeviceInfo(ctx->device, $id:s',
                                        sizeof($id:v), &$id:v,
                                        NULL);|]
        Just [BlockItem]
_ -> () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

      Exp -> m Exp
forall (m :: * -> *) a. Monad m => a -> m a
return [C.cexp|$id:v|]

-- Output size information if logging is enabled.
--
-- The autotuner depends on the format of this output, so use caution if
-- changing it.
sizeLoggingCode :: VName -> Name -> C.Exp -> GC.CompilerM op () ()
sizeLoggingCode :: VName -> Name -> Exp -> CompilerM op () ()
sizeLoggingCode VName
v Name
key Exp
x' = do
  Stm -> CompilerM op () ()
forall op s. Stm -> CompilerM op s ()
GC.stm
    [C.cstm|if (ctx->logging) {
    fprintf(ctx->log, "Compared %s <= %ld: %s.\n", $string:(prettyOneLine key), (long)$exp:x', $id:v ? "true" : "false");
    }|]

-- Options that are common to multiple GPU-like backends.
commonOptions :: [Option]
commonOptions :: [Option]
commonOptions =
  [ Option :: [Char] -> Maybe Char -> OptionArgument -> [Char] -> Stm -> Option
Option
      { optionLongName :: [Char]
optionLongName = [Char]
"device",
        optionShortName :: Maybe Char
optionShortName = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'd',
        optionArgument :: OptionArgument
optionArgument = [Char] -> OptionArgument
RequiredArgument [Char]
"NAME",
        optionDescription :: [Char]
optionDescription = [Char]
"Use the first OpenCL device whose name contains the given string.",
        optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_device(cfg, optarg);|]
      },
    Option :: [Char] -> Maybe Char -> OptionArgument -> [Char] -> Stm -> Option
Option
      { optionLongName :: [Char]
optionLongName = [Char]
"default-group-size",
        optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
        optionArgument :: OptionArgument
optionArgument = [Char] -> OptionArgument
RequiredArgument [Char]
"INT",
        optionDescription :: [Char]
optionDescription = [Char]
"The default size of OpenCL workgroups that are launched.",
        optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_group_size(cfg, atoi(optarg));|]
      },
    Option :: [Char] -> Maybe Char -> OptionArgument -> [Char] -> Stm -> Option
Option
      { optionLongName :: [Char]
optionLongName = [Char]
"default-num-groups",
        optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
        optionArgument :: OptionArgument
optionArgument = [Char] -> OptionArgument
RequiredArgument [Char]
"INT",
        optionDescription :: [Char]
optionDescription = [Char]
"The default number of OpenCL workgroups that are launched.",
        optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_num_groups(cfg, atoi(optarg));|]
      },
    Option :: [Char] -> Maybe Char -> OptionArgument -> [Char] -> Stm -> Option
Option
      { optionLongName :: [Char]
optionLongName = [Char]
"default-tile-size",
        optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
        optionArgument :: OptionArgument
optionArgument = [Char] -> OptionArgument
RequiredArgument [Char]
"INT",
        optionDescription :: [Char]
optionDescription = [Char]
"The default tile size used when performing two-dimensional tiling.",
        optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_tile_size(cfg, atoi(optarg));|]
      },
    Option :: [Char] -> Maybe Char -> OptionArgument -> [Char] -> Stm -> Option
Option
      { optionLongName :: [Char]
optionLongName = [Char]
"default-reg-tile-size",
        optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
        optionArgument :: OptionArgument
optionArgument = [Char] -> OptionArgument
RequiredArgument [Char]
"INT",
        optionDescription :: [Char]
optionDescription = [Char]
"The default register tile size used when performing two-dimensional tiling.",
        optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_reg_tile_size(cfg, atoi(optarg));|]
      },
    Option :: [Char] -> Maybe Char -> OptionArgument -> [Char] -> Stm -> Option
Option
      { optionLongName :: [Char]
optionLongName = [Char]
"default-threshold",
        optionShortName :: Maybe Char
optionShortName = Maybe Char
forall a. Maybe a
Nothing,
        optionArgument :: OptionArgument
optionArgument = [Char] -> OptionArgument
RequiredArgument [Char]
"INT",
        optionDescription :: [Char]
optionDescription = [Char]
"The default parallelism threshold.",
        optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_threshold(cfg, atoi(optarg));|]
      }
  ]