{-# LANGUAGE QuasiQuotes #-}
module Futhark.CodeGen.Backends.COpenCL.Boilerplate
( generateBoilerplate,
profilingEvent,
copyDevToDev,
copyDevToHost,
copyHostToDev,
copyScalarToDev,
copyScalarFromDev,
commonOptions,
failureSwitch,
costCentreReport,
kernelRuntime,
kernelRuns,
sizeLoggingCode,
)
where
import Control.Monad.State
import Data.Map qualified as M
import Data.Maybe
import Data.Text qualified as T
import Futhark.CodeGen.Backends.GenericC qualified as GC
import Futhark.CodeGen.Backends.GenericC.Options
import Futhark.CodeGen.Backends.GenericC.Pretty
import Futhark.CodeGen.ImpCode.OpenCL
import Futhark.CodeGen.OpenCL.Heuristics
import Futhark.CodeGen.RTS.C (freeListH, openclH)
import Futhark.Util (chunk, zEncodeString)
import Futhark.Util.Pretty (prettyTextOneLine)
import Language.C.Quote.OpenCL qualified as C
import Language.C.Syntax qualified as C
errorMsgNumArgs :: ErrorMsg a -> Int
errorMsgNumArgs :: forall a. ErrorMsg a -> Int
errorMsgNumArgs = forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ErrorMsg a -> [PrimType]
errorMsgArgTypes
failureSwitch :: [FailureMsg] -> C.Stm
failureSwitch :: [FailureMsg] -> Stm
failureSwitch [FailureMsg]
failures =
let printfEscape :: String -> String
printfEscape =
let escapeChar :: Char -> String
escapeChar Char
'%' = String
"%%"
escapeChar Char
c = [Char
c]
in forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> String
escapeChar
onPart :: ErrorMsgPart a -> String
onPart (ErrorString Text
s) = String -> String
printfEscape forall a b. (a -> b) -> a -> b
$ Text -> String
T.unpack Text
s
onPart ErrorVal {} = String
"%lld"
onFailure :: a -> FailureMsg -> Stm
onFailure a
i (FailureMsg emsg :: ErrorMsg Exp
emsg@(ErrorMsg [ErrorMsgPart Exp]
parts) String
backtrace) =
let msg :: String
msg = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap forall {a}. ErrorMsgPart a -> String
onPart [ErrorMsgPart Exp]
parts forall a. [a] -> [a] -> [a]
++ String
"\n" forall a. [a] -> [a] -> [a]
++ String -> String
printfEscape String
backtrace
msgargs :: [Exp]
msgargs = [[C.cexp|args[$int:j]|] | Int
j <- [Int
0 .. forall a. ErrorMsg a -> Int
errorMsgNumArgs ErrorMsg Exp
emsg forall a. Num a => a -> a -> a
- Int
1]]
in [C.cstm|case $int:i: {ctx->error = msgprintf($string:msg, $args:msgargs); break;}|]
failure_cases :: [Stm]
failure_cases =
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith forall {a}. (Show a, Integral a) => a -> FailureMsg -> Stm
onFailure [(Int
0 :: Int) ..] [FailureMsg]
failures
in [C.cstm|switch (failure_idx) { $stms:failure_cases }|]
copyDevToDev, copyDevToHost, copyHostToDev, copyScalarToDev, copyScalarFromDev :: Name
copyDevToDev :: KernelName
copyDevToDev = KernelName
"copy_dev_to_dev"
copyDevToHost :: KernelName
copyDevToHost = KernelName
"copy_dev_to_host"
copyHostToDev :: KernelName
copyHostToDev = KernelName
"copy_host_to_dev"
copyScalarToDev :: KernelName
copyScalarToDev = KernelName
"copy_scalar_to_dev"
copyScalarFromDev :: KernelName
copyScalarFromDev = KernelName
"copy_scalar_from_dev"
profilingEvent :: Name -> C.Exp
profilingEvent :: KernelName -> Exp
profilingEvent KernelName
name =
[C.cexp|(ctx->profiling_paused || !ctx->profiling) ? NULL
: opencl_get_event(&ctx->opencl,
&ctx->$id:(kernelRuns name),
&ctx->$id:(kernelRuntime name))|]
generateBoilerplate ::
T.Text ->
T.Text ->
[Name] ->
M.Map KernelName KernelSafety ->
[PrimType] ->
M.Map Name SizeClass ->
[FailureMsg] ->
GC.CompilerM OpenCL () ()
generateBoilerplate :: Text
-> Text
-> [KernelName]
-> Map KernelName KernelSafety
-> [PrimType]
-> Map KernelName SizeClass
-> [FailureMsg]
-> CompilerM OpenCL () ()
generateBoilerplate Text
opencl_code Text
opencl_prelude [KernelName]
cost_centres Map KernelName KernelSafety
kernels [PrimType]
types Map KernelName SizeClass
sizes [FailureMsg]
failures = do
[Stm]
final_inits <- forall op s. CompilerM op s [Stm]
GC.contextFinalInits
let ([FieldGroup]
ctx_opencl_fields, [Stm]
ctx_opencl_inits, [Definition]
top_decls, [Definition]
later_top_decls) =
[KernelName]
-> Map KernelName KernelSafety
-> Text
-> ([FieldGroup], [Stm], [Definition], [Definition])
openClDecls [KernelName]
cost_centres Map KernelName KernelSafety
kernels (Text
opencl_prelude forall a. Semigroup a => a -> a -> a
<> Text
opencl_code)
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [Definition]
top_decls
let size_name_inits :: [Initializer]
size_name_inits = forall a b. (a -> b) -> [a] -> [b]
map (\KernelName
k -> [C.cinit|$string:(prettyString k)|]) forall a b. (a -> b) -> a -> b
$ forall k a. Map k a -> [k]
M.keys Map KernelName SizeClass
sizes
size_var_inits :: [Initializer]
size_var_inits = forall a b. (a -> b) -> [a] -> [b]
map (\KernelName
k -> [C.cinit|$string:(zEncodeString (prettyString k))|]) forall a b. (a -> b) -> a -> b
$ forall k a. Map k a -> [k]
M.keys Map KernelName SizeClass
sizes
size_class_inits :: [Initializer]
size_class_inits = forall a b. (a -> b) -> [a] -> [b]
map (\SizeClass
c -> [C.cinit|$string:(prettyString c)|]) forall a b. (a -> b) -> a -> b
$ forall k a. Map k a -> [a]
M.elems Map KernelName SizeClass
sizes
num_sizes :: Int
num_sizes = forall k a. Map k a -> Int
M.size Map KernelName SizeClass
sizes
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_names[] = { $inits:size_name_inits };|]
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_vars[] = { $inits:size_var_inits };|]
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|static const char *tuning_param_classes[] = { $inits:size_class_inits };|]
let size_decls :: [FieldGroup]
size_decls = forall a b. (a -> b) -> [a] -> [b]
map (\KernelName
k -> [C.csdecl|typename int64_t *$id:k;|]) forall a b. (a -> b) -> a -> b
$ forall k a. Map k a -> [k]
M.keys Map KernelName SizeClass
sizes
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [C.cedecl|struct tuning_params { $sdecls:size_decls };|]
String
cfg <- forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s String
GC.publicDef String
"context_config" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|struct $id:s;|],
[C.cedecl|struct $id:s { int in_use;
struct opencl_config opencl;
typename int64_t tuning_params[$int:num_sizes];
int num_build_opts;
const char **build_opts;
const char *cache_fname;
};|]
)
let size_value_inits :: [Stm]
size_value_inits = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith forall {a}. (Show a, Integral a) => a -> SizeClass -> Stm
sizeInit [Int
0 .. forall k a. Map k a -> Int
M.size Map KernelName SizeClass
sizes forall a. Num a => a -> a -> a
- Int
1] (forall k a. Map k a -> [a]
M.elems Map KernelName SizeClass
sizes)
sizeInit :: a -> SizeClass -> Stm
sizeInit a
i SizeClass
size = [C.cstm|cfg->tuning_params[$int:i] = $int:val;|]
where
val :: Int64
val = forall a. a -> Maybe a -> a
fromMaybe Int64
0 forall a b. (a -> b) -> a -> b
$ SizeClass -> Maybe Int64
sizeDefault SizeClass
size
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_new" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|struct $id:cfg* $id:s(void);|],
[C.cedecl|struct $id:cfg* $id:s(void) {
struct $id:cfg *cfg = (struct $id:cfg*) malloc(sizeof(struct $id:cfg));
if (cfg == NULL) {
return NULL;
}
cfg->in_use = 0;
cfg->num_build_opts = 0;
cfg->build_opts = (const char**) malloc(sizeof(const char*));
cfg->build_opts[0] = NULL;
cfg->cache_fname = NULL;
$stms:size_value_inits
opencl_config_init(&cfg->opencl, $int:num_sizes,
tuning_param_names, tuning_param_vars,
cfg->tuning_params, tuning_param_classes);
return cfg;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_free" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg) {
assert(!cfg->in_use);
free(cfg->build_opts);
free(cfg);
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_add_build_option" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *opt);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, const char *opt) {
cfg->build_opts[cfg->num_build_opts] = opt;
cfg->num_build_opts++;
cfg->build_opts = (const char**) realloc(cfg->build_opts, (cfg->num_build_opts+1) * sizeof(const char*));
cfg->build_opts[cfg->num_build_opts] = NULL;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_debugging" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, int flag) {
cfg->opencl.profiling = cfg->opencl.logging = cfg->opencl.debugging = flag;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_profiling" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, int flag) {
cfg->opencl.profiling = flag;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_logging" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, int flag);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, int flag) {
cfg->opencl.logging = flag;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_device" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *s);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, const char *s) {
set_preferred_device(&cfg->opencl, s);
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_platform" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *s);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, const char *s) {
set_preferred_platform(&cfg->opencl, s);
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_select_device_interactively" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg) {
select_device_interactively(&cfg->opencl);
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_list_devices" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg) {
(void)cfg;
list_devices();
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_dump_program_to" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path) {
cfg->opencl.dump_program_to = path;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_load_program_from" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path) {
cfg->opencl.load_program_from = path;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_dump_binary_to" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path) {
cfg->opencl.dump_binary_to = path;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_load_binary_from" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, const char *path) {
cfg->opencl.load_binary_from = path;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_default_group_size" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, int size);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, int size) {
cfg->opencl.default_group_size = size;
cfg->opencl.default_group_size_changed = 1;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_default_num_groups" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, int num);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, int num) {
cfg->opencl.default_num_groups = num;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_default_tile_size" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, int num);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, int size) {
cfg->opencl.default_tile_size = size;
cfg->opencl.default_tile_size_changed = 1;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_default_reg_tile_size" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, int num);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, int size) {
cfg->opencl.default_reg_tile_size = size;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_default_threshold" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:cfg* cfg, int num);|],
[C.cedecl|void $id:s(struct $id:cfg* cfg, int size) {
cfg->opencl.default_threshold = size;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_config_set_tuning_param" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|int $id:s(struct $id:cfg* cfg, const char *param_name, size_t new_value);|],
[C.cedecl|int $id:s(struct $id:cfg* cfg, const char *param_name, size_t new_value) {
for (int i = 0; i < $int:num_sizes; i++) {
if (strcmp(param_name, tuning_param_names[i]) == 0) {
cfg->tuning_params[i] = new_value;
return 0;
}
}
if (strcmp(param_name, "default_group_size") == 0) {
cfg->opencl.default_group_size = new_value;
return 0;
}
if (strcmp(param_name, "default_num_groups") == 0) {
cfg->opencl.default_num_groups = new_value;
return 0;
}
if (strcmp(param_name, "default_threshold") == 0) {
cfg->opencl.default_threshold = new_value;
return 0;
}
if (strcmp(param_name, "default_tile_size") == 0) {
cfg->opencl.default_tile_size = new_value;
return 0;
}
if (strcmp(param_name, "default_reg_tile_size") == 0) {
cfg->opencl.default_reg_tile_size = new_value;
return 0;
}
return 1;
}|]
)
([FieldGroup]
fields, [Stm]
init_fields, [Stm]
free_fields) <- forall op s. CompilerM op s ([FieldGroup], [Stm], [Stm])
GC.contextContents
String
ctx <- forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s String
GC.publicDef String
"context" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|struct $id:s;|],
[C.cedecl|struct $id:s {
struct $id:cfg* cfg;
int detail_memory;
int debugging;
int profiling;
int profiling_paused;
int logging;
typename lock_t lock;
char *error;
typename lock_t error_lock;
typename FILE *log;
$sdecls:fields
$sdecls:ctx_opencl_fields
typename cl_mem global_failure;
typename cl_mem global_failure_args;
struct opencl_context opencl;
struct tuning_params tuning_params;
// True if a potentially failing kernel has been enqueued.
typename cl_int failure_is_an_option;
};|]
)
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. Definition -> CompilerM op s ()
GC.earlyDecl [Definition]
later_top_decls
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl
[C.cedecl|static void init_context_early(struct $id:cfg *cfg, struct $id:ctx* ctx) {
ctx->opencl.cfg = cfg->opencl;
ctx->detail_memory = cfg->opencl.debugging;
ctx->debugging = cfg->opencl.debugging;
ctx->profiling = cfg->opencl.profiling;
ctx->profiling_paused = 0;
ctx->logging = cfg->opencl.logging;
ctx->error = NULL;
create_lock(&ctx->error_lock);
ctx->log = stderr;
ctx->opencl.profiling_records_capacity = 200;
ctx->opencl.profiling_records_used = 0;
ctx->opencl.profiling_records =
malloc(ctx->opencl.profiling_records_capacity *
sizeof(struct profiling_record));
create_lock(&ctx->lock);
ctx->failure_is_an_option = 0;
$stms:init_fields
$stms:ctx_opencl_inits
}|]
let set_tuning_params :: [Stm]
set_tuning_params =
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith
(\Int
i KernelName
k -> [C.cstm|ctx->tuning_params.$id:k = &cfg->tuning_params[$int:i];|])
[(Int
0 :: Int) ..]
forall a b. (a -> b) -> a -> b
$ forall k a. Map k a -> [k]
M.keys Map KernelName SizeClass
sizes
max_failure_args :: Int
max_failure_args =
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl forall a. Ord a => a -> a -> a
max Int
0 forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall a. ErrorMsg a -> Int
errorMsgNumArgs forall b c a. (b -> c) -> (a -> b) -> a -> c
. FailureMsg -> ErrorMsg Exp
failureError) [FailureMsg]
failures
forall op s. Definition -> CompilerM op s ()
GC.earlyDecl
[C.cedecl|static int init_context_late(struct $id:cfg *cfg, struct $id:ctx* ctx, typename cl_program prog) {
typename cl_int error;
typename cl_int no_error = -1;
ctx->global_failure =
clCreateBuffer(ctx->opencl.ctx,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_int), &no_error, &error);
OPENCL_SUCCEED_OR_RETURN(error);
// The +1 is to avoid zero-byte allocations.
ctx->global_failure_args =
clCreateBuffer(ctx->opencl.ctx,
CL_MEM_READ_WRITE,
sizeof(int64_t)*($int:max_failure_args+1), NULL, &error);
OPENCL_SUCCEED_OR_RETURN(error);
// Load all the kernels.
$stms:(map loadKernel (M.toList kernels))
$stms:final_inits
$stms:set_tuning_params
init_constants(ctx);
// Clear the free list of any deallocations that occurred while initialising constants.
OPENCL_SUCCEED_OR_RETURN(opencl_free_all(&ctx->opencl));
// The program will be properly freed after all the kernels have also been freed.
OPENCL_SUCCEED_OR_RETURN(clReleaseProgram(prog));
return futhark_context_sync(ctx);
}|]
let set_required_types :: [Stm]
set_required_types =
[ [C.cstm|required_types |= OPENCL_F64; |]
| FloatType -> PrimType
FloatType FloatType
Float64 forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [PrimType]
types
]
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_new" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg);|],
[C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg) {
assert(!cfg->in_use);
struct $id:ctx* ctx = (struct $id:ctx*) malloc(sizeof(struct $id:ctx));
if (ctx == NULL) {
return NULL;
}
ctx->cfg = cfg;
ctx->cfg->in_use = 1;
int required_types = 0;
$stms:set_required_types
init_context_early(cfg, ctx);
typename cl_program prog =
setup_opencl(&ctx->opencl, opencl_program, required_types, cfg->build_opts,
cfg->cache_fname);
init_context_late(cfg, ctx, prog);
return ctx;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_new_with_command_queue" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg, typename cl_command_queue queue);|],
[C.cedecl|struct $id:ctx* $id:s(struct $id:cfg* cfg, typename cl_command_queue queue) {
assert(!cfg->in_use);
struct $id:ctx* ctx = (struct $id:ctx*) malloc(sizeof(struct $id:ctx));
if (ctx == NULL) {
return NULL;
}
ctx->cfg = cfg;
ctx->cfg->in_use = 1;
int required_types = 0;
$stms:set_required_types
init_context_early(cfg, ctx);
typename cl_program prog =
setup_opencl_with_command_queue(&ctx->opencl, queue, opencl_program, required_types, cfg->build_opts,
cfg->cache_fname);
init_context_late(cfg, ctx, prog);
return ctx;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_free" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|void $id:s(struct $id:ctx* ctx);|],
[C.cedecl|void $id:s(struct $id:ctx* ctx) {
$stms:free_fields
free_constants(ctx);
free_lock(&ctx->lock);
$stms:(map releaseKernel (M.toList kernels))
OPENCL_SUCCEED_FATAL(clReleaseMemObject(ctx->global_failure));
OPENCL_SUCCEED_FATAL(clReleaseMemObject(ctx->global_failure_args));
teardown_opencl(&ctx->opencl);
ctx->cfg->in_use = 0;
free(ctx);
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_sync" HeaderSection
GC.MiscDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|int $id:s(struct $id:ctx* ctx);|],
[C.cedecl|int $id:s(struct $id:ctx* ctx) {
// Check for any delayed error.
typename cl_int failure_idx = -1;
if (ctx->failure_is_an_option) {
OPENCL_SUCCEED_OR_RETURN(
clEnqueueReadBuffer(ctx->opencl.queue,
ctx->global_failure,
CL_FALSE,
0, sizeof(typename cl_int), &failure_idx,
0, NULL, $exp:(profilingEvent copyScalarFromDev)));
ctx->failure_is_an_option = 0;
}
OPENCL_SUCCEED_OR_RETURN(clFinish(ctx->opencl.queue));
if (failure_idx >= 0) {
// We have to clear global_failure so that the next entry point
// is not considered a failure from the start.
typename cl_int no_failure = -1;
OPENCL_SUCCEED_OR_RETURN(
clEnqueueWriteBuffer(ctx->opencl.queue, ctx->global_failure, CL_TRUE,
0, sizeof(cl_int), &no_failure,
0, NULL, NULL));
typename int64_t args[$int:max_failure_args+1];
OPENCL_SUCCEED_OR_RETURN(
clEnqueueReadBuffer(ctx->opencl.queue,
ctx->global_failure_args,
CL_TRUE,
0, sizeof(args), &args,
0, NULL, $exp:(profilingEvent copyDevToHost)));
$stm:(failureSwitch failures)
return FUTHARK_PROGRAM_ERROR;
}
return 0;
}|]
)
forall op s.
String
-> HeaderSection
-> (String -> (Definition, Definition))
-> CompilerM op s ()
GC.publicDef_ String
"context_get_command_queue" HeaderSection
GC.InitDecl forall a b. (a -> b) -> a -> b
$ \String
s ->
( [C.cedecl|typename cl_command_queue $id:s(struct $id:ctx* ctx);|],
[C.cedecl|typename cl_command_queue $id:s(struct $id:ctx* ctx) {
return ctx->opencl.queue;
}|]
)
forall op s. BlockItem -> CompilerM op s ()
GC.onClear
[C.citem|if (ctx->error == NULL) {
ctx->error = OPENCL_SUCCEED_NONFATAL(opencl_free_all(&ctx->opencl));
}|]
forall op s. BlockItem -> CompilerM op s ()
GC.profileReport [C.citem|OPENCL_SUCCEED_FATAL(opencl_tally_profiling_records(&ctx->opencl));|]
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall op s. BlockItem -> CompilerM op s ()
GC.profileReport forall a b. (a -> b) -> a -> b
$
[KernelName] -> [BlockItem]
costCentreReport forall a b. (a -> b) -> a -> b
$
[KernelName]
cost_centres forall a. [a] -> [a] -> [a]
++ forall k a. Map k a -> [k]
M.keys Map KernelName KernelSafety
kernels
openClDecls ::
[Name] ->
M.Map KernelName KernelSafety ->
T.Text ->
([C.FieldGroup], [C.Stm], [C.Definition], [C.Definition])
openClDecls :: [KernelName]
-> Map KernelName KernelSafety
-> Text
-> ([FieldGroup], [Stm], [Definition], [Definition])
openClDecls [KernelName]
cost_centres Map KernelName KernelSafety
kernels Text
opencl_program =
([FieldGroup]
ctx_fields, [Stm]
ctx_inits, [Definition]
openCL_boilerplate, [Definition]
openCL_load)
where
opencl_program_fragments :: [Initializer]
opencl_program_fragments =
[ [C.cinit|$string:s|]
| String
s <- forall a. Int -> [a] -> [[a]]
chunk Int
2000 forall a b. (a -> b) -> a -> b
$ Text -> String
T.unpack Text
opencl_program
]
ctx_fields :: [FieldGroup]
ctx_fields =
[ [C.csdecl|int total_runs;|],
[C.csdecl|long int total_runtime;|]
]
forall a. [a] -> [a] -> [a]
++ [ [C.csdecl|typename cl_kernel $id:name;|]
| KernelName
name <- forall k a. Map k a -> [k]
M.keys Map KernelName KernelSafety
kernels
]
forall a. [a] -> [a] -> [a]
++ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
[ [ [C.csdecl|typename int64_t $id:(kernelRuntime name);|],
[C.csdecl|int $id:(kernelRuns name);|]
]
| KernelName
name <- [KernelName]
cost_centres forall a. [a] -> [a] -> [a]
++ forall k a. Map k a -> [k]
M.keys Map KernelName KernelSafety
kernels
]
ctx_inits :: [Stm]
ctx_inits =
[ [C.cstm|ctx->total_runs = 0;|],
[C.cstm|ctx->total_runtime = 0;|]
]
forall a. [a] -> [a] -> [a]
++ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
[ [ [C.cstm|ctx->$id:(kernelRuntime name) = 0;|],
[C.cstm|ctx->$id:(kernelRuns name) = 0;|]
]
| KernelName
name <- [KernelName]
cost_centres forall a. [a] -> [a] -> [a]
++ forall k a. Map k a -> [k]
M.keys Map KernelName KernelSafety
kernels
]
openCL_load :: [Definition]
openCL_load =
[ [C.cedecl|
void post_opencl_setup(struct opencl_context *ctx, struct opencl_device_option *option) {
$stms:(map sizeHeuristicsCode sizeHeuristicsTable)
}|]
]
program_fragments :: [Initializer]
program_fragments = [Initializer]
opencl_program_fragments forall a. [a] -> [a] -> [a]
++ [[C.cinit|NULL|]]
openCL_boilerplate :: [Definition]
openCL_boilerplate =
[C.cunit|
$esc:("typedef cl_mem fl_mem_t;")
$esc:(T.unpack freeListH)
$esc:(T.unpack openclH)
static const char *opencl_program[] = {$inits:program_fragments};|]
loadKernel :: (KernelName, KernelSafety) -> C.Stm
loadKernel :: (KernelName, KernelSafety) -> Stm
loadKernel (KernelName
name, KernelSafety
safety) =
[C.cstm|{
ctx->$id:name = clCreateKernel(prog, $string:(T.unpack (idText (C.toIdent name mempty))), &error);
OPENCL_SUCCEED_FATAL(error);
$items:set_args
if (ctx->debugging) {
fprintf(ctx->log, "Created kernel %s.\n", $string:(prettyString name));
}
}|]
where
set_global_failure :: BlockItem
set_global_failure =
[C.citem|OPENCL_SUCCEED_FATAL(
clSetKernelArg(ctx->$id:name, 0, sizeof(typename cl_mem),
&ctx->global_failure));|]
set_global_failure_args :: BlockItem
set_global_failure_args =
[C.citem|OPENCL_SUCCEED_FATAL(
clSetKernelArg(ctx->$id:name, 2, sizeof(typename cl_mem),
&ctx->global_failure_args));|]
set_args :: [BlockItem]
set_args = case KernelSafety
safety of
KernelSafety
SafetyNone -> []
KernelSafety
SafetyCheap -> [BlockItem
set_global_failure]
KernelSafety
SafetyFull -> [BlockItem
set_global_failure, BlockItem
set_global_failure_args]
releaseKernel :: (KernelName, KernelSafety) -> C.Stm
releaseKernel :: (KernelName, KernelSafety) -> Stm
releaseKernel (KernelName
name, KernelSafety
_) = [C.cstm|OPENCL_SUCCEED_FATAL(clReleaseKernel(ctx->$id:name));|]
kernelRuntime :: KernelName -> Name
kernelRuntime :: KernelName -> KernelName
kernelRuntime = (forall a. Semigroup a => a -> a -> a
<> KernelName
"_total_runtime")
kernelRuns :: KernelName -> Name
kernelRuns :: KernelName -> KernelName
kernelRuns = (forall a. Semigroup a => a -> a -> a
<> KernelName
"_runs")
costCentreReport :: [Name] -> [C.BlockItem]
costCentreReport :: [KernelName] -> [BlockItem]
costCentreReport [KernelName]
names = [BlockItem]
report_kernels forall a. [a] -> [a] -> [a]
++ [BlockItem
report_total]
where
longest_name :: Int
longest_name = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl forall a. Ord a => a -> a -> a
max Int
0 forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Pretty a => a -> String
prettyString) [KernelName]
names
report_kernels :: [BlockItem]
report_kernels = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap KernelName -> [BlockItem]
reportKernel [KernelName]
names
format_string :: String -> String
format_string String
name =
let padding :: String
padding = forall a. Int -> a -> [a]
replicate (Int
longest_name forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. Foldable t => t a -> Int
length String
name) Char
' '
in [String] -> String
unwords
[ String
name forall a. [a] -> [a] -> [a]
++ String
padding,
String
"ran %5d times; avg: %8ldus; total: %8ldus\n"
]
reportKernel :: KernelName -> [BlockItem]
reportKernel KernelName
name =
let runs :: KernelName
runs = KernelName -> KernelName
kernelRuns KernelName
name
total_runtime :: KernelName
total_runtime = KernelName -> KernelName
kernelRuntime KernelName
name
in [ [C.citem|
str_builder(&builder,
$string:(format_string (prettyString name)),
ctx->$id:runs,
(long int) ctx->$id:total_runtime / (ctx->$id:runs != 0 ? ctx->$id:runs : 1),
(long int) ctx->$id:total_runtime);
|],
[C.citem|ctx->total_runtime += ctx->$id:total_runtime;|],
[C.citem|ctx->total_runs += ctx->$id:runs;|]
]
report_total :: BlockItem
report_total =
[C.citem|
str_builder(&builder, "%d operations with cumulative runtime: %6ldus\n",
ctx->total_runs, ctx->total_runtime);
|]
sizeHeuristicsCode :: SizeHeuristic -> C.Stm
sizeHeuristicsCode :: SizeHeuristic -> Stm
sizeHeuristicsCode (SizeHeuristic String
platform_name DeviceType
device_type WhichSize
which (TPrimExp PrimExp DeviceInfo
what)) =
[C.cstm|
if ($exp:which' == 0 &&
strstr(option->platform_name, $string:platform_name) != NULL &&
(option->device_type & $exp:(clDeviceType device_type)) == $exp:(clDeviceType device_type)) {
$items:get_size
}|]
where
clDeviceType :: DeviceType -> Exp
clDeviceType DeviceType
DeviceGPU = [C.cexp|CL_DEVICE_TYPE_GPU|]
clDeviceType DeviceType
DeviceCPU = [C.cexp|CL_DEVICE_TYPE_CPU|]
which' :: Exp
which' = case WhichSize
which of
WhichSize
LockstepWidth -> [C.cexp|ctx->lockstep_width|]
WhichSize
NumGroups -> [C.cexp|ctx->cfg.default_num_groups|]
WhichSize
GroupSize -> [C.cexp|ctx->cfg.default_group_size|]
WhichSize
TileSize -> [C.cexp|ctx->cfg.default_tile_size|]
WhichSize
RegTileSize -> [C.cexp|ctx->cfg.default_reg_tile_size|]
WhichSize
Threshold -> [C.cexp|ctx->cfg.default_threshold|]
get_size :: [BlockItem]
get_size =
let (Exp
e, Map String [BlockItem]
m) = forall s a. State s a -> s -> (a, s)
runState (forall (m :: * -> *) v.
Monad m =>
(v -> m Exp) -> PrimExp v -> m Exp
GC.compilePrimExp forall {m :: * -> *}.
MonadState (Map String [BlockItem]) m =>
DeviceInfo -> m Exp
onLeaf PrimExp DeviceInfo
what) forall a. Monoid a => a
mempty
in forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (forall k a. Map k a -> [a]
M.elems Map String [BlockItem]
m) forall a. [a] -> [a] -> [a]
++ [[C.citem|$exp:which' = $exp:e;|]]
onLeaf :: DeviceInfo -> m Exp
onLeaf (DeviceInfo String
s) = do
let s' :: String
s' = String
"CL_DEVICE_" forall a. [a] -> [a] -> [a]
++ String
s
v :: String
v = String
s forall a. [a] -> [a] -> [a]
++ String
"_val"
Map String [BlockItem]
m <- forall s (m :: * -> *). MonadState s m => m s
get
case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup String
s Map String [BlockItem]
m of
Maybe [BlockItem]
Nothing ->
forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert
String
s'
[C.citems|size_t $id:v = 0;
clGetDeviceInfo(ctx->device, $id:s',
sizeof($id:v), &$id:v,
NULL);|]
Just [BlockItem]
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure [C.cexp|$id:v|]
sizeLoggingCode :: VName -> Name -> C.Exp -> GC.CompilerM op () ()
sizeLoggingCode :: forall op. VName -> KernelName -> Exp -> CompilerM op () ()
sizeLoggingCode VName
v KernelName
key Exp
x' = do
forall op s. Stm -> CompilerM op s ()
GC.stm
[C.cstm|if (ctx->logging) {
fprintf(ctx->log, "Compared %s <= %ld: %s.\n", $string:(T.unpack (prettyTextOneLine key)), (long)$exp:x', $id:v ? "true" : "false");
}|]
commonOptions :: [Option]
commonOptions :: [Option]
commonOptions =
[ Option
{ optionLongName :: String
optionLongName = String
"device",
optionShortName :: Maybe Char
optionShortName = forall a. a -> Maybe a
Just Char
'd',
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"NAME",
optionDescription :: String
optionDescription = String
"Use the first OpenCL device whose name contains the given string.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_device(cfg, optarg);|]
},
Option
{ optionLongName :: String
optionLongName = String
"default-group-size",
optionShortName :: Maybe Char
optionShortName = forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"INT",
optionDescription :: String
optionDescription = String
"The default size of OpenCL workgroups that are launched.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_group_size(cfg, atoi(optarg));|]
},
Option
{ optionLongName :: String
optionLongName = String
"default-num-groups",
optionShortName :: Maybe Char
optionShortName = forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"INT",
optionDescription :: String
optionDescription = String
"The default number of OpenCL workgroups that are launched.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_num_groups(cfg, atoi(optarg));|]
},
Option
{ optionLongName :: String
optionLongName = String
"default-tile-size",
optionShortName :: Maybe Char
optionShortName = forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"INT",
optionDescription :: String
optionDescription = String
"The default tile size used when performing two-dimensional tiling.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_tile_size(cfg, atoi(optarg));|]
},
Option
{ optionLongName :: String
optionLongName = String
"default-reg-tile-size",
optionShortName :: Maybe Char
optionShortName = forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"INT",
optionDescription :: String
optionDescription = String
"The default register tile size used when performing two-dimensional tiling.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_reg_tile_size(cfg, atoi(optarg));|]
},
Option
{ optionLongName :: String
optionLongName = String
"default-threshold",
optionShortName :: Maybe Char
optionShortName = forall a. Maybe a
Nothing,
optionArgument :: OptionArgument
optionArgument = String -> OptionArgument
RequiredArgument String
"INT",
optionDescription :: String
optionDescription = String
"The default parallelism threshold.",
optionAction :: Stm
optionAction = [C.cstm|futhark_context_config_set_default_threshold(cfg, atoi(optarg));|]
}
]