#ifndef GPU_WRAPPER_H #define GPU_WRAPPER_H #include #include #ifdef __cplusplus extern "C" { #endif // Opaque types for Haskell FFI typedef void* GPUContext; typedef void* GPUTensor; typedef void* GPUKernel; typedef void* GPUShape; typedef void* GPUKernelCode; typedef void* GPUQuerySet; typedef void* GPUCommandEncoder; typedef void* GPUBuffer; // Numeric types enum matching gpu.hpp typedef enum { GPU_F16 = 0, GPU_F32 = 1, GPU_F64 = 2, GPU_I4 = 3, // 4-bit signed (packed: 8 nibbles per u32) GPU_I8 = 4, GPU_I16 = 5, GPU_I32 = 6, GPU_I64 = 7, GPU_U4 = 8, // 4-bit unsigned (packed: 8 nibbles per u32) GPU_U8 = 9, GPU_U16 = 10, GPU_U32 = 11, GPU_U64 = 12, GPU_UNKNOWN = 13 } GPUNumType; // Error handling typedef struct { int code; const char* message; } GPUError; // Context management GPUContext gpu_create_context(GPUError* error); GPUContext gpu_create_context_with_features( const char** enabledToggles, size_t toggleCount, const uint32_t* requiredFeatures, size_t featureCount, GPUError* error); void gpu_destroy_context(GPUContext ctx); // Shape management GPUShape gpu_create_shape(const size_t* dims, size_t rank, GPUError* error); void gpu_destroy_shape(GPUShape shape); size_t gpu_shape_size(GPUShape shape); size_t gpu_shape_rank(GPUShape shape); size_t gpu_shape_dim(GPUShape shape, size_t index); // Tensor management GPUTensor gpu_create_tensor(GPUContext ctx, GPUShape shape, GPUNumType dtype, GPUError* error); GPUTensor gpu_create_tensor_with_data(GPUContext ctx, GPUShape shape, GPUNumType dtype, const void* data, size_t data_size, GPUError* error); void gpu_destroy_tensor(GPUTensor tensor); size_t gpu_tensor_size_bytes(GPUTensor tensor); // Data transfer void gpu_to_cpu(GPUContext ctx, GPUTensor tensor, void* output, size_t size, GPUError* error); void gpu_to_gpu(GPUContext ctx, const void* input, GPUTensor tensor, size_t size, GPUError* error); // Kernel code management GPUKernelCode gpu_create_kernel_code(const char* wgsl_code, GPUError* error); void gpu_set_kernel_workgroup_size(GPUKernelCode code, size_t x, size_t y, size_t z); void gpu_set_kernel_entry_point(GPUKernelCode code, const char* entry_point); void gpu_destroy_kernel_code(GPUKernelCode code); // Kernel compilation and execution GPUKernel gpu_create_kernel(GPUContext ctx, GPUKernelCode code, GPUTensor* tensors, size_t num_tensors, size_t num_workgroups_x, size_t num_workgroups_y, size_t num_workgroups_z, const char* cache_key, // Optional cache key for shader caching GPUError* error); void gpu_destroy_kernel(GPUKernel kernel); void gpu_dispatch_kernel(GPUContext ctx, GPUKernel kernel, GPUError* error); // Async dispatch - returns immediately, kernel executes in background // Call gpu_wait_all() to synchronize void gpu_dispatch_kernel_async(GPUContext ctx, GPUKernel kernel, GPUError* error); // Wait for all pending async dispatches to complete void gpu_wait_all(GPUContext ctx); // Command batching - accumulate multiple kernels into a single submission void gpu_begin_batch(GPUContext ctx, GPUError* error); void gpu_end_batch(GPUContext ctx, GPUError* error); // Utility functions size_t gpu_size_of_type(GPUNumType dtype); const char* gpu_type_name(GPUNumType dtype); // QuerySet types for timestamp queries typedef enum { GPU_QUERY_TYPE_OCCLUSION = 1, GPU_QUERY_TYPE_TIMESTAMP = 2 } GPUQueryType; // Timestamp query support GPUQuerySet gpu_create_query_set(GPUContext ctx, GPUQueryType type, uint32_t count, GPUError* error); void gpu_destroy_query_set(GPUQuerySet querySet); // Get raw WebGPU handles for advanced usage GPUCommandEncoder gpu_get_command_encoder(GPUContext ctx, GPUError* error); void gpu_release_command_encoder(GPUCommandEncoder encoder); // Timestamp query commands (to be called on command encoder) void gpu_write_timestamp(GPUCommandEncoder encoder, GPUQuerySet querySet, uint32_t queryIndex); void gpu_resolve_query_set(GPUCommandEncoder encoder, GPUQuerySet querySet, uint32_t firstQuery, uint32_t queryCount, GPUBuffer destination, uint64_t destinationOffset); // Create buffer for query results GPUBuffer gpu_create_query_buffer(GPUContext ctx, size_t size, GPUError* error); void gpu_release_buffer(GPUBuffer buffer); // Read back query results (blocking) void gpu_read_query_buffer(GPUContext ctx, GPUBuffer buffer, uint64_t* data, size_t count, GPUError* error); // Profiling functions void gpu_print_profile_stats(void); void gpu_reset_profile_stats(void); #ifdef ENABLE_GLFW // GLFW window and surface management typedef void* GPUWindow; typedef void* GPUSurface; // Initialize GLFW (must be called before creating windows) int gpu_glfw_init(GPUError* error); void gpu_glfw_terminate(void); // Window management GPUWindow gpu_create_window(int width, int height, const char* title, GPUError* error); void gpu_destroy_window(GPUWindow window); int gpu_window_should_close(GPUWindow window); void gpu_poll_events(void); int gpu_window_get_key(GPUWindow window, int key); // Surface management (for rendering to window) GPUSurface gpu_create_surface_for_window(GPUContext ctx, GPUWindow window, GPUError* error); void gpu_destroy_surface(GPUSurface surface); void gpu_configure_surface(GPUSurface surface, int width, int height, GPUError* error); // Get current texture from surface for rendering typedef void* GPUTexture; typedef void* GPUTextureView; GPUTexture gpu_surface_get_current_texture(GPUSurface surface, GPUError* error); GPUTextureView gpu_texture_create_view(GPUTexture texture, GPUError* error); void gpu_texture_release(GPUTexture texture); void gpu_texture_view_release(GPUTextureView view); // Present the surface void gpu_surface_present(GPUSurface surface); // Command encoding for rendering typedef void* GPUCommandEncoder; typedef void* GPURenderPassEncoder; typedef void* GPUCommandBuffer; typedef void* GPURenderPipeline; GPUCommandEncoder gpu_device_create_command_encoder(GPUContext ctx, GPUError* error); GPURenderPassEncoder gpu_encoder_begin_render_pass(GPUCommandEncoder encoder, GPUTextureView view, GPUError* error); void gpu_render_pass_set_pipeline(GPURenderPassEncoder pass, GPURenderPipeline pipeline); void gpu_render_pass_draw(GPURenderPassEncoder pass, uint32_t vertex_count); void gpu_render_pass_end(GPURenderPassEncoder pass); GPUCommandBuffer gpu_encoder_finish(GPUCommandEncoder encoder, GPUError* error); void gpu_queue_submit(GPUContext ctx, GPUCommandBuffer command, GPUError* error); void gpu_command_buffer_release(GPUCommandBuffer command); void gpu_command_encoder_release(GPUCommandEncoder encoder); void gpu_render_pass_encoder_release(GPURenderPassEncoder pass); // Pipeline creation typedef void* GPUShaderModule; GPUShaderModule gpu_create_shader_module(GPUContext ctx, const char* wgsl_code, GPUError* error); GPURenderPipeline gpu_create_render_pipeline(GPUContext ctx, GPUShaderModule shader, int texture_format, GPUError* error); void gpu_shader_module_release(GPUShaderModule shader); void gpu_render_pipeline_release(GPURenderPipeline pipeline); // Get texture format from surface int gpu_surface_get_preferred_format(GPUSurface surface, GPUError* error); #endif // ENABLE_GLFW // Debug Ring Buffer API (for GPU printf) typedef void* GPUDebugBuffer; // Create a debug ring buffer for GPU printf-style debugging // bufferSize: size in bytes (e.g., 64KB = 65536) GPUDebugBuffer gpu_create_debug_buffer(GPUContext ctx, size_t bufferSize, GPUError* error); // Destroy debug buffer void gpu_destroy_debug_buffer(GPUDebugBuffer debugBuffer); // Read debug buffer contents after kernel execution // Returns number of entries read uint32_t gpu_read_debug_buffer(GPUContext ctx, GPUDebugBuffer debugBuffer, uint32_t* data, size_t maxEntries, GPUError* error); // Clear debug buffer (reset write position) void gpu_clear_debug_buffer(GPUContext ctx, GPUDebugBuffer debugBuffer); #ifdef __cplusplus } #endif #endif // GPU_WRAPPER_H