#pragma once
// CL_CALL(func, args...) evaluates `func(args...)`, which must yield a cl_int
// status. Any status other than CL_SUCCESS is decoded with
// utils::getErrorString, written to std::cerr, and reported to the caller by
// throwing std::runtime_error("OpenCL error").
//
// The status is kept in a macro-private identifier so that an argument
// expression mentioning a caller variable called `err` is not silently
// captured by the macro's own local.
//
// The expansion stays a plain brace block so that existing call sites keep
// compiling whether or not they put a `;` after the invocation.
#define CL_CALL(func, ...) { \
    cl_int cl_call_status_ = func(__VA_ARGS__); \
    if (cl_call_status_ != CL_SUCCESS) { \
        std::cerr << "Error: " << utils::getErrorString(cl_call_status_) << std::endl; \
        throw std::runtime_error("OpenCL error"); \
    } \
}
#if defined __has_include
# if __has_include (<CL/cl.h>)
# include <CL/cl.h>
# endif
#endif
#if defined __has_include
# if __has_include (<CL/cl.hpp>)
# include <CL/cl.hpp>
# endif
#endif
#if defined __has_include
# if __has_include (<CL/opencl.hpp>)
# include <CL/opencl.hpp>
# endif
#endif
#include <algorithm>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>
namespace utils {
const char* getErrorString(cl_int error)
{
switch(error){
// Run-time and JIT compiler errors based on: https://streamhpc.com/blog/2013-04-28/opencl-error-codes/
case 0: return "OpenCL Error Code 0: CL_SUCCESS: \n\tThe sweet spot.";
case -1: return "OpenCL Error Code -1: CL_DEVICE_NOT_FOUND: \n Function(s): clGetDeviceIDs \n\tif no OpenCL devices that matched device_type were found.";
case -2: return "OpenCL Error Code -2: CL_DEVICE_NOT_AVAILABLE: \n Function(s): clCreateContext \n\tif a device in devices is currently not available even though the device was returned by clGetDeviceIDs.";
case -3: return "OpenCL Error Code -3: CL_COMPILER_NOT_AVAILABLE: \n Function(s): clBuildProgram \n\tif program is created with clCreateProgramWithSource and a compiler is not available i.e. CL_DEVICE_COMPILER_AVAILABLE specified in the table of OpenCL Device Queries for clGetDeviceInfo is set to CL_FALSE.";
case -4: return "OpenCL Error Code -4: CL_MEM_OBJECT_ALLOCATION_FAILURE: \n\tif there is a failure to allocate memory for buffer object.";
case -5: return "OpenCL Error Code -5: CL_OUT_OF_RESOURCES: \n\tif there is a failure to allocate resources required by the OpenCL implementation on the device.";
case -6: return "OpenCL Error Code -6: CL_OUT_OF_HOST_MEMORY: \n\tif there is a failure to allocate resources required by the OpenCL implementation on the host.";
case -7: return "OpenCL Error Code -7: CL_PROFILING_INFO_NOT_AVAILABLE: \n Function(s): clGetEventProfilingInfo \n\tif the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue, if the execution status of the command identified by event is not CL_COMPLETE or if event is a user event object.";
case -8: return "OpenCL Error Code -8: CL_MEM_COPY_OVERLAP: \n Function(s): clEnqueueCopyBuffer, clEnqueueCopyBufferRect, clEnqueueCopyImage \n\tif src_buffer and dst_buffer are the same buffer or subbuffer object and the source and destination regions overlap or if src_buffer and dst_buffer are different sub-buffers of the same associated buffer object and they overlap. The regions overlap if src_offset ≤ to dst_offset ≤ to src_offset + size - 1, or if dst_offset ≤ to src_offset ≤ to dst_offset + size - 1.";
case -9: return "OpenCL Error Code -9: CL_IMAGE_FORMAT_MISMATCH: \n Function(s): clEnqueueCopyImage \n\tif src_image and dst_image do not use the same image format.";
case -10: return "OpenCL Error Code -10: CL_IMAGE_FORMAT_NOT_SUPPORTED: \n Function(s): clCreateImage \n\tif the image_format is not supported.";
case -11: return "OpenCL Error Code -11: CL_BUILD_PROGRAM_FAILURE: \n Function(s): clBuildProgram \n\tif there is a failure to build the program executable. This error will be returned if clBuildProgram does not return until the build has completed.";
case -12: return "OpenCL Error Code -12: CL_MAP_FAILURE: \n Function(s): clEnqueueMapBuffer, clEnqueueMapImage \n\tif there is a failure to map the requested region into the host address space. This error cannot occur for image objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR.";
case -13: return "OpenCL Error Code -13: CL_MISALIGNED_SUB_BUFFER_OFFSET: \n\tif a sub-buffer object is specified as the value for an argument that is a buffer object and the offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue.";
case -14: return "OpenCL Error Code -14: CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: \n\tif the execution status of any of the events in event_list is a negative integer value.";
case -15: return "OpenCL Error Code -15: CL_COMPILE_PROGRAM_FAILURE: \n Function(s): clCompileProgram \n\tif there is a failure to compile the program source. This error will be returned if clCompileProgram does not return until the compile has completed.";
case -16: return "OpenCL Error Code -16: CL_LINKER_NOT_AVAILABLE: \n Function(s): clLinkProgram \n\tif a linker is not available i.e. CL_DEVICE_LINKER_AVAILABLE specified in the table of allowed values for param_name for clGetDeviceInfo is set to CL_FALSE.";
case -17: return "OpenCL Error Code -17: CL_LINK_PROGRAM_FAILURE: \n Function(s): clLinkProgram \n\tif there is a failure to link the compiled binaries and/or libraries.";
case -18: return "OpenCL Error Code -18: CL_DEVICE_PARTITION_FAILED: \n Function(s): clCreateSubDevices \n\tif the partition name is supported by the implementation but in_device could not be further partitioned.";
case -19: return "OpenCL Error Code -19: CL_KERNEL_ARG_INFO_NOT_AVAILABLE: \n Function(s): clGetKernelArgInfo \n\tif the argument information is not available for kernel.";
// compile-time errors
case -30: return "OpenCL Error Code -30: CL_INVALID_VALUE: \n Function(s): clGetDeviceIDs, clCreateContext \n\tThis depends on the function: two or more coupled parameters had errors.";
case -31: return "OpenCL Error Code -31: CL_INVALID_DEVICE_TYPE: \n Function(s): clGetDeviceIDs \n\tif an invalid device_type is given";
case -32: return "OpenCL Error Code -32: CL_INVALID_PLATFORM: \n Function(s): clGetDeviceIDs \n\tif an invalid platform was given";
case -33: return "OpenCL Error Code -33: CL_INVALID_DEVICE: \n Function(s): clCreateContext, clBuildProgram \n\tif devices contains an invalid device or are not associated with the specified platform.";
case -34: return "OpenCL Error Code -34: CL_INVALID_CONTEXT: \n\tif context is not a valid context.";
case -35: return "OpenCL Error Code -35: CL_INVALID_QUEUE_PROPERTIES: \n Function(s): clCreateCommandQueue \n\tif specified command-queue-properties are valid but are not supported by the device.";
case -36: return "OpenCL Error Code -36: CL_INVALID_COMMAND_QUEUE: \n\tif command_queue is not a valid command-queue.";
case -37: return "OpenCL Error Code -37: CL_INVALID_HOST_PTR: \n Function(s): clCreateImage, clCreateBuffer \n\tThis flag is valid only if host_ptr is not NULL. If specified, it indicates that the application wants the OpenCL implementation to allocate memory for the memory object and copy the data from memory referenced by host_ptr.CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR are mutually exclusive.CL_MEM_COPY_HOST_PTR can be used with CL_MEM_ALLOC_HOST_PTR to initialize the contents of the cl_mem object allocated using host-accessible (e.g. PCIe) memory.";
case -38: return "OpenCL Error Code -38: CL_INVALID_MEM_OBJECT: \n\tif memobj is not a valid OpenCL memory object.";
case -39: return "OpenCL Error Code -39: CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: \n\tif the OpenGL/DirectX texture internal format does not map to a supported OpenCL image format.";
case -40: return "OpenCL Error Code -40: CL_INVALID_IMAGE_SIZE: \n\tif an image object is specified as an argument value and the image dimensions (image width, height, specified or compute row and/or slice pitch) are not supported by device associated with queue.";
case -41: return "OpenCL Error Code -41: CL_INVALID_SAMPLER: \n Function(s): clGetSamplerInfo, clReleaseSampler, clRetainSampler, clSetKernelArg \n\tif sampler is not a valid sampler object.";
case -42: return "OpenCL Error Code -42: CL_INVALID_BINARY: \n Function(s): clCreateProgramWithBinary, clBuildProgram \n\tThe provided binary is unfit for the selected device. if program is created with clCreateProgramWithBinary and devices listed in device_list do not have a valid program binary loaded.";
case -43: return "OpenCL Error Code -43: CL_INVALID_BUILD_OPTIONS: \n Function(s): clBuildProgram \n\tif the build options specified by options are invalid.";
case -44: return "OpenCL Error Code -44: CL_INVALID_PROGRAM: \n\tif program is a not a valid program object.";
case -45: return "OpenCL Error Code -45: CL_INVALID_PROGRAM_EXECUTABLE: \n\tif there is no successfully built program executable available for device associated with command_queue.";
case -46: return "OpenCL Error Code -46: CL_INVALID_KERNEL_NAME: \n Function(s): clCreateKernel \n\tif kernel_name is not found in program";
case -47: return "OpenCL Error Code -47: CL_INVALID_KERNEL_DEFINITION: \n Function(s): clCreateKernel \n\tif the function definition for __kernel function given by kernel_name such as the number of arguments, the argument types are not the same for all devices for which the program executable has been built.";
case -48: return "OpenCL Error Code -48: CL_INVALID_KERNEL: \n\tif kernel is not a valid kernel object.";
case -49: return "OpenCL Error Code -49: CL_INVALID_ARG_INDEX: \n Function(s): clSetKernelArg, clGetKernelArgInfo \n\tif arg_index is not a valid argument index.";
case -50: return "OpenCL Error Code -50: CL_INVALID_ARG_VALUE: \n Function(s): clSetKernelArg, clGetKernelArgInfo \n\tif arg_value specified is not a valid value.";
case -51: return "OpenCL Error Code -51: CL_INVALID_ARG_SIZE: \n Function(s): clSetKernelArg \n\tif arg_size does not match the size of the data type for an argument that is not a memory object or if the argument is a memory object and arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the __local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler).";
case -52: return "OpenCL Error Code -52: CL_INVALID_KERNEL_ARGS: \n\tif the kernel argument values have not been specified.";
case -53: return "OpenCL Error Code -53: CL_INVALID_WORK_DIMENSION: \n\tif work_dim is not a valid value (i.e. a value between 1 and 3).";
case -54: return "OpenCL Error Code -54: CL_INVALID_WORK_GROUP_SIZE: \n\tif local_work_size is specified and number of work-items specified by global_work_size is not evenly divisable by size of work-group given by local_work_size or does not match the work-group size specified for kernel using the __attribute__ ((reqd_work_group_size(X, Y, Z))) qualifier in program source.if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] *… local_work_size[work_dim - 1] is greater than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in the table of OpenCL Device Queries for clGetDeviceInfo.if local_work_size is NULL and the __attribute__ ((reqd_work_group_size(X, Y, Z))) qualifier is used to declare the work-group size for kernel in the program source.";
case -55: return "OpenCL Error Code -55: CL_INVALID_WORK_ITEM_SIZE: \n\tif the number of work-items specified in any of local_work_size[0], … local_work_size[work_dim - 1] is greater than the corresponding values specified by CL_DEVICE_MAX_WORK_ITEM_SIZES[0], …. CL_DEVICE_MAX_WORK_ITEM_SIZES[work_dim - 1].";
case -56: return "OpenCL Error Code -56: CL_INVALID_GLOBAL_OFFSET: \n\tif the value specified in global_work_size + the corresponding values in global_work_offset for any dimensions is greater than the sizeof(size_t) for the device on which the kernel execution will be enqueued.";
case -57: return "OpenCL Error Code -57: CL_INVALID_EVENT_WAIT_LIST: \n\tif event_wait_list is NULL and num_events_in_wait_list > 0, or event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in event_wait_list are not valid events.";
case -58: return "OpenCL Error Code -58: CL_INVALID_EVENT: \n\tif event objects specified in event_list are not valid event objects.";
case -59: return "OpenCL Error Code -59: CL_INVALID_OPERATION: \n\tif interoperability is specified by setting CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_ADAPTER_D3D9EX_KHR or CL_CONTEXT_ADAPTER_DXVA_KHR to a non-NULL value, and interoperability with another graphics API is also specified. (only if the cl_khr_dx9_media_sharing extension is supported).";
case -60: return "OpenCL Error Code -60: CL_INVALID_GL_OBJECT: \n\tif texture is not a GL texture object whose type matches texture_target, if the specified miplevel of texture is not defined, or if the width or height of the specified miplevel is zero.";
case -61: return "OpenCL Error Code -61: CL_INVALID_BUFFER_SIZE: \n Function(s): clCreateBuffer, clCreateSubBuffer \n\tif size is 0.Implementations may return CL_INVALID_BUFFER_SIZE if size is greater than the CL_DEVICE_MAX_MEM_ALLOC_SIZE value specified in the table of allowed values for param_name for clGetDeviceInfo for all devices in context.";
case -62: return "OpenCL Error Code -62: CL_INVALID_MIP_LEVEL: \n Function(s): OpenGL-functions \n\tif miplevel is greater than zero and the OpenGL implementation does not support creating from non-zero mipmap levels.";
case -63: return "OpenCL Error Code -63: CL_INVALID_GLOBAL_WORK_SIZE: \n\tif global_work_size is NULL, or if any of the values specified in global_work_size[0], …global_work_size [work_dim - 1] are 0 or exceed the range given by the sizeof(size_t) for the device on which the kernel execution will be enqueued.";
case -64: return "OpenCL Error Code -64: CL_INVALID_PROPERTY: \n Function(s): clCreateContext \n\tVague error, depends on the function";
case -65: return "OpenCL Error Code -65: CL_INVALID_IMAGE_DESCRIPTOR: \n Function(s): clCreateImage \n\tif values specified in image_desc are not valid or if image_desc is NULL.";
case -66: return "OpenCL Error Code -66: CL_INVALID_COMPILER_OPTIONS: \n Function(s): clCompileProgram \n\tif the compiler options specified by options are invalid.";
case -67: return "OpenCL Error Code -67: CL_INVALID_LINKER_OPTIONS: \n Function(s): clLinkProgram \n\tif the linker options specified by options are invalid.";
case -68: return "OpenCL Error Code -68: CL_INVALID_DEVICE_PARTITION_COUNT: \n Function(s): clCreateSubDevices \n\tif the partition name specified in properties is CL_DEVICE_PARTITION_BY_COUNTS and the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_SUB_DEVICES or the total number of compute units requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device, or the number of compute units requested for one or more sub-devices is less than zero or the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device.";
case -69: return "OpenCL Error Code -69: CL_INVALID_PIPE_SIZE: \n Function(s): clCreatePipe \n\tif pipe_packet_size is 0 or the pipe_packet_size exceeds CL_DEVICE_PIPE_MAX_PACKET_SIZE value for all devices in context or if pipe_max_packets is 0.";
case -70: return "OpenCL Error Code -70: CL_INVALID_DEVICE_QUEUE: \n Function(s): clSetKernelArg \n\twhen an argument is of type queue_t when it's not a valid device queue object.";
// extension errors
case -1000: return "OpenCL Error Code -1000: CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: \n Function(s): clGetGLContextInfoKHR, clCreateContext \n\tCL and GL not on the same device (only when using a GPU).";
case -1001: return "OpenCL Error Code -1001: CL_PLATFORM_NOT_FOUND_KHR: \n Function(s): clGetPlatform \n\tNo valid ICDs found.";
case -1002: return "OpenCL Error Code -1002: CL_INVALID_D3D10_DEVICE_KHR: \n Function(s): clCreateContext, clCreateContextFromType \n\tif the Direct3D 10 device specified for interoperability is not compatible with the devices against which the context is to be created.";
case -1003: return "OpenCL Error Code -1003: CL_INVALID_D3D10_RESOURCE_KHR: \n Function(s): clCreateFromD3D10BufferKHR, clCreateFromD3D10Texture2DKHR, clCreateFromD3D10Texture3DKHR \n\tIf the resource is not a Direct3D 10 buffer or texture object";
case -1004: return "OpenCL Error Code -1004: CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR: \n Function(s): clEnqueueAcquireD3D10ObjectsKHR \n\tIf a mem_object is already acquired by OpenCL";
case -1005: return "OpenCL Error Code -1005: CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR: \n Function(s): clEnqueueReleaseD3D10ObjectsKHR \n\tIf a mem_object is not acquired by OpenCL";
case -1006: return "OpenCL Error Code -1006: CL_INVALID_D3D11_DEVICE_KHR: \n Function(s): clCreateContext, clCreateContextFromType \n\tif the Direct3D 11 device specified for interoperability is not compatible with the devices against which the context is to be created.";
case -1007: return "OpenCL Error Code -1007: CL_INVALID_D3D11_RESOURCE_KHR: \n Function(s): clCreateFromD3D11BufferKHR, clCreateFromD3D11Texture2DKHR, clCreateFromD3D11Texture3DKHR \n\tIf the resource is not a Direct3D 11 buffer or texture object";
case -1008: return "OpenCL Error Code -1008: CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR: \n Function(s): clEnqueueAcquireD3D11ObjectsKHR \n\tIf a mem_object is already acquired by OpenCL";
case -1009: return "OpenCL Error Code -1009: CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR: \n Function(s): clEnqueueReleaseD3D11ObjectsKHR\n\tIf a mem_object is not acquired by OpenCL";
case -1010: return "OpenCL Error Code -1010: CL_INVALID_D3D9_DEVICE_NV CL_INVALID_DX9_DEVICE_INTEL: \n Function(s): clCreateContext, clCreateContextFromType \n\tIf the Direct3D 9 device specified for interoperability is not compatible with the devices against which the context is to be created.";
case -1011: return "OpenCL Error Code -1011: CL_INVALID_D3D9_RESOURCE_NV CL_INVALID_DX9_RESOURCE_INTEL: \n Function(s): clCreateFromD3D9VertexBufferNV, clCreateFromD3D9IndexBufferNV, clCreateFromD3D9SurfaceNV, clCreateFromD3D9TextureNV, clCreateFromD3D9CubeTextureNV, clCreateFromD3D9VolumeTextureNV \n\tIf a 'mem_object' is not a Direct3D 9 resource of the required type.";
case -1012: return "OpenCL Error Code -1012: CL_D3D9_RESOURCE_ALREADY_ACQUIRED_NV CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL: \n Function(s): clEnqueueAcquireD3D9ObjectsNV \n\tIf any of the 'mem_objects' is currently already acquired by OpenCL.";
case -1013: return "OpenCL Error Code -1013: CL_D3D9_RESOURCE_NOT_ACQUIRED_NV CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL: \n Function(s): clEnqueueReleaseD3D9ObjectsNV \n\tIf any of the 'mem_objects' is currently not acquired by OpenCL.";
case -1092: return "OpenCL Error Code -1092: CL_EGL_RESOURCE_NOT_ACQUIRED_KHR: \n Function(s): clEnqueueReleaseEGLObjectsKHR \n\tIf a 'mem_object' is not acquired by OpenCL.";
case -1093: return "OpenCL Error Code -1093: CL_INVALID_EGL_OBJECT_KHR: \n Function(s): clCreateFromEGLImageKHR, clEnqueueAcquireEGLObjectsKHR \n\tIf a 'mem_object' is not a EGL resource of the required type.";
case -1094: return "OpenCL Error Code -1094: CL_INVALID_ACCELERATOR_INTEL: \n Function(s): clSetKernelArg \n\twhen 'arg_value' is not a valid accelerator object, and by clRetainAccelerator, clReleaseAccelerator, and clGetAcceleratorInfo when 'accelerator' is not a valid accelerator object.";
case -1095: return "OpenCL Error Code -1095: CL_INVALID_ACCELERATOR_TYPE_INTEL: \n Function(s): clSetKernelArg, clCreateAccelerator \n\twhen 'arg_value' is not an accelerator object of the correct type, or when 'accelerator_type' is not a valid accelerator type.";
case -1096: return "OpenCL Error Code -1096: CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL: \n Function(s): clCreateAccelerator \n\twhen values described by 'descriptor' are not valid, or if a combination of values is not valid.";
case -1097: return "OpenCL Error Code -1097: CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL: \n Function(s): clCreateAccelerator \n\twhen 'accelerator_type’ is a valid accelerator type, but it not supported by any device in 'context'.";
case -1098: return "OpenCL Error Code -1098: CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL: \n Function(s): clCreateContext, clCreateContextFromType \n\tIf the VA API display specified for interoperability is not compatible with the devices against which the context is to be created.";
case -1099: return "OpenCL Error Code -1099: CL_INVALID_VA_API_MEDIA_SURFACE_INTEL: \n Function(s): clEnqueueReleaseVA_APIMediaSurfacesINTEL \n\tIf 'surface' is not a VA API surface of the required type, by clGetMemObjectInfo when 'param_name' is CL_MEM_VA_API_MEDIA_SURFACE_INTEL when was not created from a VA API surface, and from clGetImageInfo when 'param_name' is CL_IMAGE_VA_API_PLANE_INTEL and 'image' was not created from a VA API surface.";
case -1100: return "OpenCL Error Code -1100: CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL: \n Function(s): clEnqueueReleaseVA_APIMediaSurfacesINTEL \n\tIf any of the 'mem_objects' is already acquired by OpenCL.";
case -1101: return "OpenCL Error Code -1101: CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL: \n Function(s): clEnqueueReleaseVA_APIMediaSurfacesINTEL \n\tIf any of the 'mem_objects' are not currently acquired by OpenCL.";
// errors thrown by Vendors
case -9999: return "OpenCL Error Code -9999: NVidia: \n Function(s): clEnqueueNDRangeKernel illegal read or write to a buffer.";
default: return "Unknown OpenCL error";
}
}
// Reports whether `device` advertises the CL_DEVICE_SVM_FINE_GRAIN_BUFFER bit
// in its CL_DEVICE_SVM_CAPABILITIES.
//
// device: the device to query.
// Returns true only when the capability query succeeds and the fine-grain
// buffer bit is set; a failed query is treated as "not supported".
//
// NOTE(review): the name suggests "unified memory", but the only thing checked
// here is fine-grained buffer SVM support - confirm that is the intended proxy.
//
// Declared inline because the definition lives in a header.
inline bool is_unified_memory(const cl::Device& device) {
    cl_device_svm_capabilities svm_flags = 0;
    if (device.getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_flags) != CL_SUCCESS) {
        return false;
    }
    // Compare against zero explicitly rather than relying on an implicit
    // bitmask-to-bool conversion.
    return (svm_flags & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) != 0;
}
// Creates a cl::Buffer large enough to hold `n_elems` objects of type T.
//
// context:  OpenCL context the buffer is created in.
// flags:    cl_mem_flags forwarded unchanged to the cl::Buffer constructor.
// n_elems:  element count; the byte size requested is n_elems * sizeof(T).
// host_ptr: optional host pointer forwarded to the cl::Buffer constructor.
//
// Returns a default-constructed cl::Buffer when n_elems is zero, otherwise the
// newly created buffer.
// Throws std::runtime_error (with the element count, the flags and the decoded
// OpenCL status in the message) when the constructor reports a status other
// than CL_SUCCESS.
template <typename T>
cl::Buffer make_buffer(const cl::Context& context, cl_mem_flags flags, size_t n_elems, T* host_ptr = (T*)nullptr) {
    // Nothing to allocate: hand back an empty buffer object.
    if (n_elems == 0) {
        return cl::Buffer();
    }

    const size_t byte_count = n_elems * sizeof(T);
    cl_int status;
    cl::Buffer result(context, flags, byte_count, host_ptr, &status);
    if (status == CL_SUCCESS) {
        return result;
    }

    std::string message = "Error creating buffer with n_elems: ";
    message += std::to_string(n_elems);
    message += " and flags: ";
    message += std::to_string(flags);
    message += "\n";
    message += getErrorString(status);
    throw std::runtime_error(message);
}
// Reads OpenCL C source from `path`, creates a cl::Program in `context` and
// builds it for every device of that context.
//
// path:    location of the kernel source file.
// context: context whose CL_CONTEXT_DEVICES list is used as the build target.
// options: compiler options passed to the build; the default is a set of
//          fast-math style flags.
//
// Returns the built cl::Program.
// Throws std::runtime_error when the file does not exist, when it cannot be
// opened, or when the build reports a status other than CL_SUCCESS. In the
// last case the decoded status and the build log of each device are written
// to std::cerr first.
//
// Declared inline because the definition lives in a header.
inline auto make_program( const std::string& path,
                          const cl::Context& context,
                          const std::string& options =
                          {"-cl-mad-enable "
                           "-cl-no-signed-zeros "
                           "-cl-fast-relaxed-math "
                           "-cl-unsafe-math-optimizations "
                           "-cl-finite-math-only "
                           "-cl-denorms-are-zero "
                           "-cl-single-precision-constant"}) {
    // Fail early with a clear message if the source file is missing.
    if (!std::filesystem::exists(path)) {
        throw std::runtime_error("File not found: " + path);
    }

    // An existing path can still be unreadable (permissions, directory, ...);
    // without this check an empty program would be compiled silently.
    std::ifstream source_file(path);
    if (!source_file.is_open()) {
        throw std::runtime_error("Could not open file: " + path);
    }

    std::stringstream buffer;
    buffer << source_file.rdbuf();
    const std::string source = buffer.str();

    cl::Program program(context, source);
    const auto devices = context.getInfo<CL_CONTEXT_DEVICES>();
    const auto err = program.build(devices, options.c_str());
    if (err != CL_SUCCESS) {
        std::cerr << getErrorString(err) << std::endl;
        // Print the log of every target device: the failure is not
        // necessarily on the first one, and the list may even be empty.
        for (const auto& device : devices) {
            std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
        }
        throw std::runtime_error("OpenCL compilation error");
    }
    return program;
}
// Collects the devices of the requested type from every OpenCL platform.
//
// type: one of CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_DEFAULT,
//       CL_DEVICE_TYPE_ACCELERATOR or CL_DEVICE_TYPE_CUSTOM.
//
// Returns the matching devices of all platforms, in platform order; the
// result may be empty.
// Throws std::runtime_error("Invalid device type") for any other `type`, and
// std::runtime_error("OpenCL platforms not found") when the platform query
// fails or yields no platform.
//
// Declared inline because the definition lives in a header.
inline std::vector<cl::Device> get_devices(cl_device_type type) {
    const bool supported_type = type == CL_DEVICE_TYPE_GPU ||
                                type == CL_DEVICE_TYPE_CPU ||
                                type == CL_DEVICE_TYPE_DEFAULT ||
                                type == CL_DEVICE_TYPE_ACCELERATOR ||
                                type == CL_DEVICE_TYPE_CUSTOM;
    if (!supported_type) {
        throw std::runtime_error("Invalid device type");
    }

    std::vector<cl::Platform> platforms;
    const auto err = cl::Platform::get(&platforms);
    if (platforms.empty() || err != CL_SUCCESS) {
        throw std::runtime_error("OpenCL platforms not found");
    }

    std::vector<cl::Device> devices;
    for (auto& platform : platforms) {
        std::vector<cl::Device> platform_devices;
        try {
            // The status is deliberately not inspected: whatever devices the
            // call produced (possibly none) are what gets collected.
            platform.getDevices(type, &platform_devices);
        } catch (...) {
            // A platform that throws (for instance because it has no device
            // of this type) is skipped. Devices gathered from the other
            // platforms must be kept, so only this platform's list is dropped.
            platform_devices.clear();
        }
        devices.insert(devices.end(), platform_devices.begin(), platform_devices.end());
    }
    return devices;
}
// Binds `args` to `kernel` as consecutive kernel arguments, the first one at
// index 0, in the order they are given.
//
// kernel: the kernel whose arguments are set.
// args:   values forwarded one by one to cl::Kernel::setArg.
//
// Throws std::runtime_error("OpenCL error") as soon as a setArg call reports
// a status other than CL_SUCCESS; the failing argument index and the decoded
// status are written to std::cerr first.
template <typename... Args>
void setArgs(cl::Kernel& kernel, Args&&... args) {
    cl_uint next_index = 0;
    auto arg_setter = [&](auto& arg) {
        // Remember the index actually used for this call so that the error
        // message names the argument that failed, not the one after it.
        const cl_uint index = next_index++;
        const auto err = kernel.setArg(index, arg);
        if (err != CL_SUCCESS) {
            std::cerr << "Error setting kernel argument " << index << ": " << utils::getErrorString(err) << std::endl;
            throw std::runtime_error("OpenCL error");
        }
    };
    // C++17 fold over the comma operator: arguments are processed left to right.
    (arg_setter(args), ...);
}
// Performs a blocking read of each given buffer into its host destination.
//
// queue: command queue the reads are enqueued on.
// args:  pair-like objects; `.first` must be a cl::Buffer lvalue and
//        `.second` the host pointer to read into.
//
// Every read starts at offset 0 and transfers the buffer's full CL_MEM_SIZE,
// so each destination must have room for the whole buffer.
// Failures are reported through CL_CALL.
template <typename... Args>
void enqueueReads(cl::CommandQueue& queue, Args&&... args) {
    auto read_buffer = [&](auto& arg_pair) {
        cl::Buffer& arg = arg_pair.first;
        auto& output_ptr = arg_pair.second;
        CL_CALL(queue.enqueueReadBuffer, arg, CL_TRUE, 0, arg.getInfo<CL_MEM_SIZE>(), output_ptr);
    };
    // C++17 fold over the comma operator: pairs are processed left to right.
    (read_buffer(args), ...);
}
// Performs a blocking write of each host source into its given buffer.
//
// queue: command queue the writes are enqueued on.
// args:  pair-like objects; `.first` must be a cl::Buffer lvalue and
//        `.second` the host pointer to copy from.
//
// Every write starts at offset 0 and transfers the buffer's full CL_MEM_SIZE,
// so each source must provide at least that many bytes.
// Failures are reported through CL_CALL.
template <typename... Args>
void enqueueWrites(cl::CommandQueue& queue, Args&&... args) {
    auto write_buffer = [&](auto& arg_pair) {
        cl::Buffer& arg = arg_pair.first;
        auto& input_ptr = arg_pair.second;
        CL_CALL(queue.enqueueWriteBuffer, arg, CL_TRUE, 0, arg.getInfo<CL_MEM_SIZE>(), input_ptr);
    };
    // C++17 fold over the comma operator: pairs are processed left to right.
    (write_buffer(args), ...);
}
// Rounds `n` up to a multiple of `factor` using (n + factor - 1) / factor * factor.
//
// n:      value to round.
// factor: the step; must not be zero.
//
// Returns, for non-negative n and positive factor, the smallest multiple of
// `factor` that is >= n.
// Throws std::invalid_argument when factor is zero (which would otherwise be
// a division by zero).
//
// The arithmetic is carried out in long long so that `n + factor - 1` cannot
// overflow int when n is close to INT_MAX.
//
// Declared inline because the definition lives in a header.
inline int next_multiple(int n, int factor) {
    if (factor == 0) {
        throw std::invalid_argument("next_multiple: factor must be non-zero");
    }
    const long long widened = static_cast<long long>(n);
    return static_cast<int>((widened + factor - 1) / factor * factor);
}
}