Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <CL/cl.h>

#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>

#ifdef _WIN32
#include <intrin.h>
#else
#include <cpuid.h>
#endif
/// Reports whether the executing CPU advertises the BMI2 instruction set.
///
/// BMI2 is signalled by bit 8 of EBX in CPUID leaf 7, sub-leaf 0. The highest
/// supported basic leaf is validated first: querying a leaf the CPU does not
/// implement can return unrelated data, which would make the bit test
/// meaningless.
///
/// @return true when CPUID reports BMI2; false otherwise, including on CPUs
///         that do not implement leaf 7.
bool supportsBMI2() {
    constexpr int kFeatureLeaf = 7;
    constexpr int kFeatureSubLeaf = 0;
    constexpr unsigned int kBmi2Mask = 1u << 8;

    unsigned int ebx = 0;
#ifdef _WIN32
    int cpuInfo[4] = {0, 0, 0, 0};
    // Leaf 0 returns the highest supported basic leaf in EAX.
    __cpuid(cpuInfo, 0);
    if (cpuInfo[0] < kFeatureLeaf) {
        return false;
    }
    // __cpuidex loads ECX with the sub-leaf explicitly; plain __cpuid takes no
    // sub-leaf argument, so it cannot be relied on to select sub-leaf 0.
    __cpuidex(cpuInfo, kFeatureLeaf, kFeatureSubLeaf);
    ebx = static_cast<unsigned int>(cpuInfo[1]);
#else
    unsigned int eax = 0;
    unsigned int ecx = 0;
    unsigned int edx = 0;
    // __get_cpuid_count returns 0 when the requested leaf is not supported.
    if (__get_cpuid_count(kFeatureLeaf, kFeatureSubLeaf, &eax, &ebx, &ecx, &edx) == 0) {
        return false;
    }
#endif
    return (ebx & kBmi2Mask) != 0;
}
- void checkError(cl_int err, const char* operation) {
- if (err != CL_SUCCESS) {
- std::cerr << "Error during operation '" << operation << "': " << err << std::endl;
- exit(1);
- }
- }
// OpenCL C source for the `copyData` kernel. Each work-item copies the single
// byte at its own global index from `input` to `output`. The kernel performs
// no bounds check, so the global work size must not exceed either buffer.
// The index is held in a size_t because get_global_id() returns size_t;
// storing it in an int would truncate it for very large work sizes.
const char* kernelSource = R"(
__kernel void copyData(__global const char* input, __global char* output) {
    size_t id = get_global_id(0);
    output[id] = input[id];
}
)";
- int main() {
- if (!supportsBMI2()) {
- std::cerr << "BMI2 is not supported by your CPU." << std::endl;
- return 1;
- } else {
- std::cout << "BMI2 is supported!" << std::endl;
- }
- // Load data from disk
- std::ifstream file("data.bin", std::ios::binary);
- if (!file) {
- std::cerr << "Failed to open file 'data.bin'" << std::endl;
- return 1;
- }
- std::vector<char> data(1024 * 1024); // 1 MB buffer
- file.read(data.data(), data.size());
- file.close();
- // Initialize OpenCL
- cl_platform_id platform;
- cl_device_id device;
- cl_context context;
- cl_program program;
- cl_kernel kernel;
- cl_command_queue queue;
- cl_int err;
- cl_uint num_platforms;
- err = clGetPlatformIDs(1, &platform, &num_platforms);
- checkError(err, "clGetPlatformIDs");
- err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
- checkError(err, "clGetDeviceIDs");
- context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
- checkError(err, "clCreateContext");
- queue = clCreateCommandQueueWithProperties(context, device, 0, &err);
- checkError(err, "clCreateCommandQueueWithProperties");
- const size_t lengths[] = {strlen(kernelSource)};
- const char* sources[] = {kernelSource};
- program = clCreateProgramWithSource(context, 1, sources, lengths, &err);
- checkError(err, "clCreateProgramWithSource");
- err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
- if (err != CL_SUCCESS) {
- size_t log_size;
- clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
- std::vector<char> log(log_size);
- clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, log.data(), NULL);
- std::cerr << "Build log:\n" << log.data() << std::endl;
- checkError(err, "clBuildProgram");
- }
- kernel = clCreateKernel(program, "copyData", &err);
- checkError(err, "clCreateKernel");
- // Create OpenCL buffers
- cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, data.size(), data.data(), &err);
- checkError(err, "clCreateBuffer(input)");
- cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, data.size(), NULL, &err);
- checkError(err, "clCreateBuffer(output)");
- // Set kernel arguments and execute
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
- checkError(err, "clSetKernelArg(input)");
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);
- checkError(err, "clSetKernelArg(output)");
- const size_t globalSize = data.size();
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, NULL, 0, NULL, NULL);
- checkError(err, "clEnqueueNDRangeKernel");
- err = clFinish(queue);
- checkError(err, "clFinish");
- std::vector<char> output(data.size());
- err = clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, output.size(), output.data(), 0, NULL, NULL);
- checkError(err, "clEnqueueReadBuffer");
- std::cout << "Data copied to VRAM and back to RAM successfully!" << std::endl;
- clReleaseMemObject(inputBuffer);
- clReleaseMemObject(outputBuffer);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- clReleaseCommandQueue(queue);
- clReleaseContext(context);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement