Skip to content

Commit

Permalink
Merge pull request #6 from lukkio88/SwitchingToCPPWrapper
Browse files Browse the repository at this point in the history
Switching to cpp wrapper
  • Loading branch information
lukkio88 authored Mar 3, 2024
2 parents bcaabd7 + 551fa1a commit a3a10a6
Show file tree
Hide file tree
Showing 19 changed files with 516 additions and 131 deletions.
22 changes: 15 additions & 7 deletions BasicSetup/BasicTemplate/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(BasicTemplate)

set(CMAKE_BUILD_TYPE Debug)
set(CMAKE_CXX_STANDARD 11)
if(UNIX)
set(OpenCL_INCLUDE_DIR /usr/local/cuda-10.0/include/)
set(OpenCL_LIBRARY_DIR /usr/local/cuda-10.0/lib64/libOpenCL.so)
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_STANDARD 17)
set(GitProjectDir ${CMAKE_SOURCE_DIR}/../../)
set(OpenCL_external ${GitProjectDir}OpenCL-SDK/external/)
set(OpenCL_INCLUDE_DIR ${OpenCL_external}OpenCL-CLHPP/include)

if(WIN32)
set(OpenCL_OtherInclude ${OpenCL_external}OpenCL-Headers/)
endif()

add_library(opencl SHARED IMPORTED)
message(STATUS ${OpenCL_INCLUDE_DIR})
message(STATUS ${OpenCL_OtherInclude})

set(OpenCL_LIBRARY_DIR ${GitProjectDir}OpenCL-SDK/install/lib/OpenCL.lib)

add_library(opencl STATIC IMPORTED)
set_target_properties(opencl PROPERTIES IMPORTED_LOCATION ${OpenCL_LIBRARY_DIR})

set(SRC_FILES main.cpp)
add_executable(basic-template ${SRC_FILES})
target_include_directories(basic-template PUBLIC ${OpenCL_INCLUDE_DIR})
target_include_directories(basic-template PUBLIC ${OpenCL_INCLUDE_DIR} ${OpenCL_OtherInclude})
target_link_libraries(basic-template opencl)
198 changes: 74 additions & 124 deletions BasicSetup/BasicTemplate/main.cpp
Original file line number Diff line number Diff line change
@@ -1,151 +1,101 @@
#include <CL/cl.h>
#include <cstdio>
#include <cstring>

/*
* Simplest openCL program that execute a simple kernel for vector addition using GPU
* This example is mostly written in C style, there's no error checking because the focus
* is on the basic structure of a OpenCL program.
*/

cl_int basicProgram(
double * inputA,
double * inputB,
double * output,
int vectorSize, //this is common for A,B and output
const char * platformName = "NVIDIA CUDA",
cl_device_type deviceType = CL_DEVICE_TYPE_GPU,
const char * deviceName = "GeForce GTX 1070")
{
//Selecting the platform and device
cl_uint numPlatforms;
cl_int error = clGetPlatformIDs(0,nullptr,&numPlatforms);
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 300

cl_platform_id *platform = new cl_platform_id[numPlatforms];
error = clGetPlatformIDs(numPlatforms,platform,nullptr);
#include <CL/opencl.hpp>
#include <iostream>

int platformIdx;
for(platformIdx = 0; platformIdx < numPlatforms; ++platformIdx)
void printVector(std::vector<float>& v)
{
for (int i = 0; i < v.size(); ++i)
{
char * currentPlatformName;
size_t size;
error = clGetPlatformInfo(platform[platformIdx],CL_PLATFORM_NAME,0,nullptr,&size);
currentPlatformName = new char[size];
error = clGetPlatformInfo(platform[platformIdx],CL_PLATFORM_NAME,size,currentPlatformName,nullptr);
std::cout << v[i] << " ";
}
std::cout << std::endl;
}

if(strcmp(currentPlatformName,platformName) == 0)
void printPlatformsAndDevices(
const std::string& platformName = "NVIDIA CUDA",
const cl_device_type& deviceType = CL_DEVICE_TYPE_GPU,
const std::string& deviceName = "GeForce GT 730")
{
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
cl::Platform platform;

for (auto& p : platforms) {
const std::string& currentPlatformName = p.getInfo<CL_PLATFORM_NAME>();
if (currentPlatformName == platformName)
{
delete [] currentPlatformName;
break;
std::cout << p.getInfo<CL_PLATFORM_NAME>() << std::endl;
platform = p;
}
delete [] currentPlatformName;

}

if(platformIdx == numPlatforms)
std::vector<cl::Device> devices;
platform.getDevices(deviceType, &devices);
cl::Device device;
for (auto& d : devices)
{
printf("Could not find platform %s\n",platformName);
}
else
{
printf("Platform %s found\n",platformName);
}

cl_platform_id platformId = platform[platformIdx];
delete[] platform;

cl_device_id * device;
cl_uint numDevices;
error = clGetDeviceIDs(platformId,deviceType,0,nullptr,&numDevices);
device = new cl_device_id[numDevices];
error = clGetDeviceIDs(platformId,deviceType,numDevices,device,nullptr);

int deviceIdx;
for(deviceIdx = 0; deviceIdx < numDevices; ++deviceIdx)
{
char * currentDeviceName;
size_t size;
error = clGetDeviceInfo(device[deviceIdx],CL_DEVICE_NAME,0,nullptr,&size);
currentDeviceName = new char[size];
error = clGetDeviceInfo(device[deviceIdx],CL_DEVICE_NAME,size,currentDeviceName,nullptr);
if(strcmp(currentDeviceName,deviceName) == 0)
const std::string& currentDeviceName = d.getInfo<CL_DEVICE_NAME>();
if (deviceName == currentDeviceName)
{
delete [] currentDeviceName;
break;
std::cout << d.getInfo<CL_DEVICE_NAME>() << std::endl;
device = d;
}
delete [] currentDeviceName;
}

if(deviceIdx == numDevices)
{
printf("Cannot find device %s\n",deviceName);
}
else
const cl::string& kernelSource =
"kernel void vadd(\n"
" global float *a,\n"
" global float *b,\n"
" global float *c\n"
"){\n"
" int i = get_global_id(0);\n"
" c[i] = a[i] + b[i];\n"
"}";

cl::Context context(device);
cl::Device d = context.getInfo<CL_CONTEXT_DEVICES>()[0];
std::cout << d.getInfo<CL_DEVICE_NAME>() << std::endl;
cl::Program program(context, kernelSource);
auto buildResult = program.build(device);

if (buildResult != CL_SUCCESS)
{
printf("Device %s found\n",deviceName);
std::cout << "Cannot build program" << std::endl;
exit(1);
}

cl_device_id deviceId = device[deviceIdx];
delete []device;

//Creating context and command queue
cl_context context = clCreateContext(nullptr,1,&deviceId,nullptr,nullptr,&error);
cl_command_queue commandQueue = clCreateCommandQueue(context,deviceId,0,&error);

//Create program, build and create kernel
const char * programSource =
"__kernel void vecAdd(\n"
" __global double * inputA,\n"
" __global double * inputB,\n"
" __global double * outputC) {\n"
" size_t idx = get_global_id(0);\n"
" outputC[idx] = inputA[idx] + inputB[idx];\n"
"}";
const size_t sourceLength = strlen(programSource);
cl_program program = clCreateProgramWithSource(context,1,&programSource,&sourceLength,&error);
error = clBuildProgram(program,1,&deviceId,nullptr,nullptr,nullptr);
cl_kernel kernel = clCreateKernel(program,"vecAdd",&error);

//Create input and output clBuffers
cl_mem mem_A = clCreateBuffer(context,CL_MEM_READ_ONLY,vectorSize*sizeof(double),inputA,&error);
cl_mem mem_B = clCreateBuffer(context,CL_MEM_READ_ONLY,vectorSize*sizeof(double),inputB,&error);
cl_mem mem_C = clCreateBuffer(context,CL_MEM_READ_WRITE,vectorSize*sizeof(double),output,&error);
clEnqueueWriteBuffer(commandQueue,mem_A,CL_TRUE,0,vectorSize*sizeof(double),inputA,0,nullptr,nullptr);
clEnqueueWriteBuffer(commandQueue,mem_B,CL_TRUE,0,vectorSize*sizeof(double),inputB,0,nullptr,nullptr);
int DIM = 4;
cl::Buffer aBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * DIM);
cl::Buffer bBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * DIM);
cl::Buffer cBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * DIM);

clFinish(commandQueue);
std::vector<float> a{1.0,2.0,3.0,4.0};
std::vector<float> b{ 4.0,3.0,2.0,1.0 };
std::vector<float> c(DIM);

//Setting kernel argument and NDRange
clSetKernelArg(kernel,0,sizeof(cl_mem),&mem_A);
clSetKernelArg(kernel,1,sizeof(cl_mem),&mem_B);
clSetKernelArg(kernel,2,sizeof(cl_mem),&mem_C);
cl::Kernel kernel(program, "vadd");
kernel.setArg(0, aBuffer);
kernel.setArg(1, bBuffer);
kernel.setArg(2, cBuffer);

const size_t globalWorkSize = vectorSize;
clEnqueueNDRangeKernel(commandQueue,kernel,1,nullptr,&globalWorkSize,nullptr,0,nullptr,nullptr);
cl::CommandQueue queue(context, device, cl::QueueProperties::Profiling);
queue.enqueueWriteBuffer(aBuffer, CL_TRUE, 0, sizeof(float) * DIM, a.data());
queue.enqueueWriteBuffer(bBuffer, CL_TRUE, 0, sizeof(float) * DIM, b.data());

clEnqueueReadBuffer(commandQueue,mem_C,CL_TRUE,0,vectorSize*sizeof(double),output,0,nullptr,nullptr);
queue.enqueueNDRangeKernel(kernel,cl::NullRange,cl::NDRange(DIM));
queue.finish();
queue.flush();

clFinish(commandQueue);

//Teardown
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);
queue.enqueueReadBuffer(cBuffer, CL_TRUE, 0, sizeof(float) * DIM, c.data());

printVector(a);
printVector(b);
printVector(c);
}

int main(int argc, char** argv)
{

double arrayA[] = {1.0,2.0,3.0,4.0,5.0};
double arrayB[] = {5.0,4.0,3.0,2.0,1.0};
double arrayC[] = {0.0,0.0,0.0,0.0,0.0};
int numberOfElements = 5;

basicProgram(arrayA,arrayB,arrayC,numberOfElements);

for(int i = 0; i < numberOfElements; ++i)
{
printf("%f + %f = %f\n",arrayA[i],arrayB[i],arrayC[i]);
}
return 0;
printPlatformsAndDevices();
}
38 changes: 38 additions & 0 deletions OpenCL-CLHPP-Examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Top-level build script for the OpenCL C++ wrapper examples: sets the shared
# OpenCL paths, declares the imported `opencl` library, defines the
# BUILD_TARGET helper and then adds every example subdirectory.
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(opencl-cpp-examples)
# NOTE(review): set unconditionally, so a build type chosen on the command
# line is overridden by this value.
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_STANDARD 17)
# Expose the cl-src directory to the C++ sources as the string macro
# CL_KERNEL_PATH (the quotes are part of the definition).
add_compile_definitions(CL_KERNEL_PATH="${CMAKE_SOURCE_DIR}/cl-src")
# The OpenCL-SDK checkout is looked up one level above this directory.
set(GitProjectDir ${CMAKE_SOURCE_DIR}/../)
set(OpenCL_external ${GitProjectDir}OpenCL-SDK/external/)
set(OpenCL_INCLUDE_DIR ${OpenCL_external}OpenCL-CLHPP/include)

# OpenCL_OtherInclude is only defined on Windows; on other platforms it
# expands to nothing where BUILD_TARGET uses it.
if(WIN32)
set(OpenCL_OtherInclude ${OpenCL_external}OpenCL-Headers/)
endif()

# Subdirectories handed to add_subdirectory() below, in this order.
set(EXAMPLES-OPENCL-CPP
utils
cl-src
print-platforms-and-devices
vector-add
vec2vec
vec2scalar)

# Despite the _DIR suffix this holds the full path of the library file.
# NOTE(review): the OpenCL.lib file name looks Windows-specific — confirm what
# non-Windows builds are expected to link against.
set(OpenCL_LIBRARY_DIR ${GitProjectDir}OpenCL-SDK/install/lib/OpenCL.lib)

# Declare the imported `opencl` target only if it does not exist yet.
if(NOT TARGET opencl)
add_library(opencl STATIC IMPORTED)
set_target_properties(opencl PROPERTIES IMPORTED_LOCATION ${OpenCL_LIBRARY_DIR})
endif()

# BUILD_TARGET(<project-name>)
# Creates an executable named <project-name> from main.cpp, adds the OpenCL
# include directories plus the utils directory, and links it against the
# `opencl` and `utils` targets.
function(BUILD_TARGET project-name)
set(SRC_FILES main.cpp)
add_executable(${project-name} ${SRC_FILES})
target_include_directories(${project-name} PUBLIC ${OpenCL_INCLUDE_DIR} ${OpenCL_OtherInclude} ${CMAKE_SOURCE_DIR}/utils)
target_link_libraries(${project-name} opencl utils)
endfunction()

# Add each listed subdirectory; each one is expected to carry its own
# CMakeLists.txt.
foreach(EXAMPLE ${EXAMPLES-OPENCL-CPP})
add_subdirectory(./${EXAMPLE})
endforeach()
6 changes: 6 additions & 0 deletions OpenCL-CLHPP-Examples/cl-src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Build script for the directory holding the OpenCL kernel sources.
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
set(project-name cl-src)
project(${project-name})

# Kernel files that belong to this target.
set(SRC vadd.cl v2scalar.cl v2v.cl)
# No COMMAND is given, so building this target runs nothing; SOURCES only
# attaches the .cl files to the target (CMake documents this as a convenience
# for IDE project generators).
add_custom_target(${project-name} SOURCES ${SRC})
44 changes: 44 additions & 0 deletions OpenCL-CLHPP-Examples/cl-src/v2scalar.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
 * calculateSum - tree reduction that sums the inputs of each work-group.
 *
 * v   : input values; each work-item reads the element at its global id.
 * tmp : scratch/output buffer indexed the same way as v. When the kernel
 *       finishes, the first element of every work-group's slice
 *       (tmp[group_id * local_size]) holds the sum of that group's inputs;
 *       the remaining entries hold intermediate partial sums.
 *
 * The range is halved on every step, so the work-group size is assumed to be
 * a power of two; with any other size some elements are never folded in.
 */
kernel void calculateSum(global float* v, global float* tmp)
{
    const int i = get_global_id(0);
    const int lid = get_local_id(0);

    tmp[i] = v[i];
    barrier(CLK_GLOBAL_MEM_FENCE);

    for (int currentSize = get_local_size(0) / 2; currentSize > 0; currentSize /= 2)
    {
        // Only the lower half of the active range accumulates. Without this
        // guard the upper work-items would overwrite the very elements the
        // lower ones are reading (a data race) and would read past the end
        // of their own group's slice (out of bounds for the last group).
        if (lid < currentSize)
        {
            tmp[i] = tmp[i] + tmp[i + currentSize];
        }
        // Every work-item of the group must reach the barrier, so it stays
        // outside the guard.
        barrier(CLK_GLOBAL_MEM_FENCE);
    }
}

/*
 * maxArray - tree reduction that finds the maximum input of each work-group.
 *
 * v   : input values; each work-item reads the element at its global id.
 * tmp : scratch/output buffer indexed the same way as v. When the kernel
 *       finishes, the first element of every work-group's slice
 *       (tmp[group_id * local_size]) holds the maximum of that group's
 *       inputs; the remaining entries hold intermediate partial maxima.
 *
 * The range is halved on every step, so the work-group size is assumed to be
 * a power of two; with any other size some elements are never compared.
 */
kernel void maxArray(global float* v, global float* tmp)
{
    const int i = get_global_id(0);
    const int lid = get_local_id(0);

    tmp[i] = v[i];
    barrier(CLK_GLOBAL_MEM_FENCE);

    for (int currentSize = get_local_size(0) / 2; currentSize > 0; currentSize /= 2)
    {
        // Only the lower half of the active range combines values. Without
        // this guard the upper work-items would overwrite the very elements
        // the lower ones are reading (a data race) and would read past the
        // end of their own group's slice (out of bounds for the last group).
        if (lid < currentSize)
        {
            tmp[i] = max(tmp[i], tmp[i + currentSize]);
        }
        // Every work-item of the group must reach the barrier, so it stays
        // outside the guard.
        barrier(CLK_GLOBAL_MEM_FENCE);
    }
}

/*
 * minArray - tree reduction that finds the minimum input of each work-group.
 *
 * v   : input values; each work-item reads the element at its global id.
 * tmp : scratch/output buffer indexed the same way as v. When the kernel
 *       finishes, the first element of every work-group's slice
 *       (tmp[group_id * local_size]) holds the minimum of that group's
 *       inputs; the remaining entries hold intermediate partial minima.
 *
 * The range is halved on every step, so the work-group size is assumed to be
 * a power of two; with any other size some elements are never compared.
 */
kernel void minArray(global float* v, global float* tmp)
{
    const int i = get_global_id(0);
    const int lid = get_local_id(0);

    tmp[i] = v[i];
    barrier(CLK_GLOBAL_MEM_FENCE);

    for (int currentSize = get_local_size(0) / 2; currentSize > 0; currentSize /= 2)
    {
        // Only the lower half of the active range combines values. Without
        // this guard the upper work-items would overwrite the very elements
        // the lower ones are reading (a data race) and would read past the
        // end of their own group's slice (out of bounds for the last group).
        if (lid < currentSize)
        {
            tmp[i] = min(tmp[i], tmp[i + currentSize]);
        }
        // Every work-item of the group must reach the barrier, so it stays
        // outside the guard.
        barrier(CLK_GLOBAL_MEM_FENCE);
    }
}
5 changes: 5 additions & 0 deletions OpenCL-CLHPP-Examples/cl-src/v2v.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/*
 * saxpy - element-wise z = s * x + y.
 * The scale factor is read from the first element of buffer s; each
 * work-item handles the element at its global id.
 */
__kernel void saxpy(__global float* s, __global float* x, __global float* y, __global float* z)
{
    int gid = get_global_id(0);
    z[gid] = s[0] * x[gid] + y[gid];
}
5 changes: 5 additions & 0 deletions OpenCL-CLHPP-Examples/cl-src/vadd.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/*
 * vadd - element-wise vector addition.
 * Each work-item writes c[id] = a[id] + b[id] for its own global id.
 */
__kernel void vadd(__global float *a, __global float *b, __global float *c)
{
    int gid = get_global_id(0);
    c[gid] = a[gid] + b[gid];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Build script for the print-platforms-and-devices example executable.
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
set(project-name print-platforms-and-devices)
project(${project-name})
# BUILD_TARGET is not defined in this file; it is expected to come from the
# CMakeLists.txt that adds this directory.
BUILD_TARGET(${project-name})
Loading

0 comments on commit a3a10a6

Please sign in to comment.