From 83ce42c59e27f53ec73290642292801d1163261c Mon Sep 17 00:00:00 2001 From: marty1885 Date: Sat, 23 Jun 2018 13:38:51 +0800 Subject: [PATCH 1/2] querying OpenCL device infomation instead of guessing --- src/codegen/build_module.cc | 17 +++++++++--- src/runtime/opencl/opencl_device_api.cc | 35 ++++++++++++++++++++----- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index 318e231fc67c..da58ac178500 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -9,6 +9,8 @@ #include #include +#include "../runtime/opencl/opencl_common.h" + #include #include #include @@ -70,15 +72,21 @@ Target CreateTarget(const std::string& target_name, // For now assume rocm schedule for opencl if (target_name == "opencl") { t->device_type = kDLOpenCL; + auto workspace = runtime::cl::OpenCLWorkspace::Global(); + cl::OpenCLThreadEntry* clt = cl::OpenCLThreadEntry::ThreadLocal(); + TVMContext context = clt->context; + TVMRetValue rv; + + workspace->GetAttr(context, kMaxThreadsPerBlock, &rv); + t->max_num_threads = rv.operator int(); + + workspace->GetAttr(context, kWarpSize, &rv); + t->thread_warp_size = rv.operator int(); } else { t->device_type = kDLROCM; } t->keys_array.push_back(ir::StringImm::make("rocm")); t->keys_array.push_back(ir::StringImm::make("gpu")); - t->max_num_threads = 256; - if (t->device_name == "intel_gpu") { - t->thread_warp_size = 16; - } } else if (target_name == "metal" || target_name == "vulkan") { if (target_name == "metal") { t->device_type = kDLMetal; @@ -252,6 +260,7 @@ Target rocm(const std::vector& options) { } Target opencl(const std::vector& options) { + std::cout << "Creating opencl target" << std::endl; return CreateTarget("opencl", options); } diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index 759f074b2fc1..a1cefd5f2744 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -6,6 +6,8 @@ #include #include "./opencl_common.h" +#include + namespace tvm { namespace runtime { namespace cl { @@ -39,12 +41,33 @@ void OpenCLWorkspace::GetAttr( break; } case kWarpSize: { - /* TODO: the warp size of OpenCL device is not always 1 - e.g. Intel GPU has a sub group concept which contains 8 - 32 work items, - corresponding to the number of SIMD entries the heardware configures. - We need to figure out a way to query this information from the hardware. - */ - *rv = 1; + static const std::string dummy_kernel("__kernel void tvm_dummy_kernel(__global int* a)" + "{a[get_global_id(0)] = 0;}"); + cl_kernel kernel{nullptr}; + cl_program program{nullptr}; + size_t prefered_mul = 0; + size_t kernel_src_size = dummy_kernel.size(); + const char* src = dummy_kernel.c_str(); + cl_int err; + + program = clCreateProgramWithSource(context, 1, &src, &kernel_src_size, &err); + OPENCL_CALL(err); + OPENCL_CALL(clBuildProgram( + program, 1, + &devices[index], "", + nullptr, nullptr)); + + kernel = clCreateKernel(program, "tvm_dummy_kernel", &err); + OPENCL_CALL(err); + OPENCL_CALL(clGetKernelWorkGroupInfo( + kernel, devices[index], + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + sizeof(prefered_mul), &prefered_mul, nullptr)); + + OPENCL_CALL(clReleaseKernel(kernel)); + OPENCL_CALL(clReleaseProgram(program)); + + *rv = (int)prefered_mul; break; } case kMaxSharedMemoryPerBlock: { From b833165dc4470657da2bb53a953d842655013848 Mon Sep 17 00:00:00 2001 From: marty1885 Date: Sat, 23 Jun 2018 13:53:10 +0800 Subject: [PATCH 2/2] remove debug print --- src/codegen/build_module.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index da58ac178500..eba23237f8f4 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -260,7 +260,6 @@ Target rocm(const std::vector& options) { } Target opencl(const std::vector& options) { - std::cout << "Creating opencl target" << std::endl; return CreateTarget("opencl", options); }