From e6928884afba30bf6ee3f6fb3d34fdde7a1c5975 Mon Sep 17 00:00:00 2001 From: Ilya Znamenskiy Date: Fri, 1 Oct 2021 14:15:25 +0300 Subject: [PATCH 1/2] [GPU] Num threads per eu update --- .../thirdparty/clDNN/api/cldnn/runtime/device_info.hpp | 2 -- .../thirdparty/clDNN/runtime/ocl/ocl_device.cpp | 9 +++------ .../thirdparty/clDNN/src/kernel_selector_helper.cpp | 4 ++-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp index d72f68900ba8cb..d0c90088305db6 100644 --- a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp @@ -30,8 +30,6 @@ struct gfx_version { struct device_info { uint32_t execution_units_count; ///< Number of available execution units. uint32_t gpu_frequency; ///< Clock frequency in MHz. - uint32_t max_threads_per_execution_unit; ///< Number of available HW threads on EU. - uint32_t max_threads_per_device; ///< Maximum number of HW threads on device. uint64_t max_work_group_size; ///< Maximum number of work-items in a work-group executing a kernel using the data parallel execution model. uint64_t max_local_mem_size; ///< Maximum size of local memory arena in bytes. diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp index 7d294c2a8ea586..554990b9585640 100644 --- a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp @@ -223,12 +223,6 @@ device_info init_device_info(const cl::Device& device) { info.supports_subgroups_short = extensions.find("cl_intel_subgroups_short") != std::string::npos; info.supports_subgroups_char = extensions.find("cl_intel_subgroups_char") != std::string::npos; - info.supports_imad = get_imad_support(device); - info.supports_immad = false; - - info.max_threads_per_execution_unit = 7; - info.max_threads_per_device = static_cast(info.execution_units_count * info.max_threads_per_execution_unit); - info.supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos; info.supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos && @@ -256,6 +250,9 @@ device_info init_device_info(const cl::Device& device) { info.num_sub_slices_per_slice = 0; info.num_eus_per_sub_slice = 0; info.num_threads_per_eu = 0; + + info.supports_imad = get_imad_support(device); + info.supports_immad = false; } return info; diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp index de0e86a12787ee..b222abb0b67b62 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp @@ -832,8 +832,8 @@ void set_params(const program_node& node, kernel_selector::params& params) { params.engineInfo.maxImage2dWidth = device_info.max_image2d_width; params.engineInfo.maxImage2dHeight = device_info.max_image2d_height; params.engineInfo.computeUnitsCount = device_info.execution_units_count; - params.engineInfo.maxThreadsPerExecutionUnit = device_info.max_threads_per_execution_unit; - params.engineInfo.maxThreadsPerDevice = device_info.max_threads_per_device; + params.engineInfo.maxThreadsPerExecutionUnit = device_info.num_threads_per_eu > 0 ? device_info.num_threads_per_eu : 7; + params.engineInfo.maxThreadsPerDevice = params.engineInfo.maxThreadsPerDevice * device_info.execution_units_count; params.engineInfo.deviceCache = program.get_tuning_cache(); params.engineInfo.driverVersion = device_info.driver_version; From 7dbb4eab169a04cb0ea196f674090cf6c73a9f89 Mon Sep 17 00:00:00 2001 From: Ilya Znamenskiy Date: Tue, 5 Oct 2021 06:15:59 +0300 Subject: [PATCH 2/2] [GPU] Comments fixes --- .../thirdparty/clDNN/runtime/ocl/ocl_device.cpp | 6 +++--- .../thirdparty/clDNN/src/kernel_selector_helper.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp index 554990b9585640..5a5a36919e03a5 100644 --- a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp @@ -223,6 +223,9 @@ device_info init_device_info(const cl::Device& device) { info.supports_subgroups_short = extensions.find("cl_intel_subgroups_short") != std::string::npos; info.supports_subgroups_char = extensions.find("cl_intel_subgroups_char") != std::string::npos; + info.supports_imad = get_imad_support(device); + info.supports_immad = false; + info.supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos; info.supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos && @@ -250,9 +253,6 @@ device_info init_device_info(const cl::Device& device) { info.num_sub_slices_per_slice = 0; info.num_eus_per_sub_slice = 0; info.num_threads_per_eu = 0; - - info.supports_imad = get_imad_support(device); - info.supports_immad = false; } return info; diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp index b222abb0b67b62..4c7cd17da9c470 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp @@ -833,7 +833,7 @@ void set_params(const program_node& node, kernel_selector::params& params) { params.engineInfo.maxImage2dHeight = device_info.max_image2d_height; params.engineInfo.computeUnitsCount = device_info.execution_units_count; params.engineInfo.maxThreadsPerExecutionUnit = device_info.num_threads_per_eu > 0 ? device_info.num_threads_per_eu : 7; - params.engineInfo.maxThreadsPerDevice = params.engineInfo.maxThreadsPerDevice * device_info.execution_units_count; + params.engineInfo.maxThreadsPerDevice = params.engineInfo.maxThreadsPerExecutionUnit * device_info.execution_units_count; params.engineInfo.deviceCache = program.get_tuning_cache(); params.engineInfo.driverVersion = device_info.driver_version;