Skip to content

Commit

Permalink
[GPU] Num threads per eu update (#7823)
Browse files Browse the repository at this point in the history
  • Loading branch information
lznamens authored Oct 6, 2021
1 parent b5499f6 commit 17dc82a
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ struct gfx_version {
struct device_info {
uint32_t execution_units_count; ///< Number of available execution units.
uint32_t gpu_frequency; ///< Clock frequency in MHz.
- uint32_t max_threads_per_execution_unit; ///< Number of available HW threads on EU.
- uint32_t max_threads_per_device; ///< Maximum number of HW threads on device.

uint64_t max_work_group_size; ///< Maximum number of work-items in a work-group executing a kernel using the data parallel execution model.
uint64_t max_local_mem_size; ///< Maximum size of local memory arena in bytes.
Expand Down
3 changes: 0 additions & 3 deletions inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,6 @@ device_info init_device_info(const cl::Device& device) {
info.supports_imad = get_imad_support(device);
info.supports_immad = false;

- info.max_threads_per_execution_unit = 7;
- info.max_threads_per_device = static_cast<uint32_t>(info.execution_units_count * info.max_threads_per_execution_unit);
-
info.supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos;

info.supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -832,8 +832,8 @@ void set_params(const program_node& node, kernel_selector::params& params) {
params.engineInfo.maxImage2dWidth = device_info.max_image2d_width;
params.engineInfo.maxImage2dHeight = device_info.max_image2d_height;
params.engineInfo.computeUnitsCount = device_info.execution_units_count;
- params.engineInfo.maxThreadsPerExecutionUnit = device_info.max_threads_per_execution_unit;
- params.engineInfo.maxThreadsPerDevice = device_info.max_threads_per_device;
+ params.engineInfo.maxThreadsPerExecutionUnit = device_info.num_threads_per_eu > 0 ? device_info.num_threads_per_eu : 7;
+ params.engineInfo.maxThreadsPerDevice = params.engineInfo.maxThreadsPerExecutionUnit * device_info.execution_units_count;
params.engineInfo.deviceCache = program.get_tuning_cache();
params.engineInfo.driverVersion = device_info.driver_version;

Expand Down

0 comments on commit 17dc82a

Please sign in to comment.