From fd32d5476dd8e4a237e3d272c8de686c65c17bf8 Mon Sep 17 00:00:00 2001 From: Eddy Kim Date: Tue, 8 Nov 2022 18:36:43 +0900 Subject: [PATCH] code refactoring based on code review --- .../src/graph/kernel_selector_helper.cpp | 87 ++++++++++++++++++- .../intel_gpu/src/graph/primitive_inst.cpp | 16 +--- src/plugins/intel_gpu/src/graph/program.cpp | 4 +- .../intel_gpu/src/plugin/compiled_model.cpp | 9 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 4 - 5 files changed, 94 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp index bbc1dc168dce3d..5607b7fea120c2 100644 --- a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp @@ -1047,7 +1047,46 @@ void kernel_impl_params::save(BinaryOutputBuffer& ob) const { ob << unique_id; ob << input_layouts; ob << output_layout; - ob << primary_input_idx; + ob << input_offsets.size(); + for (size_t i = 0; i < input_offsets.size(); i++) { + ob << input_offsets[i].sizes(); + } + + if (weights_layout.has_value()) { + ob << true; + ob << weights_layout.value(); + } else { + ob << false; + } + + if (bias_layout.has_value()) { + ob << true; + ob << bias_layout.value(); + } else { + ob << false; + } + + if (weights_zero_points_layout.has_value()) { + ob << true; + ob << weights_zero_points_layout.value(); + } else { + ob << false; + } + + if (activations_zero_points_layout.has_value()) { + ob << true; + ob << activations_zero_points_layout.value(); + } else { + ob << false; + } + + if (compensation_layout.has_value()) { + ob << true; + ob << compensation_layout.value(); + } else { + ob << false; + } + ob << fused_desc.size(); #ifdef ENABLE_ONEDNN_FOR_GPU size_t num_fused_prims = fused_desc_onednn.size(); @@ -1056,6 +1095,7 @@ void kernel_impl_params::save(BinaryOutputBuffer& ob) const { ob << make_data(&fused_prim, sizeof(fused_primitive_desc_onednn)); } 
#endif // ENABLE_ONEDNN_FOR_GPU + ob << primary_input_idx; } void kernel_impl_params::load(BinaryInputBuffer& ib) { @@ -1063,7 +1103,49 @@ void kernel_impl_params::load(BinaryInputBuffer& ib) { ib >> unique_id; ib >> input_layouts; ib >> output_layout; - ib >> primary_input_idx; + { + size_t num_input_offsets; + ib >> num_input_offsets; + input_offsets.resize(num_input_offsets); + for (size_t i = 0; i < num_input_offsets; i++) { + std::vector sizes; + ib >> sizes; + input_offsets[i] = cldnn::tensor(sizes); + } + } + bool has_value = false; + layout layout_buf; + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + weights_layout = layout_buf; + } + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + bias_layout = layout_buf; + } + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + weights_zero_points_layout = layout_buf; + } + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + activations_zero_points_layout = layout_buf; + } + + ib >> has_value; + if (has_value) { + ib >> layout_buf; + compensation_layout = layout_buf; + } + { // Fake fused_desc just for has_fused_primitives() size_t num_fused_desc; @@ -1080,4 +1162,5 @@ void kernel_impl_params::load(BinaryInputBuffer& ib) { ib >> make_data(&fused_desc_onednn[idx], sizeof(fused_primitive_desc_onednn)); } #endif // ENABLE_ONEDNN_FOR_GPU + ib >> primary_input_idx; } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 1a892f45b597f1..70d831aa166fb1 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -438,11 +438,8 @@ void primitive_inst::rebuild_deps( _deps.resize(_dep_ids.size()); for (size_t i = 0; i < _dep_ids.size(); i++) { - if (primitives.count(_dep_ids[i]) > 0) { - _deps[i] = primitives.at(_dep_ids[i]); - } else { - std::cout << _dep_ids[i] << " is not found in _primitives" << std::endl; - } + 
OPENVINO_ASSERT((primitives.count(_dep_ids[i]) > 0), _dep_ids[i], " is not found in _primitives"); + _deps[i] = primitives.at(_dep_ids[i]); } } @@ -459,9 +456,7 @@ void primitive_inst::rebuild_exec_deps( break; } } - if (found == false) { - std::cout << "not found in _exec_order" << std::endl; - } + OPENVINO_ASSERT(found, _exec_dep_ids[i], " is not found in _exec_order"); } } @@ -1083,7 +1078,7 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) { allocation_type _allocation_type; ib >> make_data(&_allocation_type, sizeof(_allocation_type)); - size_t data_size; // = _output->size(); + size_t data_size; ib >> cldnn::make_data(&data_size, sizeof(size_t)); _outputs[0] = get_network().get_memory_pool().get_memory(output_layout, _allocation_type, false); @@ -1096,7 +1091,6 @@ delete[] _buf; } } else if (_object_type == object_type::EXECUTABLE_INST) { - // primitive_impl _impl_params.release(); _impl_params = make_unique(); _impl_params->load(ib); @@ -1123,7 +1117,6 @@ ib >> _can_share_buffer; ib >> _is_constant; - // output memory layout output_layout = layout(cldnn::data_types::bin, cldnn::format::any, cldnn::tensor()); ib >> output_layout; @@ -1160,5 +1153,4 @@ _output_changed = false; } } - } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 394a14041fc70c..34acc7ee22f2e2 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -703,7 +703,9 @@ void program::cleanup() { } } } - // _kernels_cache->reset(); + + if (_engine.configuration().kernels_cache_path.empty()) + _kernels_cache->reset(); } void program::add_split_outputs() { diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 
c4ec038d6acee7..422f8533a3a0ed 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -53,9 +53,7 @@ CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_p m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) { auto casted_context = std::dynamic_pointer_cast(context); - if (nullptr == casted_context) { - IE_THROW() << "Invalid remote context"; - } + OPENVINO_ASSERT((casted_context != nullptr), "Invalid remote context"); m_context = casted_context; @@ -107,9 +105,7 @@ CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptrgetIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) { auto casted_context = std::dynamic_pointer_cast(context); - if (nullptr == casted_context) { - IE_THROW() << "Invalid remote context"; - } + OPENVINO_ASSERT((casted_context != nullptr), "Invalid remote context"); m_context = casted_context; @@ -195,7 +191,6 @@ CompiledModel::CompiledModel(std::istream& networkModel, std::shared_ptrset_friendly_name(param_name); new_param->set_element_type(param_element_type); new_param->set_layout(param_layout); - // hoho->output(0).get_rt_info() = param_rt_info; new_param->output(0).get_tensor().set_names(param_names); new_param->validate_and_infer_types(); new_params.emplace_back(new_param); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index a8893b93eced31..05fd568f6494e3 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -163,7 +163,6 @@ Plugin::Plugin() : m_defaultContexts({}) { CustomLayer::LoadFromFile(config_path, config.second.customLayers, true); } - isModelCachingEnabled = false; if (const char* env_p = std::getenv("OV_GPU_MODEL_CACHING")) { if (env_p[0] == '1') { isModelCachingEnabled = true; @@ -279,9 +278,6 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine 
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateContext"); std::lock_guard lock(engine_mutex); if (!canReuseDefaultContext()) { - // if (m_defaultContexts.find(conf.device_id) != m_defaultContexts.end()) { - // statistics_map.erase(m_defaultContexts[conf.device_id]); - // } m_defaultContexts[conf.device_id] = std::make_shared(shared_from_this(), AnyMap(), conf); } else { m_defaultContexts[conf.device_id]->GetConfig().kernels_cache_dir = conf.kernels_cache_dir;