From de94a33a6b6d352fbc4517184b44edc8126b0506 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 7 Oct 2024 19:01:01 +0400 Subject: [PATCH] [GPU] network code cleanup (#26908) ### Details: - Removed few unnecessary methods - Refactor `network::execute()` --- .../include/intel_gpu/graph/network.hpp | 32 ------- src/plugins/intel_gpu/src/graph/kv_cache.cpp | 3 +- src/plugins/intel_gpu/src/graph/network.cpp | 89 +++++-------------- src/plugins/intel_gpu/src/plugin/graph.cpp | 1 - .../tests/unit/passes/reorder_inputs_test.cpp | 8 +- .../test_cases/concatenation_gpu_test.cpp | 24 ++--- .../unit/test_cases/condition_gpu_test.cpp | 14 +-- .../unit/test_cases/convolution_gpu_test.cpp | 82 ++++++++--------- .../tests/unit/test_cases/crop_gpu_test.cpp | 5 +- .../tests/unit/test_cases/reduce_gpu_test.cpp | 12 +-- .../unit/test_cases/reorder_gpu_test.cpp | 1 - 11 files changed, 96 insertions(+), 175 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 63adae28ddabf3..f4e09a51513085 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -116,34 +116,10 @@ struct network { std::vector> const& get_outputs() { return _outputs; } - const std::vector>& get_outputs() const { - return reinterpret_cast>&>(_outputs); - } - - network_output get_output(const primitive_id& output_id) { - event::ptr evt; - if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling) - evt = get_primitive_event(output_id); - return network_output(evt, get_output_memory(output_id), get_stream_ptr(), get_output_layout(output_id)); - } - layout get_node_output_layout(const primitive_id& output_id) const; memory::ptr get_output_memory(const primitive_id& output_id); layout get_output_layout(const primitive_id& output_id) const; std::vector get_input_layouts() const; - /// @brief Returns the list of primitive ids before and after graph optimization. - /// @details If primitive was not optimized, the old and actual id will be the same. - /// @n If primitive was optimized during graph optimization, the actual id will be "_optimized_". - std::map get_all_primitives() const { - auto primitive_ids = get_all_primitive_ids(); - auto primitive_org_ids = get_all_primitive_org_ids(); - std::map result; - for (decltype(primitive_org_ids.size()) i = 0; i < primitive_org_ids.size(); i++) { - result.emplace(primitive_org_ids[i], primitive_ids[i]); - } - return result; - } - /// @brief Returns the list of @ref event for the primitives that were executed in network. std::map get_executed_primitives() const { auto primitive_ids = get_executed_primitive_ids(); @@ -201,7 +177,6 @@ struct network { void configure_primitives_second_output(); void build_insts_deps(); uint32_t get_id() const { return net_id; } - uint32_t get_local_id() const { return _local_net_id; } stream& get_stream() const { return *_stream; } stream::ptr get_stream_ptr() const { return _stream; } bool is_internal() const { return _internal; } @@ -219,7 +194,6 @@ struct network { const ov::intel_gpu::VariableStateInfo& get_variable_info(const std::string &variable_id) const; const ov::intel_gpu::VariablesMap& get_variables() const; const ov::intel_gpu::VariablesInfoMap& get_variables_info() const; - std::vector get_kv_cache_ids() const { return kv_cache_ids; } const ExecutionConfig& get_config() const { return _config; } @@ -243,8 +217,6 @@ struct network { bool _is_dynamic = false; bool _enable_profiling = false; bool _reset_arguments; - uint32_t _local_net_id = 0; // This is for thread-safe deserialization. 'net_id' is globally unique, - // but '_local_net_id' is unique only in each intel_gpu::Graph. std::unordered_map> _primitives; std::vector _in_out_shared_mem_types; @@ -255,10 +227,8 @@ struct network { ov::intel_gpu::VariablesMap _variables_states; ov::intel_gpu::VariablesInfoMap _variables_state_info; - std::vector kv_cache_ids; program::primitives_info _prims_info; - std::map _ext_id_mapping; size_t _weights_cache_capacity = 1; std::unordered_map _events; @@ -272,9 +242,7 @@ struct network { void allocate_primitive_instance(program_node const& node); void transfer_memory_to_device(std::shared_ptr instance, program_node const& node); void add_to_exec_order(const primitive_id& id); - std::shared_ptr find_in_internal_networks(const primitive_id& id) const; std::shared_ptr find_primitive(const primitive_id& id) const; - void check_names(); void add_default_output_chains(); void calculate_weights_cache_capacity(); output_chains_map::iterator add_output_chain(std::shared_ptr& p_inst); diff --git a/src/plugins/intel_gpu/src/graph/kv_cache.cpp b/src/plugins/intel_gpu/src/graph/kv_cache.cpp index 95cdd587cdf175..66a874b9b153ec 100644 --- a/src/plugins/intel_gpu/src/graph/kv_cache.cpp +++ b/src/plugins/intel_gpu/src/graph/kv_cache.cpp @@ -17,7 +17,8 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(kv_cache) kv_cache_inst::typed_primitive_inst(network& network, const kv_cache_node& node) : parent{network, node, false}, memory_state::variable{node.get_primitive()->variable_info.variable_id} { - kv_cache_id = network.get_kv_cache_ids().size(); + thread_local size_t kv_cache_counter = 0; + kv_cache_id = kv_cache_counter++; } layout kv_cache_inst::calc_output_layout(const kv_cache_node& node, kernel_impl_params const& impl_param) { diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 8f0e97dd51ee12..0af0e957df4ea8 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -203,8 +203,6 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo calculate_weights_cache_capacity(); allocate_primitives(); configure_primitives_second_output(); - if (!_program->is_loaded_from_cache()) - check_names(); build_insts_deps(); build_exec_order(); validate_primitives(); @@ -333,11 +331,7 @@ void network::reset_execution(bool wait) { event::ptr network::set_input_data(const primitive_id& id, memory::ptr data) { GPU_DEBUG_TRACE_DETAIL << "Set input " << id << " " << data->get_layout().to_short_string() << std::endl; - std::shared_ptr primitive_inst; - - primitive_inst = find_primitive(id); - - OPENVINO_ASSERT(primitive_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id); + auto primitive_inst = find_primitive(id); if (primitive_inst->type() != input_layout::type_id()) { CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input"); @@ -481,11 +475,8 @@ network::output_chains_map::iterator network::add_output_chain(std::shared_ptr

network::set_output_memory(const primitive_id& id, memory::ptr mem_new) { GPU_DEBUG_TRACE_DETAIL << "Set output " << id << " " << mem_new->get_layout().to_short_string() << std::endl; - std::shared_ptr p_inst; std::vector ret_ev; - p_inst = find_primitive(id); - - OPENVINO_ASSERT(p_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id); + std::shared_ptr p_inst = find_primitive(id); auto iter = std::find(_outputs.begin(), _outputs.end(), p_inst); if (iter == _outputs.end()) @@ -513,35 +504,10 @@ std::vector network::set_output_memory(const primitive_id& id, memor return ret_ev; } -void cldnn::network::check_names() { - for (auto const& prim : _primitives) { - if (find_in_internal_networks(prim.first) != nullptr) - CLDNN_ERROR_MESSAGE("Network", "Found primitive with id: " + prim.first + "in anotother network."); - } -} - std::shared_ptr cldnn::network::find_primitive(const primitive_id& id) const { - if (_primitives.find(id) != _primitives.end()) - return _primitives.at(id); - - return find_in_internal_networks(id); -} - -std::shared_ptr cldnn::network::find_in_internal_networks(const primitive_id& id) const { - std::shared_ptr ret; - - for (auto const& prim : _primitives) { - if (prim.second->type() == condition::type_id()) { // currently only condition inst contains mini networks - auto cond_inst = std::static_pointer_cast(prim.second); - ret = cond_inst->get_net_true()->find_primitive(id); - if (ret != nullptr) - return ret; - ret = cond_inst->get_net_false()->find_primitive(id); - if (ret != nullptr) - return ret; - } - } - return nullptr; + auto it = _primitives.find(id); + OPENVINO_ASSERT(it != _primitives.end(), "[GPU] Network doesn't contain primitive ", id); + return it->second; } std::string network::get_primitive_info(const primitive_id& id) const { @@ -552,9 +518,6 @@ std::string network::get_primitive_info(const primitive_id& id) const { bool network::does_node_need_lockable_output(const primitive_id& id) const { auto prim_inst = find_primitive(id); - OPENVINO_ASSERT(prim_inst, "[GPU] Can't get implementation type, since topology ", - "doesn't contain primitive with requested id: ", id); - const auto& node = prim_inst->get_node(); if (node.is_type()) { for (const auto& user : node.get_users()) { @@ -574,15 +537,6 @@ std::string network::get_implementation_info(const primitive_id& id) const { return _program->get_implementation_info(id); } -layout network::get_node_output_layout(const primitive_id& output_id) const { - auto res = std::find_if(_outputs.begin(), _outputs.end(), [&](const std::shared_ptr& v) { - return v->id() == output_id; - }); - OPENVINO_ASSERT(res != _outputs.end(), "[GPU] Couldn't get output layout for ", output_id, ". Output with such name is not found in the outputs list"); - - return (*res)->get_node_output_layout(); -} - memory::ptr network::get_output_memory(const primitive_id& output_id) { return get_primitive(output_id)->output_memory_ptr(); } @@ -729,17 +683,6 @@ void network::add_to_exec_order(const primitive_id& id) { } std::map network::execute(const std::vector& dependencies) { - execute_impl(dependencies); - - auto output_ids = get_output_ids(); - std::map result; - for (auto& id : output_ids) { - result.emplace(id, get_output(id)); - } - return result; -} - -void network::execute_impl(const std::vector& events) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "NetworkImpl::Execute"); NETWORK_DEBUG(*this); @@ -779,6 +722,21 @@ void network::execute_impl(const std::vector& events) { // in some cases. auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream()); + execute_impl(dependencies); + + std::map result; + for (auto& inst : _outputs) { + event::ptr ev = nullptr; + const auto& id = inst->id(); + if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling) + ev = _events.at(id); + + result.emplace(id, network_output(ev, inst->output_memory_ptr(0), get_stream_ptr(), inst->get_output_layout(0))); + } + return result; +} + +void network::execute_impl(const std::vector& events) { set_arguments(); // This extra flush command is needed for dynamic models in both cases of out_of_order / in_order operating mode @@ -904,10 +862,6 @@ const program::graph_optimizer_info& network::get_optimizer_passes_info() const } std::map network::get_ext_id_mapping() const { - if (_program == nullptr) { - return _ext_id_mapping; - } - std::map result; for (auto& prim : _primitives) { result.emplace(prim.first, prim.second->get_node().get_primitive()->origin_op_name); @@ -1008,9 +962,6 @@ void network::allocate_primitive_instance(program_node const& node) { if (node.is_type()) _data_outputs.push_back(inst); } - if (node.is_type()) { - kv_cache_ids.push_back(node.id()); - } if (auto state_prim = std::dynamic_pointer_cast(inst)) { auto prim = inst->get_node().get_primitive(); set_variables_state_info(state_prim->variable_id(), node.get_output_layout(0), state_prim->get_user_specified_type(), prim.get()); diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 2a3bd5dc0ff239..22f616e3d39818 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -558,7 +558,6 @@ void Graph::update_profiling_info() { }; std::map executedPrimitives = get_network()->get_executed_primitives(); - auto allPrimitives = get_network()->get_all_primitives(); // Get profiling info for all layers for (auto &profiledID : profilingIDs) { diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp index 7be7f74e6e96e5..cd5c2fdd1681fc 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_inputs_test.cpp @@ -183,7 +183,7 @@ TEST(reorder_inputs, impl_forcing_basic_format) { 7.f, 3.f, -2.f, -1.f }); network.set_input_data("input", input); - network.execute(); + auto outputs = network.execute(); const auto& prog = network.get_program(); auto& pool_node = prog->get_node("pool"); @@ -191,7 +191,7 @@ TEST(reorder_inputs, impl_forcing_basic_format) { ASSERT_EQ(pool_layout.format.value, format::yxfb); - auto out_mem = network.get_output("pool").get_memory(); + auto out_mem = outputs.at("pool").get_memory(); cldnn::mem_lock out_mem_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem_ptr.size(), 4u); @@ -239,7 +239,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { 7.f, 3.f, -2.f, -1.f }); network.set_input_data("input", input); - network.execute(); + auto outputs = network.execute(); auto prog = network.get_program(); auto& node = prog->get_node("actv"); @@ -250,7 +250,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { ASSERT_EQ(actv_layout.format.value, format::yxfb); ASSERT_EQ(kernel_name, actv_impl.kernel_name); - auto out_mem = network.get_output("actv").get_memory(); + auto out_mem = outputs.at("actv").get_memory(); cldnn::mem_lock out_mem_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem_ptr.size(), 8u); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp index 8e3da9692dcb45..f640b02afa99cb 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp @@ -1031,9 +1031,9 @@ struct concat_gpu_4d : public concat_gpu { network.set_input_data(input_ids[i].pid, in_memory[i]); } - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("concat").get_memory(); + auto out_mem = outputs.at("concat").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); for (size_t bi = 0; bi < batch_num; bi++) { @@ -1117,9 +1117,9 @@ struct concat_gpu_4d_axis3 : public concat_axis3_gpu { network.set_input_data(input_ids[i].pid, in_memory[i]); } - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("concat").get_memory(); + auto out_mem = outputs.at("concat").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); for (size_t bi = 0; bi < batch_num; bi++) { @@ -1283,9 +1283,9 @@ struct concat_id_conv_gpu_4d : public concat_gpu { network.set_input_data(input_ids[i].pid, in_memory[i]); } - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv").get_memory(); + auto out_mem = outputs.at("conv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, fmt); @@ -1420,13 +1420,13 @@ struct concat_gpu_4d_implicit : public concat_gpu { for (size_t i = 0; i < in_features.size(); i++) { concat_network->set_input_data(input_ids[i], in_memory[i]); } - concat_network->execute(); + auto outputs = concat_network->execute(); bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); bool concat_opt_result = std::static_pointer_cast(concat_network->get_primitive("concat"))->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); - return concat_network->get_output("reorder").get_memory(); + return outputs.at("reorder").get_memory(); } std::vector>>>> generate_input() { @@ -1640,13 +1640,13 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu { for (size_t i = 0; i < in_features.size(); i++) { concat_network.set_input_data(input_ids[i], in_memory[i]); } - concat_network.execute(); + auto outputs = concat_network.execute(); bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); EXPECT_EQ(concat_opt_enabled, concat_opt_result); - return concat_network.get_output("reorder").get_memory(); + return outputs.at("reorder").get_memory(); } std::vector>>>> generate_input() { @@ -1803,7 +1803,7 @@ struct concat_gpu_4d_explicit : public concat_gpu { for (size_t i = 0; i < 4; i++) { concat_network.set_input_data(input_ids[i], in_memory[i]); } - concat_network.execute(); + auto outputs = concat_network.execute(); bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data); bool concat_opt_result = std::static_pointer_cast(concat_network.get_primitive("concat"))->node->can_be_optimized(); @@ -1813,7 +1813,7 @@ struct concat_gpu_4d_explicit : public concat_gpu { if (concat_opt_enabled && batch_num > 1) concat_opt_result = !concat_opt_result; EXPECT_EQ(concat_opt_enabled, concat_opt_result); - return concat_network.get_output("reorder").get_memory(); + return outputs.at("reorder").get_memory(); } std::vector>>>> generate_input() { diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp index d5d7798ff4ce79..7fd439ecac5728 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/condition_gpu_test.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/graph/network.hpp" #include "intel_gpu/primitives/permute.hpp" #include "intel_gpu/runtime/internal_properties.hpp" #include "random_generator.hpp" @@ -577,6 +578,7 @@ class condition_gpu_tests: public ::testing::Test { ); branch_true.inner_program = program::build_program(engine, branch_true_topology, config, false, false, true); branch_true.input_map.insert({"input", "branch_input3"}); + branch_true.input_map.insert({"predicate2", "predicate2"}); branch_true.output_map.insert({0, "condi_nested"}); } @@ -598,11 +600,12 @@ class condition_gpu_tests: public ::testing::Test { ); topology.add( - input_layout("predicate", predicate->get_layout()) + input_layout("predicate", predicate->get_layout()), + input_layout("predicate2", predicate2->get_layout()) ); topology.add( - condition("condi", {input_info("predicate"), input_info("input")}, branch_true, branch_false) + condition("condi", {input_info("predicate"), input_info("predicate2"), input_info("input")}, branch_true, branch_false) ); std::vector input_data = { @@ -773,7 +776,7 @@ class condition_gpu_tests: public ::testing::Test { pooling(duplicated_id, input_info(cond_id), cldnn::pooling_mode::max, { 2, 1 }, { 2, 1 }) ); - EXPECT_ANY_THROW(network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);); + EXPECT_NO_THROW(network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);); } void test_empty_body(bool is_caching_test) { @@ -1038,6 +1041,7 @@ TEST(condition_gpu, set_empty_tensor) { net.set_input_data(empty_input_id, empty_input_mem); net.set_input_data(input_id, input_mem); - OV_ASSERT_NO_THROW(net.execute()); - OV_ASSERT_NO_THROW(net.get_output(cond_id).get_memory()); + std::map outputs; + OV_ASSERT_NO_THROW(outputs = net.execute()); + OV_ASSERT_NO_THROW(outputs.at(cond_id).get_memory()); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 421941296e58ab..4155ac0b420e66 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -5439,9 +5439,9 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); @@ -5549,9 +5549,9 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); @@ -5773,9 +5773,9 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("concat").get_memory(); + auto out_mem = outputs.at("concat").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bfyx); @@ -6020,9 +6020,9 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("out").get_memory(); + auto out_mem = outputs.at("out").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto output_layout = out_mem->get_layout(); @@ -6254,12 +6254,12 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfzyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); @@ -6394,12 +6394,12 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfzyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); @@ -6531,12 +6531,12 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfzyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); @@ -6695,12 +6695,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); @@ -6836,12 +6836,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); @@ -6975,12 +6975,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); + auto out_mem = outputs.at("conv_bsv16_fsv16").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); + auto out_mem_bfyx = outputs.at("reorder_bfyx").get_memory(); cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); @@ -7113,9 +7113,9 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); @@ -7257,9 +7257,9 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::b_fs_yx_fsv16); @@ -7395,9 +7395,9 @@ TEST_P(convolution_depthwise_gpu_fsv16_xy, depthwise_conv_b_fs_yx_fsv16) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("out").get_memory(); + auto out_mem = outputs.at("out").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::b_fs_yx_fsv16); @@ -7602,9 +7602,9 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv").get_memory(); + auto out_mem = outputs.at("conv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::bfyx); @@ -7924,9 +7924,9 @@ TEST_P(convolution_grouped_gpu, base) { cldnn::network network(engine, topology, config); network.set_input_data("input", input); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv").get_memory(); + auto out_mem = outputs.at("conv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); @@ -8092,9 +8092,9 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { network network(engine, topology, config); network.set_input_data("input", input_mem); - network.execute(); + auto outputs = network.execute(); - auto out_mem = network.get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); @@ -9669,7 +9669,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { std::cerr << p.original_id << " " << p.kernel_id << std::endl; auto out_ptr = get_output_values_to_float(network, outputs.find("conv_fsv")->second); - auto out_lay = network.get_node_output_layout("conv_fsv"); + auto out_lay = network.get_primitive("conv_fsv")->get_node_output_layout(); ASSERT_EQ(out_lay.batch(), expected_result.size()); ASSERT_EQ(out_lay.feature(), expected_result[0].size()); ASSERT_EQ(out_lay.spatial(1), expected_result[0][0].size()); @@ -10330,9 +10330,9 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_block_size_1( network->set_input_data("input", input_mem); - network->execute(); + auto outputs = network->execute(); - auto out_mem = network->get_output("conv_fsv").get_memory(); + auto out_mem = outputs.at("conv_fsv").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem->get_layout().format, format::b_fs_yx_fsv16); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp index 1b9e52d1e7ef2b..20d42e85d0c301 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/crop_gpu_test.cpp @@ -1569,9 +1569,8 @@ TEST(crop_gpu, optimized_out_crop) { for (size_t i = 0; i < out_vec.size(); i++) ASSERT_EQ(output_ptr[i], out_vec[i]); - auto all_primitives = network.get_all_primitives(); - ASSERT_TRUE(all_primitives["crop1"] == "_optimized_"); - ASSERT_TRUE(all_primitives["crop2"] == "_optimized_"); + ASSERT_TRUE(network.get_primitive("crop1")->can_be_optimized()); + ASSERT_TRUE(network.get_primitive("crop2")->can_be_optimized()); } TEST(crop_single_axis, simple_Baxis) { diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp index 2dd46fe7598b5a..a0e1d307e373c0 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reduce_gpu_test.cpp @@ -538,9 +538,9 @@ class ReduceTestBase : public ::testing::TestWithParamset_input_data("input", input_mem); - network->execute(); + auto outputs = network->execute(); - auto out_mem = network->get_output("reduce").get_memory(); + auto out_mem = outputs.at("reduce").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); @@ -1972,9 +1972,9 @@ class ReduceXYWithBigTensorTestBase : public ::testing::TestWithParamset_input_data("input", input_mem); - network->execute(); + auto outputs = network->execute(); - auto out_mem = network->get_output("reduce").get_memory(); + auto out_mem = outputs.at("reduce").get_memory(); cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); @@ -2132,9 +2132,9 @@ class ReduceOnednnTestBase : public ::testing::TestWithParam out_ptr(out_mem, get_test_stream()); auto out_lay = out_mem->get_layout(); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp index 5d99607c5efac5..d43273e2a1508d 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/reorder_gpu_test.cpp @@ -1916,7 +1916,6 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) net.set_input_data("in", in); auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); - auto all_primitives = net.get_all_primitives(); if (engine.get_device_info().supports_immad) { // Currently, oneDNN only supports in_order_queue