diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 1f72b49bd8b5d1..4d353c270706ef 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -91,6 +91,9 @@ struct program_node { if (u->get_dependencies().size() <= dep_idx) { continue; } + if (u->is_fused_dep(dep_idx)) { + continue; + } if (u->get_dependency(dep_idx).get_unique_id() == unique_id) { return true; } @@ -99,6 +102,8 @@ struct program_node { return false; } + bool is_fused_dep(size_t dep_idx) const; + std::map<size_t, memory::ptr> get_const_memory_deps() const; virtual std::unique_ptr<kernel_impl_params> get_kernel_impl_params() const { diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index aae9e844a4eeb6..4acd2d02c808e6 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -190,6 +190,10 @@ void primitive_inst::update_shape() { } auto& dep = _node->get_dependency(i); auto dep_id = dep.id(); + // exclude fused node from memory_deps + if (_node->is_fused_dep(i)) { + break; + } // Events may be not created for in-order queue, so take them for OOO queue only if (_network.has_event(dep.id()) && queue_type == QueueTypes::out_of_order) { dependencies_events.push_back(_network.get_primitive_event(dep_id)); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 6055cd23407f01..0eb1b9a3a01dd1 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -378,6 +378,16 @@ bool program_node::has_padded_dependency() const { }); } +bool program_node::is_fused_dep(size_t dep_idx) const { + for (auto fused : get_fused_primitives()) { + if (dep_idx >= fused.dep_start_idx) { + return true; + } + } + + return false; +} + std::map<size_t, memory::ptr>
program_node::get_const_memory_deps() const { std::map<size_t, memory::ptr> mem_deps; for (auto& i : get_shape_infer_dependencies()) { @@ -385,6 +395,12 @@ std::map<size_t, memory::ptr> program_node::get_const_memory_deps() const { if (i >= get_dependencies().size()) continue; + // exclude fused dependency + if (is_fused_dep(i)) { + continue; + } + + // constant type only auto& dep = get_dependency(i); if (dep.is_type<data>()) { mem_deps.insert({i, dep.as<data>().get_attached_memory_ptr()}); diff --git a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp index 4c1a6431488d60..cd1ead0bcd2a21 100644 --- a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp @@ -7,6 +7,7 @@ #include #include #include +#include <intel_gpu/primitives/eltwise.hpp> #include #include @@ -258,6 +259,78 @@ TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) { } } + +TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad_exclude_fused_mem_dep) { + // Filter : 2x2 + // Input : 2x2 + // Output : 3x3 + // + // Input: + // 8 0.5 + // 6 9 + // + // Filter + // -2 0.5 + // 3.5 1.5 + // + // no bias + // + // + // Output: + // -16.f, 3.f, 0.25f, + // 16.f, -1.25f, 5.25f, + // 21.f, 40.5f, 13.5f + + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); + auto elt_input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 9, 1, 1, 1 } }); + auto in_layout = layout(ov::PartialShape::dynamic(4), data_types::f32, format::yxfb); + + set_values(input, { 8.f, 0.5f, 6.f, 9.f }); + set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); + set_values(elt_input, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f }); + + topology topology( + input_layout("input", in_layout), + input_layout("elt_input", elt_input->get_layout()),
reorder("reordered_input", input_info("input"), this->input_layout_format, data_types::f32), + reorder("reordered_elt_input", input_info("elt_input"), format::bfyx, data_types::f32), + data("weights", weights), + deconvolution("deconv", input_info("reordered_input"), { "weights" }), + eltwise("elt_scale", { input_info("deconv"), input_info("reordered_elt_input") }, eltwise_mode::prod), + reorder("plane_output", input_info("elt_scale"), format::bfyx, data_types::f32) + ); + + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + network.set_input_data("input", input); + network.set_input_data("elt_input", elt_input); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "plane_output"); + + auto output_prim = outputs.begin()->second.get_memory(); + + cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream()); + + std::vector<float> expected_output_vec = { + -16.f, 3.f, 0.25f, + 16.f, -1.25f, 5.25f, + 21.f, 40.5f, 13.5f + }; + + for (unsigned int i = 0; i < expected_output_vec.size(); i++) + { + ASSERT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]); + } +} + TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Filter : 2x2 // Input : 2x2 // Output : 3x3