Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Need to exclude fused mem_dep from shape_infer_dep #16300

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/plugins/intel_gpu/src/graph/include/program_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ struct program_node {
if (u->get_dependencies().size() <= dep_idx) {
continue;
}
if (u->is_fused_dep(dep_idx)) {
continue;
}
if (u->get_dependency(dep_idx).get_unique_id() == unique_id) {
return true;
}
Expand All @@ -99,6 +102,8 @@ struct program_node {
return false;
}

bool is_fused_dep(size_t dep_idx) const;
yeonbok marked this conversation as resolved.
Show resolved Hide resolved

std::map<size_t, memory::ptr> get_const_memory_deps() const;

virtual std::unique_ptr<kernel_impl_params> get_kernel_impl_params() const {
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/intel_gpu/src/graph/primitive_inst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ void primitive_inst::update_shape() {
}
auto& dep = _node->get_dependency(i);
auto dep_id = dep.id();
// exclude fused node from memory_deps
if (_node->is_fused_dep(i)) {
break;
}
// Events may be not created for in-order queue, so take them for OOO queue only
if (_network.has_event(dep.id()) && queue_type == QueueTypes::out_of_order) {
dependencies_events.push_back(_network.get_primitive_event(dep_id));
Expand Down
16 changes: 16 additions & 0 deletions src/plugins/intel_gpu/src/graph/program_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,13 +378,29 @@ bool program_node::has_padded_dependency() const {
});
}

bool program_node::is_fused_dep(size_t dep_idx) const {
    // Returns true when the dependency at `dep_idx` is an extra input that was
    // appended for a fused primitive rather than one of this node's own inputs.
    // Callers (shape inference / const-mem-dep collection) use this to skip
    // fused-op inputs.
    //
    // NOTE(review): this assumes fused-primitive inputs always occupy indices
    // at or beyond `dep_start_idx`, i.e. they are appended after the node's
    // real dependencies — confirm against how fused deps are attached.
    //
    // Take each descriptor by const reference: iterating by value copies a
    // fused_primitive_desc on every step (clang-tidy performance-for-range-copy).
    for (const auto& fused : get_fused_primitives()) {
        if (dep_idx >= fused.dep_start_idx) {
            return true;
        }
    }

    return false;
}

std::map<size_t, memory::ptr> program_node::get_const_memory_deps() const {
std::map<size_t, memory::ptr> mem_deps;
for (auto& i : get_shape_infer_dependencies()) {
// Some primitives may have flexible count of deps (e.g. reshape), thus allow skipping some deps
if (i >= get_dependencies().size())
continue;
yeonbok marked this conversation as resolved.
Show resolved Hide resolved

// exclude fused dependency
if (is_fused_dep(i)) {
continue;
}

// constant type only
auto& dep = get_dependency(i);
if (dep.is_type<data>()) {
mem_deps.insert({i, dep.as<data>().get_attached_memory_ptr()});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/deconvolution.hpp>
#include <intel_gpu/primitives/crop.hpp>
#include <intel_gpu/primitives/eltwise.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/data.hpp>

Expand Down Expand Up @@ -258,6 +259,78 @@ TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
}
}


TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad_exclude_fused_mem_dep) {
    // Deconvolution (2x2 input, 2x2 filter, no bias -> 3x3 output) followed by
    // an eltwise multiply whose second operand is expected to be fused into the
    // deconvolution node. Multiplying by all ones leaves the result unchanged,
    // so the expected output is the plain deconvolution of:
    //
    //   Input:        Filter:
    //   8    0.5      -2     0.5
    //   6    9        3.5    1.5

    auto& engine = get_test_engine();

    auto input_mem   = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
    auto weights_mem = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } });
    auto elt_mem     = engine.allocate_memory({ data_types::f32, format::yxfb,{ 9, 1, 1, 1 } });
    auto dyn_layout  = layout(ov::PartialShape::dynamic(4), data_types::f32, format::yxfb);

    set_values(input_mem,   { 8.f, 0.5f, 6.f, 9.f });
    set_values(weights_mem, { -2.0f, 0.5f, 3.5f, 1.5f });
    set_values(elt_mem,     { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f });

    topology topology(
        input_layout("input", dyn_layout),
        input_layout("elt_input", elt_mem->get_layout()),
        reorder("reordered_input", input_info("input"), this->input_layout_format, data_types::f32),
        reorder("reordered_elt_input", input_info("elt_input"), format::bfyx, data_types::f32),
        data("weights", weights_mem),
        deconvolution("deconv", input_info("reordered_input"), { "weights" }),
        eltwise("elt_scale", { input_info("deconv"), input_info("reordered_elt_input") }, eltwise_mode::prod),
        reorder("plane_output", input_info("elt_scale"), format::bfyx, data_types::f32)
    );

    // Dynamic shapes + data optimization so the fused-dep exclusion path runs.
    ExecutionConfig config;
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    config.set_property(ov::intel_gpu::optimize_data(true));

    network network(engine, topology, config);
    network.set_input_data("input", input_mem);
    network.set_input_data("elt_input", elt_mem);

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "plane_output");

    auto output_prim = outputs.begin()->second.get_memory();
    cldnn::mem_lock<float> output_ptr(output_prim, get_test_stream());

    const std::vector<float> expected = {
        -16.f, 3.f, 0.25f,
        16.f, -1.25f, 5.25f,
        21.f, 40.5f, 13.5f
    };

    for (size_t i = 0; i < expected.size(); ++i) {
        ASSERT_FLOAT_EQ(expected[i], output_ptr[i]);
    }
}

TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Filter : 2x2
// Input : 2x2
// Output : 3x3
Expand Down