Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Some fixes to enable dynamic validation model #18340

Merged
merged 6 commits into from
Jul 5, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,23 @@ void add_required_reorders::run(program& p) {
}
}

if (usr->is_type<eltwise>() && usr->is_in_shape_of_subgraph()) {
for (size_t i = 0; i < usr->get_dependencies().size(); i++) {
auto& dep = usr->get_dependency(i);
if (!dep.is_in_data_flow() || dep.is_constant())
continue;
auto dep_layout = dep.get_output_layout();
auto out_layout = usr->get_output_layout();
bool required_reorder = out_layout.data_type != dep_layout.data_type;
if (required_reorder) {
auto new_reorder = std::make_shared<reorder>(dep.id() + "_reorder_" + usr->id(), dep.id(), out_layout.format, out_layout.data_type);
auto& new_reorder_node = p.get_or_create(new_reorder);
p.add_intermediate(new_reorder_node, *usr, dep);
new_reorder_node.recalc_output_layout(false);
}
}
}

if (optimize_data) {
auto fused_ops = usr->get_fused_primitives();
auto out_layout = usr->get_output_layout();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ bool concat_in_place_optimization::match(const program_node& concat_node,
kernel_impl_params concat_params,
std::vector<kernel_impl_params> pred_params,
bool is_runtime) {
if (concat_node.is_output() || concat_params.fused_desc.size() > 0)
if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph())
return false;
auto pred_nodes = concat_node.get_dependencies();
for (auto p : pred_nodes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -826,11 +826,11 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
if (input_data.in_shape_of_subgraph || node->in_shape_of_subgraph)
return;

auto& input_lo = quantize_node.get_dependency(1);
auto& input_hi = quantize_node.get_dependency(2);

auto out_layout = quantize_node.get_output_layout();
auto in_layout = input_data.get_output_layout();
if (in_layout.is_dynamic() || out_layout.is_dynamic())
return;

auto out_dt = out_layout.data_type;
auto in_dt = input_data.get_input_layout(0).data_type;
auto out_dt_is_i8_u8 = data_type_traits::is_i8_u8(out_dt);
Expand All @@ -844,6 +844,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
quantize_node.get_per_tensor_output_shift() &&
quantize_node.get_per_tensor_output_range();

auto& input_lo = quantize_node.get_dependency(1);
auto& input_hi = quantize_node.get_dependency(2);
bool should_fuse = input_data.is_type<binary_convolution>() &&
((out_dt == data_types::bin &&
quantize_node.get_dependencies().size() == 5 &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,8 @@ void remove_redundant_reorders::run(program& p) {
itr = p.get_processing_order().begin();
while (itr != p.get_processing_order().end()) {
auto& node_ptr = *itr++;
if (!node_ptr->is_type<reorder>() || !node_ptr->is_in_data_flow() || node_ptr->get_users().size() != 1 || node_ptr->get_dependencies().size() != 1)
if (!node_ptr->is_type<reorder>() || !node_ptr->is_in_data_flow() || node_ptr->get_users().size() != 1 ||
node_ptr->get_dependencies().size() != 1 || node_ptr->is_dynamic())
continue;

auto& node = node_ptr->as<reorder>();
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,8 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, reorder_node
// Because mvn and concatenation kernel can work cross-layout, if reorder only performs type conversion,
// fusing reorder to the previous node can be done even if it is a dynamic shape case
if ((prev.is_type<mvn>() || prev.is_type<concatenation>()) &&
(format::is_simple_data_format(fmt_prev) && format::is_simple_data_format(fmt_next)) &&
node.is_type_conversion_only())
!prev.is_in_shape_of_subgraph() && node.is_type_conversion_only() &&
(format::is_simple_data_format(fmt_prev) && format::is_simple_data_format(fmt_next)))
return true;

if (prev.is_dynamic() || (!node.get_users().empty() && node.get_users().front()->is_dynamic()))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "test_utils.h"

#include "intel_gpu/runtime/engine.hpp"

#include "intel_gpu/graph/network.hpp"
#include "intel_gpu/graph/program.hpp"
#include "data_inst.h"
#include "shape_of_inst.h"
#include "gather_inst.h"
#include "eltwise_inst.h"
#include "reshape_inst.h"
#include "concatenation_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"

#include "program_wrapper.h"

#include <memory>

using namespace cldnn;
using namespace ::tests;

// Verifies that add_required_reorders inserts a type-converting reorder on the input
// of an eltwise node that lives inside a shape_of subgraph: gather0 emits i32 while
// eltwise0 is declared to compute in f32, so the pass must reorder the dependency.
TEST(add_required_reorders, input_reorder_inside_shape_of_subgraph) {
    auto& engine = get_test_engine();

    // Dynamic spatial dimensions force the dynamic-shape handling of the pass.
    auto dyn_layout = layout{ov::PartialShape{1, 32, ov::Dimension::dynamic(), ov::Dimension::dynamic()},
                             data_types::f16, format::bfyx};
    auto input_mem = engine.allocate_memory({ov::PartialShape{1, 32, 32, 32}, data_types::f16, format::bfyx});
    auto idx_mem   = engine.allocate_memory({ov::PartialShape{}, data_types::i32, format::bfyx});
    auto scale_mem = engine.allocate_memory({ov::PartialShape{}, data_types::f32, format::bfyx});

    // Two parallel gather -> eltwise -> unsqueeze branches off shape_of, joined by a concat;
    // the whole subgraph operates on shape data.
    topology topo;
    topo.add(input_layout("input", dyn_layout));
    topo.add(data("data_0", idx_mem));
    topo.add(data("data_1", scale_mem));
    topo.add(shape_of("shape_of", input_info("input"), 4, data_types::i32));
    topo.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(eltwise("eltwise0", {input_info("gather0"), input_info("data_1")}, eltwise_mode::prod, data_types::f32));
    topo.add(reshape("reshape0", input_info("eltwise0"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    topo.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(eltwise("eltwise1", {input_info("gather1"), input_info("data_1")}, eltwise_mode::prod, data_types::f32));
    topo.add(reshape("reshape1", input_info("eltwise1"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    topo.add(concatenation("concat0", {input_info("reshape0"), input_info("reshape1")}, 0, data_types::f32));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));

    network net(engine, topo, cfg);
    net.set_input_data("input", input_mem);
    net.execute();

    auto prog = net.get_program();
    ASSERT_NE(prog, nullptr);

    // After the pass, eltwise0's input layout must have been converted to f32
    // (its producer gather0 outputs i32).
    auto& eltwise_node = prog->get_node("eltwise0");
    auto eltwise_in_layout = eltwise_node.get_input_layout();
    ASSERT_EQ(eltwise_in_layout.data_type, data_types::f32);
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "fully_connected_inst.h"
#include "permute_inst.h"
#include "reorder_inst.h"
#include "shape_of_inst.h"
#include "gather_inst.h"
#include "intel_gpu/graph/network.hpp"
#include "pass_manager.h"
#include "to_string_utils.h"
Expand Down Expand Up @@ -520,6 +522,61 @@ TEST(prepare_buffer_fusing, crop_b_axis) {
}
}

// Checks that prepare_buffer_fusing skips the in-place concat optimization for a
// concatenation inside a shape_of subgraph: "crop" feeds "concat1", and after the
// pass crop's output buffer must NOT alias concat1's output buffer.
TEST(prepare_buffer_fusing, skip_in_place_concat_inside_shape_of_subgraph) {
    auto& engine = get_test_engine();

    // Dynamic spatial dimensions push the network down the dynamic-shape path.
    auto dyn_layout = layout{ov::PartialShape{1, 32, ov::Dimension::dynamic(), ov::Dimension::dynamic()},
                             data_types::f16, format::bfyx};
    auto input_mem = engine.allocate_memory({ov::PartialShape{1, 32, 32, 32}, data_types::f16, format::bfyx});
    auto idx_mem   = engine.allocate_memory({ov::PartialShape{}, data_types::i32, format::bfyx});
    auto scale_mem = engine.allocate_memory({ov::PartialShape{}, data_types::f32, format::bfyx});
    auto mul_mem   = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx});

    const ov::op::AutoBroadcastSpec& bcast = ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY);

    topology topo;
    topo.add(input_layout("input", dyn_layout));
    topo.add(data("data_0", idx_mem));
    topo.add(data("data_1", scale_mem));
    topo.add(data("data_2", mul_mem));
    topo.add(shape_of("shape_of", input_info("input"), 4, data_types::i32));
    // Branch 0: gather -> reorder(f32) -> eltwise -> unsqueeze.
    topo.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(reorder("reorder0", input_info("gather0"), format::any, data_types::f32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(eltwise("eltwise0", input_info("reorder0"), input_info("data_1"), eltwise_mode::prod, bcast));
    topo.add(reshape("reshape0", input_info("eltwise0"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    // Branch 1: same structure as branch 0.
    topo.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(reorder("reorder1", input_info("gather1"), format::any, data_types::f32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(eltwise("eltwise1", input_info("reorder1"), input_info("data_1"), eltwise_mode::prod, bcast));
    topo.add(reshape("reshape1", input_info("eltwise1"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    // "crop" is the second input of "concat1" — the candidate for in-place fusion.
    topo.add(crop("crop", input_info("shape_of"), tensor({2,1,1,1,1,1,1,1,1}), tensor({0,0,0,0,1,1,1,1,1})));
    topo.add(concatenation("concat0", {input_info("reshape0"), input_info("reshape1")}, 0, data_types::f32));
    topo.add(reorder("reorder3", input_info("concat0"), format::any, data_types::i32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(concatenation("concat1", {input_info("reorder3"), input_info("crop")}, 0, data_types::i32));
    topo.add(eltwise("eltwise2", input_info("concat1"), input_info("data_2"), eltwise_mode::prod, bcast));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));

    network net(engine, topo, cfg);
    net.set_input_data("input", input_mem);
    net.execute();

    auto prog = net.get_program();
    ASSERT_NE(prog, nullptr);

    // crop's runtime output layout must match the compiled impl-param layout, and its
    // buffer must be distinct from concat1's (i.e. in-place concat was not applied).
    auto& crop_node = prog->get_node("crop");
    auto impl_param = crop_node.get_kernel_impl_params();
    auto crop_mem = net.get_output_memory("crop");
    ASSERT_EQ(impl_param->get_output_layout(), crop_mem->get_layout());
    ASSERT_FALSE(engine.is_the_same_buffer(*net.get_output_memory("crop"), *net.get_output_memory("concat1")));
}

#ifdef ENABLE_ONEDNN_FOR_GPU
TEST(prepare_buffer_fusing, in_place_onednn_concat_static) {
auto& engine = get_test_engine();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "activation_inst.h"
#include "mvn_inst.h"
#include "concatenation_inst.h"
#include "shape_of_inst.h"
#include "gather_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"

Expand Down Expand Up @@ -323,3 +325,55 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_concat_dyn) {

ASSERT_EQ(concat_layout.data_type, data_types::f32);
}

// Verifies that remove_redundant_reorders does NOT fuse "reorder3" (f32 -> i32) into
// its producer "concat0" when the concat belongs to a shape_of subgraph: the reorder
// node must survive and concat0 must keep its f32 output type.
TEST(remove_redundant_reorders, not_to_fuse_concat_with_reorder_inside_shape_of_subgraph) {
    auto& engine = get_test_engine();

    // Dynamic spatial dimensions exercise the dynamic-shape path of the pass.
    auto dyn_layout = layout{ov::PartialShape{1, 32, ov::Dimension::dynamic(), ov::Dimension::dynamic()},
                             data_types::f16, format::bfyx};
    auto input_mem = engine.allocate_memory({ov::PartialShape{1, 32, 32, 32}, data_types::f16, format::bfyx});
    auto idx_mem   = engine.allocate_memory({ov::PartialShape{}, data_types::i32, format::bfyx});
    auto scale_mem = engine.allocate_memory({ov::PartialShape{}, data_types::f32, format::bfyx});
    auto pad_mem   = engine.allocate_memory({ov::PartialShape{2}, data_types::i32, format::bfyx});

    const ov::op::AutoBroadcastSpec& bcast = ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY);

    topology topo;
    topo.add(input_layout("input", dyn_layout));
    topo.add(data("data_0", idx_mem));
    topo.add(data("data_1", scale_mem));
    topo.add(data("data_2", pad_mem));
    topo.add(shape_of("shape_of", input_info("input"), 4, data_types::i32));
    // Branch 0: gather -> reorder(f32) -> eltwise -> unsqueeze.
    topo.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(reorder("reorder0", input_info("gather0"), format::any, data_types::f32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(eltwise("eltwise0", input_info("reorder0"), input_info("data_1"), eltwise_mode::prod, bcast));
    topo.add(reshape("reshape0", input_info("eltwise0"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    // Branch 1: same structure as branch 0.
    topo.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(reorder("reorder1", input_info("gather1"), format::any, data_types::f32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(eltwise("eltwise1", input_info("reorder1"), input_info("data_1"), eltwise_mode::prod, bcast));
    topo.add(reshape("reshape1", input_info("eltwise1"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    // concat0 (f32) -> reorder3 (i32): the fusion candidate under test.
    topo.add(concatenation("concat0", {input_info("reshape0"), input_info("reshape1")}, 0, data_types::f32));
    topo.add(reorder("reorder3", input_info("concat0"), format::any, data_types::i32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(concatenation("concat1", {input_info("reorder3"), input_info("data_2")}, 0, data_types::i32));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));

    network net(engine, topo, cfg);
    net.set_input_data("input", input_mem);
    net.execute();

    auto prog = net.get_program();
    ASSERT_NE(prog, nullptr);

    // The reorder must not have been fused away, and concat0's output stays f32.
    ASSERT_TRUE(has_node(*prog, "reorder3"));
    auto& concat_node = prog->get_node("concat0");
    auto concat_layout = concat_node.get_output_layout();
    ASSERT_EQ(concat_layout.data_type, data_types::f32);
}