Skip to content

Commit

Permalink
[GPU] Some fixes to enable dynamic validation model (#18340)
Browse files Browse the repository at this point in the history
* Skip fuse_quantize_f if input or output layout is dynamic

* Update condition of can_fuse_reorder_to_prev for concat in shapeof subgraph

* Skip concat_in_place_optimization if concat is in a shape-of subgraph

Signed-off-by: Andrew Park <[email protected]>

* Add reorder if eltwise is in a shape-of subgraph and the input and output data types differ

* Skip reorder optimization if reorder has dynamic shape on remove_redundant_reorders

* Add reproducible TCs for ov_gpu_unit_tests

---------

Signed-off-by: Andrew Park <[email protected]>
  • Loading branch information
andrew-k-park authored Jul 5, 2023
1 parent 68e1f07 commit 9069dab
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,23 @@ void add_required_reorders::run(program& p) {
}
}

if (usr->is_type<eltwise>() && usr->is_in_shape_of_subgraph()) {
for (size_t i = 0; i < usr->get_dependencies().size(); i++) {
auto& dep = usr->get_dependency(i);
if (!dep.is_in_data_flow() || dep.is_constant())
continue;
auto dep_layout = dep.get_output_layout();
auto out_layout = usr->get_output_layout();
bool required_reorder = out_layout.data_type != dep_layout.data_type;
if (required_reorder) {
auto new_reorder = std::make_shared<reorder>(dep.id() + "_reorder_" + usr->id(), dep.id(), out_layout.format, out_layout.data_type);
auto& new_reorder_node = p.get_or_create(new_reorder);
p.add_intermediate(new_reorder_node, *usr, dep);
new_reorder_node.recalc_output_layout(false);
}
}
}

if (optimize_data) {
auto fused_ops = usr->get_fused_primitives();
auto out_layout = usr->get_output_layout();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ bool concat_in_place_optimization::match(const program_node& concat_node,
kernel_impl_params concat_params,
std::vector<kernel_impl_params> pred_params,
bool is_runtime) {
if (concat_node.is_output() || concat_params.fused_desc.size() > 0)
if (concat_node.is_output() || concat_params.fused_desc.size() > 0 || concat_node.is_in_shape_of_subgraph())
return false;
auto pred_nodes = concat_node.get_dependencies();
for (auto p : pred_nodes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -826,11 +826,11 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
if (input_data.in_shape_of_subgraph || node->in_shape_of_subgraph)
return;

auto& input_lo = quantize_node.get_dependency(1);
auto& input_hi = quantize_node.get_dependency(2);

auto out_layout = quantize_node.get_output_layout();
auto in_layout = input_data.get_output_layout();
if (in_layout.is_dynamic() || out_layout.is_dynamic())
return;

auto out_dt = out_layout.data_type;
auto in_dt = input_data.get_input_layout(0).data_type;
auto out_dt_is_i8_u8 = data_type_traits::is_i8_u8(out_dt);
Expand All @@ -844,6 +844,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
quantize_node.get_per_tensor_output_shift() &&
quantize_node.get_per_tensor_output_range();

auto& input_lo = quantize_node.get_dependency(1);
auto& input_hi = quantize_node.get_dependency(2);
bool should_fuse = input_data.is_type<binary_convolution>() &&
((out_dt == data_types::bin &&
quantize_node.get_dependencies().size() == 5 &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,8 @@ void remove_redundant_reorders::run(program& p) {
itr = p.get_processing_order().begin();
while (itr != p.get_processing_order().end()) {
auto& node_ptr = *itr++;
if (!node_ptr->is_type<reorder>() || !node_ptr->is_in_data_flow() || node_ptr->get_users().size() != 1 || node_ptr->get_dependencies().size() != 1)
if (!node_ptr->is_type<reorder>() || !node_ptr->is_in_data_flow() || node_ptr->get_users().size() != 1 ||
node_ptr->get_dependencies().size() != 1 || node_ptr->is_dynamic())
continue;

auto& node = node_ptr->as<reorder>();
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,8 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, reorder_node
// Because mvn and concatenation kernel can work cross-layout, if reorder only performs type conversion,
// fusing reorder to the previous node can be done even if it is a dynamic shape case
if ((prev.is_type<mvn>() || prev.is_type<concatenation>()) &&
(format::is_simple_data_format(fmt_prev) && format::is_simple_data_format(fmt_next)) &&
node.is_type_conversion_only())
!prev.is_in_shape_of_subgraph() && node.is_type_conversion_only() &&
(format::is_simple_data_format(fmt_prev) && format::is_simple_data_format(fmt_next)))
return true;

if (prev.is_dynamic() || (!node.get_users().empty() && node.get_users().front()->is_dynamic()))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "test_utils.h"

#include "intel_gpu/runtime/engine.hpp"

#include "intel_gpu/graph/network.hpp"
#include "intel_gpu/graph/program.hpp"
#include "data_inst.h"
#include "shape_of_inst.h"
#include "gather_inst.h"
#include "eltwise_inst.h"
#include "reshape_inst.h"
#include "concatenation_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"

#include "program_wrapper.h"

#include <memory>

using namespace cldnn;
using namespace ::tests;

TEST(add_required_reorders, input_reorder_inside_shape_of_subgraph) {
    // Build a shape-of subgraph in which each eltwise mixes an i32 input
    // (produced by gather over shape_of) with an f32 constant. The
    // add_required_reorders pass is expected to insert a type-converting
    // reorder so the eltwise input arrives as f32.
    auto& engine = get_test_engine();
    auto dyn_layout = layout{ov::PartialShape{1, 32, ov::Dimension::dynamic(), ov::Dimension::dynamic()},
                             data_types::f16, format::bfyx};
    auto input_mem = engine.allocate_memory({ov::PartialShape{1, 32, 32, 32}, data_types::f16, format::bfyx});
    auto scalar_i32 = engine.allocate_memory({ ov::PartialShape{}, data_types::i32, format::bfyx });
    auto scalar_f32 = engine.allocate_memory({ ov::PartialShape{}, data_types::f32, format::bfyx });

    // Two parallel gather -> eltwise -> reshape branches feeding a concat.
    topology topo;
    topo.add(input_layout("input", dyn_layout));
    topo.add(data("data_0", scalar_i32));
    topo.add(data("data_1", scalar_f32));
    topo.add(shape_of("shape_of", input_info("input"), 4, data_types::i32));
    topo.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(eltwise("eltwise0", {input_info("gather0"), input_info("data_1")}, eltwise_mode::prod, data_types::f32));
    topo.add(reshape("reshape0", input_info("eltwise0"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    topo.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(eltwise("eltwise1", {input_info("gather1"), input_info("data_1")}, eltwise_mode::prod, data_types::f32));
    topo.add(reshape("reshape1", input_info("eltwise1"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    topo.add(concatenation("concat0", {input_info("reshape0"), input_info("reshape1")}, 0, data_types::f32));

    // Dynamic shapes plus data optimization so the pass under test runs.
    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    network net(engine, topo, cfg);
    net.set_input_data("input", input_mem);

    net.execute();

    auto compiled_prog = net.get_program();
    ASSERT_NE(compiled_prog, nullptr);

    // The eltwise must see f32 on its input, i.e. a reorder was inserted.
    auto& eltwise_node = compiled_prog->get_node("eltwise0");
    ASSERT_EQ(eltwise_node.get_input_layout().data_type, data_types::f32);
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "fully_connected_inst.h"
#include "permute_inst.h"
#include "reorder_inst.h"
#include "shape_of_inst.h"
#include "gather_inst.h"
#include "intel_gpu/graph/network.hpp"
#include "pass_manager.h"
#include "to_string_utils.h"
Expand Down Expand Up @@ -520,6 +522,61 @@ TEST(prepare_buffer_fusing, crop_b_axis) {
}
}

TEST(prepare_buffer_fusing, skip_in_place_concat_inside_shape_of_subgraph) {
    // Checks that in-place concat optimization is skipped for a concat that
    // lives inside a shape-of subgraph: the crop feeding "concat1" must keep
    // its own buffer (not alias the concat output) and its impl-param layout
    // must match the actual output memory layout.
    auto& engine = get_test_engine();
    auto dyn_layout = layout{ov::PartialShape{1, 32, ov::Dimension::dynamic(), ov::Dimension::dynamic()},
                             data_types::f16, format::bfyx};
    auto input_mem = engine.allocate_memory({ov::PartialShape{1, 32, 32, 32}, data_types::f16, format::bfyx});
    auto scalar_i32 = engine.allocate_memory({ ov::PartialShape{}, data_types::i32, format::bfyx });
    auto scalar_f32 = engine.allocate_memory({ ov::PartialShape{}, data_types::f32, format::bfyx });
    auto vec4_i32 = engine.allocate_memory({ ov::PartialShape{4}, data_types::i32, format::bfyx });

    const auto bcast = ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY);

    // Two gather -> reorder -> eltwise -> reshape branches into concat0,
    // then a second concat that also consumes a crop of shape_of.
    topology topo;
    topo.add(input_layout("input", dyn_layout));
    topo.add(data("data_0", scalar_i32));
    topo.add(data("data_1", scalar_f32));
    topo.add(data("data_2", vec4_i32));
    topo.add(shape_of("shape_of", input_info("input"), 4, data_types::i32));
    topo.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(reorder("reorder0", input_info("gather0"), format::any, data_types::f32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(eltwise("eltwise0", input_info("reorder0"), input_info("data_1"), eltwise_mode::prod, bcast));
    topo.add(reshape("reshape0", input_info("eltwise0"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    topo.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(reorder("reorder1", input_info("gather1"), format::any, data_types::f32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(eltwise("eltwise1", input_info("reorder1"), input_info("data_1"), eltwise_mode::prod, bcast));
    topo.add(reshape("reshape1", input_info("eltwise1"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    topo.add(crop("crop", input_info("shape_of"), tensor({2,1,1,1,1,1,1,1,1}), tensor({0,0,0,0,1,1,1,1,1})));
    topo.add(concatenation("concat0", {input_info("reshape0"), input_info("reshape1")}, 0, data_types::f32));
    topo.add(reorder("reorder3", input_info("concat0"), format::any, data_types::i32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(concatenation("concat1", {input_info("reorder3"), input_info("crop")}, 0, data_types::i32));
    topo.add(eltwise("eltwise2", input_info("concat1"), input_info("data_2"), eltwise_mode::prod, bcast));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    network net(engine, topo, cfg);
    net.set_input_data("input", input_mem);

    net.execute();

    auto compiled_prog = net.get_program();
    ASSERT_NE(compiled_prog, nullptr);

    // The crop output layout recorded in the impl params must match the
    // memory actually produced at runtime.
    auto& crop_node = compiled_prog->get_node("crop");
    auto crop_impl_params = crop_node.get_kernel_impl_params();
    auto crop_out_mem = net.get_output_memory("crop");
    ASSERT_EQ(crop_impl_params->get_output_layout(), crop_out_mem->get_layout());
    // crop must NOT have been optimized into concat1's buffer.
    ASSERT_FALSE(engine.is_the_same_buffer(*net.get_output_memory("crop"), *net.get_output_memory("concat1")));
}

#ifdef ENABLE_ONEDNN_FOR_GPU
TEST(prepare_buffer_fusing, in_place_onednn_concat_static) {
auto& engine = get_test_engine();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "activation_inst.h"
#include "mvn_inst.h"
#include "concatenation_inst.h"
#include "shape_of_inst.h"
#include "gather_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"

Expand Down Expand Up @@ -323,3 +325,55 @@ TEST(remove_redundant_reorders, fuse_reorder_to_prev_concat_dyn) {

ASSERT_EQ(concat_layout.data_type, data_types::f32);
}

TEST(remove_redundant_reorders, not_to_fuse_concat_with_reorder_inside_shape_of_subgraph) {
    // Checks that remove_redundant_reorders does NOT fuse a type-converting
    // reorder into a preceding concat when the concat sits inside a shape-of
    // subgraph: "reorder3" must survive and "concat0" must keep f32 output.
    auto& engine = get_test_engine();
    auto dyn_layout = layout{ov::PartialShape{1, 32, ov::Dimension::dynamic(), ov::Dimension::dynamic()},
                             data_types::f16, format::bfyx};
    auto input_mem = engine.allocate_memory({ov::PartialShape{1, 32, 32, 32}, data_types::f16, format::bfyx});
    auto scalar_i32 = engine.allocate_memory({ ov::PartialShape{}, data_types::i32, format::bfyx });
    auto scalar_f32 = engine.allocate_memory({ ov::PartialShape{}, data_types::f32, format::bfyx });
    auto vec2_i32 = engine.allocate_memory({ ov::PartialShape{2}, data_types::i32, format::bfyx });

    const auto bcast = ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY);

    // Two gather -> reorder -> eltwise -> reshape branches into concat0,
    // followed by an f32 -> i32 reorder and a second concat.
    topology topo;
    topo.add(input_layout("input", dyn_layout));
    topo.add(data("data_0", scalar_i32));
    topo.add(data("data_1", scalar_f32));
    topo.add(data("data_2", vec2_i32));
    topo.add(shape_of("shape_of", input_info("input"), 4, data_types::i32));
    topo.add(gather("gather0", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(reorder("reorder0", input_info("gather0"), format::any, data_types::f32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(eltwise("eltwise0", input_info("reorder0"), input_info("data_1"), eltwise_mode::prod, bcast));
    topo.add(reshape("reshape0", input_info("eltwise0"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    topo.add(gather("gather1", input_info("shape_of"), input_info("data_0"), 0, {}, 0, true));
    topo.add(reorder("reorder1", input_info("gather1"), format::any, data_types::f32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(eltwise("eltwise1", input_info("reorder1"), input_info("data_1"), eltwise_mode::prod, bcast));
    topo.add(reshape("reshape1", input_info("eltwise1"), false, {},
                     ov::PartialShape{1}, reshape::reshape_mode::unsqueeze));
    topo.add(concatenation("concat0", {input_info("reshape0"), input_info("reshape1")}, 0, data_types::f32));
    topo.add(reorder("reorder3", input_info("concat0"), format::any, data_types::i32,
                     std::vector<float>(), reorder_mean_mode::subtract, padding(), true));
    topo.add(concatenation("concat1", {input_info("reorder3"), input_info("data_2")}, 0, data_types::i32));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    network net(engine, topo, cfg);
    net.set_input_data("input", input_mem);

    net.execute();

    auto compiled_prog = net.get_program();
    ASSERT_NE(compiled_prog, nullptr);

    // The reorder must not have been fused away...
    ASSERT_TRUE(has_node(*compiled_prog, "reorder3"));
    // ...and the concat must still produce f32, not the reorder's i32.
    auto& concat_node = compiled_prog->get_node("concat0");
    ASSERT_EQ(concat_node.get_output_layout().data_type, data_types::f32);
}

0 comments on commit 9069dab

Please sign in to comment.