[GPU] Skip Depth To Space fusing when dynamic shape and skip broadcastable check in select typed_primitive_inst() when new shape infer (openvinotoolkit#23270)

### Details:
 - Skip Depth To Space fusing when the input shape is dynamic
 - Skip the broadcastable check in select typed_primitive_inst() when new shape infer is enabled (a sketch of the delegated broadcast rule follows below)

### Tickets:
 - 130775
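
For context, here is a minimal, illustrative sketch of the NumPy-style broadcast rule that the legacy validation in select typed_primitive_inst() enforces per dimension, and which the ngraph shape inference of Select covers instead when allow_new_shape_infer is true. The function name and signature are hypothetical and not part of this patch:

```cpp
#include <cstddef>
#include <vector>

// Illustrative only: an input is NumPy-broadcastable to the output shape when
// every dimension either matches the corresponding output dimension (the
// element-wise maximum over all inputs) or equals 1. Assumes both vectors are
// already padded to the same rank.
bool is_numpy_broadcastable(const std::vector<size_t>& input_dims,
                            const std::vector<size_t>& output_dims) {
    for (size_t d = 0; d < output_dims.size(); ++d) {
        if (input_dims[d] != output_dims[d] && input_dims[d] != 1)
            return false;
    }
    return true;
}
```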
wilson-seok authored and alvoron committed Apr 29, 2024
1 parent f5e0c43 commit 9088c9e
Showing 4 changed files with 92 additions and 41 deletions.
@@ -536,6 +536,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
bool input_conv = node.get_dependency(0).is_type<convolution>();
bool out_eltw = node.get_users().front()->is_type<eltwise>();
if (input_conv && out_eltw) {
if (node.is_dynamic())
return false;
auto& eltw = static_cast<const eltwise&>(*node.get_users().front()->get_primitive());
auto& conv = node.get_dependency(0).as<convolution>();
auto eltw_mode = eltw.mode == eltwise_mode::sum;
src/plugins/intel_gpu/src/graph/select.cpp: 86 changes (45 additions & 41 deletions)
@@ -92,49 +92,53 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p
3,
"");

if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) {
CLDNN_ERROR_LAYOUT_MISMATCH(node.id(),
"Positive input layout",
deps[1].first->get_output_layout(),
"Negative input layout",
deps[2].first->get_output_layout(),
"");

CLDNN_ERROR_NOT_EQUAL(node.id(),
"Mask size",
deps[0].first->get_output_layout().get_tensor(),
"Positive input format",
deps[1].first->get_output_layout().get_tensor(),
"");
} else if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NUMPY) {
CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(),
"Positive input data type",
deps[1].first->get_output_layout().data_type,
"Negative input data type",
deps[2].first->get_output_layout().data_type,
"");

auto dep1_size = deps[1].first->get_output_layout().get_tensor();
auto dep2_size = deps[2].first->get_output_layout().get_tensor();
cldnn::tensor output_tensor = tensor::max(dep1_size, dep2_size);
// Cond input0 also can be broadcasted.
auto dep0_size = deps[0].first->get_output_layout().get_tensor();
output_tensor = tensor::max(dep0_size, output_tensor);

auto max_dim_count = output_tensor.raw.size();

for (size_t i = 0; i < deps.size(); i++) {
for (size_t d = 0; d < max_dim_count; d++) {
auto current_dim = deps[i].first->get_output_layout().get_tensor().raw[d];

CLDNN_ERROR_BOOL(node.id(),
"Sizes equal or broadcast is possible",
!(current_dim == output_tensor.raw[d] || current_dim == 1),
"Invalid input shapes");
bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
// Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true
if (!allow_new_shape_infer) {
if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) {
CLDNN_ERROR_LAYOUT_MISMATCH(node.id(),
"Positive input layout",
deps[1].first->get_output_layout(),
"Negative input layout",
deps[2].first->get_output_layout(),
"");

CLDNN_ERROR_NOT_EQUAL(node.id(),
"Mask size",
deps[0].first->get_output_layout().get_tensor(),
"Positive input format",
deps[1].first->get_output_layout().get_tensor(),
"");
} else if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NUMPY) {
CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(),
"Positive input data type",
deps[1].first->get_output_layout().data_type,
"Negative input data type",
deps[2].first->get_output_layout().data_type,
"");

auto dep1_size = deps[1].first->get_output_layout().get_tensor();
auto dep2_size = deps[2].first->get_output_layout().get_tensor();
cldnn::tensor output_tensor = tensor::max(dep1_size, dep2_size);
// Cond input0 also can be broadcasted.
auto dep0_size = deps[0].first->get_output_layout().get_tensor();
output_tensor = tensor::max(dep0_size, output_tensor);

auto max_dim_count = output_tensor.raw.size();

for (size_t i = 0; i < deps.size(); i++) {
for (size_t d = 0; d < max_dim_count; d++) {
auto current_dim = deps[i].first->get_output_layout().get_tensor().raw[d];

CLDNN_ERROR_BOOL(node.id(),
"Sizes equal or broadcast is possible",
!(current_dim == output_tensor.raw[d] || current_dim == 1),
"Invalid input shapes");
}
}
} else {
CLDNN_ERROR_MESSAGE(node.id(), "Unsupported broadcast_type: " + std::to_string(static_cast<int>(node.get_primitive()->broadcast_spec.m_type)));
}
} else {
CLDNN_ERROR_MESSAGE(node.id(), "Unsupported broadcast_type: " + std::to_string(static_cast<int>(node.get_primitive()->broadcast_spec.m_type)));
}
}
} // namespace cldnn
@@ -120,6 +120,11 @@ const std::vector<std::vector<InputShape>> inShapesDynamicNumpy = {
{ { -1, -1, -1}, {{ 4, 5, 6}} },
{ { -1, -1}, {{ 5, 6}} }
},
{
{ { -1}, {{ 130048}} },
{ { -1, -1}, {{ 2, 130048}} },
{ { -1, -1}, {{ 2, 130048}} }
},
};

const auto numpyCases = ::testing::Combine(
@@ -16,6 +16,7 @@
#include "fully_connected_inst.h"
#include "gemm_inst.h"
#include "convolution_inst.h"
#include "depth_to_space_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"

@@ -648,4 +649,43 @@ TEST(prepare_primitive_fusing, can_profiling_data_when_fuse_illegal) {
auto output = net.execute();
for (auto& iter : output)
ASSERT_NE(iter.second.get_event(), nullptr);
}

TEST(prepare_primitive_fusing, dont_fuse_eltwise_to_dyn_dts) {
auto& engine = get_test_engine();
tests::random_generator rg(GET_SUITE_NAME);

auto in_layout = layout{ ov::PartialShape{-1, -1, -1, -1}, data_types::f32, format::bfyx };
auto weight_layout = layout{ ov::PartialShape{32, 32, 3, 3}, data_types::f32, format::bfyx};
auto weight_mem = engine.allocate_memory(weight_layout);
auto weight_data = rg.generate_random_4d<ov::float16>(32, 32, 3, 3, -1, 1);
set_values(weight_mem, weight_data);
auto scale_layout = layout{ ov::PartialShape{1, 2, 1, 1}, data_types::f32, format::bfyx };
auto scale_mem = engine.allocate_memory(scale_layout);
auto elt_layout = layout{ ov::PartialShape{1, 2, 32, 32}, data_types::f32, format::bfyx };
auto elt_mem = engine.allocate_memory(elt_layout);

topology topology;

topology.add(data("weights", weight_mem));
topology.add(input_layout("input", in_layout));
topology.add(convolution("conv", input_info("input"), "weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
topology.add(depth_to_space("depth_to_space", input_info("conv"), 4, depth_to_space_mode::blocks_first));
topology.add(data("scale1_data", scale_mem));
topology.add(eltwise("scale1", { input_info("depth_to_space"), input_info("scale1_data") }, eltwise_mode::prod, data_types::f32));
topology.add(activation("actv1", input_info("scale1"), activation_func::relu));
topology.add(data("eltw_data", elt_mem));
topology.add(eltwise("eltw", { input_info("actv1"), input_info("eltw_data") }, eltwise_mode::sum, data_types::f32));
topology.add(reorder("reorder_bfyx", input_info("eltw"), format::bfyx, data_types::f32));

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
auto prog = program::build_program(engine, topology, config, false, true);

layout_optimizer lo(true);

program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);

ASSERT_NE(prog, nullptr);
ASSERT_TRUE(has_node(*prog, "scale1"));
}
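
In short, the added test builds a convolution → depth_to_space → eltwise chain with a fully dynamic input, runs the prepare_primitive_fusing pass with allow_new_shape_infer enabled, and then checks that the "scale1" eltwise still exists as a standalone node, i.e. that it was not fused into the dynamic depth_to_space.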
