From db1e6938d0eefd52780a44c75e8ca420a07b70ff Mon Sep 17 00:00:00 2001 From: wilson-seok Date: Tue, 5 Mar 2024 15:58:39 +0000 Subject: [PATCH 1/3] skip dts fusing when dynamic shape and skip broadcastable check in typed_primitive_inst() when new shape infer --- .../prepare_primitive_fusing.cpp | 2 + src/plugins/intel_gpu/src/graph/select.cpp | 88 ++++++++++--------- .../single_layer_tests/dynamic/select.cpp | 9 +- .../passes/prepare_primitive_fusing_test.cpp | 40 +++++++++ 4 files changed, 96 insertions(+), 43 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 13960c14ec45ed..47d4d490e34144 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -536,6 +536,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { bool input_conv = node.get_dependency(0).is_type(); bool out_eltw = node.get_users().front()->is_type(); if (input_conv && out_eltw) { + if (node.is_dynamic()) + return false; auto& eltw = static_cast(*node.get_users().front()->get_primitive()); auto& conv = node.get_dependency(0).as(); auto eltw_mode = eltw.mode == eltwise_mode::sum; diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index 6f48082b66654b..ada1761f793d57 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -92,49 +92,55 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p 3, ""); - if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { - CLDNN_ERROR_LAYOUT_MISMATCH(node.id(), - "Positive input layout", - deps[1].first->get_output_layout(), - "Negative input layout", - deps[2].first->get_output_layout(), - ""); - - CLDNN_ERROR_NOT_EQUAL(node.id(), - "Mask size", - deps[0].first->get_output_layout().get_tensor(), - "Positive input format", - deps[1].first->get_output_layout().get_tensor(), - ""); - } else if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NUMPY) { - CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(), - "Positive input data type", - deps[1].first->get_output_layout().data_type, - "Negative input data type", - deps[2].first->get_output_layout().data_type, - ""); - - auto dep1_size = deps[1].first->get_output_layout().get_tensor(); - auto dep2_size = deps[2].first->get_output_layout().get_tensor(); - cldnn::tensor output_tensor = tensor::max(dep1_size, dep2_size); - // Cond input0 also can be broadcasted. - auto dep0_size = deps[0].first->get_output_layout().get_tensor(); - output_tensor = tensor::max(dep0_size, output_tensor); - - auto max_dim_count = output_tensor.raw.size(); - - for (size_t i = 0; i < deps.size(); i++) { - for (size_t d = 0; d < max_dim_count; d++) { - auto current_dim = deps[i].first->get_output_layout().get_tensor().raw[d]; - - CLDNN_ERROR_BOOL(node.id(), - "Sizes equal or broadcast is possible", - !(current_dim == output_tensor.raw[d] || current_dim == 1), - "Invalid input shapes"); + bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer); + // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true + if (!allow_new_shape_infer) { + if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) { + CLDNN_ERROR_LAYOUT_MISMATCH(node.id(), + "Positive input layout", + deps[1].first->get_output_layout(), + "Negative input layout", + deps[2].first->get_output_layout(), + ""); + + CLDNN_ERROR_NOT_EQUAL(node.id(), + "Mask size", + deps[0].first->get_output_layout().get_tensor(), + "Positive input format", + deps[1].first->get_output_layout().get_tensor(), + ""); + } else if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NUMPY) { + CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(), + "Positive input data type", + deps[1].first->get_output_layout().data_type, + "Negative input data type", + deps[2].first->get_output_layout().data_type, + ""); + + auto dep1_size = deps[1].first->get_output_layout().get_tensor(); + auto dep2_size = deps[2].first->get_output_layout().get_tensor(); + cldnn::tensor output_tensor = tensor::max(dep1_size, dep2_size); + // Cond input0 also can be broadcasted. + auto dep0_size = deps[0].first->get_output_layout().get_tensor(); + output_tensor = tensor::max(dep0_size, output_tensor); + + auto max_dim_count = output_tensor.raw.size(); + + for (size_t i = 0; i < deps.size(); i++) { + for (size_t d = 0; d < max_dim_count; d++) { + auto current_dim = deps[i].first->get_output_layout().get_tensor().raw[d]; + + if (!(current_dim == output_tensor.raw[d] || current_dim == 1)) + std::cout << "error!" << std::endl; + CLDNN_ERROR_BOOL(node.id(), + "Sizes equal or broadcast is possible", + !(current_dim == output_tensor.raw[d] || current_dim == 1), + "Invalid input shapes"); + } } + } else { + CLDNN_ERROR_MESSAGE(node.id(), "Unsupported broadcast_type: " + std::to_string(static_cast(node.get_primitive()->broadcast_spec.m_type))); } - } else { - CLDNN_ERROR_MESSAGE(node.id(), "Unsupported broadcast_type: " + std::to_string(static_cast(node.get_primitive()->broadcast_spec.m_type))); } } } // namespace cldnn diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp index 73c24ff347f7bc..888106969ec7ba 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp @@ -84,8 +84,8 @@ TEST_P(SelectLayerGPUTest, Inference) { const std::vector model_types = { ov::element::f32, - ov::element::f16, - ov::element::i32, + // ov::element::f16, + // ov::element::i32, }; // AutoBroadcastType: NUMPY @@ -120,6 +120,11 @@ const std::vector> inShapesDynamicNumpy = { { { -1, -1, -1}, {{ 4, 5, 6}} }, { { -1, -1}, {{ 5, 6}} } }, + { + { { -1}, {{ 130048}} }, + { { -1, -1}, {{ 2, 130048}} }, + { { -1, -1}, {{ 2, 130048}} } + }, }; const auto numpyCases = ::testing::Combine( diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp index f1b34831510925..082daad4eaf9c2 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp @@ -16,6 +16,7 @@ #include "fully_connected_inst.h" #include "gemm_inst.h" #include "convolution_inst.h" +#include "depth_to_space_inst.h" #include "pass_manager.h" #include "to_string_utils.h" @@ -648,4 +649,43 @@ TEST(prepare_primitive_fusing, can_profiling_data_when_fuse_illegal) { auto output = net.execute(); for (auto& iter : output) ASSERT_NE(iter.second.get_event(), nullptr); +} + +TEST(prepare_primitive_fusing, dont_fuse_eltwise_to_dyn_dts) { + auto& engine = get_test_engine(); + tests::random_generator rg(GET_SUITE_NAME); + + auto in_layout = layout{ ov::PartialShape{-1, -1, -1, -1}, data_types::f32, format::bfyx }; + auto weight_layout = layout{ ov::PartialShape{32, 32, 3, 3}, data_types::f32, format::bfyx}; + auto weight_mem = engine.allocate_memory(weight_layout); + auto weight_data = rg.generate_random_4d(32, 32, 3, 3, -1, 1); + set_values(weight_mem, weight_data); + auto scale_layout = layout{ ov::PartialShape{1, 2, 1, 1}, data_types::f32, format::bfyx }; + auto scale_mem = engine.allocate_memory(scale_layout); + auto elt_layout = layout{ ov::PartialShape{1, 2, 32, 32}, data_types::f32, format::bfyx }; + auto elt_mem = engine.allocate_memory(elt_layout); + + topology topology; + + topology.add(data("weights", weight_mem)); + topology.add(input_layout("input", in_layout)); + topology.add(convolution("conv", input_info("input"), "weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add(depth_to_space("depth_to_space", input_info("conv"), 4, depth_to_space_mode::blocks_first)); + topology.add(data("scale1_data", scale_mem)); + topology.add(eltwise("scale1", { input_info("depth_to_space"), input_info("scale1_data") }, eltwise_mode::prod, data_types::f32)); + topology.add(activation("actv1", input_info("scale1"), activation_func::relu)); + topology.add(data("eltw_data", elt_mem)); + topology.add(eltwise("eltw", { input_info("actv1"), input_info("eltw_data") }, eltwise_mode::sum, data_types::f32)); + topology.add(reorder("reorder_bfyx", input_info("eltw"), format::bfyx, data_types::f32)); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + auto prog = program::build_program(engine, topology, config, false, true); + + layout_optimizer lo(true); + + program_wrapper::apply_opt_pass(*prog, lo); + + ASSERT_NE(prog, nullptr); + ASSERT_TRUE(has_node(*prog, "scale1")); } \ No newline at end of file From 13d7b060a36e99ec5247276948a3cea32800fca3 Mon Sep 17 00:00:00 2001 From: wilson-seok Date: Tue, 5 Mar 2024 18:49:03 +0000 Subject: [PATCH 2/3] fix unintended comment --- .../tests/functional/single_layer_tests/dynamic/select.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp index 888106969ec7ba..2ebfd6e543fbf0 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp @@ -84,8 +84,8 @@ TEST_P(SelectLayerGPUTest, Inference) { const std::vector model_types = { ov::element::f32, - // ov::element::f16, - // ov::element::i32, + ov::element::f16, + ov::element::i32, }; // AutoBroadcastType: NUMPY From 5384cd9a699c5b3b060c6e3541c93e82f0a1105b Mon Sep 17 00:00:00 2001 From: wilson-seok Date: Thu, 7 Mar 2024 18:33:58 +0000 Subject: [PATCH 3/3] remove debugging code --- src/plugins/intel_gpu/src/graph/select.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp index ada1761f793d57..9de8a118dcd7d0 100644 --- a/src/plugins/intel_gpu/src/graph/select.cpp +++ b/src/plugins/intel_gpu/src/graph/select.cpp @@ -130,8 +130,6 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p for (size_t d = 0; d < max_dim_count; d++) { auto current_dim = deps[i].first->get_output_layout().get_tensor().raw[d]; - if (!(current_dim == output_tensor.raw[d] || current_dim == 1)) - std::cout << "error!" << std::endl; CLDNN_ERROR_BOOL(node.id(), "Sizes equal or broadcast is possible", !(current_dim == output_tensor.raw[d] || current_dim == 1),