From 9088c9ef013f1b133932634872e9cdeb79560285 Mon Sep 17 00:00:00 2001
From: Wilson Seok
Date: Thu, 7 Mar 2024 22:48:31 +0900
Subject: [PATCH] [GPU] Skip Depth To Space fusing when dynamic shape and skip
 broadcastable check in select typed_primitive_inst() when new shape infer
 (#23270)

### Details:
 - Skip Depth To Space fusing when the shape is dynamic
 - Skip the broadcastable check in select typed_primitive_inst() when new shape infer is enabled

### Tickets:
 - 130775
---
 .../prepare_primitive_fusing.cpp              |  2 +
 src/plugins/intel_gpu/src/graph/select.cpp    | 86 ++++++++++---------
 .../single_layer_tests/dynamic/select.cpp     |  5 ++
 .../passes/prepare_primitive_fusing_test.cpp  | 40 +++++++++
 4 files changed, 92 insertions(+), 41 deletions(-)

diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
index 13960c14ec45ed..47d4d490e34144 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
@@ -536,6 +536,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
             bool input_conv = node.get_dependency(0).is_type<convolution>();
             bool out_eltw = node.get_users().front()->is_type<eltwise>();
             if (input_conv && out_eltw) {
+                if (node.is_dynamic())
+                    return false;
                 auto& eltw = static_cast<const eltwise&>(*node.get_users().front()->get_primitive());
                 auto& conv = node.get_dependency(0).as<convolution>();
                 auto eltw_mode = eltw.mode == eltwise_mode::sum;
diff --git a/src/plugins/intel_gpu/src/graph/select.cpp b/src/plugins/intel_gpu/src/graph/select.cpp
index 6f48082b66654b..9de8a118dcd7d0 100644
--- a/src/plugins/intel_gpu/src/graph/select.cpp
+++ b/src/plugins/intel_gpu/src/graph/select.cpp
@@ -92,49 +92,53 @@ select_inst::typed_primitive_inst(network& network, select_node const& node) : p
                           3,
                           "");
 
-    if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) {
-        CLDNN_ERROR_LAYOUT_MISMATCH(node.id(),
-                                    "Positive input layout",
-                                    deps[1].first->get_output_layout(),
-                                    "Negative input layout",
-                                    deps[2].first->get_output_layout(),
-                                    "");
-
-        CLDNN_ERROR_NOT_EQUAL(node.id(),
-                              "Mask size",
-                              deps[0].first->get_output_layout().get_tensor(),
-                              "Positive input format",
-                              deps[1].first->get_output_layout().get_tensor(),
-                              "");
-    } else if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NUMPY) {
-        CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(),
-                                        "Positive input data type",
-                                        deps[1].first->get_output_layout().data_type,
-                                        "Negative input data type",
-                                        deps[2].first->get_output_layout().data_type,
-                                        "");
-
-        auto dep1_size = deps[1].first->get_output_layout().get_tensor();
-        auto dep2_size = deps[2].first->get_output_layout().get_tensor();
-        cldnn::tensor output_tensor = tensor::max(dep1_size, dep2_size);
-        // Cond input0 also can be broadcasted.
-        auto dep0_size = deps[0].first->get_output_layout().get_tensor();
-        output_tensor = tensor::max(dep0_size, output_tensor);
-
-        auto max_dim_count = output_tensor.raw.size();
-
-        for (size_t i = 0; i < deps.size(); i++) {
-            for (size_t d = 0; d < max_dim_count; d++) {
-                auto current_dim = deps[i].first->get_output_layout().get_tensor().raw[d];
-
-                CLDNN_ERROR_BOOL(node.id(),
-                                 "Sizes equal or broadcast is possible",
-                                 !(current_dim == output_tensor.raw[d] || current_dim == 1),
-                                 "Invalid input shapes");
+    bool allow_new_shape_infer = network.get_program()->get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
+    // Broadcast check is performed in ngraph shape infer of select when allow_new_shape_infer=true
+    if (!allow_new_shape_infer) {
+        if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NONE) {
+            CLDNN_ERROR_LAYOUT_MISMATCH(node.id(),
+                                        "Positive input layout",
+                                        deps[1].first->get_output_layout(),
+                                        "Negative input layout",
+                                        deps[2].first->get_output_layout(),
+                                        "");
+
+            CLDNN_ERROR_NOT_EQUAL(node.id(),
+                                  "Mask size",
+                                  deps[0].first->get_output_layout().get_tensor(),
+                                  "Positive input format",
+                                  deps[1].first->get_output_layout().get_tensor(),
+                                  "");
+        } else if (node.get_primitive()->broadcast_spec.m_type == ov::op::AutoBroadcastType::NUMPY) {
+            CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(),
+                                            "Positive input data type",
+                                            deps[1].first->get_output_layout().data_type,
+                                            "Negative input data type",
+                                            deps[2].first->get_output_layout().data_type,
+                                            "");
+
+            auto dep1_size = deps[1].first->get_output_layout().get_tensor();
+            auto dep2_size = deps[2].first->get_output_layout().get_tensor();
+            cldnn::tensor output_tensor = tensor::max(dep1_size, dep2_size);
+            // Cond input0 also can be broadcasted.
+            auto dep0_size = deps[0].first->get_output_layout().get_tensor();
+            output_tensor = tensor::max(dep0_size, output_tensor);
+
+            auto max_dim_count = output_tensor.raw.size();
+
+            for (size_t i = 0; i < deps.size(); i++) {
+                for (size_t d = 0; d < max_dim_count; d++) {
+                    auto current_dim = deps[i].first->get_output_layout().get_tensor().raw[d];
+
+                    CLDNN_ERROR_BOOL(node.id(),
+                                     "Sizes equal or broadcast is possible",
+                                     !(current_dim == output_tensor.raw[d] || current_dim == 1),
+                                     "Invalid input shapes");
+                }
             }
+        } else {
+            CLDNN_ERROR_MESSAGE(node.id(), "Unsupported broadcast_type: " + std::to_string(static_cast<int>(node.get_primitive()->broadcast_spec.m_type)));
         }
-    } else {
-        CLDNN_ERROR_MESSAGE(node.id(), "Unsupported broadcast_type: " + std::to_string(static_cast<int>(node.get_primitive()->broadcast_spec.m_type)));
     }
 }
 }  // namespace cldnn
diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp
index 73c24ff347f7bc..2ebfd6e543fbf0 100644
--- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp
+++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/select.cpp
@@ -120,6 +120,11 @@ const std::vector<std::vector<ov::test::InputShape>> inShapesDynamicNumpy = {
         { { -1, -1, -1}, {{ 4, 5, 6}} },
         { { -1, -1}, {{ 5, 6}} }
     },
+    {
+        { { -1}, {{ 130048}} },
+        { { -1, -1}, {{ 2, 130048}} },
+        { { -1, -1}, {{ 2, 130048}} }
+    },
 };
 
 const auto numpyCases = ::testing::Combine(
diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
index f1b34831510925..082daad4eaf9c2 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_primitive_fusing_test.cpp
@@ -16,6 +16,7 @@
 #include "fully_connected_inst.h"
 #include "gemm_inst.h"
 #include "convolution_inst.h"
+#include "depth_to_space_inst.h"
 #include "pass_manager.h"
 #include "to_string_utils.h"
 
@@ -648,4 +649,43 @@ TEST(prepare_primitive_fusing, can_profiling_data_when_fuse_illegal) {
     auto output = net.execute();
     for (auto& iter : output)
         ASSERT_NE(iter.second.get_event(), nullptr);
+}
+
+TEST(prepare_primitive_fusing, dont_fuse_eltwise_to_dyn_dts) {
+    auto& engine = get_test_engine();
+    tests::random_generator rg(GET_SUITE_NAME);
+
+    auto in_layout = layout{ ov::PartialShape{-1, -1, -1, -1}, data_types::f32, format::bfyx };
+    auto weight_layout = layout{ ov::PartialShape{32, 32, 3, 3}, data_types::f32, format::bfyx};
+    auto weight_mem = engine.allocate_memory(weight_layout);
+    auto weight_data = rg.generate_random_4d<float>(32, 32, 3, 3, -1, 1);
+    set_values(weight_mem, weight_data);
+    auto scale_layout = layout{ ov::PartialShape{1, 2, 1, 1}, data_types::f32, format::bfyx };
+    auto scale_mem = engine.allocate_memory(scale_layout);
+    auto elt_layout = layout{ ov::PartialShape{1, 2, 32, 32}, data_types::f32, format::bfyx };
+    auto elt_mem = engine.allocate_memory(elt_layout);
+
+    topology topology;
+
+    topology.add(data("weights", weight_mem));
+    topology.add(input_layout("input", in_layout));
+    topology.add(convolution("conv", input_info("input"), "weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
+    topology.add(depth_to_space("depth_to_space", input_info("conv"), 4, depth_to_space_mode::blocks_first));
+    topology.add(data("scale1_data", scale_mem));
+    topology.add(eltwise("scale1", { input_info("depth_to_space"), input_info("scale1_data") }, eltwise_mode::prod, data_types::f32));
+    topology.add(activation("actv1", input_info("scale1"), activation_func::relu));
+    topology.add(data("eltw_data", elt_mem));
+    topology.add(eltwise("eltw", { input_info("actv1"), input_info("eltw_data") }, eltwise_mode::sum, data_types::f32));
+    topology.add(reorder("reorder_bfyx", input_info("eltw"), format::bfyx, data_types::f32));
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    auto prog = program::build_program(engine, topology, config, false, true);
+
+    layout_optimizer lo(true);
+
+    program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
+
+    ASSERT_NE(prog, nullptr);
+    ASSERT_TRUE(has_node(*prog, "scale1"));
 }
\ No newline at end of file
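
For context on the select.cpp change: when `allow_new_shape_infer` is enabled (the dynamic-shape path), broadcast compatibility is already validated by the ngraph shape inference for Select, so the legacy per-dimension check in `typed_primitive_inst` is redundant and can reject valid rank-mismatched NUMPY broadcasts such as the `{130048}` vs `{2, 130048}` case added to the functional tests. The sketch below is illustrative only (not part of the patch): it builds a comparable dynamic-shape Select with the public OpenVINO API and compiles it for GPU; the device name and concrete shapes are assumptions mirroring the new test case.

```cpp
// Illustrative sketch (not part of the patch): a dynamic-shape Select with
// NUMPY broadcasting, compiled on GPU, which exercises the new-shape-infer path.
#include <openvino/openvino.hpp>
#include <openvino/op/parameter.hpp>
#include <openvino/op/select.hpp>

int main() {
    // Shapes mirror the new functional-test case: a {-1} condition broadcast
    // against {-1, -1} data inputs (e.g. {130048} vs {2, 130048} at runtime).
    auto cond    = std::make_shared<ov::op::v0::Parameter>(ov::element::boolean, ov::PartialShape{-1});
    auto then_in = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{-1, -1});
    auto else_in = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{-1, -1});
    auto select  = std::make_shared<ov::op::v1::Select>(cond, then_in, else_in,
                                                        ov::op::AutoBroadcastType::NUMPY);
    auto model = std::make_shared<ov::Model>(ov::OutputVector{select},
                                             ov::ParameterVector{cond, then_in, else_in});

    ov::Core core;
    // With dynamic shapes the GPU plugin uses new (ngraph-based) shape inference,
    // so the legacy broadcast check in select_inst is skipped by this patch.
    auto compiled = core.compile_model(model, "GPU");
    (void)compiled;
    return 0;
}
```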