diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
index 559e63eded946a..e0fc57d05f06e6 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
@@ -512,6 +512,12 @@ bool crop_in_place_optimization::match(const program_node& node,
     if (node.get_program().is_body_program() && node.get_dependency(0).is_type<lstm_elt>()) {
         return false;
     }
+
+    GPU_DEBUG_GET_INSTANCE(debug_config);
+    GPU_DEBUG_IF(debug_config->disable_runtime_buffer_fusing && node.is_dynamic()) {
+        return false;
+    }
+
     // optimization is available for cropping across depth(features) or batch
     // if output padding has defined padding across features already it wouldn't
     // work because it expect to have zeros in the padded area.
@@ -553,18 +559,22 @@ bool crop_in_place_optimization::optimize(crop_node& node) {
                                                    node.get_primitive()->axis,
                                                    false);
     } else if (can_crop_be_optimized_simple_data_format(crop_layout, input_layout)) {
-        std::vector<layout> reshape_layouts;
-        if (node.get_users().front()->is_type<reshape>() && node.get_users().front()->as<reshape>().is_runtime_propagatable_padding()) {
-            reshape_layouts.push_back(node.get_users().front()->get_output_layout());
+        std::pair<const program_node*, layout> user_info;
+        if (node.get_users().front()->is_type<reshape>()) {
+            auto& reshape_node = node.get_users().front()->as<reshape>();
+            if (reshape_node.is_runtime_propagatable_padding()) {
+                user_info.first = &reshape_node;
+                user_info.second = reshape_node.get_output_layout();
+            }
         }
         update_in_place_crop_padding_simple_data_format(crop_layout,
                                                         input_layout,
-                                                        reshape_layouts,
+                                                        user_info,
                                                         crop_params->input_offsets[0],
                                                         node.get_primitive()->axis,
                                                         false);
-        if (reshape_layouts.size() > 0) {
-            node.get_users().front()->set_output_layout(reshape_layouts[0]);
+        if (user_info.first) {
+            node.get_users().front()->set_output_layout(user_info.second);
         }
     }
     node.set_output_layout(crop_layout);
@@ -632,24 +642,51 @@ void crop_in_place_optimization::update_in_place_crop_padding_along_feature(cons
 
 void crop_in_place_optimization::update_in_place_crop_padding_simple_data_format(layout& crop_layout,
                                                                                  layout& input_layout,
-                                                                                 std::vector<layout>& user_layouts,
+                                                                                 std::pair<const program_node*, layout>& user_info,
                                                                                  const tensor offsets,
                                                                                  size_t crop_axis,
                                                                                  bool is_runtime) {
-    auto crop_axis_legacy = crop_axis;
-    if (crop_axis_legacy >= 2) {
-        auto spatial_axis = crop_axis_legacy - 2;
-        // Default and minimum number of dimensions is 4
-        auto spatial_size = std::max<size_t>(crop_layout.get_partial_shape().size(), 4) - 2;
-        crop_axis_legacy = spatial_size - spatial_axis - 1 + 2;
-    }
+    auto convert_axis_to_legacy = [](size_t axis, size_t rank) {
+        auto axis_legacy = axis;
+        if (axis_legacy >= 2) {
+            auto spatial_axis = axis_legacy - 2;
+            // Default and minimum number of dimensions is 4
+            auto spatial_size = std::max<size_t>(rank, 4) - 2;
+            axis_legacy = spatial_size - spatial_axis - 1 + 2;
+        }
+
+        return axis_legacy;
+    };
+
+    auto crop_axis_legacy = convert_axis_to_legacy(crop_axis, crop_layout.get_partial_shape().size());
+
     // If it's build-time and node is dynamic, only dynamic padding is set first
     if ((crop_layout.is_dynamic() || input_layout.is_dynamic()) && !is_runtime) {
         auto dyn_pad_sizes = tensor(0).sizes();
         dyn_pad_sizes[crop_axis_legacy] = 1;
         crop_layout.data_padding.set_dynamic_pad(tensor(dyn_pad_sizes));
-        for (auto& user_layout : user_layouts) {
-            user_layout.data_padding.set_dynamic_pad(tensor(dyn_pad_sizes));
+
+        if (user_info.first && user_info.first->is_type<reshape>()) {
+            auto reshape_desc = user_info.first->as<reshape>().get_primitive();
+            auto reshape_mode = reshape_desc->mode;
+            if (reshape_mode == reshape::reshape_mode::base) {
+                user_info.second.data_padding.set_dynamic_pad(tensor(dyn_pad_sizes));
+            } else if (reshape_mode == reshape::reshape_mode::unsqueeze || reshape_mode == reshape::reshape_mode::squeeze) {
+                auto reshape_ps = user_info.second.get_partial_shape();
+                auto output_pattern = reshape_desc->output_pattern;
+
+                auto reshape_axis = crop_axis;
+                for (size_t i = 0; i < output_pattern.size(); i++) {
+                    if (output_pattern[i] <= static_cast<int64_t>(reshape_axis)) {
+                        reshape_axis += reshape_mode == reshape::reshape_mode::unsqueeze ? 1 : -1;
+                    }
+                }
+
+                auto dyn_pad_mask = tensor(0).sizes();
+                auto reshape_axis_legacy = convert_axis_to_legacy(reshape_axis, reshape_ps.size());
+                dyn_pad_mask[reshape_axis_legacy] = 1;
+                user_info.second.data_padding.set_dynamic_pad(tensor(dyn_pad_mask));
+            }
         }
         return;
     }
@@ -673,14 +710,40 @@ void crop_in_place_optimization::update_in_place_crop_padding_simple_data_format
         auto dyn_pad_sizes = lower_sizes;
         dyn_pad_sizes[crop_axis_legacy] = 1;
         crop_layout.data_padding = padding(lower_sizes, upper_sizes, 0.f, tensor(dyn_pad_sizes));
-        for (auto& user_layout : user_layouts) {
-            auto reshape_rank = user_layout.get_partial_shape().size();
-            auto reshape_last_dim = user_layout.get_partial_shape().to_shape()[reshape_rank - 1];
-            if (lower_sizes[crop_axis_legacy])
-                lower_sizes[crop_axis_legacy] /= reshape_last_dim;
-            if (upper_sizes[crop_axis_legacy])
-                upper_sizes[crop_axis_legacy] /= reshape_last_dim;
-            user_layout.data_padding = padding(lower_sizes, upper_sizes, 0.f, tensor(dyn_pad_sizes));
+        if (user_info.first) {
+            auto reshape_desc = user_info.first->as<reshape>().get_primitive();
+            auto reshape_mode = reshape_desc->mode;
+            if (reshape_mode == reshape::reshape_mode::base) {
+                auto reshape_rank = user_info.second.get_partial_shape().size();
+                auto reshape_last_dim = user_info.second.get_partial_shape().to_shape()[reshape_rank - 1];
+                if (lower_sizes[crop_axis_legacy])
+                    lower_sizes[crop_axis_legacy] /= reshape_last_dim;
+                if (upper_sizes[crop_axis_legacy])
+                    upper_sizes[crop_axis_legacy] /= reshape_last_dim;
+                user_info.second.data_padding = padding(lower_sizes, upper_sizes, 0.f, tensor(dyn_pad_sizes));
+            } else {
+                auto reshape_ps = user_info.second.get_partial_shape();
+                auto output_pattern = reshape_desc->output_pattern;
+
+                auto reshape_axis = crop_axis;
+                for (size_t i = 0; i < output_pattern.size(); i++) {
+                    if (output_pattern[i] <= static_cast<int64_t>(reshape_axis)) {
+                        reshape_axis += reshape_mode == reshape::reshape_mode::unsqueeze ? 1 : -1;
+                    }
+                }
+
+                const auto output_rank = std::max(reshape_ps.size(), static_cast<size_t>(4));
+                std::vector<int32_t> reshape_lower_sizes(output_rank, 0);
+                std::vector<int32_t> reshape_upper_sizes(output_rank, 0);
+                std::vector<int32_t> reshape_dyn_pad_mask(output_rank, 0);
+
+                const auto reshape_axis_legacy = convert_axis_to_legacy(reshape_axis, reshape_ps.size());
+                reshape_lower_sizes[reshape_axis_legacy] = lower_sizes[crop_axis_legacy];
+                reshape_upper_sizes[reshape_axis_legacy] = upper_sizes[crop_axis_legacy];
+                reshape_dyn_pad_mask[reshape_axis_legacy] = 1;
+
+                user_info.second.data_padding = padding(reshape_lower_sizes, reshape_upper_sizes, 0.f, tensor(reshape_dyn_pad_mask));
+            }
         }
     } else {
         crop_layout.data_padding = padding(lower_sizes, upper_sizes);
@@ -743,18 +806,23 @@ void prepare_buffer_fusing::run(program& p) {
                                                                                    node.get_primitive()->axis,
                                                                                    false);
                 } else if (crop_in_place_optimization::can_crop_be_optimized_simple_data_format(crop_layout, pred_layout)) {
+                    std::pair<const program_node*, layout> user_info;
                     std::vector<layout> reshape_layouts;
-                    if (node.get_users().front()->is_type<reshape>() && node.get_users().front()->as<reshape>().is_runtime_propagatable_padding()) {
-                        reshape_layouts.push_back(node.get_users().front()->get_output_layout());
+                    if (node.get_users().front()->is_type<reshape>()) {
+                        auto& reshape_node = node.get_users().front()->as<reshape>();
+                        if (reshape_node.is_runtime_propagatable_padding()) {
+                            user_info.first = &reshape_node;
+                            user_info.second = reshape_node.get_output_layout();
+                        }
                     }
                     crop_in_place_optimization::update_in_place_crop_padding_simple_data_format(crop_layout,
                                                                                                 pred_layout,
-                                                                                                reshape_layouts,
+                                                                                                user_info,
                                                                                                 crop_params->input_offsets[0],
                                                                                                 node.get_primitive()->axis,
                                                                                                 false);
-                    if (reshape_layouts.size() > 0) {
-                        node.get_users().front()->set_output_layout(reshape_layouts[0]);
+                    if (user_info.first) {
+                        node.get_users().front()->set_output_layout(user_info.second);
                     }
                 }
                 node.set_output_layout(crop_layout);
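Review note (not part of the patch): the two axis conversions above are easy to mis-read, so here is a minimal standalone C++ sketch of them with hand-checked values for a rank-3 crop whose user unsqueezes axis 1 (the case exercised by the new unit test further down). All names are illustrative only.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the convert_axis_to_legacy lambda: pshape axes (b, f, spatial...) map to
// cldnn's legacy order, where spatial dims are stored reversed and padded to rank 4.
size_t convert_axis_to_legacy(size_t axis, size_t rank) {
    if (axis < 2)
        return axis;
    const size_t spatial_axis = axis - 2;
    const size_t spatial_size = std::max<size_t>(rank, 4) - 2;
    return spatial_size - spatial_axis - 1 + 2;
}

// Mirrors the output_pattern loop: every squeezed/unsqueezed position at or before
// the running axis shifts the padded axis by +1 (unsqueeze) or -1 (squeeze).
size_t remap_through_reshape(size_t crop_axis, const std::vector<int64_t>& pattern, bool unsqueeze) {
    size_t axis = crop_axis;
    for (size_t i = 0; i < pattern.size(); i++) {
        if (pattern[i] <= static_cast<int64_t>(axis))
            axis += unsqueeze ? 1 : -1;
    }
    return axis;
}

int main() {
    // Crop output {-1, -1, 6} split along axis 2; the user reshape unsqueezes axis 1.
    std::cout << convert_axis_to_legacy(2, 3) << '\n';                // 3: legacy pad slot of the crop
    const size_t reshape_axis = remap_through_reshape(2, {1}, true);  // 2 -> 3 in the rank-4 output
    std::cout << convert_axis_to_legacy(reshape_axis, 4) << '\n';     // 2: legacy pad slot of the reshape
}
```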
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.h b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.h
index 47bc4fc49bd490..543a55864eb63b 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.h
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.h
@@ -82,7 +82,7 @@ struct crop_in_place_optimization : pattern_match_optimization_typed<crop_in_pla
                                                                 bool is_runtime);
     static void update_in_place_crop_padding_simple_data_format(layout& crop_layout,
                                                                  layout& input_layout,
-                                                                 std::vector<layout>& user_layouts,
+                                                                 std::pair<const program_node*, layout>& user_info,
                                                                  const tensor offsets,
                                                                  size_t crop_axis,
                                                                  bool is_runtime);
diff --git a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h
index 78cbba8f3eeba4..7ae1d4792879eb 100644
--- a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h
+++ b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h
@@ -32,8 +32,11 @@ struct typed_program_node<reshape> : public typed_program_node_base<reshape> {
 
     bool is_runtime_propagatable_padding() const {
         auto prim = typed_desc();
-        if (prim->mode == reshape::reshape_mode::squeeze || prim->mode == reshape::reshape_mode::unsqueeze)
-            return true;
+        if (prim->mode == reshape::reshape_mode::squeeze || prim->mode == reshape::reshape_mode::unsqueeze) {
+            // For proper padding propagation we need to know output pattern at model loading stage
+            // in case of squeeze/unsqueeze mode
+            return prim->output_pattern.size() > 0;
+        }
 
         // TODO: This function is to limit condition to a specific case (crop + reshape) among cases for the base mode
         if (!input().is_type<crop>())
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index f8267673722e64..93a3183ee39d5e 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -1485,7 +1485,7 @@ void primitive_inst::do_runtime_in_place_crop() {
             u->update_shape_done_by_other = true;
 
             const auto& crop_users = u->get_user_insts();
-            std::vector<layout> reshape_layouts;
+            std::pair<const program_node*, layout> user_info;
             if (crop_users.front()->get_node().is_type<reshape>()) {
                 OPENVINO_ASSERT(crop_users.size() == 1, "[GPU] Expected number of reshape users is 1, but it is ", crop_users.size());
                 auto reshape_inst = crop_users.front();
@@ -1493,7 +1493,8 @@ void primitive_inst::do_runtime_in_place_crop() {
                 GPU_DEBUG_TRACE_DETAIL << "[In place crop] update shape for " << reshape_inst->id() << std::endl;
                 reshape_inst->update_shape();
                 reshape_inst->update_shape_done_by_other = true;
-                reshape_layouts.push_back(reshape_inst->_impl_params->get_output_layout());
+                user_info.first = &reshape_inst->get_node();
+                user_info.second = reshape_inst->_impl_params->get_output_layout();
             }
         }
 
@@ -1510,11 +1511,10 @@ void primitive_inst::do_runtime_in_place_crop() {
             if (crop_in_place_optimization::can_crop_be_optimized_along_feature(crop_layout, pred_layout)) {
                 crop_in_place_optimization::update_in_place_crop_padding_along_feature(u->get_node(), crop_layout, pred_layout, offsets, crop_axis, true);
             } else if (crop_in_place_optimization::can_crop_be_optimized_simple_data_format(crop_layout, pred_layout)) {
-                crop_in_place_optimization::update_in_place_crop_padding_simple_data_format(crop_layout, pred_layout, reshape_layouts,
-                                                                                            offsets, crop_axis, true);
-                if (crop_users.front()->get_node().is_type<reshape>() && reshape_layouts.size() > 0) {
+                crop_in_place_optimization::update_in_place_crop_padding_simple_data_format(crop_layout, pred_layout, user_info, offsets, crop_axis, true);
+                if (user_info.first) {
                     auto reshape_inst = crop_users.front();
-                    reshape_inst->_impl_params->output_layouts[0] = reshape_layouts[0];
+                    reshape_inst->_impl_params->output_layouts[0] = user_info.second;
                     reshape_inst->set_shape_change();
                 }
             } else {
diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp
index 5cbef11dd3b045..a4bf38785354d3 100644
--- a/src/plugins/intel_gpu/src/graph/reshape.cpp
+++ b/src/plugins/intel_gpu/src/graph/reshape.cpp
@@ -51,6 +51,11 @@ padding propagate_padding(const layout& in_layout, const ov::PartialShape& out_s
     update_pad_upper = pad_upper;
     update_pad_mask = pad_mask;
+    // Truncate to the actual rank (for shapes with a rank less than 4)
+    update_pad_lower.resize(rank);
+    update_pad_upper.resize(rank);
+    update_pad_mask.resize(rank);
+
     std::unordered_set<int64_t> tmp(axes.begin(), axes.end());
     std::vector<int64_t> unique_axes;
     const auto expanded_rank = rank + tmp.size();
 
@@ -61,13 +66,13 @@ padding propagate_padding(const layout& in_layout, const ov::PartialShape& out_s
         // Normalize then remove repeated axes after normalization.
         for (const auto& axis : axes) {
             if (static_cast<size_t>(axis) <= out_shape.size()) {
-                pad_lower.insert(std::next(std::begin(pad_lower), axis), 0);
-                pad_upper.insert(std::next(std::begin(pad_upper), axis), 0);
-                pad_mask.insert(std::next(std::begin(pad_mask), axis), 0);
+                update_pad_lower.insert(std::next(std::begin(update_pad_lower), axis), 0);
+                update_pad_upper.insert(std::next(std::begin(update_pad_upper), axis), 0);
+                update_pad_mask.insert(std::next(std::begin(update_pad_mask), axis), 0);
             } else {
-                pad_lower.push_back(0);
-                pad_upper.push_back(0);
-                pad_mask.push_back(0);
+                update_pad_lower.push_back(0);
+                update_pad_upper.push_back(0);
+                update_pad_mask.push_back(0);
             }
         }
     } else {
@@ -254,6 +259,7 @@ std::string reshape_inst::to_string(reshape_node const& node) {
     reshape_info.add("output pshape", desc->output_partial_shape);
     reshape_info.add("output pattern", desc->output_pattern);
     reshape_info.add("special zero", desc->special_zero);
+    reshape_info.add("reshape mode", desc->mode);
 
     node_info->add("reshape info", reshape_info);
     node_info->dump(primitive_description);
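Review note (not part of the patch): the reshape.cpp hunks fix two pieces of bookkeeping in propagate_padding: the pad vectors arrive padded to cldnn's rank-4 minimum, so a lower-rank shape must be truncated before zeros are inserted at the unsqueezed axes, and the insertions must target the update_* copies rather than the originals. A simplified sketch, with a hypothetical helper name:

```cpp
#include <cstdint>
#include <iterator>
#include <vector>

// Truncate a legacy rank-4 pad vector to the real input rank, then grow it to the
// output rank by inserting a zero entry at every unsqueezed axis.
std::vector<int32_t> propagate_pad(std::vector<int32_t> pad, size_t in_rank,
                                   const std::vector<int64_t>& unsqueeze_axes) {
    pad.resize(in_rank);  // drop the trailing slots added for the rank-4 minimum
    for (const auto& axis : unsqueeze_axes) {
        if (static_cast<size_t>(axis) <= pad.size())
            pad.insert(std::next(std::begin(pad), axis), 0);  // a new dim carries no pad
        else
            pad.push_back(0);
    }
    return pad;
}

int main() {
    // {1, 128} padded on axis 1, unsqueezed at {1, 3} into {1, 1, 128, 1}:
    // {0, 2, 0, 0} -> {0, 2} -> {0, 0, 2, 0}, i.e. the pad follows the 128 dim
    // to axis 2, which is what the new unsqueeze_test case below expects.
    const auto out = propagate_pad({0, 2, 0, 0}, 2, {1, 3});
    return out[2] == 2 ? 0 : 1;
}
```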
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl
index e79b801770db49..36d4306b59ba79 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl
@@ -71,17 +71,26 @@ KERNEL(rope_ref)(
     uint cos_sin_b = b < INPUT1_BATCH_NUM ? b : 0;
     uint cos_sin_p = p + INPUT1_FEATURE_NUM - INPUT0_FEATURE_NUM < INPUT1_FEATURE_NUM ? p + INPUT1_FEATURE_NUM - INPUT0_FEATURE_NUM : 0;
     uint cos_sin_h = h < INPUT1_SIZE_Y ? h : 0;
+
+#ifndef SIN_COS_HAVE_DYNAMIC_PADDINGS
     uint cos_sin_idx = INPUT1_GET_INDEX(cos_sin_b, cos_sin_p, cos_sin_h, 0);
+    uint cos_idx = cos_sin_idx;
+    uint sin_idx = cos_sin_idx;
+#else
+    uint cos_idx = INPUT1_GET_INDEX(cos_sin_b, cos_sin_p, cos_sin_h, 0);
+    uint sin_idx = INPUT2_GET_INDEX(cos_sin_b, cos_sin_p, cos_sin_h, 0);
+#endif
+
     uint output_idx = OUTPUT_GET_INDEX(b, p, h, 0);
 
     INPUT0_TYPE in1 = input[input_idx + r];
     INPUT0_TYPE in2 = input[input_idx + HALF_ROTARY_NDIMS + r];
 
-    output[output_idx + r] = cos[cos_sin_idx + r] * in1 - sin[cos_sin_idx + r] * in2;
+    output[output_idx + r] = cos[cos_idx + r] * in1 - sin[sin_idx + r] * in2;
 
-    output[output_idx + HALF_ROTARY_NDIMS + r] = cos[cos_sin_idx + HALF_ROTARY_NDIMS + r] * in2 +
-                                                 sin[cos_sin_idx + HALF_ROTARY_NDIMS + r] * in1;
+    output[output_idx + HALF_ROTARY_NDIMS + r] = cos[cos_idx + HALF_ROTARY_NDIMS + r] * in2 +
+                                                 sin[sin_idx + HALF_ROTARY_NDIMS + r] * in1;
 }
 #endif
@@ -128,16 +137,25 @@ KERNEL(rope_ref)(
         cos_sin_p = gather[gather_idx];
 #endif
     cos_sin_p = cos_sin_p < INPUT1_SIZE_Y ? cos_sin_p : 0;
+
+#ifndef SIN_COS_HAVE_DYNAMIC_PADDINGS
     uint cos_sin_idx = INPUT1_GET_INDEX(cos_sin_b, cos_sin_h, cos_sin_p, 0);
+    uint cos_idx = cos_sin_idx;
+    uint sin_idx = cos_sin_idx;
+#else
+    uint cos_idx = INPUT1_GET_INDEX(cos_sin_b, cos_sin_h, cos_sin_p, 0);
+    uint sin_idx = INPUT2_GET_INDEX(cos_sin_b, cos_sin_h, cos_sin_p, 0);
+#endif
+
     uint output_idx = OUTPUT_GET_INDEX(b, h, p, 0);
 
     INPUT0_TYPE in1 = input[input_idx + r];
     INPUT0_TYPE in2 = input[input_idx + HALF_ROTARY_NDIMS + r];
 
-    output[output_idx + r] = cos[cos_sin_idx + r] * in1 - sin[cos_sin_idx + r] * in2;
+    output[output_idx + r] = cos[cos_idx + r] * in1 - sin[sin_idx + r] * in2;
 
-    output[output_idx + HALF_ROTARY_NDIMS + r] = cos[cos_sin_idx + HALF_ROTARY_NDIMS + r] * in2 +
-                                                 sin[cos_sin_idx + HALF_ROTARY_NDIMS + r] * in1;
+    output[output_idx + HALF_ROTARY_NDIMS + r] = cos[cos_idx + HALF_ROTARY_NDIMS + r] * in2 +
+                                                 sin[sin_idx + HALF_ROTARY_NDIMS + r] * in1;
 }
 #endif
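Review note (not part of the patch): the kernel split matters because the cos and sin inputs can now be two in-place crops of one fused buffer, each carrying its own dynamic padding, so a single INPUT1 offset can no longer address both. A toy C++ model (not the generated OpenCL) of how different pads shift the linear offset of the same logical coordinate:

```cpp
#include <array>
#include <cstddef>
#include <iostream>

// Linear offset into a bfyx-like buffer whose dims are enlarged by per-dim padding;
// x varies fastest, analogous to the kernel's INPUTn_GET_INDEX macros.
size_t get_index(const std::array<size_t, 4>& dims, const std::array<size_t, 4>& pad_lower,
                 const std::array<size_t, 4>& pad_total, const std::array<size_t, 4>& coord) {
    size_t idx = 0, pitch = 1;
    for (int d = 3; d >= 0; --d) {
        idx += (coord[d] + pad_lower[d]) * pitch;
        pitch *= dims[d] + pad_total[d];
    }
    return idx;
}

int main() {
    // Same coordinate {0, 1, 0, 0}: cos cropped at offset 0 and sin at offset 2 of a
    // shared buffer get different lower pads, hence different linear offsets.
    std::cout << get_index({1, 4, 1, 8}, {0, 0, 0, 0}, {0, 4, 0, 0}, {0, 1, 0, 0}) << '\n';  // 8
    std::cout << get_index({1, 4, 1, 8}, {0, 2, 0, 0}, {0, 4, 0, 0}, {0, 1, 0, 0}) << '\n';  // 24
}
```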
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp
index 507a41b8b01a50..a9e0818aeae2f5 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp
@@ -63,6 +63,10 @@ JitConstants RoPEKernelBase::GetJitConstants(const rope_params& params, RoPEKern
         jit.AddConstant(MakeJitConstant("TRANSPOSED_INPUT0_BATCH_PITCH", "INPUT0_BATCH_PITCH"));
     }
 
+    if (!params.is_chatglm && (params.inputs[1].has_dynamic_pad() || params.inputs[2].has_dynamic_pad())) {
+        jit.AddConstant(MakeJitConstant("SIN_COS_HAVE_DYNAMIC_PADDINGS", true));
+    }
+
     if (params.is_qwen) {
         jit.AddConstant(MakeJitConstant("QWEN", true));
     } else if (params.is_chatglm) {
diff --git a/src/plugins/intel_gpu/src/plugin/ops/reshape.cpp b/src/plugins/intel_gpu/src/plugin/ops/reshape.cpp
index 1312de47ed7033..a95e00725736e6 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/reshape.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/reshape.cpp
@@ -9,6 +9,7 @@
 #include "openvino/op/squeeze.hpp"
 #include "openvino/op/unsqueeze.hpp"
 #include "openvino/op/constant.hpp"
+#include "openvino/core/validation_util.hpp"
 
 #include "intel_gpu/primitives/reshape.hpp"
 #include "intel_gpu/primitives/reorder.hpp"
@@ -30,6 +31,9 @@ static void CreateCommonReshapeOp(ProgramBuilder& p, const std::shared_ptr<ov::N
     std::vector<int64_t> output_pattern = {};
     if (second_const_input != nullptr) {
         output_pattern = second_const_input->cast_vector<int64_t>();
+        if (mode == cldnn::reshape::reshape_mode::unsqueeze || mode == cldnn::reshape::reshape_mode::squeeze) {
+            ov::util::try_normalize_axes(output_pattern, op->get_output_partial_shape(0).rank(), *op);
+        }
     }
 
     // If second input is absent (it's optional in Squeeze op) or it's constant, create reshape with single input and compile time out pattern
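Review note (not part of the patch): normalizing the pattern at op-creation time matters because prepare_buffer_fusing later compares pattern entries against a non-negative crop axis, so a Squeeze/Unsqueeze written with negative axes would otherwise miss the remapping. A minimal sketch of the equivalent wrap-around (hypothetical helper, not the ov::util API):

```cpp
#include <cstdint>
#include <vector>

// Wrap negative squeeze/unsqueeze axes against the output rank so that later
// comparisons such as `output_pattern[i] <= crop_axis` see canonical values.
void normalize_axes(std::vector<int64_t>& axes, int64_t rank) {
    for (auto& axis : axes) {
        if (axis < 0)
            axis += rank;  // e.g. -3 with output rank 4 becomes 1
    }
}
```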
diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
index e5506388eba273..668003aac77192 100644
--- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp
@@ -777,6 +777,85 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic) {
         ASSERT_EQ(output_ptr_3[i], out3[i]);
 }
 
+TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_unsqueeze) {
+    auto& engine = get_test_engine();
+
+    auto in_layout = layout{ ov::PartialShape{-1, -1, 4}, data_types::f32, format::bfyx};
+    auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx });
+    auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx });
+    auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx });
+    auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx });
+    auto zp_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx });
+    auto axis_mem = engine.allocate_memory({ {}, data_types::i64, format::bfyx });
+    auto splits_length_mem = engine.allocate_memory({ {2}, data_types::i64, format::bfyx });
+
+    int64_t axis = 2;
+    set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f,
+                            0.5f, -2.0f, -0.5f, -1.0f });
+    set_values(axis_mem, {axis});
+    set_values<int64_t>(splits_length_mem, { 2, 6 });
+    set_values<uint8_t>(weights_mem, { 1, 2, 3, 4,
+                                       5, 6, 7, 8,
+                                       9, 10, 11, 12,
+                                       13, 14, 15, 0,
+                                       15, 14, 13, 12,
+                                       11, 10, 9, 8,
+                                       7, 6, 5, 4,
+                                       3, 2, 1, 0});
+    set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 2.0f });
+    set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 2.0f });
+    set_values(zp_mem, { 1.0f, 2.0f, 2.0f, 1.0f, 4.0f, 1.0f, 6.0f, 2.0f });
+
+    std::vector<float> out1 = { 13.f, 58.f, -11.f, -62.f };
+    std::vector<float> out2 = { -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f };
+    std::vector<float> out3 = { 13.f, 58.f, -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, -11.f, -62.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f };
+
+    cldnn::crop_ngraph_op_mode op_mode = cldnn::crop_ngraph_op_mode::variadic_split;
+    topology topology(
+        input_layout("input", in_layout),
+        data("axis", axis_mem),
+        data("splits_length", splits_length_mem),
+        data("weights", weights_mem),
+        data("bias", bias_mem),
+        data("scale", scale_mem),
+        data("zp", zp_mem),
+        fully_connected("fc", input_info("input"), "weights", "bias", "scale", "zp", data_types::f32, 3, 2),
+        crop("crop1", { input_info("fc"), input_info("axis"), input_info("splits_length") }, cldnn::tensor(1), cldnn::tensor(0), op_mode, 0, axis),
+        reorder("output1", input_info("crop1"), format::bfyx, data_types::f32),
+        crop("crop2", { input_info("fc"), input_info("axis"), input_info("splits_length") }, cldnn::tensor(1), cldnn::tensor(0), op_mode, 1, axis),
+        reshape("reshape", input_info("crop2"), false, std::vector<int64_t>{1}, ov::PartialShape{-1, 1, -1, 6}, cldnn::reshape::reshape_mode::unsqueeze),
+        reorder("output2", input_info("reshape"), format::bfyx, data_types::f32, std::vector<float>(), reorder_mean_mode::subtract, padding(), true),
+        reorder("output3", input_info("fc"), format::bfyx, data_types::f32)
+    );
+
+    auto config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    network network(engine, topology, config);
+
+    network.set_input_data("input", input_mem);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output1").get_memory();
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < out1.size(); i++)
+        ASSERT_EQ(output_ptr[i], out1[i]);
+
+    auto output_2 = outputs.at("output2").get_memory();
+    cldnn::mem_lock<float> output_ptr_2(output_2, get_test_stream());
+
+    for (size_t i = 0; i < out2.size(); i++)
+        ASSERT_EQ(output_ptr_2[i], out2[i]);
+
+    auto output_3 = outputs.at("output3").get_memory();
+    cldnn::mem_lock<float> output_ptr_3(output_3, get_test_stream());
+
+    for (size_t i = 0; i < out3.size(); i++)
+        ASSERT_EQ(output_ptr_3[i], out3[i]);
+}
+
 TEST(prepare_buffer_fusing, in_place_crop_dynamic_split_lengths) {
     auto& engine = get_test_engine();
 
diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/reshape_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/reshape_si_test.cpp
index 4a54ea96233ee7..97a50f2d9e05d6 100644
--- a/src/plugins/intel_gpu/tests/unit/shape_infer/reshape_si_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/shape_infer/reshape_si_test.cpp
@@ -35,12 +35,14 @@ inline padding get_pad(format fmt, std::vector<int64_t> axes, bool is_dynamic) {
     std::vector<int32_t> upper(fmt.dimension(), 0);
     std::vector<int32_t> mask(fmt.dimension(), 0);
 
+    auto start_pad_val = 13;
     for (auto& axis : axes) {
-        lower[axis] = 13 + axis;
-        upper[axis] = 25 + axis;
+        lower[axis] = start_pad_val;
+        upper[axis] = start_pad_val / 2;
         if (is_dynamic) {
             mask[axis] = 1;
         }
+        start_pad_val += 5;
     }
 
     return padding(tensor(fmt, lower, 0).sizes(), tensor(fmt, upper, 0).sizes(), 0.0f, tensor(fmt, mask, 0));
@@ -280,20 +282,30 @@ INSTANTIATE_TEST_SUITE_P(smoke, unsqueeze_test,
             layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {0}, ov::PartialShape::dynamic(1),
             layout{ov::PartialShape{1}, data_types::f32, format::bfyx}
         },
+        {
+            layout{ov::PartialShape{1, 128}, data_types::f32, format::bfyx, get_pad(format::bfyx, {0, 1}, true)},
+            layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {1, 3}, ov::PartialShape::dynamic(4),
+            layout{ov::PartialShape{1, 1, 128, 1}, data_types::f32, format::bfyx, get_pad(format::bfyx, {0, 2}, true)}
+        },
+        {
+            layout{ov::PartialShape{1, 1, 128}, data_types::f32, format::bfyx, get_pad(format::bfyx, {2}, true)},
+            layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {1}, ov::PartialShape::dynamic(4),
+            layout{ov::PartialShape{1, 1, 1, 128}, data_types::f32, format::bfyx, get_pad(format::bfyx, {3}, true)}
+        },
         {
             layout{ov::PartialShape{1, 10, 20, 30}, data_types::f32, format::bfyx, get_pad(format::bfyx, {2}, true)},
             layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {1}, ov::PartialShape::dynamic(5),
-            layout{ov::PartialShape{1, 1, 10, 20, 30}, data_types::f32, format::bfzyx, get_pad(format::bfyx, {2}, true)}
+            layout{ov::PartialShape{1, 1, 10, 20, 30}, data_types::f32, format::bfzyx, get_pad(format::bfzyx, {3}, true)}
         },
         {
             layout{ov::PartialShape{1, 10, 20, 30}, data_types::f32, format::bfyx, get_pad(format::bfyx, {2, 3}, true)},
             layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {1}, ov::PartialShape::dynamic(5),
-            layout{ov::PartialShape{1, 1, 10, 20, 30}, data_types::f32, format::bfzyx, get_pad(format::bfyx, {2, 3}, true)}
+            layout{ov::PartialShape{1, 1, 10, 20, 30}, data_types::f32, format::bfzyx, get_pad(format::bfzyx, {3, 4}, true)}
         },
         {
             layout{ov::PartialShape{1, 10, 20, 30}, data_types::f32, format::bfyx, get_pad(format::bfyx, {2, 3}, true)},
             layout{ov::PartialShape{2}, data_types::i64, format::bfyx}, {1, 4}, ov::PartialShape::dynamic(6),
-            layout{ov::PartialShape{1, 1, 10, 20, 1, 30}, data_types::f32, format::bfwzyx, get_pad(format::bfyx, {2, 3}, true)}
+            layout{ov::PartialShape{1, 1, 10, 20, 1, 30}, data_types::f32, format::bfwzyx, get_pad(format::bfwzyx, {3, 5}, true)}
         }
     }));
 }  // shape_infer_tests
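Review note (not part of the patch): the updated unsqueeze_test expectations can be hand-checked by running the patch's pattern loop over each changed row; the padded pshape axis simply shifts right past every unsqueezed position, and get_pad takes axes in the format's dimension order, which coincides with the pshape order for these plain bf...yx formats.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Same loop as the patch: an unsqueeze at or before the running axis shifts it right.
size_t remap(size_t axis, const std::vector<int64_t>& pattern) {
    for (const auto& p : pattern) {
        if (p <= static_cast<int64_t>(axis))
            ++axis;
    }
    return axis;
}

int main() {
    std::cout << remap(2, {1}) << '\n';     // 3: pad axes {2} -> {3} for the bfzyx row
    std::cout << remap(3, {1}) << '\n';     // 4: pad axes {2, 3} -> {3, 4}
    std::cout << remap(3, {1, 4}) << '\n';  // 5: pad axes {2, 3} -> {3, 5} for bfwzyx
}
```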