From 97a214851c0a0b848ef6d9a432772ed26a0fa49e Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 8 Jan 2025 08:26:31 +0100 Subject: [PATCH 1/3] [core] Result node may uses inputs names on creation (#28168) ### Details: - The Result node may use its input's tensor names when it is created. A Result created this way assumes that the previous layer is a model output. - When the model is created from outputs that are not Result nodes, Results are created for them and the outputs' names are taken as the model output names. - The Result has an option to enable/disable using the input's names as its own. If the names are not used, they are still visible as output names as long as the Result has no dedicated names, but if the Result is connected to another input these names stay on the original input. ### Tickets: - CVS-159672 --------- Signed-off-by: Raasz, Pawel --- .../tests/utils/convert_precision.cpp | 4 - src/core/include/openvino/op/result.hpp | 6 ++ src/core/src/model.cpp | 4 +- src/core/src/node_vector.cpp | 2 +- src/core/src/op/result.cpp | 8 ++ src/core/tests/preprocess.cpp | 86 ++++++++++++++++++- src/core/tests/type_prop/result.cpp | 21 ++++- src/frontends/ir/src/ir_deserializer.cpp | 2 +- .../onnx/frontend/src/input_model.cpp | 7 -- 9 files changed, 122 insertions(+), 18 deletions(-) diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index a2edb0232b40f2..75cf81d295e3ac 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -2441,9 +2441,6 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp auto convert_1 = make_shared(param_1, element::f32); auto axis = opset10::Constant::create(element::i32, Shape{}, {0}); auto split = make_shared(convert_1, axis, 3); - split->get_output_tensor(0).add_names({"split:0"}); - split->get_output_tensor(1).add_names({"split:1"}); - split->get_output_tensor(2).add_names({"split:2"}); auto convert_split_0 = 
make_shared(split->output(0), element::f64); auto convert_split_1 = make_shared(split->output(1), element::f64); @@ -2567,7 +2564,6 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) { if_op->set_input(convert_1, param_1_then, param_1_else); if_op->set_input(convert_2, param_2_then, param_2_else); auto result = if_op->set_output(result_then, result_else); - result.add_names({"if_result:0"}); auto converted_result = make_shared(result, element::f64); converted_result->get_output_tensor(0).add_names({"if_result:0"}); diff --git a/src/core/include/openvino/op/result.hpp b/src/core/include/openvino/op/result.hpp index 9cad2d9444a267..d7a1cd8fa803a5 100644 --- a/src/core/include/openvino/op/result.hpp +++ b/src/core/include/openvino/op/result.hpp @@ -69,6 +69,12 @@ class OPENVINO_API Result : public Op { /// \param arg Node that produces the input tensor. Result(const Output& arg); + /// \brief Allows a value to be used as a function result. + /// + /// \param arg Node that produces the input tensor. + /// \param use_input_names When true Result will use input node tensor names as Result's output names. 
+ Result(const Output& arg, bool use_input_names); + void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; diff --git a/src/core/src/model.cpp b/src/core/src/model.cpp index 1493d950cd78ef..e2e4115d925b16 100644 --- a/src/core/src/model.cpp +++ b/src/core/src/model.cpp @@ -949,8 +949,8 @@ ov::Output ov::Model::add_output(const ov::Output& port) { return input.get_node()->output(0); } } - auto result = std::make_shared(port); - m_results.push_back(result); + m_results.emplace_back(std::make_shared(port, true)); + auto& result = m_results.back(); if (m_shared_rt_info->get_use_topological_cache()) { if (cache_valid()) { // Full update of topological cache is not needed, 'result' can be just inserted to the end diff --git a/src/core/src/node_vector.cpp b/src/core/src/node_vector.cpp index 3ef52278ffa5a6..d1bb7ffd9d316b 100644 --- a/src/core/src/node_vector.cpp +++ b/src/core/src/node_vector.cpp @@ -29,7 +29,7 @@ ov::ResultVector ov::as_result_vector(const OutputVector& values) { for (const auto& value : values) { std::shared_ptr node = value.get_node_shared_ptr(); result.push_back(ov::is_type(node) ? ov::as_type_ptr(node) - : std::make_shared(value)); + : std::make_shared(value, true)); } return result; } diff --git a/src/core/src/op/result.cpp b/src/core/src/op/result.cpp index 97dc95a0e53f17..f527c66bb1d4fc 100644 --- a/src/core/src/op/result.cpp +++ b/src/core/src/op/result.cpp @@ -10,6 +10,7 @@ #include "itt.hpp" #include "openvino/core/descriptor_tensor.hpp" +#include "openvino/op/util/op_types.hpp" namespace ov { namespace op { @@ -19,6 +20,13 @@ Result::Result(const Output& arg) : Op({arg}) { constructor_validate_and_infer_types(); } +Result::Result(const Output& arg, bool use_input_names) : Result(arg) { + if (use_input_names && !util::is_parameter(arg.get_node())) { + // On create use inputs names which will be used as model output names (except Paramater, model's inputs names). 
+ get_output_tensor(0).add_names(get_input_tensor(0).get_names()); + } +} + void Result::validate_and_infer_types() { OV_OP_SCOPE(v0_Result_validate_and_infer_types); NODE_VALIDATION_CHECK(this, get_input_size() == 1, "Argument has ", get_input_size(), " outputs (1 expected)."); diff --git a/src/core/tests/preprocess.cpp b/src/core/tests/preprocess.cpp index 99f2789b217b6d..86b8dad63ba01e 100644 --- a/src/core/tests/preprocess.cpp +++ b/src/core/tests/preprocess.cpp @@ -2096,12 +2096,94 @@ TEST(pre_post_process, postprocess_one_node_many_outputs) { p.output(2).tensor().set_element_type(element::f32); model = p.build(); EXPECT_EQ(model->get_results().size(), 3); - // Tensor names on output is lost as origin named tensor is before convert op + // Tensor names on modified outputs are set to Split tensors not model output. // New result has different precision means different tensor. EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 0); + EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 0); + // Add output node still on output after pre-processing EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); + // Not modified output still have name EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); - EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 0); + EXPECT_EQ(model->get_results()[0]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.0"); + EXPECT_EQ(model->get_results()[1]->input(0).get_source_output().get_node()->get_friendly_name(), "Split"); + EXPECT_EQ(model->get_results()[2]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.2"); +} + +TEST(pre_post_process, postprocess_one_node_many_outputs_results_created_by_model) { + auto data1 = std::make_shared(element::i32, Shape{3}); + auto c1 = opset8::Constant::create(element::i32, Shape{}, {0}); + auto op = std::make_shared(data1, c1, 3); + 
op->set_friendly_name("Split"); + op->output(0).set_names({"tensor_Split0"}); + auto r1 = std::make_shared(op->output(0)); + + OutputVector outputs{r1}; + for (size_t i = 1; i < op->get_num_splits(); i++) { + auto output = op->output(i); + output.set_names({"tensor_Split" + std::to_string(i)}); + outputs.push_back(std::move(output)); + } + auto model = std::make_shared(outputs, ParameterVector{data1}); + // Set tensor name to model output 0 + model->output(0).set_names({"output_split0"}); + EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); + // Result input has still tensor_split0 names from split op + EXPECT_EQ(model->output(0).get_node()->get_input_tensor(0).get_names().count("tensor_Split0"), 1); + EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); + EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1); + + auto p = PrePostProcessor(model); + p.output(0).tensor().set_element_type(element::f32); + p.output(2).tensor().set_element_type(element::f32); + model = p.build(); + EXPECT_EQ(model->get_results().size(), 3); + + // output 0 by user,not use input nodes names as its own, modified by PPP (tensor_Split0 is on split output) + EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 0); + EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); + // output 1 created by model, assume its names is output name, not modified by PPP + EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); + // output 2 created by model, assume its names is output name, modified by PPP + EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1); + EXPECT_EQ(model->get_results()[0]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.0"); + EXPECT_EQ(model->get_results()[1]->input(0).get_source_output().get_node()->get_friendly_name(), "Split"); + 
EXPECT_EQ(model->get_results()[2]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.2"); +} + +TEST(pre_post_process, postprocess_one_node_many_outputs_results_created_or_added_by_model) { + auto data1 = std::make_shared(element::i32, Shape{3}); + auto c1 = opset8::Constant::create(element::i32, Shape{}, {0}); + auto op = std::make_shared(data1, c1, 3); + op->set_friendly_name("Split"); + for (size_t i = 0; i < op->get_output_size(); ++i) { + op->output(i).set_names({"tensor_Split" + std::to_string(i)}); + } + + OutputVector outputs{std::make_shared(op->output(0)), op->output(1)}; + + auto model = std::make_shared(outputs, ParameterVector{data1}); + model->add_output(op->output(2)); + // Set tensor name to model output 0 + model->output(0).set_names({"output_split0"}); + EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); + // Result input has still tensor_split0 names from split op + EXPECT_EQ(model->output(0).get_node()->get_input_tensor(0).get_names().count("tensor_Split0"), 1); + EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); + EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1); + + auto p = PrePostProcessor(model); + p.output(0).tensor().set_element_type(element::f32); + p.output(2).tensor().set_element_type(element::f32); + model = p.build(); + EXPECT_EQ(model->get_results().size(), 3); + + // output 0 by user,not use input nodes names as its own, modified by PPP (tensor_Split0 is on split output) + EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 0); + EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); + // output 1 created by model, assume its names is output name, not modified by PPP + EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); + // output 2 created by model, assume its names is output name, modified by PPP + 
EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1); EXPECT_EQ(model->get_results()[0]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.0"); EXPECT_EQ(model->get_results()[1]->input(0).get_source_output().get_node()->get_friendly_name(), "Split"); EXPECT_EQ(model->get_results()[2]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.2"); diff --git a/src/core/tests/type_prop/result.cpp b/src/core/tests/type_prop/result.cpp index 9776768df052a0..03b7c550f22938 100644 --- a/src/core/tests/type_prop/result.cpp +++ b/src/core/tests/type_prop/result.cpp @@ -10,6 +10,7 @@ namespace ov { namespace test { +using ov::op::v0::Constant; using ov::op::v0::Parameter; using std::make_shared; using testing::UnorderedElementsAre; @@ -135,7 +136,7 @@ TEST_F(TypePropResultV0Test, preserve_specific_name_on_input_replace) { const auto a = std::make_shared(element::f32, PartialShape::dynamic()); a->get_output_tensor(0).set_names({"input a"}); - const auto result = make_op(a); + const auto result = make_op(a, true); result->output(0).set_names({"out"}); EXPECT_THAT(result->input(0).get_tensor().get_names(), UnorderedElementsAre("out", "input a")); @@ -151,5 +152,23 @@ TEST_F(TypePropResultV0Test, preserve_specific_name_on_input_replace) { EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("out")); EXPECT_THAT(a->output(0).get_names(), UnorderedElementsAre("input a")); } + +TEST_F(TypePropResultV0Test, take_input_node_names) { + const auto c = std::make_shared(element::f32, Shape{2}, std::vector{2.f, 1.f}); + c->get_output_tensor(0).set_names({"constant data"}); + const auto result = make_op(c, true); + + EXPECT_THAT(result->input(0).get_tensor().get_names(), UnorderedElementsAre("constant data")); + EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("constant data")); + + const auto new_const = std::make_shared(element::f32, Shape{2}, std::vector{0.f, 0.f}); + + 
result->input(0).replace_source_output(new_const); + result->validate_and_infer_types(); + + EXPECT_THAT(c->get_output_tensor(0).get_names(), testing::IsEmpty()); + EXPECT_THAT(result->get_input_tensor(0).get_names(), UnorderedElementsAre("constant data")); + EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("constant data")); +} } // namespace test } // namespace ov diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index c60b4bf0dda9ce..d7bc89b96c4358 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -1033,7 +1033,7 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vectorget_input_source_output(0).get_node())) { // Copy names if parent node is not parameter, model's input names should not be dedicated // output names as they could be removed from Parameter's tensor during model transformations. - ov::descriptor::copy_tensor_names(result->get_output_tensor(0), result->get_input_tensor(0)); + result->get_output_tensor(0).add_names(result->get_input_tensor(0).get_names()); } } } diff --git a/src/frontends/onnx/frontend/src/input_model.cpp b/src/frontends/onnx/frontend/src/input_model.cpp index 9410f54e428b3f..87f1439eb18b38 100644 --- a/src/frontends/onnx/frontend/src/input_model.cpp +++ b/src/frontends/onnx/frontend/src/input_model.cpp @@ -533,13 +533,6 @@ void InputModel::add_tensor_names(std::shared_ptr& model) { it->add_names(tensor_names.second); } } - - // Set model output names - for (auto&& result : model->get_results()) { - if (!is_type(result->get_input_source_output(0).get_node())) { - result->get_output_tensor(0).add_names(result->get_input_tensor(0).get_names()); - } - } } void InputModel::reshape_model_inputs(std::shared_ptr& model) { From 3526fa5805cc371bf7cedadf8e69d94f29865701 Mon Sep 17 00:00:00 2001 From: Michal Miotk Date: Wed, 8 Jan 2025 08:46:04 +0100 Subject: [PATCH 2/3] [GPU] remove code duplication in 
mvn_gpu_bfyx_opt kernel (#28284) ### Details: - spotted code duplication ### Tickets: - --- .../cl_kernels/mvn_gpu_bfyx_opt.cl | 43 +++---------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/mvn_gpu_bfyx_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/mvn_gpu_bfyx_opt.cl index 350cc0493a09c8..2f438bccd4cc74 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/mvn_gpu_bfyx_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/mvn_gpu_bfyx_opt.cl @@ -25,25 +25,23 @@ KERNEL (mvn_gpu_bfyx_opt)( const uint data_set_offset = data_set_idx * data_set_size; const uint my_data_offset = data_set_offset + in_data_set_idx; + uint iters_num = items_num; + if (in_data_set_idx < leftovers) + ++iters_num; float my_sum = 0; float tmp; //each WI reads items_num consecutive items from batch*feature - for (uint i=0; i Date: Wed, 8 Jan 2025 13:47:05 +0400 Subject: [PATCH 3/3] [Snippets] Fixed SplitDimensionM pass for Subgraphs with dynamic params (#28280) ### Details: - *Currently, the pass `SplitDimensionM` supports only static inputs of Subgraphs because the pass inserts `Reshape` ops with const shapes. In some cases (the case from the ticket), MatMul may have static output shape but some parameters - dynamic shape. Then `SplitDimensionM` should not call `split` method. 
The PR adds a check with an early `return` to cover such cases* ### Tickets: - *159661* --- src/common/snippets/src/pass/split_dimension_m.cpp | 6 ++++++ src/common/snippets/tests/src/pass/mha_tokenization.cpp | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/src/common/snippets/src/pass/split_dimension_m.cpp b/src/common/snippets/src/pass/split_dimension_m.cpp index ae95a371483163..a2875c3d90a80c 100644 --- a/src/common/snippets/src/pass/split_dimension_m.cpp +++ b/src/common/snippets/src/pass/split_dimension_m.cpp @@ -285,6 +285,12 @@ bool SplitDimensionM::run_on_subgraph(const std::shared_ptr& subgr if (!subgraph->has_domain_sensitive_ops()) return false; + // The pass supports only static shapes on Subgraph inputs due to static `Reshape` insertion around Subgraph. + const auto& params = subgraph->body_ptr()->get_parameters(); + const auto is_dynamic = [](const std::shared_ptr& p) { return p->get_output_partial_shape(0).is_dynamic(); }; + if (std::any_of(params.cbegin(), params.cend(), is_dynamic)) + return false; + if (const auto matmul0 = get_matmul(subgraph)) { const auto mm_shape = matmul0->get_shape(); size_t batch_m_dim, new_m_dim; diff --git a/src/common/snippets/tests/src/pass/mha_tokenization.cpp b/src/common/snippets/tests/src/pass/mha_tokenization.cpp index 382257f935cc49..cca0f4f0002470 100644 --- a/src/common/snippets/tests/src/pass/mha_tokenization.cpp +++ b/src/common/snippets/tests/src/pass/mha_tokenization.cpp @@ -221,6 +221,15 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_SplitM_AlmostAllThreads) { run(); } +TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_4D_SplitM_DynamicParameter) { + const auto &f = MHAFunction(std::vector{{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 128, -1}, {1, 128, 16, 64}}, + std::vector({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), false, false); + model = f.getOriginal(); + model_ref = f.getReference(); + config.set_concurrency(32); + run(); +} + 
TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHASelect_SplitM) { const auto& f = MHASelectSplitMFunction(std::vector{{8, 512, 18}, {8, 18, 64}, {1, 512, 64}, {1, 1, 64}, {8, 64, 512}}, std::vector{{8, 2, 256, 18}, {8, 1, 18, 64}, {1, 2, 256, 64}, {1, 1, 1, 64},