
Commit ae0c7b4

Merge branch 'toml_build' of https://github.com/mryzhov/openvino into toml_build
mryzhov committed Jan 8, 2025
2 parents 5806213 + 6ee3625
Showing 12 changed files with 144 additions and 54 deletions.
6 changes: 6 additions & 0 deletions src/common/snippets/src/pass/split_dimension_m.cpp
@@ -285,6 +285,12 @@ bool SplitDimensionM::run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgr
if (!subgraph->has_domain_sensitive_ops())
return false;

// The pass supports only static shapes on Subgraph inputs due to static `Reshape` insertion around Subgraph.
const auto& params = subgraph->body_ptr()->get_parameters();
const auto is_dynamic = [](const std::shared_ptr<ov::Node>& p) { return p->get_output_partial_shape(0).is_dynamic(); };
if (std::any_of(params.cbegin(), params.cend(), is_dynamic))
return false;

if (const auto matmul0 = get_matmul(subgraph)) {
const auto mm_shape = matmul0->get_shape();
size_t batch_m_dim, new_m_dim;
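The new guard relies on PartialShape::is_dynamic(), which reports true as soon as any dimension is dynamic. A standalone sketch of that behavior (not part of the commit; -1 denotes a dynamic dimension, as in the test below):

    #include <iostream>
    #include "openvino/core/partial_shape.hpp"

    int main() {
        const ov::PartialShape static_shape{1, 128, 16, 64};
        const ov::PartialShape dynamic_shape{1, 16, 128, -1};  // -1 marks a dynamic dimension
        std::cout << std::boolalpha
                  << static_shape.is_dynamic() << '\n'   // false: every dimension is known
                  << dynamic_shape.is_dynamic() << '\n'; // true: the pass now returns false early
        return 0;
    }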
9 changes: 9 additions & 0 deletions src/common/snippets/tests/src/pass/mha_tokenization.cpp
@@ -221,6 +221,15 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_SplitM_AlmostAllThreads) {
run();
}

TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA_4D_SplitM_DynamicParameter) {
const auto &f = MHAFunction(std::vector<PartialShape>{{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 128, -1}, {1, 128, 16, 64}},
std::vector<ov::element::Type>({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}), false, false);
model = f.getOriginal();
model_ref = f.getReference();
config.set_concurrency(32);
run();
}

TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHASelect_SplitM) {
const auto& f = MHASelectSplitMFunction(std::vector<PartialShape>{{8, 512, 18}, {8, 18, 64}, {1, 512, 64}, {1, 1, 64}, {8, 64, 512}},
std::vector<Shape>{{8, 2, 256, 18}, {8, 1, 18, 64}, {1, 2, 256, 64}, {1, 1, 1, 64},
4 changes: 0 additions & 4 deletions src/common/transformations/tests/utils/convert_precision.cpp
@@ -2441,9 +2441,6 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp
auto convert_1 = make_shared<opset10::Convert>(param_1, element::f32);
auto axis = opset10::Constant::create(element::i32, Shape{}, {0});
auto split = make_shared<opset10::Split>(convert_1, axis, 3);
split->get_output_tensor(0).add_names({"split:0"});
split->get_output_tensor(1).add_names({"split:1"});
split->get_output_tensor(2).add_names({"split:2"});

auto convert_split_0 = make_shared<opset10::Convert>(split->output(0), element::f64);
auto convert_split_1 = make_shared<opset10::Convert>(split->output(1), element::f64);
@@ -2567,7 +2564,6 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) {
if_op->set_input(convert_1, param_1_then, param_1_else);
if_op->set_input(convert_2, param_2_then, param_2_else);
auto result = if_op->set_output(result_then, result_else);
result.add_names({"if_result:0"});
auto converted_result = make_shared<opset10::Convert>(result, element::f64);
converted_result->get_output_tensor(0).add_names({"if_result:0"});

6 changes: 6 additions & 0 deletions src/core/include/openvino/op/result.hpp
@@ -69,6 +69,12 @@ class OPENVINO_API Result : public Op {
/// \param arg Node that produces the input tensor.
Result(const Output<Node>& arg);

/// \brief Allows a value to be used as a function result.
///
/// \param arg Node that produces the input tensor.
/// \param use_input_names When true, the Result uses the input tensor's names as its own output names.
Result(const Output<Node>& arg, bool use_input_names);

void validate_and_infer_types() override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
4 changes: 2 additions & 2 deletions src/core/src/model.cpp
@@ -949,8 +949,8 @@ ov::Output<ov::Node> ov::Model::add_output(const ov::Output<ov::Node>& port) {
return input.get_node()->output(0);
}
}
auto result = std::make_shared<ov::op::v0::Result>(port);
m_results.push_back(result);
m_results.emplace_back(std::make_shared<ov::op::v0::Result>(port, true));
auto& result = m_results.back();
if (m_shared_rt_info->get_use_topological_cache()) {
if (cache_valid()) {
// Full update of topological cache is not needed, 'result' can be just inserted to the end
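With this change, add_output builds its Result with use_input_names set to true, so a freshly added model output inherits the producing tensor's names. A minimal usage sketch (hypothetical node and tensor name "relu_out"; assumes this commit's behavior):

    auto port = relu->output(0);          // tensor carries the name "relu_out" (assumed)
    auto out = model->add_output(port);   // Result is now created with use_input_names = true
    // out.get_names() is expected to contain "relu_out" as a model output name.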
2 changes: 1 addition & 1 deletion src/core/src/node_vector.cpp
@@ -29,7 +29,7 @@ ov::ResultVector ov::as_result_vector(const OutputVector& values) {
for (const auto& value : values) {
std::shared_ptr<Node> node = value.get_node_shared_ptr();
result.push_back(ov::is_type<ov::op::v0::Result>(node) ? ov::as_type_ptr<ov::op::v0::Result>(node)
: std::make_shared<ov::op::v0::Result>(value));
: std::make_shared<ov::op::v0::Result>(value, true));
}
return result;
}
8 changes: 8 additions & 0 deletions src/core/src/op/result.cpp
@@ -10,6 +10,7 @@

#include "itt.hpp"
#include "openvino/core/descriptor_tensor.hpp"
#include "openvino/op/util/op_types.hpp"

namespace ov {
namespace op {
@@ -19,6 +20,13 @@ Result::Result(const Output<Node>& arg) : Op({arg}) {
constructor_validate_and_infer_types();
}

Result::Result(const Output<Node>& arg, bool use_input_names) : Result(arg) {
if (use_input_names && !util::is_parameter(arg.get_node())) {
// On creation, take the input tensor's names to be used as model output names (except for Parameter sources, whose names are the model's input names).
get_output_tensor(0).add_names(get_input_tensor(0).get_names());
}
}

void Result::validate_and_infer_types() {
OV_OP_SCOPE(v0_Result_validate_and_infer_types);
NODE_VALIDATION_CHECK(this, get_input_size() == 1, "Argument has ", get_input_size(), " outputs (1 expected).");
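Taken together, the declaration and definition give callers an opt-in way to promote input tensor names to Result output names, skipping Parameter sources so model input names stay dedicated. A hedged usage sketch (hypothetical nodes and name):

    auto mul = std::make_shared<ov::op::v1::Multiply>(a, b);
    mul->get_output_tensor(0).set_names({"product"});            // hypothetical name
    auto res = std::make_shared<ov::op::v0::Result>(mul, true);  // "product" also becomes res's output name
    auto par_res = std::make_shared<ov::op::v0::Result>(param, true);  // Parameter source: nothing copied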
86 changes: 84 additions & 2 deletions src/core/tests/preprocess.cpp
@@ -2096,12 +2096,94 @@ TEST(pre_post_process, postprocess_one_node_many_outputs) {
p.output(2).tensor().set_element_type(element::f32);
model = p.build();
EXPECT_EQ(model->get_results().size(), 3);
// Tensor names on the modified outputs stay on the Split tensors and are not moved to the model outputs:
// a new Result with a different precision means a different output tensor.
EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 0);
// The name added on the output node is still present after pre-processing
EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1);
// The unmodified output still has its name
EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1);
EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 0);
EXPECT_EQ(model->get_results()[0]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.0");
EXPECT_EQ(model->get_results()[1]->input(0).get_source_output().get_node()->get_friendly_name(), "Split");
EXPECT_EQ(model->get_results()[2]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.2");
}

TEST(pre_post_process, postprocess_one_node_many_outputs_results_created_by_model) {
auto data1 = std::make_shared<op::v0::Parameter>(element::i32, Shape{3});
auto c1 = opset8::Constant::create(element::i32, Shape{}, {0});
auto op = std::make_shared<opset8::Split>(data1, c1, 3);
op->set_friendly_name("Split");
op->output(0).set_names({"tensor_Split0"});
auto r1 = std::make_shared<op::v0::Result>(op->output(0));

OutputVector outputs{r1};
for (size_t i = 1; i < op->get_num_splits(); i++) {
auto output = op->output(i);
output.set_names({"tensor_Split" + std::to_string(i)});
outputs.push_back(std::move(output));
}
auto model = std::make_shared<Model>(outputs, ParameterVector{data1});
// Set tensor name to model output 0
model->output(0).set_names({"output_split0"});
EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1);
// The Result's input still has the tensor_Split0 name from the Split op
EXPECT_EQ(model->output(0).get_node()->get_input_tensor(0).get_names().count("tensor_Split0"), 1);
EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1);
EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1);

auto p = PrePostProcessor(model);
p.output(0).tensor().set_element_type(element::f32);
p.output(2).tensor().set_element_type(element::f32);
model = p.build();
EXPECT_EQ(model->get_results().size(), 3);

// Output 0 was created by the user, so it did not adopt the input node's names; modified by PPP (tensor_Split0 remains on the Split output)
EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 0);
EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1);
// Output 1 was created by the model, so its input names are treated as its output names; not modified by PPP
EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1);
// Output 2 was created by the model, so its input names are treated as its output names; modified by PPP
EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1);
EXPECT_EQ(model->get_results()[0]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.0");
EXPECT_EQ(model->get_results()[1]->input(0).get_source_output().get_node()->get_friendly_name(), "Split");
EXPECT_EQ(model->get_results()[2]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.2");
}

TEST(pre_post_process, postprocess_one_node_many_outputs_results_created_or_added_by_model) {
auto data1 = std::make_shared<op::v0::Parameter>(element::i32, Shape{3});
auto c1 = opset8::Constant::create(element::i32, Shape{}, {0});
auto op = std::make_shared<opset8::Split>(data1, c1, 3);
op->set_friendly_name("Split");
for (size_t i = 0; i < op->get_output_size(); ++i) {
op->output(i).set_names({"tensor_Split" + std::to_string(i)});
}

OutputVector outputs{std::make_shared<op::v0::Result>(op->output(0)), op->output(1)};

auto model = std::make_shared<Model>(outputs, ParameterVector{data1});
model->add_output(op->output(2));
// Set tensor name to model output 0
model->output(0).set_names({"output_split0"});
EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1);
// The Result's input still has the tensor_Split0 name from the Split op
EXPECT_EQ(model->output(0).get_node()->get_input_tensor(0).get_names().count("tensor_Split0"), 1);
EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1);
EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1);

auto p = PrePostProcessor(model);
p.output(0).tensor().set_element_type(element::f32);
p.output(2).tensor().set_element_type(element::f32);
model = p.build();
EXPECT_EQ(model->get_results().size(), 3);

// Output 0 was created by the user, so it did not adopt the input node's names; modified by PPP (tensor_Split0 remains on the Split output)
EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 0);
EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1);
// Output 1 was created by the model, so its input names are treated as its output names; not modified by PPP
EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1);
// Output 2 was created by the model, so its input names are treated as its output names; modified by PPP
EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1);
EXPECT_EQ(model->get_results()[0]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.0");
EXPECT_EQ(model->get_results()[1]->input(0).get_source_output().get_node()->get_friendly_name(), "Split");
EXPECT_EQ(model->get_results()[2]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.2");
21 changes: 20 additions & 1 deletion src/core/tests/type_prop/result.cpp
@@ -10,6 +10,7 @@
namespace ov {
namespace test {

using ov::op::v0::Constant;
using ov::op::v0::Parameter;
using std::make_shared;
using testing::UnorderedElementsAre;
@@ -135,7 +136,7 @@ TEST_F(TypePropResultV0Test, preserve_specific_name_on_input_replace) {
const auto a = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
a->get_output_tensor(0).set_names({"input a"});

const auto result = make_op(a);
const auto result = make_op(a, true);
result->output(0).set_names({"out"});

EXPECT_THAT(result->input(0).get_tensor().get_names(), UnorderedElementsAre("out", "input a"));
@@ -151,5 +152,23 @@ TEST_F(TypePropResultV0Test, preserve_specific_name_on_input_replace) {
EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("out"));
EXPECT_THAT(a->output(0).get_names(), UnorderedElementsAre("input a"));
}

TEST_F(TypePropResultV0Test, take_input_node_names) {
const auto c = std::make_shared<Constant>(element::f32, Shape{2}, std::vector<float>{2.f, 1.f});
c->get_output_tensor(0).set_names({"constant data"});
const auto result = make_op(c, true);

EXPECT_THAT(result->input(0).get_tensor().get_names(), UnorderedElementsAre("constant data"));
EXPECT_THAT(result->output(0).get_names(), UnorderedElementsAre("constant data"));

const auto new_const = std::make_shared<Constant>(element::f32, Shape{2}, std::vector<float>{0.f, 0.f});

result->input(0).replace_source_output(new_const);
result->validate_and_infer_types();

EXPECT_THAT(c->get_output_tensor(0).get_names(), testing::IsEmpty());
EXPECT_THAT(result->get_input_tensor(0).get_names(), UnorderedElementsAre("constant data"));
EXPECT_THAT(result->get_output_tensor(0).get_names(), UnorderedElementsAre("constant data"));
}
} // namespace test
} // namespace ov
2 changes: 1 addition & 1 deletion src/frontends/ir/src/ir_deserializer.cpp
@@ -1033,7 +1033,7 @@ std::shared_ptr<ov::Node> ov::XmlDeserializer::create_node(const std::vector<ov:
if (!ov::op::util::is_parameter(result->get_input_source_output(0).get_node())) {
// Copy names if parent node is not parameter, model's input names should not be dedicated
// output names as they could be removed from Parameter's tensor during model transformations.
ov::descriptor::copy_tensor_names(result->get_output_tensor(0), result->get_input_tensor(0));
result->get_output_tensor(0).add_names(result->get_input_tensor(0).get_names());
}
}
}
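The switch from copy_tensor_names to add_names is the point of this change: add_names merges the input tensor's names into whatever the Result output already carries instead of replacing them. A sketch of the assumed semantics:

    auto& out_tensor = result->get_output_tensor(0);
    out_tensor.set_names({"user_output"});                          // name set earlier, e.g. by the user
    out_tensor.add_names(result->get_input_tensor(0).get_names());  // merge: "user_output" survives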
7 changes: 0 additions & 7 deletions src/frontends/onnx/frontend/src/input_model.cpp
@@ -533,13 +533,6 @@ void InputModel::add_tensor_names(std::shared_ptr<Model>& model) {
it->add_names(tensor_names.second);
}
}

// Set model output names
for (auto&& result : model->get_results()) {
if (!is_type<op::v0::Parameter>(result->get_input_source_output(0).get_node())) {
result->get_output_tensor(0).add_names(result->get_input_tensor(0).get_names());
}
}
}

void InputModel::reshape_model_inputs(std::shared_ptr<Model>& model) {
@@ -25,62 +25,43 @@ KERNEL (mvn_gpu_bfyx_opt)(

const uint data_set_offset = data_set_idx * data_set_size;
const uint my_data_offset = data_set_offset + in_data_set_idx;
uint iters_num = items_num;
if (in_data_set_idx < leftovers)
++iters_num;

float my_sum = 0;
float tmp;

//each WI reads items_num consecutive items from batch*feature
for (uint i=0; i<items_num; ++i)
for (uint i=0; i<iters_num; ++i)
{
my_sum += (float)input[my_data_offset + i * workers_per_data_set];
}

if (in_data_set_idx < leftovers)
{
my_sum += (float)input[data_set_offset + workers_per_data_set * items_num + in_data_set_idx];
}

my_sum = work_group_reduce_add(my_sum) / data_set_size;

#if NORMALIZE_VARIANCE == 0
for (uint i=0; i<items_num; ++i) {
for (uint i=0; i<iters_num; ++i) {
uint iteration_in_data_set_offset = i * workers_per_data_set;
ACTIVATION_TYPE result = TO_ACTIVATION_TYPE(input[my_data_offset + iteration_in_data_set_offset]) - TO_ACTIVATION_TYPE(my_sum);
# if HAS_FUSED_OPS
FUSED_OPS;
output[my_data_offset + iteration_in_data_set_offset] = FUSED_OPS_RESULT;
# else
output[my_data_offset + iteration_in_data_set_offset] = TO_OUTPUT_TYPE(ACTIVATION(result, ACTIVATION_PARAMS));
# endif
}
if (in_data_set_idx < leftovers) {
uint iteration_in_data_set_offset = items_num * workers_per_data_set;
ACTIVATION_TYPE result = TO_ACTIVATION_TYPE(input[my_data_offset + iteration_in_data_set_offset]) - TO_ACTIVATION_TYPE(my_sum);
# if HAS_FUSED_OPS
FUSED_OPS;
output[my_data_offset + iteration_in_data_set_offset] = FUSED_OPS_RESULT;
# else
output[my_data_offset + iteration_in_data_set_offset] = TO_OUTPUT_TYPE(ACTIVATION(result, ACTIVATION_PARAMS));
# endif
}
#else

float my_variance = 0.f;
//each WI reads items_num consecutive items from batch*feature
for (uint i=0; i<items_num; ++i)
for (uint i=0; i<iters_num; ++i)
{
tmp = (float)input[my_data_offset + i * workers_per_data_set];
tmp -= my_sum;
my_variance = fma(tmp, tmp, my_variance);
}

if (in_data_set_idx < leftovers)
{
tmp = (float)input[data_set_offset + workers_per_data_set * items_num + in_data_set_idx];
tmp -= my_sum;
my_variance = fma(tmp, tmp, my_variance);
}

my_variance = work_group_reduce_add(my_variance);

if (in_data_set_idx == 0)
@@ -96,24 +77,14 @@

my_variance = work_group_broadcast(my_variance, 0);

for (uint i=0; i<items_num; ++i) {
for (uint i=0; i<iters_num; ++i) {
uint iteration_in_data_set_offset = i * workers_per_data_set;
ACTIVATION_TYPE result = (TO_ACTIVATION_TYPE(input[my_data_offset + iteration_in_data_set_offset]) - TO_ACTIVATION_TYPE(my_sum)) * TO_ACTIVATION_TYPE(my_variance);
# if HAS_FUSED_OPS
FUSED_OPS;
output[my_data_offset + iteration_in_data_set_offset] = FUSED_OPS_RESULT;
# else
output[my_data_offset + iteration_in_data_set_offset] = TO_OUTPUT_TYPE(ACTIVATION(result, ACTIVATION_PARAMS));
# endif
}
if (in_data_set_idx < leftovers) {
uint iteration_in_data_set_offset = items_num * workers_per_data_set;
ACTIVATION_TYPE result = (TO_ACTIVATION_TYPE(input[my_data_offset + iteration_in_data_set_offset]) - TO_ACTIVATION_TYPE(my_sum)) * TO_ACTIVATION_TYPE(my_variance);
# if HAS_FUSED_OPS
FUSED_OPS;
output[my_data_offset + iteration_in_data_set_offset] = FUSED_OPS_RESULT;
# else
output[my_data_offset + iteration_in_data_set_offset] = TO_OUTPUT_TYPE(ACTIVATION(result, ACTIVATION_PARAMS));
# endif
}
#endif
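The kernel refactor folds the leftover elements into the main loops: each of the first `leftovers` work-items now runs one extra iteration (iters_num = items_num + 1) instead of executing a separate tail block, which removes four near-duplicate code paths. A host-side sketch of the same work distribution with hypothetical sizes:

    #include <cstdio>

    int main() {
        const unsigned data_set_size = 100;        // elements per data set (assumed)
        const unsigned workers_per_data_set = 8;   // work-items per data set (assumed)
        const unsigned items_num = data_set_size / workers_per_data_set;  // 12
        const unsigned leftovers = data_set_size % workers_per_data_set;  // 4

        unsigned covered = 0;
        for (unsigned wi = 0; wi < workers_per_data_set; ++wi) {
            const unsigned iters_num = items_num + (wi < leftovers ? 1u : 0u);
            covered += iters_num;                  // same per-item indexing as the kernel loop
        }
        std::printf("covered %u of %u elements\n", covered, data_set_size);  // covered 100 of 100
        return 0;
    }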
