diff --git a/.ci/azure/linux_coverity.yml b/.ci/azure/linux_coverity.yml index 642617b903a545..17442c16397983 100644 --- a/.ci/azure/linux_coverity.yml +++ b/.ci/azure/linux_coverity.yml @@ -91,8 +91,6 @@ jobs: -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.8 -DENABLE_WHEEL=ON - # Skipping tests from static analysis - # -DENABLE_TESTS=ON -DNGRAPH_ONNX_FRONTEND_ENABLE=ON -DENABLE_FASTER_BUILD=ON -DENABLE_STRICT_DEPENDENCIES=OFF diff --git a/.gitignore b/.gitignore index 1c5368e74d57c9..a136188f3a93d0 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ inference-engine/report docs/template_plugin/html/ CMakeLists.txt.user docs/IE_PLUGIN_DG/html/ +inference-engine/ie_bridges/python/wheel/.env *.project *.cproject @@ -57,15 +58,3 @@ __pycache__ /model-optimizer/*.mapping /model-optimizer/*.dat /model-optimizer/*.svg - -# ngraph -ngraph/src/CPackConfig.cmake -ngraph/src/CPackSourceConfig.cmake -ngraph/src/VERSION -ngraph/src/gtest/ -ngraph/src/json/ -ngraph/src/ngraphConfig.cmake -ngraph/src/ngraphConfigVersion.cmake -ngraph/src/protobuf/ -ngraph/src/src/ -ngraph/src/test/ diff --git a/docs/template_plugin/tests/functional/op_reference/if.cpp b/docs/template_plugin/tests/functional/op_reference/if.cpp new file mode 100644 index 00000000000000..c02bd246c7f704 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/if.cpp @@ -0,0 +1,368 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + +struct IfFunctionalBase { + virtual std::shared_ptr create_function(const std::vector& if_inputs, + const std::vector& results) = 0; + IfFunctionalBase() {} +}; + +struct IfCondConst : public IfFunctionalBase { + std::shared_ptr create_function(const std::vector& if_inputs, + const std::vector& results) override { + NGRAPH_CHECK(if_inputs.size() == 2, "Incorrect test case! Number of inputs is not 2."); + NGRAPH_CHECK(results.size() == 1, "Incorrect test case! Number of outputs is not 1."); + + auto X = std::make_shared(if_inputs[0].type, if_inputs[0].shape); + auto Y = std::make_shared(if_inputs[1].type, if_inputs[1].shape); + auto cond = std::make_shared(ngraph::element::boolean, Shape{1}, cond_value); + auto Xt = std::make_shared(if_inputs[0].type, PartialShape::dynamic()); + auto Yt = std::make_shared(if_inputs[1].type, PartialShape::dynamic()); + auto Xe = std::make_shared(if_inputs[0].type, PartialShape::dynamic()); + auto then_op = std::make_shared(Xt, Yt); + auto res0 = std::make_shared(then_op); + auto res1 = std::make_shared(Xe); + auto then_body = std::make_shared(OutputVector{res0}, ParameterVector{Xt, Yt}); + auto else_body = std::make_shared(OutputVector{res1}, ParameterVector{Xe}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(X, Xt, Xe); + if_op->set_input(Y, Yt, nullptr); + auto result = if_op->set_output(res0, res1); + auto res = std::make_shared(result); + auto fun = std::make_shared(OutputVector{res}, ParameterVector{X, Y}); + return fun; + } + + explicit IfCondConst(bool value) : cond_value(value) {} + bool cond_value; +}; + +struct IfCondIsNonConst : public IfFunctionalBase { + std::shared_ptr create_function(const std::vector& if_inputs, + const std::vector& results) override { + NGRAPH_CHECK(if_inputs.size() == 3, "Incorrect test case! 
Number of inputs is not 3."); + NGRAPH_CHECK(results.size() == 1, "Incorrect test case! Number of outputs is not 1."); + + auto X = std::make_shared(element::f32, Shape{1, 2, 2}); + auto Y = std::make_shared(element::f32, Shape{1, 2, 2}); + auto cond = std::make_shared(element::boolean, Shape{1}); + // Set up the cell body, a function from (Xi, Yi) -> (Zo) + // Body parameters + auto Xt = std::make_shared(element::f32, PartialShape::dynamic()); + auto Yt = std::make_shared(element::f32, PartialShape::dynamic()); + auto Xe = std::make_shared(element::f32, PartialShape::dynamic()); + auto Ye = std::make_shared(element::f32, PartialShape::dynamic()); + // Body + auto then_op = std::make_shared(Xt, Yt); + auto else_op = std::make_shared(Xe, Ye); + auto then_op_result = std::make_shared(then_op); + auto else_op_result = std::make_shared(else_op); + auto then_body = std::make_shared(OutputVector{then_op_result}, ParameterVector{Xt, Yt}); + auto else_body = std::make_shared(OutputVector{else_op_result}, ParameterVector{Xe, Ye}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(X, Xt, Xe); + if_op->set_input(Y, Yt, Ye); + auto result = if_op->set_output(then_op_result, else_op_result); + auto res = std::make_shared(result); + auto fun = std::make_shared(OutputVector{res}, ParameterVector{cond, X, Y}); + return fun; + } +}; + +struct IfWithoutAdditionalInputs : IfFunctionalBase { + std::shared_ptr create_function(const std::vector& if_inputs, + const std::vector& results) override { + NGRAPH_CHECK(if_inputs.size() == 1, "Incorrect test case! Number of inputs is not 1."); + NGRAPH_CHECK(results.size() == 1, "Incorrect test case! Number of outputs is not 1."); + + auto cond = std::make_shared(element::boolean, Shape{1}); + auto A = std::make_shared(element::f32, Shape{1}, 8.0); + auto B = std::make_shared(element::f32, Shape{1}, 2.0); + auto A_res = std::make_shared(A); + auto B_res = std::make_shared(B); + auto then_body = std::make_shared(OutputVector{A_res}, ParameterVector{}); + auto else_body = std::make_shared(OutputVector{B_res}, ParameterVector{}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + auto res = if_op->set_output(A_res, B_res); + auto fun = std::make_shared(OutputVector{res}, ParameterVector{cond}); + return fun; + } +}; + +struct IfDynamismCaseWithStaticInputs : public IfFunctionalBase { + std::shared_ptr create_function(const std::vector& if_inputs, + const std::vector& results) override { + NGRAPH_CHECK(if_inputs.size() == 4, "Incorrect test case! Number of inputs is not 4."); + NGRAPH_CHECK(results.size() == 2, "Incorrect test case! 
Number of outputs is not 2."); + + auto X = std::make_shared(element::f32, Shape{1, 2, 2}); + auto Y = std::make_shared(element::f32, Shape{4, 2, 2}); + auto Z = std::make_shared(element::f32, Shape{8, 8, 8}); + auto cond = std::make_shared(element::boolean, Shape{1}); + // Set up the cell body, a function from (Xi, Yi) -> (Zo) + // Body parameters + auto Xt = std::make_shared(element::f32, PartialShape::dynamic()); + auto Yt = std::make_shared(element::f32, PartialShape::dynamic()); + auto Xe = std::make_shared(element::f32, PartialShape::dynamic()); + auto Ze = std::make_shared(element::f32, PartialShape::dynamic()); + // Body + auto then_op = std::make_shared(Xt, Xt); + auto else_op = std::make_shared(Xe, Xe); + auto then_op_result1 = std::make_shared(then_op); + auto then_op_result2 = std::make_shared(Yt); + auto else_op_result1 = std::make_shared(else_op); + auto else_op_result2 = std::make_shared(Ze); + auto then_body = + std::make_shared(OutputVector{then_op_result1, then_op_result2}, ParameterVector{Xt, Yt}); + auto else_body = + std::make_shared(OutputVector{else_op_result1, else_op_result2}, ParameterVector{Xe, Ze}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(X, Xt, Xe); + if_op->set_input(Y, Yt, nullptr); + if_op->set_input(Z, nullptr, Ze); + auto res1 = if_op->set_output(then_op_result1, else_op_result1); + auto res2 = if_op->set_output(then_op_result2, else_op_result2); + auto result_if1 = std::make_shared(res1); + auto result_if2 = std::make_shared(res2); + auto fun = std::make_shared(OutputVector{result_if1, result_if2}, ParameterVector{cond, X, Y, Z}); + return fun; + } +}; + +struct IfConditionIsScalar : public IfFunctionalBase { + std::shared_ptr create_function(const std::vector& if_inputs, + const std::vector& results) override { + NGRAPH_CHECK(if_inputs.size() == 3, "Incorrect test case! Number of inputs is not 3."); + NGRAPH_CHECK(results.size() == 1, "Incorrect test case! 
Number of outputs is not 1."); + + auto X = std::make_shared(element::f32, Shape{1, 2, 2}); + auto Y = std::make_shared(element::f32, Shape{1, 2, 2}); + auto cond = std::make_shared(element::boolean, Shape{}); + // Set up the cell body, a function from (Xi, Yi) -> (Zo) + // Body parameters + auto Xt = std::make_shared(element::f32, PartialShape::dynamic()); + auto Yt = std::make_shared(element::f32, PartialShape::dynamic()); + auto Xe = std::make_shared(element::f32, PartialShape::dynamic()); + auto Ye = std::make_shared(element::f32, PartialShape::dynamic()); + // Body + auto then_op = std::make_shared(Xt, Yt); + auto else_op = std::make_shared(Xe, Ye); + auto then_op_result = std::make_shared(then_op); + auto else_op_result = std::make_shared(else_op); + auto then_body = std::make_shared(OutputVector{then_op_result}, ParameterVector{Xt, Yt}); + auto else_body = std::make_shared(OutputVector{else_op_result}, ParameterVector{Xe, Ye}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(X, Xt, Xe); + if_op->set_input(Y, Yt, Ye); + auto res = if_op->set_output(then_op_result, else_op_result); + if_op->validate_and_infer_types(); + std::vector X_v{1.0, 2.0, 3.0, 4.0}; + std::vector Y_v{2.0, 1.0, 2.0, 3.0}; + auto fun = std::make_shared(OutputVector{res}, ParameterVector{cond, X, Y}); + return fun; + } +}; + +struct IfConditionIsDynamic : public IfFunctionalBase { + std::shared_ptr create_function(const std::vector& if_inputs, + const std::vector& results) override { + NGRAPH_CHECK(if_inputs.size() == 3, "Incorrect test case! Number of inputs is not 3."); + NGRAPH_CHECK(results.size() == 1, "Incorrect test case! Number of outputs is not 1."); + + auto X = std::make_shared(element::f32, Shape{1, 2, 2}); + auto Y = std::make_shared(element::f32, Shape{1, 2, 2}); + auto cond = std::make_shared(element::boolean, PartialShape{Dimension::dynamic()}); + // auto cond = std::make_shared(element::boolean, Shape{1}); + // Set up the cell body, a function from (Xi, Yi) -> (Zo) + // Body parameters + auto Xt = std::make_shared(element::f32, PartialShape::dynamic()); + auto Yt = std::make_shared(element::f32, PartialShape::dynamic()); + auto Xe = std::make_shared(element::f32, PartialShape::dynamic()); + auto Ye = std::make_shared(element::f32, PartialShape::dynamic()); + // Body + auto then_op = std::make_shared(Xt, Yt); + auto else_op = std::make_shared(Xe, Ye); + auto then_op_result = std::make_shared(then_op); + auto else_op_result = std::make_shared(else_op); + auto then_body = std::make_shared(OutputVector{then_op_result}, ParameterVector{Xt, Yt}); + auto else_body = std::make_shared(OutputVector{else_op_result}, ParameterVector{Xe, Ye}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(X, Xt, Xe); + if_op->set_input(Y, Yt, Ye); + auto rs = if_op->set_output(then_op_result, else_op_result); + auto result = std::make_shared(rs); + auto fun = std::make_shared(OutputVector{result}, ParameterVector{cond, X, Y}); + return fun; + } +}; + +struct IfParams { + IfParams(const std::shared_ptr& functional, + const std::vector& if_inputs, + const std::vector& expected_results, + const std::string& test_case_name) + : function(functional), + inputs(if_inputs), + expected_results(expected_results), + test_case_name(test_case_name) {} + + std::shared_ptr function; + std::vector inputs; + std::vector expected_results; + std::string test_case_name; +}; + +class 
ReferenceIfLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = params.function->create_function(params.inputs, params.expected_results); + inputData.reserve(params.inputs.size()); + refOutData.reserve(params.expected_results.size()); + for (auto& input_tensor : params.inputs) { + inputData.push_back(input_tensor.data); + } + for (auto& expected_tensor : params.expected_results) { + refOutData.push_back(expected_tensor.data); + } + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + return param.test_case_name; + } +}; + +TEST_P(ReferenceIfLayerTest, IfWithHardcodedRefs) { + Exec(); +} +std::vector Y_gen() { + std::vector Y_v; + for (auto c_ind = 0; c_ind < 4; ++c_ind) { + for (auto d_ind = 0; d_ind < 4; ++d_ind) { + Y_v.push_back(static_cast(c_ind * d_ind)); + } + } + return Y_v; +} +std::vector Z_gen() { + std::vector Z_v; + for (auto c_ind = 0; c_ind < 8; ++c_ind) { + for (auto d_ind = 0; d_ind < 64; ++d_ind) { + Z_v.push_back(static_cast(c_ind * d_ind)); + } + } + return Z_v; +} + +INSTANTIATE_TEST_SUITE_P( + smoke_If_With_Hardcoded_Refs, + ReferenceIfLayerTest, + ::testing::Values( + IfParams( + std::make_shared(true), + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 1.0, 1.0, 1.0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 2.0, 2.0, 2.0})}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 2.0, 2.0, 2.0})}, + "if_condition_const_is_true"), + IfParams( + std::make_shared(false), + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 1.0, 1.0, 1.0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 2.0, 2.0, 2.0})}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 1.0, 1.0, 1.0})}, + "if_condition_const_is_false"), + IfParams( + std::make_shared(), + std::vector{Tensor(Shape{1}, ngraph::element::boolean, std::vector{1}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 2.0, 3.0, 4.0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 1.0, 2.0, 3.0})}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 2.0, 6.0, 12.0})}, + "if_condition_is_non_const_true"), + IfParams( + std::make_shared(), + std::vector{Tensor(Shape{1}, ngraph::element::boolean, std::vector{0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 2.0, 3.0, 4.0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 1.0, 2.0, 3.0})}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{3.0, 3.0, 5.0, 7.0})}, + "if_condition_is_non_const_false"), + IfParams(std::make_shared(), + std::vector{Tensor(Shape{1}, ngraph::element::boolean, std::vector{1})}, + std::vector{Tensor(Shape{1}, ngraph::element::f32, std::vector{8.0})}, + "if_without_additional_inputs_condition_is_true"), + IfParams(std::make_shared(), + std::vector{Tensor(Shape{1}, ngraph::element::boolean, std::vector{0})}, + std::vector{Tensor(Shape{1}, ngraph::element::f32, std::vector{2.0})}, + "if_without_additional_inputs_condition_is_false"), + IfParams( + std::make_shared(), + std::vector{Tensor(Shape{}, ngraph::element::boolean, std::vector{1}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 2.0, 3.0, 4.0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 1.0, 2.0, 3.0})}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 2.0, 
6.0, 12.0})}, + "if_condition_is_scalar_cond_true"), + IfParams( + std::make_shared(), + std::vector{Tensor(Shape{}, ngraph::element::boolean, std::vector{0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 2.0, 3.0, 4.0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 1.0, 2.0, 3.0})}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{3.0, 3.0, 5.0, 7.0})}, + "if_condition_is_scalar_cond_false"), + IfParams( + std::make_shared(), + std::vector{Tensor(Shape{}, ngraph::element::boolean, std::vector{1}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 2.0, 3.0, 4.0}), + Tensor(Shape{4, 2, 2}, ngraph::element::f32, Y_gen()), + Tensor(Shape{8, 8, 8}, ngraph::element::f32, Z_gen())}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 4.0, 9.0, 16.0}), + Tensor(Shape{4, 2, 2}, ngraph::element::f32, Y_gen())}, + "If_dynamism_case_with_static_inputs_condition_true"), + IfParams( + std::make_shared(), + std::vector{Tensor(Shape{}, ngraph::element::boolean, std::vector{0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 2.0, 3.0, 4.0}), + Tensor(Shape{4, 2, 2}, ngraph::element::f32, Y_gen()), + Tensor(Shape{8, 8, 8}, ngraph::element::f32, Z_gen())}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 4.0, 6.0, 8.0}), + Tensor(Shape{8, 8, 8}, ngraph::element::f32, Z_gen())}, + "If_dynamism_case_with_static_inputs_condition_false"), + IfParams( + std::make_shared(), + std::vector{Tensor(Shape{}, ngraph::element::boolean, std::vector{1}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 2.0, 3.0, 4.0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 1.0, 2.0, 3.0})}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 2.0, 6.0, 12.0})}, + "if_condition_is_dynamic_cond_true"), + IfParams( + std::make_shared(), + std::vector{Tensor(Shape{}, ngraph::element::boolean, std::vector{0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{1.0, 2.0, 3.0, 4.0}), + Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{2.0, 1.0, 2.0, 3.0})}, + std::vector{Tensor(Shape{1, 2, 2}, ngraph::element::f32, std::vector{3.0, 3.0, 5.0, 7.0})}, + "if_condition_is_dynamic_cond_false"))); \ No newline at end of file diff --git a/docs/template_plugin/tests/functional/op_reference/scatter_update.cpp b/docs/template_plugin/tests/functional/op_reference/scatter_update.cpp new file mode 100644 index 00000000000000..9cf8bfa6940aaf --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/scatter_update.cpp @@ -0,0 +1,1007 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace reference_tests; + +namespace reference_tests { + +namespace { + +// ---------------------- V3 ------------------------------ + +struct ScatterUpdate3Params { + Tensor data; + Tensor indices; + Tensor updates; + Tensor axis; + Tensor expected; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, data); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, indices); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, updates); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, axis); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected); +}; + +class ReferenceScatterUpdate6LayerTest : public 
testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params); + inputData = {params.data.data, params.indices.data, params.updates.data, params.axis.data}; + refOutData = {params.expected.data}; + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + const auto& param = obj.param; + std::ostringstream result; + result << "D_shape=" << param.data.shape << "_"; + result << "I_shape=" << param.indices.shape << "_"; + result << "U_shape=" << param.updates.shape << "_"; + result << "A_shape=" << param.axis.shape << "_"; + result << "dType=" << param.data.type << "_"; + result << "iType=" << param.indices.type << "_"; + result << "uType=" << param.updates.type << "_"; + result << "aType=" << param.axis.type << "_"; + result << "oType=" << param.expected.type; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const ScatterUpdate3Params& params) { + const auto data_shape = params.data.shape; + const auto indices_shape = params.indices.shape; + const auto updates_shape = params.updates.shape; + const auto axis_shape = params.axis.shape; + const auto numeric_type = params.data.type; + const auto indices_type = params.indices.type; + const auto axis_type = params.axis.type; + + const auto data = std::make_shared(numeric_type, data_shape); + const auto indices = std::make_shared(indices_type, indices_shape); + const auto updates = std::make_shared(numeric_type, updates_shape); + const auto axis = std::make_shared(axis_type, axis_shape); + const auto scatter_update = std::make_shared(data, indices, updates, axis); + return std::make_shared(ngraph::NodeVector {scatter_update}, ngraph::ParameterVector {data, indices, updates, axis}); + } +}; + +TEST_P(ReferenceScatterUpdate6LayerTest, ScatterUpdateWithHardcodedRefs) { + Exec(); +} + +template +std::vector generateScatterUpdate3Params(const element::Type& numeric_type, const element::Type& integer_type) { + using N = typename element_type_traits::value_type; + using I = typename element_type_traits::value_type; + std::vector ScatterUpdateParams { + Builder {} + .data({{3, 2, 2, 3}, numeric_type, std::vector { + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{2, 1}, integer_type, std::vector {0, 1}}) + .updates({{3, 3, 2, 2, 2}, numeric_type, std::vector { + 1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24, + 25, 26, + 27, 28, + 29, 30, + 31, 32, + 33, 34, + 35, 36, + 37, 38, + 39, 40, + 41, 42, + 43, 44, + 45, 46, + 47, 48, + 49, 50, + 51, 52, + 53, 54, + 55, 56, + 57, 58, + 59, 60, + 61, 62, + 63, 64, + 65, 66, + 67, 68, + 69, 70, + 71, 72}}) + .axis({{1}, integer_type, std::vector {2}}) + .expected({{3, 2, 2, 3}, numeric_type, std::vector { + 1, 2, 9, + 3, 4, 11, + 10, 17, 18, + 12, 19, 20, + 25, 26, 33, + 27, 28, 35, + 34, 41, 42, + 36, 43, 44, + 49, 50, 57, + 51, 52, 59, + 58, 65, 66, + 60, 67, 68}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{2}, integer_type, std::vector {1, 2}}) + .updates({{3, 2}, numeric_type, std::vector {1, 1, + 1, 2, + 2, 2}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{3, 3}, numeric_type, std::vector {0, 1, 1, + 0, 1, 2, + 0, 2, 2}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{2}, integer_type, 
std::vector {1, 2}}) + .updates({{2, 3}, numeric_type, std::vector {1, 1, 1, + 2, 2, 2}}) + .axis({{1}, integer_type, std::vector {0}}) + .expected({{3, 3}, numeric_type, std::vector {0, 0, 0, + 1, 1, 1, + 2, 2, 2}}), + Builder {} + .data({{3, 4}, numeric_type, std::vector {0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}}) + .indices({{2}, integer_type, std::vector {0, 2}}) + .updates({{3, 4}, numeric_type, std::vector {1, 2, 3, 7, + 4, 5, 6, 8, + 7, 8, 9, 10}}) + .axis({{1}, integer_type, std::vector {0}}) + .expected({{3, 4}, numeric_type, std::vector {1, 2, 3, 7, + 0, 0, 0, 0, + 4, 5, 6, 8}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{2}, integer_type, std::vector {0, 2}}) + .updates({{3, 5}, numeric_type, std::vector {1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{3, 3}, numeric_type, std::vector {1, 0, 2, + 6, 0, 7, + 11, 0, 12}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 2}, integer_type, std::vector {1, 2}}) + .updates({{1, 2, 3}, numeric_type, std::vector {1, 2, 3, + 4, 5, 6}}) + .axis({{1}, integer_type, std::vector {0}}) + .expected({{3, 3}, numeric_type, std::vector {0, 0, 0, + 1, 2, 3, + 4, 5, 6}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 2}, integer_type, std::vector {1, 2}}) + .updates({{3, 1, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{3, 3}, numeric_type, std::vector {0, 1, 2, + 0, 3, 4, + 0, 5, 6}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 2}, integer_type, std::vector {1, 2}}) + .updates({{4, 4, 4}, numeric_type, std::vector {1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + 17, 18, 19, 20, + 21, 22, 23, 24, + 25, 26, 27, 28, + 29, 30, 31, 32, + 33, 34, 35, 36, + 37, 38, 39, 40, + 41, 42, 43, 44, + 45, 46, 47, 48, + 49, 50, 51, 52, + 53, 54, 55, 56, + 57, 58, 59, 60, + 61, 62, 63, 64}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{3, 3}, numeric_type, std::vector {0, 1, 2, + 0, 17, 18, + 0, 33, 34}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 3}, integer_type, std::vector {0, 1, 2}}) + .updates({{4, 4, 4}, numeric_type, std::vector {1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16, + 17, 18, 19, 20, + 21, 22, 23, 24, + 25, 26, 27, 28, + 29, 30, 31, 32, + 33, 34, 35, 36, + 37, 38, 39, 40, + 41, 42, 43, 44, + 45, 46, 47, 48, + 49, 50, 51, 52, + 53, 54, 55, 56, + 57, 58, 59, 60, + 61, 62, 63, 64}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{3, 3}, numeric_type, std::vector {1, 2, 3, + 17, 18, 19, + 33, 34, 35}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 1}, integer_type, std::vector {2}}) + .updates({{2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{3, 3}, numeric_type, std::vector {0, 0, 1, + 0, 0, 5, + 0, 0, 0}}), + Builder {} + .data({{3, 4}, numeric_type, std::vector {0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}}) + .indices({{1, 4}, integer_type, std::vector {0, 1, 2, 3}}) + .updates({{2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{3, 4}, 
numeric_type, std::vector {1, 2, 3, 4, + 5, 6, 7, 8, + 0, 0, 0, 0}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 3}, integer_type, std::vector {0, 1, 2}}) + .updates({{2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8}}) + .axis({{1}, integer_type, std::vector {0}}) + .expected({{3, 3}, numeric_type, std::vector {1, 2, 0, + 3, 4, 0, + 5, 6, 0}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 3}, integer_type, std::vector {0, 1, 2}}) + .updates({{2, 2, 1}, numeric_type, std::vector {1, 2, + 3, 4}}) + .axis({{1}, integer_type, std::vector {0}}) + .expected({{3, 3}, numeric_type, std::vector {1, 0, 0, + 2, 0, 0, + 3, 0, 0}}), + Builder {} + .data({{3, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 3}, integer_type, std::vector {0, 1, 2}}) + .updates({{1, 1, 1}, numeric_type, std::vector {1}}) + .axis({{1}, integer_type, std::vector {0}}) + .expected({{3, 3}, numeric_type, std::vector {1, 0, 0, + 0, 0, 0, + 0, 0, 0}}), + Builder {} + .data({{2, 2}, numeric_type, std::vector {0, 0, + 0, 0}}) + .indices({{2, 1}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8}}) + .axis({{1}, integer_type, std::vector {0}}) + .expected({{2, 2}, numeric_type, std::vector {1, 2, + 3, 4}}), + Builder {} + .data({{4, 4}, numeric_type, std::vector {0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}}) + .indices({{4, 1}, integer_type, std::vector {0, 1, 2, 3}}) + .updates({{2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8}}) + .axis({{1}, integer_type, std::vector {0}}) + .expected({{4, 4}, numeric_type, std::vector {1, 2, 0, 0, + 3, 4, 0, 0, + 5, 6, 0, 0, + 7, 8, 0, 0}}), + Builder {} + .data({{2, 3, 4, 2}, numeric_type, std::vector {0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}}) + .indices({{3, 1}, integer_type, std::vector {0, 1, 2}}) + .updates({{3, 2, 3, 3, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24, + 25, 26, + 27, 28, + 29, 30, + 31, 32, + 33, 34, + 35, 36, + 37, 38, + 39, 40, + 41, 42, + 43, 44, + 45, 46, + 47, 48, + 49, 50, + 51, 52, + 53, 54, + 55, 56, + 57, 58, + 59, 60, + 61, 62, + 63, 64, + 65, 66, + 67, 68, + 69, 70, + 71, 72, + 73, 74, + 75, 76, + 77, 78, + 79, 80, + 81, 82, + 83, 84, + 85, 86, + 87, 88, + 89, 90, + 91, 92, + 93, 94, + 95, 96, + 97, 98, + 99, 100, + 101, 102, + 103, 104, + 105, 106, + 107, 108}}) + .axis({{1}, integer_type, std::vector {2}}) + .expected({{2, 3, 4, 2}, numeric_type, std::vector { + 1, 2, + 3, 4, + 5, 6, + 0, 0, + 19, 20, + 21, 22, + 23, 24, + 0, 0, + 37, 38, + 39, 40, + 41, 42, + 0, 0, + 55, 56, + 57, 58, + 59, 60, + 0, 0, + 73, 74, + 75, 76, + 77, 78, + 0, 0, + 91, 92, + 93, 94, + 95, 96, + 0, 0}}), + Builder {} + .data({{1, 3, 2, 2}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 3}, integer_type, std::vector {2, 0, 1}}) + .updates({{1, 3, 2, 2, 2}, numeric_type, std::vector { + 1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{1, 3, 2, 2}, numeric_type, std::vector { + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 1, 2, + 3, 4}}), 
+ Builder {} + .data({{2, 2, 2}, numeric_type, std::vector {0, 0, + 0, 0, + 0, 0, + 0, 0}}) + .indices({{1, 2}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 3, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 13, 14, + 15, 16}}), + Builder {} + .data({{2, 2, 4}, numeric_type, std::vector {0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}}) + .indices({{1, 2, 1}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 3, 2, 1}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{2, 2, 2}, numeric_type, std::vector {1, 13, 0, 0, + 2, 14, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}}), + Builder {} + .data({{2, 4, 2}, numeric_type, std::vector {0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}}) + .indices({{1, 2, 1}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 3, 2, 1}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{2, 4, 2}, numeric_type, std::vector {1, 13, + 2, 14, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}}), + Builder {} + .data({{2, 2, 2}, numeric_type, std::vector {0, 0, + 0, 0, + 0, 0, + 0, 0}}) + .indices({{2, 1}, integer_type, std::vector {1, 0}}) + .updates({{2, 2, 3, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24}}) + .axis({{1}, integer_type, std::vector {2}}) + .expected({{2, 2, 2}, numeric_type, std::vector { + 2, 1, + 8, 7, + // + 14, 13, + 20, 19}}), +Builder {} + .data({{2, 2, 4}, numeric_type, std::vector {0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}}) + .indices({{1, 2, 1}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 3, 1, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24}}) + .axis({{1}, integer_type, std::vector {1}}) + .expected({{2, 2, 4}, numeric_type, std::vector {1, 2, 13, 14, + 3, 4, 15, 16, + 0, 0, 0, 0, + 0, 0, 0, 0}}), + + Builder {} + .data({{3, 2, 2, 2}, numeric_type, std::vector {0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}}) + .indices({{2}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16}}) + .axis({{1}, integer_type, std::vector {3}}) + .expected({{3, 2, 2, 2}, numeric_type, std::vector { + 1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 0, 0, + 0, 0, + 0, 0, + 0, 0}}), + Builder {} + .data({{5, 2, 2, 2}, numeric_type, std::vector {0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}}) + .indices({{2}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16}}) + .axis({{1}, integer_type, std::vector {2}}) + .expected({{5, 2, 2, 2}, numeric_type, std::vector { + 1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 
0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}}), +Builder {} + .data({{5, 2, 2, 2}, numeric_type, std::vector {0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}}) + .indices({{2, 1}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24, + 25, 26, + 27, 28, + 29, 30, + 31, 32}}) + .axis({{1}, integer_type, std::vector {2}}) + .expected({{5, 2, 2, 2}, numeric_type, std::vector { + 1, 2, + 3, 4, + 9, 10, + 11, 12, + 17, 18, + 19, 20, + 25, 26, + 27, 28, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}})}; + return ScatterUpdateParams; +} + +template +std::vector generateScatterUpdate3ParamsNegativeAxis(const element::Type& numeric_type, const element::Type& integer_type) { + using N = typename element_type_traits::value_type; + using I = typename element_type_traits::value_type; + std::vector ScatterUpdateParams { + Builder {} + .data({{2, 2, 3}, numeric_type, std::vector {0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0}}) + .indices({{1, 2, 1}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 3, 1, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24}}) + .axis({{1}, integer_type, std::vector {-2}}) + .expected({{2, 2, 3}, numeric_type, std::vector {1, 2, 13, + 3, 4, 15, + 14, 0, 0, + 16, 0, 0}}), + Builder {} + .data({{2, 2, 2}, numeric_type, std::vector {0, 0, + 0, 0, + 0, 0, + 0, 0}}) + .indices({{1, 2, 1}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 3, 1, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 17, 18, + 19, 20, + 21, 22, + 23, 24}}) + .axis({{1}, integer_type, std::vector {-1}}) + .expected({{2, 2, 2}, numeric_type, std::vector {1, 2, + 7, 8, + 13, 14, + 19, 20}}), + Builder {} + .data({{4, 2, 2, 2}, numeric_type, std::vector {0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}}) + .indices({{2}, integer_type, std::vector {0, 1}}) + .updates({{2, 2, 2, 2}, numeric_type, std::vector {1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16}}) + .axis({{1}, integer_type, std::vector {-3}}) + .expected({{4, 2, 2, 2}, numeric_type, std::vector { + 1, 2, + 3, 4, + 5, 6, + 7, 8, + 9, 10, + 11, 12, + 13, 14, + 15, 16, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0}})}; + return ScatterUpdateParams; +} + +std::vector generateScatterUpdateCombinedParams() { + const std::vector> ScatterUpdateTypeParams { + // f32 + generateScatterUpdate3Params(element::f32, element::i16), + generateScatterUpdate3Params(element::f32, element::i32), + generateScatterUpdate3Params(element::f32, element::i64), + generateScatterUpdate3Params(element::f32, element::u32), + generateScatterUpdate3Params(element::f32, element::u64), + + // f16 + generateScatterUpdate3Params(element::f16, element::i16), + generateScatterUpdate3Params(element::f16, element::i32), + generateScatterUpdate3Params(element::f16, element::i64), + generateScatterUpdate3Params(element::f16, element::u32), + generateScatterUpdate3Params(element::f16, element::u64), + // i8 + generateScatterUpdate3Params(element::i8, element::i16), + generateScatterUpdate3Params(element::i8, element::i32), + 
generateScatterUpdate3Params(element::i8, element::i64), + generateScatterUpdate3Params(element::i8, element::u32), + generateScatterUpdate3Params(element::i8, element::u64), + // i16 + generateScatterUpdate3Params(element::i16, element::i16), + generateScatterUpdate3Params(element::i16, element::i32), + generateScatterUpdate3Params(element::i16, element::i64), + generateScatterUpdate3Params(element::i16, element::u32), + generateScatterUpdate3Params(element::i16, element::u64), + // i32 + generateScatterUpdate3Params(element::i32, element::i16), + generateScatterUpdate3Params(element::i32, element::i32), + generateScatterUpdate3Params(element::i32, element::i64), + generateScatterUpdate3Params(element::i32, element::u32), + generateScatterUpdate3Params(element::i32, element::u64), + // i64 + generateScatterUpdate3Params(element::i64, element::i16), + generateScatterUpdate3Params(element::i64, element::i32), + generateScatterUpdate3Params(element::i64, element::i64), + generateScatterUpdate3Params(element::i64, element::u32), + generateScatterUpdate3Params(element::i64, element::u64), + // u8 + generateScatterUpdate3Params(element::u8, element::i16), + generateScatterUpdate3Params(element::u8, element::i32), + generateScatterUpdate3Params(element::u8, element::i64), + generateScatterUpdate3Params(element::u8, element::u32), + generateScatterUpdate3Params(element::u8, element::u64), + // u16 + generateScatterUpdate3Params(element::u16, element::i16), + generateScatterUpdate3Params(element::u16, element::i32), + generateScatterUpdate3Params(element::u16, element::i64), + generateScatterUpdate3Params(element::u16, element::u32), + generateScatterUpdate3Params(element::u16, element::u64), + // u32 + generateScatterUpdate3Params(element::u32, element::i16), + generateScatterUpdate3Params(element::u32, element::i32), + generateScatterUpdate3Params(element::u32, element::i64), + generateScatterUpdate3Params(element::u32, element::u32), + generateScatterUpdate3Params(element::u32, element::u64), + // u64 + generateScatterUpdate3Params(element::u64, element::i16), + generateScatterUpdate3Params(element::u64, element::i32), + generateScatterUpdate3Params(element::u64, element::i64), + generateScatterUpdate3Params(element::u64, element::u32), + generateScatterUpdate3Params(element::u64, element::u64), + // bf16 + generateScatterUpdate3Params(element::bf16, element::i16), + generateScatterUpdate3Params(element::bf16, element::i32), + generateScatterUpdate3Params(element::bf16, element::i64), + generateScatterUpdate3Params(element::bf16, element::u32), + generateScatterUpdate3Params(element::bf16, element::u64)}; + std::vector combinedParams; + + for (const auto& params : ScatterUpdateTypeParams) { + combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + } + return combinedParams; +} + +std::vector generateScatterUpdateNegativeAxisParams() { + const std::vector> ScatterUpdateTypeParams { + // f32 + generateScatterUpdate3Params(element::f32, element::i16), + generateScatterUpdate3Params(element::f32, element::i32), + generateScatterUpdate3Params(element::f32, element::i64), + // f16 + generateScatterUpdate3Params(element::f16, element::i16), + generateScatterUpdate3Params(element::f16, element::i32), + generateScatterUpdate3Params(element::f16, element::i64), + // i8 + generateScatterUpdate3Params(element::i8, element::i16), + generateScatterUpdate3Params(element::i8, element::i32), + generateScatterUpdate3Params(element::i8, element::i64), + // i16 + 
generateScatterUpdate3Params(element::i16, element::i16), + generateScatterUpdate3Params(element::i16, element::i32), + generateScatterUpdate3Params(element::i16, element::i64), + // i32 + generateScatterUpdate3Params(element::i32, element::i16), + generateScatterUpdate3Params(element::i32, element::i32), + generateScatterUpdate3Params(element::i32, element::i64), + // i64 + generateScatterUpdate3Params(element::i64, element::i16), + generateScatterUpdate3Params(element::i64, element::i32), + generateScatterUpdate3Params(element::i64, element::i64), + // u8 + generateScatterUpdate3Params(element::u8, element::i16), + generateScatterUpdate3Params(element::u8, element::i32), + generateScatterUpdate3Params(element::u8, element::i64), + // u16 + generateScatterUpdate3Params(element::u16, element::i16), + generateScatterUpdate3Params(element::u16, element::i32), + generateScatterUpdate3Params(element::u16, element::i64), + // u32 + generateScatterUpdate3Params(element::u32, element::i16), + generateScatterUpdate3Params(element::u32, element::i32), + generateScatterUpdate3Params(element::u32, element::i64), + // u64 + generateScatterUpdate3Params(element::u64, element::i16), + generateScatterUpdate3Params(element::u64, element::i32), + generateScatterUpdate3Params(element::u64, element::i64), + // bf16 + generateScatterUpdate3Params(element::bf16, element::i16), + generateScatterUpdate3Params(element::bf16, element::i32), + generateScatterUpdate3Params(element::bf16, element::i64)}; + std::vector combinedParams; + + for (const auto& params : ScatterUpdateTypeParams) { + combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + } + return combinedParams; +} +} // namespace + +INSTANTIATE_TEST_SUITE_P(smoke_ScatterUpdate_With_Hardcoded_Refs, ReferenceScatterUpdate6LayerTest, + ::testing::ValuesIn(generateScatterUpdateCombinedParams()), ReferenceScatterUpdate6LayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ScatterUpdate_Negative_Axis_With_Hardcoded_Refs, ReferenceScatterUpdate6LayerTest, + ::testing::ValuesIn(generateScatterUpdateNegativeAxisParams()), ReferenceScatterUpdate6LayerTest::getTestCaseName); +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/subgraph_reference/preprocess.cpp b/docs/template_plugin/tests/functional/subgraph_reference/preprocess.cpp index 0c0b6087c19cd1..c0a3027204e2a0 100644 --- a/docs/template_plugin/tests/functional/subgraph_reference/preprocess.cpp +++ b/docs/template_plugin/tests/functional/subgraph_reference/preprocess.cpp @@ -24,6 +24,8 @@ struct RefPreprocessParams { std::function()> function; std::vector inputs; std::vector expected; + float abs_threshold = 0.01f; + float rel_threshold = 0.01f; std::string name; }; @@ -39,6 +41,8 @@ class ReferencePreprocessTest : public testing::TestWithParam& obj) { const auto& param = obj.param; @@ -58,8 +62,12 @@ static std::shared_ptr create_simple_function(element::Type type, cons auto data1 = std::make_shared(type, shape); data1->set_friendly_name("input1"); data1->get_output_tensor(0).set_names({"tensor_input1"}); - auto res = std::make_shared(data1); - res->set_friendly_name("Result"); + auto c = op::v0::Constant::create(type, {1}, {0}); + auto op = std::make_shared(data1, c); + op->set_friendly_name("Add0"); + auto res = std::make_shared(op); + res->set_friendly_name("Result1"); + res->get_output_tensor(0).set_names({"tensor_output1"}); return std::make_shared(ResultVector{res}, ParameterVector{data1}); } @@ -67,13 +75,21 @@ static std::shared_ptr 
create_2inputs(element::Type type, const Partia auto data1 = std::make_shared(type, shape); data1->set_friendly_name("input1"); data1->get_output_tensor(0).set_names({"tensor_input1"}); + auto c1 = op::v0::Constant::create(type, {1}, {0}); + auto op1 = std::make_shared(data1, c1); + op1->set_friendly_name("Add01"); auto data2 = std::make_shared(type, shape); + data2->get_output_tensor(0).set_names({"tensor_input2"}); data2->set_friendly_name("input2"); - data1->get_output_tensor(0).set_names({"tensor_input2"}); - auto res1 = std::make_shared(data1); + auto c2 = op::v0::Constant::create(type, {1}, {0}); + auto op2 = std::make_shared(data2, c2); + op2->set_friendly_name("Add02"); + auto res1 = std::make_shared(op1); res1->set_friendly_name("Result1"); - auto res2 = std::make_shared(data2); + res1->get_output_tensor(0).set_names({"tensor_output1"}); + auto res2 = std::make_shared(op2); res2->set_friendly_name("Result2"); + res2->get_output_tensor(0).set_names({"tensor_output2"}); return std::make_shared(ResultVector{res1, res2}, ParameterVector{data1, data2}); } @@ -327,6 +343,26 @@ static RefPreprocessParams resize_from_spatial_dims() { return res; } +static RefPreprocessParams resize_i8() { + RefPreprocessParams res("resize_i8"); + res.function = []() { + auto f = create_simple_function(element::i8, PartialShape{1, 3, 1, 1}); + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo() + .set_spatial_dynamic_shape()) + .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR)) + .network(InputNetworkInfo().set_layout("NCHW"))) + .build(f); + return f; + }; + res.inputs.emplace_back(element::i8, Shape{1, 3, 2, 2}, std::vector{0, 0, 0, 0, + 1, 1, 1, 1, + 2, 2, 2, 2}); + res.expected.emplace_back(Shape{1, 3, 1, 1}, element::i8, std::vector{0, 1, 2}); + return res; +} + static RefPreprocessParams resize_to_network_width_height() { RefPreprocessParams res("resize_to_network_width_height"); res.function = []() { @@ -505,6 +541,201 @@ static RefPreprocessParams resize_and_convert_layout() { return res; } +static RefPreprocessParams convert_color_nv12_to_bgr_two_planes() { + RefPreprocessParams res("convert_color_nv12_to_bgr_two_planes"); + res.abs_threshold = 2.f; // Allow small color conversion deviations + res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%) + res.function = []() { + auto f = create_simple_function(element::u8, PartialShape{1, 4, 4, 3}); + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo() + .set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess(PreProcessSteps() + .convert_color(ColorFormat::BGR))) + .build(f); + return f; + }; + + // clang-format off + auto input_y = std::vector {81, 81, 145, 145, // RRGG + 81, 81, 145, 145, // RRGG + 41, 41, 81, 81, // BBRR + 41, 41, 81, 81}; // BBRR + auto input_shape_y = Shape{1, 4, 4, 1}; + auto input_uv = std::vector {240, 90, // R (2x2) + 34, 54, // G (2x2) + 110, 240, // B (2x2) + 240, 90}; // R (2x2) + auto input_shape_uv = Shape{1, 2, 2, 2}; + auto exp_out = std::vector {0, 0, 255, 0, 0, 255, 0, 255, 0, 0, 255, 0, + 0, 0, 255, 0, 0, 255, 0, 255, 0, 0, 255, 0, + 255, 0, 0, 255, 0, 0, 0, 0, 255, 0, 0, 255, + 255, 0, 0, 255, 0, 0, 0, 0, 255, 0, 0, 255}; + auto out_shape = Shape{1, 4, 4, 3}; + // clang-format on + res.inputs.emplace_back(element::u8, input_shape_y, input_y); + res.inputs.emplace_back(element::u8, input_shape_uv, input_uv); + res.expected.emplace_back(out_shape, element::u8, exp_out); + return res; +} + +static RefPreprocessParams 
convert_color_nv12_single_plane() { + RefPreprocessParams res("convert_color_nv12_single_plane"); + res.abs_threshold = 2.f; // Allow small color conversion deviations + res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%) + res.function = []() { + auto f = create_simple_function(element::f32, PartialShape{1, 4, 4, 3}); + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo() + .set_color_format(ColorFormat::NV12_SINGLE_PLANE)) + .preprocess(PreProcessSteps() + .convert_color(ColorFormat::RGB))) + .build(f); + return f; + }; + + // clang-format off + auto input = std::vector { 81, 81, 145, 145, // RRGG + 81, 81, 145, 145, // RRGG + 41, 41, 81, 81, // BBRR + 41, 41, 81, 81, // BBRR + 240, 90, 34, 54, 110, 240, 240, 90}; // UV (RGBR) + auto input_shape = Shape{1, 6, 4, 1}; + auto exp_out = std::vector {255, 0, 0, 255, 0, 0, 0, 255, 0, 0, 255, 0, // RRGG + 255, 0, 0, 255, 0, 0, 0, 255, 0, 0, 255, 0, // RRGG + 0, 0, 255, 0, 0, 255, 255, 0, 0, 255, 0, 0, // BBRR + 0, 0, 255, 0, 0, 255, 255, 0, 0, 255, 0, 0, // BBRR + }; + auto out_shape = Shape{1, 4, 4, 3}; + // clang-format on + res.inputs.emplace_back(element::f32, input_shape, input); + res.expected.emplace_back(out_shape, element::f32, exp_out); + return res; +} + +static RefPreprocessParams convert_color_nv12_layout_resize() { + RefPreprocessParams res("convert_color_nv12_layout_resize"); + res.abs_threshold = 2.f; // Allow small color conversion deviations + res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%) + res.function = []() { + auto f = create_simple_function(element::f32, PartialShape{1, 3, 2, 2}); + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo() + .set_color_format(ColorFormat::NV12_SINGLE_PLANE) + .set_element_type(element::u8) + .set_spatial_dynamic_shape()) + .preprocess(PreProcessSteps() + .convert_color(ColorFormat::RGB) + .convert_layout() + .convert_element_type(element::f32) + .resize(ResizeAlgorithm::RESIZE_NEAREST)) + .network(InputNetworkInfo().set_layout("NCHW"))) + .build(f); + return f; + }; + + auto result = std::make_shared(); + // clang-format off + auto input = std::vector {81, 81, 145, 145, // RRGG + 81, 81, 145, 145, // RRGG + 41, 41, 81, 81, // BBRR + 41, 41, 81, 81, // BBRR + 240, 90, 34, 54, 110, 240, 240, 90}; // UV (RGBR) + auto input_shape = Shape{1, 6, 4, 1}; + auto exp_out = std::vector {255, 0, 0, 255, // R channel + 0, 255, 0, 0, // G channel + 0, 0, 255, 0}; // B channel + auto out_shape = Shape{1, 2, 2, 3}; + // clang-format on + res.inputs.emplace_back(element::u8, input_shape, input); + res.expected.emplace_back(out_shape, element::f32, exp_out); + return res; +} + +static RefPreprocessParams element_type_before_convert_color_nv12() { + RefPreprocessParams res("element_type_before_convert_color_nv12"); + res.abs_threshold = 2.f; // Allow small color conversion deviations + res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%) + res.function = []() { + auto f = create_simple_function(element::f32, PartialShape{1, 2, 2, 3}); + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo() + .set_element_type(element::u8) + .set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess(PreProcessSteps() + .convert_element_type(element::f32) + .convert_color(ColorFormat::RGB)) + .network(InputNetworkInfo().set_layout("NHWC"))) + .build(f); + return f; + }; + + // clang-format off + auto input_y = std::vector {81, 81, 81, 81}; + auto input_shape_y = Shape{1, 2, 2, 1}; + auto input_uv = 
std::vector {240, 90}; + auto input_shape_uv = Shape{1, 1, 1, 2}; + auto exp_out = std::vector {255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0}; + auto out_shape = Shape{1, 2, 2, 3}; + // clang-format on + res.inputs.emplace_back(element::u8, input_shape_y, input_y); + res.inputs.emplace_back(element::u8, input_shape_uv, input_uv); + res.expected.emplace_back(out_shape, element::f32, exp_out); + return res; +} + +static RefPreprocessParams postprocess_2_inputs_basic() { + RefPreprocessParams res("postprocess_2_inputs_basic"); + res.function = []() { + auto f = create_2inputs(element::f32, Shape{1, 3, 1, 2}); + f = PrePostProcessor() + .output(OutputInfo("tensor_output1") + .network(OutputNetworkInfo().set_layout("NCHW")) + .postprocess(PostProcessSteps().convert_layout()) + .tensor(OutputTensorInfo().set_layout("NHWC"))) + .output(OutputInfo("tensor_output2") + .postprocess(PostProcessSteps().convert_element_type()) + .tensor(OutputTensorInfo().set_element_type(element::u8))) + .build(f); + return f; + }; + res.inputs.emplace_back(Shape{1, 3, 1, 2}, element::f32, std::vector{1.1, 2.1, 3.1, 4.1, 5.1, 6.1}); + res.inputs.emplace_back(Shape{1, 3, 1, 2}, element::f32, std::vector{1.1, 2.1, 3.1, 4.1, 5.1, 6.1}); + res.expected.emplace_back(Shape{1, 1, 2, 3}, element::f32, std::vector{1.1, 3.1, 5.1, 2.1, 4.1, 6.1}); + res.expected.emplace_back(Shape{1, 3, 1, 2}, element::u8, std::vector{1, 2, 3, 4, 5, 6}); + return res; +} + +static RefPreprocessParams pre_and_post_processing() { + RefPreprocessParams res("pre_and_post_processing"); + res.function = []() { + auto f = create_2inputs(element::f32, Shape{1, 3, 1, 2}); + f = PrePostProcessor() + .input(InputInfo(0) + .tensor(InputTensorInfo().set_element_type(element::u8)) + .preprocess(PreProcessSteps().convert_element_type(element::f32).mean(1.f))) + .input(InputInfo(1) + .preprocess(PreProcessSteps().scale(2.f))) + .output(OutputInfo("tensor_output1") + .network(OutputNetworkInfo().set_layout("NCHW")) + .postprocess(PostProcessSteps().convert_layout()) + .tensor(OutputTensorInfo().set_layout("NHWC"))) + .output(OutputInfo("tensor_output2") + .postprocess(PostProcessSteps().convert_element_type()) + .tensor(OutputTensorInfo().set_element_type(element::u8))) + .build(f); + return f; + }; + res.inputs.emplace_back(Shape{1, 3, 1, 2}, element::u8, std::vector{1, 2, 3, 4, 5, 6}); + res.inputs.emplace_back(Shape{1, 3, 1, 2}, element::f32, std::vector{2.2, 4.2, 6.2, 2.4, 4.4, 6.4}); + res.expected.emplace_back(Shape{1, 1, 2, 3}, element::f32, std::vector{0, 2, 4, 1, 3, 5}); + res.expected.emplace_back(Shape{1, 3, 1, 2}, element::u8, std::vector{1, 2, 3, 1, 2, 3}); + return res; +} std::vector allPreprocessTests() { return std::vector { @@ -521,12 +752,19 @@ std::vector allPreprocessTests() { resize_to_network_height(), resize_to_network_width(), resize_from_spatial_dims(), + resize_i8(), resize_to_network_width_height(), resize_to_specified_width_height(), resize_lvalues(), convert_layout_nhwc_to_nchw_lvalue(), convert_layout_nhwc_to_net_no_tensor_shape(), - resize_and_convert_layout() + resize_and_convert_layout(), + convert_color_nv12_to_bgr_two_planes(), + convert_color_nv12_single_plane(), + convert_color_nv12_layout_resize(), + element_type_before_convert_color_nv12(), + postprocess_2_inputs_basic(), + pre_and_post_processing() }; } diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake index 00cf63344ce6eb..d419df1bc473ff 100644 --- a/inference-engine/cmake/vpu_dependencies.cmake +++ 
b/inference-engine/cmake/vpu_dependencies.cmake @@ -6,14 +6,14 @@ include_guard(GLOBAL) set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x) set(VPU_SUPPORTED_FIRMWARES_HASH - "54a732b5fb17a0124652bc5113fa628c718a5af40621bca309471cb5ffd9271b" - "5750b2831c77ef54b8e243d3840c5ed1c9509681d55aee7e369d558cef628735") + "bd0a40b82b1e024f99a175c0c967a61647d790a42a546b3f0ce8562107dc13dc" + "74efa0bb416ead2238878862aeca2f80d91268efb4859e09594536ef15908d0e") # # Default packages # -set(FIRMWARE_PACKAGE_VERSION 1717) +set(FIRMWARE_PACKAGE_VERSION 1774) set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2") # diff --git a/inference-engine/src/gna_plugin/CMakeLists.txt b/inference-engine/src/gna_plugin/CMakeLists.txt index e86aac43938676..a476318dc03e89 100644 --- a/inference-engine/src/gna_plugin/CMakeLists.txt +++ b/inference-engine/src/gna_plugin/CMakeLists.txt @@ -50,10 +50,6 @@ target_compile_definitions(${TARGET_NAME} ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) -if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CROSSCOMPILING) - target_link_options(${TARGET_NAME} PRIVATE -Wl,--unresolved-symbols=ignore-all) -endif() - # # Static version for tests # diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp index 322dbd2f9d300d..2fbed41a51fa68 100644 --- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp +++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp @@ -801,8 +801,10 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ uint32_t num_bytes_per_bias = component[i].op.affine.num_bytes_per_bias; float weight_scale_factor = component[i].op.affine.weight_scale_factor; float output_scale_factor = component[i].output_scale_factor; +#if defined(DUMP_WB) || defined(LIGHT_DUMP) uint32_t num_weight_rows = (component[i].operation == kDnnDiagonalOp) ? 
1 : num_rows_out; uint32_t num_weight_columns = num_rows_in; +#endif if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) { out_file << " " << std::dec << 4 << "\n"; out_file << " " << std::dec << 4 << "\n"; @@ -823,15 +825,15 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ << GNAPluginNS::memory::MemoryOffset(component[i].op.affine.ptr_weights, ptr_dnn_memory_) << "\n"; out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex << GNAPluginNS::memory::MemoryOffset(component[i].op.affine.ptr_biases, ptr_dnn_memory_) << "\n"; - +#ifdef LIGHT_DUMP std::ofstream out_wfile((out_file_name.str() + "_weights.txt").c_str(), std::ios::out); std::ofstream out_bfile((out_file_name.str() + "_biases.txt").c_str(), std::ios::out); - +#endif if (num_bytes_per_weight == 1) { if (num_bytes_per_bias != 1) { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int8_t* ptr_weight = reinterpret_cast(component[i].op.affine.ptr_weights); gna_compound_bias_t* ptr_bias = reinterpret_cast(component[i].op.affine.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_weight_rows; row++) { for (uint32_t col = 0; col < num_weight_columns; col++) { if (logging_precision == kDnnFloat) { @@ -847,8 +849,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int8_t* ptr_weight = reinterpret_cast(component[i].op.affine.ptr_weights); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_weight_rows; row++) { for (uint32_t col = 0; col < num_weight_columns; col++) { if (logging_precision == kDnnFloat) { @@ -861,11 +863,11 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_wfile << "\n"; } } - } #endif + } } else if (num_bytes_per_weight == 2) { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int16_t *ptr_weight = reinterpret_cast(component[i].op.affine.ptr_weights); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_weight_rows; row++) { for (uint32_t col = 0; col < num_weight_columns; col++) { if (logging_precision == kDnnFloat) { @@ -879,8 +881,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else if (compute_precision_ == kDnnFloat) { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) float *ptr_weight = reinterpret_cast(component[i].op.affine.ptr_weights); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_weight_rows; row++) { for (uint32_t col = 0; col < num_weight_columns; col++) { out_wfile << std::setprecision(5) @@ -896,9 +898,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ if (compute_precision_ == kDnnInt) { if (num_bytes_per_weight == 1) { if (num_bytes_per_bias != 1) { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) gna_compound_bias_t * ptr_biases = reinterpret_cast(component[i].op.affine.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_rows_out; row++) { if (logging_precision == kDnnInt) { out_bfile << std::setw(8) << ptr_biases[row].bias << ", "; @@ -909,8 +911,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int8_t *ptr_biases = reinterpret_cast(component[i].op.affine.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_rows_out; row++) { if (logging_precision == kDnnInt) { out_bfile << std::setw(8) << ptr_biases[row] << "\n"; @@ -921,8 +923,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ 
#endif } } else { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int32_t *ptr_biases = reinterpret_cast(component[i].op.affine.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_rows_out; row++) { if (logging_precision == kDnnInt) { out_bfile << std::setw(8) << ptr_biases[row] << "\n"; @@ -932,11 +934,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } - } else { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) float *ptr_biases = reinterpret_cast(component[i].op.affine.ptr_biases); -#ifdef DUMP_WB - for (uint32_t row = 0; row < num_rows_out; row++) { out_bfile << std::setprecision(5) << ptr_biases[row] << "\n"; } @@ -976,15 +976,15 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex << GNAPluginNS::memory::MemoryOffset(component[i].op.conv1D.ptr_biases, ptr_dnn_memory_) << "\n"; - +#ifdef LIGHT_DUMP std::ofstream out_wfile((out_file_name.str() + "_weights.txt").c_str(), std::ios::out); std::ofstream out_bfile((out_file_name.str() + "_biases.txt").c_str(), std::ios::out); - +#endif if (num_bytes_per_weight == 1) { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int8_t *ptr_weight = reinterpret_cast(component[i].op.conv1D.ptr_filters); gna_compound_bias_t *ptr_bias = reinterpret_cast(component[i].op.conv1D.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_filters; row++) { for (uint32_t col = 0; col < num_filter_coefficients; col++) { if (logging_precision == kDnnFloat) { @@ -999,8 +999,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else if (num_bytes_per_weight == 2) { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int16_t *ptr_weight = reinterpret_cast(component[i].op.conv1D.ptr_filters); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_filters; row++) { for (uint32_t col = 0; col < num_filter_coefficients; col++) { if (logging_precision == kDnnFloat) { @@ -1015,8 +1015,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else if (compute_precision_ == kDnnFloat) { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) float *ptr_weight = reinterpret_cast(component[i].op.conv1D.ptr_filters); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_filters; row++) { for (uint32_t col = 0; col < num_filter_coefficients; col++) { out_wfile << std::setprecision(12) @@ -1032,9 +1032,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ if (compute_precision_ == kDnnInt) { if (logging_precision == kDnnInt) { if (num_bytes_per_weight == 1) { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) gna_compound_bias_t *ptr_biases = reinterpret_cast(component[i].op.conv1D.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_filters; row++) { out_bfile << "0x" << std::setfill('0') << std::setw(8) << std::hex << ptr_biases[row].bias << " "; @@ -1043,8 +1043,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int32_t *ptr_biases = reinterpret_cast(component[i].op.conv1D.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_filters; row++) { out_bfile << "0x" << std::setfill('0') << std::setw(8) << std::hex << ptr_biases[row] << "\n"; @@ -1052,8 +1052,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ #endif } } else { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) int32_t *ptr_biases = 
reinterpret_cast(component[i].op.conv1D.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_filters; row++) { out_bfile << std::setprecision(12) << ptr_biases[row] / output_scale_factor << "\n"; @@ -1061,8 +1061,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ #endif } } else { +#if defined(DUMP_WB) || defined(LIGHT_DUMP) float *ptr_biases = reinterpret_cast(component[i].op.conv1D.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_filters; row++) { out_bfile << std::setprecision(12) << ptr_biases[row] << "\n"; } @@ -1104,8 +1104,10 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ uint32_t num_vector_delay = component[i].op.recurrent.num_vector_delay; uint32_t num_bytes_per_weight = component[i].op.recurrent.num_bytes_per_weight; uint32_t num_bytes_per_bias = component[i].op.recurrent.num_bytes_per_bias; +#ifdef DUMP_WB uint32_t num_weight_rows = num_columns_out; uint32_t num_weight_columns = num_columns_in + num_columns_out; +#endif out_file << " " << std::dec << num_vector_delay << "\n"; if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) { out_file << " " << std::dec << 4 << "\n"; @@ -1130,10 +1132,10 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex << GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_feedbacks, ptr_dnn_memory_) << "\n"; if (num_bytes_per_weight == 1) { +#ifdef DUMP_WB int8_t *ptr_weight = reinterpret_cast(component[i].op.recurrent.ptr_weights); gna_compound_bias_t *ptr_bias = reinterpret_cast(component[i].op.recurrent.ptr_biases); -#ifdef DUMP_WB for (uint32_t row = 0; row < num_weight_rows; row++) { out_file << " "; for (uint32_t col = 0; col < num_weight_columns; col++) { @@ -1151,8 +1153,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else if (num_bytes_per_weight == 2) { - int16_t *ptr_weight = reinterpret_cast(component[i].op.recurrent.ptr_weights); #ifdef DUMP_WB + int16_t *ptr_weight = reinterpret_cast(component[i].op.recurrent.ptr_weights); for (uint32_t row = 0; row < num_weight_rows; row++) { out_file << " "; for (uint32_t col = 0; col < num_weight_columns; col++) { @@ -1168,8 +1170,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else if (compute_precision_ == kDnnFloat) { - float *ptr_weight = reinterpret_cast(component[i].op.recurrent.ptr_weights); #ifdef DUMP_WB + float *ptr_weight = reinterpret_cast(component[i].op.recurrent.ptr_weights); for (uint32_t row = 0; row < num_weight_rows; row++) { out_file << " "; for (uint32_t col = 0; col < num_weight_columns; col++) { @@ -1186,10 +1188,10 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ if (compute_precision_ == kDnnInt) { if (logging_precision == kDnnInt) { if (num_bytes_per_weight == 1) { - gna_compound_bias_t - *ptr_biases = reinterpret_cast(component[i].op.recurrent.ptr_biases); out_file << "" << " "; #ifdef DUMP_WB + gna_compound_bias_t + *ptr_biases = reinterpret_cast(component[i].op.recurrent.ptr_biases); for (uint32_t col = 0; col < num_columns_out; col++) { out_file << "0x" << std::setfill('0') << std::setw(8) << std::hex << ptr_biases[col].bias << " "; @@ -1198,9 +1200,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } #endif } else { - int32_t *ptr_biases = 
reinterpret_cast(component[i].op.recurrent.ptr_biases); out_file << "" << " "; #ifdef DUMP_WB + int32_t *ptr_biases = reinterpret_cast(component[i].op.recurrent.ptr_biases); for (uint32_t col = 0; col < num_columns_out; col++) { out_file << "0x" << std::setfill('0') << std::setw(8) << std::hex << ptr_biases[col] << " "; @@ -1208,9 +1210,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ #endif } } else { - int32_t *ptr_biases = reinterpret_cast(component[i].op.recurrent.ptr_biases); out_file << "" << " "; #ifdef DUMP_WB + int32_t *ptr_biases = reinterpret_cast(component[i].op.recurrent.ptr_biases); for (uint32_t col = 0; col < num_columns_out; col++) { out_file << std::setprecision(12) << std::scientific << ptr_biases[col] / output_scale_factor << " "; @@ -1218,9 +1220,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ #endif } } else { - float *ptr_biases = reinterpret_cast(component[i].op.recurrent.ptr_biases); out_file << "" << " "; #ifdef DUMP_WB + float *ptr_biases = reinterpret_cast(component[i].op.recurrent.ptr_biases); for (uint32_t col = 0; col < num_columns_out; col++) { out_file << std::setprecision(12) << std::scientific << ptr_biases[col] << " "; } @@ -2168,8 +2170,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() { in_file << std::setw(8) << floatValue / input_scale_factor << "\n"; } } -#endif } +#endif } uint32_t GNAPluginNS::backend::AMIntelDNN::num_components() { diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp index 26c2b74fac7e2b..68dd9088d98194 100644 --- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp +++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp @@ -653,11 +653,18 @@ class DataQuantizer : public DataQuant void operator()(InferenceEngine::WeightableLayer *wl, const FakeQuant&) const { auto quantData = InferenceEngine::getInjectedData(*wl); - IE_ASSERT(quantData->_weights_quant.IsStatsSet()); - if (quantData->_weights_quant.GetLevels() <= std::numeric_limits::max()) { - quantizeWeightsBiases(FakeQuantI8(), wl, Quant()); + if (quantData->_weights_quant.IsStatsSet()) { + if (quantData->_weights_quant.GetLevels() <= std::numeric_limits::max()) { + quantizeWeightsBiases(FakeQuantI8(), wl, Quant()); + } else { + quantizeWeightsBiases(FakeQuantI16(), wl, Quant()); + } } else { - quantizeWeightsBiases(FakeQuantI16(), wl, Quant()); + if (std::is_same()) { + THROW_GNA_EXCEPTION << "Infinite recursion. 
The type Desc::OptionalType is equal to FakeQuant."; + } + + (*this)(wl, typename Desc::OptionalType()); + } } }; diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp index be282b806f9fea..419a14252fcda7 100644 --- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp +++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp @@ -1354,6 +1354,12 @@ class ScaleFactorCalculator { } else { return frontend::FakeQuantI16().getWeightsPrecision().size(); } + } else { + if (!info.isSynthetic()) { + gnawarn() << "The layer (" << ptr->name << ") has no quantization statistics\n"; + } + + return GetOptionalWeightsBytesSize(); } } diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp index 13bd120fdea6e1..f04f3985325bf7 100644 --- a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp @@ -344,6 +344,10 @@ class LayerInfo { return isConcatAlignFilter() || isSyntheticScaleShift() || isCropAffined(); } + bool isSynthetic() const noexcept { + return isConcatAlignFilter() || isSyntheticScaleShift() || isConvolutionFilter() || isAffineFilter(); + } + size_t paddingSize() const { static InferenceEngine::details::caseless_set layersWithPossiblePadding = {"FullyConnected", "InnerProduct", diff --git a/inference-engine/src/gna_plugin/transformations/insert_reshape_around_matmul.cpp b/inference-engine/src/gna_plugin/transformations/insert_reshape_around_matmul.cpp index 9d82d7b402d68e..3dd68e869f94a7 100644 --- a/inference-engine/src/gna_plugin/transformations/insert_reshape_around_matmul.cpp +++ b/inference-engine/src/gna_plugin/transformations/insert_reshape_around_matmul.cpp @@ -26,7 +26,7 @@ static bool InsertReshape( const std::shared_ptr& matmul2, const std::shared_ptr& add1 = nullptr, const std::shared_ptr& add2 = nullptr, - const std::shared_ptr& fake_quantize2 = nullptr, + const std::shared_ptr& fake_quantize = nullptr, const std::shared_ptr& transpose = nullptr) { const auto& pattern_map = matcher.get_pattern_value_map(); size_t matmul_input_index = 1; @@ -41,38 +41,58 @@ static bool InsertReshape( } std::shared_ptr matmul_node = iter->second.get_node_shared_ptr(); - auto matmul_node_shape = matmul_node->get_output_shape(0); if ((iter = pattern_map.find(input)) == std::end(pattern_map)) { return false; } - std::shared_ptr first_node = iter->second.get_node_shared_ptr(); + auto first_node = iter->second.get_node_shared_ptr(); + std::vector> nodes = { matmul_node }; + for (auto node : {add2, add1, fake_quantize, transpose}) { + iter = pattern_map.find(node); + if (iter != pattern_map.end()) { + nodes.push_back(iter->second.get_node_shared_ptr()); + } + } + + auto last_node_shape = nodes.back()->get_output_shape(0); auto reshape_input_node = std::dynamic_pointer_cast(first_node); bool need_reshape_before = !reshape_input_node || reshape_input_node->get_output_shape(0).size() != 2; if (need_reshape_before) { - auto input_shape = first_node->get_output_shape(0); - std::vector before_shape(2, 1); - std::copy_if(input_shape.begin(), input_shape.end(), before_shape.begin(), [](size_t e) { return e > 1; }); + std::vector before_shape = {-1, static_cast(first_node->get_output_shape(0).back())}; auto reshape_before_node = std::make_shared(first_node, std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{before_shape.size()}, before_shape), false);
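+ // A sketch of the shape math, assuming a hypothetical {1, 4, 8} input: before_shape becomes {-1, 8},
+ // and Reshape infers the dimension marked -1 from the total element count, flattening the MatMul
+ // input to {4, 8}. At most one dimension of a Reshape target shape may be -1.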
reshape_before_node->set_friendly_name(matmul_node->get_friendly_name() + "/reshape_before_matmul"); ngraph::copy_runtime_info(first_node, reshape_before_node); matmul_node->input(matmul_input_index).replace_source_output(reshape_before_node->output(0)); - } + if (auto transpose_node = std::dynamic_pointer_cast(nodes.back())) { + nodes.pop_back(); + std::reverse(nodes.begin(), nodes.end()); + while (!nodes.empty()) { + auto node_copy = nodes.back()->clone_with_new_inputs(nodes.back()->input_values()); + ngraph::copy_runtime_info(nodes.back(), node_copy); + ngraph::replace_node(nodes.back(), node_copy); + nodes.pop_back(); + } - std::shared_ptr last_node; - iter = pattern_map.find(transpose); - if (iter == pattern_map.end() && - (iter = pattern_map.find(fake_quantize2)) == pattern_map.end() && - (iter = pattern_map.find(add1)) == pattern_map.end() && - (iter = pattern_map.find(add2)) == pattern_map.end()) { - last_node = matmul_node; - } else { - last_node = iter->second.get_node_shared_ptr(); + auto transpose_input_shape = transpose_node->input_values()[0].get_node_shared_ptr()->get_output_shape(0); + auto transpose_constant_shape = transpose_node->input_values()[1].get_node_shared_ptr()->get_output_shape(0); + if (std::count_if(transpose_input_shape.begin(), transpose_input_shape.end(), [](size_t n) { return n > 1; }) > 2) { + THROW_GNA_EXCEPTION << "The number of dimensions that are greater than 1 is greater than 2" + << " for Transpose layer (" << transpose_node->get_friendly_name() << ")." + << " For this reason, there is no way to determine permutation shape."; + } + std::vector permutation_shape = {1, 0}; + auto transpose_node_copy = transpose_node->clone_with_new_inputs( + {transpose_node->input_values()[0], + std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{permutation_shape.size()}, permutation_shape)}); + ngraph::copy_runtime_info(transpose_node, transpose_node_copy); + ngraph::replace_node(transpose_node, transpose_node_copy); + nodes.push_back(transpose_node_copy); + } } - auto consumers = last_node->output(0).get_target_inputs(); - auto last_node_shape = last_node->get_output_shape(0); + auto consumers = nodes.back()->output(0).get_target_inputs(); bool need_reshape_after = false; for (auto consumer : consumers) { auto reshape_output_node = dynamic_cast(consumer.get_node()); @@ -83,10 +103,11 @@ static bool InsertReshape( } if (need_reshape_after) { - auto reshape_after_node = std::make_shared(last_node, - std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{last_node_shape.size()}, last_node_shape), false); - reshape_after_node->set_friendly_name(last_node->get_friendly_name()); - ngraph::copy_runtime_info(last_node, reshape_after_node); + auto reshape_after_node = std::make_shared(nodes.back(), + std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{last_node_shape.size()}, last_node_shape), false); + reshape_after_node->set_friendly_name(nodes.back()->get_friendly_name()); + ngraph::copy_runtime_info(nodes.back(), reshape_after_node); for (auto consumer : consumers) { consumer.replace_source_output(reshape_after_node); } diff --git a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp index 90dd4608de723b..a89bc529d7acf9 100644 --- a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp @@ -22,6 +22,7 @@ #include "ngraph/ngraph.hpp" #include 
"ngraph/pass/constant_folding.hpp" #include "ngraph/pass/manager.hpp" +#include "openvino/core/except.hpp" #include "transformations/serialize.hpp" #include "transformations/smart_reshape/set_batch_size.hpp" #include "transformations/smart_reshape/smart_reshape.hpp" @@ -109,6 +110,20 @@ void CNNNetworkNGraphImpl::validateFunctionNames() const { } } +ngraph::element::Type details::toLegacyType(const ngraph::element::Type& ngraph_type, bool input) { + if (input) { + return ngraph_type == ngraph::element::f16 ? ngraph::element::f32 : ngraph_type; + } else { + if (ngraph_type == ngraph::element::i64 || ngraph_type == ngraph::element::i32) { + return ngraph::element::i32; + } else if (ngraph_type != ngraph::element::f32) { + return ngraph::element::f32; + } + } + + return ngraph_type; +} + CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const std::shared_ptr& nGraph, const std::vector& exts, bool newAPI) @@ -128,7 +143,7 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const std::shared_ptr& nGra ? Precision::I16 : prc == Precision::FP16 ? Precision::FP32 : static_cast(prc); - info->setPrecision(prc); + info->setPrecision(details::convertPrecision(toLegacyType(details::convertPrecision(prc), true))); } network.setInputInfo(info); @@ -155,12 +170,8 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const std::shared_ptr& nGra if (!_new_api) { for (auto& output : _outputData) { // Convert precision into native format. Be consistent with possible conversion to CNNNetwork later. - if (output.second->getPrecision() == Precision::I64) { - output.second->setPrecision(Precision::I32); - } else if (output.second->getPrecision() != Precision::FP32 && - output.second->getPrecision() != Precision::I32) { - output.second->setPrecision(Precision::FP32); - } + output.second->setPrecision(details::convertPrecision( + toLegacyType(details::convertPrecision(output.second->getPrecision()), false))); } } } diff --git a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp index 77f445b92f99bd..370fbcac6bd09d 100644 --- a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp +++ b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp @@ -28,10 +28,13 @@ #include "ngraph/attribute_visitor.hpp" #include "ngraph/function.hpp" #include "ngraph/node.hpp" +#include "ngraph/type/element_type.hpp" namespace InferenceEngine { namespace details { +ngraph::element::Type toLegacyType(const ngraph::element::Type& ngraph_type, bool input); + IE_SUPPRESS_DEPRECATED_START /** diff --git a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iplugin_internal.cpp b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iplugin_internal.cpp index 009b74212a0408..08440d8f1fbf1b 100644 --- a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iplugin_internal.cpp +++ b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iplugin_internal.cpp @@ -226,16 +226,16 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptrsetNetworkInputs(copyInfo(constMapCast(inputs))); exeNetwork->setNetworkOutputs(copyInfo(constMapCast(outputs))); - ngraph::ParameterVector parameters; - ngraph::ResultVector results; + ov::ParameterVector parameters; + ov::ResultVector results; std::vector> node_outputs; for (auto&& input : inputs) { auto tensor_desc = input.second->getTensorDesc(); auto dims = tensor_desc.getDims(); parameters.push_back( - 
std::make_shared(details::convertPrecision(tensor_desc.getPrecision()), - std::vector{dims.begin(), dims.end()})); + std::make_shared(details::convertPrecision(tensor_desc.getPrecision()), + std::vector{dims.begin(), dims.end()})); parameters.back()->set_friendly_name(input.first); node_outputs.push_back(parameters.back()->output(0)); } @@ -261,16 +261,16 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptr, ngraph::Output> output_map; for (auto&& node : function->get_ordered_ops()) { ngraph::Node* new_node = nullptr; - if (ngraph::is_type(node)) { - parameters.push_back(std::static_pointer_cast(node->clone_with_new_inputs({}))); + if (ngraph::is_type(node)) { + parameters.push_back(std::static_pointer_cast(node->clone_with_new_inputs({}))); for (std::size_t i = 0; i < node->outputs().size(); ++i) { output_map.emplace(node->output(i), parameters.back()->output(i)); } @@ -280,7 +280,7 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptrinputs()) { outputs.emplace_back(output_map.at(input.get_source_output())); } - if (ngraph::is_type(node)) { + if (ngraph::is_type(node)) { results.push_back( std::static_pointer_cast(node->clone_with_new_inputs(outputs))); new_node = results.back().get(); diff --git a/inference-engine/src/inference_engine/src/ie_core.cpp b/inference-engine/src/inference_engine/src/ie_core.cpp index 42d1575a84afc0..0b85b172d40989 100644 --- a/inference-engine/src/inference_engine/src/ie_core.cpp +++ b/inference-engine/src/inference_engine/src/ie_core.cpp @@ -450,28 +450,12 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this( - cnnNet.getFunction(), - std::vector{}, - newAPI)); + return InferenceEngine::details::ReadNetwork(modelPath, binPath, extensions, newAPI); } ie::CNNNetwork ReadNetwork(const std::string& model, const ie::Blob::CPtr& weights) const override { OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::ReadNetwork from memory"); - auto cnnNet = InferenceEngine::details::ReadNetwork(model, weights, extensions); - OPENVINO_ASSERT(cnnNet.getFunction() || !newAPI, "Cannot read IR v7 from OpenVINO 2.0 API"); - if (!newAPI) - return cnnNet; - - return InferenceEngine::CNNNetwork(std::make_shared( - cnnNet.getFunction(), - std::vector{}, - newAPI)); + return InferenceEngine::details::ReadNetwork(model, weights, extensions, newAPI); } // TODO: In future this method can be added to ICore interface diff --git a/inference-engine/src/inference_engine/src/ie_network_reader.cpp b/inference-engine/src/inference_engine/src/ie_network_reader.cpp index c9ca81450d0ab4..2624128952fd20 100644 --- a/inference-engine/src/inference_engine/src/ie_network_reader.cpp +++ b/inference-engine/src/inference_engine/src/ie_network_reader.cpp @@ -7,14 +7,32 @@ #include #include #include +#include #include +#include +#include "cnn_network_ngraph_impl.hpp" +#include "cpp/ie_cnn_network.h" #include "details/ie_so_pointer.hpp" #include "file_utils.h" #include "frontend_manager/frontend_manager.hpp" +#include "ie_api.h" +#include "ie_common.h" +#include "ie_icnn_network.hpp" +#include "ie_input_info.hpp" #include "ie_ir_version.hpp" #include "ie_itt.hpp" #include "ie_reader.hpp" +#include "ngraph/function.hpp" +#include "ngraph/type/element_type.hpp" +#include "ngraph/variant.hpp" +#include "openvino/core/deprecated.hpp" +#include "openvino/core/except.hpp" +#include "openvino/core/preprocess/input_network_info.hpp" +#include "openvino/core/preprocess/input_tensor_info.hpp" +#include "openvino/core/preprocess/pre_post_process.hpp" +#include 
"openvino/core/type/element_type.hpp" +#include "transformations/rt_info/old_api_map_attribute.hpp" namespace InferenceEngine { @@ -109,7 +127,6 @@ void registerReaders() { if (initialized) return; - // TODO: Read readers info from XML auto create_if_exists = [](const std::string name, const std::string library_name) { ov::util::FilePath libraryName = ov::util::to_file_path(library_name); ov::util::FilePath readersLibraryPath = @@ -138,6 +155,7 @@ void assertIfIRv7LikeModel(std::istream& modelStream) { for (auto&& kvp : readers) { Reader::Ptr reader = kvp.second; + // if we have reader for IR v7 if (reader->getName() == "IRv7") { return; } @@ -162,20 +180,16 @@ ov::Extensions get_extensions_map(const std::vector& exts) { - // Register readers if it is needed - registerReaders(); - +CNNNetwork load_ir_v7_network(const std::string& modelPath, + const std::string& binPath, + const std::vector& exts) { // Fix unicode name #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) std::wstring model_path = ov::util::string_to_wstring(modelPath.c_str()); #else std::string model_path = modelPath; #endif + // Try to open model file std::ifstream modelStream(model_path, std::ios::binary); if (!modelStream.is_open()) @@ -183,7 +197,6 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, assertIfIRv7LikeModel(modelStream); - // TODO: this code is needed only by V7 IR reader. So we need to remove it in future. auto fileExt = modelPath.substr(modelPath.find_last_of(".") + 1); for (auto it = readers.lower_bound(fileExt); it != readers.upper_bound(fileExt); it++) { auto reader = it->second; @@ -240,6 +253,182 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, } } + return {}; +} + +CNNNetwork convert_to_cnnnetwork(std::shared_ptr& function, + const std::vector& exts, + bool newAPI) { + auto& rt_info = function->get_rt_info(); + const auto it = rt_info.find("version"); + const bool is_ir = it != rt_info.end(); + + // only for IR cases we need preprocessing or postprocessing steps + if (is_ir) { + using namespace ov::preprocess; + PrePostProcessor prepost; + + auto iv_version_impl = std::dynamic_pointer_cast>(it->second); + OPENVINO_ASSERT(iv_version_impl != nullptr, "Failed to extract IR version from 'version' attribute"); + const int64_t ir_version = iv_version_impl->get(); + + if (ir_version == 10 && newAPI) { + const auto inputs = function->inputs(); + for (size_t i = 0; i < inputs.size(); ++i) { + const auto ngraph_type = inputs[i].get_element_type(); + const auto legacy_type = details::toLegacyType(ngraph_type, true); + prepost.input(ov::preprocess::InputInfo(i) + .tensor(InputTensorInfo().set_element_type(legacy_type)) + .preprocess(PreProcessSteps() + // TODO: remove explicit type + .convert_element_type(ngraph_type))); + } + + const auto outputs = function->outputs(); + for (size_t i = 0; i < outputs.size(); ++i) { + const auto ngraph_type = outputs[i].get_element_type(); + const auto legacy_type = details::toLegacyType(ngraph_type, false); + + prepost.output(OutputInfo(i) + .postprocess(PostProcessSteps().convert_element_type()) + .tensor(OutputTensorInfo().set_element_type(legacy_type))); + } + + function = prepost.build(function); + } else if (ir_version == 11 && !newAPI) { + const std::string& old_api_map_key = ov::OldApiMap::get_type_info_static(); + + auto& parameters = function->get_parameters(); + for (size_t i = 0; i < parameters.size(); ++i) { + const auto& parameter = parameters[i]; + ov::RTMap& rtInfo = parameter->get_rt_info(); + const auto it = 
rtInfo.find(old_api_map_key); + if (it == rtInfo.end()) + continue; + + const auto old_api_map_attr = std::dynamic_pointer_cast(it->second); + OPENVINO_ASSERT(old_api_map_attr != nullptr, "Failed to cast to ov::OldApiMap"); + const auto old_api_map_attr_val = old_api_map_attr->get(); + auto old_api_type = old_api_map_attr_val.get_type(); + const auto old_api_transpose_args = old_api_map_attr_val.get_order(); + + OPENVINO_ASSERT(!old_api_type.is_dynamic(), "Old API map does not support dynamic type"); + // if no differences between IR v10 and IR v11, add identity convert which will be optimized out + if (old_api_type == ov::element::undefined) + old_api_type = parameter->get_element_type(); + + std::stringstream tensorLayout, networkLayout; + for (size_t i = 0; i < old_api_transpose_args.size(); ++i) { + tensorLayout << i; + networkLayout << old_api_transpose_args[i]; + } + + PreProcessSteps steps; + // TODO: remove explicit type + steps.convert_element_type(parameter->get_element_type()); + // TODO: move steps directly to builder once we allow Layout() -> Layout transpose + if (!old_api_transpose_args.empty()) + steps.convert_layout(); + + prepost.input( + ov::preprocess::InputInfo(i) + .tensor( + InputTensorInfo().set_element_type(old_api_type).set_layout(ov::Layout(tensorLayout.str()))) + .preprocess(std::move(steps)) + .network(InputNetworkInfo().set_layout(ov::Layout(networkLayout.str())))); + + // remove the old API map attribute once it has been applied + rtInfo.erase(it); + } + + auto& results = function->get_results(); + for (size_t i = 0; i < results.size(); ++i) { + const auto& result = results[i]; + ov::RTMap& rtInfo = result->get_rt_info(); + const auto it = rtInfo.find(old_api_map_key); + if (it == rtInfo.end()) + continue; + + const auto old_api_map_attr = std::dynamic_pointer_cast(it->second); + OPENVINO_ASSERT(old_api_map_attr != nullptr, "Failed to cast to ov::OldApiMap"); + const auto old_api_map_attr_val = old_api_map_attr->get(); + auto old_api_type = old_api_map_attr_val.get_type(); + const auto old_api_transpose_args = old_api_map_attr_val.get_order(); + + OPENVINO_ASSERT(!old_api_type.is_dynamic(), "Old API map does not support dynamic type"); + // if no differences between IR v10 and IR v11, add identity convert which will be optimized out + if (old_api_type == ov::element::undefined) + old_api_type = result->get_element_type(); + + std::stringstream tensorLayout, networkLayout; + for (size_t i = 0; i < old_api_transpose_args.size(); ++i) { + networkLayout << i; + tensorLayout << old_api_transpose_args[i]; + } + + prepost.output(OutputInfo(i) + .network(OutputNetworkInfo().set_layout(ov::Layout(networkLayout.str()))) + .postprocess(PostProcessSteps().convert_layout().convert_element_type()) + .tensor(OutputTensorInfo() + .set_element_type(old_api_type) + .set_layout(ov::Layout(tensorLayout.str())))); + + // remove the old API map attribute once it has been applied + rtInfo.erase(it); + } + + function = prepost.build(function); + + // TODO: keep information about layout once we have an ability to + // apply permutation to layout + + // restore layout information + for (const auto& parameter : function->get_parameters()) { + parameter->set_layout({}); + } + for (const auto& result : function->get_results()) { + result->set_layout({}); + } + } + } + + // need to remove information about the IR version since it's needed only at the read stage + if (is_ir) { + rt_info.erase(it); + } + + OPENVINO_SUPPRESS_DEPRECATED_START + return CNNNetwork(std::make_shared(function, exts, newAPI)); + OPENVINO_SUPPRESS_DEPRECATED_END +} + +} //
namespace + +CNNNetwork details::ReadNetwork(const std::string& modelPath, + const std::string& binPath, + const std::vector& exts, + bool newAPI) { + // IR v7 obsolete code + { + // Register readers if needed + registerReaders(); + auto cnnnetwork = load_ir_v7_network(modelPath, binPath, exts); + + OPENVINO_SUPPRESS_DEPRECATED_START + if (static_cast(cnnnetwork) != nullptr) { + OPENVINO_ASSERT(!newAPI, "Cannot read IR v7 from OpenVINO 2.0 API"); + return cnnnetwork; + } + OPENVINO_SUPPRESS_DEPRECATED_END + } + + // Fix unicode name +#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + std::wstring model_path = ov::util::string_to_wstring(modelPath.c_str()); +#else + std::string model_path = modelPath; +#endif + // Try to load with FrontEndManager auto& manager = get_frontend_manager(); ngraph::frontend::FrontEnd::Ptr FE; @@ -265,8 +454,10 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, if (inputModel) { auto ngFunc = FE->convert(inputModel); - return CNNNetwork(ngFunc, exts); + return convert_to_cnnnetwork(ngFunc, exts, newAPI); } + + const auto fileExt = modelPath.substr(modelPath.find_last_of(".") + 1); IE_THROW(NetworkNotRead) << "Unable to read the model: " << modelPath << " Please check that model format: " << fileExt << " is supported and the model is correct."; @@ -274,20 +465,26 @@ CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, - const std::vector& exts) { - // Register readers if it is needed - registerReaders(); + const std::vector& exts, + bool newAPI) { std::istringstream modelStringStream(model); std::istream& modelStream = modelStringStream; - assertIfIRv7LikeModel(modelStream); + // IR v7 obsolete code + { + // Register readers if needed + registerReaders(); - for (auto it = readers.begin(); it != readers.end(); it++) { - auto reader = it->second; - if (reader->supportModel(modelStream)) { - if (weights) - return reader->read(modelStream, weights, exts); - return reader->read(modelStream, exts); + assertIfIRv7LikeModel(modelStream); + + for (auto it = readers.begin(); it != readers.end(); it++) { + auto reader = it->second; + if (reader->supportModel(modelStream)) { + OPENVINO_ASSERT(!newAPI, "Cannot read IR v7 from OpenVINO 2.0 API"); + if (weights) + return reader->read(modelStream, weights, exts); + return reader->read(modelStream, exts); + } } } @@ -312,7 +509,7 @@ CNNNetwork details::ReadNetwork(const std::string& model, inputModel = FE->load(params); if (inputModel) { auto ngFunc = FE->convert(inputModel); - return CNNNetwork(ngFunc, exts); + return convert_to_cnnnetwork(ngFunc, exts, newAPI); } IE_THROW(NetworkNotRead) diff --git a/inference-engine/src/inference_engine/src/ie_network_reader.hpp b/inference-engine/src/inference_engine/src/ie_network_reader.hpp index 6939e8c3fec43a..ab370123bf4aab 100644 --- a/inference-engine/src/inference_engine/src/ie_network_reader.hpp +++ b/inference-engine/src/inference_engine/src/ie_network_reader.hpp @@ -19,11 +19,13 @@ namespace details { * @param binPath path to bin file, if path is empty, will try to read bin file with the same name as xml and * if bin file with the same name was not found, will load IR without weights.
* @param exts vector with extensions + * @param newAPI Whether this function is called from OpenVINO 2.0 API * @return CNNNetwork */ CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath, - const std::vector& exts); + const std::vector& exts, + bool newAPI); /** * @brief Reads IR xml and bin (with the same name) files * @param model string with IR @@ -32,9 +34,13 @@ CNNNetwork ReadNetwork(const std::string& modelPath, * @note Reading ONNX models doesn't support loading weights from data blobs. If you are using an ONNX model with external data files, please use the ReadNetwork function overload which takes a filesystem path to the model. + * @param newAPI Whether this function is called from OpenVINO 2.0 API * @return CNNNetwork */ -CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts); +CNNNetwork ReadNetwork(const std::string& model, + const Blob::CPtr& weights, + const std::vector& exts, + bool newAPI); } // namespace details } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/src/ie_system_conf.cpp b/inference-engine/src/inference_engine/src/ie_system_conf.cpp index 067378a8d7e214..4f03f367cb6093 100644 --- a/inference-engine/src/inference_engine/src/ie_system_conf.cpp +++ b/inference-engine/src/inference_engine/src/ie_system_conf.cpp @@ -95,6 +95,21 @@ std::vector getAvailableNUMANodes() { return {-1}; } # endif +int getNumberOfLogicalCPUCores(bool) { + return parallel_get_max_threads(); +} +#else +int getNumberOfLogicalCPUCores(bool bigCoresOnly) { + int logical_cores = parallel_get_max_threads(); +# if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) + auto core_types = custom::info::core_types(); + if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ { + logical_cores = custom::info::default_concurrency( + custom::task_arena::constraints{}.set_core_type(core_types.back()).set_max_threads_per_core(-1)); + } +# endif + return logical_cores; +} #endif #if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO)) diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.cpp b/inference-engine/src/mkldnn_plugin/cpu_types.cpp index 41d96bbac5ef7d..83fc153bd06f49 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.cpp +++ b/inference-engine/src/mkldnn_plugin/cpu_types.cpp @@ -177,7 +177,8 @@ const InferenceEngine::details::caseless_unordered_map type_t { "NonMaxSuppression", NonMaxSuppression}, { "NonMaxSuppressionIEInternal", NonMaxSuppression}, { "MatrixNms", MatrixNms}, - { "MulticlassNms", MulticlassNms} + { "MulticlassNms", MulticlassNms}, + { "Reference", Reference}, }; Type TypeFromName(const std::string& type) { @@ -351,6 +352,8 @@ std::string NameFromType(const Type type) { return "MatrixNms"; case MulticlassNms: return "MulticlassNms"; + case Reference: + return "Reference"; default: return "Unknown"; } diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp index bc33fc901e8abe..06e7508db24a18 100644 --- a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp @@ -9,20 +9,28 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape) : MemoryDesc(shape, DnnlBlocked) { 
+DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& strides) + : MemoryDesc(shape, DnnlBlocked) { const auto ndims = shape.getRank(); const auto &dims = shape.getDims(); - mkldnn::memory::dims plain_strides; - if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { - plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL); + + if (!strides.empty()) { // custom strides + desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims), + MKLDNNExtensionUtils::IEPrecisionToDataType(prc), + MKLDNNExtensionUtils::convertToDnnlDims(strides)}; } else { - plain_strides.resize(ndims, 1); - for (size_t i = 1; i < ndims; i++) { - plain_strides[ndims - i -1] = plain_strides[ndims - i] * dims[ndims - i]; + mkldnn::memory::dims plain_strides; + if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL); + } else { + plain_strides.resize(ndims, 1); + for (size_t i = 1; i < ndims; i++) { + plain_strides[ndims - i -1] = plain_strides[ndims - i] * dims[ndims - i]; + } } - } - desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims), MKLDNNExtensionUtils::IEPrecisionToDataType(prc), plain_strides}; + desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims), MKLDNNExtensionUtils::IEPrecisionToDataType(prc), plain_strides}; + } order.resize(ndims); std::iota(order.begin(), order.end(), 0); diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h index a467fdfc2bcdda..cb9bc0348a35c0 100644 --- a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h @@ -13,7 +13,7 @@ namespace MKLDNNPlugin { class DnnlBlockedMemoryDesc : public BlockedMemoryDesc, public DnnlMemoryDesc { public: // Creates planar DnnlBlockedMemoryDesc - DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape); + DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& strides = {}); DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp index e674ab6cf7e32e..d4daf57fd38ade 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp @@ -177,3 +177,15 @@ MKLDNNDescriptor::operator std::shared_ptr() { } return typeDesc->getPtr(); } + +MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { + this->desc.reset(new DescFwdImpl(desc)); +} + +MKLDNNDescriptor::operator std::shared_ptr() { + auto typeDesc = std::dynamic_pointer_cast>(desc); + if (typeDesc == nullptr) { + IE_THROW() << "Cannot cast descriptor!"; + } + return typeDesc->getPtr(); +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h index f64dfb082c40aa..d02f9c3da70a05 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h @@ -49,6 +49,9 @@ class MKLDNNDescriptor { explicit MKLDNNDescriptor(std::shared_ptr desc); operator std::shared_ptr(); + explicit MKLDNNDescriptor(std::shared_ptr desc); + operator std::shared_ptr(); + mkldnn::primitive_desc_iterator 
createPrimitiveDescriptorIterator(const mkldnn::engine &engine, const mkldnn::primitive_attr &attr = mkldnn::primitive_attr()) const; @@ -106,4 +109,4 @@ class MKLDNNDescriptor { }; std::shared_ptr desc; -}; \ No newline at end of file +}; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp index 69a4bc1721a8b2..ff154f4dee4199 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp @@ -91,13 +91,6 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, _callbackExecutor = _taskExecutor; } - // Workaround for initializing friendly names for all the OPs - // Otherwise they are initialized concurrently without thread safety. - // TODO: Can be removed after 57069 is done. - for (const auto& op : _network.getFunction()->get_ops()) { - op->get_friendly_name(); - } - int streams = std::max(1, _cfg.streamExecutorConfig._streams); std::vector tasks; tasks.resize(streams); _graphs.resize(streams); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 0d61fd72c8ee25..1d94f9f50a5a91 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -75,7 +75,7 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg status = Ready; - ENABLE_CPU_DEBUG_CAP(serialize(*this)); + CPU_DEBUG_CAP_ENABLE(serialize(*this)); } template void MKLDNNGraph::CreateGraph(const std::shared_ptr&, @@ -320,7 +320,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana void MKLDNNGraph::InitGraph() { MKLDNNGraphOptimizer optimizer; - ENABLE_CPU_DEBUG_CAP(initNodeDumper(config.debugCaps)); + CPU_DEBUG_CAP_ENABLE(initNodeDumper(config.debugCaps)); SortTopologically(); InitNodes(); @@ -397,7 +397,12 @@ void MKLDNNGraph::ExtractConstantAndExecutableNodes() { for (const auto& graphNode : graphNodes) { if (graphNode->isConstant()) constantGraphNodes.emplace_back(graphNode); - else if (graphNode->isExecutable()) + else if (CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable())) + /* @todo + * Revise implementation. 
+ * With the current approach it is possible that, with debug_caps enabled, + * we execute a node which is not ready to be executed + */ executableGraphNodes.emplace_back(graphNode); } } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 62373878082745..afe36548d9aef6 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -117,6 +117,10 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { FuseFullyConnectedAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMatMulAndSimpleOperation"); + FuseMatMulAndSimpleOperation(graph); + graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMVNAndSimpleOperation"); FuseMVNAndSimpleOperation(graph); graph.RemoveDroppedNodes(); @@ -646,6 +650,44 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra } } +void MKLDNNGraphOptimizer::FuseMatMulAndSimpleOperation(MKLDNNGraph &graph) { + auto& graphNodes = graph.GetNodes(); + + auto isSuitableParentNode = [](const MKLDNNNodePtr& node) { + return node->getType() == MatMul && node->getChildEdges().size() == 1; + }; + + auto parent = graphNodes.begin(); + while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSuitableParentNode(parentNode)) { + parent++; + continue; + } + + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!parentNode->canFuse(childNode)) { + parent++; + continue; + } + + childNode->fuseInto(parentNode); + + if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { + auto parentEdges = childNode->parentEdges; + for (auto &parentEdge : parentEdges) { + auto p_edge = parentEdge.lock(); + if (p_edge->getParent()->getType() == MatMul) + continue; + + graph.RemoveEdge(p_edge); + } + } + + graph.DropNode(childNode); + } +} + void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h index a28f7c73431790..e5615a1541b832 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h @@ -23,6 +23,7 @@ class MKLDNNGraphOptimizer { void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph); void FuseMultiplyAndAdd(MKLDNNGraph &graph); void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph); + void FuseMatMulAndSimpleOperation(MKLDNNGraph &graph); void FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph); void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph); void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index e756b4e209d04e..2d582b019755bb 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -70,6 +70,7 @@ #include #include +#include #include #include #include @@ -97,6 +98,7 @@ #include "nodes/mkldnn_fake_quantize_node.h" #include "nodes/mkldnn_normalize_node.h" #include "ngraph_transformations/convert_to_cpu_specific_opset.hpp" +#include "transformations/smart_reshape/smart_reshape.hpp" #if !defined(__arm__) && !defined(_M_ARM) &&
!defined(__aarch64__) && !defined(_M_ARM64) # ifdef _WIN32 @@ -363,6 +365,10 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr {0, {ngraph::element::u8}}, {1, {ngraph::element::i8}}, }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}} + }), }); auto perTensorQuantization = std::vector({ diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/align_matmul_input_ranks.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/align_matmul_input_ranks.cpp new file mode 100644 index 00000000000000..e4f46f23eb3109 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/align_matmul_input_ranks.cpp @@ -0,0 +1,142 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "align_matmul_input_ranks.hpp" + +#include "ngraph/op/matmul.hpp" +#include +#include +#include +#include +#include + +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::AlignMatMulInputRanks, "AlignMatMulInputRanks", 0); + +MKLDNNPlugin::AlignMatMulInputRanks::AlignMatMulInputRanks() { + ngraph::OutputVector twoInputs = { + ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), + ngraph::pattern::any_input(ngraph::pattern::has_static_rank()) + }; + + auto matmulPattern = ngraph::pattern::wrap_type(twoInputs); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { + auto matmul = std::dynamic_pointer_cast (m.get_match_root()); + + if (!matmul || transformation_callback(matmul)) + return false; + + const auto& input0 = matmul->input_value(0); + const auto& input1 = matmul->input_value(1); + const auto& input0shape = input0.get_partial_shape(); + const auto& input1shape = input1.get_partial_shape(); + const auto& output_shape = matmul->get_output_partial_shape(0); + + assert(input0shape.rank().is_static()); + assert(input1shape.rank().is_static()); + + const bool transposedUnsqueeze = input1shape.size() == 1; + + if (input0shape.size() == input1shape.size() && + input0shape.size() != 1) + return false; // nothing to do + + auto getUnsqueeze = [&](const ngraph::Output& nodeFrom, const ngraph::Output& nodeTo) { + auto rankFrom = nodeFrom.get_partial_shape().size(); + auto rankTo = nodeTo.get_partial_shape().size(); + + std::vector unsqueeze_axes; + for (int64_t j = 0; j < rankTo - rankFrom; ++j) + unsqueeze_axes.push_back(j); + + if (transposedUnsqueeze) // special case for one-dimensional second input + unsqueeze_axes[unsqueeze_axes.size() - 1]++; + + auto unsqueeze = std::make_shared( + nodeFrom, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{unsqueeze_axes.size()}, unsqueeze_axes)); + + unsqueeze->set_friendly_name(nodeFrom.get_node()->get_friendly_name() + "/Unsqueeze"); + + return unsqueeze; + }; + + auto matmul_new_inputs = matmul->input_values(); + ngraph::NodeVector new_ops; + + if (input0shape.size() == 1 && input1shape.size() == 1) { + // If the input is 1D tensor, it is unsqueezed to 2D tensor (row vector) + // for the first input: by adding axes with size 1 at ROW_INDEX_DIM + // to the left of the shape {S} -> {1, S} + // for the second input: by adding axes with size 1 at COL_INDEX_DIM + // to the right of the shape {S} -> {S, 1} + const auto unsqueezeInput0 = std::make_shared( + input0, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0})); + const auto unsqueezeInput1 = std::make_shared( + input1, + ngraph::opset1::Constant::create(ngraph::element::i64, 
ngraph::Shape{1}, {1})); + + matmul_new_inputs[0] = unsqueezeInput0; + new_ops.push_back(unsqueezeInput0); + matmul_new_inputs[1] = unsqueezeInput1; + new_ops.push_back(unsqueezeInput1); + // For 1D inputs transpose flag is expected to always act like `false` + matmul->set_transpose_a(false); + matmul->set_transpose_b(false); + } else if (input0shape.size() < input1shape.size()) { + std::shared_ptr unsqueezeInput0 = getUnsqueeze(input0, input1); + matmul_new_inputs[0] = unsqueezeInput0; + new_ops.push_back(unsqueezeInput0); + + if (input0shape.size() == 1) + matmul->set_transpose_a(false); + } else if (input0shape.size() > input1shape.size()) { + std::shared_ptr unsqueezeInput1 = getUnsqueeze(input1, input0); + matmul_new_inputs[1] = unsqueezeInput1; + new_ops.push_back(unsqueezeInput1); + + if (input1shape.size() == 1) + matmul->set_transpose_b(false); + } + + std::shared_ptr matmul_new = matmul->clone_with_new_inputs(matmul_new_inputs); + new_ops.push_back(matmul_new); + + if (matmul_new->get_output_partial_shape(0) != output_shape) { + // When one of the inputs is one-dimensional tensor, ngraph shrinks the output node by 1 + // For example: C * AxBxCxD -> AxBxD (instead of AxBx1xD) + // Insert additional squeeze operation to preserve output shape + const auto new_out_shape_size = matmul_new->get_output_partial_shape(0).size(); + size_t squeeze_axis = 0; + if (input0shape.size() == 1) + squeeze_axis = new_out_shape_size - 2; + else if (input1shape.size() == 1) + squeeze_axis = new_out_shape_size - 1; + std::shared_ptr squeeze_output = std::make_shared( + matmul_new, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {squeeze_axis})); + + new_ops.push_back(squeeze_output); + matmul_new->set_friendly_name(matmul->get_friendly_name() + "/MM"); + // Set the name of the last node after transformation to initial node name + // (in case initial node was an output node) + squeeze_output->set_friendly_name(matmul->get_friendly_name()); + ngraph::copy_runtime_info(matmul, new_ops); + ngraph::replace_node(matmul, squeeze_output); + } else { + matmul_new->set_friendly_name(matmul->get_friendly_name()); + ngraph::copy_runtime_info(matmul, new_ops); + ngraph::replace_node(matmul, matmul_new); + } + + + return true; + }; + + auto m = std::make_shared(matmulPattern, "AlignMatMulInputRanks"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/align_matmul_input_ranks.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/align_matmul_input_ranks.hpp new file mode 100644 index 00000000000000..8db3fee9debd2f --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/align_matmul_input_ranks.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +/* + * Description: + * AlignMatMulInputRanks transformation detects MatMul operations + * and unsqueezes one input to another to align the ranks of the inputs. 
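+ * As an illustrative example (shapes here are hypothetical): for a MatMul whose inputs
+ * have shapes {K, N} and {B, M, K, N}, the first input is unsqueezed on the left to
+ * {1, 1, K, N}, so that both inputs end up with rank 4.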
+ * The transformation is required because oneDNN library + * requires inputs to have equal ranks + */ + +namespace MKLDNNPlugin { + +class AlignMatMulInputRanks: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + AlignMatMulInputRanks(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.cpp similarity index 59% rename from inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.cpp rename to inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.cpp index b28d4f5aec5c8c..30cdb9063f90c6 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.cpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "convert_matmul_to_fc_or_gemm.hpp" +#include "convert_matmul_to_fc.hpp" #include "op/fully_connected.hpp" #include #include @@ -151,101 +151,3 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() { auto m = std::make_shared(matmul, "ConvertMatMulToFC"); this->register_matcher(m, callback); } - -NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertMatMulToGemm, "ConvertMatMulToGemm", 0); - -MKLDNNPlugin::ConvertMatMulToGemm::ConvertMatMulToGemm() { - auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), - ngraph::pattern::any_input(ngraph::pattern::has_static_shape())}, - ngraph::pattern::has_static_shape()); - - ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { - auto matmul = std::dynamic_pointer_cast(m.get_match_root()); - if (!matmul) { - return false; - } - - auto input_a = matmul->input(0).get_source_output(); - auto input_b = matmul->input(1).get_source_output(); - - auto shape_a = input_a.get_shape(); - auto shape_b = input_b.get_shape(); - auto output_shape = matmul->get_shape(); - - auto fc_input_a = input_a, fc_input_b = input_b; - ngraph::NodeVector new_ops; - - if (shape_a.size() == 1) { - // If the first input is 1D tensor, it is unsqueezed to 2D tensor (row vector) - // by adding axes with size 1 at ROW_INDEX_DIM, to the left of the shape. - // For example {S} will be reshaped to {1, S}. - fc_input_a = std::make_shared(fc_input_a, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0})); - shape_a = fc_input_a.get_shape(); - new_ops.push_back(fc_input_a.get_node_shared_ptr()); - // For 1D inputs transpose flag is expected to always act like `false` - matmul->set_transpose_a(false); - } - if (shape_b.size() == 1) { - // If the second input is 1D tensor, it is unsqueezed to 2D tensor (column vector) - // by adding axes with size 1 at COL_INDEX_DIM, to the right of the shape. - // For example {S} will be reshaped to {S, 1}. - fc_input_b = std::make_shared(fc_input_b, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1})); - shape_b = fc_input_b.get_shape(); - new_ops.push_back(fc_input_b.get_node_shared_ptr()); - // For 1D inputs transpose flag is expected to always act like `false` - matmul->set_transpose_b(false); - } - - // WA for IE that Gemm must have inputs with the same length. 
- // If ranks of input arguments are still different, - // the smaller tensor is unsqueezed from the left side of the shape - // by necessary number of axes to make both shapes of the same rank. - if (shape_a.size() < shape_b.size()) { - // Reshape first input (fc_input_a) - ngraph::Shape reshape_shape(shape_b.size() - shape_a.size(), 1); - reshape_shape.insert(reshape_shape.end(), shape_a.begin(), shape_a.end()); - fc_input_a = ngraph::op::util::reshapeTo(fc_input_a, reshape_shape); - new_ops.push_back(fc_input_a.get_node_shared_ptr()); - } else if (shape_b.size() < shape_a.size()) { - // Reshape second input (fc_input_b) - ngraph::Shape reshape_shape(shape_a.size() - shape_b.size(), 1); - reshape_shape.insert(reshape_shape.end(), shape_b.begin(), shape_b.end()); - fc_input_b = ngraph::op::util::reshapeTo(fc_input_b, reshape_shape); - new_ops.push_back(fc_input_b.get_node_shared_ptr()); - } - - auto gemm = matmul->copy_with_new_inputs({ fc_input_a, fc_input_b }); - new_ops.push_back(gemm); - - if (gemm->get_shape() != output_shape) { - // This case is possible when one of the inputs has exactly 1 dimension (that is not supported by GEMM operation) - // So to preserve output shape we insert additional reshape operation - std::shared_ptr reshape_output; - if (output_shape.size() == 0) { - std::vector dim_indices(gemm->get_shape().size()); - std::iota(dim_indices.begin(), dim_indices.end(), 0); - reshape_output = std::make_shared(gemm, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{dim_indices.size()}, dim_indices)); - } else { - reshape_output = ngraph::op::util::reshapeTo(gemm, output_shape); - } - - new_ops.push_back(reshape_output); - gemm->set_friendly_name(matmul->get_friendly_name() + "/gemm"); - reshape_output->set_friendly_name(matmul->get_friendly_name()); - ngraph::copy_runtime_info(matmul, new_ops); - ngraph::replace_node(matmul, reshape_output); - } else { - gemm->set_friendly_name(matmul->get_friendly_name()); - ngraph::copy_runtime_info(matmul, new_ops); - ngraph::replace_node(matmul, gemm); - } - - return true; - }; - - auto m = std::make_shared(matmul, "ConvertMatMulToGemm"); - this->register_matcher(m, callback); -} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.hpp similarity index 71% rename from inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.hpp rename to inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.hpp index 6f223eb8df9e1a..587ba08569b1b2 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.hpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.hpp @@ -14,10 +14,4 @@ class ConvertMatMulToFC: public ngraph::pass::MatcherPass { ConvertMatMulToFC(); }; -class ConvertMatMulToGemm: public ngraph::pass::MatcherPass { -public: - NGRAPH_RTTI_DECLARATION; - ConvertMatMulToGemm(); -}; - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp index 9f73446f95a168..de6087cf91d1b7 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp @@ -3,17 +3,22 @@ // 
#include -#include "convert_matmul_to_fc_or_gemm.hpp" #include "fc_bias_fusion.hpp" +#include "ngraph/op/fake_quantize.hpp" +#include "ngraph/pass/manager.hpp" +#include "reshape_1d_ops.hpp" #include "reshape_fc_fusion.hpp" #include "reshape_fully_connected.hpp" +#include "align_matmul_input_ranks.hpp" +#include "reshape_prelu.hpp" #include "convert_broadcast_to_tiles.hpp" #include "convert_tile_to_seq_tiles.hpp" -#include "reshape_1d_ops.hpp" +#include "convert_matmul_to_fc.hpp" #include "convert_to_power_static.hpp" #include "convert_to_leaky_relu.hpp" #include "convert_to_swish_cpu.hpp" -#include "reshape_prelu.hpp" +#include "transformations/convert_precision.hpp" +#include "transformations/utils/utils.hpp" #include "rnn_sequences_optimization.hpp" namespace MKLDNNPlugin { @@ -25,10 +30,10 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr &nGraphF manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); - manager.register_pass(); - manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -46,4 +51,4 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr &nGraphF manager.run_passes(nGraphFunc); } -} // namespace MKLDNNPlugin \ No newline at end of file +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp index 6bef76d3c7ec44..f8e79ed292ee50 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp @@ -51,7 +51,7 @@ MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr& op errorPrefix = "Broadcast node with name '" + op->get_friendly_name() + "'"; if (op->get_input_size() != 2 || op->get_output_size() != 1) - IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + IE_THROW() << errorPrefix << " has incorrect number of input/output edges! 
" << op->get_input_size() << "->" << op->get_output_size(); SizeVector shape_dims = op->get_input_shape(BROADCAST_SHAPE); if (shape_dims.size() > 1) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp index 060624bdab071a..969cf85d130362 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp @@ -3,10 +3,6 @@ // #include "mkldnn_def_conv_node.h" -#include "mkldnn_reorder_node.h" -#include "mkldnn_input_node.h" - -#include "mkldnn_eltwise_node.h" #include #include #include @@ -31,6 +27,8 @@ template struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_def_conv_kernel_f32) + constexpr static int sampledPointsPerPixel = MKLDNNDeformableConvolutionNode::sampledPointsPerPixel; + explicit jit_uni_def_conv_kernel_f32(jit_def_conv_params jcp) : jit_uni_def_conv_kernel(jcp), jit_generator() {} void create_ker() override { @@ -42,13 +40,20 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ this->preamble(); mov(reg_input, ptr[this->param1 + GET_OFF(src)]); - mov(reg_def_off, ptr[this->param1 + GET_OFF(off)]); + mov(reg_sampled_wei, ptr[this->param1 + GET_OFF(sampledWei)]); + mov(reg_sampled_offs, ptr[this->param1 + GET_OFF(sampledCoords)]); + mov(reg_kernel, ptr[this->param1 + GET_OFF(filt)]); if (jcp_.with_bias) mov(reg_bias, ptr[this->param1 + GET_OFF(bias)]); mov(reg_output, ptr[this->param1 + GET_OFF(dst)]); - mov(reg_input_buffer, ptr[this->param1 + GET_OFF(buf)]); - mov(reg_oh_pos, ptr[param1 + GET_OFF(oh_pos)]); + mov(reg_input_buffer_temp, ptr[this->param1 + GET_OFF(buf)]); + mov(oh_pos_temp, ptr[param1 + GET_OFF(oh_pos)]); + + // need to save temporary to prevent using of %rdi during GET_OFF(...) 
+ mov(reg_oh_pos, oh_pos_temp); + // prevents mismatching param1 == %rcx (on windows) and reg_input_buffer + mov(reg_input_buffer, reg_input_buffer_temp); ow_loop(); @@ -69,11 +74,11 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ using reg8_t = const Xbyak::Reg8; reg64_t reg_input = r8; - reg64_t reg_def_off = r9; + reg64_t reg_sampled_wei = r9; reg64_t reg_kernel = r10; reg64_t reg_bias = r11; reg64_t reg_output = r12; - reg64_t reg_oh_pos = r13; + reg64_t reg_oh_pos = rdi; reg64_t aux_reg_bias = rsi; reg64_t reg_ow_pos = rdx; reg64_t aux_reg_output = reg_ow_pos; @@ -82,16 +87,20 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ reg64_t aux2_reg_input = reg_kernel; reg64_t reg_ic_iter = rbx; reg64_t reg_oc_work = reg_ic_iter; - reg64_t aux_reg_def_off = reg_bias; - reg64_t reg_input_buffer = abi_not_param1; + reg64_t aux_reg_sampled_wei = reg_bias; + reg64_t reg_input_buffer = rcx; reg64_t aux_reg_input_buffer = r14; reg32_t reg_tmp_32 = r15d; reg64_t reg_tmp_64 = r15; reg64_t reg_table = rbp; reg64_t aux_reg_kernel = reg_table; reg64_t aux2_reg_kernel = r15; + reg64_t oh_pos_temp = aux2_reg_kernel; reg64_t aux2_reg_input_buffer = aux_reg_bias; + reg64_t reg_sampled_offs = aux2_reg_input_buffer; reg64_t aux3_reg_input_buffer = reg_input; + reg64_t aux_reg_sampled_offs = r13; + reg64_t reg_input_buffer_temp = aux_reg_sampled_offs; Xbyak::Opmask ktail_mask = Xbyak::Opmask(2); @@ -117,9 +126,10 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ jg(ow_tail, T_NEAR); oc_loop(jcp_.ur_w); - add(reg_input, jcp_.ur_w * jcp_.stride_w * jcp_.ic * jcp_.typesize_in); - add(reg_def_off, jcp_.ur_w * jcp_.typesize_off); + add(reg_sampled_wei, jcp_.ur_w * jcp_.kh * jcp_.kw * sampledPointsPerPixel * jcp_.typesize_sampled_wei); // type = float + add(reg_sampled_offs, jcp_.ur_w * jcp_.kh * jcp_.kw * sampledPointsPerPixel * jcp_.typesize_sampled_offsets); // type = int + add(reg_output, jcp_.ur_w * jcp_.oc * jcp_.typesize_out); add(reg_ow_pos, jcp_.ur_w); @@ -189,7 +199,6 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ if (isa == cpu::x64::sse41 && ow > 0) { uni_vmovups(vmm_ker, ptr[aux2_reg_kernel + ker_off * jcp_.typesize_in]); } - uni_vfmadd231ps(vmm_acc, vmm_ker, vmm_src); } } @@ -205,7 +214,6 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ for (int ocb = 0; ocb < oc_blocks_step; ocb++) { for (int ow = 0; ow < ow_step; ow++) { Vmm vmm_acc = get_vmm_acc(r * jcp_.ur_w * jcp_.nb_oc_blocking + ocb * ow_step + ow); - uni_vpxor(vmm_acc, vmm_acc, vmm_acc); } } @@ -219,10 +227,10 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ push(reg_oc_work); push(aux_reg_bias); + push(reg_sampled_offs); mov(aux2_reg_kernel, aux_reg_kernel); mov(aux2_reg_input_buffer, reg_input_buffer); - mov(reg_ic_iter, jcp_.ic); init_accums(ow_step, oc_blocks_step, oc_step); @@ -232,7 +240,6 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ jl(ic_tail, T_NEAR); apply_filter(ow_step, oc_blocks_step, oc_step, jcp_.ic_block); - add(aux2_reg_input_buffer, jcp_.ic_block * jcp_.typesize_in); add(aux2_reg_kernel, jcp_.kh * jcp_.kw * jcp_.ic_block * jcp_.oc_block * jcp_.typesize_in); sub(reg_ic_iter, jcp_.ic_block); @@ -245,17 +252,20 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ } } + pop(reg_sampled_offs); pop(aux_reg_bias); pop(reg_oc_work); } - void 
interpolate_input(int ow_step) { + void prepare_buffer(int ow_step) { Label dg_loop; Label dg_loop_end; mov(reg_table, l_table); - mov(aux_reg_def_off, reg_def_off); + mov(aux_reg_sampled_wei, reg_sampled_wei); + mov(aux_reg_sampled_offs, reg_sampled_offs); mov(aux_reg_input, reg_input); + push(reg_sampled_offs); mov(aux2_reg_input_buffer, aux_reg_input_buffer); xor_(reg_dg_iter, reg_dg_iter); @@ -267,15 +277,9 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ for (int ow = 0; ow < ow_step; ow++) { for (int kh = 0; kh < jcp_.kh; kh++) { for (int kw = 0; kw < jcp_.kw; kw++) { - Label init_with_zeros; Label ic_loop_main; Label ic_loop_tail; - Label ic_loop_zeros; Label loop_end; - Label h_sec_opt; - Label h_sec_opt_exit; - Label w_sec_opt; - Label w_sec_opt_exit; mov(aux2_reg_input, aux_reg_input); add(aux2_reg_input, (ow * jcp_.stride_w * jcp_.ic) * jcp_.typesize_in); @@ -283,200 +287,51 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ mov(aux3_reg_input_buffer, aux2_reg_input_buffer); add(aux3_reg_input_buffer, (ow * jcp_.kh * jcp_.kw * jcp_.ic) * jcp_.typesize_in); - Xmm xmm_tmp = Xmm(0); - - Xmm xmm_map_h = Xmm(2); - Xmm xmm_ih_in = Xmm(4); - Xmm xmm_ih_im = Xmm(1); - Xmm xmm_cur_height = xmm_ih_im; - Xmm xmm_h_low = xmm_ih_in; - Xmm xmm_h_high = xmm_cur_height; - Xmm xmm_lh = xmm_map_h; - Xmm xmm_hh = Xmm(3); - - Xmm xmm_map_w = Xmm(6); - Xmm xmm_iw_in = Xmm(8); - Xmm xmm_iw_im = Xmm(5); - Xmm xmm_cur_width = xmm_iw_im; - Xmm xmm_w_low = xmm_iw_in; - Xmm xmm_w_high = xmm_cur_width; - Xmm xmm_lw = xmm_map_w; - Xmm xmm_hw = Xmm(7); - Xmm xmm_v1_off = Xmm(9); Xmm xmm_v2_off = Xmm(10); Xmm xmm_v3_off = Xmm(11); Xmm xmm_v4_off = Xmm(12); - Xmm xmm_w1 = xmm_h_low; - Xmm xmm_w2 = xmm_h_high; - Xmm xmm_w3 = xmm_w_low; - Xmm xmm_w4 = xmm_w_high; - - Xmm xmm_v1 = xmm_lh; - Xmm xmm_v2 = xmm_hh; - Xmm xmm_v3 = xmm_lw; - Xmm xmm_v4 = xmm_hw; - - Vmm vmm_w1 = Vmm(xmm_h_low.getIdx()); - Vmm vmm_w2 = Vmm(xmm_h_high.getIdx()); - Vmm vmm_w3 = Vmm(xmm_w_low.getIdx()); - Vmm vmm_w4 = Vmm(xmm_w_high.getIdx()); - - Vmm vmm_v1 = Vmm(xmm_lh.getIdx()); - Vmm vmm_v2 = Vmm(xmm_hh.getIdx()); - Vmm vmm_v3 = Vmm(xmm_lw.getIdx()); - Vmm vmm_v4 = Vmm(xmm_hw.getIdx()); - - size_t def_off_h = ((2 * (kh * jcp_.kw + kw) + 0) * jcp_.oh * jcp_.ow) + ow; - mov(reg_tmp_32, ptr[aux_reg_def_off + def_off_h * jcp_.typesize_off]); - movq(xmm_tmp, reg_tmp_64); - mov(reg_tmp_32, cpu::x64::float2int(static_cast((kh * (jcp_.dilate_h + 1))))); - movq(xmm_map_h, reg_tmp_64); - addss(xmm_map_h, xmm_tmp); - - mov(reg_tmp_32, jcp_.stride_h); - imul(reg_tmp_32, reg_oh_pos); - sub(reg_tmp_32, jcp_.t_pad); - movq(xmm_ih_in, reg_tmp_64); - - cvtsi2ss(xmm_ih_im, reg_tmp_32); - addss(xmm_ih_im, xmm_map_h); - - movss(xmm_tmp, xmm_ih_im); - cmpss(xmm_tmp, table_val(0), 1); - movq(reg_tmp_64, xmm_tmp); - cmp(reg_tmp_32, 0); - jne(init_with_zeros, T_NEAR); - - cmpss(xmm_ih_im, table_val(1), 1); - movq(reg_tmp_64, xmm_ih_im); - cmp(reg_tmp_32, 0); - je(init_with_zeros, T_NEAR); - - - size_t def_off_w = ((2 * (kh * jcp_.kw + kw) + 1) * jcp_.oh * jcp_.ow) + ow; - mov(reg_tmp_32, ptr[aux_reg_def_off + def_off_w * jcp_.typesize_off]); - movq(xmm_tmp, reg_tmp_64); - mov(reg_tmp_32, cpu::x64::float2int(static_cast((kw * (jcp_.dilate_w + 1))))); - movq(xmm_map_w, reg_tmp_64); - addss(xmm_map_w, xmm_tmp); - - mov(reg_tmp_32, jcp_.stride_w); - imul(reg_tmp_32, reg_ow_pos); - sub(reg_tmp_32, jcp_.l_pad - ow * jcp_.stride_w); - movq(xmm_iw_in, reg_tmp_64); - - cvtsi2ss(xmm_iw_im, reg_tmp_32); 
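// [Editor's sketch, not part of the patch: the assembly deleted around this point derives
//  the fractional sampling position for one kernel tap; the same arithmetic in scalar C++,
//  mirroring the map_h/map_w computation that prepareSamplingWeights performs later in
//  this patch. Parameter names are illustrative.]
inline float samplingCoord(int out_pos, int stride, int pad, int k, int dilation, float learned_offset) {
    const int in_pos = out_pos * stride - pad;             // top-left corner of the receptive field
    return in_pos + k * (dilation + 1) + learned_offset;   // fractional position to sample from
}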
- addss(xmm_iw_im, xmm_map_w); - - movss(xmm_tmp, xmm_iw_im); - cmpss(xmm_tmp, table_val(0), 1); - movq(reg_tmp_64, xmm_tmp); - cmp(reg_tmp_32, 0); - jne(init_with_zeros, T_NEAR); - - cmpss(xmm_iw_im, table_val(2), 1); - movq(reg_tmp_64, xmm_iw_im); - cmp(reg_tmp_32, 0); - je(init_with_zeros, T_NEAR); - - - movd(xmm_cur_height, table_val(3)); - psubd(xmm_cur_height, xmm_ih_in); - - roundps(xmm_h_low, xmm_map_h, 1); - cvtps2dq(xmm_h_low, xmm_h_low); - - movups(xmm_tmp, xmm_cur_height); - pcmpgtd(xmm_tmp, xmm_h_low); - - movq(reg_tmp_64, xmm_tmp); - cmp(reg_tmp_32, 0); - jne(h_sec_opt, T_NEAR); - - movups(xmm_h_low, xmm_cur_height); - movups(xmm_h_high, xmm_h_low); - jmp(h_sec_opt_exit); - - L(h_sec_opt); - - movups(xmm_h_high, xmm_h_low); - paddd(xmm_h_high, table_val(5)); - - L(h_sec_opt_exit); - - cvtdq2ps(xmm_tmp, xmm_h_low); - subss(xmm_lh, xmm_tmp); - movss(xmm_hh, table_val(5)); - cvtdq2ps(xmm_hh, xmm_hh); - subss(xmm_hh, xmm_lh); - - - movd(xmm_cur_width, table_val(4)); - psubd(xmm_cur_width, xmm_iw_in); - - roundps(xmm_w_low, xmm_map_w, 1); - cvtps2dq(xmm_w_low, xmm_w_low); - - movups(xmm_tmp, xmm_cur_width); - pcmpgtd(xmm_tmp, xmm_w_low); - - movq(reg_tmp_64, xmm_tmp); - cmp(reg_tmp_32, 0); - jne(w_sec_opt, T_NEAR); - - movups(xmm_w_low, xmm_cur_width); - movups(xmm_w_high, xmm_w_low); - jmp(w_sec_opt_exit); - - L(w_sec_opt); - - movups(xmm_w_high, xmm_w_low); - paddd(xmm_w_high, table_val(5)); - - L(w_sec_opt_exit); - - cvtdq2ps(xmm_tmp, xmm_w_low); - subss(xmm_lw, xmm_tmp); - movss(xmm_hw, table_val(5)); - cvtdq2ps(xmm_hw, xmm_hw); - subss(xmm_hw, xmm_lw); - - - movups(xmm_v1_off, table_val(2)); - cvtps2dq(xmm_v1_off, xmm_v1_off); - movups(xmm_v3_off, xmm_v1_off); - - pmulld(xmm_v1_off, xmm_h_low); - movups(xmm_v2_off, xmm_v1_off); - paddd(xmm_v1_off, xmm_w_low); - paddd(xmm_v2_off, xmm_w_high); - - pmulld(xmm_v3_off, xmm_h_high); - movups(xmm_v4_off, xmm_v3_off); - paddd(xmm_v3_off, xmm_w_low); - paddd(xmm_v4_off, xmm_w_high); - - - movss(xmm_w1, xmm_hh); - mulss(xmm_w1, xmm_hw); - uni_vbroadcastss(vmm_w1, xmm_w1); - - movss(xmm_w2, xmm_hh); - mulss(xmm_w2, xmm_lw); - uni_vbroadcastss(vmm_w2, xmm_w2); - - movss(xmm_w3, xmm_lh); - mulss(xmm_w3, xmm_hw); - uni_vbroadcastss(vmm_w3, xmm_w3); - - movss(xmm_w4, xmm_lh); - mulss(xmm_w4, xmm_lw); - uni_vbroadcastss(vmm_w4, xmm_w4); + Xmm xmm_w1 = Xmm(4); + Xmm xmm_w2 = Xmm(1); + Xmm xmm_w3 = Xmm(8); + Xmm xmm_w4 = Xmm(5); + + Xmm xmm_v1 = Xmm(2); + Xmm xmm_v2 = Xmm(3);; + Xmm xmm_v3 = Xmm(6); + Xmm xmm_v4 = Xmm(7); + + Vmm vmm_w1 = Vmm(xmm_w1.getIdx()); + Vmm vmm_w2 = Vmm(xmm_w2.getIdx()); + Vmm vmm_w3 = Vmm(xmm_w3.getIdx()); + Vmm vmm_w4 = Vmm(xmm_w4.getIdx()); + + Vmm vmm_v1 = Vmm(xmm_v1.getIdx()); + Vmm vmm_v2 = Vmm(xmm_v2.getIdx()); + Vmm vmm_v3 = Vmm(xmm_v3.getIdx()); + Vmm vmm_v4 = Vmm(xmm_v4.getIdx()); + + // offsets computation + size_t ind_off_hh = sampledPointsPerPixel * (((size_t) kh * jcp_.kw + kw) + ow * (jcp_.kh * jcp_.kw)); + size_t ind_off_hl = ind_off_hh + 1; + size_t ind_off_lh = ind_off_hl + 1; + size_t ind_off_ll = ind_off_lh + 1; + + movq(xmm_v1_off, qword[aux_reg_sampled_offs + ind_off_ll * jcp_.typesize_sampled_offsets]); + movq(xmm_v2_off, qword[aux_reg_sampled_offs + ind_off_hl * jcp_.typesize_sampled_offsets]); + movq(xmm_v3_off, qword[aux_reg_sampled_offs + ind_off_lh * jcp_.typesize_sampled_offsets]); + movq(xmm_v4_off, qword[aux_reg_sampled_offs + ind_off_hh * jcp_.typesize_sampled_offsets]); + + // w's computation + uni_vbroadcastss(vmm_w1, dword[aux_reg_sampled_wei + ind_off_ll * 
jcp_.typesize_sampled_wei]); + uni_vbroadcastss(vmm_w2, dword[aux_reg_sampled_wei + ind_off_hl * jcp_.typesize_sampled_wei]); + uni_vbroadcastss(vmm_w3, dword[aux_reg_sampled_wei + ind_off_lh * jcp_.typesize_sampled_wei]); + uni_vbroadcastss(vmm_w4, dword[aux_reg_sampled_wei + ind_off_hh * jcp_.typesize_sampled_wei]); int simd_w = vlen / jcp_.typesize_in; mov(reg_ic_iter, ic_per_def_group); + L(ic_loop_main); { cmp(reg_ic_iter, simd_w); @@ -529,7 +384,6 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ jl(loop_end, T_NEAR); size_t input_buffer_off = (size_t) kh * jcp_.kw * jcp_.ic + kw * jcp_.ic; - pmovsxdq(xmm_v1_off, xmm_v1_off); movq(reg_tmp_64, xmm_v1_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); @@ -568,32 +422,14 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ sub(reg_ic_iter, 1); jmp(ic_loop_tail, T_NEAR); } - jmp(loop_end, T_NEAR); - - L(init_with_zeros); - - mov(reg_ic_iter, 0); - L(ic_loop_zeros); - { - cmp(reg_ic_iter, ic_per_def_group); - je(loop_end, T_NEAR); - - size_t input_buffer_off = (size_t) kh * jcp_.kw * jcp_.ic + kw * jcp_.ic; - - pxor(xmm_tmp, xmm_tmp); - movss(ptr[aux3_reg_input_buffer + input_buffer_off * jcp_.typesize_in], xmm_tmp); - add(aux3_reg_input_buffer, jcp_.typesize_in); - inc(reg_ic_iter); - jmp(ic_loop_zeros, T_NEAR); - } - L(loop_end); } } } - add(aux_reg_def_off, 2 * jcp_.kh * jcp_.kw * jcp_.oh * jcp_.ow * jcp_.typesize_off); + add(aux_reg_sampled_wei, sampledPointsPerPixel * jcp_.kh * jcp_.kw * jcp_.oh * jcp_.ow * jcp_.typesize_sampled_wei); + add(aux_reg_sampled_offs, sampledPointsPerPixel * jcp_.kh * jcp_.kw * jcp_.oh * jcp_.ow * jcp_.typesize_sampled_offsets); add(aux_reg_input, ic_per_def_group * jcp_.typesize_in); add(aux2_reg_input_buffer, ic_per_def_group * jcp_.typesize_in); inc(reg_dg_iter); @@ -601,6 +437,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ } L(dg_loop_end); + pop(reg_sampled_offs); } void store_output(int ow_step, int oc_blocks_step, int oc_step) { @@ -614,7 +451,6 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ for (int ow = 0; ow < ow_step; ow++) { Vmm vmm_acc = get_vmm_acc(r * jcp_.ur_w * jcp_.nb_oc_blocking + ocb * ow_step + ow); - uni_vaddps(vmm_acc, vmm_acc, Vmm(0)); } } @@ -637,12 +473,10 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ if (isa == avx512_common) { size_t out_off = (size_t) ow * jcp_.oc; - uni_vmovups(ptr[aux_reg_output + out_off * jcp_.typesize_out], vmm_dst | ktail_mask); } else { for (int oc = 0; oc < tail_size; oc++) { size_t out_off = (size_t) ow * jcp_.oc + oc + r * (jcp_.oc_block / 2); - movq(reg_tmp_64, xmm_dst); mov(ptr[aux_reg_output + out_off * jcp_.typesize_out], reg_tmp_32); @@ -663,8 +497,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ for (int ocb = 0; ocb < oc_blocks_step; ocb++) { for (int ow = 0; ow < ow_step; ow++) { Vmm vmm_acc = get_vmm_acc(r * jcp_.ur_w * jcp_.nb_oc_blocking + ocb * ow_step + ow); - size_t out_off = (size_t) ow * jcp_.oc + ocb * jcp_.oc_block + r * (jcp_.oc_block / 2); - + size_t out_off = (size_t) ow * jcp_.oc * jcp_.ngroups + ocb * jcp_.oc_block + r * (jcp_.oc_block / 2); uni_vmovups(ptr[aux_reg_output + out_off * jcp_.typesize_out], vmm_acc); } } @@ -684,19 +517,20 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ push(reg_input); push(reg_kernel); - interpolate_input(ow_step); + 
prepare_buffer(ow_step); pop(reg_kernel); pop(reg_input); pop(reg_bias); pop(reg_output); + push(reg_sampled_offs); push(reg_ow_pos); + push(aux2_reg_kernel); mov(aux_reg_kernel, reg_kernel); mov(aux_reg_output, reg_output); mov(aux_reg_bias, reg_bias); - mov(reg_oc_work, jcp_.oc); L(oc_unrolled_loop); { @@ -736,7 +570,9 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ } } + pop(aux2_reg_kernel); pop(reg_ow_pos); + pop(reg_sampled_offs); } }; @@ -759,8 +595,7 @@ bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr } MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shared_ptr& op, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { + const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -793,29 +628,23 @@ MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shar } else { with_bilinear_pad = false; } - enforceRef = (op->get_type_info() == ngraph::op::v8::DeformableConvolution::type_info); } void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' "; - if (getParentEdges().size() != 3 && getParentEdges().size() != 4) IE_THROW() << errorPrefix << "has incorrect number of input edges"; if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getInputShapeAtPort(0).getRank() != 4) { IE_THROW() << "Deformable convolution layer. Unsupported mode. Only 4D blobs are supported as input."; } - if (getInputShapeAtPort(1).getRank() != 4) { IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getInputShapeAtPort(1).getRank(); } - if (getInputShapeAtPort(2).getRank() != 4) { IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getInputShapeAtPort(2).getRank(); } - if (getOutputShapeAtPort(0).getRank() != 4) { IE_THROW() << errorPrefix << "doesn't support output with rank: " << getOutputShapeAtPort(0).getRank(); } @@ -845,6 +674,12 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = -1; impl_desc_type impl_type; + const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8; + if (group != 1 || (((getInputShapeAtPort(0).getStaticDims()[1] / group) % simd_w != 0) + || ((getOutputShapeAtPort(0).getStaticDims()[1] / group) % simd_w != 0))) { + enforceRef = true; + } + if (enforceRef) { impl_type = impl_desc_type::ref; } else if (mayiuse(cpu::x64::avx512_common)) { @@ -861,14 +696,11 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { // optimized implementation auto dataFormat = memory::format_tag::nhwc; auto offFormat = memory::format_tag::nchw; - auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o - : mayiuse(avx512_common) ? memory::format_tag::OIhw16i16o : memory::format_tag::OIhw8i8o; - + auto weiFormat = mayiuse(avx512_common) ? 
memory::format_tag::OIhw16i16o : memory::format_tag::OIhw8i8o; config.inConfs[0].desc = std::make_shared(getInputShapeAtPort(0), memory::data_type::f32, dataFormat); config.inConfs[1].desc = std::make_shared(getInputShapeAtPort(1), memory::data_type::f32, offFormat); - auto& wDims = getInputShapeAtPort(2).getStaticDims(); if (group > 1 && wDims.size() != 5) { auto new_dims = InferenceEngine::SizeVector({group, div_up(wDims[0], group)}); @@ -882,7 +714,6 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { memory::data_type::f32, weiFormat); } - if (inputsNumber > 3) { config.inConfs[3].desc = std::make_shared(getInputShapeAtPort(3), memory::data_type::f32, memory::format_tag::nchw); @@ -908,6 +739,133 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { } } +void MKLDNNDeformableConvolutionNode::prepareSamplingWeights( + const std::vector& src_strides, const float* offsets, const std::vector& off_strides, + const float* modulation, const std::vector& modulation_strides) { + const int MB = jcp.mb; + const int OH = jcp.oh; + const int OW = jcp.ow; + + const int KH = jcp.kh; + const int KW = jcp.kw; + const int ker_size = KH * KW; + + const int DG = jcp.dg; + + const int IH = jcp.ih; + const int IW = jcp.iw; + + const int KSH = jcp.stride_h; + const int KSW = jcp.stride_w; + + const int KDH = jcp.dilate_h; + const int KDW = jcp.dilate_w; + + const int padT = jcp.t_pad; + const int padL = jcp.l_pad; + + const bool with_bi_pad = jcp.with_bi_pad; + + // prepare weights and indices + sampledCoordsVector.resize(MB * DG * KH * KW * OH * OW * sampledPointsPerPixel); + interpWeightsVector.resize(MB * DG * KH * KW * OH * OW * sampledPointsPerPixel); + auto precompKer = [&](int mb, int dg, int oh, int ow) { + int sampledCoordIndex = (mb * DG * OH * OW + dg * OH * OW + oh * OW + ow) * KH * KW * sampledPointsPerPixel; + const int h_in = oh * KSH - padT; + const int w_in = ow * KSW - padL; + + const int waOffsetH = (enforceRef ? 0 : h_in); + const int waOffsetW = (enforceRef ? 0 : w_in); + + const float *data_offset_ptr = offsets + mb * off_strides[0] + (dg * 2 * KH * KW) * off_strides[1]; + const float *modulation_offset_ptr = nullptr; + if (modulation != nullptr) { + modulation_offset_ptr = modulation + mb * modulation_strides[0] + (dg * ker_size) * modulation_strides[1]; + } + + for (int kh = 0; kh < KH; kh++) { + for (int kw = 0; kw < KW; kw++) { + const size_t data_offset_h_index = 2 * ((size_t) kh * KW + kw) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; + const size_t data_offset_w_index = (2 * ((size_t) kh * KW + kw) + 1) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; + const float offset_h = data_offset_ptr[data_offset_h_index]; + const float offset_w = data_offset_ptr[data_offset_w_index]; + float map_h = h_in + kh * (KDH + 1) + offset_h; + float map_w = w_in + kw * (KDW + 1) + offset_w; + bool skip_compute; + if (with_bilinear_pad) { + skip_compute = !(static_cast(map_w) > -1 && + static_cast(map_w) < IW && + static_cast(map_h) > -1 && + static_cast(map_h) < IH); + } else { + skip_compute = !(map_w >= 0 && map_w < IW && + map_h >= 0 && map_h < IH); + } + if (!skip_compute) { + // modulations precomp. + float modulation_scalar = 1.0f; + + if (modulation_offset_ptr != nullptr) { + size_t modulation_index = (kh * KW + kw) * modulation_strides[1] + oh * modulation_strides[2] + ow * modulation_strides[3]; + modulation_scalar = modulation_offset_ptr[modulation_index]; + } + // interpolation precomp. 
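// [Editor's sketch, not part of the patch: per sampling point the precomputation below
//  boils down to the classic bilinear blend; scalar form, without the border clipping
//  and modulation scaling handled by the real code. Names are illustrative.]
#include <cmath>

struct BilinearTaps {
    int   h_low, h_high, w_low, w_high;  // the four neighbouring pixels
    float w_hh, w_hl, w_lh, w_ll;        // their blending weights (sum to 1)
};

inline BilinearTaps bilinearTaps(float map_h, float map_w) {
    BilinearTaps t;
    t.h_low  = static_cast<int>(std::floor(map_h));
    t.w_low  = static_cast<int>(std::floor(map_w));
    t.h_high = t.h_low + 1;
    t.w_high = t.w_low + 1;
    const float lh = map_h - t.h_low, lw = map_w - t.w_low;  // fractional parts
    const float hh = 1.f - lh, hw = 1.f - lw;
    t.w_hh = hh * hw;  // weight of (h_low,  w_low)
    t.w_hl = hh * lw;  // weight of (h_low,  w_high)
    t.w_lh = lh * hw;  // weight of (h_high, w_low)
    t.w_ll = lh * lw;  // weight of (h_high, w_high)
    return t;
}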
+ const int cur_h_end = IH; + const int cur_w_end = IW; + int h_low = with_bi_pad ? static_cast(floorf(map_h)) : + std::max(static_cast(floorf(map_h)), 0); + int w_low = with_bi_pad ? static_cast(floorf(map_w)) : + std::max(static_cast(floorf(map_w)), 0); + int h_high = with_bi_pad ? h_low + 1 : std::min(static_cast(ceilf(map_h)), cur_h_end - 1); + int w_high = with_bi_pad ? w_low + 1 : std::min(static_cast(ceilf(map_w)), cur_w_end - 1); + + float lh = map_h - h_low; + float lw = map_w - w_low; + float hh = 1 - lh, hw = 1 - lw; + + int h_ind_low = std::max(h_low, 0) - waOffsetH; + int h_ind_high = std::min(h_high, cur_h_end - 1) - waOffsetH; + int w_ind_low = std::max(w_low, 0) - waOffsetW; + int w_ind_high = std::min(w_high, cur_w_end - 1) - waOffsetW; + + hh = (h_low >= 0 ? hh : 0); + hw = (w_low >= 0 ? hw : 0); + lh = (h_high < cur_h_end ? lh : 0); + lw = (w_high < cur_w_end ? lw : 0); + + const int h_off_low = h_ind_low * src_strides[2] / src_strides[3]; + const int h_off_high = h_ind_high * src_strides[2] / src_strides[3]; + const int w_off_low = w_ind_low; + const int w_off_high = w_ind_high; + sampledCoordsVector[sampledCoordIndex] = h_off_high + w_off_high; + sampledCoordsVector[sampledCoordIndex + 1] = h_off_high + w_off_low; + sampledCoordsVector[sampledCoordIndex + 2] = h_off_low + w_off_high; + sampledCoordsVector[sampledCoordIndex + 3] = h_off_low + w_off_low; + + float w22 = hh * hw * modulation_scalar, w21 = hh * lw * modulation_scalar, + w12 = lh * hw * modulation_scalar, w11 = lh * lw * modulation_scalar; + + interpWeightsVector[sampledCoordIndex] = w11; + interpWeightsVector[sampledCoordIndex + 1] = w12; + interpWeightsVector[sampledCoordIndex + 2] = w21; + interpWeightsVector[sampledCoordIndex + 3] = w22; + } else { + sampledCoordsVector[sampledCoordIndex] = 0; + interpWeightsVector[sampledCoordIndex] = 0; + interpWeightsVector[sampledCoordIndex + 1] = 0; + interpWeightsVector[sampledCoordIndex + 2] = 0; + interpWeightsVector[sampledCoordIndex + 3] = 0; + } + sampledCoordIndex += sampledPointsPerPixel; + } + } + }; + + parallel_nd(MB, DG, OH, OW, [&](int mb, int dg, int oh, int ow) { + precompKer(mb, dg, oh, ow); + }); +} + void MKLDNNDeformableConvolutionNode::createPrimitive() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) @@ -958,8 +916,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { jcp.typesize_in = sizeof(float); jcp.typesize_off = sizeof(float); + jcp.typesize_sampled_wei = sizeof(float); + jcp.typesize_sampled_offsets = sizeof(int); jcp.typesize_out = sizeof(float); - jcp.typesize_modulation = sizeof(float); jcp.ur_w = mayiuse(cpu::x64::avx512_common) ? 6 : 3; jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 
2 : 4; @@ -980,126 +939,68 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { def_conv_kernel->create_ker(); } -void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const float* offsets, const float* weights, float* dst, - const std::vector& src_strides, const std::vector& off_strides, - const std::vector& wei_strides, const std::vector& dst_strides, - const float* modulation, const std::vector& modulation_strides) { - const bool with_groups = jcp.ngroups > 1; +void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const float* weights, float* dst, const std::vector& src_strides, + const std::vector& wei_strides, const std::vector& dst_strides) { const int G = jcp.ngroups; const int MB = jcp.mb; const int OH = jcp.oh; const int OW = jcp.ow; - const int IH = jcp.ih; - const int IW = jcp.iw; const int OC = jcp.oc; const int IC = jcp.ic; const int KH = jcp.kh; const int KW = jcp.kw; - - const int KSH = jcp.stride_h; - const int KSW = jcp.stride_w; - - const int KDH = jcp.dilate_h; - const int KDW = jcp.dilate_w; - - const int padT = jcp.t_pad; - const int padL = jcp.l_pad; + const int ker_size = KH * KW; const int DG = jcp.dg; - const int channel_per_deformable_group = (IC * G) / DG; + const int DGHW = DG * OH * OW; + const int HW = OH * OW; - const bool with_bi_pad = jcp.with_bi_pad; - auto ker = [=](int g, int mb, int oc, int oh, int ow) { + const int channel_per_deformable_group = (IC * G) / DG; + const size_t group_wei_stride = wei_strides[0] * OC; + auto compKer = [=](int g, int mb, int oc, int oh, int ow) { float d = 0; - const int h_in = oh * KSH - padT; - const int w_in = ow * KSW - padL; - for (int ic = 0; ic < IC; ic++) { const float *data_im_ptr = src + mb * src_strides[0] + (g * IC + ic) * src_strides[1]; const int deformable_group_index = (IC * g + ic) / channel_per_deformable_group; - const float *data_offset_ptr = offsets + mb * off_strides[0] + (deformable_group_index * 2 * KH * KW) * off_strides[1]; - const float *modulation_offset_ptr = nullptr; - if (modulation != nullptr) { - modulation_offset_ptr = modulation + mb * modulation_strides[0] + (deformable_group_index * KH * KW) * modulation_strides[1]; - } - - for (int kh = 0; kh < KH; kh++) { - for (int kw = 0; kw < KW; kw++) { - const size_t data_offset_h_index = 2 * (kh * KW + kw) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; - const size_t data_offset_w_index = (2 * (kh * KW + kw) + 1) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; - const float offset_h = data_offset_ptr[data_offset_h_index]; - const float offset_w = data_offset_ptr[data_offset_w_index]; - float map_h = h_in + kh * (KDH + 1) + offset_h; - float map_w = w_in + kw * (KDW + 1) + offset_w; - bool skip_compute; - if (with_bilinear_pad) { - skip_compute = !(static_cast(map_w) > -1 && - static_cast(map_w) < IW && - static_cast(map_h) > -1 && - static_cast(map_h) < IH); + int sampledCoordIndex = (mb * DGHW + deformable_group_index * HW + oh * OW + ow) * ker_size * sampledPointsPerPixel; + size_t weiIndex = (size_t) g * group_wei_stride + oc * wei_strides[0] + ic * wei_strides[1]; + for (int kh_off = 0; kh_off < KH * wei_strides[2]; kh_off += wei_strides[2]) { + for (int kw_off = 0; kw_off < KW * wei_strides[3]; kw_off += wei_strides[3]) { + // check if current addendum marked as equal zero + if (sampledCoordsVector[sampledCoordIndex] != -1) { + const int v11 = sampledCoordsVector[sampledCoordIndex]; + const int v12 = sampledCoordsVector[sampledCoordIndex + 1]; + const int v21 = 
sampledCoordsVector[sampledCoordIndex + 2]; + const int v22 = sampledCoordsVector[sampledCoordIndex + 3]; + float val = interpWeightsVector[sampledCoordIndex++] * data_im_ptr[v11]; // v11 + val += interpWeightsVector[sampledCoordIndex++] * data_im_ptr[v12]; // v12 + val += interpWeightsVector[sampledCoordIndex++] * data_im_ptr[v21]; // v21 + val += interpWeightsVector[sampledCoordIndex++] * data_im_ptr[v22]; // v22 + d += val * weights[weiIndex + kh_off + kw_off]; } else { - skip_compute = !(map_w >= 0 && - map_w < IW && - map_h >= 0 && - map_h < IH); - } - if (!skip_compute) { - const int cur_h_end = IH; - const int cur_w_end = IW; - int h_low = with_bi_pad ? static_cast(floorf(map_h)) : - std::max(static_cast(floorf(map_h)), 0); - int w_low = with_bi_pad ? static_cast(floorf(map_w)) : - std::max(static_cast(floorf(map_w)), 0); - const int cur_h_start = h_low; - const int cur_w_start = w_low; - int h_high = with_bi_pad ? h_low + 1 : std::min(static_cast(ceilf(map_h)), cur_h_end - 1); - int w_high = with_bi_pad ? w_low + 1 : std::min(static_cast(ceilf(map_w)), cur_w_end - 1); - - float lh = map_h - h_low; - float lw = map_w - w_low; - float hh = 1 - lh, hw = 1 - lw; - - float v1 = (cur_w_start >= 0 && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]] : 0.0f; - float v2 = (w_high < cur_w_end && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]] : 0.0f; - float v3 = (cur_w_start >= 0 && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]] : 0.0f; - float v4 = (w_high < cur_w_end && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]] : 0.0f; - float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - float modulation_scalar = 1.0f; - - if (modulation_offset_ptr != nullptr) { - size_t modulation_index = (kh * KW + kw) * modulation_strides[1] + oh * modulation_strides[2] + ow * modulation_strides[3]; - modulation_scalar = modulation_offset_ptr[modulation_index]; - } - - const float weight = with_groups ? 
weights[(g + oc / G) * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + - kw * wei_strides[3]] - : weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]]; - d += val * weight * modulation_scalar; + sampledCoordIndex += sampledPointsPerPixel; } } } } - return d; }; parallel_nd(G, MB, OC, OH, OW, - [&](int g, int mb, int oc, int oh, int ow) { - dst[mb * dst_strides[0] + (g * OC + oc) * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]] = ker(g, mb, oc, oh, ow); - }); + [&](int g, int mb, int oc, int oh, int ow) { + dst[mb * dst_strides[0] + (g * OC + oc) * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]] = compKer(g, mb, oc, oh, ow); + }); } -void MKLDNNDeformableConvolutionNode::executeOptimized(const float* src, const float* offsets, const float* weights, float* dst, - const std::vector& src_strides, const std::vector& off_strides, + +void MKLDNNDeformableConvolutionNode::executeOptimized(const float* src, const float* weights, float* dst, + const std::vector& src_strides, const std::vector& dst_strides) { size_t buffer_size = (size_t)jcp.nthr * jcp.ur_w * jcp.kh * jcp.kw * jcp.ic * jcp.typesize_in; std::vector input_buffer(buffer_size, 0); - float* input_buffer_ptr = &input_buffer[0]; + float* input_buffer_ptr = input_buffer.data(); parallel_for3d(jcp.mb, jcp.ngroups, jcp.oh, [&](size_t n, size_t g, size_t oh) { auto ithr = parallel_get_thread_num(); @@ -1111,10 +1012,10 @@ void MKLDNNDeformableConvolutionNode::executeOptimized(const float* src, const f par_conv.src = &src[n * src_strides[0] + _ic*jcp.ic_block * src_strides[1] + (oh * jcp.stride_h - jcp.t_pad) * src_strides[2] - jcp.l_pad * src_strides[3]]; - par_conv.off = &offsets[n * off_strides[0] + oh * off_strides[2]]; - par_conv.filt = weights; - par_conv.dst = &dst[n * dst_strides[0] + _oc*jcp.oc_block * dst_strides[1] + oh * dst_strides[2]]; - + par_conv.sampledWei = &interpWeightsVector[(n * jcp.dg * jcp.oh + oh) * jcp.kh * jcp.kw * jcp.ow * sampledPointsPerPixel]; + par_conv.sampledCoords = &sampledCoordsVector[(n * jcp.dg * jcp.oh + oh) * jcp.kh * jcp.kw * jcp.ow * sampledPointsPerPixel]; + par_conv.filt = &weights[g * jcp.nb_oc * jcp.nb_ic * jcp.kh * jcp.kw * jcp.ic_block * jcp.oc_block]; + par_conv.dst = &dst[n * dst_strides[0] + _oc * jcp.oc_block * dst_strides[1] + oh * dst_strides[2]]; par_conv.buf = input_buffer_ptr + ithr * jcp.ur_w * jcp.kh * jcp.kw * jcp.ic; par_conv.oh_pos = oh; @@ -1158,7 +1059,6 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { dst_strides[dst_block_desc->getOrder()[i]] = dst_block_desc->getStrides()[i]; } - auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType()->getStrides(); auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType()->getStrides(); InferenceEngine::SizeVector modulation_strides; @@ -1166,11 +1066,12 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { modulation_strides = getParentEdgeAt(3)->getMemory().GetDescWithType()->getStrides(); } + prepareSamplingWeights(src_strides, offsets, off_strides, modulation, modulation_strides); if (def_conv_kernel) { - executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides); + executeOptimized(src, weights, dst, src_strides, dst_strides); } else { - executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides, modulation, modulation_strides); + executeReference(src, weights, dst, src_strides, wei_strides, dst_strides); } } diff --git 
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h index 29af41af4da8cb..28f5295949d06a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h @@ -30,7 +30,8 @@ struct jit_def_conv_params { int ur_w_tail; int typesize_in; int typesize_off; - int typesize_modulation; + int typesize_sampled_wei; + int typesize_sampled_offsets; int typesize_bia; int typesize_out; bool with_bias; @@ -41,8 +42,8 @@ struct jit_def_conv_params { struct jit_def_conv_call_args { const void *src; - const void *off; - const void *modulation; + const void *sampledWei; + const void *sampledCoords; const void *filt; const void *bias; const void *dst; @@ -80,6 +81,7 @@ class MKLDNNDeformableConvolutionNode : public MKLDNNNode { return false; } bool enforceRef = false; + constexpr static int sampledPointsPerPixel = 4; // count of sampling points ({top|bottom}, {left|right}) InferenceEngine::Precision getRuntimePrecision() const override; @@ -94,14 +96,18 @@ class MKLDNNDeformableConvolutionNode : public MKLDNNNode { jit_def_conv_params jcp = {}; + std::vector sampledCoordsVector; + std::vector interpWeightsVector; + std::shared_ptr def_conv_kernel = nullptr; - void executeReference(const float* src, const float* offsets, const float* weights, float* dst, - const std::vector& src_strides, const std::vector& off_strides, - const std::vector& wei_strides, const std::vector& dst_strides, - const float* modulation = nullptr, const std::vector& modulation_strides = {}); - void executeOptimized(const float* src, const float* offsets, const float* weights, float* dst, - const std::vector& src_strides, const std::vector& off_strides, const std::vector& dst_strides); + void prepareSamplingWeights(const std::vector& src_strides, const float* offsets, const std::vector& off_strides, + const float* modulation = nullptr, const std::vector& modulation_strides = {}); + + void executeReference(const float* src, const float* weights, float* dst, const std::vector& src_strides, + const std::vector& wei_strides, const std::vector& dst_strides); + void executeOptimized(const float* src, const float* weights, float* dst, + const std::vector& src_strides, const std::vector& dst_strides); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index 7babb5033b9630..20b89924e7480f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -3,17 +3,26 @@ // #include "mkldnn_matmul_node.h" + +#include "memory_desc/cpu_blocked_memory_desc.h" +#include "cpu_types.h" +#include "mkldnn_eltwise_node.h" + +#include +#include #include #include #include #include #include #include -#include #include "ie_parallel.hpp" #include "common/cpu_memcpy.h" #include #include "memory_desc/dnnl_blocked_memory_desc.h" +#include "utils/general_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" +#include "mkldnn_extension_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -37,14 +46,14 @@ bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptrget_input_size(); i++) { const auto inShapeRank = matMul->get_input_shape(i).size(); - if (inShapeRank < 2 || inShapeRank > 4) { + if (inShapeRank < 2) { errorMessage = "Unsupported rank: " + std::to_string(inShapeRank) + " on " 
+ std::to_string(i) + " input"; return false; } } const auto outShapeRank = matMul->get_shape().size(); - if (outShapeRank < 2 || outShapeRank > 4) { + if (outShapeRank < 2) { errorMessage = "Unsupported rank: " + std::to_string(outShapeRank) + " on output"; return false; } @@ -55,19 +64,47 @@ bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { + MKLDNNNode(op, eng, cache) { std::string errorMessage; - if (isSupportedOperation(op, errorMessage)) { - errorPrefix = "Gemm node with name '" + getName() + "'"; - - const auto matMul = std::dynamic_pointer_cast(op); - alpha = 1.f; - beta = 0.f; - transposeA = matMul->get_transpose_a(); - transposeB = matMul->get_transpose_b(); - } else { + if (!isSupportedOperation(op, errorMessage)) IE_THROW(NotImplemented) << errorMessage; + + errorPrefix = "MatMul node with name '" + getName() + "'"; + + const auto matMul = std::dynamic_pointer_cast(op); + + transposeIn[0] = matMul->get_transpose_a(); + transposeIn[1] = matMul->get_transpose_b(); +} + +bool MKLDNNMatMulNode::canFuse(const MKLDNNNodePtr& node) const { + return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, + EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, + EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu); +} + +void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) const { + mkldnn::post_ops ops; + + for (auto &node : fusedWith) { + if (auto* eltwiseNode = dynamic_cast(node.get())) { + eltwiseNode->appendPostOps(ops); + continue; + } + + IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented"; } + + attr.set_post_ops(ops); +} + + +std::shared_ptr MKLDNNMatMulNode::initPrimitiveAttr() const { + auto attr = std::make_shared(mkldnn::primitive_attr()); + + setPostOps(*attr, true); + + return attr; } void MKLDNNMatMulNode::getSupportedDescriptors() { @@ -76,101 +113,147 @@ void MKLDNNMatMulNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges for layer " << getName(); - auto inDims0 = getInputShapeAtPort(0).getStaticDims(); - auto inDims1 = getInputShapeAtPort(1).getStaticDims(); - auto outDims = getOutputShapeAtPort(0).getStaticDims(); + auto firstInPortPrec = getOriginalInputPrecisionAtPort(0); + auto secondInPortPrec = getOriginalInputPrecisionAtPort(1); + auto outPortPrec = getOriginalOutputPrecisionAtPort(0); + + if (firstInPortPrec.size() != secondInPortPrec.size()) + firstInPortPrec = secondInPortPrec = getMaxPrecision(getOriginalInputPrecisions()); - if (inDims0.size() != inDims1.size() || inDims0.size() != outDims.size()) + if (!fusedWith.empty()) { + outPortPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); + } + + if (inputShapes[0].getRank() != inputShapes[1].getRank() || inputShapes[0].getRank() != outputShapes[0].getRank()) IE_THROW() << errorPrefix << " has invalid dims count"; - int nDims = inDims0.size(); - xAxis = nDims - 1; - yAxis = nDims - 2; - auto xAxis0 = transposeA ? yAxis : xAxis; - auto yAxis0 = transposeA ? xAxis : yAxis; - auto xAxis1 = transposeB ? yAxis : xAxis; - auto yAxis1 = transposeB ? 
xAxis : yAxis;
+    const int nDims = inputShapes[0].getRank();
+    const auto xAxis = nDims - 1;
+    const auto yAxis = nDims - 2;
+    const auto xAxis0 = transposeIn[0] ? yAxis : xAxis;
+    const auto yAxis0 = transposeIn[0] ? xAxis : yAxis;
+    const auto xAxis1 = transposeIn[1] ? yAxis : xAxis;
+    const auto yAxis1 = transposeIn[1] ? xAxis : yAxis;
+
+    const auto& inDims0 = getInputShapeAtPort(0).getStaticDims();
+    const auto& inDims1 = getInputShapeAtPort(1).getStaticDims();
+    const auto& outDims = getOutputShapeAtPort(0).getStaticDims();
 
-    // The check inDims0[xAxis] != inDims1[yAxis] is correct due to layer semantic
     // coverity[copy_paste_error]
-    if (inDims0[xAxis0] != inDims1[yAxis1] || inDims0[yAxis0] != outDims[yAxis] || inDims1[xAxis1] != outDims[xAxis])
+    if (inDims0[xAxis0] != inDims1[yAxis1] ||
+        inDims0[yAxis0] != outDims[yAxis] ||
+        inDims1[xAxis1] != outDims[xAxis])
         IE_THROW() << errorPrefix << " has incorrect spatial input and output dimensions";
 
     for (int dim_idx = nDims - 3; dim_idx >= 0; dim_idx--) {
-        if ((inDims0[dim_idx] != outDims[dim_idx] && inDims0[dim_idx] != 1) ||
-            (inDims1[dim_idx] != outDims[dim_idx] && inDims1[dim_idx] != 1)) {
+        if ((inDims0[dim_idx] != outDims[dim_idx] &&
+             inDims0[dim_idx] != 1) ||
+            (inDims1[dim_idx] != outDims[dim_idx] &&
+             inDims1[dim_idx] != 1)) {
             IE_THROW() << errorPrefix << " has incorrect input batch dimensions";
         }
-
-        int aOffset = 1;
-        for (int i = dim_idx + 1; i < nDims; i++)
-            aOffset *= inDims0[i];
-        aOffsets.push_back(inDims0[dim_idx] == outDims[dim_idx] ? aOffset : 0);
-
-        int bOffset = 1;
-        for (int i = dim_idx + 1; i < nDims; i++)
-            bOffset *= inDims1[i];
-        bOffsets.push_back(inDims1[dim_idx] == outDims[dim_idx] ? bOffset : 0);
     }
 
-    for (unsigned long dim_idx = aOffsets.size(); dim_idx < 2; dim_idx++)
-        aOffsets.push_back(0);
-    for (unsigned long dim_idx = bOffsets.size(); dim_idx < 2; dim_idx++)
-        bOffsets.push_back(0);
-    for (unsigned long dim_idx = cOffsets.size(); dim_idx < 2; dim_idx++)
-        cOffsets.push_back(0);
-}
-
-void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() {
-    if (!supportedPrimitiveDescriptors.empty())
-        return;
+    /* Example MatMul:
+     * 2x128x512(T) * 2x128x512 = 2x512x512
+     * First input 2x128x512(T) should be transposed
+     * oneDNN requires memory::desc for this input to:
+     * - change shapes configuration as if input already transposed (2x128x512) -> (2x512x128)
+     * - provide transposed strides (65536, 128, 1) -> (65536, 1, 512)
+     */
+    auto getStridesAndDims = [](Shape& shape, const bool transpose) {
+        const auto getRank = shape.getRank();
+
+        VectorDims strides(getRank, 1);
+        for (size_t i = 1; i < getRank; i++) {
+            strides[getRank - i - 1] = strides[getRank - i] * shape.getStaticDims()[getRank - i];
+        }
 
-    auto inPrec0 = getOriginalInputPrecisionAtPort(0);
-    auto inPrec1 = getOriginalInputPrecisionAtPort(1);
-    if ((inPrec0 != Precision::U8 && inPrec0 != Precision::I8) || inPrec1 != Precision::I8) {
-        if (inPrec0 == Precision::BF16 || inPrec1 == Precision::BF16) {
-            inPrec0 = Precision::BF16;
-            inPrec1 = Precision::BF16;
-        } else {
-            inPrec0 = Precision::FP32;
-            inPrec1 = Precision::FP32;
+        if (transpose && getRank > 1) {
+            // form the new (transposed) shape
+            auto dims = shape.getStaticDims();
+            std::swap(dims[getRank - 2], dims[getRank - 1]);
+            shape = Shape{dims};
+            // update the strides accordingly
+            strides[getRank - 1] = shape.getStaticDims()[getRank - 2];
+            strides[getRank - 2] = 1;
         }
-    }
-    auto outputPrec = InferenceEngine::Precision::FP32;
+        return strides;
+    };
 
-    NodeConfig config;
-    config.dynBatchSupport = true;
+
initialInShapes[0] = inputShapes[0]; + initialInShapes[1] = inputShapes[1]; - auto createDataConfig = [](const Shape& shape, InferenceEngine::Precision dataType) -> PortConfig { - PortConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - dataConfig.desc = std::make_shared(dataType, shape); - return dataConfig; - }; + const VectorDims inStrides0 = getStridesAndDims(inputShapes[0], transposeIn[0]); + const VectorDims inStrides1 = getStridesAndDims(inputShapes[1], transposeIn[1]); + const VectorDims outStrides = getStridesAndDims(outputShapes[0], false); - config.inConfs.push_back(createDataConfig(getInputShapeAtPort(0), inPrec0)); - config.inConfs.push_back(createDataConfig(getInputShapeAtPort(1), inPrec1)); - config.outConfs.push_back(createDataConfig(getOutputShapeAtPort(0), outputPrec)); + inDataDesc[0] = std::make_shared(firstInPortPrec, inputShapes[0], inStrides0); + inDataDesc[1] = std::make_shared(secondInPortPrec, inputShapes[1], inStrides1); + outDataDesc = std::make_shared(outPortPrec, getOutputShapeAtPort(0), outStrides); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::gemm_any); + createDescriptor({inDataDesc[0], inDataDesc[1]}, {outDataDesc}); } -void MKLDNNMatMulNode::initOptimalPrimitiveDescriptor() { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; - auto config = selected_pd->getConfig(); - - if (isConfigDefined(config)) - return; +void MKLDNNMatMulNode::createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) { + MKLDNNDescriptor desc{ + std::shared_ptr( + new matmul::desc(MemoryDescUtils::convertToDnnlMemoryDesc(inDataDesc[0])->getDnnlDesc(), + MemoryDescUtils::convertToDnnlMemoryDesc(inDataDesc[1])->getDnnlDesc(), + MemoryDescUtils::convertToDnnlMemoryDesc(outDataDesc)->getDnnlDesc()))}; - MKLDNNNode::initOptimalPrimitiveDescriptor(); + descs.push_back(desc); +} - auto* selectedPD = getSelectedPrimitiveDescriptor(); - if (!selectedPD) { +void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) return; + + auto attr = initPrimitiveAttr(); + + for (auto& desc : descs) { + auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), *attr); + while (static_cast(itpd)) { + NodeConfig config; + config.dynBatchSupport = true; + for (size_t i = 0; i < descInputNumbers(desc); i++) { + PortConfig portConfig; + portConfig.inPlace = -1; + portConfig.constant = false; + + auto src_desc = getSrcMemDesc(itpd, i); + if (src_desc->getType() & MemoryDescType::Blocked) { + portConfig.desc = src_desc->as()->cloneWithUndefStridesAndOffset(); + } else { + portConfig.desc = std::move(src_desc); + } + + config.inConfs.push_back(portConfig); + } + + for (size_t i = 0; i < descOutputNumbers(desc); i++) { + PortConfig portConfig; + portConfig.inPlace = canBeInPlace() ? 
0 : -1; + portConfig.constant = false; + + auto dst_desc = getDstMemDesc(itpd, i); + if (dst_desc->getType() & MemoryDescType::Blocked) { + portConfig.desc = dst_desc->as()->cloneWithUndefStridesAndOffset(); + } else { + portConfig.desc = std::move(dst_desc); + } + + config.outConfs.push_back(portConfig); + } + + impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); + + supportedPrimitiveDescriptors.emplace_back(config, impl_type); + if (!itpd.next_impl()) + break; + } } } @@ -185,120 +268,29 @@ void MKLDNNMatMulNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; - auto inDims0 = src0MemPtr->getStaticDims(); - auto outDims = dstMemPtr->getStaticDims(); - - params.src0_mem_ptr = src0MemPtr; - params.src1_mem_ptr = src1MemPtr; - params.dst_mem_ptr = dstMemPtr; - - params.ndims = outDims.size(); - - params.MB1 = 1; - params.MB2 = outDims.size() > 3 ? outDims[params.ndims - 3] : 1; - - params.M = outDims[yAxis]; - params.N = outDims[xAxis]; - params.K = transposeA ? inDims0[yAxis] : inDims0[xAxis]; - - params.transa = transposeA ? 'T' : 'N'; - params.transb = transposeB ? 'T' : 'N'; - - params.lda = transposeA ? params.M : params.K; - params.ldb = transposeB ? params.K : params.N; - params.ldc = params.N; - - params.shift1 = params.M * params.N * params.MB2; - params.shift2 = params.M * params.N; - - runtimePrecision = getParentEdgeAt(0)->getMemory().getDesc().getPrecision(); -} + if (prim) + return; -inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const float *A, int lda, - const float *B, int ldb, float beta, float *C, int ldc) { - mkldnn_sgemm(transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); -} + std::shared_ptr attr = initPrimitiveAttr(); + std::shared_ptr prim_desc; + prim_desc = std::make_shared( + createPrimitiveDescriptor(*attr)); -inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const uint16_t *A, int lda, - const uint16_t *B, int ldb, float beta, float *C, int ldc) { - dnnl_gemm_bf16bf16f32(transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); -} + prim.reset(new matmul(*prim_desc)); -inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const uint8_t *A, int lda, - const int8_t *B, int ldb, float beta, float *C, int ldc) { - const int32_t co = 0; - int32_t *Ci = reinterpret_cast(C); - mkldnn_gemm_u8s8s32(transa, transb, 'F', M, N, K, alpha, A, lda, 0, B, ldb, 0, beta, Ci, ldc, &co); - parallel_for(M * N, [&](size_t i) { - C[i] = Ci[i]; - }); -} + auto src0 = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + auto src1 = getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPrimitive(); + auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); -inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const int8_t *A, int lda, - const int8_t *B, int ldb, float beta, float *C, int ldc) { - const int32_t co = 0; - int32_t *Ci = reinterpret_cast(C); - mkldnn_gemm_s8s8s32(transa, transb, 'F', M, N, K, alpha, A, lda, 0, B, ldb, 0, beta, Ci, ldc, &co); - parallel_for(M * N, [&](size_t i) { - C[i] = Ci[i]; - }); + primArgs = {{DNNL_ARG_SRC_0, src0}, {DNNL_ARG_WEIGHTS_0, src1}, {DNNL_ARG_DST, dst}}; } -template -inline void MKLDNNMatMulNode::process_data() { - const T0* src0_ptr = reinterpret_cast(params.src0_mem_ptr->GetPtr()); - const T1* src1_ptr = reinterpret_cast(params.src1_mem_ptr->GetPtr()); - float* 
dst_ptr = reinterpret_cast(params.dst_mem_ptr->GetPtr()); - - const int MB = batchToProcess(); - if (params.ndims == 4) { - params.MB1 = MB; - } else if (params.ndims == 3) { - params.shift1 = params.shift1 * MB / params.MB2; - params.MB2 = MB; - } - - for (int b1 = 0; b1 < params.MB1; ++b1) { - const T0 *a_ptr = src0_ptr; - const T1 *b_ptr = src1_ptr; - float *d_ptr = dst_ptr; - - for (int b2 = 0; b2 < params.MB2; ++b2) { - process_gemm(params.transa, params.transb, params.M, params.N, params.K, - alpha, a_ptr, params.lda, b_ptr, params.ldb, beta, d_ptr, params.ldc); - - a_ptr += aOffsets[0]; - b_ptr += bOffsets[0]; - d_ptr += params.shift2; - } +MemoryDescPtr MKLDNNMatMulNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1): primitive_desc_it.src_desc(idx); - src0_ptr += aOffsets[1]; - src1_ptr += bOffsets[1]; - dst_ptr += params.shift1; - } -} - -void MKLDNNMatMulNode::execute(mkldnn::stream strm) { - switch (runtimePrecision) { - case Precision::FP32: { - process_data(); - break; - } - case Precision::BF16: { - process_data(); - break; - } - case Precision::I8: { - process_data(); - break; - } - case Precision::U8: { - process_data(); - break; - } - default: - IE_THROW() << errorPrefix << " has incorrect precision on first input"; - } + return std::make_shared( + MKLDNNExtensionUtils::DataTypeToIEPrecision(static_cast(desc.data.data_type)), + initialInShapes[idx]); /* provide initial shapes, so hide transpose effect */ } bool MKLDNNMatMulNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h index 0451e1e47ce56a..8820f7a4e6eed5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h @@ -4,10 +4,11 @@ #pragma once -#include #include +#include #include #include +#include namespace MKLDNNPlugin { @@ -16,60 +17,38 @@ class MKLDNNMatMulNode : public MKLDNNNode { MKLDNNMatMulNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void initSupportedPrimitiveDescriptors() override; - void initOptimalPrimitiveDescriptor() override; + MemoryDescPtr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; void createPrimitive() override; - void execute(mkldnn::stream strm) override; + bool canFuse(const MKLDNNNodePtr& node) const override; bool created() const override; size_t getMaxBatch() const override; InferenceEngine::Precision getRuntimePrecision() const override; + size_t descInputNumbers(MKLDNNDescriptor desc) override { + return getOriginalInputsNumber(); + } static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; -private: - float alpha = 1.f; - float beta = 0.f; - bool transposeA = false; - bool transposeB = false; - - int xAxis = 0; - int yAxis = 0; - - std::vector aOffsets; - std::vector bOffsets; - std::vector cOffsets; - - InferenceEngine::Precision runtimePrecision; +protected: + std::shared_ptr initPrimitiveAttr() const override; - template inline void process_data(); +private: + void setPostOps(mkldnn::primitive_attr &attr, bool initWeights) const; std::string errorPrefix; - struct { - MKLDNNMemoryPtr src0_mem_ptr = nullptr; - MKLDNNMemoryPtr 
src1_mem_ptr = nullptr; - MKLDNNMemoryPtr dst_mem_ptr = nullptr; - - char transa = 'N'; - char transb = 'N'; - - int MB1 = 1; - int MB2 = 1; - - int M = 0; - int N = 0; - int K = 0; - - int lda = 0; - int ldb = 0; - int ldc = 0; - - int shift1 = 0; - int shift2 = 0; + /* whether to transpose input */ + std::array<bool, 2> transposeIn; + /* initial shapes without transpose, + * necessary to hide the transpose effect from the plugin */ + std::array<Shape, 2> initialInShapes; - size_t ndims = 0; - } params; + std::array<MemoryDescPtr, 2> inDataDesc; + MemoryDescPtr outDataDesc; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h index abefbe06fe5acc..fce261b19befb2 100644 --- a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h +++ b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h @@ -9,7 +9,9 @@ #include #include -#define ENABLE_CPU_DEBUG_CAP(_x) _x; + +#define CPU_DEBUG_CAP_ENABLE(_x) _x; +#define CPU_DEBUG_CAPS_ALWAYS_TRUE(x) true namespace MKLDNNPlugin { namespace DebugCaps { @@ -47,5 +49,7 @@ class Config { } // namespace MKLDNNPlugin #else // !CPU_DEBUG_CAPS -#define ENABLE_CPU_DEBUG_CAP(_x) +#define CPU_DEBUG_CAP_ENABLE(_x) +#define CPU_DEBUG_CAPS_ALWAYS_TRUE(x) x + #endif // CPU_DEBUG_CAPS diff --git a/inference-engine/src/mkldnn_plugin/utils/general_utils.h b/inference-engine/src/mkldnn_plugin/utils/general_utils.h index f50164eb422c53..012054e59e10c5 100644 --- a/inference-engine/src/mkldnn_plugin/utils/general_utils.h +++ b/inference-engine/src/mkldnn_plugin/utils/general_utils.h @@ -4,10 +4,12 @@ #pragma once -#include #include #include "cpu_shape.h" +#include <algorithm> +#include + namespace MKLDNNPlugin { template @@ -101,14 +103,13 @@ inline bool dimsEqualWeak(const std::vector<size_t>& lhs, const std::vector<size_t>& rhs) inline InferenceEngine::Precision getMaxPrecision(std::vector<InferenceEngine::Precision> precisions) { if (!precisions.empty()) { - std::sort(precisions.begin(), precisions.end(), - [](const InferenceEngine::Precision &lhs, const InferenceEngine::Precision &rhs) { - return lhs.size() > rhs.size(); - }); - return precisions[0]; + return *std::max_element(precisions.begin(), precisions.end(), + [](const InferenceEngine::Precision &lhs, const InferenceEngine::Precision &rhs) { + return lhs.size() < rhs.size(); + }); } return InferenceEngine::Precision::UNSPECIFIED; } -} // namespace MKLDNNPlugin \ No newline at end of file +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/plugin_api/ie_system_conf.h b/inference-engine/src/plugin_api/ie_system_conf.h index 254f03a548852a..13f6f9a79a11b2 100644 --- a/inference-engine/src/plugin_api/ie_system_conf.h +++ b/inference-engine/src/plugin_api/ie_system_conf.h @@ -52,6 +52,16 @@ INFERENCE_ENGINE_API_CPP(std::vector<int>) getAvailableCoresTypes(); */ INFERENCE_ENGINE_API_CPP(int) getNumberOfCPUCores(bool bigCoresOnly = false); +/** + * @brief Returns the number of CPU logical cores on Linux/Windows (on other OSes it simply relies on the original + * parallel API of choice, which uses all logical cores). Call the function with 'false' to get the number of logical + * cores of all types; call it with 'true' to get the number of logical 'Big' cores. The number of 'Little' cores is 'all' - 'Big'. + * @ingroup ie_dev_api_system_conf + * @param[in] bigCoresOnly Additionally limits the number of reported cores to the 'Big' cores only. + * @return Number of logical CPU cores. 
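 + * + * Illustrative usage (a sketch based only on the description above): + * @code + * int allLogical = getNumberOfLogicalCPUCores(false); // logical cores of all types + * int bigLogical = getNumberOfLogicalCPUCores(true); // logical 'Big' cores only + * int littleLogical = allLogical - bigLogical; // 'Little' = 'all' - 'Big' + * @endcode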
+ */ +INFERENCE_ENGINE_API_CPP(int) getNumberOfLogicalCPUCores(bool bigCoresOnly = false); + /** * @brief Checks whether CPU supports SSE 4.2 capability * @ingroup ie_dev_api_system_conf diff --git a/inference-engine/src/readers/ir_reader_v7/ie_cnn_net_reader_impl.cpp b/inference-engine/src/readers/ir_reader_v7/ie_cnn_net_reader_impl.cpp index e522940bb716bc..4952c54a40d5da 100644 --- a/inference-engine/src/readers/ir_reader_v7/ie_cnn_net_reader_impl.cpp +++ b/inference-engine/src/readers/ir_reader_v7/ie_cnn_net_reader_impl.cpp @@ -57,7 +57,7 @@ StatusCode CNNNetReaderImpl::ReadNetwork(const void* model, size_t size, Respons xmlDoc = std::make_shared(); pugi::xml_parse_result res = xmlDoc->load_buffer(model, size); if (res.status != pugi::status_ok) { - return DescriptionBuffer(resp) << res.description() << "at offset " << res.offset; + return DescriptionBuffer(resp) << res.description() << " at offset " << res.offset; } StatusCode ret = ReadNetwork(); if (ret != OK) { diff --git a/inference-engine/src/readers/ir_reader_v7/ie_ir_reader.cpp b/inference-engine/src/readers/ir_reader_v7/ie_ir_reader.cpp index 2ddba71a563a34..af4d407ad0418b 100644 --- a/inference-engine/src/readers/ir_reader_v7/ie_ir_reader.cpp +++ b/inference-engine/src/readers/ir_reader_v7/ie_ir_reader.cpp @@ -33,7 +33,7 @@ CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const pugi::xml_document xmlDoc; pugi::xml_parse_result res = xmlDoc.load(model); if (res.status != pugi::status_ok) { - IE_THROW() << res.description() << "at offset " << res.offset; + IE_THROW() << res.description() << " at offset " << res.offset; } pugi::xml_node root = xmlDoc.document_element(); diff --git a/inference-engine/src/transformations/include/transformations/rt_info/attributes.hpp b/inference-engine/src/transformations/include/transformations/rt_info/attributes.hpp index 9b6904c02331e6..1207c3bd278250 100644 --- a/inference-engine/src/transformations/include/transformations/rt_info/attributes.hpp +++ b/inference-engine/src/transformations/include/transformations/rt_info/attributes.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ class TRANSFORMATIONS_API Attributes { register_factory(); register_factory(); register_factory(); + register_factory(); } Variant * create_by_type_info(const ov::DiscreteTypeInfo & type_info) { diff --git a/inference-engine/src/transformations/include/transformations/rt_info/old_api_map_attribute.hpp b/inference-engine/src/transformations/include/transformations/rt_info/old_api_map_attribute.hpp new file mode 100644 index 00000000000000..4ffe69b98edc23 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/rt_info/old_api_map_attribute.hpp @@ -0,0 +1,124 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief Defines old API map attribute + * @file old_api_map_attribute.hpp + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ov { + +class OldApiMap; +/** + * @ingroup ie_runtime_attr_api + * @brief OldApiMapAttr class stores the value of OldApiMap class. + * + * OldApiMap stores the following information. + * Parameter: + * Order of the transpose which should be applied to Parameter with old API layout to + * obtain Parameter with new API layout. + * Element type of the Parameter in old API. 
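 + * For example, the rt_info deserialization tests in this patch attach order {0, 2, 3, 1} + * and legacy type f16 to a Parameter: the old-API f16 input is converted to f32 and + * transposed with that order to obtain the new-API Parameter.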
+ * + * Result: + * Order of the transpose which should be applied to Result with new API layout to + * obtain Result with old API layout. + * + */ +class TRANSFORMATIONS_API OldApiMapAttr { +private: + std::vector<uint64_t> m_order; + ngraph::element::Type m_legacy_type = ngraph::element::Type_t::undefined; + +public: + friend class OldApiMap; + + /** + * A default constructor + */ + OldApiMapAttr() = default; + + /** + * @brief Constructs a new OldApiMapAttr object. + * @param[in] order Transpose order. + * @param[in] legacy_type Legacy type. + */ + explicit OldApiMapAttr(std::vector<uint64_t> order, const ngraph::element::Type& legacy_type) + : m_order(std::move(order)), m_legacy_type(legacy_type) {} + + /** + * @brief Returns the transpose order that should be used to obtain a node with the old API layout. + * @return transpose order. + */ + const std::vector<uint64_t>& get_order() const { + return m_order; + } + + /** + * @brief Returns the legacy type of the node. + * @return legacy type. + */ + ngraph::element::Type get_type() const { + return m_legacy_type; + } +}; + +/** + * @ingroup ie_runtime_attr_api + * @brief OldApiMap class represents a runtime info attribute that stores the legacy type + * and the order of the transpose that is required for obtaining the IR in the old API. + */ +class TRANSFORMATIONS_API OldApiMap : public VariantImpl<OldApiMapAttr> { +public: + OPENVINO_RTTI("old_api_map", "0"); + + /** + * A default constructor + */ + OldApiMap() = default; + + /** + * Constructs a new OldApiMap object. + * @param[in] value The object that stores values of OldApiMap. + */ + OldApiMap(const value_type& value) : VariantImpl(value) {} + + bool is_copyable() const override { + return false; + } + + bool visit_attributes(AttributeVisitor& visitor) override; +}; + +inline bool has_old_api_map(const std::shared_ptr<ngraph::Node>& node) { + const auto& rt_map = node->get_rt_info(); + return rt_map.count(OldApiMap::get_type_info_static()); +} + +inline OldApiMap get_old_api_map(const std::shared_ptr<ngraph::Node>& node) { + const auto& rt_map = node->get_rt_info(); + const auto& var = rt_map.at(OldApiMap::get_type_info_static()); + return ngraph::as_type_ptr<OldApiMap>(var)->get(); +} + +inline void set_old_api_map(std::shared_ptr<ngraph::Node>& node, const OldApiMap& old_api_map) { + auto& rt_map = node->get_rt_info(); + rt_map[OldApiMap::get_type_info_static()] = std::make_shared<OldApiMap>(old_api_map); +} + +} // namespace ov diff --git a/inference-engine/src/transformations/src/transformations/rt_info/old_api_map_attribute.cpp b/inference-engine/src/transformations/src/transformations/rt_info/old_api_map_attribute.cpp new file mode 100644 index 00000000000000..40f231ea9c1924 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/rt_info/old_api_map_attribute.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/rt_info/old_api_map_attribute.hpp" + +using namespace ov; + +bool OldApiMap::visit_attributes(AttributeVisitor& visitor) { + visitor.on_attribute("order", m_value.m_order); + visitor.on_attribute("element_type", m_value.m_legacy_type); + return true; +} + diff --git a/inference-engine/tests/functional/inference_engine/CMakeLists.txt b/inference-engine/tests/functional/inference_engine/CMakeLists.txt index a2de4b7d8925c3..df998f0fd45dfa 100644 --- a/inference-engine/tests/functional/inference_engine/CMakeLists.txt +++ b/inference-engine/tests/functional/inference_engine/CMakeLists.txt @@ -19,6 +19,7 @@ set(LINK_LIBRARIES inference_engine_snippets offline_transformations inference_engine + 
frontend_manager pugixml::static ) diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_deserialization.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_deserialization.cpp index 41493ccd6c0d21..982dee57ef2380 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_deserialization.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_deserialization.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include #include @@ -9,20 +10,57 @@ #include #include +#include +#include "frontend_manager/frontend_manager.hpp" +#include "graph_comparator.hpp" +#include "ie_blob.h" +#include "ngraph/node.hpp" +#include "ngraph/op/parameter.hpp" +#include "ngraph/shape.hpp" +#include "ngraph/type/element_type.hpp" +#include "ngraph/variant.hpp" +#include "openvino/runtime/core.hpp" +#include using namespace ngraph; -TEST(RTInfoDeserialization, NodeV10) { +class RTInfoDeserialization : public testing::Test { +protected: + std::shared_ptr getWithIRFrontend(const std::string& model) { + std::istringstream modelStringStream(model); + std::istream& modelStream = modelStringStream; + + ngraph::frontend::FrontEnd::Ptr FE; + ngraph::frontend::InputModel::Ptr inputModel; + + ov::VariantVector params{ov::make_variant(&modelStream)}; + + FE = manager.load_by_model(params); + if (FE) + inputModel = FE->load(params); + + if (inputModel) + return FE->convert(inputModel); + + return nullptr; + } + +private: + ngraph::frontend::FrontEndManager manager; +}; + +TEST_F(RTInfoDeserialization, NodeV10) { std::string model = R"V0G0N( - + + - + 1 3 22 @@ -36,7 +74,7 @@ TEST(RTInfoDeserialization, NodeV10) { - + 1 3 22 @@ -44,7 +82,7 @@ TEST(RTInfoDeserialization, NodeV10) { - + 1 3 22 @@ -54,7 +92,7 @@ TEST(RTInfoDeserialization, NodeV10) { - + 1 3 22 @@ -69,13 +107,15 @@ TEST(RTInfoDeserialization, NodeV10) { )V0G0N"; - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(model, InferenceEngine::Blob::Ptr()); - auto f = net.getFunction(); + auto f = getWithIRFrontend(model); + ASSERT_NE(nullptr, f); auto check_rt_info = [](const RTMap & info) { const std::string & key = VariantWrapper::get_type_info_static(); - ASSERT_FALSE(info.count(key)); + EXPECT_FALSE(info.count(key)); + + const std::string & key_old_api = ov::OldApiMap::get_type_info_static(); + EXPECT_FALSE(info.count(key_old_api)); }; auto check_version = [](const std::shared_ptr& f) { @@ -93,16 +133,71 @@ TEST(RTInfoDeserialization, NodeV10) { auto result = f->get_results()[0]; auto round = result->get_input_node_ptr(0); check_rt_info(round->get_rt_info()); + + // read IR v10 with old API + { + InferenceEngine::Core core; + auto f_10 = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); + ASSERT_NE(nullptr, f_10.getFunction()); + + auto res = compare_functions(f, f_10.getFunction()); + EXPECT_TRUE(res.first) << res.second; + } + + // read IR v10 with new API and check that CNNNetwork precision conversions are applied + { + ngraph::Shape shape{1, 3, 22, 22}; + auto type = ngraph::element::f32; + auto param = std::make_shared(type, shape); + param->set_friendly_name("in1"); + param->get_output_tensor(0).set_names({"input_tensor"}); + + auto convert_param = std::make_shared(param, ngraph::element::f16); + convert_param->get_output_tensor(0).set_names({"input_tensor/convert_element_type"}); + + auto round = std::make_shared(convert_param, + ngraph::opset8::Round::RoundMode::HALF_TO_EVEN); + 
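// The Convert nodes around Round model the precision conversions that the v10 + // reader performs when this model is read through the new API: the Parameter is + // exposed as f32 while the IR's f16 precision is restored internally (and undone + // again before the Result); the tensor and friendly names mirror what the reader + // generates, since the comparison below enables FunctionsComparator::NAMES. + 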
round->set_friendly_name("Round"); + // TODO: why it has this name? + round->get_output_tensor(0).set_names({"output_tensor"}); + + auto convert_result = std::make_shared(round, type); + convert_result->set_friendly_name("Round/convert_element_type"); + convert_result->get_output_tensor(0).set_names({"output_tensor"}); + + auto result = std::make_shared(convert_result); + result->set_friendly_name("output"); + + auto f_10_ref = std::make_shared(ngraph::ResultVector{result}, + ngraph::ParameterVector{param}); + f_10_ref->set_friendly_name("Network"); + + ov::runtime::Core core; + auto f_10_core = core.read_model(model, InferenceEngine::Blob::CPtr()); + ASSERT_NE(nullptr, f_10_core); + + auto& rt_info = f_10_core->get_rt_info(); + EXPECT_EQ(0, rt_info.count("version")); + + const auto fc = FunctionsComparator::with_default() + .enable(FunctionsComparator::ATTRIBUTES) + .enable(FunctionsComparator::PRECISIONS) + .enable(FunctionsComparator::RUNTIME_KEYS) + .enable(FunctionsComparator::NAMES) + .enable(FunctionsComparator::CONST_VALUES); + auto res = fc.compare(f_10_core, f_10_ref); + EXPECT_TRUE(res.valid) << res.message; + } } -TEST(RTInfoDeserialization, InputAndOutputV10) { +TEST_F(RTInfoDeserialization, InputAndOutputV10) { std::string model = R"V0G0N( - + - + @@ -135,7 +230,7 @@ TEST(RTInfoDeserialization, InputAndOutputV10) { - + @@ -148,7 +243,7 @@ TEST(RTInfoDeserialization, InputAndOutputV10) { - + @@ -167,9 +262,8 @@ TEST(RTInfoDeserialization, InputAndOutputV10) { )V0G0N"; - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(model, InferenceEngine::Blob::Ptr()); - auto f = net.getFunction(); + auto f = getWithIRFrontend(model); + ASSERT_NE(nullptr, f); auto check_rt_info = [](const RTMap & info) { const std::string & key = VariantWrapper::get_type_info_static(); @@ -195,19 +289,71 @@ TEST(RTInfoDeserialization, InputAndOutputV10) { check_rt_info(add->input(0).get_rt_info()); check_rt_info(add->input(1).get_rt_info()); check_rt_info(add->output(0).get_rt_info()); + + // read IR v10 with old API + { + InferenceEngine::Core core; + auto f_10 = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); + ASSERT_NE(nullptr, f_10.getFunction()); + + auto res = compare_functions(f, f_10.getFunction()); + EXPECT_TRUE(res.first) << res.second; + } + + // read IR v10 with new API and check that CNNNetwork precision conversions are applied + { + const ngraph::Shape shape{1, 3, 22, 22}; + const auto type = ngraph::element::i64; + auto param = std::make_shared(type, shape); + param->set_friendly_name("in1"); + param->get_output_tensor(0).set_names({"input_tensor"}); + + auto sum = std::make_shared(param, param); + sum->set_friendly_name("sum"); + // TODO: why it has this name? 
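 + // (possibly because the IR's output tensor names are attached to the producing + // node's output tensor, which the Result shares; an unverified guess)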
+ sum->get_output_tensor(0).set_names({"output_tensor"}); + + auto convert_result = std::make_shared(sum, ngraph::element::i32); + convert_result->set_friendly_name("sum/convert_element_type"); + convert_result->get_output_tensor(0).set_names({"output_tensor"}); + + auto result = std::make_shared(convert_result); + result->set_friendly_name("output"); + + auto f_10_ref = std::make_shared(ngraph::ResultVector{result}, + ngraph::ParameterVector{param}); + f_10_ref->set_friendly_name("Network"); + + ov::runtime::Core core; + auto f_10_core = core.read_model(model, InferenceEngine::Blob::CPtr()); + ASSERT_NE(nullptr, f_10_core); + + auto& rt_info = f_10_core->get_rt_info(); + EXPECT_EQ(0, rt_info.count("version")); + + const auto fc = FunctionsComparator::with_default() + .enable(FunctionsComparator::ATTRIBUTES) + .enable(FunctionsComparator::PRECISIONS) + .enable(FunctionsComparator::RUNTIME_KEYS) + .enable(FunctionsComparator::NAMES) + .enable(FunctionsComparator::CONST_VALUES); + auto res = fc.compare(f_10_core, f_10_ref); + EXPECT_TRUE(res.valid) << res.message; + } } -TEST(RTInfoDeserialization, NodeV11) { +TEST_F(RTInfoDeserialization, NodeV11) { std::string model = R"V0G0N( - + + - + 1 3 22 @@ -229,7 +375,7 @@ TEST(RTInfoDeserialization, NodeV11) { - + 1 3 22 @@ -238,6 +384,9 @@ TEST(RTInfoDeserialization, NodeV11) { + + + 1 @@ -254,41 +403,156 @@ TEST(RTInfoDeserialization, NodeV11) { )V0G0N"; - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(model, InferenceEngine::Blob::Ptr()); - auto f = net.getFunction(); + auto f = getWithIRFrontend(model); + ASSERT_NE(nullptr, f); auto check_fused_names = [](const RTMap & info, const std::string & names) { const std::string & key = VariantWrapper::get_type_info_static(); ASSERT_TRUE(info.count(key)); auto fused_names_attr = std::dynamic_pointer_cast>(info.at(key)); ASSERT_TRUE(fused_names_attr); - ASSERT_EQ(fused_names_attr->get().getNames(), names); + EXPECT_EQ(fused_names_attr->get().getNames(), names); }; + auto check_old_api_map = [](const RTMap & info, const std::vector & order, const ngraph::element::Type& type) { + const std::string & old_api_map_key = ov::OldApiMap::get_type_info_static(); + ASSERT_TRUE(info.count(old_api_map_key)); + auto old_api_map_attr = std::dynamic_pointer_cast(info.at(old_api_map_key)); + ASSERT_TRUE(old_api_map_attr); + auto old_api_map_attr_val = old_api_map_attr->get(); + EXPECT_EQ(old_api_map_attr_val.get_order(), order); + EXPECT_EQ(old_api_map_attr_val.get_type(), type); + }; auto check_version = [](const std::shared_ptr& f) { auto& rt_info = f->get_rt_info(); ASSERT_TRUE(rt_info.count("version")); auto version = std::dynamic_pointer_cast>(rt_info.at("version")); ASSERT_NE(version, nullptr); - ASSERT_EQ(version->get(), 11); + EXPECT_EQ(version->get(), 11); }; check_version(f); auto param = f->get_parameters()[0]; check_fused_names(param->get_rt_info(), "in1"); + check_old_api_map(param->get_rt_info(), + std::vector({0, 2, 3, 1}), + ngraph::element::Type_t::f16); - auto result = f->get_results()[0]; + auto result = f->get_result(); + check_old_api_map(result->get_rt_info(), + std::vector({0, 3, 1, 2}), + ngraph::element::Type_t::f16); auto round = result->get_input_node_ptr(0); check_fused_names(round->get_rt_info(), "Round1,Round2"); + + // read IR v11 with new API + { + ov::runtime::Core core; + auto f_11 = core.read_model(model, InferenceEngine::Blob::CPtr()); + ASSERT_NE(nullptr, f_11); + + check_old_api_map(f_11->get_parameters()[0]->get_rt_info(), + std::vector({0, 2, 3, 1}), + 
ngraph::element::Type_t::f16); + + check_old_api_map(f_11->get_result()->get_rt_info(), + std::vector({0, 3, 1, 2}), + ngraph::element::Type_t::f16); + + auto res = compare_functions(f, f_11); + EXPECT_TRUE(res.first) << res.second; + } + + // read IR v11 with old API and check that old_api_map is applied + { + const ngraph::PartialShape shape{1, 3, 22, 22}; + auto type = ngraph::element::f16; + auto param = std::make_shared(type, shape); + param->set_friendly_name("in1"); + param->get_output_tensor(0).set_names({"input_tensor"}); + + auto convert_param = std::make_shared(param, ngraph::element::f32); + convert_param->set_friendly_name("in1/convert_element_type"); + convert_param->get_output_tensor(0).set_names({"input_tensor/convert_element_type"}); + + auto constant_param = std::make_shared(ngraph::element::i64, ngraph::Shape{4}, + std::vector{0, 2, 3, 1}); + auto transpose_param = std::make_shared(convert_param, constant_param); + transpose_param->set_friendly_name("in1/convert_element_type/convert_layout"); + transpose_param->get_output_tensor(0).set_names({"input_tensor/convert_element_type/convert_layout"}); + + auto round = std::make_shared(transpose_param, + ngraph::opset8::Round::RoundMode::HALF_TO_EVEN); + round->set_friendly_name("Round"); + // TODO: why it has this name? + round->get_output_tensor(0).set_names({"output_tensor"}); + round->get_rt_info()[VariantWrapper::get_type_info_static()] = + std::make_shared>(ngraph::FusedNames("Round1,Round2")); + + auto constant_result = std::make_shared(ngraph::element::i64, ngraph::Shape{4}, + std::vector{0, 3, 1, 2}); + auto transpose_result = std::make_shared(round, constant_result); + transpose_result->set_friendly_name("Round/convert_layout"); + + auto convert_result = std::make_shared(transpose_result, type); + convert_result->set_friendly_name("Round/convert_layout/convert_element_type"); + convert_result->get_output_tensor(0).set_names({"output_tensor"}); + + auto result = std::make_shared(convert_result); + result->set_friendly_name("output"); + + auto f_10_ref = std::make_shared(ngraph::ResultVector{result}, + ngraph::ParameterVector{param}); + f_10_ref->set_friendly_name("Network"); + + InferenceEngine::Core core; + auto cnn_core = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); + auto f_10_core = cnn_core.getFunction(); + ASSERT_NE(nullptr, f_10_core); + + auto& rt_info = f_10_core->get_rt_info(); + EXPECT_EQ(0, rt_info.count("version")); + + const auto fc = FunctionsComparator::with_default() + .enable(FunctionsComparator::ATTRIBUTES) + .enable(FunctionsComparator::PRECISIONS) + .enable(FunctionsComparator::RUNTIME_KEYS) + .enable(FunctionsComparator::NAMES) + .enable(FunctionsComparator::CONST_VALUES); + auto res = fc.compare(f_10_core, f_10_ref); + EXPECT_TRUE(res.valid) << res.message; + + EXPECT_EQ(shape, f_10_ref->input().get_partial_shape()); + EXPECT_EQ(shape, f_10_core->input().get_partial_shape()); + EXPECT_EQ(shape, f_10_ref->get_output_partial_shape(0)); + EXPECT_EQ(shape, f_10_core->get_output_partial_shape(0)); + + // check that old api map is removed once applied + auto check_old_api_rt_info = [](const RTMap & info) { + const std::string & key = ov::OldApiMap::get_type_info_static(); + EXPECT_FALSE(info.count(key)); + }; + + check_old_api_rt_info(f_10_core->get_parameters()[0]->get_rt_info()); + check_old_api_rt_info(f_10_core->get_result()->get_rt_info()); + + // check information about layout + EXPECT_TRUE(f_10_core->get_parameters()[0]->get_layout().empty()) + << 
f_10_core->get_parameters()[0]->get_layout().to_string(); + EXPECT_TRUE(f_10_core->get_results()[0]->get_layout().empty()) + << f_10_core->get_results()[0]->get_layout().to_string(); + } } -TEST(RTInfoDeserialization, InputAndOutputV11) { +TEST_F(RTInfoDeserialization, InputAndOutputV11) { std::string model = R"V0G0N( + + + @@ -335,6 +599,9 @@ TEST(RTInfoDeserialization, InputAndOutputV11) { + + + @@ -355,9 +622,8 @@ TEST(RTInfoDeserialization, InputAndOutputV11) { )V0G0N"; - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(model, InferenceEngine::Blob::Ptr()); - auto f = net.getFunction(); + auto f = getWithIRFrontend(model); + ASSERT_NE(nullptr, f); auto check_version = [](const std::shared_ptr& f) { auto& rt_info = f->get_rt_info(); @@ -376,19 +642,57 @@ TEST(RTInfoDeserialization, InputAndOutputV11) { ASSERT_EQ(fused_names_attr->get().getNames(), names); }; + auto check_old_api_map = [](const RTMap & info, const std::vector & order, ngraph::element::Type type) { + const std::string & old_api_map_key = ov::OldApiMap::get_type_info_static(); + ASSERT_TRUE(info.count(old_api_map_key)); + auto old_api_map_attr = std::dynamic_pointer_cast(info.at(old_api_map_key)); + ASSERT_TRUE(old_api_map_attr); + auto old_api_map_attr_val = old_api_map_attr->get(); + ASSERT_EQ(old_api_map_attr_val.get_order(), order); + ASSERT_EQ(old_api_map_attr_val.get_type(), type); + }; + auto param = f->get_parameters()[0]; check_fused_names(param->output(0).get_rt_info(), "test1,test2"); + check_old_api_map(param->get_rt_info(), + std::vector({}), + ngraph::element::Type_t::undefined); - auto result = f->get_results()[0]; + auto result = f->get_result(); check_fused_names(result->input(0).get_rt_info(), "test5,test6"); + check_old_api_map(result->get_rt_info(), + std::vector({}), + ngraph::element::Type_t::undefined); auto add = result->get_input_node_ptr(0); check_fused_names(add->input(0).get_rt_info(), "test2,test3"); check_fused_names(add->input(1).get_rt_info(), "test3,test4"); check_fused_names(add->output(0).get_rt_info(), "test4,test5"); + + // read IR v11 with old API - the function is the same since no old_api_map is applied + { + InferenceEngine::Core core; + auto cnn = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); + auto f_10 = cnn.getFunction(); + ASSERT_NE(nullptr, f_10); + + EXPECT_EQ(0, f_10->get_rt_info().count("version")); + + // check that old api map is removed once applied + auto check_old_api_rt_info = [](const RTMap & info) { + const std::string & key = ov::OldApiMap::get_type_info_static(); + EXPECT_FALSE(info.count(key)); + }; + + check_old_api_rt_info(f_10->get_parameters()[0]->get_rt_info()); + check_old_api_rt_info(f_10->get_result()->get_rt_info()); + + auto res = compare_functions(f, f_10); + EXPECT_TRUE(res.first) << res.second; + } } -TEST(RTInfoDeserialization, IndexesInputAndOutputV11) { +TEST_F(RTInfoDeserialization, IndexesInputAndOutputV11) { std::string model = R"V0G0N( @@ -486,9 +790,8 @@ TEST(RTInfoDeserialization, IndexesInputAndOutputV11) { )V0G0N"; - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(model, InferenceEngine::Blob::Ptr()); - auto f = net.getFunction(); + auto f = getWithIRFrontend(model); + ASSERT_NE(nullptr, f); auto check_version = [](const std::shared_ptr& f) { auto& rt_info = f->get_rt_info(); diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_serialization.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_serialization.cpp index 
9187eb0c01f24d..2ac68b52c48dc5 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_serialization.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/rt_info_serialization.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include #include @@ -13,6 +14,7 @@ #include "transformations/serialize.hpp" #include #include +#include "frontend_manager/frontend_manager.hpp" using namespace ngraph; @@ -23,9 +25,28 @@ class RTInfoSerializationTest : public CommonTestUtils::TestsCommon { std::string m_out_bin_path = test_name + ".bin"; void TearDown() override { - std::remove(m_out_xml_path.c_str()); - std::remove(m_out_bin_path.c_str()); + CommonTestUtils::removeIRFiles(m_out_xml_path, m_out_bin_path); } + + std::shared_ptr getWithIRFrontend(const std::string& model_path, + const std::string& weights_path) { + ngraph::frontend::FrontEnd::Ptr FE; + ngraph::frontend::InputModel::Ptr inputModel; + + ov::VariantVector params{ov::make_variant(model_path), ov::make_variant(weights_path)}; + + FE = manager.load_by_model(params); + if (FE) + inputModel = FE->load(params); + + if (inputModel) + return FE->convert(inputModel); + + return nullptr; + } + +private: + ngraph::frontend::FrontEndManager manager; }; TEST_F(RTInfoSerializationTest, all_attributes_latest) { @@ -34,6 +55,8 @@ TEST_F(RTInfoSerializationTest, all_attributes_latest) { std::make_shared>(ngraph::FusedNames("add")); info[ov::PrimitivesPriority::get_type_info_static()] = std::make_shared("priority"); + info[ov::OldApiMap::get_type_info_static()] = std::make_shared( + ov::OldApiMapAttr(std::vector{0, 2, 3, 1}, ngraph::element::Type_t::f32)); }; std::shared_ptr function; @@ -51,9 +74,8 @@ TEST_F(RTInfoSerializationTest, all_attributes_latest) { m.register_pass(m_out_xml_path, m_out_bin_path); m.run_passes(function); - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(m_out_xml_path, m_out_bin_path); - auto f = net.getFunction(); + auto f = getWithIRFrontend(m_out_xml_path, m_out_bin_path); + ASSERT_NE(nullptr, f); auto check_info = [](const RTMap & info) { const std::string & key = VariantWrapper::get_type_info_static(); @@ -67,6 +89,14 @@ TEST_F(RTInfoSerializationTest, all_attributes_latest) { auto primitives_priority_attr = std::dynamic_pointer_cast(info.at(pkey)); ASSERT_TRUE(primitives_priority_attr); ASSERT_EQ(primitives_priority_attr->get(), "priority"); + + const std::string & old_api_map_key = ov::OldApiMap::get_type_info_static(); + ASSERT_TRUE(info.count(old_api_map_key)); + auto old_api_map_attr = std::dynamic_pointer_cast(info.at(old_api_map_key)); + ASSERT_TRUE(old_api_map_attr); + auto old_api_map_attr_val = old_api_map_attr->get(); + ASSERT_EQ(old_api_map_attr_val.get_order(), std::vector({0, 2, 3, 1})); + ASSERT_EQ(old_api_map_attr_val.get_type(), ngraph::element::Type_t::f32); }; auto add = f->get_results()[0]->get_input_node_ptr(0); @@ -99,9 +129,8 @@ TEST_F(RTInfoSerializationTest, all_attributes_v10) { m.register_pass(m_out_xml_path, m_out_bin_path, pass::Serialize::Version::IR_V10); m.run_passes(function); - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(m_out_xml_path, m_out_bin_path); - auto f = net.getFunction(); + auto f = getWithIRFrontend(m_out_xml_path, m_out_bin_path); + ASSERT_NE(nullptr, f); auto check_info = [](const RTMap & info) { const std::string & key = VariantWrapper::get_type_info_static(); @@ -138,9 +167,8 @@ TEST_F(RTInfoSerializationTest, all_attributes_v11) { 
m.register_pass(m_out_xml_path, m_out_bin_path); m.run_passes(function); - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(m_out_xml_path, m_out_bin_path); - auto f = net.getFunction(); + auto f = getWithIRFrontend(m_out_xml_path, m_out_bin_path); + ASSERT_NE(nullptr, f); auto check_info = [](const RTMap & info) { const std::string & key = VariantWrapper::get_type_info_static(); @@ -191,9 +219,8 @@ TEST_F(RTInfoSerializationTest, parameter_result_v11) { m.register_pass(m_out_xml_path, m_out_bin_path, pass::Serialize::Version::IR_V11); m.run_passes(function); - auto core = InferenceEngine::Core(); - auto net = core.ReadNetwork(m_out_xml_path, m_out_bin_path); - auto f = net.getFunction(); + auto f = getWithIRFrontend(m_out_xml_path, m_out_bin_path); + ASSERT_NE(nullptr, f); ASSERT_EQ(function->get_results().size(), f->get_results().size()); ASSERT_EQ(function->get_parameters().size(), f->get_parameters().size()); diff --git a/inference-engine/tests/functional/inference_engine/task_executor_tests.cpp b/inference-engine/tests/functional/inference_engine/task_executor_tests.cpp index 2e9b8341b31aff..482dbd2dfa452c 100644 --- a/inference-engine/tests/functional/inference_engine/task_executor_tests.cpp +++ b/inference-engine/tests/functional/inference_engine/task_executor_tests.cpp @@ -205,6 +205,18 @@ static auto Executors = ::testing::Values( return std::make_shared(IStreamsExecutor::Config{"TestCPUStreamsExecutor", streams, threads/streams, IStreamsExecutor::ThreadBindingType::NONE}); }, + [] { + auto streams = getNumberOfLogicalCPUCores(true); + auto threads = parallel_get_max_threads(); + return std::make_shared(IStreamsExecutor::Config{"TestCPUStreamsExecutor", + streams, threads/streams, IStreamsExecutor::ThreadBindingType::NONE}); + }, + [] { + auto streams = getNumberOfLogicalCPUCores(false); + auto threads = parallel_get_max_threads(); + return std::make_shared(IStreamsExecutor::Config{"TestCPUStreamsExecutor", + streams, threads/streams, IStreamsExecutor::ThreadBindingType::NONE}); + }, [] { return std::make_shared(); } @@ -218,6 +230,18 @@ static auto AsyncExecutors = ::testing::Values( auto threads = parallel_get_max_threads(); return std::make_shared(IStreamsExecutor::Config{"TestCPUStreamsExecutor", streams, threads/streams, IStreamsExecutor::ThreadBindingType::NONE}); + }, + [] { + auto streams = getNumberOfLogicalCPUCores(true); + auto threads = parallel_get_max_threads(); + return std::make_shared(IStreamsExecutor::Config{"TestCPUStreamsExecutor", + streams, threads/streams, IStreamsExecutor::ThreadBindingType::NONE}); + }, + [] { + auto streams = getNumberOfLogicalCPUCores(false); + auto threads = parallel_get_max_threads(); + return std::make_shared(IStreamsExecutor::Config{"TestCPUStreamsExecutor", + streams, threads/streams, IStreamsExecutor::ThreadBindingType::NONE}); } ); diff --git a/inference-engine/tests/functional/inference_engine/transformations/const_folding_for_if.cpp b/inference-engine/tests/functional/inference_engine/transformations/const_folding_for_if.cpp new file mode 100644 index 00000000000000..d503b2f7825848 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/const_folding_for_if.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_common.hpp" +#include +#include "common_test_utils/ngraph_test_utils.hpp" + +#include +#include "ngraph/opsets/opset1.hpp" +#include "ngraph/opsets/opset5.hpp" +#include 
"ngraph/opsets/opset8.hpp" +#include + +using namespace testing; +using namespace std; +using namespace ngraph; + +TEST(TransformationTests, if_constant_folding) { + std::shared_ptr fun(nullptr); + { + auto cond = std::make_shared(element::boolean, Shape{ 1 }, false); + auto A1 = std::make_shared(element::f32, Shape{ 1 }, 37.0); + auto A2 = std::make_shared(element::f32, Shape{ 1 }, 45.0); + auto B1 = std::make_shared(element::f32, Shape{ 1 }, 10.0); + auto B2 = std::make_shared(element::f32, Shape{ 1 }, 3.0); + auto Xt = make_shared(element::f32, PartialShape::dynamic()); + auto Yt = make_shared(element::f32, PartialShape::dynamic()); + auto Xe = make_shared(element::f32, PartialShape::dynamic()); + auto Ye = make_shared(element::f32, PartialShape::dynamic()); + auto a_add = std::make_shared(Xt, Yt); + auto b_pow = std::make_shared(Xe, Ye); + auto then_res = std::make_shared(a_add); + auto then_body = make_shared(OutputVector{ then_res }, ParameterVector{ Xt, Yt }); + auto else_res = std::make_shared(b_pow); + auto else_body = make_shared(OutputVector{ else_res }, ParameterVector{ Xe, Ye }); + auto if_op = make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(A1, Xt, nullptr); + if_op->set_input(A2, Yt, nullptr); + if_op->set_input(B1, nullptr, Xe); + if_op->set_input(B2, nullptr, Ye); + auto if_res = if_op->set_output(then_res, else_res); + auto param_add = make_shared(element::f32, Shape{ 1 }); + auto add = make_shared(if_res, param_add); + auto add_res = make_shared(add); + fun = make_shared(OutputVector{ add_res }, ParameterVector{ param_add }); + ngraph::pass::ConstantFolding().run_on_function(fun); + } + std::shared_ptr f_ref(nullptr); + { + auto constant_folding_if = make_shared(element::f32, Shape{ 1 }, 1000.0f); + auto param_add = make_shared(element::f32, Shape{ 1 }); + auto add = make_shared(constant_folding_if, param_add); + auto add_res = make_shared(add); + f_ref = std::make_shared(ngraph::NodeVector{ add_res }, ngraph::ParameterVector{ param_add }); + } + + auto res = compare_functions(fun, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/deformable_convolution.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/deformable_convolution.cpp index 3155580319da2f..1bb3a669a0cfd2 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/deformable_convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/deformable_convolution.cpp @@ -128,5 +128,53 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(std::vector({1, 3, 30, 30})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), DeformableConvolutionLayerTest::getTestCaseName); - +/* ============= Multiple groups case ============= */ +INSTANTIATE_TEST_SUITE_P( + smoke_DeformableConvolution2D_MultipleGroups, DeformableConvolutionLayerTest, + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(std::vector> {{1, 16, 2, 2}}), // offsets + ::testing::ValuesIn(std::vector> {{2, 2, 2, 2}}), // ker. + ::testing::ValuesIn(strides), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations), + ::testing::ValuesIn(std::vector {2}), // gr. + ::testing::ValuesIn(std::vector {2}), // def. gr. 
+ ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(with_bilinear_interpolation_pad), + ::testing::ValuesIn(with_modulated_scalar)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 4, 3, 3})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + DeformableConvolutionLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_DeformableConvolution2D_MultipleGroups_2, DeformableConvolutionLayerTest, + ::testing::Combine( + ::testing::Combine( + ::testing::ValuesIn(std::vector> {{1, 18, 66, 66}}), // offsets + ::testing::ValuesIn(std::vector> {{4, 2, 3, 3}}), // ker. + ::testing::ValuesIn(strides), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations), + ::testing::ValuesIn(std::vector {4}), // gr. + ::testing::ValuesIn(std::vector {1}), // def. gr. + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(with_bilinear_interpolation_pad), + ::testing::ValuesIn(with_modulated_scalar)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 8, 68, 68})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + DeformableConvolutionLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/scatter_update.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/scatter_update.cpp index 31362e0b2646b2..855adb4112e517 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/scatter_update.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/scatter_update.cpp @@ -25,6 +25,9 @@ const std::vector idxPrecisions = { // map> std::map, std::map, std::vector>> axesShapeInShape { + {{10, 16, 12, 15}, {{{2, 2, 2}, {0, 1, 2, 3}}, {{2, 4}, {0, 1, 2, 3}}, {{8}, {0, 1, 2, 3}}}}, + {{10, 9, 10, 9, 10}, {{{8}, {0, 1, 2, 3, 4}}, {{4, 2}, {0, 1, 2, 3, 4}}}}, + {{10, 9, 10, 9, 10, 12}, {{{8}, {0, 1, 2, 3, 4, 5}}}}, {{10, 16, 12, 15}, {{{2, 4}, {0, 1, 2, 3}}, {{8}, {-1, -2, -3, -4}}}}, {{10, 9, 10, 9, 10}, {{{8}, {-3, -1, 0, 2, 4}}, {{4, 2}, {-2, 2}}}}, }; @@ -43,4 +46,4 @@ const auto ScatterUpdateCase = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_ScatterUpdate, ScatterUpdateLayerTest, ScatterUpdateCase, ScatterUpdateLayerTest::getTestCaseName); -} // namespace +} // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 81264f99b95d05..e5670b491c04d4 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -111,6 +111,8 @@ std::vector disabledTestPatterns() { // Issue 66685 
R"(smoke_PrePostProcess.*resize_linear_nhwc.*)", + // Issue 67214 + R"(smoke_PrePostProcess.*resize_and_convert_layout_i8.*)", }; #define FIX_62820 0 diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/deformable_convolution.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/deformable_convolution.cpp new file mode 100644 index 00000000000000..5b7ea755f5b6b4 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/deformable_convolution.cpp @@ -0,0 +1,357 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/cpu_test_utils.hpp" + +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { +enum OffsetType {ZERO, NATURAL, REAL_POSITIVE, REAL_NEGATIVE, REAL_MISC}; + +typedef std::tuple< + bool, // with_bilinear_interpolation_pad + bool, // with_modulation + OffsetType // type of def. offsets + > DefConvSpecificParams; + +typedef std::tuple< + size_t, // batches + std::vector, // input spatial shape + std::vector, // offsets spatial shape + std::vector, // kernel spatial shape + ngraph::op::PadType, // pad. type + std::vector, // pad. begin + std::vector, // pad. end + std::vector, // strides + std::vector // dilations + > SpatialParams; + +typedef std::tuple< + size_t, // groups + size_t, // deformable groups + size_t, // input channels per group + size_t // output channels per group + > ChannelParams; + +typedef std::tuple< + SpatialParams, + ChannelParams, + DefConvSpecificParams, + InferenceEngine::Precision, // Net precision + LayerTestsUtils::TargetDevice // Device name + > DefConvLayerTestParams; + +typedef std::tuple< + CPULayerTestsDefinitions::DefConvLayerTestParams, + CPUSpecificParams> DefConvLayerCPUTestParamsSet; + +class DefConvLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { +public: + OffsetType offsetType; + static std::string getTestCaseName(testing::TestParamInfo obj) { + CPULayerTestsDefinitions::DefConvLayerTestParams basicParamsSet; + std::string td; + Precision netPr; + InferenceEngine::Precision inPrc, outPrc; + ChannelParams chParams; + SpatialParams spParams; + CPULayerTestsDefinitions::DefConvSpecificParams dcSpecificParams; + CPUSpecificParams cpuParams; + std::tie(basicParamsSet, cpuParams) = obj.param; + std::tie(spParams, chParams, dcSpecificParams, netPr, td) = basicParamsSet; + inPrc = outPrc = netPr; + ngraph::op::PadType padType; + size_t batch; + InferenceEngine::SizeVector offsets, filter, stride, dilation; + std::vector padBegin, padEnd; + InferenceEngine::SizeVector inSpatShape, offSpatShape, kerSpatShape; + std::tie(batch, inSpatShape, offSpatShape, kerSpatShape, + padType, padBegin, padEnd, stride, dilation) = spParams; + size_t groups, deformableGroups, inGrCh, outGrCh; + std::tie(groups, deformableGroups, inGrCh, outGrCh) = chParams; + bool withBilinearInterpolationPad, withModulation; + OffsetType offType; + std::tie(withBilinearInterpolationPad, withModulation, offType) = dcSpecificParams; + std::ostringstream result; + result << "DefConvTest("; + result << std::to_string(obj.index) << ")_"; + result << "IS=(" << batch << "_" << groups * inGrCh << "_" << inSpatShape[0] << "_" << inSpatShape[1] << ")_"; + result << "OS=(" << batch << "_" << groups * outGrCh << "_" << offSpatShape[0] << "_" << 
offSpatShape[1] << ")_"; + result << "K" << CommonTestUtils::vec2str(kerSpatShape) << "_"; + result << "S" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "O=" << groups * outGrCh << "_"; + result << "AP=" << padType << "_"; + result << "netPRC=" << netPr.name() << "_"; + result << "inPRC=" << inPrc.name() << "_"; + result << "outPRC=" << outPrc.name() << "_"; + result << "withBilPad=" << withBilinearInterpolationPad << "_"; + result << "withMod=" << withModulation << "_"; + result << "offsetType=" << offType << "_"; + result << "trgDev=" << td; + result << CPUTestsBase::getTestCaseName(cpuParams); + return result.str(); + } +protected: + void GenerateInputs() override { + for (const auto &input : cnnNetwork.getInputsInfo()) { + const auto info = input.second.get(); + const auto &name = info->name(); + InferenceEngine::Blob::Ptr blob; + if (name == "a_data") { + blob = GenerateInput(*info); + } else if (name == "b_offset_vals") { + if (offsetType == OffsetType::NATURAL) { + blob = FuncTestUtils::createAndFillBlobFloat(info->getTensorDesc(), 10, 0, 1); + } else if (offsetType == OffsetType::ZERO) { + blob = FuncTestUtils::createAndFillBlobFloat(info->getTensorDesc(), 0, 1, 1); + } else if (offsetType == OffsetType::REAL_POSITIVE) { + blob = FuncTestUtils::createAndFillBlobFloat(info->getTensorDesc(), 2, 0, 100); + } else if (offsetType == OffsetType::REAL_NEGATIVE) { + blob = FuncTestUtils::createAndFillBlobFloat(info->getTensorDesc(), 2, -2, 100); + } else if (offsetType == OffsetType::REAL_MISC) { + blob = FuncTestUtils::createAndFillBlobFloat(info->getTensorDesc(), 4, -2, 100); + } else { + IE_THROW() << "Unexpected offset type"; + } + } else if (name == "c_filter_vals") { + blob = GenerateInput(*info); + } else if (name == "c_modulation_scalars") { + blob = FuncTestUtils::createAndFillBlobFloat(info->getTensorDesc(), 1, 0, 100); + } else { + IE_THROW() << "Unknown input of DeformableConvolution"; + } + inputs.push_back(blob); + } + } + void SetUp() override { + ChannelParams chParams; + SpatialParams spParams; + CPULayerTestsDefinitions::DefConvSpecificParams dcSpecificParams; + + std::vector inShape; + InferenceEngine::Precision netPrecision; + CPULayerTestsDefinitions::DefConvLayerTestParams basicParamsSet; + CPUSpecificParams cpuParams; + std::tie(basicParamsSet, cpuParams) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + std::tie(spParams, chParams, dcSpecificParams, netPrecision, targetDevice) = basicParamsSet; + + inPrc = outPrc = netPrecision; + inLayout = outLayout = InferenceEngine::Layout::ANY; + ngraph::op::PadType padType; + size_t batch; + InferenceEngine::SizeVector offsets, filter, stride, dilation; + std::vector padBegin, padEnd; + InferenceEngine::SizeVector inSpatShape, offSpatShape, kerSpatShape; + std::tie(batch, inSpatShape, offSpatShape, kerSpatShape, + padType, padBegin, padEnd, stride, dilation) = spParams; + + size_t groups, deformableGroups, inGrCh, outGrCh; + std::tie(groups, deformableGroups, inGrCh, outGrCh) = chParams; + bool withBilinearInterpolationPad, withModulation; + std::tie(withBilinearInterpolationPad, withModulation, offsetType) = dcSpecificParams; + + inShape = std::vector({batch, groups * inGrCh, inSpatShape[0], inSpatShape[1]}); + offsets = std::vector {batch, deformableGroups * kerSpatShape[0] * kerSpatShape[1] * 
2, + offSpatShape[0], offSpatShape[1]}; + filter = std::vector {groups * outGrCh, inGrCh, kerSpatShape[0], kerSpatShape[1]}; + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inShape, offsets, filter}); + auto paramOuts = ngraph::helpers::convert2OutputVector( + ngraph::helpers::castOps2Nodes(params)); + auto data = std::make_shared(ngPrc, ngraph::Shape(inShape)); + data->set_friendly_name("a_data"); + auto offset_vals = std::make_shared(ngPrc, ngraph::Shape(offsets)); + offset_vals->set_friendly_name("b_offset_vals"); + auto filter_vals = std::make_shared(ngPrc, ngraph::Shape(filter)); + filter_vals->set_friendly_name("c_filter_vals"); + ngraph::ParameterVector parameters{data, offset_vals, filter_vals}; + std::shared_ptr deformable_conv; + if (withModulation) { + auto modulation_shape = ngraph::Shape(offsets); + modulation_shape[1] = offsets[1] / 2; + auto modulation_scalars = std::make_shared(ngPrc, modulation_shape); + modulation_scalars->set_friendly_name("c_modulation_scalars"); + + deformable_conv = std::make_shared(data, offset_vals, filter_vals, modulation_scalars, stride, padBegin, + padEnd, dilation, padType, groups, deformableGroups, + withBilinearInterpolationPad); + parameters.push_back(modulation_scalars); + } else { + deformable_conv = std::make_shared(data, offset_vals, filter_vals, stride, padBegin, padEnd, dilation, + padType, groups, deformableGroups, withBilinearInterpolationPad); + } + + function = makeNgraphFunction(ngPrc, parameters, deformable_conv, "deformable_convolution"); + } +}; + +TEST_P(DefConvLayerCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + Run(); + CheckPluginRelatedResults(executableNetwork, "DeformableConvolution"); +} + +namespace { + +/* CPU PARAMS */ +std::vector filterCPUInfoForDevice(bool enforceRef = false) { + std::vector resCPUParams; + if (enforceRef) { + resCPUParams.push_back(CPUSpecificParams{{}, {}, {}, {"ref_FP32"}}); + } else if (with_cpu_x86_avx512f()) { + resCPUParams.push_back(CPUSpecificParams{{}, {}, {}, {"jit_avx512_FP32"}}); + } else if (with_cpu_x86_avx2()) { + resCPUParams.push_back(CPUSpecificParams{{}, {}, {}, {"jit_avx2_FP32"}}); + } else if (with_cpu_x86_sse42()) { + resCPUParams.push_back(CPUSpecificParams{{}, {}, {}, {"jit_sse42"}}); + } else { + resCPUParams.push_back(CPUSpecificParams{{}, {}, {}, {"ref_FP32"}}); + } + return resCPUParams; +} + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 +}; + +const auto defConvSpecificParams = ::testing::Combine( + ::testing::ValuesIn(std::vector { + true, + false + }), // with_bilinear_interpolation_pad + ::testing::ValuesIn(std::vector { + true, + false + }), // with_modulation + ::testing::ValuesIn(std::vector { + OffsetType::NATURAL, + OffsetType::ZERO, + OffsetType::REAL_MISC, + OffsetType::REAL_POSITIVE, + OffsetType::REAL_NEGATIVE + }) // offset type +); + +std::vector padTypes = { + ngraph::op::PadType::EXPLICIT, + ngraph::op::PadType::VALID +}; + +const auto spParams1 = ::testing::Combine( + ::testing::Values(1), // batch + ::testing::Values(std::vector({68, 68})), // in. spat. shape + ::testing::Values(std::vector({66, 66})), // off. spat. shape + ::testing::Values(std::vector({3, 3})), // ker. spat. shape + ::testing::ValuesIn(padTypes), // pad. type + ::testing::Values(std::vector({0, 0})), // pad. begin + ::testing::Values(std::vector({0, 0})), // pad. 
end + ::testing::Values(std::vector {1, 1}), // strides + ::testing::Values(std::vector {1, 1}) // dilations +); + +const auto spParams2 = ::testing::Combine( + ::testing::Values(1), // batch + ::testing::Values(std::vector({3, 3})), // in. spat. shape + ::testing::Values(std::vector({2, 2})), // off. spat. shape + ::testing::Values(std::vector({2, 2})), // ker. spat. shape + ::testing::ValuesIn(padTypes), // pad. type + ::testing::Values(std::vector({0, 0})), // pad. begin + ::testing::Values(std::vector({0, 0})), // pad. end + ::testing::Values(std::vector {1, 1}), // strides + ::testing::Values(std::vector {1, 1}) // dilations +); + +const auto spParams3 = ::testing::Combine( + ::testing::Values(1), // batch + ::testing::Values(std::vector({5, 5})), // in. spat. shape + ::testing::Values(std::vector({4, 4})), // off. spat. shape + ::testing::Values(std::vector({2, 2})), // ker. spat. shape + ::testing::ValuesIn(padTypes), // pad. type + ::testing::Values(std::vector({0, 0})), // pad. begin + ::testing::Values(std::vector({0, 0})), // pad. end + ::testing::Values(std::vector {1, 1}), // strides + ::testing::Values(std::vector {1, 1}) // dilations +); +const auto spParams4 = ::testing::Combine( + ::testing::Values(1), // batch + ::testing::Values(std::vector({3, 2})), // in. spat. shape + ::testing::Values(std::vector({2, 1})), // off. spat. shape + ::testing::Values(std::vector({2, 2})), // ker. spat. shape + ::testing::ValuesIn(padTypes), // pad. type + ::testing::Values(std::vector({0, 0})), // pad. begin + ::testing::Values(std::vector({0, 0})), // pad. end + ::testing::Values(std::vector {1, 1}), // strides + ::testing::Values(std::vector {1, 1}) // dilations +); + +const auto chParamsSingleGr = ::testing::Combine( + ::testing::ValuesIn(std::vector {1}), // gr. 1 + ::testing::ValuesIn(std::vector {1, 2}), // def. gr. 1,2 + ::testing::ValuesIn(std::vector {16, 32}), // in. ch. per gr. + ::testing::ValuesIn(std::vector {16, 32})); // out. ch. per gr. + +const auto chParamsMulGr = ::testing::Combine( + ::testing::ValuesIn(std::vector {2, 4}), // gr. 2,4 + ::testing::ValuesIn(std::vector {1, 2}), // def. gr. 1,2 + ::testing::ValuesIn(std::vector {3, 7}), // in. ch. per gr. + ::testing::ValuesIn(std::vector {3, 7})); // out. ch. per gr. 
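 + +// The instantiations below combine the blocks above: params1-params4 run the +// single-group channel set on the ISA selected by filterCPUInfoForDevice(), while +// params5 pairs the multi-group channel set with filterCPUInfoForDevice(true), which +// forces those cases onto the ref_FP32 implementation.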
+ +const auto params1 = ::testing::Combine( + ::testing::Combine( + spParams1, + chParamsSingleGr, + defConvSpecificParams, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice())); +const auto params2 = ::testing::Combine( + ::testing::Combine( + spParams2, + chParamsSingleGr, + defConvSpecificParams, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice())); +const auto params3 = ::testing::Combine( + ::testing::Combine( + spParams3, + chParamsSingleGr, + defConvSpecificParams, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice())); +const auto params4 = ::testing::Combine( + ::testing::Combine( + spParams4, + chParamsSingleGr, + defConvSpecificParams, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice())); +const auto params5 = ::testing::Combine( + ::testing::Combine( + spParams4, + chParamsMulGr, + defConvSpecificParams, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(true))); +INSTANTIATE_TEST_SUITE_P(smoke_DefConvLayoutTest1, DefConvLayerCPUTest, params1, DefConvLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_DefConvLayoutTest2, DefConvLayerCPUTest, params2, DefConvLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_DefConvLayoutTest3, DefConvLayerCPUTest, params3, DefConvLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_DefConvLayoutTest4, DefConvLayerCPUTest, params4, DefConvLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_DefConvLayoutTest5, DefConvLayerCPUTest, params5, DefConvLayerCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp index 4966f3e2a0b03b..d65be3b3b62093 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "shared_test_classes/single_layer/mat_mul.hpp" #include "test_utils/fusing_test_utils.hpp" #include "ngraph_functions/builders.hpp" @@ -18,42 +18,24 @@ enum class MatMulNodeType { FullyConnected }; -using MatMulLayerTestParams = std::tuple, - Precision, - helpers::InputLayerType, - bool, - bool>; - -using MatMulLayerCPUTestParamSet = std::tuple; class MatMulLayerCPUTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - MatMulLayerTestParams basicParamsSet; - fusingSpecificParams fusingParams; + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + MatMulLayerTestParamsSet basicParamsSet; MatMulNodeType nodeType; - std::tie(basicParamsSet, nodeType, fusingParams) = obj.param; + fusingSpecificParams fusingParams; - std::pair IS; - SizeVector isA, isB; - bool transpA, transpB; - Precision prec; - helpers::InputLayerType typeB; - std::tie(IS, prec, typeB, transpA, transpB) = basicParamsSet; - isA = IS.first; isB = IS.second; + std::tie(basicParamsSet, 
nodeType, fusingParams) = obj.param; std::ostringstream result; result << (nodeType == MatMulNodeType::MatMul ? "MatMul_" : "FullyConnected_"); - result << "IS_A=" << CommonTestUtils::vec2str(isA) << "_"; - result << "IS_B=" << CommonTestUtils::vec2str(isB) << "_"; - result << "Transp_A=" << transpA << "_"; - result << "Transp_B=" << transpB << "_"; - result << "Prec=" << prec << "_"; - result << "typeB=" << typeB; - + result << LayerTestsDefinitions::MatMulTest::getTestCaseName( + testing::TestParamInfo(basicParamsSet, 0)); result << CpuTestWithFusing::getTestCaseName(fusingParams); return result.str(); @@ -63,36 +45,52 @@ class MatMulLayerCPUTest : public testing::WithParamInterfaceGetParam(); - std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + ShapeRelatedParams shapeRelatedParams; + Precision netPrecision; + helpers::InputLayerType secondaryInputType; + std::map additionalConfig; + + std::tie(shapeRelatedParams, netPrecision, inPrc, outPrc, inLayout, secondaryInputType, targetDevice, additionalConfig) = basicParamsSet; + + SizeVector inShapeA = shapeRelatedParams.input1.first; + SizeVector inShapeB = shapeRelatedParams.input2.first; + bool transpA = shapeRelatedParams.input1.second; + bool transpB = shapeRelatedParams.input2.second; + + /* @todo + * Currently nodes are not fused through Reshape + * Check can be deleted after this limitation is gone + */ + if (nodeType == MatMulNodeType::MatMul && inShapeA.size() < 4 && inShapeB.size() < 4) + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) + inPrc = outPrc = netPrecision = Precision::BF16; + else + inPrc = outPrc = netPrecision; cpuNodeType = nodeType == MatMulNodeType::MatMul ?
"MatMul" : "FullyConnected"; - std::pair IS; - SizeVector isA, isB; - bool transpA, transpB; - Precision prec; - helpers::InputLayerType typeB; - std::tie(IS, prec, typeB, transpA, transpB) = basicParamsSet; - - isA = IS.first; isB = IS.second; - if (transpA) { - IE_ASSERT(isA.size() > 1); - std::swap(*(isA.end() - 1), *(isA.end() - 2)); - } - if (transpB) { - IE_ASSERT(isB.size() > 1); - std::swap(*(isB.end() - 1), *(isB.end() - 2)); - } + auto transpose = [](SizeVector& shape) { + IE_ASSERT(shape.size() > 1); + std::swap(*(shape.end() - 1), *(shape.end() - 2)); + }; + + if (transpA) transpose(inShapeA); + if (transpB) transpose(inShapeB); - auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prec); - auto params = builder::makeParams(ngPrec, {isA}); - auto matrixB = builder::makeInputLayer(ngPrec, typeB, isB); - if (typeB == helpers::InputLayerType::PARAMETER) { + auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = builder::makeParams(ngPrec, {inShapeA}); + auto matrixB = builder::makeInputLayer(ngPrec, secondaryInputType, inShapeB); + if (secondaryInputType == helpers::InputLayerType::PARAMETER) { params.push_back(std::dynamic_pointer_cast(matrixB)); } auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); @@ -117,6 +115,16 @@ const std::vector transpose = { true, false }; +std::vector> additionalConfig { + std::map{/* empty config */}, + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}} +}; + +const std::vector netPRCs { + Precision::FP32, + Precision::BF16 +}; + /* ============= FullyConnected ============= */ namespace fullyConnected { @@ -126,11 +134,26 @@ const auto fusingBiasFC = fusingSpecificParams{std::make_shared(st return std::make_shared(inpNode, bias); }, "fusingBiasFC"}}), {"Add"}}; -const std::vector> IS2D = { - {{59, 1}, {1, 120}}, - {{59, 120}, {120, 1}}, - {{1, 120}, {120, 59}}, - {{71, 128}, {128, 20}} +const std::vector IS2D { + {{{59, 1}, false}, {{1, 120}, false}}, + {{{59, 1}, true}, {{1, 120}, false}}, + {{{59, 1}, false}, {{1, 120}, true}}, + {{{59, 1}, true}, {{1, 120}, true}}, + + {{{59, 120}, false}, {{120, 1}, false}}, + {{{59, 120}, true}, {{120, 1}, false}}, + {{{59, 120}, false}, {{120, 1}, true}}, + {{{59, 120}, true}, {{120, 1}, true}}, + + {{{1, 120}, false}, {{120, 59}, false}}, + {{{1, 120}, true}, {{120, 59}, false}}, + {{{1, 120}, false}, {{120, 59}, true}}, + {{{1, 120}, true}, {{120, 59}, true}}, + + {{{71, 128}, false}, {{128, 20}, false}}, + {{{71, 128}, true}, {{128, 20}, false}}, + {{{71, 128}, false}, {{128, 20}, true}}, + {{{71, 128}, true}, {{128, 20}, true}}, }; std::vector fusingParamsSet2D { @@ -142,20 +165,30 @@ std::vector fusingParamsSet2D { }; const auto fullyConnectedParams2D = ::testing::Combine(::testing::ValuesIn(IS2D), - ::testing::Values(Precision::FP32), + ::testing::ValuesIn(netPRCs), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Layout::ANY), ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::ValuesIn(transpose), - ::testing::ValuesIn(transpose)); + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig)); const auto testParams2D = ::testing::Combine(fullyConnectedParams2D, ::testing::Values(MatMulNodeType::FullyConnected), ::testing::ValuesIn(fusingParamsSet2D)); -INSTANTIATE_TEST_SUITE_P(smoke_Check_2D, MatMulLayerCPUTest, testParams2D, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, 
MatMulLayerCPUTest, testParams2D, MatMulLayerCPUTest::getTestCaseName); + +const std::vector IS3D = { + {{{1, 32, 120}, false}, {{120, 5}, false}}, + {{{1, 32, 120}, true}, {{120, 5}, false}}, + {{{1, 32, 120}, false}, {{120, 5}, true}}, + {{{1, 32, 120}, true}, {{120, 5}, true}}, -const std::vector> IS3D = { - {{1, 32, 120}, {120, 5}}, - {{7, 32, 120}, {120, 50}} + {{{7, 32, 120}, false}, {{120, 50}, false}}, + {{{7, 32, 120}, true}, {{120, 50}, false}}, + {{{7, 32, 120}, false}, {{120, 50}, true}}, + {{{7, 32, 120}, true}, {{120, 50}, true}}, }; std::vector fusingParamsSet3D { @@ -164,43 +197,69 @@ std::vector fusingParamsSet3D { }; const auto fullyConnectedParams3D = ::testing::Combine(::testing::ValuesIn(IS3D), - ::testing::Values(Precision::FP32), + ::testing::ValuesIn(netPRCs), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Layout::ANY), ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::ValuesIn(transpose), - ::testing::ValuesIn(transpose)); + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig)); const auto testParams3D = ::testing::Combine(fullyConnectedParams3D, ::testing::Values(MatMulNodeType::FullyConnected), ::testing::ValuesIn(fusingParamsSet3D)); -INSTANTIATE_TEST_SUITE_P(smoke_Check_3D, MatMulLayerCPUTest, testParams3D, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulLayerCPUTest, testParams3D, MatMulLayerCPUTest::getTestCaseName); }; // namespace fullyConnected -/* ============= Gemm ============= */ -namespace gemm { +/* ============= MatMul ============= */ +namespace matmul { -const std::vector> IS = { - {{1, 2, 32, 120}, {120, 5}}, - {{7, 32, 120}, {3, 7, 120, 50}}, - {{10, 10, 10}, {10, 10, 10}}, - {{55, 12}, {12, 55}} +const std::vector IS = { + {{{1, 2, 32, 120}, false}, {{120, 5}, false}}, + {{{1, 2, 32, 120}, true}, {{120, 5}, false}}, + {{{1, 2, 32, 120}, false}, {{120, 5}, true}}, + {{{1, 2, 32, 120}, true}, {{120, 5}, true}}, + + {{{7, 32, 120}, false}, {{3, 7, 120, 50}, false}}, + {{{7, 32, 120}, true}, {{3, 7, 120, 50}, false}}, + {{{7, 32, 120}, false}, {{3, 7, 120, 50}, true}}, + {{{7, 32, 120}, true}, {{3, 7, 120, 50}, true}}, + + {{{10, 10, 10}, false}, {{10, 10, 10}, false}}, + {{{10, 10, 10}, true}, {{10, 10, 10}, false}}, + {{{10, 10, 10}, false}, {{10, 10, 10}, true}}, + {{{10, 10, 10}, true}, {{10, 10, 10}, true}}, + + {{{55, 12}, false}, {{12, 55}, false}}, + {{{55, 12}, true}, {{12, 55}, false}}, + {{{55, 12}, false}, {{12, 55}, true}}, + {{{55, 12}, true}, {{12, 55}, true}}, +}; + +std::vector matmulFusingParams { + emptyFusingSpec, + fusingElu, }; -const auto gemmParams = ::testing::Combine(::testing::ValuesIn(IS), - ::testing::Values(Precision::FP32), - ::testing::Values(helpers::InputLayerType::PARAMETER), - ::testing::ValuesIn(transpose), - ::testing::ValuesIn(transpose)); +const auto matMulParams = ::testing::Combine(::testing::ValuesIn(IS), + ::testing::ValuesIn(netPRCs), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Precision::UNSPECIFIED), + ::testing::Values(Layout::ANY), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig)); -const auto testParams = ::testing::Combine(gemmParams, +const auto testParams = ::testing::Combine(matMulParams, ::testing::Values(MatMulNodeType::MatMul), - ::testing::Values(emptyFusingSpec)); + ::testing::ValuesIn(matmulFusingParams)); 
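// Editor's sketch, not part of the original patch: each entry of the IS vectors above
// is {{shapeA, transposeA}, {shapeB, transposeB}}. MatMul uses numpy-style batch
// broadcasting, so {{7, 32, 120}, false} x {{3, 7, 120, 50}, false} should yield a
// {3, 7, 32, 50} result: batch dims broadcast while the two trailing dims follow the
// (M, K) x (K, N) -> (M, N) rule. A minimal standalone check (the test name is
// hypothetical and this assumes gtest plus ngraph::opset1 are visible in this TU):
TEST(MatMulShapeSketch, numpyStyleBroadcast) {
    auto A = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{7, 32, 120});
    auto B = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 7, 120, 50});
    auto matMul = std::make_shared<ngraph::opset1::MatMul>(A, B, false, false); // no transposes
    ASSERT_EQ(matMul->get_output_shape(0), (ngraph::Shape{3, 7, 32, 50}));
}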
-INSTANTIATE_TEST_SUITE_P(smoke_Check, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_MM, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName); -}; // namespace gemm +}; // namespace matmul } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/align_mamtul_input_ranks.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/align_mamtul_input_ranks.cpp new file mode 100644 index 00000000000000..fdffeb99b4b2de --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/align_mamtul_input_ranks.cpp @@ -0,0 +1,101 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/cpu_test_utils.hpp" +#include "test_utils/fusing_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "common_test_utils/common_utils.hpp" + +#include +#include + +using namespace ngraph; +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace SubgraphTestsDefinitions { + +using AlignMatMulInputRanksTestParams = std::tuple, // IS fully connected + fusingSpecificParams>; + +class AlignMatMulInputRanksTest : public testing::WithParamInterface, public CpuTestWithFusing, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::pair supportedInputShapes; + fusingSpecificParams fusingParams; + std::tie(supportedInputShapes, fusingParams) = obj.param; + SizeVector inputShapeA = supportedInputShapes.first; SizeVector inputShapeB = supportedInputShapes.second; + + std::ostringstream result; + result << "IS_A=" << CommonTestUtils::vec2str(inputShapeA) << "_"; + result << "IS_B=" << CommonTestUtils::vec2str(inputShapeB) << "_"; + result << CpuTestWithFusing::getTestCaseName(fusingParams); + + return result.str(); + } + +protected: + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + std::pair inShapes; + fusingSpecificParams fusingParams; + std::tie(inShapes, fusingParams) = this->GetParam(); + + if (inShapes.first.size() != inShapes.second.size()) + expectedNumOfReshapes++; // one input will be unsqueezed + if (inShapes.first.size() == 1 || inShapes.second.size() == 1) + expectedNumOfReshapes++; // output will be squeezed + if (inShapes.first.size() == 1 && inShapes.second.size() == 1) + expectedNumOfReshapes+=2; // both inputs unsqueezed and output squeezed + + if (inShapes.first.size() != 1 && inShapes.second.size() != 1) // no fusing through Reshape after output + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + const auto ngPrec = element::f32; + auto inputParams = builder::makeParams(ngPrec, {inShapes.first, inShapes.second}); + const auto outputNodes = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); + const auto matMul = builder::makeMatMul(outputNodes[0], outputNodes[1], false, false); + + function = makeNgraphFunction(ngPrec, inputParams, matMul, "AlignMatMulInputRanks"); + } + + int expectedNumOfReshapes = 0; +}; + +TEST_P(AlignMatMulInputRanksTest, supportedInputShapes) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckNodeOfTypeCount(executableNetwork, "Reshape", expectedNumOfReshapes); // Squeeze / Unsqueeze turns into Reshape + CheckFusingResults(executableNetwork, "MatMul"); +} + +namespace { + +const std::vector> supportedInputShapes = { + {{4, 10, 5}, {1, 5, 10}}, // nothing to be done + {{3}, {3}}, // 3x1 * 1x3 -> 1 + {{18}, {1, 5, 18, 20}}, // 1x1x1x18 * 
1x5x18x20 -> 1x5x20 + {{2, 3, 4, 4, 4, 10, 5}, {5}}, // 2x3x4x4x4x10x5 * 1x1x1x1x1x5x1 -> 1x1x1x1x1x5 + {{1, 18}, {1, 5, 18, 20}}, + {{1, 70, 18}, {1, 5, 18, 20}}, + {{7, 1, 10, 3, 2, 7}, {1, 7, 5}}, + {{2, 3, 4, 4, 4, 10, 5}, {5, 20}}, +}; + +// verify fusing just in case +std::vector fusingParamsSet { + emptyFusingSpec, + fusingElu, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Check, AlignMatMulInputRanksTest, + ::testing::Combine(::testing::ValuesIn(supportedInputShapes), + ::testing::ValuesIn(fusingParamsSet)), + AlignMatMulInputRanksTest::getTestCaseName); + +} // namespace + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/scale_factors_tests/weighable_layer_without_fq.cpp b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/weighable_layer_without_fq.cpp new file mode 100644 index 00000000000000..5bf20670c8415c --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/weighable_layer_without_fq.cpp @@ -0,0 +1,111 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +namespace SubgraphTestsDefinitions { + +typedef std::tuple< + InferenceEngine::Precision, // Net precision + std::vector, // Input shape; + std::vector, // Constant shape; + LayerTestsUtils::TargetDevice, // Device name + std::map // Additional backend configuration and alias name to it +> WeighableLayerWithoutFqParamsSet; + +/* + * This test emulates cases in which the ConcatAlignFilter layer is created and the model has FakeQuantize layers. + */ +class WeighableLayerWithoutFqTest : + public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map config; + std::vector inputShape; + std::vector constantShape; + std::tie(netPrecision, constantShape, inputShape, targetDevice, config) = obj.param; + + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "trgDev=" << targetDevice; + for (auto const& configItem : config) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + return result.str(); + } + +protected: + void SetUp() override { + std::map config; + InferenceEngine::Precision netPrecision; + std::vector inputShape; + std::vector constantShape; + std::tie(netPrecision, constantShape, inputShape, targetDevice, config) = this->GetParam(); + configuration.insert(config.begin(), config.end()); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto fq1 = std::make_shared( + params[0], + ngraph::opset8::Constant::create(ngraph::element::f32, {1}, {1.}), + ngraph::opset8::Constant::create(ngraph::element::f32, {1}, {1.}), + ngraph::opset8::Constant::create(ngraph::element::f32, {1}, {1.}), + ngraph::opset8::Constant::create(ngraph::element::f32, {1}, {1.}), + 255); + auto constant = ngraph::builder::makeConstant(ngPrc, constantShape, std::vector{}, true); + auto fq2 = std::make_shared( + constant, + ngraph::opset8::Constant::create(ngraph::element::f32, {1}, {1}), + ngraph::opset8::Constant::create(ngraph::element::f32, {1}, {1.}), +
ngraph::opset8::Constant::create(ngraph::element::f32, {1}, {1.}), + ngraph::opset8::Constant::create(ngraph::element::f32, {1}, {1.}), + 255); + auto concat = ngraph::builder::makeConcat({fq1, fq2}, 0); + function = std::make_shared(concat, params, "WeighableLayerWithoutFq"); + } +}; // class WeighableLayerWithoutFqTest + +TEST_P(WeighableLayerWithoutFqTest, CompareWithRefs) { + Run(); +} + +namespace { +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, +}; + +const std::vector> inputShapes = { + {{1, 5}} +}; + +const std::vector> constantShapes = { + {{16, 5}} +}; + +const std::vector> configs = { + {{"GNA_DEVICE_MODE", "GNA_SW_FP32"}}, + {{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_WeighableLayerWithoutFqTest, WeighableLayerWithoutFqTest, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(constantShapes), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs)), + WeighableLayerWithoutFqTest::getTestCaseName); +} // namespace +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 5ce91823b725b3..987c9344b94b08 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -71,6 +71,8 @@ std::vector disabledTestPatterns() { // TODO: until issue is xxx-59670 is resolved R"(.*Gather8LayerTest.*)", // TODO: Issue 66516 - R"(.*smoke_PrePostProcess_GPU.*convert_element_type_and_mean.*)" + R"(.*smoke_PrePostProcess_GPU.*convert_element_type_and_mean.*)", + // TODO: Issue 67408 + R"(.*smoke_LSTMSequenceCommonClip.*LSTMSequenceTest.*CompareWithRefs.*)", }; -} \ No newline at end of file +} diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/preprocess.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/preprocess.cpp index 78e37cee17a5d8..74584005c8a69f 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/preprocess.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/preprocess.cpp @@ -30,6 +30,7 @@ void PrePostProcessTest::SetUp() { std::tie(func, targetDevice) = GetParam(); function = (std::get<0>(func))(); threshold = std::get<2>(func); + abs_threshold = std::get<2>(func); } TEST_P(PrePostProcessTest, CompareWithRefs) { diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/preprocess/preprocess_builders.hpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/preprocess/preprocess_builders.hpp index 0ec6abc7c5c16c..96521f7d2c20d9 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/preprocess/preprocess_builders.hpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/preprocess/preprocess_builders.hpp @@ -270,6 +270,63 @@ inline std::shared_ptr resize_and_convert_layout() { return function; } +inline std::shared_ptr resize_and_convert_layout_i8() { + using namespace ov::preprocess; + auto function = create_preprocess_1input(element::i8, PartialShape{1, 30, 20, 3}); + function = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo() + 
.set_layout("NHWC") + .set_spatial_static_shape(40, 30)) + .preprocess(PreProcessSteps() + .convert_layout() + .resize(ResizeAlgorithm::RESIZE_LINEAR)) + .network(InputNetworkInfo().set_layout("NCHW"))) + .build(function); + return function; +} + +inline std::shared_ptr cvt_color_nv12_to_rgb_single_plane() { + using namespace ov::preprocess; + auto function = create_preprocess_1input(element::f32, PartialShape{1, 20, 20, 3}); + function = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_SINGLE_PLANE)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))) + .build(function); + return function; +} + +inline std::shared_ptr cvt_color_nv12_to_bgr_two_planes() { + using namespace ov::preprocess; + auto function = create_preprocess_1input(element::f32, PartialShape{1, 20, 20, 3}); + function = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))) + .build(function); + return function; +} + +inline std::shared_ptr cvt_color_nv12_cvt_layout_resize() { + using namespace ov::preprocess; + auto function = create_preprocess_1input(element::f32, PartialShape{1, 3, 10, 10}); + function = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo() + .set_color_format(ColorFormat::NV12_TWO_PLANES) + .set_element_type(element::u8) + .set_spatial_static_shape(20, 20)) + .preprocess(PreProcessSteps() + .convert_color(ColorFormat::RGB) + .convert_layout() + .convert_element_type(element::f32) + .resize(ResizeAlgorithm::RESIZE_LINEAR)) + .network(InputNetworkInfo().set_layout("NCHW"))) + .build(function); + return function; +} + inline std::vector generic_preprocess_functions() { return std::vector { preprocess_func(mean_only, "mean_only", 0.01f), @@ -290,6 +347,10 @@ inline std::vector generic_preprocess_functions() { preprocess_func(resize_linear_nhwc, "resize_linear_nhwc", 0.01f), preprocess_func(resize_cubic, "resize_cubic", 0.01f), preprocess_func(resize_and_convert_layout, "resize_and_convert_layout", 0.01f), + preprocess_func(resize_and_convert_layout_i8, "resize_and_convert_layout_i8", 0.01f), + preprocess_func(cvt_color_nv12_to_rgb_single_plane, "cvt_color_nv12_to_rgb_single_plane", 2.f), + preprocess_func(cvt_color_nv12_to_bgr_two_planes, "cvt_color_nv12_to_bgr_two_planes", 2.f), + preprocess_func(cvt_color_nv12_cvt_layout_resize, "cvt_color_nv12_cvt_layout_resize", 2.f), }; } diff --git a/inference-engine/tests/unit/gna/ngraph/transformations/gna_insert_reshape_around_matmul.cpp b/inference-engine/tests/unit/gna/ngraph/transformations/gna_insert_reshape_around_matmul.cpp index 68422b5a9be3fd..ff22308a54e3c2 100644 --- a/inference-engine/tests/unit/gna/ngraph/transformations/gna_insert_reshape_around_matmul.cpp +++ b/inference-engine/tests/unit/gna/ngraph/transformations/gna_insert_reshape_around_matmul.cpp @@ -13,7 +13,7 @@ #include #include -template +template struct InsertReshapeAroundMatmulTest { static std::shared_ptr CreateAdd(std::shared_ptr input, const ngraph::Shape& constant_shape) { std::vector data(ngraph::shape_size(constant_shape)); @@ -24,7 +24,8 @@ struct InsertReshapeAroundMatmulTest { static std::shared_ptr CreateMatmul( std::shared_ptr input, - const ngraph::Shape& matmul_constant_shape) { + const ngraph::Shape& matmul_constant_shape, + const ngraph::Shape& permutation_shape) { std::vector data(ngraph::shape_size(matmul_constant_shape)); std::iota(std::begin(data), std::end(data), 
1); auto constant = ngraph::opset8::Constant::create(ngraph::element::i64, matmul_constant_shape, data); @@ -55,16 +56,22 @@ struct InsertReshapeAroundMatmulTest { 255); } + if (TRANSPOSE) { + node = std::make_shared( + node, + ngraph::opset8::Constant::create(ngraph::element::i64, {permutation_shape.size()}, permutation_shape)); + } + return node; } static std::shared_ptr CreateFunction( const ngraph::Shape& input_shape, const ngraph::Shape& matmul_constant_shape, - const ngraph::Shape& result_shape) { + const ngraph::Shape& permutation_shape = ngraph::Shape()) { auto input = std::make_shared(ngraph::element::i64, input_shape); auto before = std::make_shared(input); - auto matmul = CreateMatmul(before, matmul_constant_shape); + auto matmul = CreateMatmul(before, matmul_constant_shape, permutation_shape); auto after = std::make_shared(matmul); return std::make_shared( ngraph::ResultVector{std::make_shared(after)}, @@ -73,16 +80,16 @@ struct InsertReshapeAroundMatmulTest { static std::shared_ptr CreateReferenceFunction( const ngraph::Shape& input_shape, - const ngraph::Shape& reshape_before_shape, + const std::vector& reshape_before_shape, const ngraph::Shape& matmul_constant_shape, const ngraph::Shape& reshape_after_shape, - const ngraph::Shape& result_shape) { + const ngraph::Shape& permutation_shape = ngraph::Shape()) { auto input = std::make_shared(ngraph::element::i64, input_shape); auto before = std::make_shared(input); auto reshape_before_constant = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_before_shape.size()}, reshape_before_shape); auto reshape_before = std::make_shared(before, reshape_before_constant, false); - auto matmul = CreateMatmul(reshape_before, matmul_constant_shape); + auto matmul = CreateMatmul(reshape_before, matmul_constant_shape, permutation_shape); auto reshape_after_constant = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_after_shape.size()}, reshape_after_shape); auto reshape_after = std::make_shared(matmul, reshape_after_constant, false); @@ -116,75 +123,144 @@ void RunTest(const std::shared_ptr& func, const std::shared_pt TEST(TransformationTests, InsertReshapeAroundMatmul) { RunTest( - InsertReshapeAroundMatmulTest:: - CreateFunction({1, 6, 8}, {8, 10}, {1, 6, 10}), - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); - RunTest( - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10}), - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); - RunTest( - InsertReshapeAroundMatmulTest:: - CreateFunction({1, 6, 1, 8}, {8, 10}, {1, 6, 1, 10}), - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 1, 8}, {6, 8}, {8, 10}, {1, 6, 1, 10}, {1, 6, 1, 10})); - RunTest( - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 1, 8}, {6, 8}, {8, 10}, {1, 6, 1, 10}, {1, 6, 1, 10}), - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 1, 8}, {6, 8}, {8, 10}, {1, 6, 1, 10}, {1, 6, 1, 10})); + InsertReshapeAroundMatmulTest<>:: + CreateFunction({1, 6, 8}, {8, 10}), + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); + RunTest( + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10}), + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); + RunTest( + 
InsertReshapeAroundMatmulTest<>:: + CreateFunction({1, 6, 1, 8}, {8, 10}), + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 6, 1, 8}, {-1, 8}, {8, 10}, {1, 6, 1, 10})); + RunTest( + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 6, 1, 8}, {-1, 8}, {8, 10}, {1, 6, 1, 10}), + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 6, 1, 8}, {-1, 8}, {8, 10}, {1, 6, 1, 10})); + RunTest( + InsertReshapeAroundMatmulTest<>:: + CreateFunction({1, 1, 8}, {8, 10}), + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 1, 10})); + RunTest( + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 1, 10}), + InsertReshapeAroundMatmulTest<>:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 1, 10})); } TEST(TransformationTests, InsertReshapeAroundMatmulWithAdd) { RunTest( - InsertReshapeAroundMatmulTest:: - CreateFunction({1, 6, 8}, {8, 10}, {1, 6, 10}), - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); + InsertReshapeAroundMatmulTest:: + CreateFunction({1, 6, 8}, {8, 10}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); RunTest( - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10}), - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); } TEST(TransformationTests, InsertReshapeAroundMatmulWithAdd_AddFirstInputConstant) { RunTest( - InsertReshapeAroundMatmulTest:: - CreateFunction({1, 6, 8}, {8, 10}, {1, 6, 10}), - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); + InsertReshapeAroundMatmulTest:: + CreateFunction({1, 6, 8}, {8, 10}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); RunTest( - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10}), - InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); } TEST(TransformationTests, InsertReshapeAroundMatmulWithFq) { RunTest( InsertReshapeAroundMatmulTest:: - CreateFunction({1, 6, 8}, {8, 10}, {1, 6, 10}), + CreateFunction({1, 6, 8}, {8, 10}), InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); RunTest( InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10}), + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10}), InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); } TEST(TransformationTests, InsertReshapeAroundMatmulWithAddAndFq) { RunTest( InsertReshapeAroundMatmulTest:: - CreateFunction({1, 6, 8}, {8, 10}, {1, 6, 10}), + CreateFunction({1, 6, 8}, 
{8, 10}), InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); RunTest( InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10}), + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10}), InsertReshapeAroundMatmulTest:: - CreateReferenceFunction({1, 6, 8}, {6, 8}, {8, 10}, {1, 6, 10}, {1, 6, 10})); + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 6, 10})); +} + +TEST(TransformationTests, InsertReshapeAroundMatmulWithTranspose) { + RunTest( + InsertReshapeAroundMatmulTest:: + CreateFunction({1, 6, 8}, {8, 10}, {0, 2, 1}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0})); + RunTest( + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0})); + RunTest( + InsertReshapeAroundMatmulTest:: + CreateFunction({1, 1, 8}, {8, 10}, {0, 2, 1}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 10, 1}, {1, 0})); + RunTest( + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 10, 1}, {1, 0}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 10, 1}, {1, 0})); +} + +TEST(TransformationTests, InsertReshapeAroundMatmulWithFqAndTranspose) { + RunTest( + InsertReshapeAroundMatmulTest:: + CreateFunction({1, 6, 8}, {8, 10}, {0, 2, 1}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0})); + RunTest( + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0})); + RunTest( + InsertReshapeAroundMatmulTest:: + CreateFunction({1, 1, 8}, {8, 10}, {0, 2, 1}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 10, 1}, {1, 0})); + RunTest( + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 10, 1}, {1, 0}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 1, 8}, {-1, 8}, {8, 10}, {1, 10, 1}, {1, 0})); +} + +TEST(TransformationTests, InsertReshapeAroundMatmulWithAddAndFqAndTranspose) { + RunTest( + InsertReshapeAroundMatmulTest:: + CreateFunction({1, 6, 8}, {8, 10}, {0, 2, 1}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0})); + RunTest( + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0}), + InsertReshapeAroundMatmulTest:: + CreateReferenceFunction({1, 6, 8}, {-1, 8}, {8, 10}, {1, 10, 6}, {1, 0})); } diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp index d72f68900ba8cb..d0c90088305db6 100644 --- a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp @@ -30,8 +30,6 @@ struct gfx_version { struct device_info { uint32_t execution_units_count; ///< Number of available execution units. uint32_t gpu_frequency; ///< Clock frequency in MHz. 
- uint32_t max_threads_per_execution_unit; ///< Number of available HW threads on EU. - uint32_t max_threads_per_device; ///< Maximum number of HW threads on device. uint64_t max_work_group_size; ///< Maximum number of work-items in a work-group executing a kernel using the data parallel execution model. uint64_t max_local_mem_size; ///< Maximum size of local memory arena in bytes. diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp index 733f6612777fbe..8805980745c465 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp @@ -44,7 +44,8 @@ CommonDispatchData QuantizeKernelScaleShift::SetDefault(const quantize_params& p dispatchData.lws[0] = 1; dispatchData.lws[1] = sub_group_size; dispatchData.lws[2] = 1; - } else if (output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32) { + } else if (output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32 || output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16 || + output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16) { dispatchData.gws[0] = output.Y().v * output.X().v; dispatchData.gws[1] = Align(output.Feature().v, feature_size); dispatchData.gws[2] = Align(output.Batch().v, feature_size); @@ -63,8 +64,9 @@ CommonDispatchData QuantizeKernelScaleShift::SetDefault(const quantize_params& p JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const { JitConstants jit = Parent::GetJitConstants(params, dispatchData); - if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 || - params.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32) { + if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 || params.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32 || + params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16 || params.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16) { + jit.AddConstant(MakeJitConstant("FEATURE_BLOCKED_FORMAT", true)); jit.AddConstant(MakeJitConstant("GWS_BATCH", 2)); jit.AddConstant(MakeJitConstant("GWS_FEATURE", 1)); jit.AddConstant(MakeJitConstant("GWS_YX", 0)); @@ -74,21 +76,31 @@ JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& pa jit.Merge(tensor_jits); } + auto can_use_output_range = params.per_tensor_output_range && params.out_lo < params.out_hi; + auto has_output_range_round = !(params.output.GetDType() == Datatype::INT8 || params.output.GetDType() == Datatype::UINT8); + jit.AddConstant(MakeJitConstant("HAS_POST_SCALE", params.has_post_scale)); jit.AddConstant(MakeJitConstant("HAS_POST_SHIFT", params.has_post_shift)); jit.AddConstant(MakeJitConstant("HAS_PRE_SHIFT", params.has_pre_shift)); jit.AddConstant(MakeJitConstant("HAS_CLAMP", params.has_clamp)); + jit.AddConstant(MakeJitConstant("HAS_MIN_CLAMP", params.has_min_clamp)); + jit.AddConstant(MakeJitConstant("HAS_MAX_CLAMP", params.has_max_clamp)); jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_RANGE", params.per_tensor_input_range)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_OUTPUT_RANGE", params.per_tensor_output_range)); jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_SCALE", params.per_tensor_input_scale)); 
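// Editor's note, inferred from the surrounding code rather than stated in the patch:
// each PER_TENSOR_* constant tells the generated OpenCL kernel whether the matching
// quantization parameter is a single scalar, in which case the corresponding *_VAL
// constant below is compiled in directly, or a per-channel tensor that the kernel
// fetches at runtime through the scales_offset / in_range_offset indices.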
jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_SHIFT", params.per_tensor_input_shift)); jit.AddConstant(MakeJitConstant("PER_TENSOR_OUTPUT_SCALE", params.per_tensor_output_scale)); jit.AddConstant(MakeJitConstant("PER_TENSOR_OUTPUT_SHIFT", params.per_tensor_output_shift)); jit.AddConstant(MakeJitConstant("IN_LO_VAL", params.in_lo)); jit.AddConstant(MakeJitConstant("IN_HI_VAL", params.in_hi)); + jit.AddConstant(MakeJitConstant("OUT_LO_VAL", params.out_lo)); + jit.AddConstant(MakeJitConstant("OUT_HI_VAL", params.out_hi)); jit.AddConstant(MakeJitConstant("IN_SCALE_VAL", params.in_scale)); jit.AddConstant(MakeJitConstant("IN_SHIFT_VAL", params.in_shift)); jit.AddConstant(MakeJitConstant("OUT_SCALE_VAL", params.out_scale)); jit.AddConstant(MakeJitConstant("OUT_SHIFT_VAL", params.out_shift)); + jit.AddConstant(MakeJitConstant("CAN_USE_OUTPUT_RANGE", can_use_output_range)); + jit.AddConstant(MakeJitConstant("HAS_OUTPUT_RANGE_ROUND", has_output_range_round)); return jit; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/activation_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/activation_ref.cl index 0aacaaa07e18ab..a490602414e707 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/activation_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/activation_ref.cl @@ -33,43 +33,53 @@ KERNEL(activation)( const unsigned x = get_global_id(0); const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y; const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y; -#if OUTPUT_BATCH_NUM == 1 - const unsigned feature = (uint)get_global_id(2); - const unsigned batch = 0; -#else - const unsigned feature = (uint)get_global_id(2) % OUTPUT_FEATURE_NUM; - const unsigned batch = (uint)get_global_id(2) / OUTPUT_FEATURE_NUM; -#endif -#else -#if defined OUTPUT_LAYOUT_YXFB || defined OUTPUT_LAYOUT_B_FS_YX_FSV16 - const unsigned x = (uint)get_global_id(1); - const unsigned y = (uint)get_global_id(2); -#define z 0 -#if OUTPUT_BATCH_NUM == 1 - const unsigned feature = (uint)get_global_id(0); - const unsigned batch = 0; -#else - const unsigned feature = (uint)get_global_id(0) % OUTPUT_FEATURE_NUM; - const unsigned batch = (uint)get_global_id(0) / OUTPUT_FEATURE_NUM; -#endif -#else -#define z 0 - const unsigned x = (uint)get_global_id(0); - const unsigned y = (uint)get_global_id(1); -#if OUTPUT_BATCH_NUM == 1 - const unsigned feature = (uint)get_global_id(2); - const unsigned batch = 0; + #if OUTPUT_BATCH_NUM == 1 + const unsigned feature = (uint)get_global_id(2); + const unsigned batch = 0; + #else + const unsigned feature = (uint)get_global_id(2) % OUTPUT_FEATURE_NUM; + const unsigned batch = (uint)get_global_id(2) / OUTPUT_FEATURE_NUM; + #endif #else - const unsigned feature = (uint)get_global_id(2) % OUTPUT_FEATURE_NUM; - const unsigned batch = (uint)get_global_id(2) / OUTPUT_FEATURE_NUM; -#endif -#endif + #if defined OUTPUT_LAYOUT_YXFB || defined OUTPUT_LAYOUT_B_FS_YX_FSV16 || defined OUTPUT_LAYOUT_B_FS_YX_FSV32 + const unsigned x = (uint)get_global_id(1); + const unsigned y = (uint)get_global_id(2); + #define z 0 + #if OUTPUT_BATCH_NUM == 1 + const unsigned feature = (uint)get_global_id(0); + const unsigned batch = 0; + #else + const unsigned feature = (uint)get_global_id(0) % OUTPUT_FEATURE_NUM; + const unsigned batch = (uint)get_global_id(0) / OUTPUT_FEATURE_NUM; + #endif + #elif defined OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV32 || defined OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV16 + const unsigned x = (uint)get_global_id(0) % 
OUTPUT_SIZE_X; + const unsigned y = (uint)get_global_id(0) / OUTPUT_SIZE_X; + const unsigned feature = (uint)get_global_id(1); + const unsigned batch = (uint)get_global_id(2); + #else + #define z 0 + const unsigned x = (uint)get_global_id(0); + const unsigned y = (uint)get_global_id(1); + #if OUTPUT_BATCH_NUM == 1 + const unsigned feature = (uint)get_global_id(2); + const unsigned batch = 0; + #else + const unsigned feature = (uint)get_global_id(2) % OUTPUT_FEATURE_NUM; + const unsigned batch = (uint)get_global_id(2) / OUTPUT_FEATURE_NUM; + #endif + #endif #endif -#if defined(OUTPUT_LAYOUT_B_FS_YX_FSV16) && OUTPUT_FEATURE_NUM % 16 != 0 - // b_fs_yx_fsv16 has dispatch features aligned to multiple of 16 +// GWS.feature and GWS.batch is aligned to 16. Otherwise, there are some idling WIs. +#if (defined(OUTPUT_LAYOUT_B_FS_YX_FSV16) || defined(OUTPUT_LAYOUT_B_FS_YX_FSV32)) \ + && OUTPUT_FEATURE_NUM % 16 != 0 if (feature >= OUTPUT_FEATURE_NUM) return; +#elif (defined(OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV16) || defined(OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV32)) \ + && (OUTPUT_FEATURE_NUM % 16 != 0 || OUTPUT_BATCH_NUM % 16 != 0) + if (batch >= OUTPUT_BATCH_NUM || feature >= OUTPUT_FEATURE_NUM) + return; #endif const unsigned src_index = GET_INDEX(INPUT,0,ORDER); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl index 5b5517db5717d1..061d19074ffc25 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl @@ -5,6 +5,9 @@ #include "include/batch_headers/data_types.cl" #include "include/batch_headers/fetch_data.cl" +#define TO_OUTPUT_TYPE CAT(convert_, OUTPUT_TYPE) +#define TO_OUTPUT_TYPE_SAT_RTE CAT(TO_OUTPUT_TYPE, _sat_rte) + #ifdef SUB_GROUP_SIZE __attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) #endif @@ -22,49 +25,60 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, { const int b = get_global_id(GWS_BATCH); const int of = get_global_id(GWS_FEATURE); + #if OUTPUT_DIMS <= 4 const int yx = get_global_id(GWS_YX); + const int x = yx % OUTPUT_SIZE_X; const int y = yx / OUTPUT_SIZE_X; const int z = 0; + + const int output_offset = OUTPUT_GET_INDEX(b, of, y, x); #elif OUTPUT_DIMS == 5 const int zyx = get_global_id(GWS_YX); + const int zyx_div_x = zyx / OUTPUT_SIZE_X; + const int x = zyx % OUTPUT_SIZE_X; - const int y = (zyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; - const int z = (zyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y; + const int y = zyx_div_x % OUTPUT_SIZE_Y; + const int z = zyx_div_x / OUTPUT_SIZE_Y; + + const int output_offset = OUTPUT_GET_INDEX(b, of, z, y, x); #elif OUTPUT_DIMS == 6 const int wzyx = get_global_id(GWS_YX); + const int wzyx_div_x = wzyx / OUTPUT_SIZE_X; + const int wzyx_div_xy = wzyx_div_x / OUTPUT_SIZE_Y; + const int x = wzyx % OUTPUT_SIZE_X; - const int y = (wzyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; - const int z = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z; - const int w = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) / OUTPUT_SIZE_Z; + const int y = wzyx_div_x % OUTPUT_SIZE_Y; + const int z = wzyx_div_xy % OUTPUT_SIZE_Z; + const int w = wzyx_div_xy / OUTPUT_SIZE_Z; + + const int output_offset = OUTPUT_GET_INDEX(b, of, w, z, y, x); +#else +# error quantize_gpu_scale_shift_opt.cl: output tensors with more than 6 dimensions are unsupported #endif 
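// Editor's note, a hedged summary rather than original patch text: the code below
// evaluates the FakeQuantize-style chain
//   val = round(x * in_scale + in_shift) * out_scale + out_shift
// clamping either the input to [IN_LO_VAL, IN_HI_VAL] before scaling (generic branch)
// or the result to [OUT_LO_VAL, OUT_HI_VAL] after scaling (CAN_USE_OUTPUT_RANGE fast
// path); on the fast path round() is skipped for int8/uint8 outputs because the final
// conversion already saturates with round-to-nearest-even (TO_OUTPUT_TYPE_SAT_RTE).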
-#if INPUT0_DIMS == 6 - const int input_offset = INPUT0_GET_INDEX(b, of, w, z, y, x); +#if INPUT0_DIMS <= 4 + const int input_offset = INPUT0_GET_INDEX(b, of, y, x); #elif INPUT0_DIMS == 5 const int input_offset = INPUT0_GET_INDEX(b, of, z, y, x); -#elif INPUT0_DIMS <= 4 - const int input_offset = INPUT0_GET_INDEX(b, of, y, x); -#endif - -#if OUTPUT_DIMS == 6 - const int output_offset = OUTPUT_GET_INDEX(b, of, w, z, y, x); -#elif OUTPUT_DIMS == 5 - const int output_offset = OUTPUT_GET_INDEX(b, of, z, y, x); -#elif OUTPUT_DIMS <= 4 - const int output_offset = OUTPUT_GET_INDEX(b, of, y, x); +#elif INPUT0_DIMS == 6 + const int input_offset = INPUT0_GET_INDEX(b, of, w, z, y, x); +#else +# error quantize_gpu_scale_shift_opt.cl: input tensors with more than 6 dimensions are unsupported #endif -#if HAS_CLAMP && !PER_TENSOR_INPUT_RANGE +#if HAS_CLAMP && !PER_TENSOR_INPUT_RANGE && !CAN_USE_OUTPUT_RANGE #if INPUT1_DIMS == 4 const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, y, x); #elif INPUT1_DIMS == 5 const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, z, y, x); #elif INPUT1_DIMS == 6 const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, w, z, y, x); +#else +# error quantize_gpu_scale_shift_opt.cl: unsupported INPUT1_DIMS size #endif -#endif +#endif // HAS_CLAMP && !PER_TENSOR_INPUT_RANGE && !CAN_USE_OUTPUT_RANGE #if INPUT7_DIMS == 4 const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, y, x); @@ -72,6 +86,8 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, z, y, x); #elif INPUT7_DIMS == 6 const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, w, z, y, x); +#else +# error quantize_gpu_scale_shift_opt.cl: unsupported INPUT7_DIMS size #endif #if PER_TENSOR_INPUT_SCALE @@ -79,6 +95,7 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, #else INPUT1_TYPE input_scale_val = input_scale[scales_offset]; #endif + #if PER_TENSOR_INPUT_SHIFT INPUT1_TYPE input_shift_val = IN_SHIFT_VAL; #else @@ -97,19 +114,67 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, INPUT1_TYPE output_shift_val = output_shift[scales_offset]; #endif -#if PER_TENSOR_INPUT_RANGE && HAS_CLAMP +#if HAS_CLAMP +#if CAN_USE_OUTPUT_RANGE + INPUT1_TYPE output_low_val = OUT_LO_VAL; + INPUT1_TYPE output_high_val = OUT_HI_VAL; +#else +#if PER_TENSOR_INPUT_RANGE INPUT1_TYPE input_low_val = IN_LO_VAL; INPUT1_TYPE input_high_val = IN_HI_VAL; -#elif HAS_CLAMP +#else INPUT1_TYPE input_low_val = input_low[in_range_offset]; INPUT1_TYPE input_high_val = input_high[in_range_offset]; +#endif // PER_TENSOR_INPUT_RANGE +#endif // CAN_USE_OUTPUT_RANGE +#endif // HAS_CLAMP + +// ************************************************************* // +// Calculations for optimized branch with the output range usage // +// ************************************************************* // + +#if CAN_USE_OUTPUT_RANGE + +#if HAS_PRE_SHIFT + INPUT1_TYPE val = TO_INPUT1_TYPE(input[input_offset]) * input_scale_val + input_shift_val; +#else + INPUT1_TYPE val = TO_INPUT1_TYPE(input[input_offset]) * input_scale_val; +#endif + +#if HAS_OUTPUT_RANGE_ROUND + val = round(val); +#endif + +#if HAS_POST_SCALE + val *= output_scale_val; +#endif + +#if HAS_POST_SHIFT + val += output_shift_val; #endif #if HAS_CLAMP - INPUT1_TYPE val = min(max(TO_INPUT1_TYPE(input[input_offset]), input_low_val), input_high_val); +#if HAS_MIN_CLAMP && HAS_MAX_CLAMP + val = clamp(val, output_low_val, output_high_val); +#elif HAS_MIN_CLAMP + val = max(val, 
output_low_val); +#else // HAS_MAX_CLAMP + val = min(val, output_high_val); +#endif +#endif // HAS_CLAMP + +// ************************************************************** // +// Calculations for alternative branch with the input range usage // +// ************************************************************** // + +#else // CAN_USE_OUTPUT_RANGE + +#if HAS_CLAMP + INPUT1_TYPE val = clamp(TO_INPUT1_TYPE(input[input_offset]), input_low_val, input_high_val); #else INPUT1_TYPE val = TO_INPUT1_TYPE(input[input_offset]); #endif + #if HAS_PRE_SHIFT val = round(val * input_scale_val + input_shift_val); #else @@ -117,18 +182,28 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, #endif #if HAS_POST_SCALE - val = val*output_scale_val; + val *= output_scale_val; #endif + #if HAS_POST_SHIFT val += output_shift_val; #endif -#if OUTPUT_LAYOUT_B_FS_YX_FSV16 +#endif // CAN_USE_OUTPUT_RANGE + +// *********************************** // +// Common section with results writing // +// *********************************** // + +#if FEATURE_BLOCKED_FORMAT if (of < OUTPUT_FEATURE_NUM) #endif #if OUTPUT_IS_FP - output[output_offset] = TO_OUTPUT_TYPE_SAT(val); + output[output_offset] = TO_OUTPUT_TYPE_SAT(val); #else - output[output_offset] = TO_OUTPUT_TYPE_SAT(round(val)); + output[output_offset] = TO_OUTPUT_TYPE_SAT_RTE(val); #endif } + +#undef TO_OUTPUT_TYPE +#undef TO_OUTPUT_TYPE_SAT_RTE diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp index 7d294c2a8ea586..5a5a36919e03a5 100644 --- a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp @@ -226,9 +226,6 @@ device_info init_device_info(const cl::Device& device) { info.supports_imad = get_imad_support(device); info.supports_immad = false; - info.max_threads_per_execution_unit = 7; - info.max_threads_per_device = static_cast(info.execution_units_count * info.max_threads_per_execution_unit); - info.supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos; info.supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos && diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp index de0e86a12787ee..4c7cd17da9c470 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp @@ -832,8 +832,8 @@ void set_params(const program_node& node, kernel_selector::params& params) { params.engineInfo.maxImage2dWidth = device_info.max_image2d_width; params.engineInfo.maxImage2dHeight = device_info.max_image2d_height; params.engineInfo.computeUnitsCount = device_info.execution_units_count; - params.engineInfo.maxThreadsPerExecutionUnit = device_info.max_threads_per_execution_unit; - params.engineInfo.maxThreadsPerDevice = device_info.max_threads_per_device; + params.engineInfo.maxThreadsPerExecutionUnit = device_info.num_threads_per_eu > 0 ? 
device_info.num_threads_per_eu : 7; + params.engineInfo.maxThreadsPerDevice = params.engineInfo.maxThreadsPerExecutionUnit * device_info.execution_units_count; params.engineInfo.deviceCache = program.get_tuning_cache(); params.engineInfo.driverVersion = device_info.driver_version; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp index 5f54dabf7f4ea5..509108f437f7e4 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp @@ -1459,3 +1459,181 @@ TEST(activation_f32_fw_gpu, b_fs_yx_fsv16_prelu) { EXPECT_EQ(expected[i], out_ptr[i]) << "at i=" << i; } } + +struct activation_random_test_params { + data_types input_type; + format::type input_format; + tensor input_size; + activation_func func_type; + activation_additional_params additional_params; + padding padd; +}; + +struct activation_random_test : testing::TestWithParam +{ + bool enable_profiling = false; + + size_t get_x_pitch(layout& layout) { + auto tensor_x0 = tensor(batch(0), feature(0), spatial(0, 0, 0, 0)); + auto tensor_x1 = tensor(batch(0), feature(0), spatial(1, 0, 0, 0)); + auto x0 = layout.get_linear_offset(tensor_x0); + auto x1 = layout.get_linear_offset(tensor_x1); + return (x1 - x0); + } + + template + void fill_random_typed(memory::ptr mem, int min, int max, int k) { + auto size = mem->get_layout().size; + size_t b = size.batch[0]; + size_t f = size.feature[0]; + size_t x = size.spatial[0]; + size_t y = size.spatial[1]; + + auto data = generate_random_4d(b, f, y, x, min, max, k); + mem_lock ptr{mem, get_test_stream()}; + for (size_t bi = 0; bi < b; ++bi) { + for (size_t fi = 0; fi < f; ++fi) { + for (size_t yi = 0; yi < y; ++yi) { + for (size_t xi = 0; xi < x; ++xi) { + auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0)); + auto offset = mem->get_layout().get_linear_offset(coords); + ptr[offset] = data[bi][fi][yi][xi]; + } + } + } + } + } + + void fill_random(memory::ptr mem) { + auto dt = mem->get_layout().data_type; + switch (dt) { + case data_types::f32: + fill_random_typed(mem, -127, 127, 2); + break; + case data_types::f16: + fill_random_typed(mem, -127, 127, 2); + break; + case data_types::i8: + fill_random_typed(mem, -127, 127, 1); + break; + case data_types::u8: + fill_random_typed(mem, 0, 255, 1); + break; + default: + break; + } + } + + template + bool compare_outputs(const memory::ptr out_ref, const memory::ptr out_opt) { + auto output_lay = out_ref->get_layout(); + auto opt_output_lay = out_opt->get_layout(); + size_t b = output_lay.size.batch[0]; + size_t f = output_lay.size.feature[0]; + size_t x = output_lay.size.spatial[0]; + size_t y = output_lay.size.spatial[1]; + cldnn::mem_lock ref_ptr(out_ref, get_test_stream()); + cldnn::mem_lock opt_ptr(out_opt, get_test_stream()); + + auto ref_x_pitch = get_x_pitch(output_lay); + auto opt_x_pitch = get_x_pitch(opt_output_lay); + + for (size_t bi = 0; bi < b; ++bi) { + for (size_t fi = 0; fi < f; ++fi) { + for (size_t yi = 0; yi < y; ++yi) { + auto ref_out_coords = tensor(batch(bi), feature(fi), spatial(0, yi, 0, 0)); + auto ref_out_offset = output_lay.get_linear_offset(ref_out_coords); + auto opt_out_offset = opt_output_lay.get_linear_offset(ref_out_coords); + for (size_t xi = 0; xi < x; ++xi) { + auto ref_out_val = ref_ptr[ref_out_offset + xi * ref_x_pitch]; + auto opt_out_val = opt_ptr[opt_out_offset 
+ xi * opt_x_pitch]; + EXPECT_EQ(ref_out_val, opt_out_val); + } + } + } + } + + return true; + } + + void execute_compare(const activation_random_test_params& params, bool check_result) { + auto& engine = get_test_engine(); + + auto in_layout = layout(params.input_type, format::bfyx, params.input_size); + auto in_mem = engine.allocate_memory(in_layout); + fill_random(in_mem); + + /// bfyx + cldnn::topology topo; + topo.add(input_layout("in", in_layout)); + auto prim = activation("activation", "in", params.func_type); + prim.additional_params = params.additional_params; + topo.add(prim); + + auto build_opts = build_options(); + build_opts.set_option(build_option::outputs({"activation"})); + + network net(engine, topo, build_opts); + net.set_input_data("in", in_mem); + + // first execution of ref + auto result = net.execute(); + auto output = result.at("activation").get_memory(); + + cldnn::topology topo_opt; + topo_opt.add(input_layout("in", in_layout)); + topo_opt.add(reorder("in_to_input_type", "in", params.input_format, params.input_type)); + auto prim_opt = activation("activation_blocked", "in_to_input_type", params.func_type); + prim_opt.additional_params = params.additional_params; + topo_opt.add(prim_opt); + // force output format to input format. + topo_opt.add(reorder("res_to_input_format", "activation_blocked", params.input_format, params.input_type)); + + auto build_opts_opt = build_options(); + build_opts_opt.set_option(build_option::outputs({"activation_blocked", "res_to_input_format"})); + auto activation_impl_desc = implementation_desc(); + activation_impl_desc.output_format = params.input_format; + build_opts_opt.set_option(build_option::force_implementations({{"activation_blocked", {params.input_format, "activation_ref"} }})); + + network net_opt(engine, topo_opt, build_opts_opt); + + // Use in_mem from ref network + net_opt.set_input_data("in", in_mem); + + // first execution of opt + auto result_opt = net_opt.execute(); + auto output_opt = result_opt.at("res_to_input_format").get_memory(); + + if (check_result == true) { + // Check data_types + if (params.input_type == data_types::f32) { + compare_outputs(output, output_opt); + } else if (params.input_type == data_types::f16) { + compare_outputs(output, output_opt); + } else if (params.input_type == data_types::i8) { + compare_outputs(output, output_opt); + } else if (params.input_type == data_types::u8) { + compare_outputs(output, output_opt); + } else { + FAIL() << "Not supported data type: " << static_cast(params.input_type); + } + } + } +}; + +TEST_P(activation_random_test, random) { + auto param = GetParam(); + execute_compare(param, true); +} + +INSTANTIATE_TEST_SUITE_P(activation_blocked_tests, + activation_random_test, + testing::ValuesIn( + std::vector{ + { data_types::i8, format::b_fs_yx_fsv32, { 1, 32, 5, 5}, activation_func::relu, {}, {}}, + { data_types::i8, format::bs_fs_yx_bsv32_fsv32, {32, 32, 5, 5}, activation_func::relu, {}, {}}, + { data_types::f16, format::bs_fs_yx_bsv32_fsv16, {32, 32, 5, 5}, activation_func::relu, {}, {}}, + { data_types::i8, format::bs_fs_yx_bsv32_fsv32, {16, 16, 5, 5}, activation_func::relu, {}, {}}, + { data_types::f16, format::bs_fs_yx_bsv32_fsv16, {16, 16, 5, 5}, activation_func::relu, {}, {}}, + } + )); diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index d63feb697bc5e3..3e530d9132e461 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 
d63feb697bc5e3f575f3b486a72af4c5c0587b0b +Subproject commit 3e530d9132e461510390a0b2130aa9b938d8809c diff --git a/ngraph/core/include/ngraph/function.hpp b/ngraph/core/include/ngraph/function.hpp index 2bc16a37b62cde..9c176bb980777b 100644 --- a/ngraph/core/include/ngraph/function.hpp +++ b/ngraph/core/include/ngraph/function.hpp @@ -4,6 +4,14 @@ #pragma once +#include "ngraph/ngraph_visibility.hpp" +#include "ngraph/node.hpp" +#include "ngraph/op/assign.hpp" +#include "ngraph/op/parameter.hpp" +#include "ngraph/op/read_value.hpp" +#include "ngraph/op/result.hpp" +#include "ngraph/op/sink.hpp" +#include "ngraph/op/util/variable.hpp" #include "openvino/core/function.hpp" namespace ngraph { diff --git a/ngraph/core/include/ngraph/graph_util.hpp b/ngraph/core/include/ngraph/graph_util.hpp index 5e7c19fb47643a..dd1ee8e8d576f0 100644 --- a/ngraph/core/include/ngraph/graph_util.hpp +++ b/ngraph/core/include/ngraph/graph_util.hpp @@ -23,7 +23,8 @@ namespace ov { namespace op { namespace v0 { class Parameter; -} +class Result; +} // namespace v0 } // namespace op } // namespace ov namespace ngraph { @@ -31,7 +32,8 @@ namespace ngraph { namespace op { namespace v0 { using ov::op::v0::Parameter; -} +using ov::op::v0::Result; +} // namespace v0 } // namespace op using ov::clone_function; @@ -129,7 +131,7 @@ std::list> clone_nodes(const std::vector, std::shared_ptr> insert_result_parameter_split( +std::pair, std::shared_ptr> insert_result_parameter_split( const std::shared_ptr& src_node, const std::shared_ptr& dst_node); diff --git a/ngraph/core/include/ngraph/util.hpp b/ngraph/core/include/ngraph/util.hpp index b7554c58e1286d..4e9671c232788a 100644 --- a/ngraph/core/include/ngraph/util.hpp +++ b/ngraph/core/include/ngraph/util.hpp @@ -25,11 +25,13 @@ #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/tensor.hpp" #include "ngraph/shape.hpp" +#include "openvino/core/enum_mask.hpp" namespace ov { class Node; } namespace ngraph { +using ov::EnumMask; using ov::Node; class stopwatch; @@ -227,105 +229,6 @@ AxisVector get_default_order(const Shape& shape); NGRAPH_API AxisVector get_default_order(const PartialShape& shape); -// -// EnumMask is intended to work with a scoped enum type. It's used to store -// a combination of enum values and provides easy access and manipulation -// of these enum values as a mask. -// -// EnumMask does not provide a set_all() or invert() operator because they -// could do things unexpected by the user, i.e. for enum with 4 bit values, -// invert(001000...) != 110100..., due to the extra bits. -// -template -class EnumMask { -public: - /// Make sure the template type is an enum. - static_assert(std::is_enum::value, "EnumMask template type must be an enum"); - /// Extract the underlying type of the enum. - typedef typename std::underlying_type::type value_type; - /// Some bit operations are not safe for signed values, we require enum - /// type to use unsigned underlying type. 
- static_assert(std::is_unsigned::value, "EnumMask enum must use unsigned type."); - - constexpr EnumMask() : m_value{0} {} - constexpr EnumMask(const T& enum_value) : m_value{static_cast(enum_value)} {} - EnumMask(const EnumMask& other) : m_value{other.m_value} {} - EnumMask(std::initializer_list enum_values) : m_value{0} { - for (auto& v : enum_values) { - m_value |= static_cast(v); - } - } - value_type value() const { - return m_value; - } - /// Check if any of the input parameter enum bit mask match - bool is_any_set(const EnumMask& p) const { - return m_value & p.m_value; - } - /// Check if all of the input parameter enum bit mask match - bool is_set(const EnumMask& p) const { - return (m_value & p.m_value) == p.m_value; - } - /// Check if any of the input parameter enum bit mask does not match - bool is_any_clear(const EnumMask& p) const { - return !is_set(p); - } - /// Check if all of the input parameter enum bit mask do not match - bool is_clear(const EnumMask& p) const { - return !is_any_set(p); - } - void set(const EnumMask& p) { - m_value |= p.m_value; - } - void clear(const EnumMask& p) { - m_value &= ~p.m_value; - } - void clear_all() { - m_value = 0; - } - bool operator[](const EnumMask& p) const { - return is_set(p); - } - bool operator==(const EnumMask& other) const { - return m_value == other.m_value; - } - bool operator!=(const EnumMask& other) const { - return m_value != other.m_value; - } - EnumMask& operator=(const EnumMask& other) { - m_value = other.m_value; - return *this; - } - EnumMask& operator&=(const EnumMask& other) { - m_value &= other.m_value; - return *this; - } - - EnumMask& operator|=(const EnumMask& other) { - m_value |= other.m_value; - return *this; - } - - EnumMask operator&(const EnumMask& other) const { - return EnumMask(m_value & other.m_value); - } - - EnumMask operator|(const EnumMask& other) const { - return EnumMask(m_value | other.m_value); - } - - friend std::ostream& operator<<(std::ostream& os, const EnumMask& m) { - os << m.m_value; - return os; - } - -private: - /// Only used internally - explicit EnumMask(const value_type& value) : m_value{value} {} - - value_type m_value; -}; - /// \brief Function to query parsed version information of the version of ngraph which /// contains this function. 
Version information strictly follows Semantic Versioning /// http://semver.org diff --git a/ngraph/core/include/openvino/core/attribute_visitor.hpp b/ngraph/core/include/openvino/core/attribute_visitor.hpp index 5d2b3175c8df91..5b75a5a68bd933 100644 --- a/ngraph/core/include/openvino/core/attribute_visitor.hpp +++ b/ngraph/core/include/openvino/core/attribute_visitor.hpp @@ -94,7 +94,7 @@ class OPENVINO_API AttributeVisitor { /// \brief Hook for adapters that need visitor access virtual void on_adapter(const std::string& name, VisitorAdapter& adapter); - /// \brief Provides API to handle nGraph Function attribute type, accessed as ValueAccessor + /// \brief Provides API to handle openvino Function attribute type, accessed as ValueAccessor /// \param name attribute name /// \param adapter reference to a Function ValueAccessor virtual void on_adapter(const std::string& name, ValueAccessor>& adapter); diff --git a/ngraph/core/include/openvino/core/core_visibility.hpp b/ngraph/core/include/openvino/core/core_visibility.hpp index 0152fb157ff5d4..1d841d9e01960b 100644 --- a/ngraph/core/include/openvino/core/core_visibility.hpp +++ b/ngraph/core/include/openvino/core/core_visibility.hpp @@ -7,8 +7,8 @@ #include "openvino/core/visibility.hpp" #define OV_NEW_API 1 -// Now we use the generic helper definitions above to define NGRAPH_API -// NGRAPH_API is used for the public API symbols. It either DLL imports or DLL exports +// Now we use the generic helper definitions above to define OPENVINO_API +// OPENVINO_API is used for the public API symbols. It either DLL imports or DLL exports // (or does nothing for static build) #ifdef _WIN32 diff --git a/ngraph/core/include/openvino/core/descriptor/tensor.hpp b/ngraph/core/include/openvino/core/descriptor/tensor.hpp index 416ec45c08fa13..24ff0cb64f5835 100644 --- a/ngraph/core/include/openvino/core/descriptor/tensor.hpp +++ b/ngraph/core/include/openvino/core/descriptor/tensor.hpp @@ -36,9 +36,9 @@ class OPENVINO_API Tensor { Tensor(const Tensor&) = delete; Tensor& operator=(const Tensor&) = delete; - NGRAPH_DEPRECATED("get_name() is deprecated! Please use get_names() instead.") + OPENVINO_DEPRECATED("get_name() is deprecated! Please use get_names() instead.") const std::string& get_name() const; - NGRAPH_DEPRECATED("set_name() is deprecated! Please use set_names() instead.") + OPENVINO_DEPRECATED("set_name() is deprecated! Please use set_names() instead.") void set_name(const std::string& name); const std::unordered_set& get_names() const; @@ -91,7 +91,7 @@ class OPENVINO_API Tensor { // Support for dynamic shapes required transition to ov::PartialShape. // To smoothly transition to ov::PartialShape we introduced m_partial_shape // and kept m_shape in sync with m_partial_shape. Synchronization point was placed - // in set_partial_shape which dramatically affected performance of ngraph::Function + // in set_partial_shape which dramatically affected performance of ov::Function // validation. 
Since we have started the transition to ov::PartialShape and reduced // Shape usage the only user of m_shape was get_shape method with signature: // const PartialShape& descriptor::Tensor::get_shape() const diff --git a/ngraph/core/include/openvino/core/enum_mask.hpp b/ngraph/core/include/openvino/core/enum_mask.hpp new file mode 100644 index 00000000000000..131fd5f9e1a791 --- /dev/null +++ b/ngraph/core/include/openvino/core/enum_mask.hpp @@ -0,0 +1,110 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov { + +// +// EnumMask is intended to work with a scoped enum type. It's used to store +// a combination of enum values and provides easy access and manipulation +// of these enum values as a mask. +// +// EnumMask does not provide a set_all() or invert() operator because they +// could do things unexpected by the user, i.e. for enum with 4 bit values, +// invert(001000...) != 110100..., due to the extra bits. +// +template +class EnumMask { +public: + /// Make sure the template type is an enum. + static_assert(std::is_enum::value, "EnumMask template type must be an enum"); + /// Extract the underlying type of the enum. + using value_type = typename std::underlying_type::type; + /// Some bit operations are not safe for signed values, we require enum + /// type to use unsigned underlying type. + static_assert(std::is_unsigned::value, "EnumMask enum must use unsigned type."); + + constexpr EnumMask() = default; + constexpr EnumMask(const T& enum_value) : m_value{static_cast(enum_value)} {} + EnumMask(const EnumMask& other) : m_value{other.m_value} {} + EnumMask(std::initializer_list enum_values) { + for (auto& v : enum_values) { + m_value |= static_cast(v); + } + } + value_type value() const { + return m_value; + } + /// Check if any of the input parameter enum bit mask match + bool is_any_set(const EnumMask& p) const { + return m_value & p.m_value; + } + /// Check if all of the input parameter enum bit mask match + bool is_set(const EnumMask& p) const { + return (m_value & p.m_value) == p.m_value; + } + /// Check if any of the input parameter enum bit mask does not match + bool is_any_clear(const EnumMask& p) const { + return !is_set(p); + } + /// Check if all of the input parameter enum bit mask do not match + bool is_clear(const EnumMask& p) const { + return !is_any_set(p); + } + void set(const EnumMask& p) { + m_value |= p.m_value; + } + void clear(const EnumMask& p) { + m_value &= ~p.m_value; + } + void clear_all() { + m_value = 0; + } + bool operator[](const EnumMask& p) const { + return is_set(p); + } + bool operator==(const EnumMask& other) const { + return m_value == other.m_value; + } + bool operator!=(const EnumMask& other) const { + return m_value != other.m_value; + } + EnumMask& operator=(const EnumMask& other) { + m_value = other.m_value; + return *this; + } + EnumMask& operator&=(const EnumMask& other) { + m_value &= other.m_value; + return *this; + } + + EnumMask& operator|=(const EnumMask& other) { + m_value |= other.m_value; + return *this; + } + + EnumMask operator&(const EnumMask& other) const { + return EnumMask(m_value & other.m_value); + } + + EnumMask operator|(const EnumMask& other) const { + return EnumMask(m_value | other.m_value); + } + + friend std::ostream& operator<<(std::ostream& os, const EnumMask& m) { + os << m.m_value; + return os; + } + +private: + /// Only used internally + explicit EnumMask(const value_type& value) : m_value{value} {} + + value_type m_value{}; +}; + +} 
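The relocated mask type above is easiest to grasp from a short usage sketch. This is not part of the diff: only ov::EnumMask comes from the new header; the Access enum and main() are hypothetical.

    #include <cstdint>
    #include <iostream>

    #include "openvino/core/enum_mask.hpp"

    // Hypothetical scoped enum; EnumMask's static_asserts require an enum
    // type with an unsigned underlying type.
    enum class Access : uint32_t { READ = 1u << 0, WRITE = 1u << 1, EXEC = 1u << 2 };

    int main() {
        ov::EnumMask<Access> mask{Access::READ, Access::WRITE};  // initializer_list ctor ORs the bits
        mask.set(Access::EXEC);                                  // mask now holds READ | WRITE | EXEC
        mask.clear(Access::WRITE);                               // drop one bit
        std::cout << std::boolalpha
                  << mask.is_set({Access::READ, Access::EXEC})   // true: both bits still present
                  << " " << mask.is_any_set(Access::WRITE)       // false: WRITE was cleared
                  << " " << mask.value() << "\n";                // raw unsigned value, here 5
        return 0;
    }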
// namespace ov diff --git a/ngraph/core/include/openvino/core/enum_names.hpp b/ngraph/core/include/openvino/core/enum_names.hpp index be32a7bdea8b92..db920e45d3e606 100644 --- a/ngraph/core/include/openvino/core/enum_names.hpp +++ b/ngraph/core/include/openvino/core/enum_names.hpp @@ -8,7 +8,7 @@ #include #include -#include "ngraph/check.hpp" +#include "openvino/core/except.hpp" namespace ov { /// Uses a pairings defined by EnumTypes::get() to convert between strings @@ -30,7 +30,7 @@ class EnumNames { return p.second; } } - NGRAPH_CHECK(false, "\"", name, "\"", " is not a member of enum ", get().m_enum_name); + OPENVINO_ASSERT(false, "\"", name, "\"", " is not a member of enum ", get().m_enum_name); } /// Converts enum values to strings @@ -40,7 +40,7 @@ class EnumNames { return p.first; } } - NGRAPH_CHECK(false, " invalid member of enum ", get().m_enum_name); + OPENVINO_ASSERT(false, " invalid member of enum ", get().m_enum_name); } private: diff --git a/ngraph/core/include/openvino/core/function.hpp b/ngraph/core/include/openvino/core/function.hpp index 88451692b1d316..345f606a4aa6c4 100644 --- a/ngraph/core/include/openvino/core/function.hpp +++ b/ngraph/core/include/openvino/core/function.hpp @@ -11,87 +11,81 @@ #include #include -#include "ngraph/op/assign.hpp" -#include "ngraph/op/parameter.hpp" -#include "ngraph/op/read_value.hpp" -#include "ngraph/op/result.hpp" -#include "ngraph/op/sink.hpp" -#include "ngraph/op/util/variable.hpp" #include "openvino/core/core_visibility.hpp" #include "openvino/core/node.hpp" #include "openvino/core/rtti.hpp" #include "openvino/core/variant.hpp" +#include "openvino/op/assign.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/read_value.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/sink.hpp" +#include "openvino/op/util/variable.hpp" namespace ov { /// A user-defined function. 
class OPENVINO_API Function : public std::enable_shared_from_this { public: - static constexpr ngraph::DiscreteTypeInfo type_info{"Function", 0}; - const ngraph::DiscreteTypeInfo& get_type_info() const { + static constexpr ov::DiscreteTypeInfo type_info{"Function", 0}; + const ov::DiscreteTypeInfo& get_type_info() const { return type_info; } - Function(const ngraph::NodeVector& results, - const ngraph::ParameterVector& parameters, - const std::string& name = ""); + Function(const ov::NodeVector& results, const ov::ParameterVector& parameters, const std::string& name = ""); - Function(const ngraph::OutputVector& results, - const ngraph::ParameterVector& parameters, - const std::string& name = ""); + Function(const ov::OutputVector& results, const ov::ParameterVector& parameters, const std::string& name = ""); - Function(const std::shared_ptr& result, - const ngraph::ParameterVector& parameters, + Function(const std::shared_ptr& result, + const ov::ParameterVector& parameters, const std::string& name = ""); - Function(const ngraph::ResultVector& results, - const ngraph::ParameterVector& parameters, - const std::string& name = ""); + Function(const ov::ResultVector& results, const ov::ParameterVector& parameters, const std::string& name = ""); - Function(const ngraph::ResultVector& results, - const ngraph::SinkVector& sinks, - const ngraph::ParameterVector& parameters, + Function(const ov::ResultVector& results, + const ov::SinkVector& sinks, + const ov::ParameterVector& parameters, const std::string& name = ""); - Function(const ngraph::OutputVector& results, - const ngraph::SinkVector& sinks, - const ngraph::ParameterVector& parameters, + Function(const ov::OutputVector& results, + const ov::SinkVector& sinks, + const ov::ParameterVector& parameters, const std::string& name = ""); - Function(const ngraph::ResultVector& results, - const ngraph::SinkVector& sinks, - const ngraph::ParameterVector& parameters, - const ngraph::VariableVector& variables, + Function(const ov::ResultVector& results, + const ov::SinkVector& sinks, + const ov::ParameterVector& parameters, + const ov::op::util::VariableVector& variables, const std::string& name = ""); - Function(const ngraph::OutputVector& results, - const ngraph::SinkVector& sinks, - const ngraph::ParameterVector& parameters, - const ngraph::VariableVector& variables, + Function(const ov::OutputVector& results, + const ov::SinkVector& sinks, + const ov::ParameterVector& parameters, + const ov::op::util::VariableVector& variables, const std::string& name = ""); - Function(const ngraph::ResultVector& results, - const ngraph::ParameterVector& parameters, - const ngraph::VariableVector& variables, + Function(const ov::ResultVector& results, + const ov::ParameterVector& parameters, + const ov::op::util::VariableVector& variables, const std::string& name = ""); - Function(const ngraph::OutputVector& results, - const ngraph::ParameterVector& parameters, - const ngraph::VariableVector& variables, + Function(const ov::OutputVector& results, + const ov::ParameterVector& parameters, + const ov::op::util::VariableVector& variables, const std::string& name = ""); /// Constructs a Function. Lists of parameters and variables will be generated automatically /// based on traversing the graph from the results. - explicit Function(const ngraph::OutputVector& results, const std::string& name = ""); + explicit Function(const ov::OutputVector& results, const std::string& name = ""); /// Constructs a Function. 
Lists of parameters and variables will be generated automatically /// based on traversing the graph from the results and the sinks. - Function(const ngraph::OutputVector& results, const ngraph::SinkVector& sinks, const std::string& name = ""); + Function(const ov::OutputVector& results, const ov::SinkVector& sinks, const std::string& name = ""); virtual ~Function() = default; /// Return the number of outputs for this function. size_t get_output_size() const; /// Return the op that generates output i - std::shared_ptr get_output_op(size_t i) const; + std::shared_ptr get_output_op(size_t i) const; /// Output functions std::vector> outputs(); @@ -115,7 +109,7 @@ class OPENVINO_API Function : public std::enable_shared_from_this { void reshape(const std::map& partial_shapes); /// Return the element type of output i - const ngraph::element::Type& get_output_element_type(size_t i) const; + const ov::element::Type& get_output_element_type(size_t i) const; /// Return the shape of element i const Shape& get_output_shape(size_t i) const; @@ -124,7 +118,7 @@ class OPENVINO_API Function : public std::enable_shared_from_this { const PartialShape& get_output_partial_shape(size_t i) const; /// Check that there is a single result and return it. - std::shared_ptr get_result() const; + std::shared_ptr get_result() const; /// \brief Get the unique name of the function. /// \returns A const reference to the function's unique name. @@ -141,13 +135,13 @@ class OPENVINO_API Function : public std::enable_shared_from_this { /// \returns A const reference to the function's friendly name. const std::string& get_friendly_name() const; - std::vector> get_ops() const; - std::vector> get_ordered_ops() const; - void map_unordered_ops(std::function f) const; + std::vector> get_ops() const; + std::vector> get_ordered_ops() const; + void map_unordered_ops(std::function f) const; friend std::ostream& operator<<(std::ostream&, const Function&); // updates graph and m_results list - void replace_node(std::shared_ptr old, std::shared_ptr repl); + void replace_node(std::shared_ptr old, std::shared_ptr repl); void validate_nodes_and_infer_types() const; @@ -166,59 +160,59 @@ class OPENVINO_API Function : public std::enable_shared_from_this { /// /// \param parameter_index The index of the parameter to replace. /// \param parameter The parameter to substitute for the `parameter_index`th parameter. 
- void replace_parameter(size_t parameter_index, const std::shared_ptr& parameter); + void replace_parameter(size_t parameter_index, const std::shared_ptr& parameter); - using topological_sort_t = std::function>( - const std::vector>& root_nodes)>; + using topological_sort_t = + std::function>(const std::vector>& root_nodes)>; void set_topological_sort(topological_sort_t); - virtual bool visit_attributes(ngraph::AttributeVisitor& visitor); + virtual bool visit_attributes(ov::AttributeVisitor& visitor); /// Return the function parameters - const ngraph::ParameterVector& get_parameters() const { + const ov::ParameterVector& get_parameters() const { return m_parameters; }; /// Return a list of function's outputs - const ngraph::ResultVector& get_results() const { + const ov::ResultVector& get_results() const { return m_results; }; /// Index for parameter, or -1 - int64_t get_parameter_index(const std::shared_ptr& parameter) const; + int64_t get_parameter_index(const std::shared_ptr& parameter) const; /// Index for value or result referencing it, or -1 - int64_t get_result_index(const ngraph::Output& value) const; + int64_t get_result_index(const ov::Output& value) const; /// \brief Evaluate the function on inputs, putting results in outputs. /// \param output_tensors Tensors for the outputs to compute. One for each result /// \param input_tensors Tensors for the inputs. One for each inputs. /// \param evaluation_context Storage of additional settings and attributes that can be used /// when evaluating the function. This additional information can be shared across nodes. - bool evaluate(const ngraph::HostTensorVector& output_tensors, - const ngraph::HostTensorVector& input_tensors, - ngraph::EvaluationContext evaluation_context = ngraph::EvaluationContext()) const; + bool evaluate(const ov::HostTensorVector& output_tensors, + const ov::HostTensorVector& input_tensors, + ov::EvaluationContext evaluation_context = ov::EvaluationContext()) const; /// \brief Return a list of function's sinks. - const ngraph::SinkVector& get_sinks() const { + const ov::SinkVector& get_sinks() const { return m_sinks; } /// \brief Add new sink nodes to the list. Method doesn't validate graph, it should be done /// manually after all changes. /// \param sinks new sink nodes - void add_sinks(const ngraph::SinkVector& sinks); + void add_sinks(const ov::SinkVector& sinks); /// \brief Delete sink node from the list of sinks. Method doesn't delete node from graph. /// \param sink Sink to delete - void remove_sink(const std::shared_ptr& sink); + void remove_sink(const std::shared_ptr& sink); /// \brief Add new Result nodes to the list. Method doesn't validate graph, it should be /// done manually after all changes. /// \param results new Result nodes - void add_results(const ngraph::ResultVector& results); + void add_results(const ov::ResultVector& results); /// \brief Delete Result node from the list of results. Method will not delete node from /// graph. /// \param result Result node to delete - void remove_result(const std::shared_ptr& result); + void remove_result(const std::shared_ptr& result); /// \brief Add new Parameter nodes to the list. /// @@ -230,7 +224,7 @@ class OPENVINO_API Function : public std::enable_shared_from_this { /// * call graph validation to check correctness of changes /// /// \param params new Parameter nodes - void add_parameters(const ngraph::ParameterVector& params); + void add_parameters(const ov::ParameterVector& params); /// \brief Delete Parameter node from the list of parameters. 
Method will not delete node /// from graph. You need to replace Parameter with other operation manually. @@ -245,25 +239,25 @@ class OPENVINO_API Function : public std::enable_shared_from_this { /// * call graph validation to check all changes /// /// \param param Parameter node to delete - void remove_parameter(const std::shared_ptr& param); + void remove_parameter(const std::shared_ptr& param); /// \brief Add new variables to the list. Method doesn't validate graph, it should be done /// manually after all changes. /// \param variables new variables to add - void add_variables(const ngraph::VariableVector& variables); + void add_variables(const ov::op::util::VariableVector& variables); /// \brief Delete variable from the list of variables. /// Method doesn't delete nodes that used this variable from the graph. /// \param variable Variable to delete - void remove_variable(const ngraph::VariablePtr& variable); + void remove_variable(const ov::op::util::Variable::Ptr& variable); /// \brief Return a list of function's variables. - const ngraph::VariableVector& get_variables() const { + const ov::op::util::VariableVector& get_variables() const { return m_variables; } /// \brief Return a variable by specified variable_id. - ngraph::VariablePtr get_variable_by_id(const std::string& variable_id) const; + ov::op::util::Variable::Ptr get_variable_by_id(const std::string& variable_id) const; RTMap& get_rt_info() { return m_rt_info; } @@ -291,17 +285,17 @@ class OPENVINO_API Function : public std::enable_shared_from_this { size_t m_placement{0}; topological_sort_t m_topological_sorter; - ngraph::ResultVector m_results; + ov::ResultVector m_results; // List of the nodes with side effect in graph. // These nodes are not outputs of graph but should not be removed even if have no children. - ngraph::SinkVector m_sinks; - ngraph::ParameterVector m_parameters; - ngraph::VariableVector m_variables; + ov::SinkVector m_sinks; + ov::ParameterVector m_parameters; + ov::op::util::VariableVector m_variables; RTMap m_rt_info; }; template <> -class NGRAPH_API AttributeAdapter> +class OPENVINO_API AttributeAdapter> : public DirectValueAccessor> { public: AttributeAdapter(std::shared_ptr& value) diff --git a/ngraph/core/include/openvino/core/layout.hpp b/ngraph/core/include/openvino/core/layout.hpp index 873d0c953d1dcc..8af42983d1a89b 100644 --- a/ngraph/core/include/openvino/core/layout.hpp +++ b/ngraph/core/include/openvino/core/layout.hpp @@ -7,7 +7,7 @@ #include #include -#include "ngraph/attribute_adapter.hpp" +#include "openvino/core/attribute_adapter.hpp" #include "openvino/core/core_visibility.hpp" #include "openvino/core/partial_shape.hpp" #include "openvino/core/rank.hpp" diff --git a/ngraph/core/include/openvino/core/node.hpp b/ngraph/core/include/openvino/core/node.hpp index 440105d8f34dda..79c764ba1b682d 100644 --- a/ngraph/core/include/openvino/core/node.hpp +++ b/ngraph/core/include/openvino/core/node.hpp @@ -71,7 +71,7 @@ class Output; class Node; /// EvaluationContext stores and manages a context (additional parameters, values and -/// environment) for evaluating ngraph::function. +/// environment) for evaluating ov::Function. 
using EvaluationContext = std::map>; using ResultVector = std::vector>; @@ -222,7 +222,7 @@ class OPENVINO_API Node : public std::enable_shared_from_this { virtual bool evaluate_upper(const ov::HostTensorVector& output_values) const; virtual bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values); - /// \brief Decomposes the FusedOp into a sub-graph consisting of core ngraph ops + /// \brief Decomposes the FusedOp into a sub-graph consisting of core openvino ops /// /// \return A vector of nodes comprising the sub-graph. The order of output /// tensors must match the match output tensors of the FusedOp @@ -558,11 +558,11 @@ using RawNodeOutputMap = std::map>; class OPENVINO_API NodeValidationFailure : public ov::AssertFailure { public: - NodeValidationFailure(const ngraph::CheckLocInfo& check_loc_info, const Node* node, const std::string& explanation) + NodeValidationFailure(const ov::CheckLocInfo& check_loc_info, const Node* node, const std::string& explanation) : AssertFailure(check_loc_info, node_validation_failure_loc_string(node), explanation) {} }; } // namespace ov -#define NODE_VALIDATION_CHECK(node, ...) NGRAPH_CHECK_HELPER(::ov::NodeValidationFailure, (node), __VA_ARGS__) +#define NODE_VALIDATION_CHECK(node, ...) OPENVINO_ASSERT_HELPER(::ov::NodeValidationFailure, (node), __VA_ARGS__) namespace ov { template diff --git a/ngraph/core/include/openvino/core/node_input.hpp b/ngraph/core/include/openvino/core/node_input.hpp index 18918b1fcca07c..6004b34a836b05 100644 --- a/ngraph/core/include/openvino/core/node_input.hpp +++ b/ngraph/core/include/openvino/core/node_input.hpp @@ -7,10 +7,10 @@ #include #include -#include "ngraph/shape.hpp" #include "openvino/core/core_visibility.hpp" #include "openvino/core/descriptor/tensor.hpp" #include "openvino/core/partial_shape.hpp" +#include "openvino/core/shape.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/core/variant.hpp" diff --git a/ngraph/core/include/openvino/core/preprocess/color_format.hpp b/ngraph/core/include/openvino/core/preprocess/color_format.hpp new file mode 100644 index 00000000000000..d8963ac14ce818 --- /dev/null +++ b/ngraph/core/include/openvino/core/preprocess/color_format.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +namespace ov { +namespace preprocess { + +/// \brief Color format enumeration for conversion +enum class ColorFormat { + UNDEFINED, + NV12_SINGLE_PLANE, // Image in NV12 format as single tensor + NV12_TWO_PLANES, // Image in NV12 format represented as separate tensors for Y and UV planes + RGB, + BGR +}; + +} // namespace preprocess +} // namespace ov diff --git a/ngraph/core/include/openvino/core/preprocess/input_tensor_info.hpp b/ngraph/core/include/openvino/core/preprocess/input_tensor_info.hpp index e064e38800d548..357ac3f37c822e 100644 --- a/ngraph/core/include/openvino/core/preprocess/input_tensor_info.hpp +++ b/ngraph/core/include/openvino/core/preprocess/input_tensor_info.hpp @@ -6,6 +6,7 @@ #include "openvino/core/core_visibility.hpp" #include "openvino/core/layout.hpp" +#include "openvino/core/preprocess/color_format.hpp" #include "openvino/core/type/element_type.hpp" namespace ov { @@ -117,6 +118,44 @@ class OPENVINO_API InputTensorInfo final { /// /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner. 
 InputTensorInfo&& set_spatial_static_shape(size_t height, size_t width) &&;
+
+    /// \brief Set color format for user's input tensor.
+    ///
+    /// In general, some formats support multi-plane input, e.g. an NV12 image can be represented as 2 separate tensors
+    /// (planes): Y plane and UV plane. The set_color_format API also allows setting sub_names for such parameters for
+    /// convenient usage of plane parameters.
+    ///
+    /// This version allows chaining for Lvalue objects.
+    ///
+    /// \param format Color format of input image.
+    ///
+    /// \param sub_names Optional list of sub-names assigned for each plane (e.g. {"Y", "UV"}). If not specified,
+    /// sub-names for plane parameters are auto-generated; the exact auto-generation rules depend on the specific color
+    /// format, and client's code shall not rely on these rules. It is not allowed to specify sub-names for
+    /// single-plane inputs; also, if sub-names are specified, their number shall match the number of planes.
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner.
+    InputTensorInfo& set_color_format(const ov::preprocess::ColorFormat& format,
+                                      const std::vector<std::string>& sub_names = {}) &;
+
+    /// \brief Set color format for user's input tensor.
+    ///
+    /// In general, some formats support multi-plane input, e.g. an NV12 image can be represented as 2 separate tensors
+    /// (planes): Y plane and UV plane. The set_color_format API also allows setting sub_names for such parameters for
+    /// convenient usage of plane parameters.
+    ///
+    /// This version allows chaining for Rvalue objects.
+    ///
+    /// \param format Color format of input image.
+    ///
+    /// \param sub_names Optional list of sub-names assigned for each plane (e.g. {"Y", "UV"}). If not specified,
+    /// sub-names for plane parameters are auto-generated; the exact auto-generation rules depend on the specific color
+    /// format, and client's code shall not rely on these rules. It is not allowed to specify sub-names for
+    /// single-plane inputs; also, if sub-names are specified, their number shall match the number of planes.
+    ///
+    /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner.
+    InputTensorInfo&& set_color_format(const ov::preprocess::ColorFormat& format,
+                                       const std::vector<std::string>& sub_names = {}) &&;
 };
 
 }  // namespace preprocess
diff --git a/ngraph/core/include/openvino/core/preprocess/output_info.hpp b/ngraph/core/include/openvino/core/preprocess/output_info.hpp
new file mode 100644
index 00000000000000..68aec4c688ac57
--- /dev/null
+++ b/ngraph/core/include/openvino/core/preprocess/output_info.hpp
@@ -0,0 +1,98 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/core/core_visibility.hpp"
+#include "openvino/core/preprocess/output_network_info.hpp"
+#include "openvino/core/preprocess/output_tensor_info.hpp"
+#include "openvino/core/preprocess/postprocess_steps.hpp"
+
+namespace ov {
+namespace preprocess {
+
+/// \brief Class holding postprocessing information for one output
+/// From the postprocessing pipeline perspective, each output can be represented as:
+///    - Network's output info (OutputInfo::network)
+///    - Postprocessing steps applied to user's output (OutputInfo::postprocess)
+///    - User's desired output parameter information, which is the final one after postprocessing (OutputInfo::tensor)
+///
+/// API has a builder-like style to allow chaining calls in client's code, like
+/// \code{.cpp}
+/// auto proc = PrePostProcessor().output(OutputInfo().network(...).postprocess(...).tensor(...));
+/// \endcode
+class OPENVINO_API OutputInfo final {
+    class OutputInfoImpl;
+    std::unique_ptr<OutputInfoImpl> m_impl;
+    friend class PrePostProcessor;
+
+public:
+    /// \brief Empty constructor. Should be used only if network has exactly one output
+    OutputInfo();
+
+    /// \brief Constructor for particular output index of model
+    ///
+    /// \param output_index Index to address specified output parameter of model
+    OutputInfo(size_t output_index);
+
+    /// \brief Constructor for particular output of model addressed by its output tensor name
+    ///
+    /// \param output_tensor_name Name of the output tensor
+    OutputInfo(const std::string& output_tensor_name);
+
+    /// \brief Default move constructor
+    OutputInfo(OutputInfo&&) noexcept;
+
+    /// \brief Default move assignment operator
+    OutputInfo& operator=(OutputInfo&&) noexcept;
+
+    /// \brief Default destructor
+    ~OutputInfo();
+
+    /// \brief Set network's tensor information for output - Lvalue version
+    ///
+    /// \param builder Output network tensor information.
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner
+    OutputInfo& network(OutputNetworkInfo&& builder) &;
+
+    /// \brief Set network's tensor information for output - Rvalue version
+    ///
+    /// \param builder Output network tensor information.
+    ///
+    /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner
+    OutputInfo&& network(OutputNetworkInfo&& builder) &&;
+
+    /// \brief Set postprocessing operations for output - Lvalue version
+    ///
+    /// \param builder Postprocessing operations.
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner
+    OutputInfo& postprocess(PostProcessSteps&& builder) &;
+
+    /// \brief Set postprocessing operations for output - Rvalue version
+    ///
+    /// \param builder Postprocessing operations.
+    ///
+    /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner
+    OutputInfo&& postprocess(PostProcessSteps&& builder) &&;
+
+    /// \brief Set final output tensor information for output after postprocessing - Lvalue version
+    ///
+    /// \param builder Output tensor information.
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner
+    OutputInfo& tensor(OutputTensorInfo&& builder) &;
+
+    /// \brief Set final output tensor information for output after postprocessing - Rvalue version
+    ///
+    /// \param builder Output tensor information.
+    ///
+    /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner
+    OutputInfo&& tensor(OutputTensorInfo&& builder) &&;
+};
+
+}  // namespace preprocess
+}  // namespace ov
diff --git a/ngraph/core/include/openvino/core/preprocess/output_network_info.hpp b/ngraph/core/include/openvino/core/preprocess/output_network_info.hpp
new file mode 100644
index 00000000000000..b32c2181ae122f
--- /dev/null
+++ b/ngraph/core/include/openvino/core/preprocess/output_network_info.hpp
@@ -0,0 +1,65 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/core/core_visibility.hpp"
+#include "openvino/core/layout.hpp"
+
+namespace ov {
+namespace preprocess {
+
+/// \brief Information about network's output tensor. If all information is already included in the loaded network,
+/// this info may not be needed. However, it can be set to specify additional information about the network, like
+/// 'layout'.
+///
+/// Example of usage of network 'layout':
+/// Suppose the network has an output parameter with shape {1, 3, 224, 224} and `NCHW` layout. The user may need to
+/// transpose the output picture to interleaved format {1, 224, 224, 3}. This can be done with the following code
+///
+/// \code{.cpp}
+/// auto proc =
+///     PrePostProcessor()
+///         .output(OutputInfo()
+///                     .network(OutputNetworkInfo().set_layout("NCHW"))
+///                     .postprocess(PostProcessSteps().convert_layout("NHWC")));
+/// \endcode
+class OPENVINO_API OutputNetworkInfo final {
+    class OutputNetworkInfoImpl;
+    std::unique_ptr<OutputNetworkInfoImpl> m_impl;
+    friend class OutputInfo;
+
+public:
+    /// \brief Default empty constructor
+    OutputNetworkInfo();
+
+    /// \brief Default move constructor
+    OutputNetworkInfo(OutputNetworkInfo&&) noexcept;
+
+    /// \brief Default move assignment
+    OutputNetworkInfo& operator=(OutputNetworkInfo&&) noexcept;
+
+    /// \brief Default destructor
+    ~OutputNetworkInfo();
+
+    /// \brief Set layout for network's output tensor
+    /// This version allows chaining for Lvalue objects
+    ///
+    /// \param layout Layout for network's output tensor.
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner
+    OutputNetworkInfo& set_layout(const ov::Layout& layout) &;
+
+    /// \brief Set layout for network's output tensor
+    /// This version allows chaining for Rvalue objects
+    ///
+    /// \param layout Layout for network's output tensor.
+ /// + /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner + OutputNetworkInfo&& set_layout(const ov::Layout& layout) &&; +}; + +} // namespace preprocess +} // namespace ov diff --git a/ngraph/core/include/openvino/core/preprocess/output_tensor_info.hpp b/ngraph/core/include/openvino/core/preprocess/output_tensor_info.hpp new file mode 100644 index 00000000000000..385a47d729898e --- /dev/null +++ b/ngraph/core/include/openvino/core/preprocess/output_tensor_info.hpp @@ -0,0 +1,79 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/core_visibility.hpp" +#include "openvino/core/layout.hpp" +#include "openvino/core/type/element_type.hpp" + +namespace ov { +namespace preprocess { + +/// \brief Information about user's desired output tensor. By default, it will be initialized to same data +/// (type/shape/etc) as network's output parameter. User application can override particular parameters (like +/// 'element_type') according to application's data and specify appropriate conversions in post-processing steps +/// +/// \code{.cpp} +/// auto proc = +/// PrePostProcessor() +/// .output(OutputInfo() +/// .postprocess() +/// .tensor(OutputTensorInfo() +/// .set_element_type(ov::element::u8)) +/// ); +/// \endcode +class OPENVINO_API OutputTensorInfo final { + class OutputTensorInfoImpl; + std::unique_ptr m_impl; + friend class OutputInfo; + +public: + /// \brief Default empty constructor + OutputTensorInfo(); + + /// \brief Default move constructor + OutputTensorInfo(OutputTensorInfo&&) noexcept; + + /// \brief Default move assignment + OutputTensorInfo& operator=(OutputTensorInfo&&) noexcept; + + /// \brief Default destructor + ~OutputTensorInfo(); + + /// \brief Set element type for user's desired output tensor. + /// This version allows chaining for Lvalue objects. + /// + /// \param type Element type for user's output tensor. + /// + /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner. + OutputTensorInfo& set_element_type(const ov::element::Type& type) &; + + /// \brief Set element type for user's desired output tensor. + /// This version allows chaining for Rvalue objects. + /// + /// \param type Element type for user's output tensor. + /// + /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner. + OutputTensorInfo&& set_element_type(const ov::element::Type& type) &&; + + /// \brief Set layout for user's output tensor. + /// This version allows chaining for Lvalue objects + /// + /// \param layout Layout for user's output tensor. + /// + /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner + OutputTensorInfo& set_layout(const ov::Layout& layout) &; + + /// \brief Set layout for user's output tensor. + /// This version allows chaining for Rvalue objects + /// + /// \param layout Layout for user's output tensor. 
+    ///
+    /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner
+    OutputTensorInfo&& set_layout(const ov::Layout& layout) &&;
+};
+
+}  // namespace preprocess
+}  // namespace ov
diff --git a/ngraph/core/include/openvino/core/preprocess/postprocess_steps.hpp b/ngraph/core/include/openvino/core/preprocess/postprocess_steps.hpp
new file mode 100644
index 00000000000000..8bb17b2f2ee356
--- /dev/null
+++ b/ngraph/core/include/openvino/core/preprocess/postprocess_steps.hpp
@@ -0,0 +1,116 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/core/core_visibility.hpp"
+#include "openvino/core/layout.hpp"
+#include "openvino/core/type/element_type.hpp"
+
+namespace ov {
+
+class Node;
+
+namespace preprocess {
+
+/// \brief Postprocessing steps. Each step typically adds an operation to the output parameter.
+/// User application can specify a sequence of postprocessing steps in a builder-like manner
+/// \code{.cpp}
+/// auto proc = PrePostProcessor()
+///                 .output(OutputInfo()
+///                             .postprocess(PostProcessSteps()
+///                                              .convert_element_type(element::u8)));
+/// \endcode
+class OPENVINO_API PostProcessSteps final {
+    class PostProcessStepsImpl;
+    std::unique_ptr<PostProcessStepsImpl> m_impl;
+    friend class OutputInfo;
+
+public:
+    /// \brief Default empty constructor
+    PostProcessSteps();
+
+    /// \brief Default move constructor
+    PostProcessSteps(PostProcessSteps&&) noexcept;
+
+    /// \brief Default move assignment operator
+    PostProcessSteps& operator=(PostProcessSteps&&) noexcept;
+
+    /// \brief Default destructor
+    ~PostProcessSteps();
+
+    /// \brief Add convert element type post-process operation - Lvalue version
+    ///
+    /// \param type Desired type of output. If not specified, type will be obtained from 'tensor' output information
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner
+    PostProcessSteps& convert_element_type(const ov::element::Type& type = {}) &;
+
+    /// \brief Add convert element type post-process operation - Rvalue version
+    ///
+    /// \param type Desired type of output. If not specified, type will be obtained from 'tensor' output information
+    ///
+    /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner
+    PostProcessSteps&& convert_element_type(const ov::element::Type& type = {}) &&;
+
+    /// \brief Add 'convert layout' operation to specified layout - Lvalue version.
+    ///
+    /// \details Adds an appropriate 'transpose' operation between the network layout and user's desired layout.
+    /// Current implementation requires source and destination layouts to have the same number of dimensions.
+    ///
+    /// \example Example: the network output is in 'NCHW' layout ([1, 3, 224, 224]) but the user needs an
+    /// interleaved output image ('NHWC', [1, 224, 224, 3]). Post-processing may look like this:
+    ///
+    /// \code{.cpp}
+    /// auto proc =
+    ///     PrePostProcessor()
+    ///         .output(OutputInfo()
+    ///                     .network(OutputNetworkInfo().set_layout("NCHW"))   // Network output is NCHW
+    ///                     .postprocess(PostProcessSteps()
+    ///                                      .convert_layout("NHWC")));        // User needs output as NHWC
+    /// \endcode
+    ///
+    /// \param dst_layout New layout after conversion. If not specified - destination layout is obtained from
+    /// appropriate tensor output properties.
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner.
+ PostProcessSteps& convert_layout(const Layout& dst_layout = {}) &; + + /// \brief Add convert_layout operation to network dimensions - Rvalue version. + /// + /// \param dst_layout New layout after conversion. If not specified - destination layout is obtained from + /// appropriate tensor output properties. + /// + /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner. + PostProcessSteps&& convert_layout(const Layout& dst_layout = {}) &&; + + /// \brief Signature for custom postprocessing operation. Custom postprocessing operation takes one output node and + /// produces one output node. For more advanced cases, client's code can use transformation passes over ov::Function + /// directly + /// + /// \param node Output node for custom post-processing operation + /// + /// \return New node after applying custom post-processing operation + using CustomPostprocessOp = std::function(const ov::Output& node)>; + + /// \brief Add custom post-process operation - Lvalue version + /// Client application can specify callback function for custom action + /// + /// \param postprocess_cb Client's custom postprocess operation. + /// + /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner + PostProcessSteps& custom(const CustomPostprocessOp& postprocess_cb) &; + + /// \brief Add custom post-process operation - Rvalue version + /// Client application can specify callback function for custom action + /// + /// \param postprocess_cb Client's custom postprocess operation. + /// + /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner + PostProcessSteps&& custom(const CustomPostprocessOp& postprocess_cb) &&; +}; + +} // namespace preprocess +} // namespace ov diff --git a/ngraph/core/include/openvino/core/preprocess/pre_post_process.hpp b/ngraph/core/include/openvino/core/preprocess/pre_post_process.hpp index 300fd1ab557295..54ac8b5ab4d423 100644 --- a/ngraph/core/include/openvino/core/preprocess/pre_post_process.hpp +++ b/ngraph/core/include/openvino/core/preprocess/pre_post_process.hpp @@ -6,6 +6,7 @@ #include "openvino/core/core_visibility.hpp" #include "openvino/core/preprocess/input_info.hpp" +#include "openvino/core/preprocess/output_info.hpp" namespace ov { @@ -42,22 +43,34 @@ class OPENVINO_API PrePostProcessor final { /// \brief Default destructor ~PrePostProcessor(); - /// \brief Adds pre-processing information and steps to input of model. This method can be used only if ov::Function - /// passed on `build` has only one input + /// \brief Adds pre-processing information and steps to input of model. /// /// \param builder Pre-processing data for input tensor of model. /// /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner PrePostProcessor& input(InputInfo&& builder) &; - /// \brief Adds pre-processing information and steps to input of model - Rvalue version. This method can be used - /// only if ov::Function passed on `build` has only one input. + /// \brief Adds pre-processing information and steps to input of model - Rvalue version. /// /// \param builder Pre-processing data for input tensor of model. /// /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner PrePostProcessor&& input(InputInfo&& builder) &&; + /// \brief Adds post-processing information and steps to output of model. + /// + /// \param builder Post-processing data for output tensor of model. 
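Pulling the new pieces together, here is a hedged end-to-end sketch of the output path introduced in this diff. It is illustrative only: the function wrapper is hypothetical, and `build()` is the PrePostProcessor method referenced in the doc comments ("Adds pre/post-processing operations to existing function").

    #include "openvino/core/preprocess/pre_post_process.hpp"

    // Hedged sketch: 'model' is assumed to have a single output in NCHW layout.
    std::shared_ptr<ov::Function> postprocess_to_u8_nhwc(const std::shared_ptr<ov::Function>& model) {
        using namespace ov::preprocess;
        return PrePostProcessor()
            .output(OutputInfo()
                        .network(OutputNetworkInfo().set_layout("NCHW"))
                        .postprocess(PostProcessSteps()
                                         .convert_layout("NHWC")   // transpose to interleaved layout
                                         .convert_element_type())  // convert to the type set on 'tensor'
                        .tensor(OutputTensorInfo()
                                    .set_layout("NHWC")
                                    .set_element_type(ov::element::u8)))
            .build(model);
    }

Calling convert_element_type() with no argument relies on the documented default: the destination type is taken from the 'tensor' output information, here ov::element::u8.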
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner
+    PrePostProcessor& output(OutputInfo&& builder) &;
+
+    /// \brief Adds post-processing information and steps to output of model - Rvalue version.
+    ///
+    /// \param builder Post-processing data for output tensor of model.
+    ///
+    /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner
+    PrePostProcessor&& output(OutputInfo&& builder) &&;
+
     /// \brief Adds pre/post-processing operations to existing function
     ///
     /// \param function Existing function representing loaded model
diff --git a/ngraph/core/include/openvino/core/preprocess/preprocess_steps.hpp b/ngraph/core/include/openvino/core/preprocess/preprocess_steps.hpp
index 8ef191dba00a36..d005e9a78cf608 100644
--- a/ngraph/core/include/openvino/core/preprocess/preprocess_steps.hpp
+++ b/ngraph/core/include/openvino/core/preprocess/preprocess_steps.hpp
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "openvino/core/core_visibility.hpp"
+#include "openvino/core/preprocess/color_format.hpp"
 #include "openvino/core/preprocess/resize_algorithm.hpp"
 #include "openvino/core/type/element_type.hpp"
 
@@ -56,6 +57,26 @@
     /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner
     PreProcessSteps&& convert_element_type(const ov::element::Type& type) &&;
 
+    /// \brief Converts color format for user's input tensor. Requires source color format to be specified by
+    /// InputTensorInfo::set_color_format.
+    ///
+    /// This version allows chaining for Lvalue objects.
+    ///
+    /// \param dst_format Destination color format of input image.
+    ///
+    /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner
+    PreProcessSteps& convert_color(const ov::preprocess::ColorFormat& dst_format) &;
+
+    /// \brief Converts color format for user's input tensor. Requires source color format to be specified by
+    /// InputTensorInfo::set_color_format.
+    ///
+    /// This version allows chaining for Rvalue objects.
+    ///
+    /// \param dst_format Color format of input image.
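A companion sketch for the input side, showing set_color_format and convert_color working together. Again hedged: the wrapper function is hypothetical, the plane sub-names {"Y", "UV"} are illustrative, and InputInfo/InputTensorInfo/PreProcessSteps are the classes used by the existing input() API.

    #include "openvino/core/preprocess/pre_post_process.hpp"

    // The user feeds NV12 as two planes (Y, UV); the preprocessor inserts
    // an NV12 -> RGB conversion in front of a model that expects RGB input.
    std::shared_ptr<ov::Function> accept_nv12_input(const std::shared_ptr<ov::Function>& model) {
        using namespace ov::preprocess;
        return PrePostProcessor()
            .input(InputInfo()
                       .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES, {"Y", "UV"}))
                       .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB)))
            .build(model);
    }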
+ /// + /// \return Rvalue reference to 'this' to allow chaining with other calls in a builder-like manner + PreProcessSteps&& convert_color(const ov::preprocess::ColorFormat& dst_format) &&; + /// \brief Add scale preprocess operation - Lvalue version /// Divide each element of input by specified value /// diff --git a/ngraph/core/include/openvino/core/type/element_type.hpp b/ngraph/core/include/openvino/core/type/element_type.hpp index abb500b4179bc0..d867d2cd4181a4 100644 --- a/ngraph/core/include/openvino/core/type/element_type.hpp +++ b/ngraph/core/include/openvino/core/type/element_type.hpp @@ -14,10 +14,10 @@ #include #include -#include "ngraph/attribute_adapter.hpp" -#include "ngraph/deprecated.hpp" -#include "ngraph/except.hpp" +#include "openvino/core/attribute_adapter.hpp" #include "openvino/core/core_visibility.hpp" +#include "openvino/core/deprecated.hpp" +#include "openvino/core/except.hpp" #include "openvino/core/rtti.hpp" #include "openvino/core/type/bfloat16.hpp" #include "openvino/core/type/float16.hpp" @@ -196,7 +196,7 @@ class OPENVINO_API AttributeAdapter : public ValueAccessor -class NGRAPH_API AttributeAdapter : public DirectValueAccessor { +class OPENVINO_API AttributeAdapter : public DirectValueAccessor { public: OPENVINO_RTTI("AttributeAdapter"); BWDCMP_RTTI_DECLARATION; diff --git a/ngraph/core/include/openvino/op/adaptive_max_pool.hpp b/ngraph/core/include/openvino/op/adaptive_max_pool.hpp index 9013cf335350f9..817ef2f94aea21 100644 --- a/ngraph/core/include/openvino/op/adaptive_max_pool.hpp +++ b/ngraph/core/include/openvino/op/adaptive_max_pool.hpp @@ -31,7 +31,7 @@ class OPENVINO_API AdaptiveMaxPool : public Op { /// AdaptiveMaxPool(const Output& data, const Output& output_shape, - const ngraph::element::Type& index_element_type = ngraph::element::i64); + const ov::element::Type& index_element_type = ov::element::i64); void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; @@ -43,7 +43,7 @@ class OPENVINO_API AdaptiveMaxPool : public Op { } protected: - ngraph::element::Type m_index_element_type = ngraph::element::i64; + ov::element::Type m_index_element_type = ov::element::i64; }; } // namespace v8 } // namespace op diff --git a/ngraph/core/include/openvino/op/assign.hpp b/ngraph/core/include/openvino/op/assign.hpp index 6f8853092995c9..d7f06bd6fcad58 100644 --- a/ngraph/core/include/openvino/op/assign.hpp +++ b/ngraph/core/include/openvino/op/assign.hpp @@ -60,7 +60,7 @@ class OPENVINO_API Assign : public util::AssignBase { bool visit_attributes(AttributeVisitor& visitor) override; std::string get_variable_id() const override { - NGRAPH_CHECK(m_variable, "Variable is not initialized. Variable_id is unavailable"); + OPENVINO_ASSERT(m_variable, "Variable is not initialized. 
Variable_id is unavailable"); return m_variable->get_info().variable_id; } bool evaluate(const HostTensorVector& outputs, diff --git a/ngraph/core/include/openvino/op/constant.hpp b/ngraph/core/include/openvino/op/constant.hpp index f1208e047d49e7..73f1edf7601906 100644 --- a/ngraph/core/include/openvino/op/constant.hpp +++ b/ngraph/core/include/openvino/op/constant.hpp @@ -10,7 +10,6 @@ #include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/shared_buffer.hpp" -#include "ngraph/util.hpp" #include "openvino/core/coordinate_diff.hpp" #include "openvino/core/node.hpp" #include "openvino/core/type/element_type.hpp" @@ -364,7 +363,7 @@ class OPENVINO_API Constant : public Op { template const typename element_type_traits::value_type* get_data_ptr() const { - NGRAPH_CHECK(ET == get_element_type(), "get_data_ptr() called for incorrect element type."); + OPENVINO_ASSERT(ET == get_element_type(), "get_data_ptr() called for incorrect element type."); return static_cast::value_type*>(get_data_ptr()); } @@ -530,7 +529,7 @@ class OPENVINO_API Constant : public Op { template typename element_type_traits::value_type* get_data_ptr_nc() { - NGRAPH_CHECK(ET == get_element_type(), "get_data_ptr_nc() called for incorrect element type."); + OPENVINO_ASSERT(ET == get_element_type(), "get_data_ptr_nc() called for incorrect element type."); return static_cast::value_type*>(get_data_ptr_nc()); } @@ -668,21 +667,21 @@ class OPENVINO_API Constant : public Op { # pragma GCC diagnostic pop #endif } - template ::type = true> - static ngraph::fundamental_type_for value_in_range(const ValueT& value) { - const auto result = ngraph::fundamental_type_for(value); - NGRAPH_CHECK(0 <= result && result <= 15, "assigned value out of range u4 values"); + typename std::enable_if::type = true> + static ov::fundamental_type_for value_in_range(const ValueT& value) { + const auto result = ov::fundamental_type_for(value); + OPENVINO_ASSERT(0 <= result && result <= 15, "assigned value out of range u4 values"); return result; } - template ::type = true> - static ngraph::fundamental_type_for value_in_range(const ValueT& value) { - const auto result = ngraph::fundamental_type_for(value); - NGRAPH_CHECK(-8 <= result && result <= 7, "assigned value out of range i4 values"); + typename std::enable_if::type = true> + static ov::fundamental_type_for value_in_range(const ValueT& value) { + const auto result = ov::fundamental_type_for(value); + OPENVINO_ASSERT(-8 <= result && result <= 7, "assigned value out of range i4 values"); return result; } diff --git a/ngraph/core/include/openvino/op/convolution.hpp b/ngraph/core/include/openvino/op/convolution.hpp index 2f09c881aeb249..379e2db177e630 100644 --- a/ngraph/core/include/openvino/op/convolution.hpp +++ b/ngraph/core/include/openvino/op/convolution.hpp @@ -114,119 +114,140 @@ class OPENVINO_API ConvolutionBackpropData : public Op { /// \brief Constructs a batched-convolution data batch-backprop operation. ConvolutionBackpropData() = default; // clang-format off - // - // \brief Constructs a batched-convolution data batch-backprop operation. - // - // \param data The node producing data from forward-prop. Shape: [N, - // C_INPUT, X1, ..., XD]. - // \param filters The node producing the filter from forward-prop. Shape: - // [C_INPUT, C_OUTPUT, K_D, ..., K_1] - // \param output_shape The shape of the data batch from forward-prop. It's size - // should be equal to number of data spatial dimensions. 
- // \param strides The strides from forward-prop. - // \param pads_begin The padding-below sizes from forward-prop. - // \param pads_end The padding-above sizes from forward-prop. - // \param dilations The dilations from forward-prop. - // \param auto_pad The pad type for automatically computing padding sizes. - // \param output_padding The output padding adds additional amount of paddings per - // each spatial axis in the output tensor. clang-format on - // - ConvolutionBackpropData(const Output& data, - const Output& filters, - const Output& output_shape, - const Strides& strides, - const CoordinateDiff& pads_begin, - const CoordinateDiff& pads_end, - const Strides& dilations, - const PadType& auto_pad = PadType::EXPLICIT, - const CoordinateDiff& output_padding = {}); - - // clang-format off - // - // \brief Constructs a batched-convolution data batch-backprop operation. - // - // \param data The node producing data from forward-prop. Shape: [N, - // C_INPUT, X1, ..., XD]. - // \param filters The node producing the filter from forward-prop. Shape: - // [C_INPUT, C_OUTPUT, K_D, ..., K_1] - // \param strides The strides from forward-prop. - // \param pads_begin The padding-below sizes from forward-prop. - // \param pads_end The padding-above sizes from forward-prop. - // \param dilations The dilations from forward-prop. - // \param auto_pad The pad type for automatically computing padding sizes. - // \param output_padding The output padding adds additional amount of paddings per - // each spatial axis in the output tensor. clang-format on - // - ConvolutionBackpropData(const Output& data, - const Output& filters, - const Strides& strides, - const CoordinateDiff& pads_begin, - const CoordinateDiff& pads_end, - const Strides& dilations, - const PadType& auto_pad = PadType::EXPLICIT, - const CoordinateDiff& output_padding = {}); - - void validate_and_infer_types() override; - bool visit_attributes(AttributeVisitor& visitor) override; - bool is_dynamic() const override; - - std::shared_ptr - clone_with_new_inputs(const OutputVector& new_args) const override; - - /// \return The output spatial dimensions shape. - const PartialShape get_output_shape() const; - void set_output_shape(const Shape& output_shape); - /// \return The strides from the forward prop. - const Strides& get_strides() const { return m_strides; } - void set_strides(const Strides& strides) { m_strides = strides; } - /// \return The dilations from the forward prop. - const Strides& get_dilations() const { return m_dilations; } - void set_dilations(const Strides& dilations) { m_dilations = dilations; } - /// \return The padding-below sizes (possibly negative) from the forward prop. - const CoordinateDiff& get_pads_begin() const { return m_pads_begin; } - void set_pads_begin(const CoordinateDiff& pads_begin) { m_pads_begin = pads_begin; } - /// \return The padding-above sizes (possibly negative) from the forward prop. - const CoordinateDiff& get_pads_end() const { return m_pads_end; } - void set_pads_end(const CoordinateDiff& pads_end) { m_pads_end = pads_end; } - /// \return The auto pad. - const PadType& get_auto_pad() const { return m_auto_pad; } - void set_auto_pad(const PadType& auto_pad) { m_auto_pad = auto_pad; } - /// \return The output padding. - const CoordinateDiff& get_output_padding() const { return m_output_padding; } - void set_output_padding(const CoordinateDiff& output_padding) - { - m_output_padding = output_padding; - } - /// \brief Calculates output spatial features size. 
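
As a side note on the Constant accessors switched to OPENVINO_ASSERT a few hunks above, here is a hedged usage sketch; Constant::create and the element::Type_t template argument follow the declarations in this diff, while the demo function and its values are illustrative only.

#include "openvino/op/constant.hpp"

// get_data_ptr<ET>() returns a typed pointer; after this change an
// element-type mismatch trips OPENVINO_ASSERT instead of NGRAPH_CHECK.
void constant_accessor_demo() {
    auto c = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1.f, 2.f, 3.f, 4.f});
    const float* data = c->get_data_ptr<ov::element::Type_t::f32>();  // types match
    (void)data;
    // c->get_data_ptr<ov::element::Type_t::i32>();  // would fail the assertion
}
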
- /// - /// \param[in] input_data_shape The input data partial shape - /// \param[in] filters_shape The filters partial shape - /// \param[in] strides The strides values. - /// \param[in] dilations The dilations values. - /// \param[in] pads_begin The paddings at the beginning of axis. - /// \param[in] pads_end The paddings at the end of axis. - /// \param[in] output_padding The output padding values. - /// \param output_spatial_shape The placeholder for computed output spatial partial - /// shape. - /// - void - infer_conv_backprop_output_spatial_shape(const std::vector& input_data_shape, - const std::vector& filters_shape, - const Strides& strides, - const Strides& dilations, - const CoordinateDiff& pads_begin, - const CoordinateDiff& pads_end, - const CoordinateDiff& output_padding, - std::vector& output_spatial_shape); - - protected: - Strides m_strides; - Strides m_dilations; - CoordinateDiff m_pads_begin; - CoordinateDiff m_pads_end; - PadType m_auto_pad; - CoordinateDiff m_output_padding; - }; - } // namespace v1 - } // namespace op -} // namespace ngraph + // + // \brief Constructs a batched-convolution data batch-backprop operation. + // + // \param data The node producing data from forward-prop. Shape: [N, + // C_INPUT, X1, ..., XD]. + // \param filters The node producing the filter from forward-prop. Shape: + // [C_INPUT, C_OUTPUT, K_D, ..., K_1] + // \param output_shape The shape of the data batch from forward-prop. It's size + // should be equal to number of data spatial dimensions. + // \param strides The strides from forward-prop. + // \param pads_begin The padding-below sizes from forward-prop. + // \param pads_end The padding-above sizes from forward-prop. + // \param dilations The dilations from forward-prop. + // \param auto_pad The pad type for automatically computing padding sizes. + // \param output_padding The output padding adds additional amount of paddings per + // each spatial axis in the output tensor. clang-format on + // + // clang-format on + ConvolutionBackpropData(const Output& data, + const Output& filters, + const Output& output_shape, + const Strides& strides, + const CoordinateDiff& pads_begin, + const CoordinateDiff& pads_end, + const Strides& dilations, + const PadType& auto_pad = PadType::EXPLICIT, + const CoordinateDiff& output_padding = {}); + + // clang-format off + // + // \brief Constructs a batched-convolution data batch-backprop operation. + // + // \param data The node producing data from forward-prop. Shape: [N, + // C_INPUT, X1, ..., XD]. + // \param filters The node producing the filter from forward-prop. Shape: + // [C_INPUT, C_OUTPUT, K_D, ..., K_1] + // \param strides The strides from forward-prop. + // \param pads_begin The padding-below sizes from forward-prop. + // \param pads_end The padding-above sizes from forward-prop. + // \param dilations The dilations from forward-prop. + // \param auto_pad The pad type for automatically computing padding sizes. + // \param output_padding The output padding adds additional amount of paddings per + // each spatial axis in the output tensor. 
clang-format on + // + // clang-format on + ConvolutionBackpropData(const Output& data, + const Output& filters, + const Strides& strides, + const CoordinateDiff& pads_begin, + const CoordinateDiff& pads_end, + const Strides& dilations, + const PadType& auto_pad = PadType::EXPLICIT, + const CoordinateDiff& output_padding = {}); + + void validate_and_infer_types() override; + bool visit_attributes(AttributeVisitor& visitor) override; + bool is_dynamic() const override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + /// \return The output spatial dimensions shape. + const PartialShape get_output_shape() const; + void set_output_shape(const Shape& output_shape); + /// \return The strides from the forward prop. + const Strides& get_strides() const { + return m_strides; + } + void set_strides(const Strides& strides) { + m_strides = strides; + } + /// \return The dilations from the forward prop. + const Strides& get_dilations() const { + return m_dilations; + } + void set_dilations(const Strides& dilations) { + m_dilations = dilations; + } + /// \return The padding-below sizes (possibly negative) from the forward prop. + const CoordinateDiff& get_pads_begin() const { + return m_pads_begin; + } + void set_pads_begin(const CoordinateDiff& pads_begin) { + m_pads_begin = pads_begin; + } + /// \return The padding-above sizes (possibly negative) from the forward prop. + const CoordinateDiff& get_pads_end() const { + return m_pads_end; + } + void set_pads_end(const CoordinateDiff& pads_end) { + m_pads_end = pads_end; + } + /// \return The auto pad. + const PadType& get_auto_pad() const { + return m_auto_pad; + } + void set_auto_pad(const PadType& auto_pad) { + m_auto_pad = auto_pad; + } + /// \return The output padding. + const CoordinateDiff& get_output_padding() const { + return m_output_padding; + } + void set_output_padding(const CoordinateDiff& output_padding) { + m_output_padding = output_padding; + } + /// \brief Calculates output spatial features size. + /// + /// \param[in] input_data_shape The input data partial shape + /// \param[in] filters_shape The filters partial shape + /// \param[in] strides The strides values. + /// \param[in] dilations The dilations values. + /// \param[in] pads_begin The paddings at the beginning of axis. + /// \param[in] pads_end The paddings at the end of axis. + /// \param[in] output_padding The output padding values. + /// \param output_spatial_shape The placeholder for computed output spatial partial + /// shape. 
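
The doc block above describes infer_conv_backprop_output_spatial_shape only in prose; for reference, the per-axis output size conventionally produced by ConvolutionBackpropData is sketched below. This is an assumption derived from the operation's documented parameters, not the library routine itself.

#include <cstdint>

// out = stride * (in - 1) + dilation * (k - 1) + 1
//       - pads_begin - pads_end + output_padding
int64_t backprop_output_dim(int64_t in, int64_t k, int64_t stride, int64_t dilation,
                            int64_t pad_begin, int64_t pad_end, int64_t out_pad) {
    return stride * (in - 1) + dilation * (k - 1) + 1 - pad_begin - pad_end + out_pad;
}
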
+ /// + void infer_conv_backprop_output_spatial_shape(const std::vector& input_data_shape, + const std::vector& filters_shape, + const Strides& strides, + const Strides& dilations, + const CoordinateDiff& pads_begin, + const CoordinateDiff& pads_end, + const CoordinateDiff& output_padding, + std::vector& output_spatial_shape); + +protected: + Strides m_strides; + Strides m_dilations; + CoordinateDiff m_pads_begin; + CoordinateDiff m_pads_end; + PadType m_auto_pad; + CoordinateDiff m_output_padding; +}; +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/ngraph/core/include/openvino/op/gather_tree.hpp b/ngraph/core/include/openvino/op/gather_tree.hpp index 07192e47d11034..c8fb8902dd43b8 100644 --- a/ngraph/core/include/openvino/op/gather_tree.hpp +++ b/ngraph/core/include/openvino/op/gather_tree.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/op.hpp" +#include "openvino/op/op.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/if.hpp b/ngraph/core/include/openvino/op/if.hpp index f5932652cc6f16..49414e1249ac18 100644 --- a/ngraph/core/include/openvino/op/if.hpp +++ b/ngraph/core/include/openvino/op/if.hpp @@ -31,28 +31,28 @@ class OPENVINO_API If : public util::MultiSubGraphOp { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - /// \brief gets then_body as ngraph::Function. + /// \brief gets then_body as ov::Function. /// - /// \return then_body as ngraph::Function. + /// \return then_body as ov::Function. const std::shared_ptr& get_then_body() const { return m_bodies[THEN_BODY_INDEX]; } - /// \brief gets else_body as ngraph::Function. + /// \brief gets else_body as ov::Function. /// - /// \return else_body as ngraph::Function. + /// \return else_body as ov::Function. const std::shared_ptr& get_else_body() const { return m_bodies[ELSE_BODY_INDEX]; } - /// \brief sets new ngraph::Function as new then_body. + /// \brief sets new ov::Function as new then_body. /// /// \param body new body for 'then' branch. void set_then_body(const std::shared_ptr& body) { m_bodies[THEN_BODY_INDEX] = body; } - /// \brief sets new ngraph::Function as new else_body. + /// \brief sets new ov::Function as new else_body. /// /// \param body new body for 'else' branch. void set_else_body(const std::shared_ptr& body) { diff --git a/ngraph/core/include/openvino/op/log.hpp b/ngraph/core/include/openvino/op/log.hpp index 1ac1d2ce21e069..dfe584e528d9d1 100644 --- a/ngraph/core/include/openvino/op/log.hpp +++ b/ngraph/core/include/openvino/op/log.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/util/unary_elementwise_arithmetic.hpp" +#include "openvino/op/util/unary_elementwise_arithmetic.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/log_softmax.hpp b/ngraph/core/include/openvino/op/log_softmax.hpp index d597cbb1848172..9dc3c787dcef37 100644 --- a/ngraph/core/include/openvino/op/log_softmax.hpp +++ b/ngraph/core/include/openvino/op/log_softmax.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/op.hpp" +#include "openvino/op/op.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/loop.hpp b/ngraph/core/include/openvino/op/loop.hpp index 337d56952b3876..636ba9b1495502 100644 --- a/ngraph/core/include/openvino/op/loop.hpp +++ b/ngraph/core/include/openvino/op/loop.hpp @@ -15,7 +15,7 @@ namespace ov { namespace op { namespace v5 { /// \brief Iterate a body over tensors, accumulating into tensors. 
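
For orientation, a hedged sketch of the If body accessors renamed in the if.hpp hunk above; cond, then_body and else_body are assumed to be built elsewhere, and placing If in the v8 opset namespace is an assumption.

#include <memory>
#include "openvino/op/if.hpp"

void wire_if(const ov::Output<ov::Node>& cond,
             const std::shared_ptr<ov::Function>& then_body,
             const std::shared_ptr<ov::Function>& else_body) {
    auto if_op = std::make_shared<ov::op::v8::If>(cond);  // opset version assumed
    if_op->set_then_body(then_body);  // ov::Function for the 'then' branch
    if_op->set_else_body(else_body);  // ov::Function for the 'else' branch
}
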
-class NGRAPH_API Loop : public op::util::SubGraphOp { +class OPENVINO_API Loop : public op::util::SubGraphOp { public: /// \brief Allows to define the purpose of inputs/outputs in the body struct SpecialBodyPorts { @@ -76,7 +76,7 @@ class NGRAPH_API Loop : public op::util::SubGraphOp { } // namespace op template <> -class NGRAPH_API AttributeAdapter +class OPENVINO_API AttributeAdapter : public DirectValueAccessor { public: AttributeAdapter(op::v5::Loop::SpecialBodyPorts& value) diff --git a/ngraph/core/include/openvino/op/lrn.hpp b/ngraph/core/include/openvino/op/lrn.hpp index 5f2112811fe501..af7a4f43b2d264 100644 --- a/ngraph/core/include/openvino/op/lrn.hpp +++ b/ngraph/core/include/openvino/op/lrn.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/op.hpp" +#include "openvino/op/op.hpp" namespace ov { namespace op { @@ -24,7 +24,7 @@ namespace v0 { /// | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | /// | \f$N[n, c, d_1,\dots,d_n]\f$ | The tensor \f$T\f$, where \f$T[n, c, d_1,\dots,d_n] = \frac{N[n,i,d_1,\dots,d_n]}{ (bias + alpha * (\sum_{i=max(0,(nsize-1)/2)}^{min(C, (nsize-1)/2)+1} N[n,i,d_1,\dots,d_n]^{2}) ^ {2})}\f$ | // clang-format on -class NGRAPH_API LRN : public Op { +class OPENVINO_API LRN : public Op { public: OPENVINO_OP("LRN", "opset1"); BWDCMP_RTTI_DECLARATION; diff --git a/ngraph/core/include/openvino/op/matrix_nms.hpp b/ngraph/core/include/openvino/op/matrix_nms.hpp index 91ebea543aa8aa..2567e36e6a2f56 100644 --- a/ngraph/core/include/openvino/op/matrix_nms.hpp +++ b/ngraph/core/include/openvino/op/matrix_nms.hpp @@ -25,7 +25,7 @@ class OPENVINO_API MatrixNms : public util::NmsBase { // not bool sort_result_across_batch = false; // specifies the output tensor type - ngraph::element::Type output_type = ngraph::element::i64; + ov::element::Type output_type = ov::element::i64; // specifies minimum score to consider box for the processing float score_threshold = 0.0f; // specifies maximum number of boxes to be selected per class, -1 meaning to diff --git a/ngraph/core/include/openvino/op/multiclass_nms.hpp b/ngraph/core/include/openvino/op/multiclass_nms.hpp index 6c9f86ae5588e7..5a3d108f6f0b71 100644 --- a/ngraph/core/include/openvino/op/multiclass_nms.hpp +++ b/ngraph/core/include/openvino/op/multiclass_nms.hpp @@ -23,7 +23,7 @@ class OPENVINO_API MulticlassNms : public util::NmsBase { // not bool sort_result_across_batch = false; // specifies the output tensor type - ngraph::element::Type output_type = ngraph::element::i64; + ov::element::Type output_type = ov::element::i64; // specifies intersection over union threshold float iou_threshold = 0.0f; // specifies minimum score to consider box for the processing diff --git a/ngraph/core/include/openvino/op/non_max_suppression.hpp b/ngraph/core/include/openvino/op/non_max_suppression.hpp index e63c563de7dc78..2d3e930aca94b8 100644 --- a/ngraph/core/include/openvino/op/non_max_suppression.hpp +++ b/ngraph/core/include/openvino/op/non_max_suppression.hpp @@ -108,7 +108,7 @@ class OPENVINO_API NonMaxSuppression : public Op { const Output& score_threshold, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); /// \brief Constructs a NonMaxSuppression operation with default 
values for the last /// 3 inputs @@ -123,7 +123,7 @@ class OPENVINO_API NonMaxSuppression : public Op { const Output& scores, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); bool visit_attributes(AttributeVisitor& visitor) override; void validate_and_infer_types() override; @@ -154,7 +154,7 @@ class OPENVINO_API NonMaxSuppression : public Op { protected: BoxEncodingType m_box_encoding = BoxEncodingType::CORNER; bool m_sort_result_descending = true; - ngraph::element::Type m_output_type = ngraph::element::i64; + ov::element::Type m_output_type = ov::element::i64; void validate(); int64_t max_boxes_output_from_input() const; }; @@ -188,7 +188,7 @@ class OPENVINO_API NonMaxSuppression : public op::v3::NonMaxSuppression { const Output& score_threshold, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); /// \brief Constructs a NonMaxSuppression operation with default values for the last /// 3 inputs @@ -203,7 +203,7 @@ class OPENVINO_API NonMaxSuppression : public op::v3::NonMaxSuppression { const Output& scores, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); void validate_and_infer_types() override; @@ -235,7 +235,7 @@ class OPENVINO_API NonMaxSuppression : public Op { const Output& scores, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); /// \brief Constructs a NonMaxSuppression operation with default values in the last. /// 3 inputs. @@ -253,7 +253,7 @@ class OPENVINO_API NonMaxSuppression : public Op { const Output& max_output_boxes_per_class, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); /// \brief Constructs a NonMaxSuppression operation with default values in the last. /// 2 inputs. @@ -273,7 +273,7 @@ class OPENVINO_API NonMaxSuppression : public Op { const Output& iou_threshold, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); /// \brief Constructs a NonMaxSuppression operation with default value in the last. /// input. @@ -295,7 +295,7 @@ class OPENVINO_API NonMaxSuppression : public Op { const Output& score_threshold, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); /// \brief Constructs a NonMaxSuppression operation. 
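
A hedged usage sketch of the two-input v5 constructor above, requesting i32 indices instead of the new ov::element::i64 default; boxes and scores are placeholders for real graph outputs.

#include <memory>
#include "openvino/op/non_max_suppression.hpp"

std::shared_ptr<ov::op::v5::NonMaxSuppression> make_nms(const ov::Output<ov::Node>& boxes,
                                                        const ov::Output<ov::Node>& scores) {
    using NMS = ov::op::v5::NonMaxSuppression;
    return std::make_shared<NMS>(boxes, scores, NMS::BoxEncodingType::CORNER,
                                 /*sort_result_descending=*/true, ov::element::i32);
}
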
/// @@ -318,7 +318,7 @@ class OPENVINO_API NonMaxSuppression : public Op { const Output& soft_nms_sigma, const BoxEncodingType box_encoding = BoxEncodingType::CORNER, const bool sort_result_descending = true, - const ngraph::element::Type& output_type = ngraph::element::i64); + const ov::element::Type& output_type = ov::element::i64); bool visit_attributes(AttributeVisitor& visitor) override; void validate_and_infer_types() override; @@ -355,7 +355,7 @@ class OPENVINO_API NonMaxSuppression : public Op { protected: BoxEncodingType m_box_encoding = BoxEncodingType::CORNER; bool m_sort_result_descending = true; - ngraph::element::Type m_output_type = ngraph::element::i64; + ov::element::Type m_output_type = ov::element::i64; void validate(); }; } // namespace v5 diff --git a/ngraph/core/include/openvino/op/parameter.hpp b/ngraph/core/include/openvino/op/parameter.hpp index 5092b0c19d0741..76b31adf66f674 100644 --- a/ngraph/core/include/openvino/op/parameter.hpp +++ b/ngraph/core/include/openvino/op/parameter.hpp @@ -25,7 +25,7 @@ class OPENVINO_API Parameter : public op::Op { /// /// \param element_type The element type of the parameter. /// \param pshape The partial shape of the parameter. - Parameter(const ngraph::element::Type& element_type, const PartialShape& pshape); + Parameter(const ov::element::Type& element_type, const PartialShape& pshape); bool visit_attributes(AttributeVisitor& visitor) override; diff --git a/ngraph/core/include/openvino/op/random_uniform.hpp b/ngraph/core/include/openvino/op/random_uniform.hpp index dbfdba15104157..3b917ce74b7e20 100644 --- a/ngraph/core/include/openvino/op/random_uniform.hpp +++ b/ngraph/core/include/openvino/op/random_uniform.hpp @@ -28,7 +28,7 @@ class OPENVINO_API RandomUniform : public Op { RandomUniform(const Output& out_shape, const Output& min_val, const Output& max_val, - const ngraph::element::Type& out_type, + const ov::element::Type& out_type, uint64_t global_seed = 0, uint64_t op_seed = 0); @@ -44,10 +44,10 @@ class OPENVINO_API RandomUniform : public Op { } /// \return The output tensor type. - const ngraph::element::Type& get_out_type() const { + const ov::element::Type& get_out_type() const { return m_output_type; } - void set_out_type(const ngraph::element::Type& output_type) { + void set_out_type(const ov::element::Type& output_type) { m_output_type = output_type; } @@ -72,7 +72,7 @@ class OPENVINO_API RandomUniform : public Op { bool has_evaluate() const override; protected: - ngraph::element::Type m_output_type; + ov::element::Type m_output_type; uint64_t m_global_seed; uint64_t m_op_seed; diff --git a/ngraph/core/include/openvino/op/read_value.hpp b/ngraph/core/include/openvino/op/read_value.hpp index 77ca82d6e1dae6..744a8bb7199684 100644 --- a/ngraph/core/include/openvino/op/read_value.hpp +++ b/ngraph/core/include/openvino/op/read_value.hpp @@ -65,7 +65,7 @@ class OPENVINO_API ReadValue : public util::ReadValueBase { bool visit_attributes(AttributeVisitor& visitor) override; std::string get_variable_id() const override { - NGRAPH_CHECK(m_variable, "Variable is not initialized. Variable_id is unavailable"); + OPENVINO_ASSERT(m_variable, "Variable is not initialized. 
Variable_id is unavailable"); return m_variable->get_info().variable_id; } diff --git a/ngraph/core/include/openvino/op/relu.hpp b/ngraph/core/include/openvino/op/relu.hpp index f95d144b451ed3..96fb9e97d508b2 100644 --- a/ngraph/core/include/openvino/op/relu.hpp +++ b/ngraph/core/include/openvino/op/relu.hpp @@ -21,7 +21,7 @@ class OPENVINO_API Relu : public util::UnaryElementwiseArithmetic { /// \brief Constructs a Relu operation. /// /// \param arg Node that produces the input tensor. - Relu(const Output& arg); + Relu(const Output& arg); std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; diff --git a/ngraph/core/include/openvino/op/result.hpp b/ngraph/core/include/openvino/op/result.hpp index dfccdd82461b1b..7f2e2156b204e8 100644 --- a/ngraph/core/include/openvino/op/result.hpp +++ b/ngraph/core/include/openvino/op/result.hpp @@ -4,6 +4,7 @@ #pragma once +#include "openvino/core/layout.hpp" #include "openvino/op/op.hpp" namespace ov { @@ -39,6 +40,12 @@ class OPENVINO_API Result : public Op { bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; bool has_evaluate() const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + + /// \brief Returns current layout, or empty Layout if it is not set + Layout get_layout() const; + + /// \brief Sets layout runtime information to tensor + void set_layout(const Layout& layout); }; } // namespace v0 } // namespace op diff --git a/ngraph/core/include/openvino/op/reverse.hpp b/ngraph/core/include/openvino/op/reverse.hpp index ae4bb041dee8f0..f11c59c29f320e 100644 --- a/ngraph/core/include/openvino/op/reverse.hpp +++ b/ngraph/core/include/openvino/op/reverse.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/op.hpp" +#include "openvino/op/op.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/select.hpp b/ngraph/core/include/openvino/op/select.hpp index 01af632239b466..eaa3fd1e3e20b2 100644 --- a/ngraph/core/include/openvino/op/select.hpp +++ b/ngraph/core/include/openvino/op/select.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/op.hpp" +#include "openvino/op/op.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/sigmoid.hpp b/ngraph/core/include/openvino/op/sigmoid.hpp index b4a0047135f8a0..e39cfebc99c8fa 100644 --- a/ngraph/core/include/openvino/op/sigmoid.hpp +++ b/ngraph/core/include/openvino/op/sigmoid.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/util/unary_elementwise_arithmetic.hpp" +#include "openvino/op/util/unary_elementwise_arithmetic.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/space_to_batch.hpp b/ngraph/core/include/openvino/op/space_to_batch.hpp index 76be6cf8e357ab..e6e7c176bca7e1 100644 --- a/ngraph/core/include/openvino/op/space_to_batch.hpp +++ b/ngraph/core/include/openvino/op/space_to_batch.hpp @@ -37,8 +37,8 @@ class OPENVINO_API SpaceToBatch : public Op { /// input. 
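
The result.hpp hunk above adds layout runtime information to Result; a hedged usage sketch of the two new accessors (the result pointer is illustrative):

#include <memory>
#include "openvino/op/result.hpp"

void tag_result_layout(const std::shared_ptr<ov::op::v0::Result>& result) {
    result->set_layout(ov::Layout("NHWC"));      // stored as rt_info on the output tensor
    ov::Layout current = result->get_layout();   // empty Layout if never set
    (void)current;
}
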
SpaceToBatch(const Output& data, const Output& block_shape, - const Output& pads_begin, - const Output& pads_end); + const Output& pads_begin, + const Output& pads_end); void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; diff --git a/ngraph/core/include/openvino/op/strided_slice.hpp b/ngraph/core/include/openvino/op/strided_slice.hpp index 83f400f3713f8e..874957e4aff215 100644 --- a/ngraph/core/include/openvino/op/strided_slice.hpp +++ b/ngraph/core/include/openvino/op/strided_slice.hpp @@ -7,8 +7,8 @@ #include #include -#include "ngraph/op/util/attr_types.hpp" #include "openvino/op/op.hpp" +#include "openvino/op/util/attr_types.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/util/fft_base.hpp b/ngraph/core/include/openvino/op/util/fft_base.hpp index 7f51f5b98de7d7..3a14001936a137 100644 --- a/ngraph/core/include/openvino/op/util/fft_base.hpp +++ b/ngraph/core/include/openvino/op/util/fft_base.hpp @@ -4,8 +4,8 @@ #pragma once -#include "ngraph/op/op.hpp" -#include "ngraph/op/util/attr_types.hpp" +#include "openvino/op/op.hpp" +#include "openvino/op/util/attr_types.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/util/framework_node.hpp b/ngraph/core/include/openvino/op/util/framework_node.hpp index fd47d5c854ec67..fab84894285c9d 100644 --- a/ngraph/core/include/openvino/op/util/framework_node.hpp +++ b/ngraph/core/include/openvino/op/util/framework_node.hpp @@ -50,7 +50,7 @@ class OPENVINO_API FrameworkNodeAttrs { return m_attrs.end(); } - std::string operator[](const std::string& key) { + std::string& operator[](const std::string& key) { return m_attrs[key]; } @@ -59,7 +59,7 @@ class OPENVINO_API FrameworkNodeAttrs { } bool operator==(const FrameworkNodeAttrs& other) const { - return m_type_name == other.m_type_name && m_opset_name == other.m_opset_name && m_attrs == m_attrs; + return m_type_name == other.m_type_name && m_opset_name == other.m_opset_name && m_attrs == other.m_attrs; } private: @@ -98,8 +98,8 @@ class OPENVINO_API FrameworkNode : public Op { void cache_output_descriptor(); private: - std::vector> m_inputs_desc; - std::vector> m_output_desc; + std::vector> m_inputs_desc; + std::vector> m_output_desc; FrameworkNodeAttrs m_attrs; }; diff --git a/ngraph/core/include/openvino/op/util/index_reduction.hpp b/ngraph/core/include/openvino/op/util/index_reduction.hpp index adf1168edba435..c2ba2b9e67f4e7 100644 --- a/ngraph/core/include/openvino/op/util/index_reduction.hpp +++ b/ngraph/core/include/openvino/op/util/index_reduction.hpp @@ -14,7 +14,7 @@ namespace ov { namespace op { namespace util { -class NGRAPH_API IndexReduction : public Op { +class OPENVINO_API IndexReduction : public Op { protected: IndexReduction(); diff --git a/ngraph/core/include/openvino/op/util/multi_subgraph_base.hpp b/ngraph/core/include/openvino/op/util/multi_subgraph_base.hpp index d7b1fc4f88c267..fc81a70faa4dd3 100644 --- a/ngraph/core/include/openvino/op/util/multi_subgraph_base.hpp +++ b/ngraph/core/include/openvino/op/util/multi_subgraph_base.hpp @@ -4,9 +4,9 @@ #pragma once -#include "ngraph/op/parameter.hpp" #include "openvino/core/function.hpp" #include "openvino/op/op.hpp" +#include "openvino/op/parameter.hpp" namespace ov { namespace op { @@ -266,7 +266,7 @@ class OPENVINO_API MultiSubGraphOp : public Op { /// /// \param value The value supplied as an input to the block. /// \param bodies_parameters vector of bodies parameters. 
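
Two genuine bug fixes appear in the framework_node.hpp hunk above: operator[] now returns std::string& instead of a copy, and operator== compares against the other object's attributes instead of its own. A hedged sketch of the resulting behavior (key and value are illustrative):

#include "openvino/op/util/framework_node.hpp"

void attrs_demo() {
    ov::op::util::FrameworkNodeAttrs a, b;
    a["original_type"] = "CustomOp";  // compiles via the reference-returning operator[]
    bool equal = (a == b);            // false now that m_attrs is compared with other.m_attrs
    (void)equal;
}
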
- virtual void set_invariant_inputs(const Output& value, const ngraph::ParameterVector& bodies_parameters); + virtual void set_invariant_inputs(const Output& value, const ov::ParameterVector& bodies_parameters); /// /// \brief Set output decriptions for MultiSubGraphOp output. /// @@ -303,7 +303,7 @@ class OPENVINO_API AttributeAdapter>& value) : DirectValueAccessor>>(value) {} - OPENVINO_RTTI("AttributeAdapter>>") + OPENVINO_RTTI("AttributeAdapter>>") BWDCMP_RTTI_DECLARATION; }; @@ -314,8 +314,7 @@ class OPENVINO_API AttributeAdapter>& value) : DirectValueAccessor>>(value) {} - OPENVINO_RTTI( - "AttributeAdapter>>"); + OPENVINO_RTTI("AttributeAdapter>>"); BWDCMP_RTTI_DECLARATION; }; diff --git a/ngraph/core/include/openvino/op/util/nms_base.hpp b/ngraph/core/include/openvino/op/util/nms_base.hpp index cf250d201c52a0..c76c5219ca41f8 100644 --- a/ngraph/core/include/openvino/op/util/nms_base.hpp +++ b/ngraph/core/include/openvino/op/util/nms_base.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/op.hpp" +#include "openvino/op/op.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/util/scatter_nd_base.hpp b/ngraph/core/include/openvino/op/util/scatter_nd_base.hpp index 7337f745810ac2..138343b53ae6e3 100644 --- a/ngraph/core/include/openvino/op/util/scatter_nd_base.hpp +++ b/ngraph/core/include/openvino/op/util/scatter_nd_base.hpp @@ -4,7 +4,7 @@ #pragma once -#include "ngraph/op/op.hpp" +#include "openvino/op/op.hpp" namespace ov { namespace op { diff --git a/ngraph/core/include/openvino/op/util/sub_graph_base.hpp b/ngraph/core/include/openvino/op/util/sub_graph_base.hpp index 70b550405fe704..1ecf16ff90e663 100644 --- a/ngraph/core/include/openvino/op/util/sub_graph_base.hpp +++ b/ngraph/core/include/openvino/op/util/sub_graph_base.hpp @@ -4,8 +4,8 @@ #pragma once -#include "ngraph/op/parameter.hpp" -#include "ngraph/op/util/multi_subgraph_base.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/util/multi_subgraph_base.hpp" namespace ov { namespace op { @@ -57,7 +57,7 @@ class OPENVINO_API SubGraphOp : public MultiSubGraphOp { /// \param end The last index on axis of the slicing /// \param axis The axis to slice along /// - virtual void set_sliced_input(const std::shared_ptr& parameter, + virtual void set_sliced_input(const std::shared_ptr& parameter, const Output& value, int64_t start, int64_t stride, @@ -75,7 +75,7 @@ class OPENVINO_API SubGraphOp : public MultiSubGraphOp { /// The value is what is active in the most recent /// completed iteration. 
/// - virtual void set_merged_input(const std::shared_ptr& body_parameter, + virtual void set_merged_input(const std::shared_ptr& body_parameter, const Output& initial_value, const Output& successive_value); /// @@ -86,7 +86,7 @@ class OPENVINO_API SubGraphOp : public MultiSubGraphOp { /// \param body_parameter The body parameter /// \param value The value supplied as an input to the block /// - virtual void set_invariant_input(const std::shared_ptr& body_parameter, + virtual void set_invariant_input(const std::shared_ptr& body_parameter, const Output& value); /// /// \brief Gets a value for a particular iteration point diff --git a/ngraph/core/include/openvino/op/util/variable_context.hpp b/ngraph/core/include/openvino/op/util/variable_context.hpp index 06a8c579fe303c..463e57e269122e 100644 --- a/ngraph/core/include/openvino/op/util/variable_context.hpp +++ b/ngraph/core/include/openvino/op/util/variable_context.hpp @@ -18,7 +18,7 @@ namespace util { using VariableMap = std::unordered_map; /// VariableContext stores and manages a evaluation context for Variables. -class NGRAPH_API VariableContext { +class OPENVINO_API VariableContext { public: /// \brief Constructs an uninitialized VariableContext. VariableContext() = default; @@ -74,7 +74,7 @@ class NGRAPH_API VariableContext { } // namespace util } // namespace op template <> -class NGRAPH_API VariantWrapper : public VariantImpl { +class OPENVINO_API VariantWrapper : public VariantImpl { public: OPENVINO_RTTI("VariantWrapper"); BWDCMP_RTTI_DECLARATION; diff --git a/ngraph/core/include/openvino/op/util/variable_extension.hpp b/ngraph/core/include/openvino/op/util/variable_extension.hpp index 53c16c22065d30..8208e1bb0087f8 100644 --- a/ngraph/core/include/openvino/op/util/variable_extension.hpp +++ b/ngraph/core/include/openvino/op/util/variable_extension.hpp @@ -6,7 +6,6 @@ #include -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/core/core_visibility.hpp" #include "openvino/op/util/variable.hpp" diff --git a/ngraph/core/include/openvino/pass/constant_folding.hpp b/ngraph/core/include/openvino/pass/constant_folding.hpp index b1e5a22b682da0..d1eb93022ad078 100644 --- a/ngraph/core/include/openvino/pass/constant_folding.hpp +++ b/ngraph/core/include/openvino/pass/constant_folding.hpp @@ -16,13 +16,13 @@ namespace pass { class OPENVINO_API ConstantFolding : public FunctionPass { public: OPENVINO_RTTI("ConstantFolding"); - bool run_on_function(std::shared_ptr f) override; + bool run_on_function(std::shared_ptr f) override; private: void copy_runtime_info_to_target_inputs(const std::shared_ptr& node, const Output& replacement); /// \brief Folds pre-calculated output tensor values to constants in case lower and /// upper estimations are equal. Traverses graph backwards starting from the results. 
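
A hedged sketch of the sliced-input wiring declared in the sub_graph_base.hpp hunk above; the full parameter list (start, stride, part_size, end, axis) is assumed from the API, and all values are illustrative.

#include <memory>
#include "openvino/op/util/sub_graph_base.hpp"

void wire_sliced_input(const std::shared_ptr<ov::op::util::SubGraphOp>& op,
                       const std::shared_ptr<ov::op::v0::Parameter>& body_param,
                       const ov::Output<ov::Node>& outer_sequence) {
    op->set_sliced_input(body_param, outer_sequence,
                         /*start=*/0, /*stride=*/1, /*part_size=*/1,
                         /*end=*/-1, /*axis=*/0);
}
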
- bool pre_calculated_values_folding(const std::shared_ptr& f); + bool pre_calculated_values_folding(const std::shared_ptr& f); }; } // namespace pass } // namespace ov diff --git a/ngraph/core/include/openvino/pass/convert_fp32_to_fp16.hpp b/ngraph/core/include/openvino/pass/convert_fp32_to_fp16.hpp index 1595b1eef521df..23c2654e5e2fcb 100644 --- a/ngraph/core/include/openvino/pass/convert_fp32_to_fp16.hpp +++ b/ngraph/core/include/openvino/pass/convert_fp32_to_fp16.hpp @@ -11,7 +11,7 @@ namespace pass { class OPENVINO_API ConvertFP32ToFP16 : public FunctionPass { public: OPENVINO_RTTI("ConvertFP32ToFP16"); - bool run_on_function(std::shared_ptr) override; + bool run_on_function(std::shared_ptr) override; }; } // namespace pass } // namespace ov diff --git a/ngraph/core/include/openvino/pass/low_latency.hpp b/ngraph/core/include/openvino/pass/low_latency.hpp index a0d722777481fd..b23c8c59f51988 100644 --- a/ngraph/core/include/openvino/pass/low_latency.hpp +++ b/ngraph/core/include/openvino/pass/low_latency.hpp @@ -39,7 +39,7 @@ class OPENVINO_API LowLatency2 : public FunctionPass { explicit LowLatency2(bool use_const_initializer = true) : m_use_const_initializer(use_const_initializer) {} - bool run_on_function(std::shared_ptr f) override; + bool run_on_function(std::shared_ptr f) override; private: bool m_use_const_initializer; diff --git a/ngraph/core/include/openvino/pass/manager.hpp b/ngraph/core/include/openvino/pass/manager.hpp index 6f5926b41b8782..d285371de32f02 100644 --- a/ngraph/core/include/openvino/pass/manager.hpp +++ b/ngraph/core/include/openvino/pass/manager.hpp @@ -67,8 +67,8 @@ class OPENVINO_API Manager { /// reasons for /// some cases. /// Callback example: - /// auto callback = [](const std::shared_ptr & node) -> bool { - /// return std::dynamic_pointer_cast(node) != + /// auto callback = [](const std::shared_ptr & node) -> bool { + /// return std::dynamic_pointer_cast(node) != /// nullptr; /// }; /// This callback returns true in case of DepthToSpace operation. So when execution diff --git a/ngraph/core/include/openvino/pass/pass.hpp b/ngraph/core/include/openvino/pass/pass.hpp index f281b67cdd138b..931bec26111dbf 100644 --- a/ngraph/core/include/openvino/pass/pass.hpp +++ b/ngraph/core/include/openvino/pass/pass.hpp @@ -8,9 +8,9 @@ #include #include -#include "ngraph/util.hpp" #include "openvino/core/core_visibility.hpp" #include "openvino/core/deprecated.hpp" +#include "openvino/core/enum_mask.hpp" #include "openvino/core/function.hpp" #include "openvino/core/node.hpp" #include "openvino/pass/pass_config.hpp" @@ -24,7 +24,7 @@ enum class PassProperty : uint32_t { CHANGE_DYNAMIC_STATE = 1 << 1, }; -using PassPropertyMask = ngraph::EnumMask; +using PassPropertyMask = ov::EnumMask; class OPENVINO_API PassBase { friend class Manager; @@ -61,7 +61,7 @@ class OPENVINO_API PassBase { /// This method remains here only for backward compatibility and will be removed /// after all transformations are moved to transformation_callback() method. 
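
To show how the function passes touched above are typically driven, a hedged sketch using the pass manager; f stands for an existing ov::Function.

#include <memory>
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"

void fold_constants(const std::shared_ptr<ov::Function>& f) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::ConstantFolding>();
    manager.run_passes(f);
}
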
/// \return result of callback execution for given node - NGRAPH_DEPRECATED("Please use transformation_callback method instead") + OPENVINO_DEPRECATED("Please use transformation_callback method instead") bool m_transformation_callback(const std::shared_ptr& node) { return m_pass_config->get_callback(get_type_info())(node); } @@ -91,7 +91,7 @@ class OPENVINO_API FunctionPass : public PassBase { public: OPENVINO_RTTI("ov::pass::FunctionPass"); ~FunctionPass() override; - virtual bool run_on_function(std::shared_ptr) = 0; + virtual bool run_on_function(std::shared_ptr) = 0; }; class Manager; @@ -105,6 +105,6 @@ enum class FusionType : uint32_t { FOP_FUSIONS = 0x4, ALL_FUSIONS = 0xFFFFFFFF }; -using FusionTypeMask = ngraph::EnumMask; +using FusionTypeMask = ov::EnumMask; } // namespace pass } // namespace ov diff --git a/ngraph/core/include/openvino/pass/pass_config.hpp b/ngraph/core/include/openvino/pass/pass_config.hpp index 9909178a8098be..d36992ec3d7dc6 100644 --- a/ngraph/core/include/openvino/pass/pass_config.hpp +++ b/ngraph/core/include/openvino/pass/pass_config.hpp @@ -9,7 +9,6 @@ #include #include "ngraph/compatibility.hpp" -#include "ngraph/util.hpp" #include "openvino/core/core_visibility.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/core/function.hpp" @@ -96,8 +95,8 @@ class OPENVINO_API PassConfig { /// /// Example below show how to set callback for one or multiple passes using this method. /// - /// pass_config->set_callback( + /// pass_config->set_callback( /// [](const_node_ptr &node) -> bool { /// // Disable transformations for cases when input shape rank is not /// equal to 4 @@ -145,9 +144,9 @@ class OPENVINO_API PassConfig { /// \return callback lambda function template ::value, bool>::type = true> param_callback get_callback() const { - NGRAPH_SUPPRESS_DEPRECATED_START + OPENVINO_SUPPRESS_DEPRECATED_START return get_callback(T::type_info); - NGRAPH_SUPPRESS_DEPRECATED_END + OPENVINO_SUPPRESS_DEPRECATED_END } template ::value, bool>::type = true> param_callback get_callback() const { @@ -165,9 +164,9 @@ class OPENVINO_API PassConfig { /// \return true if transformation type was disabled and false otherwise template ::value, bool>::type = true> bool is_disabled() const { - NGRAPH_SUPPRESS_DEPRECATED_START + OPENVINO_SUPPRESS_DEPRECATED_START return is_disabled(T::type_info); - NGRAPH_SUPPRESS_DEPRECATED_END + OPENVINO_SUPPRESS_DEPRECATED_END } template ::value, bool>::type = true> bool is_disabled() const { @@ -185,9 +184,9 @@ class OPENVINO_API PassConfig { /// \return true if transformation type was force enabled and false otherwise template ::value, bool>::type = true> bool is_enabled() const { - NGRAPH_SUPPRESS_DEPRECATED_START + OPENVINO_SUPPRESS_DEPRECATED_START return is_enabled(T::type_info); - NGRAPH_SUPPRESS_DEPRECATED_END + OPENVINO_SUPPRESS_DEPRECATED_END } template ::value, bool>::type = true> bool is_enabled() const { @@ -197,7 +196,7 @@ class OPENVINO_API PassConfig { void add_disabled_passes(const PassConfig& rhs); private: - param_callback m_callback = [](const std::shared_ptr&) { + param_callback m_callback = [](const std::shared_ptr&) { return false; }; param_callback_map m_callback_map; diff --git a/ngraph/core/include/openvino/pass/pattern/matcher.hpp b/ngraph/core/include/openvino/pass/pattern/matcher.hpp index 261be2f86bc556..3da1da51561be3 100644 --- a/ngraph/core/include/openvino/pass/pattern/matcher.hpp +++ b/ngraph/core/include/openvino/pass/pattern/matcher.hpp @@ -9,9 +9,9 @@ #include #include -#include "ngraph/op/constant.hpp" 
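
A hedged sketch of the callback mechanism documented in the pass_config.hpp hunk above, using the generic set_callback overload; the 4-D rank check mirrors the example given in the header's comment.

#include <memory>
#include "openvino/pass/pass_config.hpp"

void disable_for_non_4d(const std::shared_ptr<ov::pass::PassConfig>& pass_config) {
    pass_config->set_callback([](const std::shared_ptr<const ov::Node>& node) -> bool {
        // Returning true asks transformations honoring the callback to skip the node.
        const auto& rank = node->get_input_partial_shape(0).rank();
        return !rank.is_static() || rank.get_length() != 4;
    });
}
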
#include "openvino/core/except.hpp" #include "openvino/core/node.hpp" +#include "openvino/op/constant.hpp" #include "openvino/pass/pattern/op/any.hpp" #include "openvino/pass/pattern/op/any_of.hpp" #include "openvino/pass/pattern/op/any_output.hpp" diff --git a/ngraph/core/include/openvino/pass/visualize_tree.hpp b/ngraph/core/include/openvino/pass/visualize_tree.hpp index b9447a6447a970..0afba2a8575b79 100644 --- a/ngraph/core/include/openvino/pass/visualize_tree.hpp +++ b/ngraph/core/include/openvino/pass/visualize_tree.hpp @@ -24,7 +24,7 @@ namespace ov { namespace pass { class OPENVINO_API VisualizeTree : public FunctionPass { public: - OPENVINO_RTTI("ngraph::pass::VisualizeTree"); + OPENVINO_RTTI("ov::pass::VisualizeTree"); using node_modifiers_t = std::function& attributes)>; VisualizeTree(const std::string& file_name, node_modifiers_t nm = nullptr, bool dot_only = false); diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/scatter_update.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/scatter_update.hpp index 0b4025d431db9f..6224726d0011d5 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/scatter_update.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/scatter_update.hpp @@ -4,13 +4,32 @@ #pragma once +#include + #include "ngraph/check.hpp" #include "ngraph/coordinate_transform.hpp" #include "ngraph/shape.hpp" +#include "ngraph/util.hpp" namespace ngraph { namespace runtime { namespace reference { +static const CoordinateTransformBasic get_target_shape(const Shape& data_shape, + const Coordinate& start_corner, + const Coordinate& end_corner) { + const auto m_n_axes = data_shape.size(); + Shape target_shape; + target_shape.reserve(m_n_axes); + AxisVector axis_order(m_n_axes); + std::iota(axis_order.begin(), axis_order.end(), 0); + const Strides strides(m_n_axes, 1); + for (size_t axis = 0; axis < m_n_axes; axis++) { + target_shape.push_back( + ceil_div(end_corner[axis_order[axis]] - start_corner[axis_order[axis]], strides[axis_order[axis]])); + } + return target_shape; +} + void scatter_update(const char* input_data, const int64_t* indices, const char* updates, @@ -36,43 +55,57 @@ void scatter_update(const char* input_data, // for d_coord in slice data[..., i_idx, ...], // u_coord in slice updates[..., i_coord, ...] 
// data[index(d_coord)] = updates[index(u_coord)] - - NGRAPH_SUPPRESS_DEPRECATED_START - CoordinateTransform indices_transform{indices_shape}; - CoordinateTransform data_transform{data_shape}; + CoordinateTransformBasic indices_transform{indices_shape}; + const auto indices_in_strides = row_major_strides(indices_shape); size_t indices_ndim = indices_shape.size(); size_t updates_ndim = updates_shape.size(); + size_t data_ndim = data_shape.size(); + + const auto size_after_axis = shape_size(Shape(data_shape.begin() + axis + 1, data_shape.end())); + int num_axis_jumps{0}; + int num_unary_moves{0}; + for (size_t i = axis + 1; i < updates_ndim; ++i) { + const auto updates_size_after_axis = shape_size(Shape(updates_shape.begin() + i, updates_shape.end())); + if (updates_size_after_axis > size_after_axis) + ++num_axis_jumps; + if (updates_shape[i] == 1) + ++num_unary_moves; + } + + if (!num_axis_jumps) + num_axis_jumps = updates_ndim - data_ndim; + + auto updates_axis_dim = axis + num_axis_jumps + num_unary_moves; + + if (updates_axis_dim >= updates_ndim) + updates_axis_dim = updates_ndim - 1; - // Create an outer CoordinateTransform for "update", which would allow to - // iterate only over "indices" dimensions: - // set to "1" all non-indices dimensions - // updates[1, ..., 1, m, n, ..., p, 1, 1,..., 1] Coordinate updates_indices_start_corner(updates_ndim, 0); Coordinate updates_indices_end_corner(updates_ndim, 1); + for (size_t i = 0; i < indices_ndim; ++i) { updates_indices_end_corner[axis + i] = updates_shape[axis + i]; } - CoordinateTransform updates_indices_transform(updates_shape, - updates_indices_start_corner, - updates_indices_end_corner); - // Is needed to simultaneously iterate over updates coordinates while - // iterating over indices. + + const auto updates_indices_transform = + get_target_shape(updates_shape, updates_indices_start_corner, updates_indices_end_corner); auto updates_indices_coord_iter = updates_indices_transform.begin(); + int iteration{0}; for (const Coordinate& indices_cord : indices_transform) { - const size_t indices_idx = indices_transform.index(indices_cord); + const size_t indices_idx = + std::inner_product(indices_cord.begin(), indices_cord.end(), indices_in_strides.begin(), 0); int64_t slice_index = indices[indices_idx]; - // Define the extent of coordinates which will be updated. Coordinate out_start_corner(data_shape.size(), 0); Coordinate out_end_corner(data_shape); out_start_corner[axis] = static_cast(slice_index); out_end_corner[axis] = out_start_corner[axis] + 1; - CoordinateTransform out_transform(data_shape, out_start_corner, out_end_corner); - // Define the CoordinateTransform for updates coordinates. - // All except indices-dimensions. + const auto out_transform = get_target_shape(data_shape, out_start_corner, out_end_corner); + const auto out_transform_in_strides = row_major_strides(data_shape); + if (updates_indices_coord_iter == updates_indices_transform.end()) break; Coordinate updates_update_start_corner = *updates_indices_coord_iter; @@ -80,27 +113,32 @@ void scatter_update(const char* input_data, for (size_t i = 0; i < indices_ndim; ++i) { updates_update_end_corner[axis + i] = updates_update_start_corner[axis + i] + 1; } - // The m, n, .., p symbols stand for values at those axes. - // The m+1 means value at axis m plus 1. - // udpates_shape (start): [ 0, ..., 0, m , n , ... p , 0, ..., 0] - // updates_shape (end): [-1, ..., -1, m+1, n+1, ... 
p+1, -1, ..., -1] - CoordinateTransform updates_update_transform(updates_shape, - updates_update_start_corner, - updates_update_end_corner); + + const auto updates_update_transform = + get_target_shape(updates_shape, updates_update_start_corner, updates_update_end_corner); + const auto updates_update_in_strides = row_major_strides(updates_shape); auto updates_update_coord_iter = updates_update_transform.begin(); + for (const Coordinate& out_cord : out_transform) { if (updates_update_coord_iter == updates_update_transform.end()) break; - const auto src_idx = updates_update_transform.index(*updates_update_coord_iter) * elem_size; - std::copy(updates + src_idx, - updates + (src_idx + elem_size), - out_buf + out_transform.index(out_cord) * elem_size); + Coordinate update_cord = *updates_update_coord_iter; + Coordinate out_coord = out_cord; + out_coord.at(axis) = slice_index; + update_cord.at(updates_axis_dim) += iteration; + const auto data_idx = + std::inner_product(out_coord.begin(), out_coord.end(), out_transform_in_strides.begin(), 0); + const auto updates_idx = + std::inner_product(update_cord.begin(), update_cord.end(), updates_update_in_strides.begin(), 0) * + elem_size; + + std::copy(updates + updates_idx, updates + (updates_idx + elem_size), out_buf + data_idx * elem_size); updates_update_coord_iter++; } updates_indices_coord_iter++; + iteration++; } - NGRAPH_SUPPRESS_DEPRECATED_END } } // namespace reference } // namespace runtime -} // namespace ngraph +} // namespace ngraph \ No newline at end of file diff --git a/ngraph/core/src/layout.cpp b/ngraph/core/src/layout.cpp index 0c23fdfd099851..f432f3a28dee9a 100644 --- a/ngraph/core/src/layout.cpp +++ b/ngraph/core/src/layout.cpp @@ -67,6 +67,11 @@ Layout Layout::scalar() { // 2. can define order and meaning for dimensions "NCHW" // 3. partial layout specialization "NC?" 
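
The layout.cpp hunk that follows makes an empty string construct a dynamic layout rather than fail the assertion; a hedged sketch of the resulting behavior:

#include "openvino/core/layout.hpp"

void layout_demo() {
    ov::Layout dynamic_layout("");  // accepted after the change below
    ov::Layout nchw("NCHW");        // explicit dimension order, as before
    (void)dynamic_layout;
    (void)nchw;
}
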
Layout::Layout(const std::string& layout_str) { + if (layout_str.empty()) { + m_dynamic = true; + m_left_size = m_right_size = 0; + return; + } auto layout = ngraph::trim(layout_str); OPENVINO_ASSERT(layout.length() > 0, "Cannot parse ov::Layout from an empty string"); if (layout == SCALAR) { diff --git a/ngraph/core/src/op/result.cpp b/ngraph/core/src/op/result.cpp index 2d70921d790a19..5958986b8aa392 100644 --- a/ngraph/core/src/op/result.cpp +++ b/ngraph/core/src/op/result.cpp @@ -67,6 +67,20 @@ bool op::Result::constant_fold(OutputVector& output_values, const OutputVector& return false; } +ov::Layout op::Result::get_layout() const { + auto it = get_output_tensor(0).get_rt_info().find("LAYOUT"); + if (it == get_output_tensor(0).get_rt_info().end()) { + return {}; + } + auto layout = std::dynamic_pointer_cast>(it->second); + OPENVINO_ASSERT(layout, "'LAYOUT' runtime info for node is invalid, use set_layout API"); + return layout->get(); +} + +void op::Result::set_layout(const ov::Layout& layout) { + get_output_tensor(0).get_rt_info()["LAYOUT"] = std::make_shared>(layout); +} + BWDCMP_RTTI_DEFINITION(ov::AttributeAdapter); ov::AttributeAdapter::AttributeAdapter(ResultVector& ref) : m_ref(ref) {} diff --git a/ngraph/core/src/op/scatter_update.cpp b/ngraph/core/src/op/scatter_update.cpp index d9ec7918d027f5..80b8a2cb29c9df 100644 --- a/ngraph/core/src/op/scatter_update.cpp +++ b/ngraph/core/src/op/scatter_update.cpp @@ -109,4 +109,4 @@ bool op::v3::ScatterUpdate::has_evaluate() const { break; } return false; -} +} \ No newline at end of file diff --git a/ngraph/core/src/op/util/multi_subgraph_base.cpp b/ngraph/core/src/op/util/multi_subgraph_base.cpp index abbcf5b2d6d7db..c4c1e1f6239eb9 100644 --- a/ngraph/core/src/op/util/multi_subgraph_base.cpp +++ b/ngraph/core/src/op/util/multi_subgraph_base.cpp @@ -135,6 +135,7 @@ void ov::op::util::MultiSubGraphOp::set_invariant_inputs(const Output& val } } } + validate_and_infer_types(); } ov::Output ov::op::util::MultiSubGraphOp::set_body_outputs(const ResultVector& bodies_results) { @@ -149,6 +150,7 @@ ov::Output ov::op::util::MultiSubGraphOp::set_body_outputs(const Resul } } set_output_size(output_index + 1); + validate_and_infer_types(); return Output(shared_from_this(), output_index); } diff --git a/ngraph/core/src/preprocess/color_utils.cpp b/ngraph/core/src/preprocess/color_utils.cpp new file mode 100644 index 00000000000000..60eda3cbb00ada --- /dev/null +++ b/ngraph/core/src/preprocess/color_utils.cpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "color_utils.hpp" + +using namespace ov::preprocess; + +std::unique_ptr ColorFormatInfo::get(ColorFormat format) { + std::unique_ptr res; + switch (format) { + case ColorFormat::NV12_SINGLE_PLANE: + res.reset(new ColorFormatInfoNV12_Single(format)); + break; + case ColorFormat::NV12_TWO_PLANES: + res.reset(new ColorFormatInfoNV12_TwoPlanes(format)); + break; + default: + res.reset(new ColorFormatInfo(format)); + break; + } + return res; +} diff --git a/ngraph/core/src/preprocess/color_utils.hpp b/ngraph/core/src/preprocess/color_utils.hpp new file mode 100644 index 00000000000000..3758f849c5f165 --- /dev/null +++ b/ngraph/core/src/preprocess/color_utils.hpp @@ -0,0 +1,143 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/layout.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/preprocess/color_format.hpp" + +namespace ov 
{ +namespace preprocess { + +/// \brief Helper function to check if color format represents RGB family +inline bool is_rgb_family(const ColorFormat& format) { + return format == ColorFormat::RGB || format == ColorFormat::BGR; +} + +inline std::string color_format_name(ColorFormat format) { + std::string name; + switch (format) { + case ColorFormat::RGB: + name = "RGB"; + break; + case ColorFormat::BGR: + name = "BGR"; + break; + case ColorFormat::NV12_TWO_PLANES: + name = "NV12 (multi-plane)"; + break; + case ColorFormat::NV12_SINGLE_PLANE: + name = "NV12 (single plane)"; + break; + default: + name = "Unknown"; + break; + } + return name; +} + +/// \brief Internal helper class to get information depending on color format +class ColorFormatInfo { +public: + static std::unique_ptr get(ColorFormat format); + + virtual ~ColorFormatInfo() = default; + + virtual size_t planes_count() const { + return 1; + } + + virtual Layout default_layout() const { + return {}; + } + + // Calculate shape of plane based image shape in NHWC format + PartialShape shape(size_t plane_num, const PartialShape& image_src_shape) const { + OPENVINO_ASSERT(plane_num < planes_count(), + "Internal error: incorrect plane number specified for color format"); + return calculate_shape(plane_num, image_src_shape); + } + + std::string friendly_suffix(size_t plane_num) const { + OPENVINO_ASSERT(plane_num < planes_count(), + "Internal error: incorrect plane number specified for color format"); + return calc_name_suffix(plane_num); + } + +protected: + virtual PartialShape calculate_shape(size_t plane_num, const PartialShape& image_shape) const { + return image_shape; + } + virtual std::string calc_name_suffix(size_t plane_num) const { + return {}; + } + explicit ColorFormatInfo(ColorFormat format) : m_format(format) {} + ColorFormat m_format; +}; + +// --- Derived classes --- +class ColorFormatInfoNV12_Single : public ColorFormatInfo { +public: + explicit ColorFormatInfoNV12_Single(ColorFormat format) : ColorFormatInfo(format) {} + +protected: + PartialShape calculate_shape(size_t plane_num, const PartialShape& image_shape) const override { + PartialShape result = image_shape; + if (image_shape.rank().is_static() && image_shape.rank().get_length() == 4) { + result[3] = 1; + if (result[1].is_static()) { + result[1] = result[1].get_length() * 3 / 2; + } + } + return result; + } + + Layout default_layout() const override { + return "NHWC"; + } +}; + +class ColorFormatInfoNV12_TwoPlanes : public ColorFormatInfo { +public: + explicit ColorFormatInfoNV12_TwoPlanes(ColorFormat format) : ColorFormatInfo(format) {} + + size_t planes_count() const override { + return 2; + } + +protected: + PartialShape calculate_shape(size_t plane_num, const PartialShape& image_shape) const override { + PartialShape result = image_shape; + if (image_shape.rank().is_static() && image_shape.rank().get_length() == 4) { + if (plane_num == 0) { + result[3] = 1; + return result; + } else { + // UV plane has half or width and half of height. 
Number of channels is 2 + if (result[1].is_static()) { + result[1] = result[1].get_length() / 2; + } + if (result[2].is_static()) { + result[2] = result[2].get_length() / 2; + } + result[3] = 2; + } + } + return result; + } + std::string calc_name_suffix(size_t plane_num) const override { + if (plane_num == 0) { + return "/Y"; + } + return "/UV"; + } + + Layout default_layout() const override { + return "NHWC"; + } +}; + +} // namespace preprocess +} // namespace ov diff --git a/ngraph/core/src/preprocess/function_guard.hpp b/ngraph/core/src/preprocess/function_guard.hpp new file mode 100644 index 00000000000000..c7bad81c923966 --- /dev/null +++ b/ngraph/core/src/preprocess/function_guard.hpp @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/function.hpp" + +namespace ov { +namespace preprocess { + +/// \brief Internal guard to make preprocess builder exception-safe +class FunctionGuard { + std::shared_ptr m_function; + ParameterVector m_parameters; + std::map, std::set>> m_backup; + bool m_done = false; + +public: + FunctionGuard(const std::shared_ptr& f) : m_function(f) { + m_parameters = f->get_parameters(); + for (const auto& param : f->get_parameters()) { + m_backup.insert({param, param->output(0).get_target_inputs()}); + } + } + virtual ~FunctionGuard() { + if (!m_done) { + try { + auto params = m_function->get_parameters(); + // Remove parameters added by preprocessing + for (const auto& param : params) { + m_function->remove_parameter(param); + } + // Insert old parameters and update consumers + for (const auto& item : m_backup) { + // Replace consumers + for (auto consumer : item.second) { + consumer.replace_source_output(item.first); + } + } + m_function->add_parameters(m_parameters); + } catch (std::exception& ex) { + // Stress condition, can't recover function to original state + std::cerr << "Unrecoverable error occurred during preprocessing. 
Function is corrupted, exiting\n"; + exit(EXIT_FAILURE); + } + } + } + void reset() noexcept { + m_done = true; + } +}; + +} // namespace preprocess +} // namespace ov diff --git a/ngraph/core/src/preprocess/pre_post_process.cpp b/ngraph/core/src/preprocess/pre_post_process.cpp index 0445bc90bfddf4..360a6a51b8777f 100644 --- a/ngraph/core/src/preprocess/pre_post_process.cpp +++ b/ngraph/core/src/preprocess/pre_post_process.cpp @@ -4,6 +4,8 @@ #include "openvino/core/preprocess/pre_post_process.hpp" +#include "color_utils.hpp" +#include "function_guard.hpp" #include "ngraph/opsets/opset1.hpp" #include "openvino/core/function.hpp" #include "preprocess_steps_impl.hpp" @@ -11,10 +13,9 @@ namespace ov { namespace preprocess { -/// \brief InputTensorInfoImpl - internal data structure -class InputTensorInfo::InputTensorInfoImpl { +class TensorInfoImplBase { public: - InputTensorInfoImpl() = default; + TensorInfoImplBase() = default; void set_element_type(const element::Type& type) { m_type = type; @@ -38,6 +39,19 @@ class InputTensorInfo::InputTensorInfoImpl { return m_layout; } +protected: + element::Type m_type = element::dynamic; + bool m_type_set = false; + + Layout m_layout = Layout(); + bool m_layout_set = false; +}; + +/// \brief InputTensorInfoImpl - internal data structure +class InputTensorInfo::InputTensorInfoImpl : public TensorInfoImplBase { +public: + InputTensorInfoImpl() = default; + bool is_spatial_shape_set() const { return m_spatial_shape_set; } @@ -66,7 +80,39 @@ class InputTensorInfo::InputTensorInfoImpl { m_spatial_width = static_cast(width); } + const ColorFormat& get_color_format() const { + return m_color_format; + } + + void set_color_format(ColorFormat format, const std::vector& sub_names) { + auto info = ColorFormatInfo::get(format); + if (info->planes_count() == 1) { + OPENVINO_ASSERT(sub_names.empty(), + "Plane names are not allowed for single plane color format '", + color_format_name(format), + "'"); + } else if (!sub_names.empty()) { + OPENVINO_ASSERT(sub_names.size() == info->planes_count(), + "Number of sub-names (", + sub_names.size(), + ") shall match with number of planes for '", + color_format_name(format), + "' color format (", + info->planes_count(), + ")"); + } + m_planes_sub_names = sub_names; + m_color_format = format; + } + + const std::vector& planes_sub_names() const { + return m_planes_sub_names; + } + private: + ColorFormat m_color_format = ColorFormat::UNDEFINED; + std::vector m_planes_sub_names; + element::Type m_type = element::dynamic; bool m_type_set = false; @@ -78,10 +124,12 @@ class InputTensorInfo::InputTensorInfoImpl { bool m_spatial_shape_set = false; }; +class OutputTensorInfo::OutputTensorInfoImpl : public TensorInfoImplBase {}; + /// \brief InputNetworkInfoImpl - internal data structure -class InputNetworkInfo::InputNetworkInfoImpl { +class NetworkInfoImpl { public: - InputNetworkInfoImpl() = default; + NetworkInfoImpl() = default; void set_layout(const Layout& layout) { m_layout = layout; @@ -99,6 +147,10 @@ class InputNetworkInfo::InputNetworkInfoImpl { bool m_layout_set = false; }; +class InputNetworkInfo::InputNetworkInfoImpl : public NetworkInfoImpl {}; + +class OutputNetworkInfo::OutputNetworkInfoImpl : public NetworkInfoImpl {}; + /// \brief InputInfoImpl - internal data structure struct InputInfo::InputInfoImpl { InputInfoImpl() = default; @@ -120,6 +172,35 @@ struct InputInfo::InputInfoImpl { std::unique_ptr m_tensor_data; std::unique_ptr m_preprocess; std::unique_ptr m_network_data; + std::shared_ptr m_resolved_param; +}; + 
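// Editor's illustrative sketch (not part of this patch): how the color-format API added
// above might be exercised by a user of the preprocess builders in this file. The
// Function pointer "f" and the plane sub-names "y"/"uv" are hypothetical; the builder
// calls mirror the rvalue-chained methods defined in this patch.
//
//   using namespace ov::preprocess;
//   f = PrePostProcessor()
//           .input(InputInfo()
//                      .tensor(InputTensorInfo()
//                                  .set_element_type(element::u8)
//                                  .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}))
//                      .preprocess(PreProcessSteps().convert_color(ColorFormat::BGR)))
//           .build(f);
//
// After build(), the single original parameter is replaced by two parameters (Y and UV
// planes) whose shapes come from ColorFormatInfoNV12_TwoPlanes::calculate_shape.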
+/// \brief OutputInfoImpl - internal data structure +struct OutputInfo::OutputInfoImpl { + OutputInfoImpl() = default; + explicit OutputInfoImpl(size_t idx) : m_has_index(true), m_index(idx) {} + explicit OutputInfoImpl(std::string name) : m_has_name(true), m_name(std::move(name)) {} + + bool has_index() const { + return m_has_index; + } + + bool has_name() const { + return m_has_name; + } + + void create_tensor_data() { + m_tensor_data = + std::unique_ptr(new OutputTensorInfo::OutputTensorInfoImpl()); + } + + bool m_has_index = false; + size_t m_index = 0; + bool m_has_name = false; + std::string m_name; + std::unique_ptr m_tensor_data; + std::unique_ptr m_postprocess; + std::unique_ptr m_network_data; }; //-------------- InputInfo ------------------ @@ -159,10 +240,52 @@ InputInfo&& InputInfo::network(InputNetworkInfo&& builder) && { return std::move(*this); } +//-------------- OutputInfo ------------------ +OutputInfo::OutputInfo() : m_impl(std::unique_ptr(new OutputInfoImpl)) {} +OutputInfo::OutputInfo(size_t output_index) + : m_impl(std::unique_ptr(new OutputInfoImpl(output_index))) {} +OutputInfo::OutputInfo(const std::string& output_tensor_name) + : m_impl(std::unique_ptr(new OutputInfoImpl(output_tensor_name))) {} + +OutputInfo::OutputInfo(OutputInfo&&) noexcept = default; +OutputInfo& OutputInfo::operator=(OutputInfo&&) noexcept = default; +OutputInfo::~OutputInfo() = default; + +OutputInfo& OutputInfo::tensor(OutputTensorInfo&& builder) & { + m_impl->m_tensor_data = std::move(builder.m_impl); + return *this; +} + +OutputInfo&& OutputInfo::tensor(OutputTensorInfo&& builder) && { + m_impl->m_tensor_data = std::move(builder.m_impl); + return std::move(*this); +} + +OutputInfo&& OutputInfo::postprocess(PostProcessSteps&& builder) && { + m_impl->m_postprocess = std::move(builder.m_impl); + return std::move(*this); +} + +OutputInfo& OutputInfo::postprocess(PostProcessSteps&& builder) & { + m_impl->m_postprocess = std::move(builder.m_impl); + return *this; +} + +OutputInfo& OutputInfo::network(OutputNetworkInfo&& builder) & { + m_impl->m_network_data = std::move(builder.m_impl); + return *this; +} + +OutputInfo&& OutputInfo::network(OutputNetworkInfo&& builder) && { + m_impl->m_network_data = std::move(builder.m_impl); + return std::move(*this); +} + // ------------------------ PrePostProcessor -------------------- struct PrePostProcessor::PrePostProcessorImpl { public: std::list> in_contexts; + std::list> out_contexts; }; PrePostProcessor::PrePostProcessor() : m_impl(std::unique_ptr(new PrePostProcessorImpl())) {} @@ -180,7 +303,18 @@ PrePostProcessor&& PrePostProcessor::input(InputInfo&& builder) && { return std::move(*this); } +PrePostProcessor& PrePostProcessor::output(OutputInfo&& builder) & { + m_impl->out_contexts.push_back(std::move(builder.m_impl)); + return *this; +} + +PrePostProcessor&& PrePostProcessor::output(OutputInfo&& builder) && { + m_impl->out_contexts.push_back(std::move(builder.m_impl)); + return std::move(*this); +} + std::shared_ptr PrePostProcessor::build(const std::shared_ptr& function) { + FunctionGuard guard(function); bool tensor_data_updated = false; for (const auto& input : m_impl->in_contexts) { std::shared_ptr param; @@ -200,19 +334,30 @@ std::shared_ptr PrePostProcessor::build(const std::shared_ptrm_network_data && input->m_network_data->is_layout_set() && param->get_layout().empty()) { param->set_layout(input->m_network_data->get_layout()); } + input->m_resolved_param = param; + } + + for (const auto& input : m_impl->in_contexts) { + auto param = 
input->m_resolved_param; auto consumers = param->output(0).get_target_inputs(); if (!input->m_tensor_data) { input->create_tensor_data(param->get_element_type(), param->get_layout()); } - if (!input->m_tensor_data->is_layout_set() && param->get_layout() != Layout()) { - input->m_tensor_data->set_layout(param->get_layout()); - } if (!input->m_tensor_data->is_element_type_set()) { input->m_tensor_data->set_element_type(param->get_element_type()); } + auto color_info = ColorFormatInfo::get(input->m_tensor_data->get_color_format()); + if (!input->m_tensor_data->is_layout_set()) { + if (!color_info->default_layout().empty()) { + input->m_tensor_data->set_layout(color_info->default_layout()); + } else if (!param->get_layout().empty()) { + input->m_tensor_data->set_layout(param->get_layout()); + } + } + auto net_shape = param->get_partial_shape(); auto new_param_shape = net_shape; - if (input->m_tensor_data->is_layout_set() && param->get_layout() != Layout() && + if (input->m_tensor_data->is_layout_set() && !param->get_layout().empty() && param->get_layout() != input->m_tensor_data->get_layout()) { // Find transpose between network and tensor layouts and update tensor shape auto net_to_tensor = @@ -236,26 +381,54 @@ std::shared_ptr PrePostProcessor::build(const std::shared_ptrm_tensor_data->get_spatial_width(); } } - auto new_param = std::make_shared(input->m_tensor_data->get_element_type(), new_param_shape); - if (input->m_tensor_data->is_layout_set()) { - new_param->set_layout(input->m_tensor_data->get_layout()); - } - // Old param will be removed, so friendly name can be reused - new_param->set_friendly_name(param->get_friendly_name()); - // Also reuse names of original tensor - new_param->get_output_tensor(0).set_names(param->get_output_tensor(0).get_names()); + std::vector> nodes; + std::vector> new_params; + + // Create separate parameter for each plane. Shape and friendly name is based on color format + for (size_t plane = 0; plane < color_info->planes_count(); plane++) { + auto plane_shape = color_info->shape(plane, new_param_shape); + auto plane_param = + std::make_shared(input->m_tensor_data->get_element_type(), plane_shape); + if (plane < input->m_tensor_data->planes_sub_names().size()) { + auto sub_name = std::string("/") + input->m_tensor_data->planes_sub_names()[plane]; + inherit_friendly_names(function, param, plane_param, sub_name, false); + } else { + auto sub_name = color_info->friendly_suffix(plane); + inherit_friendly_names(function, param, plane_param, sub_name); + } + if (!input->m_tensor_data->get_layout().empty()) { + plane_param->set_layout(input->m_tensor_data->get_layout()); + } + new_params.push_back(plane_param); + nodes.push_back(plane_param); + } - std::shared_ptr node = new_param; - PreprocessingContext context(new_param->get_layout()); - context.network_layout() = param->get_layout(); + PreprocessingContext context(input->m_tensor_data->get_layout()); + context.color_format() = input->m_tensor_data->get_color_format(); + context.target_layout() = param->get_layout(); context.network_shape() = param->get_partial_shape(); + // 2. 
Apply preprocessing - for (const auto& action : input->m_preprocess->actions()) { - node = std::get<0>(action)({node}, context); - tensor_data_updated |= std::get<1>(action); + if (input->m_preprocess) { + for (const auto& action : input->m_preprocess->actions()) { + auto node = std::get<0>(action)(nodes, function, context); + nodes = {node}; + tensor_data_updated |= std::get<1>(action); + } } + OPENVINO_ASSERT(nodes.size() == 1, + "Multiple plane input is not allowed as network input. Consider using the 'convert_color' " + "preprocessing operation. Current format is '", + color_format_name(context.color_format()), + "'"); + OPENVINO_ASSERT(is_rgb_family(context.color_format()) || context.color_format() == ColorFormat::UNDEFINED, + "Network shall have RGB/BGR color format. Consider adding a 'convert_color' preprocessing operation " + "to convert current color format '", + color_format_name(context.color_format()), + "' to RGB/BGR"); + auto node = nodes[0]; // Check final type OPENVINO_ASSERT(node->get_element_type() == param->get_element_type(), std::string("Element type after preprocessing {") + node->get_element_type().c_type_string() + @@ -267,13 +440,78 @@ std::shared_ptr PrePostProcessor::build(const std::shared_ptradd_parameters({new_param}); + function->add_parameters(new_params); // remove old parameter function->remove_parameter(param); } + + // Validate nodes after preprocessing if needed (no need to repeat it after post-processing) if (tensor_data_updated) { function->validate_nodes_and_infer_types(); } + + // Post processing + for (const auto& output : m_impl->out_contexts) { + std::shared_ptr result; + Output node; + OPENVINO_ASSERT(output, "Internal error: Invalid postprocessing output, please report a problem"); + if (output->has_index()) { + node = function->output(output->m_index); + } else if (output->has_name()) { + node = function->output(output->m_name); + } else { + node = function->output(); + } + result = std::dynamic_pointer_cast(node.get_node_shared_ptr()); + // Set result layout from 'network' information + if (output->m_network_data && output->m_network_data->is_layout_set() && result->get_layout().empty()) { + result->set_layout(output->m_network_data->get_layout()); + } + auto parent = result->get_input_source_output(0); + if (!output->m_tensor_data) { + output->create_tensor_data(); + } + PostprocessingContext context(result->get_layout()); + if (output->m_tensor_data->is_layout_set()) { + context.target_layout() = output->m_tensor_data->get_layout(); + } + if (output->m_tensor_data->is_element_type_set()) { + context.target_element_type() = output->m_tensor_data->get_element_type(); + } + // Apply post-processing + node = result->get_input_source_output(0); + if (output->m_postprocess) { + for (const auto& action : output->m_postprocess->actions()) { + auto action_result = action({node}, context); + node = std::get<0>(action_result); + } + } + // Implicitly convert element type and layout to match the user's tensor + PostStepsList implicit_steps; + if (node.get_element_type() != output->m_tensor_data->get_element_type() && + output->m_tensor_data->is_element_type_set() && node.get_element_type() != element::dynamic) { + implicit_steps.add_convert_impl(output->m_tensor_data->get_element_type()); + } + + if (!context.target_layout().empty() && context.target_layout() != context.layout()) { + implicit_steps.add_convert_layout_impl(context.target_layout()); + } + for (const auto& action : implicit_steps.actions()) { + auto action_result = action({node}, context); + node = 
std::get<0>(action_result); + } + + // Create result + auto new_result = std::make_shared(node); + if (!context.layout().empty()) { + new_result->set_layout(context.layout()); + } + new_result->get_input_tensor(0).set_names(result->get_input_tensor(0).get_names()); + function->add_results({new_result}); + function->remove_result(result); + } + + guard.reset(); return function; } @@ -339,6 +577,18 @@ InputNetworkInfo&& InputNetworkInfo::set_layout(const Layout& layout) && { return std::move(*this); } +InputTensorInfo& InputTensorInfo::set_color_format(const ov::preprocess::ColorFormat& format, + const std::vector& sub_names) & { + m_impl->set_color_format(format, sub_names); + return *this; +} + +InputTensorInfo&& InputTensorInfo::set_color_format(const ov::preprocess::ColorFormat& format, + const std::vector& sub_names) && { + m_impl->set_color_format(format, sub_names); + return std::move(*this); +} + // --------------------- PreProcessSteps ------------------ PreProcessSteps::PreProcessSteps() : m_impl(std::unique_ptr(new PreProcessStepsImpl())) {} @@ -432,14 +682,26 @@ PreProcessSteps&& PreProcessSteps::convert_layout(const Layout& dst_layout) && { return std::move(*this); } +PreProcessSteps& PreProcessSteps::convert_color(const ov::preprocess::ColorFormat& dst_format) & { + m_impl->add_convert_color_impl(dst_format); + return *this; +} + +PreProcessSteps&& PreProcessSteps::convert_color(const ov::preprocess::ColorFormat& dst_format) && { + m_impl->add_convert_color_impl(dst_format); + return std::move(*this); +} + PreProcessSteps& PreProcessSteps::custom(const CustomPreprocessOp& preprocess_cb) & { // 'true' indicates that custom preprocessing step will trigger validate_and_infer_types m_impl->actions().emplace_back(std::make_tuple( - [preprocess_cb](const std::vector>& nodes, PreprocessingContext&) { + [preprocess_cb](const std::vector>& nodes, + const std::shared_ptr&, + PreprocessingContext&) -> std::vector> { OPENVINO_ASSERT(nodes.size() == 1, "Can't apply custom preprocessing step for multi-plane input. Suggesting to convert " "current image to RGB/BGR color format using 'convert_color'"); - return preprocess_cb(nodes[0]); + return {preprocess_cb(nodes[0])}; }, true)); return *this; @@ -448,15 +710,102 @@ PreProcessSteps& PreProcessSteps::custom(const CustomPreprocessOp& preprocess_cb PreProcessSteps&& PreProcessSteps::custom(const CustomPreprocessOp& preprocess_cb) && { // 'true' indicates that custom preprocessing step will trigger validate_and_infer_types m_impl->actions().emplace_back(std::make_tuple( - [preprocess_cb](const std::vector>& nodes, PreprocessingContext&) { + [preprocess_cb](const std::vector>& nodes, + const std::shared_ptr&, + PreprocessingContext&) -> std::vector> { OPENVINO_ASSERT(nodes.size() == 1, "Can't apply custom preprocessing step for multi-plane input. 
Suggesting to convert " "current image to RGB/BGR color format using 'convert_color'"); - return preprocess_cb(nodes[0]); + return {preprocess_cb(nodes[0])}; }, true)); return std::move(*this); } +// --------------------- OutputTensorInfo ------------------ +OutputTensorInfo::OutputTensorInfo() : m_impl(std::unique_ptr(new OutputTensorInfoImpl())) {} +OutputTensorInfo::OutputTensorInfo(OutputTensorInfo&&) noexcept = default; +OutputTensorInfo& OutputTensorInfo::operator=(OutputTensorInfo&&) noexcept = default; +OutputTensorInfo::~OutputTensorInfo() = default; + +OutputTensorInfo& OutputTensorInfo::set_element_type(const element::Type& type) & { + m_impl->set_element_type(type); + return *this; +} + +OutputTensorInfo&& OutputTensorInfo::set_element_type(const element::Type& type) && { + m_impl->set_element_type(type); + return std::move(*this); +} + +OutputTensorInfo& OutputTensorInfo::set_layout(const Layout& layout) & { + m_impl->set_layout(layout); + return *this; +} + +OutputTensorInfo&& OutputTensorInfo::set_layout(const Layout& layout) && { + m_impl->set_layout(layout); + return std::move(*this); +} + +// --------------------- OutputNetworkInfo ------------------ +OutputNetworkInfo::OutputNetworkInfo() : m_impl(std::unique_ptr(new OutputNetworkInfoImpl())) {} +OutputNetworkInfo::OutputNetworkInfo(OutputNetworkInfo&&) noexcept = default; +OutputNetworkInfo& OutputNetworkInfo::operator=(OutputNetworkInfo&&) noexcept = default; +OutputNetworkInfo::~OutputNetworkInfo() = default; + +OutputNetworkInfo& OutputNetworkInfo::set_layout(const Layout& layout) & { + m_impl->set_layout(layout); + return *this; +} + +OutputNetworkInfo&& OutputNetworkInfo::set_layout(const Layout& layout) && { + m_impl->set_layout(layout); + return std::move(*this); +} + +// --------------------- PostProcessSteps ------------------ + +PostProcessSteps::PostProcessSteps() : m_impl(std::unique_ptr(new PostProcessStepsImpl())) {} +PostProcessSteps::PostProcessSteps(PostProcessSteps&&) noexcept = default; +PostProcessSteps& PostProcessSteps::operator=(PostProcessSteps&&) noexcept = default; +PostProcessSteps::~PostProcessSteps() = default; + +PostProcessSteps& PostProcessSteps::convert_element_type(const element::Type& type) & { + m_impl->add_convert_impl(type); + return *this; +} + +PostProcessSteps&& PostProcessSteps::convert_element_type(const element::Type& type) && { + m_impl->add_convert_impl(type); + return std::move(*this); +} + +PostProcessSteps& PostProcessSteps::convert_layout(const Layout& dst_layout) & { + m_impl->add_convert_layout_impl(dst_layout); + return *this; +} + +PostProcessSteps&& PostProcessSteps::convert_layout(const Layout& dst_layout) && { + m_impl->add_convert_layout_impl(dst_layout); + return std::move(*this); +} + +PostProcessSteps& PostProcessSteps::custom(const CustomPostprocessOp& postprocess_cb) & { + // 'true' indicates that custom postprocessing step will trigger validate_and_infer_types + m_impl->actions().emplace_back([postprocess_cb](const Output& node, PostprocessingContext&) { + return std::make_tuple(postprocess_cb(node), true); + }); + return *this; +} + +PostProcessSteps&& PostProcessSteps::custom(const CustomPostprocessOp& postprocess_cb) && { + // 'true' indicates that custom postprocessing step will trigger validate_and_infer_types + m_impl->actions().emplace_back([postprocess_cb](const Output& node, PostprocessingContext&) { + return std::make_tuple(postprocess_cb(node), true); + }); + return std::move(*this); +} + } // namespace preprocess } // namespace ov diff --git 
a/ngraph/core/src/preprocess/preprocess_steps_impl.cpp b/ngraph/core/src/preprocess/preprocess_steps_impl.cpp index 93c3b22df32b32..207028eefb6deb 100644 --- a/ngraph/core/src/preprocess/preprocess_steps_impl.cpp +++ b/ngraph/core/src/preprocess/preprocess_steps_impl.cpp @@ -4,9 +4,12 @@ #include "preprocess_steps_impl.hpp" +#include "color_utils.hpp" #include "ngraph/opsets/opset1.hpp" #include "openvino/core/node.hpp" #include "openvino/core/shape.hpp" +#include "openvino/op/nv12_to_bgr.hpp" +#include "openvino/op/nv12_to_rgb.hpp" namespace ov { namespace preprocess { @@ -31,7 +34,9 @@ static Shape construct_mean_scale_shape(const std::shared_ptr& node, void PreProcessSteps::PreProcessStepsImpl::add_scale_impl(const std::vector& values) { m_actions.emplace_back(std::make_tuple( - [values](const std::vector>& nodes, PreprocessingContext& context) { + [values](const std::vector>& nodes, + const std::shared_ptr& function, + PreprocessingContext& context) -> std::vector> { OPENVINO_ASSERT(!nodes.empty(), "Internal error: Can't apply scale preprocessing for empty input."); OPENVINO_ASSERT(nodes.size() == 1, "Can't apply scale preprocessing for multi-plane input. Suggesting to convert current " @@ -43,18 +48,20 @@ void PreProcessSteps::PreProcessStepsImpl::add_scale_impl(const std::vectorset_friendly_name(nodes[0]->get_friendly_name() + "/scale/Divide_Factor"); + inherit_friendly_names(function, nodes[0], constant, "/scale/Divide_Factor"); auto new_op = std::make_shared(nodes[0], constant); - new_op->set_friendly_name(nodes[0]->get_friendly_name() + "/scale/Divide"); - return new_op; + inherit_friendly_names(function, nodes[0], new_op, "/scale/Divide"); + return {new_op}; }, false)); } void PreProcessSteps::PreProcessStepsImpl::add_mean_impl(const std::vector& values) { m_actions.emplace_back(std::make_tuple( - [values](const std::vector>& nodes, PreprocessingContext& context) { + [values](const std::vector>& nodes, + const std::shared_ptr& function, + PreprocessingContext& context) -> std::vector> { OPENVINO_ASSERT(!nodes.empty(), "Internal error: Can't apply mean preprocessing for empty input."); OPENVINO_ASSERT(nodes.size() == 1, "Can't apply mean preprocessing for multi-plane input. Suggesting to convert current " @@ -66,27 +73,34 @@ void PreProcessSteps::PreProcessStepsImpl::add_mean_impl(const std::vectorset_friendly_name(nodes[0]->get_friendly_name() + "/mean/Mean_Const"); + inherit_friendly_names(function, nodes[0], constant, "/mean/Mean_Const"); auto new_op = std::make_shared(nodes[0], constant); - new_op->set_friendly_name(nodes[0]->get_friendly_name() + "/mean/Subtract"); - return new_op; + inherit_friendly_names(function, nodes[0], new_op, "/mean/Subtract"); + return {new_op}; }, false)); } void PreProcessSteps::PreProcessStepsImpl::add_convert_impl(const ov::element::Type& type) { m_actions.emplace_back(std::make_tuple( - [type](const std::vector>& nodes, PreprocessingContext&) { + [type](const std::vector>& nodes, + const std::shared_ptr& function, + PreprocessingContext&) -> std::vector> { OPENVINO_ASSERT(!nodes.empty(), "Internal error: Can't set element type for empty input."); - OPENVINO_ASSERT(nodes.size() == 1, - "Can't set element type for multi-plane input. 
Suggesting to convert current image to " - "RGB/BGR color format using 'convert_color'"); - OPENVINO_ASSERT(nodes[0]->get_element_type().is_static(), - "Can't insert 'convert_element_type' for dynamic source tensor type."); - auto convert = std::make_shared(nodes[0], type); - convert->set_friendly_name(nodes[0]->get_friendly_name() + "/convert_element_type"); - return convert; + std::vector> res; + for (const auto& node : nodes) { + OPENVINO_ASSERT(node->get_element_type().is_static(), + "Can't insert 'convert_element_type' for dynamic source tensor type."); + if (node->get_element_type() != type) { + auto convert = std::make_shared(node, type); + inherit_friendly_names(function, node, convert, "/convert_element_type"); + res.emplace_back(convert); + } else { + res.emplace_back(node); + } + } + return res; }, true)); } @@ -94,7 +108,9 @@ void PreProcessSteps::PreProcessStepsImpl::add_convert_impl(const ov::element::T void PreProcessSteps::PreProcessStepsImpl::add_resize_impl(ResizeAlgorithm alg, int dst_height, int dst_width) { using InterpolateMode = op::v4::Interpolate::InterpolateMode; m_actions.emplace_back(std::make_tuple( - [alg, dst_width, dst_height](const std::vector>& nodes, PreprocessingContext& ctxt) { + [alg, dst_width, dst_height](const std::vector>& nodes, + const std::shared_ptr& function, + PreprocessingContext& ctxt) -> std::vector> { OPENVINO_ASSERT(!nodes.empty(), "Internal error: Can't add resize for empty input."); OPENVINO_ASSERT(nodes.size() == 1, "Can't resize multi-plane input. Suggesting to convert current image to " @@ -140,31 +156,123 @@ void PreProcessSteps::PreProcessStepsImpl::add_resize_impl(ResizeAlgorithm alg, {0, 0}); auto interp = std::make_shared(node, target_spatial_shape, scales, axes, attrs); - interp->set_friendly_name(nodes[0]->get_friendly_name() + "/resize"); - return interp; + inherit_friendly_names(function, nodes[0], interp, "/resize"); + return {interp}; }, true)); } void PreProcessSteps::PreProcessStepsImpl::add_convert_layout_impl(const Layout& layout) { m_actions.emplace_back(std::make_tuple( - [layout](const std::vector>& nodes, PreprocessingContext& context) { + [layout](const std::vector>& nodes, + const std::shared_ptr& function, + PreprocessingContext& context) -> std::vector> { OPENVINO_ASSERT(!nodes.empty(), "Internal error: Can't convert layout for empty input."); OPENVINO_ASSERT(nodes.size() == 1, "Can't convert layout for multi-plane input. Suggesting to convert current image to " "RGB/BGR color format using 'convert_color'"); - Layout dst_layout = layout.empty() ? context.network_layout() : layout; + Layout dst_layout = layout.empty() ? 
context.target_layout() : layout; auto permutation = layout::find_permutation(context.layout(), nodes[0]->get_output_partial_shape(0).rank(), dst_layout); auto perm_constant = op::v0::Constant::create(element::i64, Shape{permutation.size()}, permutation); auto transpose = std::make_shared(nodes[0], perm_constant); - transpose->set_friendly_name(nodes[0]->get_friendly_name() + "/convert_layout"); + inherit_friendly_names(function, nodes[0], transpose, "/convert_layout"); context.layout() = dst_layout; // Update context's current layout - return transpose; + return {transpose}; }, true)); } +void PreProcessSteps::PreProcessStepsImpl::add_convert_color_impl(const ColorFormat& dst_format) { + m_actions.emplace_back(std::make_tuple( + [&, dst_format](const std::vector>& nodes, + const std::shared_ptr& function, + PreprocessingContext& context) -> std::vector> { + if (context.color_format() == dst_format) { + return nodes; + } + if (context.color_format() == ColorFormat::NV12_SINGLE_PLANE) { + OPENVINO_ASSERT(nodes.size() == 1, + "Internal error: single plane NV12 image can't have multiple inputs"); + std::shared_ptr convert; + switch (dst_format) { + case ColorFormat::RGB: + convert = std::make_shared(nodes[0]); + break; + case ColorFormat::BGR: + convert = std::make_shared(nodes[0]); + break; + default: + OPENVINO_ASSERT(false, + "Unsupported conversion from NV12 to '", + color_format_name(dst_format), + "' format:"); + } + inherit_friendly_names(function, nodes[0], convert, "/convert_color_nv12_single"); + context.color_format() = dst_format; + return {convert}; + } else if (context.color_format() == ColorFormat::NV12_TWO_PLANES) { + OPENVINO_ASSERT(nodes.size() == 2, "Internal error: two-plane NV12 image must have exactly two inputs"); + std::shared_ptr convert; + switch (dst_format) { + case ColorFormat::RGB: + convert = std::make_shared(nodes[0], nodes[1]); + break; + case ColorFormat::BGR: + convert = std::make_shared(nodes[0], nodes[1]); + break; + default: + OPENVINO_ASSERT(false, + "Unsupported conversion from NV12 to '", + color_format_name(dst_format), + "' format:"); + } + inherit_friendly_names(function, nodes[0], convert, "/convert_color_nv12_two_planes"); + context.color_format() = dst_format; + return {convert}; + } + OPENVINO_ASSERT(false, + "Source color format '", + color_format_name(context.color_format()), + "' is not convertible to any other"); + }, + true)); +} + +//------------- Post processing ------ +void PostStepsList::add_convert_impl(const ov::element::Type& type) { + m_actions.emplace_back([type](const ov::Output& node, PostprocessingContext& ctxt) { + ov::element::Type t = type; + if (t == element::Type{}) { + t = ctxt.target_element_type(); + } + if (t == node.get_node()->get_element_type()) { + return std::make_tuple(node, false); + } + OPENVINO_ASSERT( + !t.is_dynamic() && t != element::undefined, + "Can't convert to dynamic/unknown element type, consider using of InputTensorInfo::set_element_type"); + auto convert = std::make_shared(node, t); + convert->set_friendly_name(node.get_node()->get_friendly_name() + "/convert_element_type"); + return std::make_tuple(ov::Output(convert), true); + }); +} + +void PostStepsList::add_convert_layout_impl(const Layout& layout) { + m_actions.emplace_back([layout](const ov::Output& node, PostprocessingContext& context) { + Layout dst_layout = layout.empty() ? 
context.target_layout() : layout; + if (dst_layout == context.layout()) { + return std::make_tuple(node, false); + } + auto permutation = layout::find_permutation(context.layout(), node.get_partial_shape().rank(), dst_layout); + auto perm_constant = op::v0::Constant::create(element::i64, Shape{permutation.size()}, permutation); + auto transpose = std::make_shared(node, perm_constant); + transpose->set_friendly_name(node.get_node()->get_friendly_name() + "/convert_layout"); + context.layout() = dst_layout; // Update context's current layout + return std::make_tuple(ov::Output(transpose), true); + }); +} + } // namespace preprocess } // namespace ov diff --git a/ngraph/core/src/preprocess/preprocess_steps_impl.hpp b/ngraph/core/src/preprocess/preprocess_steps_impl.hpp index 120c6849fd97a4..1aebc20a939c6c 100644 --- a/ngraph/core/src/preprocess/preprocess_steps_impl.hpp +++ b/ngraph/core/src/preprocess/preprocess_steps_impl.hpp @@ -7,8 +7,12 @@ #include #include "openvino/core/layout.hpp" +#include "openvino/core/node.hpp" #include "openvino/core/partial_shape.hpp" +#include "openvino/core/preprocess/color_format.hpp" +#include "openvino/core/preprocess/postprocess_steps.hpp" #include "openvino/core/preprocess/preprocess_steps.hpp" +#include "tensor_name_util.hpp" namespace ov { namespace preprocess { @@ -55,11 +59,37 @@ inline size_t get_and_check_channels_idx(const Layout& layout, const PartialShap return idx; } -/// \brief Preprocessing context passed to each preprocessing operation. +inline void inherit_friendly_names(const std::shared_ptr& function, + const std::shared_ptr& src_node, + const std::shared_ptr& dst_node, + const std::string& suffix, + bool search_for_available_name = true) { + OPENVINO_ASSERT(src_node->get_output_size() == 1 && dst_node->get_output_size() == 1, + "Internal error. Preprocessing steps must contain nodes with one output"); + dst_node->set_friendly_name(src_node->get_friendly_name() + suffix); + std::unordered_set new_names; + for (const auto& tensor_name : src_node->output(0).get_tensor().get_names()) { + auto new_tensor_name = tensor_name + suffix; + if (!suffix.empty()) { + // Verify that new names are unique for a function + if (!is_tensor_name_available(new_tensor_name, function) && search_for_available_name) { + // Search for available name + size_t idx = 0; + do { + new_tensor_name = tensor_name + suffix + std::to_string(idx++); + } while (!is_tensor_name_available(new_tensor_name, function)); + } + } + new_names.emplace(new_tensor_name); + } + dst_node->output(0).get_tensor().set_names(new_names); +} + +/// \brief Context passed to each pre/post-processing operation. /// This is internal structure which is not shared to custom operations yet. -class PreprocessingContext { +class PrePostProcessingContextBase { public: - explicit PreprocessingContext(const Layout& layout) : m_layout(layout) {} + explicit PrePostProcessingContextBase(Layout layout) : m_layout(std::move(layout)) {} const Layout& layout() const { return m_layout; @@ -69,45 +99,77 @@ class PreprocessingContext { return m_layout; } - const PartialShape& network_shape() const { - return m_network_shape; + // Final layout. 
Needed if user specified convert_layout without arguments + // For preprocessing it is parameter's network layout + // For post-processing it is result's tensor layout + const Layout& target_layout() const { + return m_target_layout; } - PartialShape& network_shape() { - return m_network_shape; + Layout& target_layout() { + return m_target_layout; + } + + element::Type target_element_type() const { + return m_target_element_type; } - const Layout& network_layout() const { - return m_network_layout; + element::Type& target_element_type() { + return m_target_element_type; } - Layout& network_layout() { - return m_network_layout; +protected: + Layout m_layout; + Layout m_target_layout; + element::Type m_target_element_type; +}; + +/// \brief Preprocessing context passed to each preprocessing operation. +/// This is internal structure which is not shared to custom operations yet. +class PreprocessingContext : public PrePostProcessingContextBase { +public: + explicit PreprocessingContext(const Layout& layout) : PrePostProcessingContextBase(layout) {} + + const PartialShape& network_shape() const { + return m_network_shape; + } + + PartialShape& network_shape() { + return m_network_shape; } size_t get_network_height_for_resize() const { - auto network_height_idx = get_and_check_height_idx(network_layout(), network_shape()); + auto network_height_idx = get_and_check_height_idx(target_layout(), network_shape()); OPENVINO_ASSERT(network_shape()[network_height_idx].is_static(), "Dynamic resize: Network height dimension shall be static"); return network_shape()[network_height_idx].get_length(); } size_t get_network_width_for_resize() const { - auto network_width_idx = get_and_check_width_idx(network_layout(), network_shape()); + auto network_width_idx = get_and_check_width_idx(target_layout(), network_shape()); OPENVINO_ASSERT(network_shape()[network_width_idx].is_static(), "Dynamic resize: Network width dimension shall be static"); return network_shape()[network_width_idx].get_length(); } + const ColorFormat& color_format() const { + return m_color_format; + } + + ColorFormat& color_format() { + return m_color_format; + } + private: - Layout m_layout; PartialShape m_network_shape; Layout m_network_layout; + ColorFormat m_color_format = ColorFormat::UNDEFINED; }; using InternalPreprocessOp = - std::function(const std::vector>& nodes, - PreprocessingContext& context)>; + std::function>(const std::vector>& nodes, + const std::shared_ptr& function, + PreprocessingContext& context)>; /// \brief PreProcessStepsImpl - internal data structure class PreProcessSteps::PreProcessStepsImpl { @@ -117,6 +179,7 @@ class PreProcessSteps::PreProcessStepsImpl { void add_convert_impl(const element::Type& type); void add_resize_impl(ResizeAlgorithm alg, int dst_height, int dst_width); void add_convert_layout_impl(const Layout& layout); + void add_convert_color_impl(const ColorFormat& dst_format); const std::list>& actions() const { return m_actions; @@ -129,5 +192,33 @@ class PreProcessSteps::PreProcessStepsImpl { std::list> m_actions; }; +//------ Post process ----- +class PostprocessingContext : public PrePostProcessingContextBase { +public: + explicit PostprocessingContext(const Layout& layout) : PrePostProcessingContextBase(layout) {} +}; + +using InternalPostprocessOp = std::function, bool>(const ov::Output& node, + PostprocessingContext& context)>; + +/// \brief PostProcessStepsImpl - internal data structure +class PostStepsList { +public: + void add_convert_impl(const element::Type& type); + void 
add_convert_layout_impl(const Layout& layout); + + const std::list& actions() const { + return m_actions; + } + std::list& actions() { + return m_actions; + } + +private: + std::list m_actions; +}; + +class PostProcessSteps::PostProcessStepsImpl : public PostStepsList {}; + } // namespace preprocess } // namespace ov diff --git a/ngraph/core/src/tensor_name_util.hpp b/ngraph/core/src/tensor_name_util.hpp new file mode 100644 index 00000000000000..b77c248561d605 --- /dev/null +++ b/ngraph/core/src/tensor_name_util.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/except.hpp" +#include "openvino/core/function.hpp" + +namespace ov { + +/// \brief Check that specified tensor name is unique for a given function. +/// +/// \param tensor_name Name to check across all tensors in a function. +/// \param function Function. +/// \return False if tensor name is already used in some function's node, True otherwise +inline bool is_tensor_name_available(const std::string& tensor_name, const std::shared_ptr& function) { + for (const auto& node : function->get_ordered_ops()) { + for (const auto& output : node->outputs()) { + const auto& tensor = output.get_tensor(); + if (tensor.get_names().count(tensor_name)) { + return false; + } + } + } + return true; +} + +} // namespace ov diff --git a/ngraph/core/src/type/element_type.cpp b/ngraph/core/src/type/element_type.cpp index fd12c5e4f24474..0fbe4da4872551 100644 --- a/ngraph/core/src/type/element_type.cpp +++ b/ngraph/core/src/type/element_type.cpp @@ -12,8 +12,8 @@ #include "ngraph/log.hpp" #include "ngraph/type/element_type_traits.hpp" -BWDCMP_RTTI_DEFINITION(ngraph::AttributeAdapter); -BWDCMP_RTTI_DEFINITION(ngraph::AttributeAdapter); +BWDCMP_RTTI_DEFINITION(ov::AttributeAdapter); +BWDCMP_RTTI_DEFINITION(ov::AttributeAdapter); namespace { class TypeInfo { @@ -313,8 +313,8 @@ size_t compiler_byte_size(ov::element::Type_t et) { return 0; } - throw ngraph::ngraph_error("compiler_byte_size: Unsupported value of ov::element::Type_t: " + - std::to_string(static_cast(et))); + throw ov::Exception("compiler_byte_size: Unsupported value of ov::element::Type_t: " + + std::to_string(static_cast(et))); } namespace ov { diff --git a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager.hpp b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager.hpp index aaa51f7a0de130..607636e9c344d6 100644 --- a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager.hpp +++ b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager.hpp @@ -79,7 +79,7 @@ class FRONTEND_API FrontEndManager final { }; template <> -FrontEnd::Ptr FrontEndManager::load_by_model(const std::vector>& variants); +FRONTEND_API FrontEnd::Ptr FrontEndManager::load_by_model(const std::vector>& variants); // --------- Plugin exporting information -------------- diff --git a/ngraph/frontend/ir/src/model.cpp b/ngraph/frontend/ir/src/model.cpp index be9326b3385ee1..b968659d80ce0e 100644 --- a/ngraph/frontend/ir/src/model.cpp +++ b/ngraph/frontend/ir/src/model.cpp @@ -194,7 +194,7 @@ class InputModelIR::InputModelIRImpl { m_extensions(extensions) { pugi::xml_parse_result res = m_xml_doc.load(stream); if (res.status != pugi::status_ok) { - IE_THROW() << res.description() << "at offset " << res.offset; + IE_THROW() << res.description() << " at offset " << res.offset; } m_root = m_xml_doc.document_element(); } diff --git 
a/ngraph/frontend/ir/src/rt_info_deserializer.hpp b/ngraph/frontend/ir/src/rt_info_deserializer.hpp index fbe181c9cc2bd7..de9c708b78d5cb 100644 --- a/ngraph/frontend/ir/src/rt_info_deserializer.hpp +++ b/ngraph/frontend/ir/src/rt_info_deserializer.hpp @@ -94,6 +94,16 @@ class RTInfoDeserializer : public ngraph::AttributeVisitor { adapter.set(value); } + void on_adapter(const std::string& name, ngraph::ValueAccessor>& adapter) override { + check_attribute_name(name); + std::string val; + if (!getStrAttribute(m_node, name, val)) + return; + std::vector value; + str_to_container(val, value); + adapter.set(value); + } + void on_adapter(const std::string& name, ngraph::ValueAccessor>& adapter) override { check_attribute_name(name); std::string val; diff --git a/ngraph/frontend/onnx/frontend/src/op/com.microsoft/embed_layer_normalization.cpp b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/embed_layer_normalization.cpp new file mode 100644 index 00000000000000..0616cc5403204a --- /dev/null +++ b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/embed_layer_normalization.cpp @@ -0,0 +1,74 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "op/com.microsoft/embed_layer_normalization.hpp" + +#include "default_opset.hpp" +#include "onnx_import/core/null_node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector embed_layer_normalization(const Node& node) { + auto nodes = node.get_ng_inputs(); + auto num_nodes = nodes.size(); + + NGRAPH_CHECK(num_nodes >= 7 && num_nodes <= 8, + "EmbedLayerNormalization takes 7 or 8 inputs. Provided " + std::to_string(num_nodes)); + NGRAPH_CHECK(nodes[0].get_element_type() == element::i32, "input_ids must have int32 type"); + + const auto& input_ids = nodes[0]; + const auto& segment_ids = nodes[1]; + const auto& word_embeddings = nodes[2]; + const auto& position_embeddings = nodes[3]; + const auto& segment_embeddings = nodes[4]; + const auto& gamma = nodes[5]; + const auto& beta = nodes[6]; + + auto zero = default_opset::Constant::create(element::i32, Shape{1}, {0}); + std::shared_ptr input = std::make_shared(word_embeddings, input_ids, zero, 0); + input = std::make_shared(input, position_embeddings); + + // add segment embeddings if available + if (!ngraph::op::is_null(segment_ids)) { + NGRAPH_CHECK(!ngraph::op::is_null(segment_embeddings), + "segment_ids provided, but segment_embedding input is missing"); + NGRAPH_CHECK(nodes[1].get_element_type() == element::i32, "segment_ids must have int32 type"); + auto gathered_segment_embeddings = + std::make_shared(segment_embeddings, segment_ids, zero, 0); + input = std::make_shared(input, gathered_segment_embeddings); + } + + float eps = node.get_attribute_value("epsilon"); + // reduce over hidden_size + // hidden_size dimension is 2 here, because the shape after Gather(word_embedding, input_ids) + // is (batch_size, seq_len, hidden_size) + int hidden_size_dim = 2; + const auto reduction_axes = default_opset::Constant::create(element::i32, Shape{1}, {hidden_size_dim}); + std::shared_ptr result = + std::make_shared(input, reduction_axes, true, eps, ngraph::op::MVNEpsMode::INSIDE_SQRT); + + // result = gamma * result + beta + result = std::make_shared(result, gamma); + result = std::make_shared(result, beta); + + // compute mask_index output + std::shared_ptr mask_index; + if (num_nodes > 7 && !ngraph::op::is_null(nodes[7])) { + NGRAPH_CHECK(nodes[7].get_element_type() == element::i32, "mask must have int32 type"); + auto axis 
= default_opset::Constant::create(element::i32, Shape{}, {1}); + mask_index = std::make_shared(nodes[7], axis, false); + } else { + auto batch_size = std::make_shared(std::make_shared(nodes[0]), + zero, // indices + zero); // axis + mask_index = std::make_shared(zero, batch_size); + } + return {result, mask_index}; +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph diff --git a/ngraph/frontend/onnx/frontend/src/op/com.microsoft/embed_layer_normalization.hpp b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/embed_layer_normalization.hpp new file mode 100644 index 00000000000000..2d9fcecdcf7932 --- /dev/null +++ b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/embed_layer_normalization.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector embed_layer_normalization(const Node& node); +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph diff --git a/ngraph/frontend/onnx/frontend/src/op/matmul_integer.cpp b/ngraph/frontend/onnx/frontend/src/op/matmul_integer.cpp new file mode 100644 index 00000000000000..0952b3bd2406fd --- /dev/null +++ b/ngraph/frontend/onnx/frontend/src/op/matmul_integer.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "op/matmul_integer.hpp" + +#include +#include +#include + +#include "default_opset.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +OutputVector matmul_integer(const Node& node) { + const OutputVector& inputs = node.get_ng_inputs(); + + const auto& A = inputs.at(0); + const auto& B = inputs.at(1); + const auto& A_zero_point = + (inputs.size() > 2) ? inputs.at(2) : ngraph::op::Constant::create(ngraph::element::i32, {1}, {0}); + const auto& B_zero_point = + (inputs.size() > 3) ? 
inputs.at(3) : ngraph::op::Constant::create(ngraph::element::i32, {1}, {0}); + + const auto& converted_A = std::make_shared(A, element::i32); + const auto& converted_B = std::make_shared(B, element::i32); + + const auto& converted_A_zero_point = std::make_shared(A_zero_point, element::i32); + const auto& converted_B_zero_point = std::make_shared(B_zero_point, element::i32); + + const auto& A_zero_point_rank = A_zero_point.get_partial_shape().rank(); + + Output shifted_A; + if (A_zero_point_rank.is_static() && A_zero_point_rank.get_length() == 1) { + const auto& one_node = ngraph::op::Constant::create(ngraph::element::i32, {1}, {1}); + const auto& reshaped_A_zero_point = + std::make_shared(converted_A_zero_point, one_node); + + shifted_A = std::make_shared(converted_A, reshaped_A_zero_point); + } else { + shifted_A = std::make_shared(converted_A, converted_A_zero_point); + } + + const auto& shifted_B = std::make_shared(converted_B, converted_B_zero_point); + + const auto& result = std::make_shared(shifted_A, shifted_B); + + return {result}; +} +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph diff --git a/ngraph/frontend/onnx/frontend/src/op/matmul_integer.hpp b/ngraph/frontend/onnx/frontend/src/op/matmul_integer.hpp new file mode 100644 index 00000000000000..e1db5c2416941f --- /dev/null +++ b/ngraph/frontend/onnx/frontend/src/op/matmul_integer.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +/// \brief Performs ONNX MatMulInteger operation. +/// +/// \param node The ONNX node object representing this operation. +/// +/// \return The vector containing Ngraph nodes producing the output of the ONNX quantized +/// integer matrix multiplication operation. 
+OutputVector matmul_integer(const Node& node); +} // namespace set_1 +} // namespace op +} // namespace onnx_import +} // namespace ngraph diff --git a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp index 20484c78953ffd..539cc7fc0d7ef2 100644 --- a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp +++ b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp @@ -30,6 +30,7 @@ #include "op/ceil.hpp" #include "op/clip.hpp" #include "op/com.microsoft/bias_gelu.hpp" +#include "op/com.microsoft/embed_layer_normalization.hpp" #include "op/com.microsoft/skip_layer_normalization.hpp" #include "op/compress.hpp" #include "op/concat.hpp" @@ -81,6 +82,7 @@ #include "op/lrn.hpp" #include "op/lstm.hpp" #include "op/matmul.hpp" +#include "op/matmul_integer.hpp" #include "op/max.hpp" #include "op/max_pool.hpp" #include "op/mean.hpp" @@ -352,6 +354,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("LpNormalization", 1, lp_norm); REGISTER_OPERATOR("LRN", 1, lrn); REGISTER_OPERATOR("LSTM", 1, lstm); + REGISTER_OPERATOR("MatMulInteger", 1, matmul_integer); REGISTER_OPERATOR("MatMul", 1, matmul); REGISTER_OPERATOR("MaxPool", 1, max_pool); REGISTER_OPERATOR("Max", 1, max); @@ -480,6 +483,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR_WITH_DOMAIN(OPENVINO_ONNX_DOMAIN, "Swish", 1, swish); REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "BiasGelu", 1, bias_gelu); + REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "EmbedLayerNormalization", 1, embed_layer_normalization); REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "SkipLayerNormalization", 1, skip_layer_normalization); } diff --git a/ngraph/frontend/paddlepaddle/src/op/exp.cpp b/ngraph/frontend/paddlepaddle/src/op/exp.cpp new file mode 100644 index 00000000000000..c767ab67692c01 --- /dev/null +++ b/ngraph/frontend/paddlepaddle/src/op/exp.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "default_opset.hpp" + +namespace ngraph { +namespace frontend { +namespace pdpd { +namespace op { +NamedOutputs exp(const NodeContext& node) { + auto data = node.get_ng_input("X"); + return node.default_single_output_mapping({std::make_shared(data)}, {"Out"}); +} +} // namespace op +} // namespace pdpd +} // namespace frontend +} // namespace ngraph \ No newline at end of file diff --git a/ngraph/frontend/paddlepaddle/src/op/prior_box.cpp b/ngraph/frontend/paddlepaddle/src/op/prior_box.cpp new file mode 100644 index 00000000000000..fcdae08eaccf48 --- /dev/null +++ b/ngraph/frontend/paddlepaddle/src/op/prior_box.cpp @@ -0,0 +1,112 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/op/prior_box.hpp" + +#include + +#include "default_opset.hpp" + +namespace ngraph { +namespace frontend { +namespace pdpd { +namespace op { +using namespace default_opset; +using namespace element; +namespace detail { +namespace { +std::shared_ptr make_slice(const std::shared_ptr& node, int64_t start, int64_t end) { + return std::make_shared(node, + Constant::create(i64, Shape{1}, std::vector{start}), + Constant::create(i64, Shape{1}, std::vector{end}), + std::vector{0}, // begin mask + std::vector{0}); // end mask +} +} // namespace +} // namespace detail +NamedOutputs prior_box(const NodeContext& node) { + auto input = node.get_ng_input("Input"); + auto Image = node.get_ng_input("Image"); + const auto input_shape = std::make_shared(input); + const auto Image_shape = std::make_shared(Image); 
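// Editor's note (added comment, not in the original patch): the two make_slice calls
// below take dims [2, 4) from the ShapeOf results, i.e. the spatial H and W, assuming
// NCHW-shaped feature map ("Input") and image ("Image"); these feed PriorBox.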
+ const auto output_shape_slice = detail::make_slice(input_shape, 2, 4); + const auto image_shape_slice = detail::make_slice(Image_shape, 2, 4); + + ngraph::op::PriorBoxAttrs attrs; + attrs.min_size = node.get_attribute>("min_sizes", {}); + attrs.max_size = node.get_attribute>("max_sizes", {}); + attrs.aspect_ratio = node.get_attribute>("aspect_ratios", {1.0}); + attrs.flip = node.get_attribute("flip", false); + attrs.clip = node.get_attribute("clip", false); + attrs.step = node.get_attribute("step_w", 0); + + attrs.offset = node.get_attribute("offset", 0.5); + attrs.variance = node.get_attribute>("variances", {0.1, 0.1, 0.2, 0.2}); + + bool min_max_aspect_ratios_order = node.get_attribute("min_max_aspect_ratios_order", false); + + const auto ov_prior_box_node = std::make_shared(output_shape_slice, image_shape_slice, attrs); + + const auto split_axis_node = Constant::create(i64, ngraph::Shape{}, {0}); + const auto node_prior_box_split = std::make_shared(ov_prior_box_node, split_axis_node, 2); + + const auto node_boxes_origin = node_prior_box_split->output(0); + const auto node_variances_origin = node_prior_box_split->output(1); + + const auto out_shape = + std::make_shared(NodeVector{output_shape_slice, Constant::create(i64, {2}, {-1, 4})}, 0); + + auto node_boxes_reshape = std::make_shared(node_boxes_origin, out_shape, true); + const auto node_variances_reshape = std::make_shared(node_variances_origin, out_shape, true); + + int64_t total_aspect_ratios = ngraph::op::PriorBox::normalized_aspect_ratio(attrs.aspect_ratio, attrs.flip).size(); + if ((total_aspect_ratios > 1) && !attrs.min_size.empty() && !attrs.max_size.empty() && + !min_max_aspect_ratios_order) { + std::vector mask{1, 1, 1, 0, 1}; + int64_t min_size_len = static_cast(attrs.min_size.size()); + + const auto out_shape_div_numpri = std::make_shared( + NodeVector{output_shape_slice, Constant::create(i64, {3}, {min_size_len, -1, 4})}, + 0); + const auto node_boxes_div_numpri = std::make_shared(node_boxes_reshape, out_shape_div_numpri, true); + + const auto slice_begin_min = Constant::create(i64, Shape{5}, std::vector{0, 0, 0, 0, 0}); + const auto slice_end_min = std::make_shared( + NodeVector{output_shape_slice, Constant::create(i64, {3}, {min_size_len, 1, 4})}, + 0); + const auto slice_min_node = + std::make_shared(node_boxes_div_numpri, slice_begin_min, slice_end_min, mask, mask); + + const auto slice_begin_max = Constant::create(i64, Shape{5}, std::vector{0, 0, 0, 1, 0}); + const auto slice_end_max = std::make_shared( + NodeVector{output_shape_slice, Constant::create(i64, {3}, {min_size_len, 2, 4})}, + 0); + const auto slice_max_node = + std::make_shared(node_boxes_div_numpri, slice_begin_max, slice_end_max, mask, mask); + + const auto slice_begin_aspect_ratios = Constant::create(i64, Shape{5}, std::vector{0, 0, 0, 2, 0}); + const auto slice_end_aspect_ratios = std::make_shared( + NodeVector{output_shape_slice, + Constant::create(i64, {3}, {min_size_len, 2 + (total_aspect_ratios - 1), 4})}, + 0); + const auto slice_aspect_ratios_node = std::make_shared(node_boxes_div_numpri, + slice_begin_aspect_ratios, + slice_end_aspect_ratios, + mask, + mask); + + const auto node_boxes_div_numpri_reorder = + std::make_shared(NodeVector{slice_min_node, slice_aspect_ratios_node, slice_max_node}, 3); + node_boxes_reshape = std::make_shared(node_boxes_div_numpri_reorder, out_shape, true); + } + + NamedOutputs outputs; + outputs["Boxes"] = {node_boxes_reshape}; + outputs["Variances"] = {node_variances_reshape}; + return outputs; +} +} // namespace 
op +} // namespace pdpd +} // namespace frontend +} // namespace ngraph diff --git a/ngraph/frontend/paddlepaddle/src/op/stack.cpp b/ngraph/frontend/paddlepaddle/src/op/stack.cpp new file mode 100644 index 00000000000000..d6924290329ad8 --- /dev/null +++ b/ngraph/frontend/paddlepaddle/src/op/stack.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "default_opset.hpp" + +namespace ngraph { +namespace frontend { +namespace pdpd { +namespace op { +using namespace default_opset; +NamedOutputs stack(const NodeContext& node) { + auto datas = node.get_ng_inputs("X"); + auto axis = node.get_attribute("axis", 0); + auto data_shape = datas[0].get_partial_shape(); + auto data_type = datas[0].get_element_type(); + OutputVector node_datas_reshape; + + auto axis_const = std::make_shared(element::i64, Shape{}, axis); + if (data_shape.rank().is_static()) + PDPD_OP_VALIDATION_CHECK( + node, + (axis >= -(data_shape.rank().get_length() + 1)) && axis < (data_shape.rank().get_length() + 1), + "axis range is [-(R+1), R+1)!"); + + for (const auto& data : datas) { + PDPD_OP_VALIDATION_CHECK(node, + data_type == data.get_element_type(), + "stack input tensor must have the same data types!"); + + node_datas_reshape.push_back(std::make_shared(data, axis_const)); + } + + return node.default_single_output_mapping({std::make_shared(node_datas_reshape, axis)}, {"Y"}); +} +} // namespace op +} // namespace pdpd +} // namespace frontend +} // namespace ngraph \ No newline at end of file diff --git a/ngraph/frontend/paddlepaddle/src/op_table.cpp b/ngraph/frontend/paddlepaddle/src/op_table.cpp index e6c4106b12b7ed..105e7699dcf16f 100644 --- a/ngraph/frontend/paddlepaddle/src/op_table.cpp +++ b/ngraph/frontend/paddlepaddle/src/op_table.cpp @@ -30,6 +30,7 @@ OP_CONVERTER(elementwise_mul); OP_CONVERTER(elementwise_pow); OP_CONVERTER(elementwise_sub); OP_CONVERTER(embedding); +OP_CONVERTER(exp); OP_CONVERTER(expand_v2); OP_CONVERTER(fill_any_like); OP_CONVERTER(fill_constant_batch_size_like); @@ -51,6 +52,7 @@ OP_CONVERTER(nearest_interp_v2); OP_CONVERTER(pad3d); OP_CONVERTER(pow); OP_CONVERTER(pool2d); +OP_CONVERTER(prior_box); OP_CONVERTER(range); OP_CONVERTER(relu); OP_CONVERTER(relu6); @@ -63,6 +65,7 @@ OP_CONVERTER(softmax); OP_CONVERTER(sigmoid); OP_CONVERTER(split); OP_CONVERTER(squeeze); +OP_CONVERTER(stack); OP_CONVERTER(tanh); OP_CONVERTER(transpose2); OP_CONVERTER(unsqueeze); @@ -101,6 +104,7 @@ std::map get_supported_ops() { {"elementwise_pow", op::elementwise_pow}, {"elementwise_sub", op::elementwise_sub}, {"equal", op::elementwise_equal}, + {"exp", op::exp}, {"expand_v2", op::expand_v2}, {"fill_any_like", op::fill_any_like}, {"fill_constant_batch_size_like", op::fill_constant_batch_size_like}, @@ -126,6 +130,7 @@ std::map get_supported_ops() { {"pad3d", op::pad3d}, {"pow", op::pow}, {"pool2d", op::pool2d}, + {"prior_box", op::prior_box}, {"range", op::range}, {"relu", op::relu}, {"relu6", op::relu6}, @@ -138,6 +143,7 @@ std::map get_supported_ops() { {"sigmoid", op::sigmoid}, {"split", op::split}, {"squeeze2", op::squeeze}, + {"stack", op::stack}, {"sync_batch_norm", op::batch_norm}, {"tanh", op::tanh}, {"transpose2", op::transpose2}, diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index e8a3896b6a43f3..67e94647d20b9c 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -39,6 +39,7 @@ set(SRC eval.cpp file_util.cpp float16.cpp + framework_node.cpp function.cpp graph_rewrite.cpp 
     includes.cpp
diff --git a/ngraph/test/backend/recurrent_cells.in.cpp b/ngraph/test/backend/recurrent_cells.in.cpp
index 728d79e38beafc..bf9b55a84a2de5 100644
--- a/ngraph/test/backend/recurrent_cells.in.cpp
+++ b/ngraph/test/backend/recurrent_cells.in.cpp
@@ -8,15 +8,14 @@
 #endif
 // clang-format on
+#include "engines_util/execute_tools.hpp"
+#include "engines_util/test_case.hpp"
+#include "engines_util/test_engines.hpp"
 #include "gtest/gtest.h"
 #include "ngraph/check.hpp"
 #include "ngraph/ngraph.hpp"
 #include "ngraph/opsets/opset4.hpp"
-
-#include "engines_util/test_engines.hpp"
-#include "engines_util/test_case.hpp"
 #include "util/test_control.hpp"
-#include "engines_util/execute_tools.hpp"
 
 using namespace std;
 using namespace ngraph;
diff --git a/ngraph/test/engines_util/ie_engines.cpp b/ngraph/test/engines_util/ie_engines.cpp
index 3238b32c18ca14..e9b918c2e0ceb4 100644
--- a/ngraph/test/engines_util/ie_engines.cpp
+++ b/ngraph/test/engines_util/ie_engines.cpp
@@ -337,6 +337,9 @@ testing::AssertionResult test::IE_Engine::compare_results_with_tolerance_as_fp(c
         comparison_result = test::compare_with_tolerance(test_results.first, test_results.second, tolerance);
         break;
     }
+    case InferenceEngine::Precision::I32:
+        comparison_result = compare_blobs<int32_t>(computed_output_blob, expected_output_blob, 0);
+        break;
     default:
         comparison_result = testing::AssertionFailure() << "Unsupported data type encountered in "
                                                            "'compare_results_with_tolerance_as_fp' method";
diff --git a/ngraph/test/engines_util/interpreter_engine.cpp b/ngraph/test/engines_util/interpreter_engine.cpp
index 65f614d4c73fa8..6648f7f8fca6aa 100644
--- a/ngraph/test/engines_util/interpreter_engine.cpp
+++ b/ngraph/test/engines_util/interpreter_engine.cpp
@@ -124,6 +124,9 @@ testing::AssertionResult test::INTERPRETER_Engine::compare_results_with_toleranc
     case element::Type_t::f32:
         comparison_result = compare_with_fp_tolerance(expected_result_constant, result_tensor, tolerance);
         break;
+    case element::Type_t::i32:
+        comparison_result = compare_values<int32_t>(expected_result_constant, result_tensor, 0);
+        break;
     default:
         comparison_result = testing::AssertionFailure() << "Unsupported data type encountered in "
                                                            "'compare_results_with_tolerance_as_fp' method";
diff --git a/ngraph/test/framework_node.cpp b/ngraph/test/framework_node.cpp
new file mode 100644
index 00000000000000..9a21eaeb8db97d
--- /dev/null
+++ b/ngraph/test/framework_node.cpp
@@ -0,0 +1,43 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph/op/util/framework_node.hpp"
+
+#include <map>
+
+#include "gtest/gtest.h"
+
+TEST(framework_node, attrs) {
+    ov::op::util::FrameworkNodeAttrs attrs;
+
+    attrs.set_opset_name("opset_name");
+    ASSERT_EQ(attrs.get_opset_name(), "opset_name");
+
+    attrs.set_type_name("type_name");
+    ASSERT_EQ(attrs.get_type_name(), "type_name");
+
+    attrs["attr1"] = "value1";
+    ASSERT_EQ(attrs.at("attr1"), "value1");
+    ASSERT_EQ(attrs.begin()->first, "attr1");
+    ASSERT_EQ(attrs.begin()->first, "attr1");
+    ASSERT_EQ(attrs.begin()->second, "value1");
+
+    ov::op::util::FrameworkNodeAttrs a1, a2;
+    a1.set_type_name("type_name");
+    a2.set_type_name("type_name_");
+    ASSERT_FALSE(a1 == a2);
+    a2.set_type_name("type_name");
+    ASSERT_TRUE(a1 == a2);
+    a1.set_opset_name("opset_name");
+    a2.set_opset_name("opset_name_");
+    ASSERT_FALSE(a1 == a2);
+    a2.set_opset_name("opset_name");
+    ASSERT_TRUE(a1 == a2);
+    a1["attr"] = "value";
+    ASSERT_FALSE(a1 == a2);
+    a2["attr"] = "value_";
+    ASSERT_FALSE(a1 == a2);
+    a2["attr"] = "value";
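+    // type name, opset name and all attribute values now match, so the two
+    // attribute sets compare equal again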
+    ASSERT_TRUE(a1 == a2);
+}
\ No newline at end of file
diff --git a/ngraph/test/frontend/paddlepaddle/op_fuzzy.cpp b/ngraph/test/frontend/paddlepaddle/op_fuzzy.cpp
index 9e7f91096d9caf..7b74cca9469a78 100644
--- a/ngraph/test/frontend/paddlepaddle/op_fuzzy.cpp
+++ b/ngraph/test/frontend/paddlepaddle/op_fuzzy.cpp
@@ -90,6 +90,7 @@ static const std::vector<std::string> models{std::string("argmax"),
                                              std::string("expand_v2"),
                                              std::string("expand_v2_tensor"),
                                              std::string("expand_v2_tensor_list"),
+                                             std::string("exp_test_float32"),
                                              std::string("fill_any_like"),
                                              std::string("fill_any_like_f16"),
                                              std::string("fill_any_like_f32"),
@@ -155,6 +156,10 @@ static const std::vector<std::string> models{std::string("argmax"),
                                              std::string("pow_int64"),
                                              // pow_int64_out_of_range(out of range of OV int64),
                                              std::string("pow_y_tensor"),
+                                             std::string("prior_box_attrs_mmar_order_true"),
+                                             std::string("prior_box_default"),
+                                             std::string("prior_box_flip_clip_false"),
+                                             std::string("prior_box_max_sizes_none"),
                                              std::string("range0"),
                                              std::string("range1"),
                                              std::string("range2"),
@@ -194,6 +199,10 @@ static const std::vector<std::string> models{std::string("argmax"),
                                              std::string("split_test_list_tensor"),
                                              std::string("squeeze"),
                                              std::string("squeeze_null_axes"),
+                                             std::string("stack_test_float32"),
+                                             std::string("stack_test_int32"),
+                                             std::string("stack_test_neg_axis"),
+                                             std::string("stack_test_none_axis"),
                                              std::string("tanh"),
                                              std::string("unsqueeze"),
                                              // Temporily disable them until root caused to secure CI stable.
diff --git a/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_exp.py b/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_exp.py
new file mode 100644
index 00000000000000..69496a2dab20b8
--- /dev/null
+++ b/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_exp.py
@@ -0,0 +1,38 @@
+#
+# exp paddle model generator
+#
+import numpy as np
+from save_model import saveModel
+import sys
+
+
+def exp(name: str, x):
+    import paddle as pdpd
+    pdpd.enable_static()
+
+    with pdpd.static.program_guard(pdpd.static.Program(), pdpd.static.Program()):
+        node_x = pdpd.static.data(name='x', shape=x.shape, dtype=x.dtype)
+        out = pdpd.fluid.layers.exp(x=node_x)
+        cpu = pdpd.static.cpu_places(1)
+        exe = pdpd.static.Executor(cpu[0])
+        # the startup program runs the initializers that set up the parameters
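+        # (exp has no learnable parameters, so this is effectively a no-op
+        # here, but it keeps all generator scripts uniform)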
+        exe.run(pdpd.static.default_startup_program())
+
+        outs = exe.run(
+            feed={'x': x},
+            fetch_list=[out])
+
+        saveModel(name, exe, feedkeys=['x'], fetchlist=[out], inputs=[
+                  x], outputs=[outs[0]], target_dir=sys.argv[1])
+
+    return outs[0]
+
+
+def main():
+    input_shape = (1, 2, 3)
+    input_data = np.random.rand(*input_shape).astype(np.float32)
+    exp("exp_test_float32", input_data)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_prior_box.py b/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_prior_box.py
new file mode 100644
index 00000000000000..12e9ddd80fe80c
--- /dev/null
+++ b/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_prior_box.py
@@ -0,0 +1,124 @@
+#
+# prior_box paddle model generator
+#
+import numpy as np
+from save_model import saveModel
+import sys
+
+
+def prior_box(name: str, input_data, image_data, attrs: dict):
+    import paddle as pdpd
+    pdpd.enable_static()
+
+    with pdpd.static.program_guard(pdpd.static.Program(), pdpd.static.Program()):
+        Input = pdpd.static.data(
+            name='Input', shape=input_data.shape, dtype=input_data.dtype)
+        Image = pdpd.static.data(
+            name='Image', shape=image_data.shape, dtype=image_data.dtype)
+
+        box, var = pdpd.fluid.layers.prior_box(Input,
+                                               Image,
+                                               min_sizes=attrs['min_sizes'],
+                                               max_sizes=attrs['max_sizes'],
+                                               aspect_ratios=attrs['aspect_ratios'],
+                                               variance=attrs['variance'],
+                                               flip=attrs['flip'],
+                                               clip=attrs['clip'],
+                                               steps=attrs['steps'],
+                                               offset=attrs['offset'],
+                                               name=None,
+                                               min_max_aspect_ratios_order=attrs['min_max_aspect_ratios_order'])
+
+        cpu = pdpd.static.cpu_places(1)
+        exe = pdpd.static.Executor(cpu[0])
+        # the startup program runs the initializers that set up the parameters
+        exe.run(pdpd.static.default_startup_program())
+
+        outs = exe.run(
+            feed={'Input': input_data, 'Image': image_data},
+            fetch_list=[box, var])
+
+        # Save inputs in the order of the ngraph function to facilitate the
+        # fuzzy test, which accepts inputs and outputs in this order as well.
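+        # here that order is (Input, Image) for the feeds and (box, var) for
+        # the fetched outputs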
+        saveModel(name, exe, feedkeys=['Input', 'Image'], fetchlist=[box, var],
+                  inputs=[input_data, image_data], outputs=outs, target_dir=sys.argv[1])
+    return outs
+
+
+if __name__ == "__main__":
+
+    prior_box_attrs_default = {
+        'name': "prior_box_default",
+        'min_sizes': np.array([2, 4]).astype('float32').tolist(),
+        'max_sizes': np.array([5, 10]).astype('float32').tolist(),
+        'aspect_ratios': [2.0, 3.0],
+        'flip': True,
+        'clip': True,
+        'steps': np.array([1.25, 1.25]).astype('float32').tolist(),
+        'offset': 0.5,
+        'variance': np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float64).flatten(),
+        'min_max_aspect_ratios_order': False
+    }
+
+    prior_box_max_sizes_none = {
+        'name': "prior_box_max_sizes_none",
+        'min_sizes': np.array([2, 4]).astype('float32').tolist(),
+        'max_sizes': None,
+        'aspect_ratios': [2.0, 3.0],
+        'flip': True,
+        'clip': True,
+        'steps': np.array([1.25, 1.25]).astype('float32').tolist(),
+        'offset': 0.5,
+        'variance': np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float64).flatten(),
+        'min_max_aspect_ratios_order': False
+    }
+
+    prior_box_flip_clip_false = {
+        'name': "prior_box_flip_clip_false",
+        'min_sizes': np.array([2, 4]).astype('float32').tolist(),
+        'max_sizes': np.array([5, 10]).astype('float32').tolist(),
+        'aspect_ratios': [2.0, 3.0],
+        'flip': False,
+        'clip': False,
+        'steps': np.array([1.25, 1.25]).astype('float32').tolist(),
+        'offset': 0.5,
+        'variance': np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float64).flatten(),
+        'min_max_aspect_ratios_order': False
+    }
+
+    prior_box_attrs_mmar_order_true = {
+        'name': "prior_box_attrs_mmar_order_true",
+        'min_sizes': np.array([2, 4]).astype('float32').tolist(),
+        'max_sizes': np.array([5, 10]).astype('float32').tolist(),
+        'aspect_ratios': [2.0, 3.0],
+        'flip': True,
+        'clip': True,
+        'steps': np.array([1.25, 1.25]).astype('float32').tolist(),
+        'offset': 0.5,
+        'variance': np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float64).flatten(),
+        'min_max_aspect_ratios_order': True
+    }
+
+    prior_box_attrs_list = [prior_box_attrs_default,
+                            prior_box_max_sizes_none, prior_box_flip_clip_false, prior_box_attrs_mmar_order_true]
+
+    layer_w = 32
+    layer_h = 32
+
+    image_w = 40
+    image_h = 40
+
+    input_channels = 2
+    image_channels = 3
+    batch_size = 10
+
+    input_data = np.random.random(
+        (batch_size, input_channels, layer_w,
+         layer_h)).astype('float32')
+
+    image_data = np.random.random(
+        (batch_size, image_channels, image_w,
+         image_h)).astype('float32')
+
+    for item in prior_box_attrs_list:
+        pred_pdpd = prior_box(item['name'], input_data, image_data, item)
diff --git a/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_stack.py b/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_stack.py
new file mode 100644
index 00000000000000..04a6917d4fb523
--- /dev/null
+++ b/ngraph/test/frontend/paddlepaddle/test_models/gen_scripts/generate_stack.py
@@ -0,0 +1,81 @@
+#
+# stack paddle model generator
+#
+import numpy as np
+from save_model import saveModel
+import sys
+
+
+def stack(axis, input1, input2, input3):
+    import paddle as pdpd
+    pdpd.enable_static()
+
+    with pdpd.static.program_guard(pdpd.static.Program(), pdpd.static.Program()):
+        data1 = pdpd.static.data(
+            'data1', shape=input1.shape, dtype=input1.dtype)
+        data2 = pdpd.static.data(
+            'data2', shape=input2.shape, dtype=input2.dtype)
+        data3 = pdpd.static.data(
+            'data3', shape=input3.shape, dtype=input3.dtype)
+
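+        # paddle.stack inserts a new axis at the requested position (axis=0
+        # when it is omitted), so stacking three (1, 2) tensors along axis=0
+        # yields a single (3, 1, 2) tensor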
+        if axis is None:
+            out = pdpd.paddle.stack([data1, data2, data3])
+        else:
+            out = pdpd.paddle.stack([data1, data2, data3], axis)
+
+        cpu = pdpd.static.cpu_places(1)
+        exe = pdpd.static.Executor(cpu[0])
+        # the startup program runs the initializers that set up the parameters
+        exe.run(pdpd.static.default_startup_program())
+
+        outs = exe.run(
+            feed={"data1": input1,
+                  "data2": input2,
+                  "data3": input3},
+            fetch_list=[out])
+
+        if axis is None:
+            saveModel("stack_test_none_axis", exe, feedkeys=['data1', 'data2', 'data3'], fetchlist=[out], inputs=[
+                      input1, input2, input3], outputs=[outs[0]], target_dir=sys.argv[1])
+        elif axis < 0:
+            saveModel("stack_test_neg_axis", exe, feedkeys=['data1', 'data2', 'data3'], fetchlist=[out], inputs=[
+                      input1, input2, input3], outputs=[outs[0]], target_dir=sys.argv[1])
+        else:
+            saveModel("stack_test_" + str(input1.dtype), exe, feedkeys=['data1', 'data2', 'data3'], fetchlist=[out], inputs=[
+                      input1, input2, input3], outputs=[outs[0]], target_dir=sys.argv[1])
+
+    return outs[0]
+
+
+def main():
+    in_dtype = np.float32
+    axis_num = 1
+    input1 = np.random.random([1, 2]).astype(in_dtype)
+    input2 = np.random.random([1, 2]).astype(in_dtype)
+    input3 = np.random.random([1, 2]).astype(in_dtype)
+    stack(axis_num, input1, input2, input3)
+
+    in_dtype = np.int32
+    axis_num = 0
+    input1 = np.random.random([1, 2]).astype(in_dtype)
+    input2 = np.random.random([1, 2]).astype(in_dtype)
+    input3 = np.random.random([1, 2]).astype(in_dtype)
+    stack(axis_num, input1, input2, input3)
+
+    in_dtype = np.float32
+    axis_num = None
+    input1 = np.random.random([1, 2]).astype(in_dtype)
+    input2 = np.random.random([1, 2]).astype(in_dtype)
+    input3 = np.random.random([1, 2]).astype(in_dtype)
+    stack(axis_num, input1, input2, input3)
+
+    in_dtype = np.float32
+    axis_num = -1
+    input1 = np.random.random([1, 2]).astype(in_dtype)
+    input2 = np.random.random([1, 2]).astype(in_dtype)
+    input3 = np.random.random([1, 2]).astype(in_dtype)
+    stack(axis_num, input1, input2, input3)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ngraph/test/layout.cpp b/ngraph/test/layout.cpp
index e11a626e98942c..2b3d980dd522ae 100644
--- a/ngraph/test/layout.cpp
+++ b/ngraph/test/layout.cpp
@@ -48,6 +48,7 @@ TEST(layout, advanced_syntax) {
 
 TEST(layout, empty) {
     Layout l;
+    EXPECT_TRUE(Layout("").empty());
     EXPECT_FALSE(layout::has_batch(l));
     EXPECT_THROW(layout::batch(l), ov::AssertFailure);
     EXPECT_FALSE(layout::has_channels(l));
@@ -132,7 +133,6 @@ TEST(layout, dims_valid_syntax) {
 
 TEST(layout, dims_wrong_syntax) {
     Layout l;
-    EXPECT_THROW(l = "", ov::AssertFailure);
     EXPECT_THROW(l = " ", ov::AssertFailure);
     std::string invalidChars = "`~!@#$%^&*()-=+{}\"'><,|";
     for (auto c : invalidChars) {
diff --git a/ngraph/test/models/onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.prototxt b/ngraph/test/models/onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.prototxt
new file mode 100644
index 00000000000000..577926c6d3114f
--- /dev/null
+++ b/ngraph/test/models/onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.prototxt
@@ -0,0 +1,186 @@
+ir_version: 6
+producer_name: "nGraph"
+graph {
+  node {
+    input: "input_ids"
+    input: "segment_ids"
+    input: "word_embeddings"
+    input: "position_embeddings"
+    input: "segment_embeddings"
+    input: "gamma"
+    input: "beta"
+    input: "mask"
+    output: "output"
+    output: "mask_index"
+    name: "EmbedLayerNormalization_1"
+    op_type: "EmbedLayerNormalization"
+    attribute {
+      name: "epsilon"
+      f: 9.999999960041972e-13
+      type: FLOAT
+    }
+    domain: "com.microsoft"
+  }
+  name: "graph"
+  input {
+    name: "input_ids"
+    type {
+      tensor_type {
+        elem_type: 6
+        shape {
+          dim {
+            dim_param: 
"batch_size" + } + dim { + dim_param: "seq_len" + } + } + } + } + } + input { + name: "segment_ids" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_param: "batch_size" + } + dim { + dim_param: "seq_len" + } + } + } + } + } + input { + name: "word_embeddings" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_param: "word_embed_len" + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "position_embeddings" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_param: "pos_embed_len" + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "segment_embeddings" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_param: "segment_embed_len" + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "gamma" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "beta" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_param: "batch_size" + } + dim { + dim_param: "seq_len" + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_param: "batch_size" + } + dim { + dim_param: "seq_len" + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "mask_index" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_param: "batch_size" + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/embed_layer_normalization.prototxt b/ngraph/test/models/onnx/embed_layer_normalization.prototxt new file mode 100644 index 00000000000000..1cd1bfcc1b3d70 --- /dev/null +++ b/ngraph/test/models/onnx/embed_layer_normalization.prototxt @@ -0,0 +1,187 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input_ids" + input: "" + input: "word_embeddings" + input: "position_embeddings" + input: "" + input: "gamma" + input: "beta" + output: "output" + name: "EmbedLayerNormalization_1" + op_type: "EmbedLayerNormalization" + attribute { + name: "epsilon" + f: 9.999999960041972e-13 + type: FLOAT + } + domain: "com.microsoft" + } + name: "graph" + initializer { + dims: 10 + dims: 5 + data_type: 1 + name: "word_embeddings" + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 
0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + float_data: -0.061639595776796341 + float_data: -0.070428818464279175 + float_data: 0.080737568438053131 + float_data: -0.014098187908530235 + float_data: -0.066207133233547211 + float_data: 0.078362509608268738 + float_data: -0.021088391542434692 + float_data: -0.022340660914778709 + float_data: -0.065533898770809174 + float_data: -0.022695079445838928 + float_data: 0.01550679374486208 + float_data: -0.022843297570943832 + float_data: 0.044251278042793274 + float_data: -0.0071350894868373871 + } + initializer { + dims: 8 + dims: 5 + data_type: 1 + name: "position_embeddings" + float_data: 0.11355137079954147 + float_data: 0.048468157649040222 + float_data: 0.053486518561840057 + float_data: 0.01513370219618082 + float_data: 0.14626613259315491 + float_data: -0.18863441050052643 + float_data: 0.10133393853902817 + float_data: 0.098319537937641144 + float_data: 0.070722959935665131 + float_data: -0.018062451854348183 + float_data: -0.018210677430033684 + float_data: 0.018454158678650856 + float_data: 0.025413623079657555 + float_data: -0.017915787175297737 + float_data: 0.088725067675113678 + float_data: -0.10261145234107971 + float_data: -0.16650274395942688 + float_data: 0.087947741150856018 + float_data: -0.072966478765010834 + float_data: -0.072863951325416565 + float_data: -0.057195741683244705 + float_data: 0.052380021661520004 + float_data: 0.150204136967659 + float_data: 0.036691628396511078 + float_data: -0.055858571082353592 + float_data: 0.013746094889938831 + float_data: -0.041797593235969543 + float_data: 0.036348219960927963 + float_data: 0.032991457730531693 + float_data: -0.031414791941642761 + float_data: -0.026756083592772484 + float_data: -0.077081479132175446 + float_data: 0.039385091513395309 + float_data: -0.028280897065997124 + float_data: -0.039638441056013107 + float_data: 0.1023884043097496 + float_data: -0.038734495639801025 + float_data: 0.034112773835659027 + float_data: -0.024975193664431572 + float_data: -0.061074573546648026 + } + initializer { + dims: 5 + data_type: 1 + name: "gamma" + float_data: 0.037749473005533218 + float_data: -0.10285304486751556 + float_data: -0.030169183388352394 + float_data: -0.02105225995182991 + float_data: 0.11735564470291138 + } + initializer { + dims: 5 + data_type: 1 + name: "beta" + float_data: -0.058927357196807861 + float_data: -0.019592402502894402 + float_data: 0.0062640579417347908 + float_data: -0.010709371417760849 + float_data: -0.010058049112558365 + } + input { + name: "input_ids" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding.prototxt b/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding.prototxt new file mode 100644 index 00000000000000..36b7a1deaaa870 --- /dev/null +++ b/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding.prototxt @@ -0,0 +1,239 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input_ids" + input: "segment_ids" + input: "word_embeddings" + input: "position_embeddings" + input: "segment_embeddings" + input: "gamma" + input: 
"beta" + output: "output" + output: "mask_index" + name: "EmbedLayerNormalization_1" + op_type: "EmbedLayerNormalization" + attribute { + name: "epsilon" + f: 9.999999960041972e-13 + type: FLOAT + } + domain: "com.microsoft" + } + name: "graph" + initializer { + dims: 10 + dims: 5 + data_type: 1 + name: "word_embeddings" + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + float_data: -0.061639595776796341 + float_data: -0.070428818464279175 + float_data: 0.080737568438053131 + float_data: -0.014098187908530235 + float_data: -0.066207133233547211 + float_data: 0.078362509608268738 + float_data: -0.021088391542434692 + float_data: -0.022340660914778709 + float_data: -0.065533898770809174 + float_data: -0.022695079445838928 + float_data: 0.01550679374486208 + float_data: -0.022843297570943832 + float_data: 0.044251278042793274 + float_data: -0.0071350894868373871 + } + initializer { + dims: 8 + dims: 5 + data_type: 1 + name: "position_embeddings" + float_data: 0.11355137079954147 + float_data: 0.048468157649040222 + float_data: 0.053486518561840057 + float_data: 0.01513370219618082 + float_data: 0.14626613259315491 + float_data: -0.18863441050052643 + float_data: 0.10133393853902817 + float_data: 0.098319537937641144 + float_data: 0.070722959935665131 + float_data: -0.018062451854348183 + float_data: -0.018210677430033684 + float_data: 0.018454158678650856 + float_data: 0.025413623079657555 + float_data: -0.017915787175297737 + float_data: 0.088725067675113678 + float_data: -0.10261145234107971 + float_data: -0.16650274395942688 + float_data: 0.087947741150856018 + float_data: -0.072966478765010834 + float_data: -0.072863951325416565 + float_data: -0.057195741683244705 + float_data: 0.052380021661520004 + float_data: 0.150204136967659 + float_data: 0.036691628396511078 + float_data: -0.055858571082353592 + float_data: 0.013746094889938831 + float_data: -0.041797593235969543 + float_data: 0.036348219960927963 + float_data: 0.032991457730531693 + float_data: -0.031414791941642761 + float_data: -0.026756083592772484 + float_data: -0.077081479132175446 + float_data: 0.039385091513395309 + float_data: -0.028280897065997124 + float_data: -0.039638441056013107 + float_data: 0.1023884043097496 + float_data: -0.038734495639801025 + float_data: 0.034112773835659027 + float_data: -0.024975193664431572 
+ float_data: -0.061074573546648026 + } + initializer { + dims: 3 + dims: 5 + data_type: 1 + name: "segment_embeddings" + float_data: -0.027431340888142586 + float_data: -0.01666862890124321 + float_data: -0.052050836384296417 + float_data: -0.074926018714904785 + float_data: 0.0045464779250323772 + float_data: 0.054949179291725159 + float_data: 0.046781986951828003 + float_data: 0.065758734941482544 + float_data: -0.036851223558187485 + float_data: -0.041801471263170242 + float_data: 0.025191636756062508 + float_data: -0.046526473015546799 + float_data: 0.027152393013238907 + float_data: 0.026372035965323448 + float_data: -0.020972840487957001 + } + initializer { + dims: 5 + data_type: 1 + name: "gamma" + float_data: 0.037749473005533218 + float_data: -0.10285304486751556 + float_data: -0.030169183388352394 + float_data: -0.02105225995182991 + float_data: 0.11735564470291138 + } + initializer { + dims: 5 + data_type: 1 + name: "beta" + float_data: -0.058927357196807861 + float_data: -0.019592402502894402 + float_data: 0.0062640579417347908 + float_data: -0.010709371417760849 + float_data: -0.010058049112558365 + } + input { + name: "input_ids" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + } + } + } + } + input { + name: "segment_ids" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "mask_index" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 3 + } + } + } + } + } + +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding_and_mask.prototxt b/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding_and_mask.prototxt new file mode 100644 index 00000000000000..1181538c82ccaa --- /dev/null +++ b/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding_and_mask.prototxt @@ -0,0 +1,256 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input_ids" + input: "segment_ids" + input: "word_embeddings" + input: "position_embeddings" + input: "segment_embeddings" + input: "gamma" + input: "beta" + input: "mask" + output: "output" + output: "mask_index" + name: "EmbedLayerNormalization_1" + op_type: "EmbedLayerNormalization" + attribute { + name: "epsilon" + f: 9.999999960041972e-13 + type: FLOAT + } + domain: "com.microsoft" + } + name: "graph" + initializer { + dims: 10 + dims: 5 + data_type: 1 + name: "word_embeddings" + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + 
float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + float_data: -0.061639595776796341 + float_data: -0.070428818464279175 + float_data: 0.080737568438053131 + float_data: -0.014098187908530235 + float_data: -0.066207133233547211 + float_data: 0.078362509608268738 + float_data: -0.021088391542434692 + float_data: -0.022340660914778709 + float_data: -0.065533898770809174 + float_data: -0.022695079445838928 + float_data: 0.01550679374486208 + float_data: -0.022843297570943832 + float_data: 0.044251278042793274 + float_data: -0.0071350894868373871 + } + initializer { + dims: 8 + dims: 5 + data_type: 1 + name: "position_embeddings" + float_data: 0.11355137079954147 + float_data: 0.048468157649040222 + float_data: 0.053486518561840057 + float_data: 0.01513370219618082 + float_data: 0.14626613259315491 + float_data: -0.18863441050052643 + float_data: 0.10133393853902817 + float_data: 0.098319537937641144 + float_data: 0.070722959935665131 + float_data: -0.018062451854348183 + float_data: -0.018210677430033684 + float_data: 0.018454158678650856 + float_data: 0.025413623079657555 + float_data: -0.017915787175297737 + float_data: 0.088725067675113678 + float_data: -0.10261145234107971 + float_data: -0.16650274395942688 + float_data: 0.087947741150856018 + float_data: -0.072966478765010834 + float_data: -0.072863951325416565 + float_data: -0.057195741683244705 + float_data: 0.052380021661520004 + float_data: 0.150204136967659 + float_data: 0.036691628396511078 + float_data: -0.055858571082353592 + float_data: 0.013746094889938831 + float_data: -0.041797593235969543 + float_data: 0.036348219960927963 + float_data: 0.032991457730531693 + float_data: -0.031414791941642761 + float_data: -0.026756083592772484 + float_data: -0.077081479132175446 + float_data: 0.039385091513395309 + float_data: -0.028280897065997124 + float_data: -0.039638441056013107 + float_data: 0.1023884043097496 + float_data: -0.038734495639801025 + float_data: 0.034112773835659027 + float_data: -0.024975193664431572 + float_data: -0.061074573546648026 + } + initializer { + dims: 3 + dims: 5 + data_type: 1 + name: "segment_embeddings" + float_data: -0.027431340888142586 + float_data: -0.01666862890124321 + float_data: -0.052050836384296417 + float_data: -0.074926018714904785 + float_data: 0.0045464779250323772 + float_data: 0.054949179291725159 + float_data: 0.046781986951828003 + float_data: 0.065758734941482544 + float_data: -0.036851223558187485 + float_data: -0.041801471263170242 + float_data: 0.025191636756062508 + float_data: -0.046526473015546799 + float_data: 0.027152393013238907 + float_data: 0.026372035965323448 + float_data: -0.020972840487957001 + } + initializer { + dims: 5 + data_type: 1 + name: "gamma" + float_data: 0.037749473005533218 + float_data: -0.10285304486751556 + float_data: -0.030169183388352394 + float_data: -0.02105225995182991 + float_data: 0.11735564470291138 + } + initializer { + dims: 5 + data_type: 1 + name: "beta" + float_data: -0.058927357196807861 + float_data: -0.019592402502894402 + float_data: 0.0062640579417347908 + float_data: -0.010709371417760849 + float_data: 
-0.010058049112558365 + } + input { + name: "input_ids" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + } + } + } + } + input { + name: "segment_ids" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + } + } + } + } + + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 8 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "mask_index" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/matmul_integer.prototxt b/ngraph/test/models/onnx/matmul_integer.prototxt new file mode 100644 index 00000000000000..813f4920ce7db0 --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer.prototxt @@ -0,0 +1,91 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + input: "a_zero_point" + input: "b_zero_point" + output: "Y" + op_type: "MatMulInteger" + } + name: "MatMulInt" + input { + name: "A" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "a_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "b_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + domain: "" + version: 13 +} diff --git a/ngraph/test/models/onnx/matmul_integer_2d_x_3d.prototxt b/ngraph/test/models/onnx/matmul_integer_2d_x_3d.prototxt new file mode 100644 index 00000000000000..dcac5092c5a206 --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_2d_x_3d.prototxt @@ -0,0 +1,83 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + input: "a_zero_point" + output: "Y" + op_type: "MatMulInteger" + } + name: "MatMulInt" + input { + name: "A" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "a_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/models/onnx/matmul_integer_3d.prototxt b/ngraph/test/models/onnx/matmul_integer_3d.prototxt new file mode 100644 index 00000000000000..e640c7714d4a86 --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_3d.prototxt @@ -0,0 +1,106 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + input: 
"a_zero_point" + input: "b_zero_point" + output: "Y" + op_type: "MatMulInteger" + } + name: "MatMulInt" + input { + name: "A" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "a_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "b_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } +} +opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/models/onnx/matmul_integer_3d_x_2d.prototxt b/ngraph/test/models/onnx/matmul_integer_3d_x_2d.prototxt new file mode 100644 index 00000000000000..e359be5d04d20e --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_3d_x_2d.prototxt @@ -0,0 +1,97 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + input: "a_zero_point" + input: "b_zero_point" + output: "Y" + op_type: "MatMulInteger" + } + name: "MatMulInt" + input { + name: "A" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "a_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "b_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/models/onnx/matmul_integer_4d.prototxt b/ngraph/test/models/onnx/matmul_integer_4d.prototxt new file mode 100644 index 00000000000000..a1c29d86d62a21 --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_4d.prototxt @@ -0,0 +1,109 @@ +ir_version: 5 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + input: "a_zero_point" + input: "b_zero_point" + output: "Y" + op_type: "MatMulInteger" + } + name: "test" + input { + name: "A" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "a_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "b_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + } + } + } + } +} 
+opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/models/onnx/matmul_integer_4d_no_zero_point.prototxt b/ngraph/test/models/onnx/matmul_integer_4d_no_zero_point.prototxt new file mode 100644 index 00000000000000..6f14518837ba1f --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_4d_no_zero_point.prototxt @@ -0,0 +1,81 @@ +ir_version: 5 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + output: "Y" + op_type: "MatMulInteger" + } + name: "test" + input { + name: "A" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/models/onnx/matmul_integer_int8.prototxt b/ngraph/test/models/onnx/matmul_integer_int8.prototxt new file mode 100644 index 00000000000000..e28705d03b8790 --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_int8.prototxt @@ -0,0 +1,91 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + input: "a_zero_point" + input: "b_zero_point" + output: "Y" + op_type: "MatMulInteger" + } + name: "MatMulInt" + input { + name: "A" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "a_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "b_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } +} +opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/models/onnx/matmul_integer_matrix_zero_point.prototxt b/ngraph/test/models/onnx/matmul_integer_matrix_zero_point.prototxt new file mode 100644 index 00000000000000..31bfa2e775da4d --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_matrix_zero_point.prototxt @@ -0,0 +1,127 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + input: "a_zero_point" + input: "b_zero_point" + output: "Y" + op_type: "MatMulInteger" + } + name: "MatMulInt" + input { + name: "A" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "a_zero_point" + type { + tensor_type { + elem_type: 3 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "b_zero_point" + type { + tensor_type { + elem_type: 3 + shape 
{ + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 1 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/models/onnx/matmul_integer_no_zero_point.prototxt b/ngraph/test/models/onnx/matmul_integer_no_zero_point.prototxt new file mode 100644 index 00000000000000..aefb304823cd3a --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_no_zero_point.prototxt @@ -0,0 +1,63 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + output: "Y" + op_type: "MatMulInteger" + } + name: "MatMulInt" + input { + name: "A" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 5 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/models/onnx/matmul_integer_vectorized_zero_point.prototxt b/ngraph/test/models/onnx/matmul_integer_vectorized_zero_point.prototxt new file mode 100644 index 00000000000000..12974e6b4caf9f --- /dev/null +++ b/ngraph/test/models/onnx/matmul_integer_vectorized_zero_point.prototxt @@ -0,0 +1,91 @@ +ir_version: 7 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "A" + input: "B" + input: "a_zero_point" + input: "b_zero_point" + output: "Y" + op_type: "MatMulInteger" + } + name: "MatMulInt" + input { + name: "A" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 5 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "a_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "b_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + domain: "" + version: 10 +} diff --git a/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp b/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp index 43f74e64b06703..63611843a27dfd 100644 --- a/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp +++ b/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp @@ -73,7 +73,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta test_case.add_input(input); test_case.add_input(skip); test_case.add_expected_output(expected); - test_case.run(5); + test_case.run_with_tolerance_as_fp(); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta) { @@ -99,7 +99,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta test_case.add_input(input); test_case.add_input(skip); test_case.add_expected_output(expected); - test_case.run(7); + test_case.run_with_tolerance_as_fp(); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { @@ -125,7 +125,7 @@ 
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { test_case.add_input(input); test_case.add_input(skip); test_case.add_expected_output(expected); - test_case.run(6); + test_case.run_with_tolerance_as_fp(); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) { @@ -173,5 +173,219 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) test_case.add_input(Shape{4}, beta); test_case.add_input(Shape{4}, bias); test_case.add_expected_output(Shape{3, 2, 4}, expected); - test_case.run(7); + test_case.run_with_tolerance_as_fp(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization) { + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization.onnx")); + + std::vector input_ids = { + 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, + }; + std::vector expected_output = { + -0.06615843, -0.18040463, 0.02199928, 0.01868065, 0.05397778, -0.11761580, -0.09138932, -0.02506775, + -0.02368510, -0.10373901, -0.05551499, -0.20972314, 0.01365213, 0.01132561, -0.08603337, -0.08906764, + 0.09692993, -0.04444099, -0.02037602, -0.03453060, -0.10214549, -0.13331436, -0.02665862, -0.01228805, + -0.14232540, -0.07032782, 0.05511986, -0.00120272, -0.04875736, -0.13051267, -0.05709254, 0.17854357, + -0.01759873, -0.01819968, 0.07573269, 0.00557164, 0.06232717, 0.00530490, -0.01565807, -0.14841977, + -0.02299280, 0.02038561, -0.00049481, 0.02575402, 0.10081697, -0.12517214, -0.09316762, -0.00974943, + -0.03093284, -0.06309240, -0.05551499, -0.20972314, 0.01365213, 0.01132561, -0.08603337, -0.06176658, + 0.08304203, -0.05025182, 0.00383657, -0.02288112, -0.11407227, -0.01386134, -0.04411830, -0.00537948, + 0.00164397, -0.03739140, 0.09941526, 0.00333974, -0.04251949, -0.12992151, -0.09509478, -0.11811313, + -0.03307065, -0.00866115, -0.15162414, 0.01106802, 0.06037656, 0.00035292, -0.00223284, -0.11215645, + -0.01390734, 0.07064321, 0.04028325, -0.00290875, 0.12875907, -0.12517214, -0.09316762, -0.00974943, + -0.03093284, -0.06309240, -0.08723789, 0.03130914, 0.03131931, -0.01526242, 0.20811458, -0.05696163, + 0.16304255, -0.02407495, -0.02955675, -0.03086288, -0.08130091, -0.05001551, -0.04875683, 0.00143666, + -0.12153473, -0.00018507, 0.10957482, -0.00416618, -0.01612359, -0.11605026, -0.08593204, 0.09055272, + -0.03054028, -0.03603891, -0.08479506, -0.00034568, 0.03713699, 0.00163411, -0.01738501, -0.18267182, + }; + + auto test_case = test::TestCase(function); + test_case.add_input(input_ids); + test_case.add_expected_output(expected_output); + test_case.run_with_tolerance_as_fp(1e-7f); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization_with_segment_embedding.onnx")); + + std::vector input_ids = { + 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, + }; + std::vector segment_ids = { + 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, + }; + std::vector expected_output = { + -0.06044213, -0.14845914, 0.02457689, 0.02091519, 0.09514004, -0.10280035, -0.02087995, -0.03323204, + -0.02967127, -0.13447416, -0.05191760, -0.16518904, 0.02340531, 0.02176395, 0.04972410, -0.07360736, + 0.12192874, -0.04081530, -0.02338044, -0.05671440, -0.09475864, -0.08944942, -0.03362993, -0.01683486, + -0.16770349, -0.07382569, 
0.06230322, 0.02215859, -0.05212611, -0.03934773, -0.04748865, 0.18134241, + -0.01965741, -0.02202452, 0.01973994, 0.01575558, 0.04300199, 0.01436110, -0.00198062, -0.09065692, + -0.02923042, -0.00748686, 0.00717049, 0.02638642, 0.12174864, -0.12973398, -0.11872391, -0.00549398, + -0.02386289, -0.02210563, -0.03590920, -0.13728066, -0.01337939, 0.01538021, -0.14687485, -0.05033565, + 0.03818212, -0.04939338, 0.00961064, -0.07407621, -0.09624685, 0.05594898, -0.04948713, -0.01305631, + -0.03779668, -0.01469170, 0.12346989, 0.02082030, -0.03449103, -0.06029151, -0.09300473, -0.16308543, + -0.02370042, 0.01066893, -0.06523034, 0.00497636, 0.01933458, -0.00900802, 0.00430878, -0.13999483, + -0.02377289, 0.01760014, 0.03896973, 0.00831112, 0.15634246, -0.11109130, -0.11997811, -0.02304414, + -0.01989413, -0.12763791, -0.05698400, 0.17125534, 0.00499324, -0.02953288, 0.09178342, -0.05001877, + 0.16157132, -0.02312993, -0.02932195, -0.04914058, -0.07994118, -0.07199102, -0.04517454, 0.01249476, + -0.07525793, -0.00207180, 0.03993115, -0.01676321, -0.00214832, -0.16074482, -0.05012497, -0.00552153, + -0.04302063, -0.00549224, -0.18399858, -0.00767871, -0.02209404, -0.01383207, -0.00082931, -0.19533031, + }; + + std::vector expected_mask_index = { + 0, + 0, + 0, + }; + + auto test_case = test::TestCase(function); + test_case.add_input(input_ids); + test_case.add_input(segment_ids); + test_case.add_expected_output(expected_output); + test_case.add_expected_output(expected_mask_index); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding_and_mask) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization_with_segment_embedding_and_mask.onnx")); + + std::vector input_ids = { + 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, + }; + std::vector segment_ids = { + 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, + }; + std::vector mask = { + 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, + }; + std::vector expected_output = { + -0.06044213, -0.14845914, 0.02457689, 0.02091519, 0.09514004, -0.10280035, -0.02087995, -0.03323204, + -0.02967127, -0.13447416, -0.05191760, -0.16518904, 0.02340531, 0.02176395, 0.04972410, -0.07360736, + 0.12192874, -0.04081530, -0.02338044, -0.05671440, -0.09475864, -0.08944942, -0.03362993, -0.01683486, + -0.16770349, -0.07382569, 0.06230322, 0.02215859, -0.05212611, -0.03934773, -0.04748865, 0.18134241, + -0.01965741, -0.02202452, 0.01973994, 0.01575558, 0.04300199, 0.01436110, -0.00198062, -0.09065692, + -0.02923042, -0.00748686, 0.00717049, 0.02638642, 0.12174864, -0.12973398, -0.11872391, -0.00549398, + -0.02386289, -0.02210563, -0.03590920, -0.13728066, -0.01337939, 0.01538021, -0.14687485, -0.05033565, + 0.03818212, -0.04939338, 0.00961064, -0.07407621, -0.09624685, 0.05594898, -0.04948713, -0.01305631, + -0.03779668, -0.01469170, 0.12346989, 0.02082030, -0.03449103, -0.06029151, -0.09300473, -0.16308543, + -0.02370042, 0.01066893, -0.06523034, 0.00497636, 0.01933458, -0.00900802, 0.00430878, -0.13999483, + -0.02377289, 0.01760014, 0.03896973, 0.00831112, 0.15634246, -0.11109130, -0.11997811, -0.02304414, + -0.01989413, -0.12763791, -0.05698400, 0.17125534, 0.00499324, -0.02953288, 0.09178342, -0.05001877, + 0.16157132, -0.02312993, -0.02932195, -0.04914058, -0.07994118, -0.07199102, -0.04517454, 0.01249476, + -0.07525793, -0.00207180, 0.03993115, 
-0.01676321, -0.00214832, -0.16074482, -0.05012497, -0.00552153, + -0.04302063, -0.00549224, -0.18399858, -0.00767871, -0.02209404, -0.01383207, -0.00082931, -0.19533031, + }; + std::vector expected_mask_index = { + 5, + 3, + 4, + }; + + auto test_case = test::TestCase(function); + test_case.add_input(input_ids); + test_case.add_input(segment_ids); + test_case.add_input(mask); + test_case.add_expected_output(expected_output); + test_case.add_expected_output(expected_mask_index); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.onnx")); + + std::vector input_ids = { + 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, + }; + std::vector segment_ids = { + 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, + }; + std::vector word_embeddings = { + 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, + 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, + 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, + 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, + 0.68866116, 0.88047588, 0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, + 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + }; + std::vector position_embeddings = { + 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.36980811, 0.14644176, 0.56961840, + 0.70373726, 0.28847644, 0.43328807, 0.75610667, 0.39609829, 0.89603841, 0.63892108, 0.89155442, + 0.68005556, 0.44919774, 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, + 0.31321833, 0.96541619, 0.58846509, 0.65966839, 0.53320622, 0.23053302, 0.39486930, 0.61880857, + 0.47486752, 0.47013220, 0.71607453, 0.28799102, 0.38346222, 0.74916983, 0.87845218, 0.10286336, + }; + std::vector segment_embeddings = { + 0.09237389, + 0.35404667, + 0.55181628, + 0.03362509, + 0.96896178, + 0.32099724, + 0.22126268, + 0.14126390, + 0.09725992, + 0.98404223, + 0.26034093, + 0.53702253, + 0.44792616, + 0.09956909, + 0.35231167, + }; + std::vector gamma = { + 0.46924916, + 0.84114015, + 0.90464777, + 0.03755938, + 0.50831544, + }; + std::vector beta = { + 0.16684751, + 0.77905101, + 0.86493331, + 0.41139671, + 0.13997258, + }; + std::vector mask = { + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, + }; + std::vector expected_output = { + -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, + 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, + 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, + 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, + 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, + 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, + 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, + 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 
0.36345237,
+ -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157,
+ 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695,
+ 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075,
+ 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794,
+ 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452,
+ 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403,
+ -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665,
+ };
+ std::vector expected_mask_index = {
+ 6,
+ 5,
+ 5,
+ };
+
+ auto test_case = test::TestCase(function);
+ test_case.add_input(Shape{3, 8}, input_ids);
+ test_case.add_input(Shape{3, 8}, segment_ids);
+ test_case.add_input(Shape{10, 5}, word_embeddings);
+ test_case.add_input(Shape{8, 5}, position_embeddings);
+ test_case.add_input(Shape{3, 5}, segment_embeddings);
+ test_case.add_input(Shape{5}, gamma);
+ test_case.add_input(Shape{5}, beta);
+ test_case.add_input(Shape{3, 8}, mask);
+ test_case.add_expected_output(Shape{3, 8, 5}, expected_output);
+ test_case.add_expected_output(Shape{3}, expected_mask_index);
+ test_case.run_with_tolerance_as_fp(1e-6);
+}
diff --git a/ngraph/test/onnx/onnx_import_quant.in.cpp b/ngraph/test/onnx/onnx_import_quant.in.cpp
index 551952ee090353..049a1712442205 100644
--- a/ngraph/test/onnx/onnx_import_quant.in.cpp
+++ b/ngraph/test/onnx/onnx_import_quant.in.cpp
@@ -402,6 +402,316 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_onnx_example) {
 test_case.run();
 }
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_2d_simple_zero_point) {
+ auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{11, 7, 3,
+ 10, 6, 2,
+ 9, 5, 1,
+ 8, 4, 0}); // A
+ test_case.add_input(std::vector{1, 4,
+ 2, 5,
+ 3, 6}); // B
+ test_case.add_input(std::vector{12}); // a_zero_point
+ test_case.add_input(std::vector{0}); // b_zero_point
+
+ test_case.add_expected_output({4, 2}, std::vector{-38, -83,
+ -44, -98,
+ -50, -113,
+ -56, -128}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_int8) {
+ auto function =
+ onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_int8.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{-3, 7, 5, -6,
+ 4, -5, 8, 7}); // A
+ test_case.add_input(std::vector{ 5, -3, 7, 8,
+ -6, -8, -3, 6,
+ 7, 9, 9, -5,
+ 8, 7, -6, 7}); // B
+ test_case.add_input(std::vector{5}); // a_zero_point
+ test_case.add_input(std::vector{5}); // b_zero_point
+
+ test_case.add_expected_output({2, 4}, std::vector{-55, 16, 89, -44,
+ 122, 154, 68, -39}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_vectorized_zero_point) {
+ auto function = onnx_import::import_onnx_model(
+ file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_vectorized_zero_point.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
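+ // The expected values below follow the MatMulInteger definition
+ // Y = (A - a_zero_point) * (B - b_zero_point); here a_zero_point holds one
+ // value per row of A and b_zero_point one value per column of B, e.g.:
+ //   Y[0][0] = (11-33)*(13-10) + (22-33)*(21-10) + (33-33)*(9-10) + (44-33)*(107-10) + (55-33)*(1-10)
+ //           = -66 - 121 + 0 + 1067 - 198 = 682
+ // Every row of A minus its per-row zero point equals {-22, -11, 0, 11, 22},
+ // hence the four identical output rows.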
+ test_case.add_input(std::vector{11, 22, 33, 44, 55,
+ 22, 33, 44, 55, 66,
+ 33, 44, 55, 66, 77,
+ 44, 55, 66, 77, 88}); // A
+ test_case.add_input(std::vector{ 13, 1, 3,
+ 21, 49, 31,
+ 9, 0, 2,
+ 107, 7, 94,
+ 1, 63, 127}); // B
+ test_case.add_input(std::vector{33, 44, 55, 66}); // a_zero_point
+ test_case.add_input(std::vector{10, 20, 30}); // b_zero_point
+
+ test_case.add_expected_output({4, 3}, std::vector{682, 902, 3421,
+ 682, 902, 3421,
+ 682, 902, 3421,
+ 682, 902, 3421}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_no_zero_point) {
+ auto function =
+ onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_no_zero_point.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{11, 22, 33, 44, 55,
+ 22, 33, 44, 55, 66,
+ 33, 44, 55, 66, 77,
+ 44, 55, 66, 77, 88}); // A
+ test_case.add_input(std::vector{ 13, 1, 3,
+ 21, 49, 31,
+ 9, 0, 2,
+ 107, 7, 94,
+ 1, 63, 127}); // B
+
+ test_case.add_expected_output({4, 3}, std::vector{ 5665, 4862, 11902,
+ 7326, 6182, 14729,
+ 8987, 7502, 17556,
+ 10648, 8822, 20383}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_2d_x_3d) {
+ auto function =
+ onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_2d_x_3d.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{7, -3, 1, 2,
+ 0, 2, -4, 6}); // A
+ test_case.add_input(std::vector{1, -13, 10,
+ 2, -16, 14,
+ 3, -19, 18,
+ 4, -22, 22,
+
+ -1, 13, -10,
+ -2, 16, -14,
+ -3, 19, -18,
+ -4, 22, -22}); // B
+ test_case.add_input(std::vector{-4}); // a_zero_point
+
+ test_case.add_expected_output({2, 2, 3}, std::vector{52, -386, 346,
+ 56, -368, 344,
+
+ -52, 386, -346,
+ -56, 368, -344}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_3d_x_2d) {
+ auto function =
+ onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_3d_x_2d.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{-13, 11, -1, -2,
+ 4, -2, 3, 10,
+
+ 8, -2, 4, 5,
+ -4, -3, 1, 2}); // A
+ test_case.add_input(std::vector{ 1, -3, 5,
+ 7, -2, -10,
+ -13, 9, 7,
+ 11, 3, -3}); // B
+ test_case.add_input(std::vector{4}); // a_zero_point
+ test_case.add_input(std::vector{-3}); // b_zero_point
+
+ test_case.add_expected_output({2, 2, 3}, std::vector{-32, -89, -235,
+ 34, 18, 32,
+
+ -30, 0, 74,
+ -100, -55, -45}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_3d) {
+ auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_3d.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{125, 135, 145, 155,
+ 130, 140, 150, 160,
+
+ 125, 135, 145, 155,
+ 130, 140, 150, 160}); // A
+ test_case.add_input(std::vector{-10, -5, 0, 5,
+ -5, 0, 5, 10,
+ -5, -4, -3, -2,
+ -1, 0, 1, 2,
+
+ 10, 5, 0, -5,
+ 5, 0, -5, -10,
+ 5, 4, 3, 2,
+ 1, 0, -1, -2}); // B
+ test_case.add_input(std::vector{150}); // a_zero_point
+ test_case.add_input(std::vector{5, 10, 15, 20,
+ -5, -10, -15, -20}); // b_zero_point
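+ // The expected values can be sanity-checked against the same definition,
+ // with a scalar a_zero_point and a per-batch, per-column b_zero_point, e.g.:
+ //   Y[0][0][0] = (125-150)*(-10-5) + (135-150)*(-5-5) + (145-150)*(-5-5) + (155-150)*(-1-5)
+ //              = 375 + 150 + 50 - 30 = 545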
+
+ test_case.add_expected_output({2, 2, 4}, std::vector{545, 545, 545, 545,
+ 340, 300, 260, 220,
+
+ -545, -545, -545, -545,
+ -340, -300, -260, -220}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_4d) {
+ auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_4d.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+
+ 12, 13, 14, 15,
+ 16, 17, 18, 19,
+ 20, 21, 22, 23}); // A
+ test_case.add_input(std::vector{0, 1, 2,
+ 3, 4, 5,
+ 6, 7, 8,
+ 9, 10, 11,
+
+ 12, 13, 14,
+ 15, 16, 17,
+ 18, 19, 20,
+ 21, 22, 23}); // B
+ test_case.add_input(std::vector{0}); // a_zero_point
+ test_case.add_input(std::vector{0}); // b_zero_point
+
+ test_case.add_expected_output({1, 2, 3, 3}, std::vector{42, 48, 54,
+ 114, 136, 158,
+ 186, 224, 262,
+
+ 906, 960, 1014,
+ 1170, 1240, 1310,
+ 1434, 1520, 1606}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_4d_zero_point) {
+ auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_4d.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+
+ 12, 13, 14, 15,
+ 16, 17, 18, 19,
+ 20, 21, 22, 23}); // A
+ test_case.add_input(std::vector{0, 1, 2,
+ 3, 4, 5,
+ 6, 7, 8,
+ 9, 10, 11,
+
+ 12, 13, 14,
+ 15, 16, 17,
+ 18, 19, 20,
+ 21, 22, 23}); // B
+ test_case.add_input(std::vector{1}); // a_zero_point
+ test_case.add_input(std::vector{1}); // b_zero_point
+
+ test_case.add_expected_output({1, 2, 3, 3}, std::vector{22, 24, 26,
+ 78, 96, 114,
+ 134, 168, 202,
+
+ 790, 840, 890,
+ 1038, 1104, 1170,
+ 1286, 1368, 1450}); // Y
+ // clang-format on
+ test_case.run();
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_matmul_integer_matrix_zero_point) {
+ auto function = onnx_import::import_onnx_model(
+ file_util::path_join(SERIALIZED_ZOO, "onnx/matmul_integer_matrix_zero_point.onnx"));
+
+ auto test_case = test::TestCase(function);
+
+ // don't change style for better readability
+ // clang-format off
+ test_case.add_input(std::vector{0, 1, 2, 3,
+ 4, 5, 6, 7,
+
+ 8, 9, 10, 11,
+ 12, 13, 14, 15}); // A
+ test_case.add_input(std::vector{0, 1, 2,
+ 3, 4, 5,
+ 6, 7, 8,
+ 9, 10, 11,
+
+ 12, 13, 14,
+ 15, 16, 17,
+ 18, 19, 20,
+ 21, 22, 23}); // B
+ test_case.add_input(std::vector{1,
+ 2,
+
+ 3,
+ 4}); // a_zero_point
+ test_case.add_input(std::vector{1, 2, 3,
+
+ 4, 5, 6}); // b_zero_point
+
+ test_case.add_expected_output({1, 2, 2, 3}, std::vector{22, 22, 22,
+ 64, 64, 64,
+
+ 340, 340, 340,
+ 490, 490, 490}); // Y
+ // clang-format on
+ test_case.run();
+}
+
 NGRAPH_TEST(${BACKEND_NAME}, onnx_model_qlinear_matmul_3d) {
 auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/qlinear_matmul_3d.onnx"));
diff --git a/ngraph/test/op_eval/if_eval.cpp b/ngraph/test/op_eval/if_eval.cpp
deleted file mode 100644
index 41a1cb9424f30d..00000000000000
--- a/ngraph/test/op_eval/if_eval.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include
-#include
-#include
-
-#include "gtest/gtest.h"
-#include "ngraph/opsets/opset1.hpp"
-#include
"ngraph/opsets/opset5.hpp" -#include "ngraph/opsets/opset8.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" -#include "runtime/backend.hpp" -#include "util/test_tools.hpp" - -using namespace std; -using namespace ngraph; - -TEST(op_eval, if_condition_const) { - auto X = make_shared(element::f32, Shape{1, 2, 2}); - auto Y = make_shared(element::f32, Shape{1, 2, 2}); - auto cond = std::make_shared(element::boolean, Shape{1}, true); - auto cond2 = std::make_shared(element::boolean, Shape{1}, false); - auto Xt = make_shared(element::f32, PartialShape::dynamic()); - auto Yt = make_shared(element::f32, PartialShape::dynamic()); - auto Xe = make_shared(element::f32, PartialShape::dynamic()); - auto Ye = make_shared(element::f32, PartialShape::dynamic()); - auto then_op = std::make_shared(Xt, Yt); - auto res0 = make_shared(then_op); - auto res1 = make_shared(Xe); - auto then_body = make_shared(OutputVector{res0}, ParameterVector{Xt, Yt}); - auto else_body = make_shared(OutputVector{res1}, ParameterVector{Xe}); - auto if_op = make_shared(cond); - if_op->set_then_body(then_body); - if_op->set_else_body(else_body); - if_op->set_input(X, Xt, Xe); - if_op->set_input(Y, Yt, nullptr); - if_op->set_output(res0, res1); - if_op->validate_and_infer_types(); - auto if_op2 = if_op->clone_with_new_inputs(OutputVector{cond2, X, Y}); - std::vector X_v{1.0, 1.0, 1.0, 1.0}; - std::vector Y_v{2.0, 2.0, 2.0, 2.0}; - auto fun = make_shared(OutputVector{if_op}, ParameterVector{X, Y}); - auto fun2 = make_shared(OutputVector{if_op2}, ParameterVector{X, Y}); - auto result = make_shared(); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{1, 2, 2}, Y_v)})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({1, 2, 2})}); - auto result_data = read_vector(result); - std::vector expected_results{2.0, 2.0, 2.0, 2.0}; - for (auto i = 0; i < expected_results.size(); i++) - EXPECT_NEAR(result_data[i], expected_results[i], 0.000001); - - auto result1 = make_shared(); - ASSERT_TRUE(fun2->evaluate({result1}, - {make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{1, 2, 2}, Y_v)})); - EXPECT_EQ(result1->get_element_type(), element::f32); - EXPECT_EQ(result1->get_shape(), Shape{std::vector({1, 2, 2})}); - auto result_data1 = read_vector(result1); - for (auto i = 0; i < expected_results.size(); i++) - EXPECT_NEAR(result_data1[i], X_v[i], 0.000001); -} - -TEST(op_eval, if_condition_non_const) { - auto X = make_shared(element::f32, Shape{1, 2, 2}); - auto Y = make_shared(element::f32, Shape{1, 2, 2}); - auto cond = make_shared(element::boolean, Shape{1}); - // Set up the cell body, a function from (Xi, Yi) -> (Zo) - // Body parameters - auto Xt = make_shared(element::f32, PartialShape::dynamic()); - auto Yt = make_shared(element::f32, PartialShape::dynamic()); - auto Xe = make_shared(element::f32, PartialShape::dynamic()); - auto Ye = make_shared(element::f32, PartialShape::dynamic()); - // Body - auto then_op = std::make_shared(Xt, Yt); - auto else_op = std::make_shared(Xe, Ye); - auto then_op_result = make_shared(then_op); - auto else_op_result = make_shared(else_op); - auto then_body = make_shared(OutputVector{then_op_result}, ParameterVector{Xt, Yt}); - auto else_body = make_shared(OutputVector{else_op_result}, ParameterVector{Xe, Ye}); - auto if_op = make_shared(cond); - if_op->set_then_body(then_body); - if_op->set_else_body(else_body); - if_op->set_input(X, Xt, Xe); - 
if_op->set_input(Y, Yt, Ye); - if_op->set_output(then_op_result, else_op_result); - if_op->validate_and_infer_types(); - std::vector X_v{1.0, 2.0, 3.0, 4.0}; - std::vector Y_v{2.0, 1.0, 2.0, 3.0}; - auto fun = make_shared(OutputVector{if_op}, ParameterVector{cond, X, Y}); - auto result = make_shared(); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{1}, {true}), - make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{1, 2, 2}, Y_v)})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({1, 2, 2})}); - auto result_data = read_vector(result); - std::vector expected_results{2.0, 2.0, 6.0, 12.0}; - for (auto i = 0; i < expected_results.size(); i++) - EXPECT_NEAR(result_data[i], expected_results[i], 0.000001); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{1}, {false}), - make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{1, 2, 2}, Y_v)})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({1, 2, 2})}); - result_data = read_vector(result); - expected_results = {3.0, 3.0, 5.0, 7.0}; - - for (auto i = 0; i < expected_results.size(); i++) - EXPECT_NEAR(result_data[i], expected_results[i], 0.000001); -} - -TEST(op_eval, if_free_sample) { - auto cond = make_shared(element::boolean, Shape{1}); - auto A = std::make_shared(element::f32, Shape{1}, 8.0); - auto B = std::make_shared(element::f32, Shape{1}, 2.0); - auto A_res = std::make_shared(A); - auto B_res = std::make_shared(B); - auto then_body = make_shared(OutputVector{A_res}, ParameterVector{}); - auto else_body = make_shared(OutputVector{B_res}, ParameterVector{}); - auto if_op = make_shared(cond); - if_op->set_then_body(then_body); - if_op->set_else_body(else_body); - auto res = if_op->set_output(A_res, B_res); - auto fun = make_shared(OutputVector{res}, ParameterVector{cond}); - fun->validate_nodes_and_infer_types(); - auto result1 = make_shared(), result2 = make_shared(); - ASSERT_TRUE(fun->evaluate({result1}, {make_host_tensor(Shape{1}, {true})})); - ASSERT_TRUE(fun->evaluate({result2}, {make_host_tensor(Shape{1}, {false})})); - auto result_data1 = read_vector(result1); - auto result_data2 = read_vector(result2); - EXPECT_EQ(result1->get_element_type(), element::f32); - EXPECT_EQ(result1->get_shape(), Shape{std::vector({1})}); - EXPECT_EQ(result2->get_element_type(), element::f32); - EXPECT_EQ(result2->get_shape(), Shape{std::vector({1})}); - EXPECT_NEAR(result_data1[0], 8.0, 0.000001); - EXPECT_NEAR(result_data2[0], 2.0, 0.000001); -} - -TEST(op_eval, if_constant_folding) { - auto cond = std::make_shared(element::boolean, Shape{1}, false); - auto A1 = std::make_shared(element::f32, Shape{1}, 37.0); - auto A2 = std::make_shared(element::f32, Shape{1}, 45.0); - auto B1 = std::make_shared(element::f32, Shape{1}, 10.0); - auto B2 = std::make_shared(element::f32, Shape{1}, 3.0); - auto Xt = make_shared(element::f32, PartialShape::dynamic()); - auto Yt = make_shared(element::f32, PartialShape::dynamic()); - auto Xe = make_shared(element::f32, PartialShape::dynamic()); - auto Ye = make_shared(element::f32, PartialShape::dynamic()); - auto a_add = std::make_shared(Xt, Yt); - auto b_pow = std::make_shared(Xe, Ye); - auto then_res = std::make_shared(a_add); - auto then_body = make_shared(OutputVector{then_res}, ParameterVector{Xt, Yt}); - auto else_res = std::make_shared(b_pow); - auto else_body = make_shared(OutputVector{else_res}, ParameterVector{Xe, Ye}); - auto if_op = 
make_shared(cond); - if_op->set_then_body(then_body); - if_op->set_else_body(else_body); - if_op->set_input(A1, Xt, nullptr); - if_op->set_input(A2, Yt, nullptr); - if_op->set_input(B1, nullptr, Xe); - if_op->set_input(B2, nullptr, Ye); - if_op->set_output(then_res, else_res); - - auto fun = make_shared(OutputVector{if_op}, ParameterVector{}); - fun->validate_nodes_and_infer_types(); - ngraph::pass::ConstantFolding().run_on_function(fun); - auto results = fun->get_results(); - EXPECT_EQ(results.size(), 1); - auto result = results[0]; - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{1}); - const auto& cond_value = get_constant_from_source(result); - auto val = cond_value->cast_vector(); - EXPECT_NEAR(val[0], 1000.0, 0.000001); -} - -TEST(op_eval, if_dynamism) { - auto X = make_shared(element::f32, Shape{1, 2, 2}); - auto Y = make_shared(element::f32, Shape{4, 2, 2}); - auto Z = make_shared(element::f32, Shape{8, 8, 8}); - auto cond = make_shared(element::boolean, Shape{1}); - // Set up the cell body, a function from (Xi, Yi) -> (Zo) - // Body parameters - auto Xt = make_shared(element::f32, PartialShape::dynamic()); - auto Yt = make_shared(element::f32, PartialShape::dynamic()); - auto Xe = make_shared(element::f32, PartialShape::dynamic()); - auto Ze = make_shared(element::f32, PartialShape::dynamic()); - // Body - auto then_op = std::make_shared(Xt, Xt); - auto else_op = std::make_shared(Xe, Xe); - auto then_op_result1 = make_shared(then_op); - auto then_op_result2 = make_shared(Yt); - auto else_op_result1 = make_shared(else_op); - auto else_op_result2 = make_shared(Ze); - auto then_body = - make_shared(OutputVector{then_op_result1, then_op_result2}, ParameterVector{Xt, Yt}); - auto else_body = - make_shared(OutputVector{else_op_result1, else_op_result2}, ParameterVector{Xe, Ze}); - auto if_op = make_shared(cond); - if_op->set_then_body(then_body); - if_op->set_else_body(else_body); - if_op->set_input(X, Xt, Xe); - if_op->set_input(Y, Yt, nullptr); - if_op->set_input(Z, nullptr, Ze); - auto res1 = if_op->set_output(then_op_result1, else_op_result1); - auto res2 = if_op->set_output(then_op_result2, else_op_result2); - auto result_if1 = make_shared(res1); - auto result_if2 = make_shared(res2); - if_op->validate_and_infer_types(); - std::vector X_v{1.0, 2.0, 3.0, 4.0}; - std::vector Y_v, Z_v; - for (auto c_ind = 0; c_ind < 4; ++c_ind) { - for (auto d_ind = 0; d_ind < 4; ++d_ind) { - Y_v.push_back(static_cast(c_ind * d_ind)); - } - } - for (auto c_ind = 0; c_ind < 8; ++c_ind) { - for (auto d_ind = 0; d_ind < 64; ++d_ind) { - Z_v.push_back(static_cast(c_ind * d_ind)); - } - } - auto fun = make_shared(OutputVector{result_if1, result_if2}, ParameterVector{cond, X, Y, Z}); - auto result1 = make_shared(); - auto result2 = make_shared(); - ASSERT_TRUE(fun->evaluate({result1, result2}, - {make_host_tensor(Shape{1}, {true}), - make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{4, 2, 2}, Y_v), - make_host_tensor(Shape{8, 8, 8}, Z_v)})); - EXPECT_EQ(result1->get_element_type(), element::f32); - EXPECT_EQ(result1->get_shape(), Shape{std::vector({1, 2, 2})}); - auto result1_data = read_vector(result1); - std::vector expected_results1{1.0, 4.0, 9.0, 16.0}; - for (auto i = 0; i < expected_results1.size(); i++) - EXPECT_NEAR(result1_data[i], expected_results1[i], 0.000001); - EXPECT_EQ(result2->get_element_type(), element::f32); - EXPECT_EQ(result2->get_shape(), Shape{std::vector({4, 2, 2})}); - auto result2_data = read_vector(result2); - for (auto i 
= 0; i < Y_v.size(); i++) - EXPECT_NEAR(result2_data[i], Y_v[i], 0.000001); - auto result3 = make_shared(); - auto result4 = make_shared(); - ASSERT_TRUE(fun->evaluate({result3, result4}, - {make_host_tensor(Shape{1}, {false}), - make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{4, 2, 2}, Y_v), - make_host_tensor(Shape{8, 8, 8}, Z_v)})); - EXPECT_EQ(result3->get_element_type(), element::f32); - EXPECT_EQ(result3->get_shape(), Shape{std::vector({1, 2, 2})}); - auto result3_data = read_vector(result3); - std::vector expected_results2{2.0, 4.0, 6.0, 8.0}; - for (auto i = 0; i < expected_results2.size(); i++) - EXPECT_NEAR(result3_data[i], expected_results2[i], 0.000001); - EXPECT_EQ(result4->get_element_type(), element::f32); - EXPECT_EQ(result4->get_shape(), Shape{std::vector({8, 8, 8})}); - auto result4_data = read_vector(result4); - for (auto i = 0; i < Z_v.size(); i++) - EXPECT_NEAR(result4_data[i], Z_v[i], 0.000001); -} - -TEST(op_eval, if_condition_non_const_scalar) { - auto X = make_shared(element::f32, Shape{1, 2, 2}); - auto Y = make_shared(element::f32, Shape{1, 2, 2}); - auto cond = make_shared(element::boolean, Shape{}); - // Set up the cell body, a function from (Xi, Yi) -> (Zo) - // Body parameters - auto Xt = make_shared(element::f32, PartialShape::dynamic()); - auto Yt = make_shared(element::f32, PartialShape::dynamic()); - auto Xe = make_shared(element::f32, PartialShape::dynamic()); - auto Ye = make_shared(element::f32, PartialShape::dynamic()); - // Body - auto then_op = std::make_shared(Xt, Yt); - auto else_op = std::make_shared(Xe, Ye); - auto then_op_result = make_shared(then_op); - auto else_op_result = make_shared(else_op); - auto then_body = make_shared(OutputVector{then_op_result}, ParameterVector{Xt, Yt}); - auto else_body = make_shared(OutputVector{else_op_result}, ParameterVector{Xe, Ye}); - auto if_op = make_shared(cond); - if_op->set_then_body(then_body); - if_op->set_else_body(else_body); - if_op->set_input(X, Xt, Xe); - if_op->set_input(Y, Yt, Ye); - if_op->set_output(then_op_result, else_op_result); - if_op->validate_and_infer_types(); - std::vector X_v{1.0, 2.0, 3.0, 4.0}; - std::vector Y_v{2.0, 1.0, 2.0, 3.0}; - auto fun = make_shared(OutputVector{if_op}, ParameterVector{cond, X, Y}); - auto result = make_shared(); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{1}, {true}), - make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{1, 2, 2}, Y_v)})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({1, 2, 2})}); - auto result_data = read_vector(result); - std::vector expected_results{2.0, 2.0, 6.0, 12.0}; - for (auto i = 0; i < expected_results.size(); i++) - EXPECT_NEAR(result_data[i], expected_results[i], 0.000001); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{1}, {false}), - make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{1, 2, 2}, Y_v)})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({1, 2, 2})}); - result_data = read_vector(result); - expected_results = {3.0, 3.0, 5.0, 7.0}; - - for (auto i = 0; i < expected_results.size(); i++) - EXPECT_NEAR(result_data[i], expected_results[i], 0.000001); -} -TEST(op_eval, if_condition_is_dynamic) { - auto X = make_shared(element::f32, Shape{1, 2, 2}); - auto Y = make_shared(element::f32, Shape{1, 2, 2}); - auto cond = make_shared(element::boolean, PartialShape{Dimension::dynamic()}); - // Set up the cell body, a function from 
(Xi, Yi) -> (Zo) - // Body parameters - auto Xt = make_shared(element::f32, PartialShape::dynamic()); - auto Yt = make_shared(element::f32, PartialShape::dynamic()); - auto Xe = make_shared(element::f32, PartialShape::dynamic()); - auto Ye = make_shared(element::f32, PartialShape::dynamic()); - // Body - auto then_op = std::make_shared(Xt, Yt); - auto else_op = std::make_shared(Xe, Ye); - auto then_op_result = make_shared(then_op); - auto else_op_result = make_shared(else_op); - auto then_body = make_shared(OutputVector{then_op_result}, ParameterVector{Xt, Yt}); - auto else_body = make_shared(OutputVector{else_op_result}, ParameterVector{Xe, Ye}); - auto if_op = make_shared(cond); - if_op->set_then_body(then_body); - if_op->set_else_body(else_body); - if_op->set_input(X, Xt, Xe); - if_op->set_input(Y, Yt, Ye); - if_op->set_output(then_op_result, else_op_result); - if_op->validate_and_infer_types(); - std::vector X_v{1.0, 2.0, 3.0, 4.0}; - std::vector Y_v{2.0, 1.0, 2.0, 3.0}; - auto fun = make_shared(OutputVector{if_op}, ParameterVector{cond, X, Y}); - auto result = make_shared(); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{1}, {true}), - make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{1, 2, 2}, Y_v)})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({1, 2, 2})}); - auto result_data = read_vector(result); - std::vector expected_results{2.0, 2.0, 6.0, 12.0}; - for (auto i = 0; i < expected_results.size(); i++) - EXPECT_NEAR(result_data[i], expected_results[i], 0.000001); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{1}, {false}), - make_host_tensor(Shape{1, 2, 2}, X_v), - make_host_tensor(Shape{1, 2, 2}, Y_v)})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({1, 2, 2})}); - result_data = read_vector(result); - expected_results = {3.0, 3.0, 5.0, 7.0}; - - for (auto i = 0; i < expected_results.size(); i++) - EXPECT_NEAR(result_data[i], expected_results[i], 0.000001); -} \ No newline at end of file diff --git a/ngraph/test/preprocess.cpp b/ngraph/test/preprocess.cpp index 94d6863eeda590..227d9fab2d3329 100644 --- a/ngraph/test/preprocess.cpp +++ b/ngraph/test/preprocess.cpp @@ -18,8 +18,11 @@ static std::shared_ptr create_simple_function(element::Type type, cons auto data1 = std::make_shared(type, shape); data1->set_friendly_name("input1"); data1->get_output_tensor(0).set_names({"tensor_input1"}); - auto res = std::make_shared(data1); - res->set_friendly_name("Result"); + auto op = std::make_shared(data1); + op->set_friendly_name("Relu"); + auto res = std::make_shared(op); + res->set_friendly_name("Result1"); + res->get_output_tensor(0).set_names({"tensor_output1"}); return std::make_shared(ResultVector{res}, ParameterVector{data1}); } @@ -27,13 +30,19 @@ static std::shared_ptr create_2inputs(element::Type type, const Partia auto data1 = std::make_shared(type, shape); data1->set_friendly_name("input1"); data1->get_output_tensor(0).set_names({"tensor_input1"}); + auto op1 = std::make_shared(data1); + op1->set_friendly_name("Relu1"); auto data2 = std::make_shared(type, shape); data2->set_friendly_name("input2"); - data1->get_output_tensor(0).set_names({"tensor_input2"}); - auto res1 = std::make_shared(data1); + data2->get_output_tensor(0).set_names({"tensor_input2"}); + auto op2 = std::make_shared(data2); + op2->set_friendly_name("Relu2"); + auto res1 = std::make_shared(op1); res1->set_friendly_name("Result1"); - auto res2 = 
std::make_shared(data2); + res1->get_output_tensor(0).set_names({"tensor_output1"}); + auto res2 = std::make_shared(op2); res2->set_friendly_name("Result2"); + res2->get_output_tensor(0).set_names({"tensor_output2"}); return std::make_shared(ResultVector{res1, res2}, ParameterVector{data1, data2}); } @@ -57,6 +66,13 @@ TEST(pre_post_process, convert_element_type_and_scale) { EXPECT_EQ(f->get_output_element_type(0), element::i8); } +TEST(pre_post_process, empty_preprocess) { + auto f = create_simple_function(element::i8, Shape{1, 3, 2, 2}); + f = PrePostProcessor().input(InputInfo().tensor(InputTensorInfo().set_element_type(element::i8))).build(f); + EXPECT_EQ(f->get_parameters().front()->get_element_type(), element::i8); + EXPECT_EQ(f->get_output_element_type(0), element::i8); +} + TEST(pre_post_process, convert_element_type_from_unknown) { auto f = create_simple_function(element::i32, Shape{1, 3, 224, 224}); ASSERT_THROW( @@ -107,6 +123,265 @@ TEST(pre_post_process, tensor_element_type_and_scale) { EXPECT_EQ(f->get_parameters().front()->get_layout(), Layout()); } +TEST(pre_post_process, convert_color_nv12_rgb_single) { + auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 2, 2, 3}); + auto name = f->get_parameters()[0]->get_friendly_name(); + auto tensor_names = f->get_parameters().front()->get_output_tensor(0).get_names(); + f = PrePostProcessor() + .input( + InputInfo() + .tensor(InputTensorInfo() + .set_element_type(element::u8) + .set_color_format(ColorFormat::NV12_SINGLE_PLANE)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB).convert_element_type(element::f32))) + .build(f); + + EXPECT_EQ(f->get_parameters().size(), 1); + EXPECT_EQ(f->get_parameters().front()->get_element_type(), element::u8); + EXPECT_EQ(f->get_parameters().front()->get_layout(), "NHWC"); + EXPECT_EQ(f->get_parameters().front()->get_partial_shape(), (PartialShape{Dimension::dynamic(), 3, 2, 1})); + EXPECT_EQ(f->get_parameters().front()->get_friendly_name(), name); + EXPECT_EQ(f->get_parameters().front()->get_output_tensor(0).get_names(), tensor_names); +} + +TEST(pre_post_process, convert_color_nv12_bgr_single) { + auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 2, 2, 3}); + auto name = f->get_parameters()[0]->get_friendly_name(); + auto tensor_names = f->get_parameters().front()->get_output_tensor(0).get_names(); + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_SINGLE_PLANE)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))) + .build(f); + + EXPECT_EQ(f->get_parameters().size(), 1); + EXPECT_EQ(f->get_parameters().front()->get_element_type(), element::f32); + EXPECT_EQ(f->get_parameters().front()->get_layout(), "NHWC"); + EXPECT_EQ(f->get_parameters().front()->get_partial_shape(), (PartialShape{Dimension::dynamic(), 3, 2, 1})); + EXPECT_EQ(f->get_parameters().front()->get_friendly_name(), name); + EXPECT_EQ(f->get_parameters().front()->get_output_tensor(0).get_names(), tensor_names); +} + +TEST(pre_post_process, convert_color_nv12_bgr_2_planes) { + auto f = create_simple_function(element::f32, Shape{5, 2, 2, 3}); + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES, {"TestY", "TestUV"})) + .preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))) + .build(f); + + EXPECT_EQ(f->get_parameters().size(), 2); + EXPECT_EQ(f->get_parameters()[0]->get_friendly_name(), "input1/TestY"); + 
EXPECT_EQ(*f->get_parameters()[0]->output(0).get_tensor().get_names().begin(), "tensor_input1/TestY"); + EXPECT_EQ(f->get_parameters()[0]->get_element_type(), element::f32); + EXPECT_EQ(f->get_parameters()[0]->get_partial_shape(), (PartialShape{5, 2, 2, 1})); + + EXPECT_EQ(f->get_parameters()[1]->get_friendly_name(), "input1/TestUV"); + EXPECT_EQ(*f->get_parameters()[1]->output(0).get_tensor().get_names().begin(), "tensor_input1/TestUV"); + EXPECT_EQ(f->get_parameters()[1]->get_element_type(), element::f32); + EXPECT_EQ(f->get_parameters()[1]->get_partial_shape(), (PartialShape{5, 1, 1, 2})); +} + +TEST(pre_post_process, convert_color_nv12_rgb_2_planes) { + auto f = create_simple_function(element::f32, Shape{5, 2, 2, 3}); + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))) + .build(f); + + EXPECT_EQ(f->get_parameters().size(), 2); + EXPECT_EQ(f->get_parameters()[0]->get_element_type(), element::f32); + EXPECT_EQ(f->get_parameters()[1]->get_element_type(), element::f32); + EXPECT_EQ(f->get_parameters()[0]->get_partial_shape(), (PartialShape{5, 2, 2, 1})); + EXPECT_EQ(f->get_parameters()[1]->get_partial_shape(), (PartialShape{5, 1, 1, 2})); + + EXPECT_EQ(f->get_parameters()[0]->get_friendly_name(), "input1/Y"); + EXPECT_EQ(*f->get_parameters()[0]->output(0).get_tensor().get_names().begin(), "tensor_input1/Y"); + + EXPECT_EQ(f->get_parameters()[1]->get_friendly_name(), "input1/UV"); + EXPECT_EQ(*f->get_parameters()[1]->output(0).get_tensor().get_names().begin(), "tensor_input1/UV"); +} + +TEST(pre_post_process, convert_color_nv12_bgr_2_planes_u8_lvalue) { + auto f = create_simple_function(element::u8, Shape{1, 2, 2, 3}); + auto input_tensor_info = InputTensorInfo(); + input_tensor_info.set_color_format(ColorFormat::NV12_TWO_PLANES); + auto steps = PreProcessSteps(); + steps.convert_color(ColorFormat::BGR); + f = PrePostProcessor() + .input(InputInfo().tensor(std::move(input_tensor_info)).preprocess(std::move(steps))) + .build(f); + + EXPECT_EQ(f->get_parameters().size(), 2); + EXPECT_EQ(f->get_parameters()[0]->get_element_type(), element::u8); + EXPECT_EQ(f->get_parameters()[0]->get_partial_shape(), (PartialShape{1, 2, 2, 1})); + EXPECT_EQ(f->get_parameters()[1]->get_element_type(), element::u8); + EXPECT_EQ(f->get_parameters()[1]->get_partial_shape(), (PartialShape{1, 1, 1, 2})); +} + +TEST(pre_post_process, convert_color_nv12_bgr_2_planes_el_type) { + auto f = create_simple_function(element::u8, Shape{1, 2, 2, 3}); + EXPECT_NO_THROW( + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo() + .set_element_type(element::f32) + .set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess( + PreProcessSteps().convert_element_type(element::u8).convert_color(ColorFormat::BGR))) + .build(f)); + + EXPECT_EQ(f->get_parameters().size(), 2); + EXPECT_EQ(f->get_parameters()[0]->get_element_type(), element::f32); + EXPECT_EQ(f->get_parameters()[1]->get_element_type(), element::f32); +} + +TEST(pre_post_process, convert_color_same_type) { + auto f = create_simple_function(element::u8, Shape{1, 2, 2, 3}); + EXPECT_NO_THROW(f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::RGB)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))) + .build(f)); + + EXPECT_EQ(f->get_parameters().size(), 1); + EXPECT_EQ(f->get_parameters()[0]->get_partial_shape(), (PartialShape{1, 2, 2, 3})); +} + 
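+// Negative cases: converting to or from ColorFormat::UNDEFINED is expected
+// to be rejected with ov::AssertFailure when build() is called.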
+TEST(pre_post_process, convert_color_unsupported) { + // Feel free to update this test when more color conversions are supported in future + auto f = create_simple_function(element::f32, PartialShape{1, 4, 4, 3}); + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_SINGLE_PLANE)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::UNDEFINED))) + .build(f), + ov::AssertFailure); + + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::UNDEFINED))) + .build(f), + ov::AssertFailure); + + auto colors = {ColorFormat::NV12_TWO_PLANES, ColorFormat::NV12_SINGLE_PLANE, ColorFormat::RGB, ColorFormat::BGR}; + for (const auto& color : colors) { + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::UNDEFINED)) + .preprocess(PreProcessSteps().convert_color(color))) + .build(f), + ov::AssertFailure); + + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(color)) + .preprocess(PreProcessSteps().convert_color(ColorFormat::UNDEFINED))) + .build(f), + ov::AssertFailure); + } +} + +TEST(pre_post_process, convert_color_incorrect_subnames) { + auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 2, 2, 3}); + auto name = f->get_parameters()[0]->get_friendly_name(); + auto tensor_names = f->get_parameters().front()->get_output_tensor(0).get_names(); + EXPECT_THROW( + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_SINGLE_PLANE, {"Test"})) + .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))) + .build(f), + ov::AssertFailure); + + EXPECT_THROW( + f = PrePostProcessor() + .input(InputInfo().tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES, {"Test"}))) + .build(f), + ov::AssertFailure); + + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo().tensor( + InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES, {"1", "2", "3"}))) + .build(f), + ov::AssertFailure); +} + +TEST(pre_post_process, convert_color_duplicate_subnames) { + auto f = create_2inputs(element::f32, PartialShape{1, 2, 2, 3}); + f->get_parameters()[0]->get_output_tensor(0).set_names({"tensor_input1"}); + f->get_parameters()[1]->get_output_tensor(0).set_names({"tensor_input1/CustomUV"}); + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_SINGLE_PLANE, + {"CustomY", "CustomUV"})) + .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))) + .build(f), + ov::AssertFailure); +} + +TEST(pre_post_process, convert_color_duplicate_internal_subnames_mean) { + auto f = create_simple_function(element::f32, PartialShape{1, 2, 2, 3}); + for (int i = 0; i < 10; i++) { + // Create preprocessing step several times (try to duplicate internal node names this way) + EXPECT_NO_THROW(f = PrePostProcessor().input(InputInfo().preprocess(PreProcessSteps().mean(0.1f))).build(f)); + EXPECT_NO_THROW(f = PrePostProcessor().input(InputInfo().preprocess(PreProcessSteps().scale(1.1f))).build(f)); + EXPECT_NO_THROW( + f = PrePostProcessor() + .input(InputInfo().preprocess( + PreProcessSteps().convert_element_type(element::u8).convert_element_type(element::f32))) + .build(f)); + EXPECT_NO_THROW(f = PrePostProcessor() + .input(InputInfo() + 
.tensor(InputTensorInfo().set_layout("NHWC")) + .preprocess(PreProcessSteps().convert_layout("NCHW"))) + .build(f)); + EXPECT_NO_THROW( + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_layout("NHWC").set_spatial_static_shape(480, 640)) + .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))) + .build(f)); + } +} + +TEST(pre_post_process, unsupported_network_color_format) { + auto f = create_simple_function(element::f32, PartialShape{1, 4, 4, 3}); + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo().tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_SINGLE_PLANE))) + .build(f), + ov::AssertFailure); + + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo().tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES))) + .build(f), + ov::AssertFailure); + + EXPECT_THROW( + f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess(PreProcessSteps().convert_layout("NCHW").convert_color(ColorFormat::RGB))) + .build(f), + ov::AssertFailure); + + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess(PreProcessSteps().mean(0.1f).convert_color(ColorFormat::RGB))) + .build(f), + ov::AssertFailure); + + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo() + .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES)) + .preprocess(PreProcessSteps().scale(2.1f).convert_color(ColorFormat::RGB))) + .build(f), + ov::AssertFailure); +} + TEST(pre_post_process, custom_preprocessing) { auto f = create_simple_function(element::i32, Shape{1, 3, 1, 1}); f = PrePostProcessor() @@ -327,3 +602,241 @@ TEST(pre_post_process, resize_no_tensor_width) { .build(f), ov::AssertFailure); } + +// --- PostProcess - set/convert element type --- + +TEST(pre_post_process, postprocess_convert_element_type_explicit) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + f = PrePostProcessor() + .output(OutputInfo().postprocess(PostProcessSteps().convert_element_type(element::u8))) + .build(f); + EXPECT_EQ(f->get_results().size(), 1); + EXPECT_EQ(f->get_results()[0]->get_element_type(), element::u8); + auto ops = f->get_ordered_ops(); + auto res_count = std::count_if(ops.begin(), ops.end(), [](std::shared_ptr n) { + return std::dynamic_pointer_cast(n) != nullptr; + }); + EXPECT_EQ(res_count, 1); +} + +TEST(pre_post_process, postprocess_convert_element_type_default) { + auto f = create_2inputs(element::f32, Shape{1, 3, 2, 2}); + f = PrePostProcessor() + .output(OutputInfo(1) + .postprocess(PostProcessSteps().convert_element_type()) + .tensor(OutputTensorInfo().set_element_type(element::u8))) + .build(f); + EXPECT_EQ(f->get_results()[0]->get_element_type(), element::f32); + EXPECT_EQ(f->get_results()[1]->get_element_type(), element::u8); +} + +TEST(pre_post_process, postprocess_convert_element_type_same) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + auto size_old = f->get_ordered_ops().size(); + f = PrePostProcessor() + .output(OutputInfo("tensor_output1") + .postprocess(PostProcessSteps().convert_element_type(element::f32)) + .tensor(OutputTensorInfo().set_element_type(element::f32))) + .build(f); + EXPECT_EQ(f->get_results()[0]->get_element_type(), element::f32); + + // Verify that redundant ops were not added + EXPECT_EQ(size_old, f->get_ordered_ops().size()); +} + +TEST(pre_post_process, 
postprocess_convert_element_type_default_error) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + EXPECT_THROW( + f = PrePostProcessor().output(OutputInfo().postprocess(PostProcessSteps().convert_element_type())).build(f), + ov::AssertFailure); +} + +TEST(pre_post_process, postprocess_convert_element_type_implicit) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + f = PrePostProcessor().output(OutputInfo().tensor(OutputTensorInfo().set_element_type(element::u8))).build(f); + EXPECT_EQ(f->get_results()[0]->get_element_type(), element::u8); +} + +// --- PostProcess - set/convert layout --- +TEST(pre_post_process, postprocess_set_layout_network) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + f = PrePostProcessor().output(OutputInfo().network(OutputNetworkInfo().set_layout("NCHW"))).build(f); + EXPECT_EQ(f->get_results()[0]->get_layout(), "NCHW"); +} + +TEST(pre_post_process, postprocess_set_layout_tensor) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + // no layout is specified for network, no way to implicitly convert it to user's layout + EXPECT_THROW(f = PrePostProcessor().output(OutputInfo().tensor(OutputTensorInfo().set_layout("NHWC"))).build(f), + ov::AssertFailure); +} + +TEST(pre_post_process, postprocess_convert_layout_implicit) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + + f = PrePostProcessor() + .output(OutputInfo() + .network(OutputNetworkInfo().set_layout("NCHW")) + .tensor(OutputTensorInfo().set_layout("NHWC"))) + .build(f); + EXPECT_EQ(f->get_results()[0]->get_layout(), "NHWC"); + EXPECT_EQ(f->get_results()[0]->get_output_tensor(0).get_partial_shape(), (PartialShape{1, 2, 2, 3})); +} + +TEST(pre_post_process, postprocess_convert_layout_explicit_no_target) { + auto f = create_2inputs(element::f32, Shape{1, 3, 2, 2}); + f = PrePostProcessor() + .output(OutputInfo(1) + .network(OutputNetworkInfo().set_layout("NCHW")) + .postprocess(PostProcessSteps().convert_layout("NHWC"))) + .build(f); + EXPECT_EQ(f->get_results()[0]->get_output_tensor(0).get_partial_shape(), (PartialShape{1, 3, 2, 2})); + EXPECT_EQ(f->get_results()[1]->get_output_tensor(0).get_partial_shape(), (PartialShape{1, 2, 2, 3})); +} + +TEST(pre_post_process, postprocess_convert_layout_default) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + + f = PrePostProcessor() + .output(OutputInfo() + .network(OutputNetworkInfo().set_layout("NCHW")) + .postprocess(PostProcessSteps().convert_layout()) + .tensor(OutputTensorInfo().set_layout("NHWC"))) + .build(f); + EXPECT_EQ(f->get_results()[0]->get_layout(), "NHWC"); + EXPECT_EQ(f->get_results()[0]->get_output_tensor(0).get_partial_shape(), (PartialShape{1, 2, 2, 3})); +} + +TEST(pre_post_process, postprocess_convert_layout_same) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + auto size_old = f->get_ordered_ops().size(); + + f = PrePostProcessor() + .output(OutputInfo() + .network(OutputNetworkInfo().set_layout("NCHW")) + .postprocess(PostProcessSteps().convert_layout("NCHW")) + .tensor(OutputTensorInfo().set_layout("NCHW"))) + .build(f); + EXPECT_EQ(f->get_results()[0]->get_layout(), "NCHW"); + EXPECT_EQ(f->get_results()[0]->get_output_tensor(0).get_partial_shape(), (PartialShape{1, 3, 2, 2})); + // Verify that redundant ops were not added + EXPECT_EQ(size_old, f->get_ordered_ops().size()); +} + +TEST(pre_post_process, postprocess_convert_layout_default_error) { + auto f = create_simple_function(element::f32, 
Shape{1, 3, 2, 2}); + + EXPECT_THROW(f = PrePostProcessor() + .output(OutputInfo() + .network(OutputNetworkInfo().set_layout("NCHW")) + .postprocess(PostProcessSteps().convert_layout())) + .build(f), + ov::AssertFailure); +} + +// Postprocessing - other + +TEST(pre_post_process, postprocess_custom_step) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + std::string name; + f = PrePostProcessor() + .output(OutputInfo().postprocess( + PostProcessSteps().custom([&name](const ov::Output& node) -> ov::Output { + auto abs = std::make_shared(node); + abs->set_friendly_name(node.get_node()->get_friendly_name() + "/abs"); + name = node.get_node()->get_friendly_name() + "/abs"; + return abs; + }))) + .build(f); + EXPECT_FALSE(name.empty()); + EXPECT_EQ(f->get_results()[0]->get_input_source_output(0).get_node()->get_friendly_name(), name); +} + +TEST(pre_post_process, postprocess_implicit_convert_element_type_and_layout) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + f = PrePostProcessor() + .output(OutputInfo() + .network(OutputNetworkInfo().set_layout("NCHW")) + .tensor(OutputTensorInfo().set_layout("NHWC").set_element_type(element::u8))) + .build(f); + EXPECT_EQ(f->get_results()[0]->get_element_type(), element::u8); + EXPECT_EQ(f->get_results()[0]->get_layout(), "NHWC"); + EXPECT_EQ(f->get_results()[0]->get_output_tensor(0).get_partial_shape(), (PartialShape{1, 2, 2, 3})); +} + +TEST(pre_post_process, postprocess_assert_output_without_index) { + auto f = create_2inputs(element::f32, Shape{1, 3, 2, 2}); + auto out = OutputInfo(); + EXPECT_ANY_THROW(f = PrePostProcessor().output(std::move(out)).build(f)); + out = OutputInfo("some_non_existing_name"); + EXPECT_ANY_THROW(f = PrePostProcessor().output(std::move(out)).build(f)); +} + +TEST(pre_post_process, postprocess_lvalues_1) { + auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); + bool custom_called = false; + + auto netInfo = OutputNetworkInfo(); + netInfo.set_layout("NCHW"); + + auto steps = PostProcessSteps(); + steps.convert_layout(); + steps.convert_element_type(); + steps.custom([&custom_called](const ov::Output& node) -> ov::Output { + auto abs = std::make_shared(node); + abs->set_friendly_name(node.get_node()->get_friendly_name() + "/abs"); + custom_called = true; + return abs; + }); + + auto tensorInfo = OutputTensorInfo(); + tensorInfo.set_layout("NHWC"); + tensorInfo.set_element_type(element::u8); + + auto outputInfo = OutputInfo("tensor_output1"); + outputInfo.network(std::move(netInfo)); + outputInfo.postprocess(std::move(steps)); + outputInfo.tensor(std::move(tensorInfo)); + + auto p = PrePostProcessor(); + p.output(std::move(outputInfo)); + + f = p.build(f); + EXPECT_EQ(f->get_results().size(), 1); + EXPECT_EQ(f->output().get_tensor().get_names().count("tensor_output1"), 1); + EXPECT_EQ(f->get_results()[0]->get_element_type(), element::u8); + EXPECT_EQ(f->get_results()[0]->get_layout(), "NHWC"); + EXPECT_EQ(f->get_results()[0]->get_output_tensor(0).get_partial_shape(), (PartialShape{1, 2, 2, 3})); + EXPECT_TRUE(custom_called); +} + +TEST(pre_post_process, exception_safety) { + auto f = create_2inputs(element::f32, Shape{1, 3, 224, 224}); + auto name0 = f->get_parameters()[0]->get_friendly_name(); + auto tensor_names0 = f->get_parameters()[0]->get_output_tensor(0).get_names(); + auto name1 = f->get_parameters()[1]->get_friendly_name(); + auto tensor_names1 = f->get_parameters()[1]->get_output_tensor(0).get_names(); + EXPECT_THROW(f = PrePostProcessor() + .input(InputInfo(0) 
// this one is correct
+ .tensor(InputTensorInfo().set_element_type(element::u8))
+ .preprocess(PreProcessSteps().convert_element_type(element::f32)))
+ .input(InputInfo(1) // This one is not
+ .tensor(InputTensorInfo().set_color_format(ColorFormat::NV12_TWO_PLANES))
+ .preprocess(PreProcessSteps().custom(
+ [](const std::shared_ptr& node) -> std::shared_ptr {
+ throw ngraph::ngraph_error("test error");
+ })))
+ .build(f),
+ ov::AssertFailure);
+ EXPECT_EQ(f->get_parameters().size(), 2);
+
+ EXPECT_EQ(f->get_parameters()[0]->get_element_type(), element::f32);
+ EXPECT_EQ(f->get_parameters()[0]->get_partial_shape(), (PartialShape{1, 3, 224, 224}));
+ EXPECT_EQ(f->get_parameters()[0]->get_friendly_name(), name0);
+ EXPECT_EQ(f->get_parameters()[0]->get_output_tensor(0).get_names(), tensor_names0);
+
+ EXPECT_EQ(f->get_parameters()[1]->get_element_type(), element::f32);
+ EXPECT_EQ(f->get_parameters()[1]->get_partial_shape(), (PartialShape{1, 3, 224, 224}));
+ EXPECT_EQ(f->get_parameters()[1]->get_friendly_name(), name1);
+ EXPECT_EQ(f->get_parameters()[1]->get_output_tensor(0).get_names(), tensor_names1);
+}
diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest
index fb73dc5038232b..c66b35eccaa8cb 100644
--- a/ngraph/test/runtime/ie/unit_test.manifest
+++ b/ngraph/test/runtime/ie/unit_test.manifest
@@ -31,13 +31,16 @@ onnx_model_conv_integer_no_zero_point
onnx_model_conv_integer_pads
# Unsupported operator detected in the graph: QuantizedDot
-onnx_model_matmul_integer
-onnx_model_matmul_integer_zero_point_zero
+onnx_model_matmul_integer_2d_simple_zero_point
+onnx_model_matmul_integer_int8
+onnx_model_matmul_integer_vectorized_zero_point
onnx_model_matmul_integer_no_zero_point
-onnx_model_matmul_integer_scalar
+onnx_model_matmul_integer_2d_x_3d
+onnx_model_matmul_integer_3d_x_2d
+onnx_model_matmul_integer_3d
onnx_model_matmul_integer_4d
onnx_model_matmul_integer_4d_zero_point
-onnx_model_matmul_integer_4d_no_zero_point
+onnx_model_matmul_integer_matrix_zero_point
onnx_model_qlinear_matmul
onnx_model_qlinear_matmul_3d
@@ -1080,6 +1083,12 @@ IE_CPU.onnx_constant_sparse_tensor_double_3x4
IE_CPU.onnx_constant_sparse_tensor_int16_3x4
IE_CPU.onnx_constant_sparse_tensor_uint16_3x4
+# 48230
+# Some slight accuracy deviations after recent MatMul changes
+# (for the CPU plugin, gru_cell with clip decomposes into MatMuls)
+# Fix or increase the threshold
+IE_CPU.gru_cell_bias_clip
+
#-------------------------------------------------------------------------------
#
# Inference Engine GPU plugin excludes
@@ -1575,3 +1584,5 @@ IE_CPU.onnx_model_gather_float_2D_neg_indices
# CPU plug-in doesn't support operation with dynamic rank
onnx_model_skip_layer_normalization_dynamic_shapes
+# Doesn't support op with dynamic shapes
+onnx_model_embed_layer_normalization_dynamic_shapes
diff --git a/ngraph/test/runtime/interpreter/unit_test.manifest b/ngraph/test/runtime/interpreter/unit_test.manifest
index 637269119df812..4b04419aae449f 100644
--- a/ngraph/test/runtime/interpreter/unit_test.manifest
+++ b/ngraph/test/runtime/interpreter/unit_test.manifest
@@ -14,13 +14,6 @@ INTERPRETER.onnx_resize11_sizes_nearest_asymmetric_floor
# nGraph does not support the following ONNX operations
INTERPRETER.onnx_model_qlinear_matmul
INTERPRETER.onnx_model_qlinear_matmul_3d
-INTERPRETER.onnx_model_matmul_integer
-INTERPRETER.onnx_model_matmul_integer_zero_point_zero
-INTERPRETER.onnx_model_matmul_integer_no_zero_point
-INTERPRETER.onnx_model_matmul_integer_scalar
-INTERPRETER.onnx_model_matmul_integer_4d -INTERPRETER.onnx_model_matmul_integer_4d_zero_point -INTERPRETER.onnx_model_matmul_integer_4d_no_zero_point # Disabled tests for disabled reference implementations INTERPRETER.onnx_dyn_shapes_expand_uint16_dyn_shape @@ -139,4 +132,4 @@ INTERPRETER.zero_sized_negative # No support yet for RandomUniform INTERPRETER.onnx_model_random_uniform -INTERPRETER.onnx_model_random_uniform_like \ No newline at end of file +INTERPRETER.onnx_model_random_uniform_like diff --git a/ngraph/test/type_prop/if.cpp b/ngraph/test/type_prop/if.cpp index d60d4890f1d961..80cf9a7f3dfd97 100644 --- a/ngraph/test/type_prop/if.cpp +++ b/ngraph/test/type_prop/if.cpp @@ -38,8 +38,6 @@ TEST(type_prop, if_simple_test) { if_op->set_input(X, Xt, Xe); if_op->set_input(Y, Yt, Ye); auto res = if_op->set_output(then_op_res, else_op_res); - if_op->validate_and_infer_types(); - auto result0 = make_shared(res); Shape out0_shape{32, 40, 10}; auto sh = result0->get_output_shape(0); @@ -73,7 +71,6 @@ TEST(type_prop, if_non_const_condition_test) { if_op->set_input(X, Xt, Xe); if_op->set_input(Y, Yt, Ye); auto res = if_op->set_output(then_body_res, else_body_res); - if_op->validate_and_infer_types(); auto result0 = make_shared(res); Shape out0_shape{32, 40, 10}; auto sh = result0->get_output_shape(0); @@ -100,14 +97,12 @@ TEST(type_prop, if_clone_test) { auto else_op = std::make_shared(Xe, Ye); auto else_body_res = make_shared(else_op); auto else_body = make_shared(OutputVector{else_body_res}, ParameterVector{Xe, Ye}); - auto if_op = make_shared(cond); if_op->set_then_body(then_body); if_op->set_else_body(else_body); if_op->set_input(X, Xt, Xe); if_op->set_input(Y, Yt, Ye); auto res = if_op->set_output(then_body_res, else_body_res); - auto new_if = std::dynamic_pointer_cast(if_op->clone_with_new_inputs(OutputVector{cond, Xnew, Ynew})); EXPECT_EQ(true, true); } @@ -147,7 +142,6 @@ TEST(type_prop, if_multiple_outputs) { if_op->set_input(Y, Yt, Ye); auto res1 = if_op->set_output(then_body_res_1, else_body_res_1); auto res2 = if_op->set_output(then_body_res_2, else_body_res_2); - if_op->validate_and_infer_types(); auto result1 = make_shared(res1); auto result2 = make_shared(res2); Shape out0_shape{32, 40, 10}; @@ -184,7 +178,6 @@ TEST(type_prop, if_scalar_condition) { if_op->set_input(X, Xt, Xe); if_op->set_input(Y, Yt, Ye); auto res = if_op->set_output(then_body_res, else_body_res); - if_op->validate_and_infer_types(); auto result0 = make_shared(res); Shape out0_shape{32, 40, 10}; auto sh = result0->get_output_shape(0); @@ -218,7 +211,6 @@ TEST(type_prop, if_dynamic_output) { if_op->set_input(X, Xt, nullptr); if_op->set_input(Y, nullptr, Ye); auto res = if_op->set_output(then_body_res, else_body_res); - if_op->validate_and_infer_types(); auto result0 = make_shared(res); auto dynamic_shape = result0->get_output_partial_shape(0); @@ -265,7 +257,6 @@ TEST(type_prop, if_dynamic_inputs) { if_op->set_input(X, Xt, Xe); if_op->set_input(Y, Yt, Ye); auto res = if_op->set_output(then_body_res, else_body_res); - if_op->validate_and_infer_types(); auto result0 = make_shared(res); auto dynamic_shape = result0->get_output_partial_shape(0); auto expected_result = PartialShape{Dimension::dynamic(), 20, 30};