From da79964bd34f5ce6a2e9f15e5ccabdaee165f4e9 Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Mon, 11 Sep 2023 12:15:47 +0200 Subject: [PATCH 01/31] [DOCS] banner what's new text (#19730) --- docs/home.rst | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/home.rst b/docs/home.rst index 696efedadd0068..92079e97058f68 100644 --- a/docs/home.rst +++ b/docs/home.rst @@ -23,10 +23,15 @@ OpenVINO 2023.0
From fb59d0eb3660e90179a8c9ed68a8cdbffc0b6e2e Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Mon, 11 Sep 2023 14:55:36 +0400 Subject: [PATCH 02/31] `VariadicSplitLayerTest` refactoring to API2.0 (#19648) --- .../single_layer_tests/variadic_split.cpp | 52 ++++++++--------- .../single_op_tests/variadic_split.hpp | 15 +++++ .../single_op/variadic_split.hpp | 33 +++++++++++ .../src/single_op/variadic_split.cpp | 56 +++++++++++++++++++ 4 files changed, 128 insertions(+), 28 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/variadic_split.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/variadic_split.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/variadic_split.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/variadic_split.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/variadic_split.cpp index f1bfbe7ad85fa4..39ddd9788f4cad 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/variadic_split.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/variadic_split.cpp @@ -2,39 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "single_op_tests/variadic_split.hpp" + #include -#include "single_layer_tests/variadic_split.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::VariadicSplitLayerTest; namespace { +const std::vector model_types = {ov::element::f32, ov::element::f16}; + +// Sum of elements numSplits = inputShapes[Axis] +const std::vector> num_splits = {{1, 16, 5, 8}, + {2, 19, 5, 4}, + {7, 13, 2, 8}, + {5, 8, 12, 5}, + {4, 11, 6, 9}}; + +const std::vector> input_shapes_static = { + {{30, 30, 30, 30}}, +}; - const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 - }; - - // Sum of elements numSplits = inputShapes[Axis] - const std::vector> numSplits = { - {1, 16, 5, 8}, - {2, 19, 5, 4}, - {7, 13, 2, 8}, - {5, 8, 12, 5}, - {4, 11, 6, 9} - }; - - INSTANTIATE_TEST_SUITE_P(smoke_NumSplitsCheck, VariadicSplitLayerTest, - ::testing::Combine( - ::testing::ValuesIn(numSplits), - ::testing::Values(0, 1, 2, 3), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({30, 30, 30, 30})), - ::testing::Values(ov::test::utils::DEVICE_CPU)), - VariadicSplitLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + smoke_NumSplitsCheck, + VariadicSplitLayerTest, + ::testing::Combine(::testing::ValuesIn(num_splits), + ::testing::Values(0, 1, 2, 3), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + VariadicSplitLayerTest::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/variadic_split.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/variadic_split.hpp new file mode 100644 index 00000000000000..e055cc48234e3c --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/variadic_split.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation 
+// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/variadic_split.hpp" + +namespace ov { +namespace test { +TEST_P(VariadicSplitLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/variadic_split.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/variadic_split.hpp new file mode 100644 index 00000000000000..421e12d32e4dd5 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/variadic_split.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +typedef std::tuple, // Num splits + int64_t, // Axis + ov::element::Type, // Model type + std::vector, // Input shapes + std::string // Target device name + > + VariadicSplitParams; + +class VariadicSplitLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/variadic_split.cpp b/src/tests/functional/shared_test_classes/src/single_op/variadic_split.cpp new file mode 100644 index 00000000000000..e89e256fbc3411 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/variadic_split.cpp @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/variadic_split.hpp" + +namespace ov { +namespace test { +std::string VariadicSplitLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + int64_t axis; + std::vector num_splits; + ov::element::Type model_type; + std::vector input_shapes; + ov::test::TargetDevice target_device; + std::tie(num_splits, axis, model_type, input_shapes, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < input_shapes.size(); i++) { + result << ov::test::utils::partialShape2str({input_shapes[i].first}) + << (i < input_shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < input_shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < input_shapes.size(); j++) { + result << ov::test::utils::vec2str(input_shapes[j].second[i]) << (j < input_shapes.size() - 1lu ? 
"_" : ""); + } + result << "}_"; + } + result << "numSplits=" << ov::test::utils::vec2str(num_splits) << "_"; + result << "axis=" << axis << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void VariadicSplitLayerTest::SetUp() { + int64_t axis; + std::vector num_splits; + std::vector input_shapes; + ov::element::Type model_type; + std::tie(num_splits, axis, model_type, input_shapes, targetDevice) = this->GetParam(); + + init_input_shapes(input_shapes); + + auto param = std::make_shared(model_type, inputDynamicShapes.front()); + auto split_axis_const = + std::make_shared(element::i64, ngraph::Shape{}, std::vector{axis}); + auto num_split_const = + std::make_shared(element::u64, ngraph::Shape{num_splits.size()}, num_splits); + auto variadic_split = std::make_shared(param, split_axis_const, num_split_const); + function = std::make_shared(variadic_split->outputs(), ov::ParameterVector{param}, "VariadicSplit"); +} +} // namespace test +} // namespace ov \ No newline at end of file From 3d872f14e423551d3fd27240cb30631946bd92cf Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Mon, 11 Sep 2023 12:56:08 +0200 Subject: [PATCH 03/31] [DOCS] Remove index file from notebooks (#19619) --- docs/CMakeLists.txt | 2 +- docs/nbdoc/consts.py | 21 ------------------ docs/nbdoc/nbdoc.py | 51 ++++++++++++++------------------------------ docs/nbdoc/utils.py | 33 +--------------------------- docs/tutorials.md | 2 +- 5 files changed, 19 insertions(+), 90 deletions(-) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 6799a559f1260c..1f7d3a5357f690 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -76,7 +76,7 @@ function(build_docs) # build with openvino notebooks if(ENABLE_OPENVINO_NOTEBOOKS) set(NBDOC_SCRIPT "${DOCS_SOURCE_DIR}/nbdoc/nbdoc.py") - list(APPEND commands + list(PREPEND commands COMMAND ${PYTHON_EXECUTABLE} "${NBDOC_SCRIPT}" "${DOCS_SOURCE_DIR}/notebooks" "${RST_OUTPUT}/notebooks" ) endif() diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index bc8d7dc08c196c..7e2ea87907f937 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -1,5 +1,3 @@ -notebooks_docs = "notebooks.rst" - notebooks_path = "notebooks" repo_directory = "notebooks" @@ -12,9 +10,6 @@ blacklisted_extensions = ['.xml', '.bin'] -section_names = ["Getting Started", "Convert & Optimize", - "Model Demos", "Model Training", "Live Demos"] - # Templates binder_template = """ @@ -100,19 +95,3 @@ \n """ - -rst_template = """ -OpenVINO notebooks documentation -================================ - -{% for section in sections %} -{{section.name}} --------------------------------- - -.. 
toctree:: - :maxdepth: 1 - -{% for notebook in section.notebooks %} {{notebook.path}}\n{% endfor %} -{% endfor %} - -""" diff --git a/docs/nbdoc/nbdoc.py b/docs/nbdoc/nbdoc.py index 7825bec1aa2a55..2f7b0ac9aa3bad 100644 --- a/docs/nbdoc/nbdoc.py +++ b/docs/nbdoc/nbdoc.py @@ -4,9 +4,7 @@ from utils import ( create_content, add_content_below, - process_notebook_name, verify_notebook_name, - split_notebooks_into_sections, ) from consts import ( artifacts_link, @@ -14,14 +12,11 @@ colab_template, binder_colab_template, blacklisted_extensions, - notebooks_docs, notebooks_path, no_binder_template, repo_directory, repo_name, repo_owner, - rst_template, - section_names, ) from notebook import Notebook from section import Section @@ -31,6 +26,7 @@ from urllib.request import urlretrieve from requests import get import os +import sys class NbTravisDownloader: @@ -79,22 +75,6 @@ def traverse(path: Path, link: str, blacklisted_extensions: list = blacklisted_e class NbProcessor: def __init__(self, nb_path: str = notebooks_path): self.nb_path = nb_path - notebooks = [ - Notebook( - name=process_notebook_name(notebook), - path=notebook, - ) - for notebook in os.listdir(self.nb_path) - if verify_notebook_name(notebook) - ] - notebooks = split_notebooks_into_sections(notebooks) - self.rst_data = { - "sections": [ - Section(name=section_name, notebooks=section_notebooks) - for section_name, section_notebooks in zip(section_names, notebooks) - ] - - } self.binder_data = { "owner": repo_owner, "repo": repo_name, @@ -164,18 +144,16 @@ def add_binder(self, buttons_list: list, cbuttons_list: list, template_with_col if not add_content_below(button_text, f"{self.nb_path}/{notebook}"): raise FileNotFoundError("Unable to modify file") - def render_rst(self, path: str = notebooks_docs, template: str = rst_template): - """Rendering rst file for all notebooks - - :param path: Path to notebook main rst file. Defaults to notebooks_docs. - :type path: str - :param template: Template for default rst page. Defaults to rst_template. - :type template: str - - """ - with open(path, "w+") as nb_file: - nb_file.writelines(Template(template).render(self.rst_data)) - +def add_glob_directive(tutorials_file): + with open(tutorials_file, 'r+', encoding='cp437') as mainfile: + readfile = mainfile.read() + if ':glob:' not in readfile: + add_glob = readfile\ + .replace(":hidden:\n", ":hidden:\n :glob:\n")\ + .replace("notebooks_installation\n", "notebooks_installation\n notebooks/*\n") + mainfile.seek(0) + mainfile.write(add_glob) + mainfile.truncate() def main(): parser = argparse.ArgumentParser() @@ -185,6 +163,10 @@ def main(): args = parser.parse_args() sourcedir = args.sourcedir outdir = args.outdir + + main_tutorials_file = Path('../../docs/tutorials.md').resolve(strict=True) + add_glob_directive(main_tutorials_file) + if args.download: outdir.mkdir(parents=True, exist_ok=True) # Step 2. 
Run default pipeline for downloading @@ -196,8 +178,7 @@ def main(): buttons_list = nbp.fetch_binder_list('notebooks_with_binder_buttons.txt') cbuttons_list = nbp.fetch_colab_list('notebooks_with_colab_buttons.txt') nbp.add_binder(buttons_list, cbuttons_list) - nbp.render_rst(outdir.joinpath(notebooks_docs)) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/docs/nbdoc/utils.py b/docs/nbdoc/utils.py index 8ddd34c4ec44c7..bbef186640b209 100644 --- a/docs/nbdoc/utils.py +++ b/docs/nbdoc/utils.py @@ -1,6 +1,4 @@ from jinja2 import Template -from os import path, remove -from shutil import rmtree def create_content(template: str, notebooks_data: dict, file_name: str): @@ -45,25 +43,6 @@ def add_content_below(text: str, path: str, line=3) -> bool: return False -def process_notebook_name(notebook_name: str) -> str: - """Processes notebook name - - :param notebook_name: Notebook name by default keeps convention: - [3 digit]-name-with-dashes-with-output.rst, - example: 001-hello-world-with-output.rst - :type notebook_name: str - :returns: Processed notebook name, - 001-hello-world-with-output.rst -> 001. hello world - :rtype: str - - """ - return ( - notebook_name[:3] - + "." - + " ".join(notebook_name[4:].split(".")[0].split("-")[:-2]) - ) - - def verify_notebook_name(notebook_name: str) -> bool: """Verification based on notebook name @@ -75,14 +54,4 @@ def verify_notebook_name(notebook_name: str) -> bool: :rtype: bool """ - return notebook_name[:3].isdigit() and notebook_name[-4:] == ".rst" - - -def split_notebooks_into_sections(notebooks: list) -> list: - series = [list() for _ in range(5)] - for notebook in notebooks: - try: - series[int(notebook.name[0])].append(notebook) - except IndexError: - pass - return series \ No newline at end of file + return notebook_name[:3].isdigit() and notebook_name[-4:] == ".rst" \ No newline at end of file diff --git a/docs/tutorials.md b/docs/tutorials.md index c21005bab47bd4..bb38b9b40a3e07 100644 --- a/docs/tutorials.md +++ b/docs/tutorials.md @@ -15,7 +15,7 @@ :hidden: notebooks_installation - notebooks/notebooks + This collection of Python tutorials are written for running on Jupyter notebooks. 
The tutorials provide an introduction to the OpenVINO™ toolkit and explain how to From 9f4e918ee239ff143e886ed3233461b384443c1f Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Mon, 11 Sep 2023 15:18:45 +0400 Subject: [PATCH 04/31] Gracefully fail if test models weren't generated (#19705) * Gracefully fail if test models weren't generated * Add assert instead of return `nullptr` --- cmake/test_model_zoo.cmake | 2 +- src/core/tests/pass/serialization/read_ir.hpp | 6 ++++-- src/frontends/paddle/tests/CMakeLists.txt | 3 ++- src/frontends/tensorflow/tests/CMakeLists.txt | 2 +- src/frontends/tensorflow_lite/tests/CMakeLists.txt | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/cmake/test_model_zoo.cmake b/cmake/test_model_zoo.cmake index f065b9ca4d81f5..9a786ed069792f 100644 --- a/cmake/test_model_zoo.cmake +++ b/cmake/test_model_zoo.cmake @@ -8,7 +8,7 @@ if(ENABLE_OV_ONNX_FRONTEND) # if requirements are not installed automatically, we need to checks whether they are here ov_check_pip_packages(REQUIREMENTS_FILE "${OpenVINO_SOURCE_DIR}/src/frontends/onnx/tests/requirements.txt" RESULT_VAR onnx_FOUND - WARNING_MESSAGE "ONNX frontend tests will be skipped" + WARNING_MESSAGE "ONNX testing models weren't generated, some tests will fail due .onnx models not found" MESSAGE_MODE WARNING) endif() diff --git a/src/core/tests/pass/serialization/read_ir.hpp b/src/core/tests/pass/serialization/read_ir.hpp index b500b67cc6c543..90e466016b542f 100644 --- a/src/core/tests/pass/serialization/read_ir.hpp +++ b/src/core/tests/pass/serialization/read_ir.hpp @@ -7,11 +7,13 @@ #include "openvino/core/except.hpp" #include "openvino/core/model.hpp" #include "openvino/frontend/manager.hpp" +#include "openvino/util/file_util.hpp" namespace ov { namespace test { inline std::shared_ptr readModel(const std::string& model_path, const std::string& weights_path) { + OPENVINO_ASSERT(ov::util::file_exists(model_path), "Model ", model_path, " not found"); static ov::frontend::FrontEndManager manager; ov::frontend::FrontEnd::Ptr FE; ov::frontend::InputModel::Ptr inputModel; @@ -27,7 +29,7 @@ inline std::shared_ptr readModel(const std::string& model_path, const if (inputModel) return FE->convert(inputModel); - OPENVINO_ASSERT(false, "Failed to read the model"); + OPENVINO_ASSERT(false, "Failed to read the model ", model_path); } inline std::shared_ptr readModel(const std::string& model) { @@ -46,7 +48,7 @@ inline std::shared_ptr readModel(const std::string& model) { if (inputModel) return FE->convert(inputModel); - return nullptr; + OPENVINO_ASSERT(false, "Failed to read the model"); } } // namespace test diff --git a/src/frontends/paddle/tests/CMakeLists.txt b/src/frontends/paddle/tests/CMakeLists.txt index 2dc4d5e6724859..58b12313d424cf 100644 --- a/src/frontends/paddle/tests/CMakeLists.txt +++ b/src/frontends/paddle/tests/CMakeLists.txt @@ -36,7 +36,8 @@ endif() if(NOT EXIT_CODE EQUAL 0) set(paddlepaddle_FOUND OFF) - message(WARNING "Python requirement file ${PADDLE_REQ} is not installed, PaddlePaddle frontend unit tests will be skipped") + message(WARNING "Python requirement file ${PADDLE_REQ} is not installed, PaddlePaddle testing models weren't generated, + some tests will fail due models not found") else() set(paddlepaddle_FOUND ON) endif() diff --git a/src/frontends/tensorflow/tests/CMakeLists.txt b/src/frontends/tensorflow/tests/CMakeLists.txt index 89552e8deae6f7..f5f7e5a6817b29 100644 --- a/src/frontends/tensorflow/tests/CMakeLists.txt +++ b/src/frontends/tensorflow/tests/CMakeLists.txt @@ 
-30,7 +30,7 @@ ov_add_test_target( ov_check_pip_packages(REQUIREMENTS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" MESSAGE_MODE WARNING - WARNING_MESSAGE "TensorFlow frontend unit tests will be skipped" + WARNING_MESSAGE "TensorFlow testing models weren't generated, some tests will fail due models not found" RESULT_VAR tensorflow_FOUND) set(TEST_TENSORFLOW_MODELS_DIRNAME test_model_zoo/tensorflow_test_models) diff --git a/src/frontends/tensorflow_lite/tests/CMakeLists.txt b/src/frontends/tensorflow_lite/tests/CMakeLists.txt index 6202dc7fccd87a..278f7d313f0847 100644 --- a/src/frontends/tensorflow_lite/tests/CMakeLists.txt +++ b/src/frontends/tensorflow_lite/tests/CMakeLists.txt @@ -24,7 +24,7 @@ ov_add_test_target( ov_check_pip_packages(REQUIREMENTS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt" MESSAGE_MODE WARNING - WARNING_MESSAGE "TensorFlow Lite frontend unit tests will be skipped" + WARNING_MESSAGE "TensorFlow Lite testing models weren't generated, some tests will fail due models not found" RESULT_VAR tensorflow_FOUND) set(TEST_TENSORFLOW_LITE_MODELS_DIRNAME test_model_zoo/tensorflow_lite_test_models) From 847eb3f4f1c0e65db06897ed5ba166dd3692f5f2 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Mon, 11 Sep 2023 16:15:53 +0400 Subject: [PATCH 05/31] `TransposeLayerTest` to API2.0 (#19671) --- .../single_layer_tests/transpose.cpp | 76 ++++++++----------- .../include/single_op_tests/transpose.hpp | 15 ++++ .../single_op/transpose.hpp | 32 ++++++++ .../src/single_op/transpose.cpp | 54 +++++++++++++ 4 files changed, 131 insertions(+), 46 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/transpose.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/transpose.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/transpose.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/transpose.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/transpose.cpp index 85f9ffd95bfee6..bda97551dda42e 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/transpose.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/transpose.cpp @@ -4,57 +4,49 @@ #include -#include "single_layer_tests/transpose.hpp" +#include "single_op_tests/transpose.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +using ov::test::TransposeLayerTest; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I16, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::U8, +const std::vector model_types = { + ov::element::f32, + ov::element::f16, + ov::element::i64, + ov::element::i32, + ov::element::i16, + ov::element::i8, + ov::element::u8, }; -std::vector> inputShape2D = {{2, 10}, {10, 2}, {10, 10}}; -std::vector> order2D = {{0, 1}, {1, 0}}; +std::vector> input_shape_static_2D = {{{2, 10}}, {{10, 2}}, {{10, 10}}}; +std::vector> order_2D = {{0, 1}, {1, 0}}; INSTANTIATE_TEST_SUITE_P(smoke_Transpose2D, TransposeLayerTest, ::testing::Combine( - ::testing::ValuesIn(order2D), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - 
::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShape2D), + ::testing::ValuesIn(order_2D), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shape_static_2D)), ::testing::Values(ov::test::utils::DEVICE_CPU)), TransposeLayerTest::getTestCaseName); -std::vector> inputShape4D = {{2, 2, 2, 2}, {1, 10, 2, 3}, {2, 3, 4, 5}}; -std::vector> order4D = { +std::vector> input_shape_static_4D = {{{2, 2, 2, 2}}, {{1, 10, 2, 3}}, {{2, 3, 4, 5}}}; +std::vector> order_4D = { {0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {0, 2, 3, 1}, {0, 3, 1, 2}, {0, 3, 2, 1}, {1, 0, 2, 3}, {1, 0, 3, 2}, {1, 2, 0, 3}, {1, 2, 3, 0}, {1, 3, 0, 2}, {1, 3, 2, 0}, {2, 0, 1, 3}, {2, 0, 3, 1}, {2, 1, 0, 3}, {2, 1, 3, 0}, {2, 3, 0, 1}, {2, 3, 1, 0}, {3, 0, 1, 2}, {3, 0, 2, 1}, {3, 1, 0, 2}, {3, 1, 2, 0}, {3, 2, 0, 1}, {3, 2, 1, 0}}; INSTANTIATE_TEST_SUITE_P(smoke_Transpose4D, TransposeLayerTest, ::testing::Combine( - ::testing::ValuesIn(order4D), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShape4D), + ::testing::ValuesIn(order_4D), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shape_static_4D)), ::testing::Values(ov::test::utils::DEVICE_CPU)), TransposeLayerTest::getTestCaseName); -std::vector> inputShape5D = {{2, 2, 2, 2, 2}, {1, 10, 2, 3, 4}, {2, 3, 4, 5, 6}}; -std::vector> order5D = { +std::vector> input_shape_static_5D = {{{2, 2, 2, 2, 2}}, {{1, 10, 2, 3, 4}}, {{2, 3, 4, 5, 6}}}; +std::vector> order_5D = { {0, 1, 2, 3, 4}, {1, 0, 2, 3, 4}, {4, 3, 2, 1, 0}, @@ -68,18 +60,14 @@ std::vector> order5D = { INSTANTIATE_TEST_SUITE_P(smoke_Transpose5D, TransposeLayerTest, ::testing::Combine( - ::testing::ValuesIn(order5D), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShape5D), + ::testing::ValuesIn(order_5D), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shape_static_5D)), ::testing::Values(ov::test::utils::DEVICE_CPU)), TransposeLayerTest::getTestCaseName); -std::vector> inputShape6D = {{2, 2, 2, 2, 2, 2}, {1, 10, 2, 3, 4, 5}, {2, 3, 4, 5, 6, 7}}; -std::vector> order6D = { +std::vector> input_shape_static_6D = {{{2, 2, 2, 2, 2, 2}}, {{1, 10, 2, 3, 4, 5}}, {{2, 3, 4, 5, 6, 7}}}; +std::vector> order_6D = { {0, 1, 2, 3, 4, 5}, {1, 0, 2, 3, 4, 5}, {5, 4, 3, 2, 1, 0}, @@ -93,13 +81,9 @@ std::vector> order6D = { INSTANTIATE_TEST_SUITE_P(smoke_Transpose6D, TransposeLayerTest, ::testing::Combine( - ::testing::ValuesIn(order6D), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::ValuesIn(inputShape6D), + ::testing::ValuesIn(order_6D), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shape_static_6D)), 
::testing::Values(ov::test::utils::DEVICE_CPU)), TransposeLayerTest::getTestCaseName); diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/transpose.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/transpose.hpp new file mode 100644 index 00000000000000..c9d5448cd81c1c --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/transpose.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/transpose.hpp" + +namespace ov { +namespace test { +TEST_P(TransposeLayerTest, Inference) { + run(); +}; +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/transpose.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/transpose.hpp new file mode 100644 index 00000000000000..18b5068822c07c --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/transpose.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +typedef std::tuple< + std::vector, // Input order + ov::element::Type, // Model type + std::vector, // Input shapes + std::string // Target device name +> transposeParams; + +class TransposeLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/transpose.cpp b/src/tests/functional/shared_test_classes/src/single_op/transpose.cpp new file mode 100644 index 00000000000000..453692099a4756 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/transpose.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/transpose.hpp" + +namespace ov { +namespace test { +std::string TransposeLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + ov::element::Type modelType; + std::vector inputOrder; + std::vector input_shapes; + std::string targetDevice; + std::tie(inputOrder, modelType, input_shapes, targetDevice) = obj.param; + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < input_shapes.size(); i++) { + result << ov::test::utils::partialShape2str({input_shapes[i].first}) + << (i < input_shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < input_shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < input_shapes.size(); j++) { + result << ov::test::utils::vec2str(input_shapes[j].second[i]) << (j < input_shapes.size() - 1lu ? 
"_" : ""); + } + result << "}_"; + } + result << "inputOrder=" << ov::test::utils::vec2str(inputOrder) << "_"; + result << "modelType=" << modelType.to_string() << "_"; + result << "trgDev=" << targetDevice; + return result.str(); +} + +void TransposeLayerTest::SetUp() { + std::vector input_order; + std::vector input_shapes; + ov::element::Type model_type; + std::tie(input_order, model_type, input_shapes, targetDevice) = this->GetParam(); + + init_input_shapes({input_shapes}); + + auto param = std::make_shared(model_type, inputDynamicShapes.front()); + + const auto in_order_shape = input_order.empty() ? ov::Shape({0}) : ov::Shape({inputDynamicShapes.front().size()}); + const auto input_order_const = std::make_shared(ov::element::i64, + in_order_shape, + input_order); + const auto transpose = std::make_shared(param, input_order_const); + const ov::ResultVector results{std::make_shared(transpose)}; + function = std::make_shared(results, ov::ParameterVector{param}, "Transpose"); +} +} // namespace test +} // namespace ov From a1cc5e66926227c42e8176c41b596fd8e748513c Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 11 Sep 2023 17:15:42 +0400 Subject: [PATCH 06/31] Resolve ARM CPU plugin illegal instruction on older Linux systems (like Ubuntu 18.04) (#19717) --- src/plugins/intel_cpu/thirdparty/onednn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index ae825539bd850d..a1aa20ca8f1946 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit ae825539bd850d1ad5c83d4bb0d56c65d46d5842 +Subproject commit a1aa20ca8f19465dc2fd18389953ed83798b2fd3 From 7e3e1e248090f78bbc05b5b47e3d4c725ad2e89a Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 11 Sep 2023 18:11:34 +0400 Subject: [PATCH 07/31] [GPU] Support of int8 compressed weights for matmul (#19548) --- .../op/fully_connected_compressed.hpp | 35 ++++ .../intel_gpu/plugin/primitives_list.hpp | 1 + .../intel_gpu/primitives/fully_connected.hpp | 48 +++++ .../intel_gpu/src/graph/fully_connected.cpp | 5 + .../prepare_primitive_fusing.cpp | 5 + .../src/graph/impls/ocl/fully_connected.cpp | 41 +++++ .../intel_gpu/src/graph/layout_optimizer.cpp | 4 + .../fully_connected_gpu_bf_tiled.cl | 86 +++++++-- .../fully_connected_gpu_bfyx_ref.cl | 35 +++- .../kernel_selector/kernel_selector_params.h | 2 + .../fully_connected_kernel_base.cpp | 29 ++- .../fully_connected_kernel_bf_tiled.cpp | 5 +- .../fully_connected_kernel_bfyx_ref.cpp | 1 + .../kernel_selector/weight_bias_params.cpp | 4 + .../src/kernel_selector/weight_bias_params.h | 5 + .../src/plugin/ops/fully_connected.cpp | 36 +++- .../intel_gpu/src/plugin/program_builder.cpp | 4 + .../convert_fc_to_compressed.cpp | 106 +++++++++++ .../convert_fc_to_compressed.hpp | 19 ++ .../op/fully_connected_compressed.cpp | 51 ++++++ .../src/plugin/transformations_pipeline.cpp | 16 +- .../test_cases/fully_connected_gpu_test.cpp | 170 ++++++++++++++++++ .../unit/test_cases/hash_key_gpu_test.cpp | 4 +- 23 files changed, 678 insertions(+), 34 deletions(-) create mode 100644 src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp create mode 100644 src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp diff 
--git a/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp new file mode 100644 index 00000000000000..6835088eb88967 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "fully_connected.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { + +class FullyConnectedCompressed : public FullyConnected { +public: + OPENVINO_OP("FullyConnectedCompressed", "gpu_opset"); + + FullyConnectedCompressed() = default; + + FullyConnectedCompressed(const ov::Output &A, + const ov::Output &B, + const ov::Output &decompression_scale, + const ov::Output &decompression_zero_point, + const ov::element::Type output_type = ov::element::undefined); + + FullyConnectedCompressed(const ov::Output &A, + const ov::Output &B, + const ov::Output &decompression_scale, + const ov::element::Type output_type = ov::element::undefined); + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; +}; + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 5fd423e8e7b4be..c36997481aa747 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -262,3 +262,4 @@ REGISTER_FACTORY(internal, GenerateProposalsIEInternal); REGISTER_FACTORY(internal, NmsStaticShapeIE8); REGISTER_FACTORY(internal, MulticlassNmsIEInternal); REGISTER_FACTORY(internal, FullyConnected); +REGISTER_FACTORY(internal, FullyConnectedCompressed); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp index d71afd76f1f4c4..5e5220d80e337b 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp @@ -76,10 +76,43 @@ struct fully_connected : public primitive_base { weights_rank(weights_rank) {} + /// @brief Constructs fully connected compressed layer. + /// @param id This primitive id. + /// @param input Input primitive id. + /// @param weights Primitive id containing weights data. + /// @param bias Primitive id containing bias data. + /// @param compression_scale Primitive id containing scale factors for weights decompression. + /// @param compression_zero_point Primitive id containing zero points for weights decompression. 
+ fully_connected(const primitive_id& id, + const input_info& input, + const primitive_id& weights, + const primitive_id& bias, + const primitive_id& decompression_scale, + const primitive_id& decompression_zero_point, + const data_types data_type, + const padding& output_padding = padding(), + const size_t input_size = 2, + const size_t weights_rank = 2) + : primitive_base(id, { input }, {output_padding}, {optional_data_type{data_type}}), + weights(weights), + bias(bias), + compressed_weights(true), + decompression_scale(decompression_scale), + decompression_zero_point(decompression_zero_point), + input_size(input_size), + weights_rank(weights_rank) { + OPENVINO_ASSERT(!decompression_scale.empty(), "[GPU] Compressed fully connected requires at least decompression scale input"); + } + /// @brief Primitive id containing weights data. primitive_id weights; /// @brief Primitive id containing bias data. primitive_id bias; + + bool compressed_weights = false; + primitive_id decompression_scale = ""; + primitive_id decompression_zero_point = ""; + /// @brief Primitive dimension size. size_t input_size = 2; /// @brief Primitive weights rank. @@ -90,6 +123,9 @@ struct fully_connected : public primitive_base { seed = hash_combine(seed, input_size); seed = hash_combine(seed, weights_rank); seed = hash_combine(seed, bias.empty()); + seed = hash_combine(seed, compressed_weights); + seed = hash_combine(seed, !decompression_scale.empty()); + seed = hash_combine(seed, !decompression_zero_point.empty()); return seed; } @@ -108,6 +144,9 @@ struct fully_connected : public primitive_base { primitive_base::save(ob); ob << weights; ob << bias; + ob << compressed_weights; + ob << decompression_scale; + ob << decompression_zero_point; ob << input_size; ob << weights_rank; } @@ -116,6 +155,9 @@ struct fully_connected : public primitive_base { primitive_base::load(ib); ib >> weights; ib >> bias; + ib >> compressed_weights; + ib >> decompression_scale; + ib >> decompression_zero_point; ib >> input_size; ib >> weights_rank; } @@ -128,6 +170,12 @@ struct fully_connected : public primitive_base { if (!bias.empty()) ret.push_back(bias); + if (!decompression_scale.empty()) + ret.push_back(decompression_scale); + + if (!decompression_zero_point.empty()) + ret.push_back(decompression_zero_point); + return ret; } }; diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index 92933598579126..38d10a5e35f071 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -217,6 +217,11 @@ std::string fully_connected_inst::to_string(fully_connected_node const& node) { json_composite fc_info; fc_info.add("weights id", weights_id); fc_info.add("bias id", bias_id); + fc_info.add("compressed weights", desc->compressed_weights ? 
"true" : "false"); + if (desc->compressed_weights) { + fc_info.add("decompression scale id", desc->decompression_scale); + fc_info.add("decompression zp id", desc->decompression_zero_point); + } node_info->add("fully connected info", fc_info); node_info->dump(primitive_description); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 50125f1c70b043..f7b3fd2a08ffe8 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -466,6 +466,11 @@ void prepare_primitive_fusing::fuse_bias(program &p) { desc->output_paddings[0], desc->input_size); + if (desc->compressed_weights) { + fc_with_bias_prim->compressed_weights = true; + fc_with_bias_prim->decompression_scale = desc->decompression_scale; + fc_with_bias_prim->decompression_zero_point = desc->decompression_zero_point; + } auto& new_fc_node = p.get_or_create(fc_with_bias_prim); fuse_bias_f(fc, new_fc_node, bias_node, eltw_node); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index f6c4e19133edaf..18b729caf68e41 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -26,10 +26,19 @@ struct fully_connected_impl : typed_primitive_impl_ocl { protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args = parent::get_arguments(instance); + const auto& desc = instance.get_typed_desc(); args.weights = instance.weights_memory(); args.bias = instance.bias_term() ? instance.bias_memory() : nullptr; + args.inputs = { instance.input_memory_ptr(0) }; + size_t in_id = instance.bias_term() ? 3 : 2; + if (!desc->decompression_scale.empty()) + args.inputs.push_back(instance.dep_memory_ptr(in_id++)); + + if (!desc->decompression_zero_point.empty()) + args.inputs.push_back(instance.dep_memory_ptr(in_id)); + return args; } @@ -72,6 +81,27 @@ struct fully_connected_impl : typed_primitive_impl_ocl { std::vector layouts{input0_layout, input1_layout}; + bool has_zp = !primitive->decompression_zero_point.empty(); + bool has_scale = !primitive->decompression_scale.empty(); + + size_t offset = primitive->bias.empty() ? 
2 : 3; + const auto& weights_pshape = input1_layout.get_partial_shape(); + if (has_scale) { + auto scale_layout = input_layouts[offset++]; + if (input1_pshape.size() != 2) { + scale_layout.set_partial_shape(reshape_to_2d(scale_layout.get_partial_shape(), weights_pshape[0], primitive->weights_rank)); + } + layouts.push_back(scale_layout); + } + + if (has_zp) { + auto zp_layout = input_layouts[offset]; + if (input1_pshape.size() != 2) { + zp_layout.set_partial_shape(reshape_to_2d(zp_layout.get_partial_shape(), weights_pshape[0], primitive->weights_rank)); + } + layouts.push_back(zp_layout); + } + return layouts; }; @@ -105,6 +135,17 @@ struct fully_connected_impl : typed_primitive_impl_ocl { auto optional_params = get_default_weights_bias_optional_params(progam); optional_params.allowInputReordering = true; + bool commpressed = !primitive->decompression_scale.empty(); + bool with_zp = !primitive->decompression_zero_point.empty(); + if (commpressed) { + params.compressed = true; + params.decompression_scale = convert_data_tensor(input_layouts[2]); + if (with_zp) { + params.has_decompression_zp = true; + params.decompression_zero_point = convert_data_tensor(input_layouts[3]); + } + } + if (primitive->input_size != 3) params.outputs = { params.outputs[0].FlattenFeatureAndSpatials() }; diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index d5b762b7d2160a..63e1b04e5dbf91 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -872,6 +872,10 @@ static bool is_node_for_onednn(deconvolution_node const& node) { static bool is_node_for_onednn(fully_connected_node const& node) { auto fc_prim = node.get_primitive(); + // onednn impl doesn't support compressed weights for now + if (fc_prim->compressed_weights) + return false; + auto output_layout = node.get_output_layout(); auto ps = output_layout.get_partial_shape(); size_t non_spatial_count = 2 + (fc_prim->input_size == 3 ? 1 : 0); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index 62f8548514f603..d4992801a80447 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -39,14 +39,15 @@ #endif // Macros for vectorized types. 
-#define INPUT_VEC_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, TILE_IFM) -#define ACCUMULATOR_VEC_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, TILE_OFM) -#define FILTER_VEC_TYPE MAKE_VECTOR_TYPE(FILTER_TYPE, TILE_K_OFM) -#define BIAS_VEC_TYPE MAKE_VECTOR_TYPE(BIAS_TYPE, TILE_OFM) -#define OUTPUT_VEC_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, TILE_OFM) -#define ACTIVATION_VEC_TYPE MAKE_VECTOR_TYPE(ACTIVATION_TYPE, TILE_OFM) -#define TO_OUTPUT_VEC_TYPE(x) CAT(convert_, OUTPUT_VEC_TYPE)(x) -#define TO_ACTIVATION_VEC_TYPE(x) CAT(convert_, ACTIVATION_VEC_TYPE)(x) +#define INPUT_VEC_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, TILE_IFM) +#define ACCUMULATOR_VEC_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, TILE_OFM) +#define FILTER_VEC_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, TILE_K_OFM) +#define BIAS_VEC_TYPE MAKE_VECTOR_TYPE(BIAS_TYPE, TILE_OFM) +#define OUTPUT_VEC_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, TILE_OFM) +#define ACTIVATION_VEC_TYPE MAKE_VECTOR_TYPE(ACTIVATION_TYPE, TILE_OFM) +#define TO_OUTPUT_VEC_TYPE(x) CAT(convert_, OUTPUT_VEC_TYPE)(x) +#define TO_ACTIVATION_VEC_TYPE(x) CAT(convert_, ACTIVATION_VEC_TYPE)(x) +#define TO_FILTER_VEC_TYPE(x) CAT(convert_, FILTER_VEC_TYPE)(x) #define INPUT_BLOCK_READ(ptr, offset) BLOCK_READN(INPUT0_TYPE, TILE_IFM, ptr, offset) #define FILTER_BLOCK_READ(ptr, offset) BLOCK_READN(FILTER_TYPE, TILE_K_OFM, ptr, offset) @@ -81,6 +82,12 @@ REQD_SUB_GROUP_SIZE(SIMD) KERNEL(fc)( OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, +#if DECOMPRESSION_SCALE_TERM + const __global DECOMPRESSION_SCALE_TYPE* decompression_scale, +#endif +#if DECOMPRESSION_ZP_TERM + const __global DECOMPRESSION_ZP_TYPE* decompression_zp, +#endif __global OUTPUT_TYPE* output, const __global FILTER_TYPE* weights #if BIAS_TERM @@ -113,13 +120,48 @@ KERNEL(fc)( uint input_offset = out_b * TILE_IN_B_PITCH + INPUT0_OFFSET; uint weights_offset = out_f * INPUT_ELEMENTS_COUNT; +#if COMPRESSED_WEIGHTS + #if DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % SIMD == 0 + ACCUMULATOR_VEC_TYPE d_scale = BLOCK_READN(ACCUMULATOR_TYPE, TILE_OFM, decompression_scale, out_f); + #elif DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % SIMD != 0 + ACCUMULATOR_VEC_TYPE d_scale = 0; + unroll_for(uint of = 0; of < TILE_OFM; ++of) { + uint offset = out_f + of*SIMD + get_sub_group_local_id(); + if (offset < DECOMPRESSION_SCALE_LENGTH) + ((ACCUMULATOR_TYPE*)(&d_scale))[of] = decompression_scale[offset]; + } + #else + ACCUMULATOR_VEC_TYPE d_scale = decompression_scale[0]; + #endif + + #if !DECOMPRESSION_ZP_TERM + ACCUMULATOR_VEC_TYPE d_zp = 0; + #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD == 0 + ACCUMULATOR_VEC_TYPE d_zp = BLOCK_READN(ACCUMULATOR_TYPE, TILE_OFM, decompression_zp, out_f); + #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD != 0 + ACCUMULATOR_VEC_TYPE d_zp = 0; + unroll_for(uint of = 0; of < TILE_OFM; ++of) { + uint offset = out_f + of*SIMD + get_sub_group_local_id(); + if (offset < DECOMPRESSION_ZP_LENGTH) + ((ACCUMULATOR_TYPE*)(&d_zp))[of] = decompression_zp[offset]; + } + #else + ACCUMULATOR_VEC_TYPE d_zp = decompression_zp[0]; + #endif + + ACCUMULATOR_TYPE* ds = (ACCUMULATOR_TYPE*)(&d_scale); + ACCUMULATOR_TYPE* dzp = (ACCUMULATOR_TYPE*)(&d_zp); +#endif + #if REALIGN_FP16_OFFSET // For fp16 we need to ensure that all block reads are aligned to 4 byte (2 words) boundary. // To do this solve first input feature separately. 
{ INPUT0_TYPE tmp_input = input[input_offset + get_sub_group_local_id() % TILE_B * TILE_IN_B_PITCH]; - MAKE_VECTOR_TYPE(FILTER_TYPE, TILE_OFM) tmp_wei = BLOCK_READN(FILTER_TYPE, TILE_OFM, weights, weights_offset); - + ACCUMULATOR_VEC_TYPE tmp_wei = TO_ACCUMULATOR_VEC_TYPE(BLOCK_READN(FILTER_TYPE, TILE_OFM, weights, weights_offset)); + #if COMPRESSED_WEIGHTS + tmp_wei = (tmp_wei - d_zp) * d_scale; + #endif unroll_for(uint bi = 0; bi < TILE_B; ++bi) { acc[bi] = _sub_group_shuffle(tmp_input, bi) * tmp_wei; } @@ -146,7 +188,15 @@ KERNEL(fc)( // but significantly degrades readability and generality of code. // It doesn't also show noticable performance improvement on tested configurations. unroll_for(uint ki = 0; ki < (TILE_IFM * SIMD) / TILE_K; ++ki) { - wei = FILTER_BLOCK_READ(weights, weights_offset); + wei = TO_FILTER_VEC_TYPE(FILTER_BLOCK_READ(weights, weights_offset)); + #if COMPRESSED_WEIGHTS + ACCUMULATOR_TYPE* w = (ACCUMULATOR_TYPE*)(&wei); + unroll_for(uint kii = 0; kii < TILE_K; ++kii) { + unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { + w[kii * TILE_OFM + fi] = (w[kii * TILE_OFM + fi] - dzp[fi]) * ds[fi]; + } + } + #endif weights_offset += TILE_K_OFM * SIMD; unroll_for (uint kii = 0; kii < TILE_K; ++kii) { @@ -154,7 +204,7 @@ KERNEL(fc)( unroll_for (uint bi = 0; bi < TILE_B; ++bi) { INPUT0_TYPE in_val = _sub_group_shuffle(((INPUT0_TYPE*)(&in_0[bi]))[total_k / SIMD], total_k % SIMD); unroll_for (uint fi = 0; fi < TILE_OFM; ++fi) { - ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += in_val * ((FILTER_TYPE*)(&wei))[kii * TILE_OFM + fi]; + ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += in_val * ((ACCUMULATOR_TYPE*)(&wei))[kii * TILE_OFM + fi]; } } } @@ -175,7 +225,15 @@ KERNEL(fc)( #undef LOAD_IN_0 input_offset += TILE_IFM * SIMD - TILE_IN_B_PITCH * TILE_B; unroll_for(uint ki = 0; ki < CEIL_DIV(LEFTOVER_IFM, TILE_K); ++ki) { - wei = FILTER_BLOCK_READ(weights, weights_offset); + wei = TO_FILTER_VEC_TYPE(FILTER_BLOCK_READ(weights, weights_offset)); + #if COMPRESSED_WEIGHTS + ACCUMULATOR_TYPE* w = (ACCUMULATOR_TYPE*)(&wei); + unroll_for(uint kii = 0; kii < TILE_K; ++kii) { + unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { + w[kii * TILE_OFM + fi] = (w[kii * TILE_OFM + fi] - dzp[fi]) * ds[fi]; + } + } + #endif weights_offset += TILE_K_OFM * SIMD; unroll_for (uint kii = 0; kii < TILE_K; ++kii) { @@ -184,7 +242,7 @@ KERNEL(fc)( const uint total_k = ki * TILE_K + kii; if (total_k < LEFTOVER_IFM) { INPUT0_TYPE in_val = _sub_group_shuffle(((INPUT0_TYPE*)(&in_0[bi]))[total_k / SIMD], total_k % SIMD); - ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += in_val * ((FILTER_TYPE*)(&wei))[kii * TILE_OFM + fi]; + ((ACCUMULATOR_TYPE*)(&acc[bi]))[fi] += in_val * ((ACCUMULATOR_TYPE*)(&wei))[kii * TILE_OFM + fi]; } } } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl index a8adbe42c80409..72e8d6d7d3d855 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl @@ -8,6 +8,12 @@ KERNEL(fc)( OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, +#if DECOMPRESSION_SCALE_TERM + const __global DECOMPRESSION_SCALE_TYPE* decompression_scale, +#endif +#if DECOMPRESSION_ZP_TERM + const __global DECOMPRESSION_ZP_TYPE* decompression_zp, +#endif __global OUTPUT_TYPE* output, const __global FILTER_TYPE* weights #if BIAS_TERM @@ -31,7 +37,19 @@ KERNEL(fc)( { const uint input0_idx = 
INPUT0_GET_INDEX(b, ofm, y, x); const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); - dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); + #if COMPRESSED_WEIGHTS + ACCUMULATOR_TYPE filter_compressed = TO_ACCUMULATOR_TYPE(weights[filter_idx]); + #if DECOMPRESSION_ZP_TERM + ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[DECOMPRESSION_ZP_GET_INDEX_SAFE(0, oym, 0, 0)]); + #else + ACCUMULATOR_TYPE zp = ACCUMULATOR_VAL_ZERO; + #endif + DECOMPRESSION_SCALE_TYPE scale = decompression_scale[DECOMPRESSION_SCALE_GET_INDEX_SAFE(0, oym, 0, 0)]; + ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - TO_ACCUMULATOR_TYPE(zp)) * scale; + dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(filter_val); + #else + dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); + #endif } } @@ -50,7 +68,20 @@ KERNEL(fc)( { const uint input0_idx = INPUT0_GET_INDEX(b, ifm, y, x); const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); - dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); + #if COMPRESSED_WEIGHTS + FILTER_TYPE filter_compressed = weights[filter_idx]; + #if DECOMPRESSION_ZP_TERM + ACCUMULATOR_TYPE zp = decompression_zp[DECOMPRESSION_ZP_GET_INDEX_SAFE(0, ofm, 0, 0)]; + #else + ACCUMULATOR_TYPE zp = ACCUMULATOR_VAL_ZERO; + #endif + + DECOMPRESSION_SCALE_TYPE scale = decompression_scale[DECOMPRESSION_SCALE_GET_INDEX_SAFE(0, ofm, 0, 0)]; + ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - TO_ACCUMULATOR_TYPE(zp)) * scale; + dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(filter_val); + #else + dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); + #endif } } } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h index 5c71c907b74c33..187b6c5e2ccc13 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h @@ -130,6 +130,7 @@ class ParamsKey { uint32_t asym_w_quantization : 1; uint32_t asym_d_quantization : 1; uint32_t dynamic_shapes : 1; + uint32_t compressed_weights : 1; union dedicated_t { struct argm_t { @@ -318,6 +319,7 @@ class ParamsKey { void EnablePoolRemainder(PoolRemainder r); void EnablePoolDilation() { key.restrict.val.dedicated.pooling.dilation = 1; } void EnablePoolIndicesOutput() { key.restrict.val.dedicated.pooling.indices_output = 1; } + void EnableWeightsCompression() { key.restrict.val.compressed_weights = 1; } void EnableQuantization(QuantizationType q); void EnablePositionSensitivePooling() { key.restrict.val.dedicated.pooling.position_sensitive = 1; } void EnableDilation() { key.restrict.val.dedicated.conv.dilation = 1; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp index 0ee4792c08cac2..3e9eb35cdaaff0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp @@ -21,6 +21,17 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par const auto x_size = input.LogicalSize() / 
input.Batch().v; jit.AddConstant(MakeJitConstant("INPUT0_ELEMENTS_COUNT", x_size)); } + + if (params.compressed) { + jit.AddConstants({MakeJitConstant("COMPRESSED_WEIGHTS", 1)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_TERM", 1)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE", params.decompression_scale)}); + if (params.has_decompression_zp) { + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_TERM", 1)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP", params.decompression_zero_point)}); + } + } + return jit; } @@ -93,11 +104,11 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params ¶ms, auto cldnn_jit = GetJitConstants(newParams, dispatchData); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); - uint32_t fused_deps_total = 0; - for (auto& fused_dep : newParams.fused_ops) { - for (int i = 0; i < static_cast(fused_dep.dep_size); i++) { - fused_deps_total++; - } + int inputs_count = 1; + if (newParams.compressed) { + inputs_count++; + if (newParams.has_decompression_zp) + inputs_count++; } auto& kernel = kd.kernels[0]; @@ -110,8 +121,8 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params ¶ms, exeMode, true, !orgParams.bias.empty(), - 1, - fused_deps_total, + inputs_count, + GetFusedPrimitiveInputsCount(params), 1, orgParams.outputs[0].is_dynamic()); @@ -176,10 +187,10 @@ Datatype FullyConnectedKernelBase::GetAccumulatorType(const fully_connected_para return Datatype::INT32; // If we either weights or input is quantized, then we use fp32 accumulator to avoid fp16 overflow - if (quantized_inputs || quantized_weights) + if ((quantized_inputs || quantized_weights) && !params.compressed) return Datatype::F32; - return params.inputs[0].GetDType(); + return in_dt; } Datatype FullyConnectedKernelBase::GetActivationType(const fully_connected_params& params) const { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index d7f93eccae523f..6b0407f6580cad 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -52,6 +52,7 @@ ParamsKey FullyConnected_bf_tiled::GetSupportedKey() const { k.EnableDifferentTypes(); k.EnableDifferentInputWeightsTypes(); k.EnableDynamicShapesSupport(); + k.EnableWeightsCompression(); return k; } @@ -200,7 +201,9 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, while (max_tile_ofm * 2 * simd <= output_f && max_tile_ofm < 4) max_tile_ofm *= 2; - if (params.is_shape_agnostic) { + if (params.compressed && params.engineInfo.supports_immad) { + return selector.Default(tune_params(1, 1, 1, 4, 1, 1, EXE_MODE_DEFAULT)); + } else if (params.is_shape_agnostic) { // Use special tuning params for Gen12HP dGPUs, since these parameters demonstrate higher performance // due to better HW utilization (reduced TILE_OFM parameter) and better assembler kernel's code // generation (extended TILE_K parameter) for both FP16 and FP32 data types diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp index f112d03dee3777..9b08e67245258a 100644 --- 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp @@ -36,6 +36,7 @@ ParamsKey FullyConnected_bfyx_Ref::GetSupportedKey() const { k.EnableBatching(); k.EnableQuantization(QuantizationType::SYMMETRIC); k.EnableDynamicShapesSupport(); + k.EnableWeightsCompression(); return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.cpp b/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.cpp index 86d5ed51c1d4e8..ecda4bfab295d3 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.cpp @@ -24,6 +24,10 @@ ParamsKey weight_bias_params::GetParamsKey() const { k.EnableBiasPerOutput(); } + if (compressed) { + k.EnableWeightsCompression(); + } + return k; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.h b/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.h index 16db27f5cb5c2b..ad1c03cb8a7298 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.h +++ b/src/plugins/intel_gpu/src/kernel_selector/weight_bias_params.h @@ -17,6 +17,11 @@ struct weight_bias_params : public base_params { WeightsTensor weights; MultiDataTensor bias; + bool compressed = false; + bool has_decompression_zp = false; + DataTensor decompression_scale; + DataTensor decompression_zero_point; + ParamsKey GetParamsKey() const override; }; diff --git a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp index c6f957afc97f59..2c2d42b842d696 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp @@ -6,16 +6,17 @@ #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/op/fully_connected.hpp" +#include "intel_gpu/op/fully_connected_compressed.hpp" #include "intel_gpu/primitives/fully_connected.hpp" #include "intel_gpu/primitives/reshape.hpp" #include "intel_gpu/primitives/reorder.hpp" - namespace ov { namespace op { namespace internal { using FullyConnected = ov::intel_gpu::op::FullyConnected; +using FullyConnectedCompressed = ov::intel_gpu::op::FullyConnectedCompressed; } // namespace internal } // namespace op } // namespace ov @@ -23,13 +24,37 @@ using FullyConnected = ov::intel_gpu::op::FullyConnected; namespace ov { namespace intel_gpu { +static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::shared_ptr& op) { + validate_inputs_count(op, {3, 4}); + auto inputs = p.GetInputInfo(op); + std::string primitive_name = layer_type_name_ID(op); + + auto input_name = inputs[0].pid; + auto weights_name = inputs[1].pid; + auto scale_name = inputs[2].pid; + auto zp_name = inputs.size() == 4 ? 
inputs[3].pid : ""; + + auto fc = cldnn::fully_connected(primitive_name, + cldnn::input_info(input_name), + weights_name, + "", + scale_name, + zp_name, + cldnn::element_type_to_data_type(op->get_output_element_type(0)), + cldnn::padding(), + op->get_input_partial_shape(0).size(), + op->get_input_partial_shape(1).size()); + + p.add_primitive(*op, fc); +} + static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptr& op) { validate_inputs_count(op, {2}); auto inputs = p.GetInputInfo(op); std::string layerName = layer_type_name_ID(op); - auto inputName = inputs[0].pid; - auto weightsName = inputs[1].pid; + auto input_name = inputs[0].pid; + auto weights_name = inputs[1].pid; auto shape_a = op->get_input_partial_shape(0); auto shape_b = op->get_input_partial_shape(1); @@ -38,8 +63,8 @@ static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptrget_output_element_type(0)), cldnn::padding(), @@ -78,6 +103,7 @@ static void CreateFullyConnectedOp(ProgramBuilder& p, const std::shared_ptr @@ -556,6 +557,9 @@ bool ProgramBuilder::requires_new_shape_infer(const ov::Node& op) const { return true; } + if (ov::is_type(&op)) + return true; + for (size_t i = 0; i < op.get_output_size(); i++) { if (op.get_output_partial_shape(i).size() > 6) return true; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp new file mode 100644 index 00000000000000..a1c4d60b81977c --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "convert_fc_to_compressed.hpp" + +#include "intel_gpu/op/fully_connected.hpp" +#include "intel_gpu/op/fully_connected_compressed.hpp" + +#include "openvino/op/subtract.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "transformations/utils/utils.hpp" + +namespace ov { +namespace intel_gpu { + +ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed() { + using namespace ov::pass::pattern; + + auto weights_m = wrap_type(consumers_count(1)); + auto convert_m = wrap_type({weights_m}); + + auto sub_const_m = wrap_type(consumers_count(1)); + auto subtract_m = wrap_type({convert_m, sub_const_m}); + + auto mul_const_m = wrap_type(consumers_count(1)); + auto mul_with_sub_m = wrap_type({subtract_m, mul_const_m}); + auto mul_no_sub_m = wrap_type({convert_m, mul_const_m}); + auto mul_m = std::make_shared(OutputVector{mul_with_sub_m, mul_no_sub_m}); + + auto transpose_const_m = wrap_type(); + auto transpose_m = wrap_type({mul_m, transpose_const_m}); + auto weights_input_m = std::make_shared(ov::OutputVector{mul_m, transpose_m}); + + auto data_m = any_input(); + auto fully_connected_m = wrap_type({data_m, weights_input_m}); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + OPENVINO_ASSERT(pattern_map.count(fully_connected_m)); + OPENVINO_ASSERT(pattern_map.count(mul_const_m)); + OPENVINO_ASSERT(pattern_map.count(weights_m)); + OPENVINO_ASSERT(pattern_map.count(convert_m)); + auto fc = 
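// The pattern above matches a weight decompression chain feeding a FullyConnected:
//   Constant(weights) -> Convert -> [Subtract(zero point)] -> Multiply(scale) -> [Transpose] -> FullyConnected
// The callback rebuilds that node as FullyConnectedCompressed: it keeps the original weight
// constant (re-applying the Transpose if one was matched) and passes the scale and the
// optional zero-point constants as separate inputs, so the weights stay compressed.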
std::dynamic_pointer_cast(pattern_map.at(fully_connected_m).get_node_shared_ptr()); + if (!fc || transformation_callback(fc)) { + return false; + } + + const auto& fc_input_a = fc->get_input_node_shared_ptr(0); + const auto& scale = pattern_map.at(mul_const_m).get_node_shared_ptr(); + std::shared_ptr optional_zero_point = nullptr; + + ov::NodeVector nodes_to_copy_info{pattern_map.at(fully_connected_m).get_node_shared_ptr(), + pattern_map.at(convert_m).get_node_shared_ptr()}; + if (pattern_map.count(mul_no_sub_m)) { + nodes_to_copy_info.push_back(pattern_map.at(mul_no_sub_m).get_node_shared_ptr()); + } + if (pattern_map.count(mul_with_sub_m)) { + nodes_to_copy_info.push_back(pattern_map.at(mul_with_sub_m).get_node_shared_ptr()); + } + + const bool with_zero_point = pattern_map.count(subtract_m) > 0; + if (with_zero_point) { + optional_zero_point = pattern_map.at(sub_const_m).get_node_shared_ptr(); + nodes_to_copy_info.push_back(subtract_m); + } + + std::shared_ptr fc_input_b = pattern_map.at(weights_m).get_node_shared_ptr(); + if (pattern_map.count(transpose_m)) { + const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr(); + const auto& transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr(); + fc_input_b = transpose->clone_with_new_inputs({ fc_input_b->output(0), transpose_const }); + } + + std::shared_ptr new_fc = nullptr; + if (with_zero_point) { + new_fc = std::make_shared(fc_input_a, + fc_input_b, + scale, + optional_zero_point, + fc->get_output_type()); + } else { + new_fc = std::make_shared(fc_input_a, + fc_input_b, + scale, + fc->get_output_type()); + } + + new_fc->set_friendly_name(fc->get_friendly_name()); + ov::copy_runtime_info(nodes_to_copy_info, new_fc); + ov::replace_node(fc, new_fc); + return true; + }; + + auto m = std::make_shared(fully_connected_m); + this->register_matcher(m, callback); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp new file mode 100644 index 00000000000000..16e784fde6710f --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_gpu { + +class ConvertFullyConnectedToFullyConnectedCompressed: public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedCompressed", "0"); + ConvertFullyConnectedToFullyConnectedCompressed(); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp new file mode 100644 index 00000000000000..1ecfc1e21081b5 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/op/fully_connected_compressed.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { + +FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output& A, + const ov::Output& B, + const ov::Output& decompression_scale, + const ov::Output& decompression_zero_point, + const ov::element::Type output_type) + : FullyConnected(A, B, 
output_type) { + set_argument(2, decompression_scale); + set_argument(3, decompression_zero_point); + validate_and_infer_types(); +} + +FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output& A, + const ov::Output& B, + const ov::Output& decompression_scale, + const ov::element::Type output_type) + : FullyConnected(A, B, output_type) { + set_argument(2, decompression_scale); + validate_and_infer_types(); +} + +std::shared_ptr FullyConnectedCompressed::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + + if (new_args.size() == 3) + return std::make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + m_output_type); + else if (new_args.size() == 4) + return std::make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + m_output_type); + else + OPENVINO_THROW("Unexpected inputs count for FullyConnectedCompressed op: ", new_args.size()); +} + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index e0515ecdc2af26..d10eb959395b30 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -35,6 +35,7 @@ #include "openvino/pass/constant_folding.hpp" #include "openvino/core/deprecated.hpp" +#include "openvino/pass/visualize_tree.hpp" #include "transformations/einsum_decomposition.hpp" #include "transformations/convert_pooling_to_reduce.hpp" #include "transformations/decompose_reduce_for_false_keepdims.hpp" @@ -46,6 +47,7 @@ #include "transformations/control_flow/unroll_tensor_iterator.hpp" #include "transformations/resolve_names_collisions.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" #include "transformations/common_optimizations/common_optimizations.hpp" #include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" @@ -55,6 +57,7 @@ #include "transformations/common_optimizations/transpose_sinking.hpp" #include "transformations/common_optimizations/softmax_fusion.hpp" #include "transformations/common_optimizations/mvn_fusion.hpp" +#include "transformations/common_optimizations/compress_float_constants.hpp" #include "transformations/op_conversions/convert_depth_to_space.hpp" #include "transformations/op_conversions/convert_space_to_depth.hpp" @@ -106,6 +109,7 @@ #include "plugin/transformations/convert_matmul_to_fc.hpp" #include "plugin/transformations/move_fc_reshape_to_weights.hpp" +#include "plugin/transformations/convert_fc_to_compressed.hpp" #include "transformations/low_precision/mark_dequantization_subgraph.hpp" #include "low_precision/pull_reshape_through_dequantization.hpp" @@ -147,6 +151,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); { ov::pass::Manager manager; + auto pass_config = manager.get_pass_config(); manager.set_per_pass_validation(false); enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func); @@ -213,6 +218,15 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // decompose MVNs that sre not supported in GPU, so that they will be marked as precision sensitive in ConvertPrecision 
manager.register_pass(); + auto is_matmul_output = [](const_node_ptr &node) -> bool { + const auto outputs = node->get_output_target_inputs(0); + return !is_type(outputs.begin()->get_node()); + }; + + manager.register_pass(); + manager.register_pass(ov::element::TypeVector{ov::element::u8}, true); + pass_config->set_callback(is_matmul_output); + const bool keep_precision_sensitive_in_fp32_1 = true; manager.register_pass(fp_convert_precision_map, empty_fuse_map, @@ -269,7 +283,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(int_convert_precision_map); - auto pass_config = manager.get_pass_config(); pass_config->disable(); // disable conversion to legacy and use the new mixed precision @@ -614,6 +627,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { ov::pass::Manager manager; manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.run_passes(func); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index b483cdfef20585..89f3598f2d421f 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -2,7 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/layout.hpp" +#include "openvino/core/partial_shape.hpp" #include "test_utils.h" +#include "float16.h" #include "random_generator.hpp" #include "network_test.h" #include @@ -656,6 +660,172 @@ TEST(fully_connected_gpu, x_f32_relu) { ASSERT_EQ(0.00f, output_ptr[3]); } +TEST(fully_connected_gpu, compressed_scale_zp_bias) { + auto& engine = get_test_engine(); + + auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f32, format::bfyx }); + auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + auto zp_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + + set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, + 0.5f, -2.0f, -0.5f, -1.0f }); + set_values(weights_mem, { 1.5f, 1.0f, 0.5f, -1.0f, + 0.0f, 0.5f, 0.5f, -0.5f, + -2.0f, -0.5f, 1.0f, 1.5f, + -2.0f, -0.5f, 1.0f, 1.5f, + 2.0f, 0.5f, -1.0f, -1.5f, + 2.0f, 0.5f, -1.0f, -1.5f, + -1.5f, -1.0f, -0.5f, 1.0f, + 0.0f, -0.5f, 0.5f, 0.5f }); + + set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 2.0f }); + set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 2.0f }); + set_values(zp_mem, { 1.0f, 2.0f, 2.0f, 1.0f, 4.0f, 1.0f, 6.0f, 2.0f }); + + topology topology( + input_layout("input", input_mem->get_layout()), + data("weights", weights_mem), + data("bias", bias_mem), + data("scale", scale_mem), + data("zp", zp_mem), + fully_connected("fc_prim", input_info("input"), "weights", "bias", "scale", "zp", data_types::f32, padding(), 3, 2) + ); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + network network(engine, topology, config); + network.set_input_data("input", input_mem); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "fc_prim"); + + auto output_mem = outputs.begin()->second.get_memory(); + + 
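// The expected values below can be reproduced by dequantizing the weights per output
// channel, (weights[oc][k] - zp[oc]) * scale[oc], and applying a plain FC with bias:
//   out[b][oc] = dot(input[b][:], dequantized_weights[oc][:]) + bias[oc]
// e.g. for the first element: (weights[0][:] - 1.0) * 2.0 = {1.0, 0.0, -1.0, -4.0}, and
//   -0.5*1.0 + 2.0*0.0 + 0.5*(-1.0) + 1.0*(-4.0) + 1.0 = -4.0.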
cldnn::mem_lock output_ptr (output_mem, get_test_stream()); + + ov::PartialShape expected_shape{1, 2, 8}; + ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape()); + + std::vector expected_result = {-4.0f, -23.0f, 11.0f, 0.0f, -2.0f, -3.5f, -30.0f, -10.5f, + 6.0f, 19.0f, -5.0f, -8.0f, 12.0f, -8.5f, 44.0f, 14.5f}; + + for (size_t i = 0; i < expected_result.size(); i++) { + ASSERT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; + } +} + +TEST(fully_connected_gpu, compressed_scale_bias) { + auto& engine = get_test_engine(); + + auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f32, format::bfyx }); + auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + + set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, + 0.5f, -2.0f, -0.5f, -1.0f }); + set_values(weights_mem, { 1.5f, 1.0f, 0.5f, -1.0f, + 0.0f, 0.5f, 0.5f, -0.5f, + -2.0f, -0.5f, 1.0f, 1.5f, + -2.0f, -0.5f, 1.0f, 1.5f, + 2.0f, 0.5f, -1.0f, -1.5f, + 2.0f, 0.5f, -1.0f, -1.5f, + -1.5f, -1.0f, -0.5f, 1.0f, + 0.0f, -0.5f, 0.5f, 0.5f }); + + set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }); + set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 1.0f }); + + topology topology( + input_layout("input", input_mem->get_layout()), + data("weights", weights_mem), + data("bias", bias_mem), + data("scale", scale_mem), + fully_connected("fc_prim", input_info("input"), "weights", "bias", "scale", "", data_types::f32, padding(), 3, 2) + ); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + network network(engine, topology, config); + network.set_input_data("input", input_mem); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "fc_prim"); + + auto output_mem = outputs.begin()->second.get_memory(); + + cldnn::mem_lock output_ptr (output_mem, get_test_stream()); + + ov::PartialShape expected_shape{1, 2, 8}; + ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape()); + + std::vector expected_result = {2.0f, 1.0f, -1.0f, -12.0f, 4.0f, -5.0f, 6.0f, -8.25f, + 0.0f, -5.0f, 7.0f, 4.0f, 6.0f, -7.0f, 8.0f, -7.75f}; + + for (size_t i = 0; i < expected_result.size(); i++) { + ASSERT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; + } +} + +TEST(fully_connected_gpu, compressed_scale_fp16) { + auto& engine = get_test_engine(); + + auto input_mem = engine.allocate_memory({ { 2, 4}, data_types::f16, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f16, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {1, 8}, data_types::f16, format::bfyx }); + + set_values(input_mem, { FLOAT16(-0.5f), FLOAT16(2.0f), FLOAT16(0.5f), FLOAT16(1.0f), + FLOAT16(0.5f), FLOAT16(-2.0f), FLOAT16(-0.5f), FLOAT16(-1.0f) }); + set_values(weights_mem, {FLOAT16( 1.5f), FLOAT16( 1.0f), FLOAT16( 0.5f), FLOAT16(-1.0f), + FLOAT16( 0.0f), FLOAT16( 0.5f), FLOAT16( 0.5f), FLOAT16(-0.5f), + FLOAT16(-2.0f), FLOAT16(-0.5f), FLOAT16( 1.0f), FLOAT16( 1.5f), + FLOAT16(-2.0f), FLOAT16(-0.5f), FLOAT16( 1.0f), FLOAT16( 1.5f), + FLOAT16( 2.0f), FLOAT16( 0.5f), FLOAT16(-1.0f), FLOAT16(-1.5f), + FLOAT16( 2.0f), FLOAT16( 0.5f), FLOAT16(-1.0f), FLOAT16(-1.5f), + FLOAT16(-1.5f), FLOAT16(-1.0f), FLOAT16(-0.5f), FLOAT16( 1.0f), + FLOAT16( 0.0f), 
FLOAT16(-0.5f), FLOAT16(0.5f), FLOAT16( 0.5f) }); + + set_values(scale_mem, {FLOAT16(2.0f), FLOAT16(4.0f), FLOAT16(-2.0f), FLOAT16(-4.0f), FLOAT16(0.5f), FLOAT16(-0.5f), FLOAT16(2.0f), FLOAT16(2.0f)}); + + topology topology( + input_layout("input", input_mem->get_layout()), + data("weights", weights_mem), + data("scale", scale_mem), + fully_connected("fc_prim", input_info("input"), "weights", "", "scale", "", data_types::f32, padding(), 2, 2) + ); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + network network(engine, topology, config); + network.set_input_data("input", input_mem); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "fc_prim"); + + auto output_mem = outputs.begin()->second.get_memory(); + + cldnn::mem_lock output_ptr (output_mem, get_test_stream()); + + ov::PartialShape expected_shape{2, 8}; + ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape()); + + std::vector expected_result = { + FLOAT16(1.0f), FLOAT16( 3.0f), FLOAT16(-4.0f), FLOAT16(-8.0f), FLOAT16(-1.0f), FLOAT16( 1.0f), FLOAT16(-1.0f), FLOAT16(-0.5f), + FLOAT16(-1.0f), FLOAT16(-3.0f), FLOAT16( 4.0f), FLOAT16( 8.0f), FLOAT16( 1.0f), FLOAT16(-1.0f), FLOAT16( 1.0f), FLOAT16( 0.5f)}; + + for (size_t i = 0; i < expected_result.size(); i++) { + ASSERT_FLOAT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; + } +} + TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) { // Input : 3x1 // Output : 4x1 diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp index aa3d3935728947..eaf253c248cbbb 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp @@ -71,8 +71,8 @@ class check_hash_value: public ::testing::Test { const auto primitive_hash = primitve->hash(); const auto params_hash = primitve->type->get_fake_aligned_params(*prim_inst->get_impl_params()).hash(); - ASSERT_EQ(primitive_hash, 2197080758510296176UL); - ASSERT_EQ(params_hash, 4714860879383010855UL); + ASSERT_EQ(primitive_hash, 6924775129729406941UL); + ASSERT_EQ(params_hash, 8552673460001178483UL); } void test_gather_basic(bool is_caching_test) { From d0dda74fc23f142fd6b8ae181d3ae73badd20099 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Mon, 11 Sep 2023 16:31:39 +0200 Subject: [PATCH 08/31] Handle negative values in GroupedSliceToVSplitOptimization (#19495) * Handle negative values in GroupedSliceToVSplitOptimization CVS-118897 * change the way of getting slice inputs * clamp value --------- Co-authored-by: Ivan Tikhonov --- .../optimize_strided_slice.cpp | 46 +++++++++++++------ .../optimize_strided_slice_test.cpp | 26 +++++++++++ 2 files changed, 59 insertions(+), 13 deletions(-) diff --git a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp index d443e962f4e783..2bd46f3cccdbb2 100644 --- a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp @@ -274,30 +274,47 @@ struct SliceWithAttrs { }; bool slice_is_suitable_for_optimization(const std::shared_ptr& op, SliceAttrs& attrs) { - const auto& data_rank = 
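// The suitability check is reworked below so that negative values no longer disqualify a
// slice group: start/stop/stride/axis are each read as scalar constants, the stride must
// still be 1, a negative axis is wrapped by the rank, and start/stop are wrapped by the
// (static) dimension size and then clamped into [0, dimension].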
op->get_input_partial_shape(0).rank(); + const auto& input_shape = op->get_input_partial_shape(0); + const auto& data_rank = input_shape.rank(); if (op->get_input_size() != 5 || data_rank.is_dynamic()) return false; + const auto rank = data_rank.get_length(); - for (size_t i = 1; i < 5; ++i) { - auto input_as_constant = ov::as_type_ptr(op->get_input_node_shared_ptr(i)); - if (!input_as_constant) + auto get_scalar = [](const std::shared_ptr& node, int64_t& value) -> bool { + auto constant = ov::as_type_ptr(node); + if (!constant) return false; - if (shape_size(input_as_constant->get_shape()) != 1) + if (shape_size(constant->get_shape()) != 1) return false; + value = constant->cast_vector()[0]; + return true; + }; + + enum { START = 1, STOP, STRIDE, AXIS }; + + int64_t stride; + if (!get_scalar(op->get_input_node_shared_ptr(STRIDE), stride) || stride != 1) + return false; + if (!get_scalar(op->get_input_node_shared_ptr(AXIS), attrs.axis)) + return false; + attrs.axis = attrs.axis >= 0 ? attrs.axis : attrs.axis + rank; - int64_t value = input_as_constant->cast_vector()[0]; + if (input_shape[attrs.axis].is_dynamic()) + return false; + const auto dimension = input_shape[attrs.axis].get_length(); - if (((i == 1 || i == 2) && value < 0) || (i == 3 && value != 1)) + for (int i = START; i <= STOP; i++) { + int64_t value; + if (!get_scalar(op->get_input_node_shared_ptr(i), value)) return false; - else if (i == 1) + value = value >= 0 ? value : value + dimension; + value = std::max(std::min(value, dimension), 0); + if (i == START) attrs.start = value; - else if (i == 2) + else if (i == STOP) attrs.stop = value; - else if (i == 4) - attrs.axis = value >= 0 ? value : value + data_rank.get_length(); } - if (attrs.axis < 0 || op->get_input_partial_shape(0)[attrs.axis].is_dynamic()) - return false; + return true; } @@ -335,6 +352,9 @@ bool ov::pass::GroupedSliceToVSplitOptimization::run_on_model(const std::shared_ const auto& axis = output_with_axis.second; auto attributes = source_to_op_with_attrs[output_with_axis]; + if (attributes.size() < 2) + continue; + std::sort(attributes.begin(), attributes.end(), [](const SliceWithAttrs& lhs, const SliceWithAttrs& rhs) { if (lhs.attrs.start == rhs.attrs.start) return lhs.attrs.stop < rhs.attrs.stop; diff --git a/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp b/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp index 6a7c4d56590b82..a7ab79e36341b5 100644 --- a/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp +++ b/src/common/transformations/tests/common_optimizations/optimize_strided_slice_test.cpp @@ -1175,3 +1175,29 @@ TEST_F(TransformationTestsF, GroupedSliceToVSplitSameSourceDifferentAxis) { model_ref = std::make_shared(ov::NodeVector{concat_2}, ov::ParameterVector{data}); } } + +TEST_F(TransformationTestsF, GroupedSliceToVSplitNegativeStartStop) { + { + auto data = std::make_shared(ov::element::f32, ov::PartialShape{-1, 5, -1, -1}); + auto relu = std::make_shared(data); + + auto slice_0 = make_slice(relu, -50, 1, 1, -3); + auto slice_1 = make_slice(relu, -4, -2, 1, 1); + auto slice_2 = make_slice(relu, -2, INT32_MAX, 1, 1); + + auto concat = std::make_shared(ov::OutputVector{slice_0, slice_2, slice_1}, 1); + + model = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{data}); + manager.register_pass(); + } + { + auto data = std::make_shared(ov::element::f32, ov::PartialShape{-1, 5, -1, -1}); + auto relu = std::make_shared(data); + + auto 
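// With the negative start/stop handling added above, the three slices over axis 1
// (dimension 5) normalize to [0,1), [1,3) and [3,5): -50 clamps to 0, -4 -> 1, -2 -> 3,
// and INT32_MAX clamps to 5. Together they tile the axis exactly, so the group is
// expected to collapse into a single VariadicSplit with split lengths {1, 2, 2}.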
vsplit = make_vsplit(relu, 1, {1, 2, 2}); + + auto concat = std::make_shared(ov::OutputVector{vsplit[0], vsplit[2], vsplit[1]}, 1); + + model_ref = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{data}); + } +} From e614b8f69a5f639a85dc4d9d37376a04eb08dae8 Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Mon, 11 Sep 2023 16:49:31 +0200 Subject: [PATCH 09/31] [DOCS] Update of model_conversion_diagram.svg for master (#19737) * Update model_conversion_diagram.svg * Update model_conversion_diagram.svg * Update model_conversion_diagram.svg --- docs/_static/images/model_conversion_diagram.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_static/images/model_conversion_diagram.svg b/docs/_static/images/model_conversion_diagram.svg index 8b872e2604ea68..8bb8d171bd1eee 100644 --- a/docs/_static/images/model_conversion_diagram.svg +++ b/docs/_static/images/model_conversion_diagram.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:894cc0f49385b304f7129b31e616cfc47dd188e910fca8d726b006bcbb3082f3 -size 252381 +oid sha256:ab1d83dbd1546cb8eaada19501cf08d26e3ca1e2ce72fce63356e897fa26750e +size 253024 From 2320329a5113afb85ee759b201ed56ca737fd72d Mon Sep 17 00:00:00 2001 From: Bartlomiej Bielawa Date: Mon, 11 Sep 2023 17:07:28 +0200 Subject: [PATCH 10/31] [DOCS] Modify dropdowns css --- docs/_static/css/custom.css | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css index 57b74a233847eb..b984dded78c2bf 100644 --- a/docs/_static/css/custom.css +++ b/docs/_static/css/custom.css @@ -102,6 +102,30 @@ ul#navbar-main-elements > li:hover { top: 5px; } +/* Moving dropdown arrows to the left */ +details.sd-dropdown .sd-summary-up, +details.sd-dropdown .sd-summary-down { + left: 10px; +} + + +/* Ttile is at the same place for both open and close states */ +details.sd-dropdown:not([open]).sd-card { + padding: 0px; +} + + +/* Ttile is at the same place for both open and close states */ +details.sd-dropdown[open].sd-card { + padding: 0px; +} + +/* Move title 40px away from the arrow */ +details.sd-dropdown .sd-summary-title { + padding-left: 40px; +} + + /* Second level items */ #bd-docs-nav > div > ul > li > ul { From 3e95e483095d841c88c1dd735a7db476d91a0155 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Mon, 11 Sep 2023 19:37:42 +0100 Subject: [PATCH 11/31] [CI] [GHA] Remove unnecessary steps and `cmake` options, use proper # of CPU cores for ARM64 pipeline (#19746) * address comments * rm * use machine's # of cpu core --- .github/workflows/linux.yml | 9 --------- .github/workflows/linux_android_arm64.yml | 14 -------------- .github/workflows/linux_arm64.yml | 17 ++--------------- 3 files changed, 2 insertions(+), 38 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index f0d7acd3d1c79b..9f93a7b5e310d6 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -136,13 +136,6 @@ jobs: restore-keys: | linux-ubuntu - - name: Get tools versions - run: | - ninja --version || exit 1 - ccache --version || exit 1 - python3 --version || exit 1 - cmake --version || exit 1 - - name: Get number of CPU cores uses: SimenB/github-actions-cpu-cores@v2 id: cpu-cores @@ -164,8 +157,6 @@ jobs: -DENABLE_STRICT_DEPENDENCIES=OFF \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_LINKER_LAUNCHER=ccache \ - -DCMAKE_C_LINKER_LAUNCHER=ccache \ -DENABLE_SYSTEM_SNAPPY=ON \ -DENABLE_SYSTEM_TBB=ON \ 
-DBUILD_nvidia_plugin=OFF \ diff --git a/.github/workflows/linux_android_arm64.yml b/.github/workflows/linux_android_arm64.yml index f0b0c8f22a76a1..d2106b22b63b55 100644 --- a/.github/workflows/linux_android_arm64.yml +++ b/.github/workflows/linux_android_arm64.yml @@ -65,11 +65,6 @@ jobs: fetch-depth: '0' submodules: 'recursive' - - name: Create Directories - run: | - mkdir -p ${{ env.BUILD_DIR }} - mkdir -p ${{ env.INSTALL_DIR }} - - name: Setup Python 3.11 uses: actions/setup-python@v4 with: @@ -118,13 +113,6 @@ jobs: restore-keys: | ${{ github.job }}-linux-android-arm64 - - name: Get tools versions - run: | - ninja --version - ccache --version - python3 --version - cmake --version - # # Build # @@ -163,8 +151,6 @@ jobs: -DENABLE_TESTS=ON \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_LINKER_LAUNCHER=ccache \ - -DCMAKE_C_LINKER_LAUNCHER=ccache \ -S ${{ env.OPENVINO_REPO }} \ -B ${{ env.BUILD_DIR }} diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index effb2d9c97444b..4b8793616eb8ee 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -1,4 +1,4 @@ -name: Linux ARM64 (Ubuntu 20.04, Python 3.11) +name: Linux ARM64 with Conan (Ubuntu 20.04, Python 3.11) on: schedule: # run daily at 00:00 @@ -40,7 +40,6 @@ jobs: CMAKE_GENERATOR: 'Ninja' CMAKE_CXX_COMPILER_LAUNCHER: ccache CMAKE_C_COMPILER_LAUNCHER: ccache - NUM_PROC: 2 BUILD_TYPE: Release OPENVINO_REPO: ${{ github.workspace }}/openvino BUILD_DIR: ${{ github.workspace }}/build @@ -62,11 +61,6 @@ jobs: git submodule update --init -- ${{ env.OPENVINO_REPO }}/thirdparty/open_model_zoo popd - - name: Create Directories - run: | - mkdir -p ${{ env.BUILD_DIR }} - mkdir -p ${{ env.INSTALL_DIR }} - - name: Setup Python 3.11 uses: actions/setup-python@v4 with: @@ -124,13 +118,6 @@ jobs: restore-keys: | ${{ github.job }}-linux-arm64 - - name: Get tools versions - run: | - ninja --version - ccache --version - python3 --version - cmake --version - - name: Install conan and dependencies run: | python3 -m pip install conan @@ -183,7 +170,7 @@ jobs: -DCMAKE_TOOLCHAIN_FILE=${{ env.BUILD_DIR }}/dependencies/conan_toolchain.cmake \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DARM_COMPUTE_SCONS_JOBS=${{ env.NUM_PROC }} \ + -DARM_COMPUTE_SCONS_JOBS=${{ steps.cpu-cores.outputs.count }} \ -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} \ -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} \ -DENABLE_PYTHON_PACKAGING=ON \ From fc5696321aac0b7de09b15dd71467e812bf3758c Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 12 Sep 2023 01:05:50 +0400 Subject: [PATCH 12/31] [TF Hub][GA] Separate Workflow for TF Hub Tests Validation (#19754) * [TF Hub][GA] Use Ubuntu 20.04 for TensorFlow Hub Models validation and Separate job Signed-off-by: Kazantsev, Roman * Apply review comments: ubuntu-20.04 use and install deps * Simplify validation pipeline for TF Hub Models * Remove extra deps installations * Remove not needed code * Try to fix * Try 22.04 --------- Signed-off-by: Kazantsev, Roman --- .github/workflows/linux.yml | 81 +++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 13 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 9f93a7b5e310d6..adfa7420d67c00 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -660,19 +660,6 @@ jobs: env: TEST_DEVICE: CPU - - name: TensorFlow Hub Tests - TF FE - run: | - python3 -m pip install -r ${{ 
env.MODEL_HUB_TESTS_INSTALL_DIR }}/tf_hub_tests/requirements.txt - - export PYTHONPATH=${{ env.MODEL_HUB_TESTS_INSTALL_DIR }}:$PYTHONPATH - - source ${{ env.INSTALL_DIR }}/setupvars.sh - - python3 -m pytest ${{ env.MODEL_HUB_TESTS_INSTALL_DIR }}/tf_hub_tests/ -m ${{ env.TYPE }} --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_hub_tf_fe.xml --html=${{ env.INSTALL_TEST_DIR }}/TEST-tf_hub_tf_fe.html --self-contained-html - env: - TYPE: ${{ github.event_name == 'schedule' && 'nightly' || 'precommit'}} - TEST_DEVICE: CPU - - name: TensorFlow 1 Layer Tests - Legacy FE run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt @@ -814,3 +801,71 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/logs/interapted/*.log ${{ env.INSTALL_TEST_DIR }}/logs/disabled_tests.log if-no-files-found: 'error' + + TensorFlow_Hub_Models_Tests: + needs: Build + defaults: + run: + shell: bash + runs-on: ubuntu-22.04 + env: + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests + + steps: + - name: Create Directories + run: | + mkdir -p ${{ env.INSTALL_DIR }} ${{ env.INSTALL_TEST_DIR }} + + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Download OpenVINO package + uses: actions/download-artifact@v3 + with: + name: openvino_package + path: ${{ env.INSTALL_DIR }} + + - name: Download OpenVINO tests package + uses: actions/download-artifact@v3 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + - name: Extract OpenVINO packages + run: | + pushd ${{ env.INSTALL_DIR }} + tar -xzf openvino_package.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_package.tar.gz || exit 1 + popd + + pushd ${{ env.INSTALL_TEST_DIR }} + tar -xzf openvino_tests.tar.gz -C ${{ env.INSTALL_DIR }} && rm openvino_tests.tar.gz || exit 1 + popd + + - name: Install Python wheels + run: | + python3 -m pip install openvino --find-links=${{ env.INSTALL_DIR }}/tools + + - name: TensorFlow Hub Tests - TF FE + run: | + python3 -m pip install openvino --find-links=${{ env.INSTALL_DIR }}/tools + + python3 -m pip install -r ${{ env.MODEL_HUB_TESTS_INSTALL_DIR }}/tf_hub_tests/requirements.txt + + export PYTHONPATH=${{ env.MODEL_HUB_TESTS_INSTALL_DIR }}:$PYTHONPATH + + python3 -m pytest ${{ env.MODEL_HUB_TESTS_INSTALL_DIR }}/tf_hub_tests/ -m ${{ env.TYPE }} --html=${{ env.INSTALL_TEST_DIR }}/TEST-tf_hub_tf_fe.html --self-contained-html + env: + TYPE: ${{ github.event_name == 'schedule' && 'nightly' || 'precommit'}} + TEST_DEVICE: CPU + + - name: Upload Test Results + uses: actions/upload-artifact@v3 + if: ${{ always() }} + with: + name: test-results-tensorflow-hub-models + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.html + if-no-files-found: 'error' From 497f42bd82cb33369129399d1c89c4125aa1eb40 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Tue, 12 Sep 2023 01:31:57 +0400 Subject: [PATCH 13/31] Post commit fix for #19521. 
(#19741) --- .../tests/functional/subgraph_tests/src/custom_op_scalar.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_scalar.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_scalar.cpp index 95b26d3b24cd36..15eef344b6ffd2 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_scalar.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/custom_op_scalar.cpp @@ -49,11 +49,11 @@ class CustomOpScalar : public ov::op::Op { bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override { for (size_t i = 0llu; i < inputs.size(); i++) { - OPENVINO_ASSERT(inputs[i].get_shape().size() == get_input_partial_shape(i).rank().get_length(), + OPENVINO_ASSERT(inputs[i].get_shape().size() == static_cast(get_input_partial_shape(i).rank().get_length()), "Invalid input shape rank: ", inputs[i].get_shape().size()); } for (size_t i = 0llu; i < outputs.size(); i++) { - OPENVINO_ASSERT(outputs[i].get_shape().size() == get_output_partial_shape(i).rank().get_length(), + OPENVINO_ASSERT(outputs[i].get_shape().size() == static_cast(get_output_partial_shape(i).rank().get_length()), "Invalid outputs shape rank: ", outputs[i].get_shape().size()); } From 7becaf84944c6433b249027d0322f1861b57504f Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 12 Sep 2023 07:09:12 +0400 Subject: [PATCH 14/31] Remove legacy API from common test utils (#19647) * Remove legacy API from common test utils * Fixed code style * Fixed build * Try to fix Windows build * Fixed GNA build --- src/core/tests/reshape_opt_kernel.cpp | 5 +- src/frontends/onnx/tests/onnx_import.in.cpp | 279 ++++++------ .../tests/onnx_import_com_microsoft.in.cpp | 1 + .../onnx/tests/onnx_import_convpool.in.cpp | 86 ++-- .../onnx/tests/onnx_import_deprecated.in.cpp | 25 +- .../tests/onnx_import_org_openvino.in.cpp | 1 + .../onnx/tests/onnx_import_org_pytorch.in.cpp | 1 + .../onnx/tests/onnx_import_quant.in.cpp | 1 + .../onnx/tests/onnx_import_reshape.in.cpp | 135 +++--- .../onnx/tests/onnx_import_rnn.in.cpp | 1 + .../onnx/tests/onnx_import_signal.in.cpp | 2 + .../onnx/tests/onnx_import_with_editor.in.cpp | 1 + .../onnx/tests/onnx_tensor_names.cpp | 1 + .../deprecated/helpers/tests_file_utils.cpp | 4 + .../common_test_utils/CMakeLists.txt | 2 +- .../include/common_test_utils/all_close.hpp | 10 +- .../common_test_utils/common_utils.hpp | 6 +- .../include/common_test_utils/data_utils.hpp | 218 ++++++---- .../include/common_test_utils/file_utils.hpp | 50 +-- .../include/common_test_utils/matcher.hpp | 5 +- .../include/common_test_utils/ndarray.hpp | 7 +- .../common_test_utils/ov_tensor_utils.hpp | 63 ++- .../common_test_utils/ov_test_utils.hpp | 2 +- .../include/common_test_utils/test_case.hpp | 3 +- .../include/common_test_utils/test_common.hpp | 2 +- .../common_test_utils/test_constants.hpp | 30 +- .../common_test_utils/test_control.hpp | 66 +-- .../include/common_test_utils/test_enums.hpp | 29 +- .../include/common_test_utils/test_tools.hpp | 3 +- .../common_test_utils/unicode_utils.hpp | 53 +-- .../include/common_test_utils/w_dirent.h | 167 ++++---- .../common_test_utils/src/all_close.cpp | 36 +- .../common_test_utils/src/all_close_f.cpp | 69 ++- .../common_test_utils/src/data_utils.cpp | 309 +++++++++----- .../common_test_utils/src/file_utils.cpp | 24 +- .../src/graph_comparator.cpp | 6 + .../common_test_utils/src/ov_tensor_utils.cpp | 255 ++++++----- 
.../common_test_utils/src/ov_test_utils.cpp | 3 +- .../common_test_utils/src/precomp.hpp | 26 +- .../common_test_utils/src/test_common.cpp | 24 +- .../common_test_utils/src/test_constants.cpp | 18 +- .../common_test_utils/src/unicode_utils.cpp | 18 +- .../tests/graph_comparator_tests.cpp | 401 +++++++++--------- 43 files changed, 1298 insertions(+), 1150 deletions(-) diff --git a/src/core/tests/reshape_opt_kernel.cpp b/src/core/tests/reshape_opt_kernel.cpp index c2f894589ca463..d23b25308a39f7 100644 --- a/src/core/tests/reshape_opt_kernel.cpp +++ b/src/core/tests/reshape_opt_kernel.cpp @@ -12,7 +12,6 @@ #include "openvino/core/axis_vector.hpp" using namespace ov; -using namespace ngraph; namespace { using ElementValue = int32_t; @@ -32,8 +31,8 @@ AxisVector get_axis_order(AxisOrder order, size_t size) { struct TestParams { AxisOrder order; - ngraph::test::NDArrayBase input; - ngraph::test::NDArrayBase output; + ov::test::NDArrayBase input; + ov::test::NDArrayBase output; }; struct ReshapeOptKernel : ::testing::TestWithParam {}; diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index d2044d99a0ca52..70e40178d8a879 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -25,6 +25,7 @@ // clang-format on #include "common_test_utils/file_utils.hpp" #include "common_test_utils/ov_test_utils.hpp" +#include "ngraph/file_util.hpp" #include "default_opset.hpp" #include "openvino/opsets/opset12.hpp" #include "common_test_utils/test_case.hpp" @@ -765,20 +766,21 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_gemm_abc) { Inputs inputs; inputs.emplace_back( - test::NDArray({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}, {13, 14, 15, 16, 17, 18}}).get_vector()); - - inputs.emplace_back(test::NDArray({{19, 20, 21, 22}, - {23, 24, 25, 26}, - {27, 28, 29, 30}, - {31, 32, 33, 34}, - {35, 36, 37, 38}, - {39, 40, 41, 42}}) + ov::test::NDArray({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}, {13, 14, 15, 16, 17, 18}}) + .get_vector()); + + inputs.emplace_back(ov::test::NDArray({{19, 20, 21, 22}, + {23, 24, 25, 26}, + {27, 28, 29, 30}, + {31, 32, 33, 34}, + {35, 36, 37, 38}, + {39, 40, 41, 42}}) .get_vector()); - inputs.emplace_back(test::NDArray({{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}).get_vector()); + inputs.emplace_back(ov::test::NDArray({{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}).get_vector()); auto expected_output = - test::NDArray({{340, 350.5, 361, 371.5}, {862, 890.5, 919, 947.5}, {1384, 1430.5, 1477, 1523.5}}) + ov::test::NDArray({{340, 350.5, 361, 371.5}, {862, 890.5, 919, 947.5}, {1384, 1430.5, 1477, 1523.5}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -793,11 +795,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_matmul) { std::vector> inputs; - inputs.emplace_back(test::NDArray({{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}).get_vector()); + inputs.emplace_back(ov::test::NDArray({{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}).get_vector()); - inputs.emplace_back(test::NDArray({{13, 14, 15}, {16, 17, 18}, {19, 20, 21}, {22, 23, 24}}).get_vector()); + inputs.emplace_back( + ov::test::NDArray({{13, 14, 15}, {16, 17, 18}, {19, 20, 21}, {22, 23, 24}}).get_vector()); - auto expected_output = test::NDArray({{190, 200, 210}, {470, 496, 522}, {750, 792, 834}}).get_vector(); + auto expected_output = + ov::test::NDArray({{190, 200, 210}, {470, 496, 522}, {750, 792, 834}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ 
-997,11 +1001,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_sub) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/sub.onnx")); Inputs inputs; - inputs.emplace_back(test::NDArray({{{1, 2, 3}}}).get_vector()); + inputs.emplace_back(ov::test::NDArray({{{1, 2, 3}}}).get_vector()); - inputs.emplace_back(test::NDArray({{{4, 5, 7}}}).get_vector()); + inputs.emplace_back(ov::test::NDArray({{{4, 5, 7}}}).get_vector()); - auto expected_output = test::NDArray({{{-3, -3, -4}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{-3, -3, -4}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1014,10 +1018,10 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_div) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/div.onnx")); Inputs inputs; - inputs.emplace_back(test::NDArray({{{1, 2, 3}}}).get_vector()); - inputs.emplace_back(test::NDArray({{{1, 4, 12}}}).get_vector()); + inputs.emplace_back(ov::test::NDArray({{{1, 2, 3}}}).get_vector()); + inputs.emplace_back(ov::test::NDArray({{{1, 4, 12}}}).get_vector()); - auto expected_output = test::NDArray({{{1, 0.5, 0.25}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{1, 0.5, 0.25}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1030,17 +1034,18 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_add_bcast) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/add_bcast.onnx")); Inputs inputs; - inputs.emplace_back(test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) - .get_vector()); + inputs.emplace_back( + ov::test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) + .get_vector()); - inputs.emplace_back(test::NDArray({1, 2, 3, 4, 5}).get_vector()); + inputs.emplace_back(ov::test::NDArray({1, 2, 3, 4, 5}).get_vector()); auto expected_output = - test::NDArray({{{{2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}}, - {{2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}}, - {{2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}}}}) + ov::test::NDArray({{{{2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}}, + {{2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}}, + {{2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}, {2, 3, 4, 5, 6}}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -1177,10 +1182,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_log_sum) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/reduce_log_sum.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; + Inputs inputs{ + ov::test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{2.77258872f}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{2.77258872f}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); 
test_case.add_multiple_inputs(inputs); @@ -1194,10 +1200,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_log_sum_exp) { "onnx/reduce_log_sum_exp.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; + Inputs inputs{ + ov::test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{3.77258872f}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{3.77258872f}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1210,10 +1217,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_l1) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/reduce_l1.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; + Inputs inputs{ + ov::test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{16}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{16}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1226,10 +1234,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_l2) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/reduce_l2.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; + Inputs inputs{ + ov::test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{4}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{4}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1243,10 +1252,10 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_max) { // input data shape (1, 1, 4, 4) Inputs inputs{ - test::NDArray({{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}}}).get_vector()}; + ov::test::NDArray({{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{16}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{16}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1266,10 +1275,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_mean) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/reduce_mean.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; + Inputs inputs{ + ov::test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{1}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{1}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1283,10 +1293,10 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_min) { // input data shape (1, 1, 4, 4) Inputs inputs{ - test::NDArray({{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, 
{13, 14, 15, 16}}}}).get_vector()}; + ov::test::NDArray({{{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}, {13, 14, 15, 16}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{1}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{1}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1299,10 +1309,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_prod) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/reduce_prod.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; + Inputs inputs{ + ov::test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{1}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{1}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1315,10 +1326,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_sum) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/reduce_sum.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; + Inputs inputs{ + ov::test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{16}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{16}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1344,10 +1356,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_sum_square) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/reduce_sum_square.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; + Inputs inputs{ + ov::test::NDArray({{{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}}).get_vector()}; // output data shape (1,) - auto expected_output = test::NDArray({{{{16}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{16}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -1360,10 +1373,10 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_sum_13_axes_as_constant) { SERIALIZED_ZOO, "onnx/reduce_sum_13_axes_as_constant.onnx")); - Inputs inputs{test::NDArray({{{{1.0f, 1.0f, 1.0f, 1.0f}, - {1.0f, 1.0f, 1.0f, 1.0f}, - {1.0f, 1.0f, 1.0f, 1.0f}, - {1.0f, 1.0f, 1.0f, 1.0f}}}}) + Inputs inputs{ov::test::NDArray({{{{1.0f, 1.0f, 1.0f, 1.0f}, + {1.0f, 1.0f, 1.0f, 1.0f}, + {1.0f, 1.0f, 1.0f, 1.0f}, + {1.0f, 1.0f, 1.0f, 1.0f}}}}) .get_vector()}; auto test_case = ov::test::TestCase(function, s_device); @@ -1380,7 +1393,7 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reduce_sum_13_axes_as_constant_single_ SERIALIZED_ZOO, "onnx/reduce_sum_13_axes_as_constant_single_axis.onnx")); - Inputs inputs{test::NDArray({{{1, 2, 3}, {4, 5, 6}}, {{7, 8, 9}, {10, 11, 12}}}).get_vector()}; + Inputs inputs{ov::test::NDArray({{{1, 2, 3}, {4, 5, 6}}, {{7, 8, 9}, {10, 11, 12}}}).get_vector()}; auto test_case = ov::test::TestCase(function, s_device); @@ -1397,10 +1410,10 @@ OPENVINO_TEST(${BACKEND_NAME}, 
onnx_model_reduce_sum_13_axes_as_constant_keepdim "onnx/reduce_sum_13_axes_as_constant_keepdims_off.onnx")); // input data shape (1, 1, 4, 4) - Inputs inputs{test::NDArray({{{{1.0f, 1.0f, 1.0f, 1.0f}, - {1.0f, 1.0f, 1.0f, 1.0f}, - {1.0f, 1.0f, 1.0f, 1.0f}, - {1.0f, 1.0f, 1.0f, 1.0f}}}}) + Inputs inputs{ov::test::NDArray({{{{1.0f, 1.0f, 1.0f, 1.0f}, + {1.0f, 1.0f, 1.0f, 1.0f}, + {1.0f, 1.0f, 1.0f, 1.0f}, + {1.0f, 1.0f, 1.0f, 1.0f}}}}) .get_vector()}; auto test_case = ov::test::TestCase(function, s_device); @@ -2174,10 +2187,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_shape) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/shape.onnx")); Inputs inputs; - inputs.emplace_back(test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) - .get_vector()); + inputs.emplace_back( + ov::test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) + .get_vector()); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -2191,13 +2205,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_elu) { Inputs inputs; inputs.emplace_back( - test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, - {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) + ov::test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) .get_vector()); auto expected_output = - test::NDArray( + ov::test::NDArray( {{{-1.999753180391830f, -1.999329074744190f, -1.998176236068890f, -1.995042495646670f, -1.986524106001830f}, {-1.963368722222530f, -1.900425863264270f, -1.729329433526770f, -1.264241117657120f, 0}, {1, 2, 3, 4, 5}, @@ -2224,13 +2238,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_leaky_relu) { Inputs inputs; inputs.emplace_back( - test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, - {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) + ov::test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) .get_vector()); auto expected_output = - test::NDArray( + ov::test::NDArray( {{{-0.9f, -0.8f, -0.7f, -0.6f, -0.5f}, {-0.4f, -0.3f, -0.2f, -0.1f, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, {{-0.4f, -0.3f, -0.2f, -0.1f, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, {{1, 1, 1, 1, 1}, {-0.1f, -0.1f, -0.1f, -0.1f, -0.1f}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) @@ -2248,20 +2262,21 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_prelu_nd) { Inputs inputs; inputs.emplace_back( - test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 
15}}, - {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) + ov::test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) .get_vector()); - inputs.emplace_back(test::NDArray({{{1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}, {1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}}, - {{0, 1, 0, 1, 0}, {1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}, {1, 0, 1, 0, 1}}, - {{1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}, {1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}}}) - .get_vector()); + inputs.emplace_back( + ov::test::NDArray({{{1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}, {1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}}, + {{0, 1, 0, 1, 0}, {1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}, {1, 0, 1, 0, 1}}, + {{1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}, {1, 0, 1, 0, 1}, {0, 1, 0, 1, 0}}}) + .get_vector()); auto expected_output = - test::NDArray({{{-9, 0, -7, 0, -5}, {0, -3, 0, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{0, -3, 0, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, - {{1, 1, 1, 1, 1}, {0, -1, 0, -1, 0}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) + ov::test::NDArray({{{-9, 0, -7, 0, -5}, {0, -3, 0, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{0, -3, 0, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{1, 1, 1, 1, 1}, {0, -1, 0, -1, 0}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -2376,13 +2391,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_selu) { Inputs inputs; inputs.emplace_back( - test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, - {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) + ov::test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) .get_vector()); auto expected_output = - test::NDArray( + ov::test::NDArray( {{{-5.99925954117548f, -5.99798722423258f, -5.99452870820667f, -5.98512748694000f, -5.95957231800549f}, {-5.89010616666759f, -5.70127758979282f, -5.18798830058032f, -3.79272335297135f, 0}, {3, 6, 9, 12, 15}, @@ -2409,13 +2424,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_sigmoid) { Inputs inputs; inputs.emplace_back( - test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, - {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) + ov::test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) .get_vector()); auto expected_output = - test::NDArray( + ov::test::NDArray( {{{0.00012339457598623f, 0.00033535013046648f, 0.00091105119440065f, @@ -2446,13 +2461,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_tanh) { Inputs inputs; inputs.emplace_back( - test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, - {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 
2}}}) + ov::test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) .get_vector()); auto expected_output = - test::NDArray( + ov::test::NDArray( {{{-0.999999969540041f, -0.999999774929676f, -0.999998336943945f, -0.999987711650796f, -0.999909204262595f}, {-0.999329299739067f, -0.995054753686731f, -0.964027580075817f, -0.761594155955765f, 0}, {0.761594155955765f, 0.964027580075817f, 0.995054753686731f, 0.999329299739067f, 0.999909204262595f}, @@ -2479,15 +2494,15 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_thresholded_relu) { Inputs inputs; inputs.emplace_back( - test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, - {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) + ov::test::NDArray({{{-9, -8, -7, -6, -5}, {-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{-4, -3, -2, -1, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}, {2, 2, 2, 2, 2}}}) .get_vector()); auto expected_output = - test::NDArray({{{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 3, 4, 5}, {6, 7, 8, 9, 10}}, - {{0, 0, 0, 0, 0}, {0, 0, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, - {{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}}}) + ov::test::NDArray({{{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 3, 4, 5}, {6, 7, 8, 9, 10}}, + {{0, 0, 0, 0, 0}, {0, 0, 3, 4, 5}, {6, 7, 8, 9, 10}, {11, 12, 13, 14, 15}}, + {{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -2502,9 +2517,9 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_matmul_vec_ten3d) { Inputs inputs; inputs.emplace_back(std::vector{0.f, 1.f}); - inputs.emplace_back(test::NDArray{{{0.f}, {1.f}}, {{2.f}, {3.f}}, {{4.f}, {5.f}}}.get_vector()); + inputs.emplace_back(ov::test::NDArray{{{0.f}, {1.f}}, {{2.f}, {3.f}}, {{4.f}, {5.f}}}.get_vector()); - auto expected_output = test::NDArray{{1.f}, {3.f}, {5.f}}.get_vector(); + auto expected_output = ov::test::NDArray{{1.f}, {3.f}, {5.f}}.get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -2574,15 +2589,15 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_sum_opset8) { Inputs inputs; inputs.emplace_back(std::vector{1.0f, 2.0f, 3.0f}); - inputs.emplace_back(test::NDArray{{10.0f}, {20.0f}, {30.0f}}.get_vector()); - inputs.emplace_back(test::NDArray{{{100.0f}}, {{200.0f}}, {{300.0f}}}.get_vector()); + inputs.emplace_back(ov::test::NDArray{{10.0f}, {20.0f}, {30.0f}}.get_vector()); + inputs.emplace_back(ov::test::NDArray{{{100.0f}}, {{200.0f}}, {{300.0f}}}.get_vector()); auto expected_output = - test::NDArray{{{111.0f, 112.0f, 113.0f}, {121.0f, 122.0f, 123.0f}, {131.0f, 132.0f, 133.0f}}, + ov::test::NDArray{{{111.0f, 112.0f, 113.0f}, {121.0f, 122.0f, 123.0f}, {131.0f, 132.0f, 133.0f}}, - {{211.0f, 212.0f, 213.0f}, {221.0f, 222.0f, 223.0f}, {231.0f, 232.0f, 233.0f}}, + {{211.0f, 212.0f, 213.0f}, {221.0f, 222.0f, 223.0f}, {231.0f, 232.0f, 233.0f}}, - {{311.0f, 312.0f, 313.0f}, {321.0f, 322.0f, 323.0f}, {331.0f, 332.0f, 333.0f}}} + {{311.0f, 312.0f, 313.0f}, {321.0f, 322.0f, 323.0f}, {331.0f, 332.0f, 333.0f}}} .get_vector(); auto test_case = 
ov::test::TestCase(function, s_device); @@ -2903,13 +2918,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_erf) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/erf.onnx")); Inputs inputs; - inputs.emplace_back(test::NDArray{ + inputs.emplace_back(ov::test::NDArray{ {-std::numeric_limits::infinity(), std::numeric_limits::infinity()}, {-3.141592f, 0.0f}, {0.5f, 1.0f}}.get_vector()); const std::vector expected_output = - test::NDArray{{-1.0f, 1.0f}, {-0.99999112f, 0.0f}, {0.52049988f, 0.84270079f}}.get_vector(); + ov::test::NDArray{{-1.0f, 1.0f}, {-0.99999112f, 0.0f}, {0.52049988f, 0.84270079f}}.get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -5512,15 +5527,15 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unsqueeze_ai_onnx_domain) { SERIALIZED_ZOO, "onnx/unsqueeze_ai_onnx_domain.onnx")); - auto input = test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) + auto input = ov::test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) .get_vector(); auto expected_output = - test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) + ov::test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -5534,15 +5549,15 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unsqueeze_default_domain) { SERIALIZED_ZOO, "onnx/unsqueeze_default_domain.onnx")); - auto input = test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) + auto input = ov::test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) .get_vector(); auto expected_output = - test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) + ov::test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -5556,14 +5571,14 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unsqueeze_default_domain_opset13) { SERIALIZED_ZOO, "onnx/unsqueeze_default_domain_opset13.onnx")); - auto input = test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 
1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) + auto input = ov::test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) .get_vector(); auto expected_output = - test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) + ov::test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -5577,14 +5592,14 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unsqueeze_ai_onnx_domain_opset13) { SERIALIZED_ZOO, "onnx/unsqueeze_ai_onnx_domain_opset13.onnx")); - auto input = test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) + auto input = ov::test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) .get_vector(); auto expected_output = - test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) + ov::test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -6458,10 +6473,10 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_squeeze_default_domain_opset13) { SERIALIZED_ZOO, "onnx/squeeze_default_domain_opset13.onnx")); - auto input = - test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}).get_vector(); + auto input = ov::test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) + .get_vector(); auto expected_output = - test::NDArray({{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}).get_vector(); + ov::test::NDArray({{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_input(input); diff --git a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp index c3df8ccc545f74..75a71e94d88277 100644 --- a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp @@ -18,6 +18,7 @@ #include "common_test_utils/test_case.hpp" #include "onnx_import/onnx.hpp" #include "onnx_utils.hpp" +#include "ngraph/file_util.hpp" #include "common_test_utils/test_control.hpp" OPENVINO_SUPPRESS_DEPRECATED_START diff --git a/src/frontends/onnx/tests/onnx_import_convpool.in.cpp b/src/frontends/onnx/tests/onnx_import_convpool.in.cpp index c90bb8c3d41da5..8dbdbfcf359baa 100644 --- 
a/src/frontends/onnx/tests/onnx_import_convpool.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_convpool.in.cpp @@ -20,6 +20,7 @@ #include "common_test_utils/test_control.hpp" #include "common_test_utils/test_tools.hpp" #include "gtest/gtest.h" +#include "ngraph/file_util.hpp" #include "ngraph/ngraph.hpp" #include "onnx_import/onnx.hpp" #include "onnx_utils.hpp" @@ -42,21 +43,22 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_conv2d_strides_padding) { Inputs inputs; // data (1, 1, 7, 5) input tensor - inputs.emplace_back(test::NDArray{{{{{0.f, 1.f, 2.f, 3.f, 4.f}, - {5.f, 6.f, 7.f, 8.f, 9.f}, - {10.f, 11.f, 12.f, 13.f, 14.f}, - {15.f, 16.f, 17.f, 18.f, 19.f}, - {20.f, 21.f, 22.f, 23.f, 24.f}, - {25.f, 26.f, 27.f, 28.f, 29.f}, - {30.f, 31.f, 32.f, 33.f, 34.f}}}}} + inputs.emplace_back(ov::test::NDArray{{{{{0.f, 1.f, 2.f, 3.f, 4.f}, + {5.f, 6.f, 7.f, 8.f, 9.f}, + {10.f, 11.f, 12.f, 13.f, 14.f}, + {15.f, 16.f, 17.f, 18.f, 19.f}, + {20.f, 21.f, 22.f, 23.f, 24.f}, + {25.f, 26.f, 27.f, 28.f, 29.f}, + {30.f, 31.f, 32.f, 33.f, 34.f}}}}} .get_vector()); // filters (1, 1, 3, 3) aka convolution weights - inputs.emplace_back(test::NDArray{{{{{1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}}}}}.get_vector()); + inputs.emplace_back( + ov::test::NDArray{{{{{1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}}}}}.get_vector()); // (1, 1, 4, 3) auto expected_output = - test::NDArray( + ov::test::NDArray( {{{{12.f, 27.f, 24.f}, {63.f, 108.f, 81.f}, {123.f, 198.f, 141.f}, {112.f, 177.f, 124.f}}}}) .get_vector(); @@ -74,20 +76,21 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_conv2d_strides_no_padding) { Inputs inputs; // data (1, 1, 7, 5) input tensor - inputs.emplace_back(test::NDArray{{{{{0.f, 1.f, 2.f, 3.f, 4.f}, - {5.f, 6.f, 7.f, 8.f, 9.f}, - {10.f, 11.f, 12.f, 13.f, 14.f}, - {15.f, 16.f, 17.f, 18.f, 19.f}, - {20.f, 21.f, 22.f, 23.f, 24.f}, - {25.f, 26.f, 27.f, 28.f, 29.f}, - {30.f, 31.f, 32.f, 33.f, 34.f}}}}} + inputs.emplace_back(ov::test::NDArray{{{{{0.f, 1.f, 2.f, 3.f, 4.f}, + {5.f, 6.f, 7.f, 8.f, 9.f}, + {10.f, 11.f, 12.f, 13.f, 14.f}, + {15.f, 16.f, 17.f, 18.f, 19.f}, + {20.f, 21.f, 22.f, 23.f, 24.f}, + {25.f, 26.f, 27.f, 28.f, 29.f}, + {30.f, 31.f, 32.f, 33.f, 34.f}}}}} .get_vector()); // filters (1, 1, 3, 3) aka convolution weights - inputs.emplace_back(test::NDArray{{{{{1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}}}}}.get_vector()); + inputs.emplace_back( + ov::test::NDArray{{{{{1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}}}}}.get_vector()); // (1, 1, 3, 2) - auto expected_output = test::NDArray({{{{54.f, 72.f}, {144.f, 162.f}, {234.f, 252.f}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{54.f, 72.f}, {144.f, 162.f}, {234.f, 252.f}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -104,21 +107,22 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_conv2d_strides_assymetric_padding) { Inputs inputs; // data (1, 1, 7, 5) input tensor - inputs.emplace_back(test::NDArray{{{{{0.f, 1.f, 2.f, 3.f, 4.f}, - {5.f, 6.f, 7.f, 8.f, 9.f}, - {10.f, 11.f, 12.f, 13.f, 14.f}, - {15.f, 16.f, 17.f, 18.f, 19.f}, - {20.f, 21.f, 22.f, 23.f, 24.f}, - {25.f, 26.f, 27.f, 28.f, 29.f}, - {30.f, 31.f, 32.f, 33.f, 34.f}}}}} + inputs.emplace_back(ov::test::NDArray{{{{{0.f, 1.f, 2.f, 3.f, 4.f}, + {5.f, 6.f, 7.f, 8.f, 9.f}, + {10.f, 11.f, 12.f, 13.f, 14.f}, + {15.f, 16.f, 17.f, 18.f, 19.f}, + {20.f, 21.f, 22.f, 23.f, 24.f}, + {25.f, 26.f, 27.f, 28.f, 29.f}, + {30.f, 31.f, 32.f, 33.f, 34.f}}}}} .get_vector()); // filters (1, 1, 3, 3) aka convolution 
weights - inputs.emplace_back(test::NDArray{{{{{1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}}}}}.get_vector()); + inputs.emplace_back( + ov::test::NDArray{{{{{1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}}}}}.get_vector()); // (1, 1, 4, 2) auto expected_output = - test::NDArray({{{{21.f, 33.f}, {99.f, 117.f}, {189.f, 207.f}, {171.f, 183.f}}}}).get_vector(); + ov::test::NDArray({{{{21.f, 33.f}, {99.f, 117.f}, {189.f, 207.f}, {171.f, 183.f}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -142,20 +146,20 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_conv2d_dilation_assymetric_pads_stride Inputs inputs; // {2, 1, 1, 1} inputs.emplace_back( - test::NDArray({{{{-0.09103918075561523f}}}, {{{-0.32513630390167236f}}}}).get_vector()); + ov::test::NDArray({{{{-0.09103918075561523f}}}, {{{-0.32513630390167236f}}}}).get_vector()); // {2, 1, 3, 3} inputs.emplace_back( - test::NDArray({{{{0.4312484860420227f, -0.12559029459953308f, 0.44889551401138306f}, - {-0.3100617825984955f, 0.13522827625274658f, -0.06791308522224426f}, - {0.22671669721603394f, -0.17391827702522278f, -0.31299442052841187f}}}, - {{{-0.31545522809028625f, 0.06560015678405762f, 0.2656586766242981f}, - {0.41363757848739624f, 0.31231558322906494f, -0.376018226146698f}, - {-0.005708813667297363f, 0.34922850131988525f, 0.45095211267471313f}}}}) + ov::test::NDArray({{{{0.4312484860420227f, -0.12559029459953308f, 0.44889551401138306f}, + {-0.3100617825984955f, 0.13522827625274658f, -0.06791308522224426f}, + {0.22671669721603394f, -0.17391827702522278f, -0.31299442052841187f}}}, + {{{-0.31545522809028625f, 0.06560015678405762f, 0.2656586766242981f}, + {0.41363757848739624f, 0.31231558322906494f, -0.376018226146698f}, + {-0.005708813667297363f, 0.34922850131988525f, 0.45095211267471313f}}}}) .get_vector()); // {2, 2, 1, 2} auto expected_output = - test::NDArray( + ov::test::NDArray( {{{{-0.012311071157455444f, 0.02822777070105076f}}, {{-0.028432954102754593f, -0.037657227367162704f}}}, {{{-0.04396762326359749f, 0.10081233829259872f}}, {{-0.10154513269662857f, -0.13448859751224518f}}}}) .get_vector(); @@ -298,12 +302,12 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_average_pool_2d) { // input data shape (1, 1, 4, 4) Inputs inputs; inputs.push_back( - test::NDArray( + ov::test::NDArray( {{{{0.f, 1.f, 2.f, 3.f}, {4.f, 5.f, 6.f, 7.f}, {8.f, 9.f, 10.f, 11.f}, {12.f, 13.f, 14.f, 15.f}}}}) .get_vector()); // (1, 1, 2, 2) - auto expected_output = test::NDArray({{{{2.5f, 4.5f}, {10.5f, 12.5f}}}}).get_vector(); + auto expected_output = ov::test::NDArray({{{{2.5f, 4.5f}, {10.5f, 12.5f}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -320,13 +324,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_average_pool_2d_pads) { // input data shape (1, 1, 4, 4) Inputs inputs; inputs.push_back( - test::NDArray( + ov::test::NDArray( {{{{0.f, 1.f, 2.f, 3.f}, {4.f, 5.f, 6.f, 7.f}, {8.f, 9.f, 10.f, 11.f}, {12.f, 13.f, 14.f, 15.f}}}}) .get_vector()); // (1, 1, 3, 3) auto expected_output = - test::NDArray({{{{0.f, 1.5f, 3.f}, {6.f, 7.5f, 9.f}, {12.f, 13.5f, 15.f}}}}).get_vector(); + ov::test::NDArray({{{{0.f, 1.5f, 3.f}, {6.f, 7.5f, 9.f}, {12.f, 13.5f, 15.f}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -370,13 +374,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_max_pool_2d_pads) { // input data shape (1, 1, 4, 4) Inputs inputs; inputs.push_back( - test::NDArray( 
+ ov::test::NDArray( {{{{0.f, 1.f, 2.f, 3.f}, {4.f, 5.f, 6.f, 7.f}, {8.f, 9.f, 10.f, 11.f}, {12.f, 13.f, 14.f, 15.f}}}}) .get_vector()); // (1, 1, 3, 3) auto expected_output = - test::NDArray({{{{0.f, 2.f, 3.f}, {8.f, 10.f, 11.f}, {12.f, 14.f, 15.f}}}}).get_vector(); + ov::test::NDArray({{{{0.f, 2.f, 3.f}, {8.f, 10.f, 11.f}, {12.f, 14.f, 15.f}}}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); diff --git a/src/frontends/onnx/tests/onnx_import_deprecated.in.cpp b/src/frontends/onnx/tests/onnx_import_deprecated.in.cpp index 072161ababde90..be5fd6aaa0523e 100644 --- a/src/frontends/onnx/tests/onnx_import_deprecated.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_deprecated.in.cpp @@ -20,6 +20,7 @@ #include "common_test_utils/test_control.hpp" #include "common_test_utils/test_tools.hpp" #include "gtest/gtest.h" +#include "ngraph/file_util.hpp" #include "ngraph/ngraph.hpp" #include "onnx_import/onnx.hpp" #include "onnx_utils.hpp" @@ -36,8 +37,8 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_affine) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/affine.onnx")); // input/output shape (1, 3) - auto input = test::NDArray{{{0.f, 1.f, 2.f}}}.get_vector(); - auto expected_output = test::NDArray{{{50.f, 50.5f, 51.f}}}.get_vector(); + auto input = ov::test::NDArray{{{0.f, 1.f, 2.f}}}.get_vector(); + auto expected_output = ov::test::NDArray{{{50.f, 50.5f, 51.f}}}.get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_input(Shape{1, 3}, input); @@ -50,14 +51,14 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_crop) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/crop.onnx")); // input shape (1, 1, 4, 4) - auto input = test::NDArray({{{{19.f, 20.f, 21.f, 22.f}, - {23.f, 24.f, 25.f, 26.f}, - {27.f, 28.f, 29.f, 30.f}, - {31.f, 32.f, 33.f, 34.f}}}}) + auto input = ov::test::NDArray({{{{19.f, 20.f, 21.f, 22.f}, + {23.f, 24.f, 25.f, 26.f}, + {27.f, 28.f, 29.f, 30.f}, + {31.f, 32.f, 33.f, 34.f}}}}) .get_vector(); // output shape (1, 1, 2, 2) - auto expected_output = test::NDArray{{{{24.f, 25.f}, {28.f, 29.f}}}}.get_vector(); + auto expected_output = ov::test::NDArray{{{{24.f, 25.f}, {28.f, 29.f}}}}.get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_input(Shape{1, 1, 4, 4}, input); @@ -70,14 +71,14 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_crop_with_scale) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/crop_with_scale.onnx")); // input shape (1, 1, 4, 4) - auto input = test::NDArray({{{{19.f, 20.f, 21.f, 22.f}, - {23.f, 24.f, 25.f, 26.f}, - {27.f, 28.f, 29.f, 30.f}, - {31.f, 32.f, 33.f, 34.f}}}}) + auto input = ov::test::NDArray({{{{19.f, 20.f, 21.f, 22.f}, + {23.f, 24.f, 25.f, 26.f}, + {27.f, 28.f, 29.f, 30.f}, + {31.f, 32.f, 33.f, 34.f}}}}) .get_vector(); // output shape (1, 1, 2, 3) - auto expected_output = test::NDArray{{{{24.f, 25.f, 26.f}, {28.f, 29.f, 30.f}}}}.get_vector(); + auto expected_output = ov::test::NDArray{{{{24.f, 25.f, 26.f}, {28.f, 29.f, 30.f}}}}.get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_input(Shape{1, 1, 4, 4}, input); diff --git a/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp b/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp index dcb2a9867fbe99..29cea1750c6d25 100644 --- a/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp +++ 
b/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp @@ -29,6 +29,7 @@ #include "ngraph/ngraph.hpp" #include "ngraph/pass/constant_folding.hpp" #include "ngraph/pass/manager.hpp" +#include "ngraph/file_util.hpp" #include "onnx_import/core/null_node.hpp" #include "onnx_import/onnx.hpp" #include "onnx_import/onnx_utils.hpp" diff --git a/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp b/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp index fb3f9a203cf195..846219aa32acba 100644 --- a/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp @@ -18,6 +18,7 @@ #include "common_test_utils/test_case.hpp" #include "onnx_import/onnx.hpp" #include "common_test_utils/test_control.hpp" +#include "ngraph/file_util.hpp" #include "onnx_utils.hpp" OPENVINO_SUPPRESS_DEPRECATED_START diff --git a/src/frontends/onnx/tests/onnx_import_quant.in.cpp b/src/frontends/onnx/tests/onnx_import_quant.in.cpp index b35b388d6a9dc2..1c5b8baf62d151 100644 --- a/src/frontends/onnx/tests/onnx_import_quant.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_quant.in.cpp @@ -21,6 +21,7 @@ #include "common_test_utils/test_control.hpp" #include "common_test_utils/test_tools.hpp" #include "gtest/gtest.h" +#include "ngraph/file_util.hpp" #include "ngraph/ngraph.hpp" #include "onnx_import/onnx.hpp" #include "onnx_utils.hpp" diff --git a/src/frontends/onnx/tests/onnx_import_reshape.in.cpp b/src/frontends/onnx/tests/onnx_import_reshape.in.cpp index 64a9a75692462f..773a9df618a2e9 100644 --- a/src/frontends/onnx/tests/onnx_import_reshape.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_reshape.in.cpp @@ -19,6 +19,7 @@ #include "common_test_utils/test_control.hpp" #include "common_test_utils/test_tools.hpp" #include "gtest/gtest.h" +#include "ngraph/file_util.hpp" #include "ngraph/ngraph.hpp" #include "onnx_import/onnx.hpp" #include "onnx_utils.hpp" @@ -39,13 +40,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reshape_reduced_dims) { "onnx/reshape_reduced_dims.onnx")); // input data shape (2, 3, 4) - auto input = test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, - {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) + auto input = ov::test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, + {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) .get_vector(); // output data shape (2, 12) - auto expected_output = test::NDArray({{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, - {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}}) + auto expected_output = ov::test::NDArray({{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -60,15 +61,15 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reshape_reordered_dims) { "onnx/reshape_reordered_dims.onnx")); // input data shape (2, 3, 4) - auto input = test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, - {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) + auto input = ov::test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, + {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) .get_vector(); // output data shape (4, 2, 3) - auto expected_output = test::NDArray({{{0, 1, 2}, {3, 4, 5}}, - {{6, 7, 8}, {9, 10, 11}}, - {{12, 13, 14}, {15, 16, 17}}, - {{18, 19, 20}, {21, 22, 23}}}) + auto expected_output = ov::test::NDArray({{{0, 1, 2}, {3, 4, 5}}, + {{6, 7, 8}, {9, 10, 11}}, + {{12, 13, 14}, {15, 16, 17}}, + {{18, 19, 20}, {21, 22, 23}}}) .get_vector(); auto test_case = 
ov::test::TestCase(function, s_device); @@ -83,14 +84,14 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reshape_extended_dims) { "onnx/reshape_extended_dims.onnx")); // input data shape (2, 3, 4) - auto input = test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, - {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) + auto input = ov::test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, + {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) .get_vector(); // output data shape (3, 2, 2, 2) - auto expected_output = test::NDArray({{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}}, - {{{8, 9}, {10, 11}}, {{12, 13}, {14, 15}}}, - {{{16, 17}, {18, 19}}, {{20, 21}, {22, 23}}}}) + auto expected_output = ov::test::NDArray({{{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}}, + {{{8, 9}, {10, 11}}, {{12, 13}, {14, 15}}}, + {{{16, 17}, {18, 19}}, {{20, 21}, {22, 23}}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -105,14 +106,14 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reshape_single_dim) { "onnx/reshape_single_dim.onnx")); // input data shape (2, 3, 4) - auto input = test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, - {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) + auto input = ov::test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, + {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) .get_vector(); // output data shape (24, ) - auto expected_output = - test::NDArray({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}) - .get_vector(); + auto expected_output = ov::test::NDArray( + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}) + .get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_input(Shape{2, 3, 4}, input); @@ -127,29 +128,29 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reshape_negative_dim) { "onnx/reshape_negative_dim.onnx")); // 2x3x4 - auto input = test::NDArray({{{0.5488135f, 0.71518934f, 0.60276335f, 0.5448832f}, - {0.4236548f, 0.6458941f, 0.4375872f, 0.891773f}, - {0.96366274f, 0.3834415f, 0.79172504f, 0.5288949f}}, + auto input = ov::test::NDArray({{{0.5488135f, 0.71518934f, 0.60276335f, 0.5448832f}, + {0.4236548f, 0.6458941f, 0.4375872f, 0.891773f}, + {0.96366274f, 0.3834415f, 0.79172504f, 0.5288949f}}, - {{0.56804454f, 0.92559665f, 0.07103606f, 0.0871293f}, - {0.0202184f, 0.83261985f, 0.77815676f, 0.87001216f}, - {0.9786183f, 0.7991586f, 0.46147937f, 0.7805292f}}}) + {{0.56804454f, 0.92559665f, 0.07103606f, 0.0871293f}, + {0.0202184f, 0.83261985f, 0.77815676f, 0.87001216f}, + {0.9786183f, 0.7991586f, 0.46147937f, 0.7805292f}}}) .get_vector(); // 2x6x2 - auto expected_output = test::NDArray({{{0.5488135f, 0.71518934f}, - {0.60276335f, 0.5448832f}, - {0.4236548f, 0.6458941f}, - {0.4375872f, 0.891773f}, - {0.96366274f, 0.3834415f}, - {0.79172504f, 0.5288949f}}, - - {{0.56804454f, 0.92559665f}, - {0.07103606f, 0.0871293f}, - {0.0202184f, 0.83261985f}, - {0.77815676f, 0.87001216f}, - {0.9786183f, 0.7991586f}, - {0.46147937f, 0.7805292f}}}) + auto expected_output = ov::test::NDArray({{{0.5488135f, 0.71518934f}, + {0.60276335f, 0.5448832f}, + {0.4236548f, 0.6458941f}, + {0.4375872f, 0.891773f}, + {0.96366274f, 0.3834415f}, + {0.79172504f, 0.5288949f}}, + + {{0.56804454f, 0.92559665f}, + {0.07103606f, 0.0871293f}, + {0.0202184f, 0.83261985f}, + {0.77815676f, 0.87001216f}, + {0.9786183f, 0.7991586f}, + {0.46147937f, 0.7805292f}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -164,13 +165,13 @@ 
OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reshape_negative_with_zero_dim) { "onnx/reshape_negative_with_zero_dims.onnx")); // input data shape (2, 3, 4) - auto input = test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, - {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) + auto input = ov::test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, + {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) .get_vector(); // output data shape (2, 6, 2) - auto expected_output = test::NDArray({{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, - {{12, 13}, {14, 15}, {16, 17}, {18, 19}, {20, 21}, {22, 23}}}) + auto expected_output = ov::test::NDArray({{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, + {{12, 13}, {14, 15}, {16, 17}, {18, 19}, {20, 21}, {22, 23}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -185,13 +186,13 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_reshape_output_shape_as_input) { "onnx/reshape_output_shape_as_input.onnx")); // input data shape (2, 3, 4) - auto input = test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, - {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) + auto input = ov::test::NDArray({{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}}, + {{12, 13, 14, 15}, {16, 17, 18, 19}, {20, 21, 22, 23}}}) .get_vector(); // output data shape (2, 6, 2) - auto expected_output = test::NDArray({{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, - {{12, 13}, {14, 15}, {16, 17}, {18, 19}, {20, 21}, {22, 23}}}) + auto expected_output = ov::test::NDArray({{{0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}, {10, 11}}, + {{12, 13}, {14, 15}, {16, 17}, {18, 19}, {20, 21}, {22, 23}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -352,12 +353,12 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_squeeze) { file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/squeeze.onnx")); // {1, 4, 1, 1, 2} - auto input = test::NDArray({{{{{1.0f, 2.0f}}}, {{{3.0f, 4.0f}}}, {{{5.0f, 6.0f}}}, {{{7.0f, 8.0f}}}}}) + auto input = ov::test::NDArray({{{{{1.0f, 2.0f}}}, {{{3.0f, 4.0f}}}, {{{5.0f, 6.0f}}}, {{{7.0f, 8.0f}}}}}) .get_vector(); // {4, 2} auto expected_output = - test::NDArray({{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}, {7.0f, 8.0f}}).get_vector(); + ov::test::NDArray({{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}, {7.0f, 8.0f}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_input(Shape{1, 4, 1, 1, 2}, input); @@ -393,15 +394,15 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unsqueeze) { auto function = onnx_import::import_onnx_model( file_util::path_join(ov::test::utils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/unsqueeze.onnx")); - auto input = test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) + auto input = ov::test::NDArray({{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}) .get_vector(); auto expected_output = - test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, - {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) + ov::test::NDArray({{{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 
1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}, + {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -415,15 +416,15 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_unsqueeze_negative_axes) { SERIALIZED_ZOO, "onnx/unsqueeze_negative_axes.onnx")); - auto input = test::NDArray({{{{-1.8427763f, -1.0467733f, 0.50550157f, 1.4897262f, 0.33057404f}}, - {{1.9244908f, -0.3804572f, 0.76275414f, -0.8183123f, 0.93889356f}}, - {{-0.05270234f, 0.7113202f, -0.45783648f, -1.3378475f, 0.26926285f}}}}) + auto input = ov::test::NDArray({{{{-1.8427763f, -1.0467733f, 0.50550157f, 1.4897262f, 0.33057404f}}, + {{1.9244908f, -0.3804572f, 0.76275414f, -0.8183123f, 0.93889356f}}, + {{-0.05270234f, 0.7113202f, -0.45783648f, -1.3378475f, 0.26926285f}}}}) .get_vector(); auto expected_output = - test::NDArray({{{{{-1.8427763f, -1.0467733f, 0.50550157f, 1.4897262f, 0.33057404f}}}, - {{{1.9244908f, -0.3804572f, 0.76275414f, -0.8183123f, 0.93889356f}}}, - {{{-0.05270234f, 0.7113202f, -0.45783648f, -1.3378475f, 0.26926285f}}}}}) + ov::test::NDArray({{{{{-1.8427763f, -1.0467733f, 0.50550157f, 1.4897262f, 0.33057404f}}}, + {{{1.9244908f, -0.3804572f, 0.76275414f, -0.8183123f, 0.93889356f}}}, + {{{-0.05270234f, 0.7113202f, -0.45783648f, -1.3378475f, 0.26926285f}}}}}) .get_vector(); auto test_case = ov::test::TestCase(function, s_device); @@ -438,10 +439,10 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_concat) { Inputs inputs; - inputs.emplace_back(test::NDArray({1, 2}).get_vector()); - inputs.emplace_back(test::NDArray({3, 4}).get_vector()); + inputs.emplace_back(ov::test::NDArray({1, 2}).get_vector()); + inputs.emplace_back(ov::test::NDArray({3, 4}).get_vector()); - auto expected_output = test::NDArray({1, 2, 3, 4}).get_vector(); + auto expected_output = ov::test::NDArray({1, 2, 3, 4}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -456,10 +457,10 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_concat_negative_axis) { Inputs inputs; - inputs.emplace_back(test::NDArray({{1, 2}, {3, 4}}).get_vector()); - inputs.emplace_back(test::NDArray({{5, 6}, {7, 8}}).get_vector()); + inputs.emplace_back(ov::test::NDArray({{1, 2}, {3, 4}}).get_vector()); + inputs.emplace_back(ov::test::NDArray({{5, 6}, {7, 8}}).get_vector()); - auto expected_output = test::NDArray({{1, 2}, {3, 4}, {5, 6}, {7, 8}}).get_vector(); + auto expected_output = ov::test::NDArray({{1, 2}, {3, 4}, {5, 6}, {7, 8}}).get_vector(); auto test_case = ov::test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); diff --git a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp index ac0e1624c358b5..c7be2262ea90fb 100644 --- a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp @@ -21,6 +21,7 @@ #include "common_test_utils/test_control.hpp" #include "common_test_utils/test_tools.hpp" #include "gtest/gtest.h" +#include "ngraph/file_util.hpp" #include "ngraph/ngraph.hpp" #include "onnx_import/onnx.hpp" #include "onnx_utils.hpp" diff --git a/src/frontends/onnx/tests/onnx_import_signal.in.cpp b/src/frontends/onnx/tests/onnx_import_signal.in.cpp index adeb819ff81fd4..e8e2bb5e9f4a77 100644 --- a/src/frontends/onnx/tests/onnx_import_signal.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_signal.in.cpp @@ -4,6 +4,8 @@ #include +#include "ngraph/file_util.hpp" + // 
clang-format off #ifdef ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS #define DEFAULT_FLOAT_TOLERANCE_BITS ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS diff --git a/src/frontends/onnx/tests/onnx_import_with_editor.in.cpp b/src/frontends/onnx/tests/onnx_import_with_editor.in.cpp index face82c465d155..8660c59e47f717 100644 --- a/src/frontends/onnx/tests/onnx_import_with_editor.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_with_editor.in.cpp @@ -16,6 +16,7 @@ #include "common_test_utils/test_case.hpp" #include "gtest/gtest.h" #include "ngraph/ngraph.hpp" +#include "ngraph/file_util.hpp" #include "common_test_utils/test_control.hpp" #include "onnx_utils.hpp" diff --git a/src/frontends/onnx/tests/onnx_tensor_names.cpp b/src/frontends/onnx/tests/onnx_tensor_names.cpp index b78aaeafa50db9..9e4de0cd78e115 100644 --- a/src/frontends/onnx/tests/onnx_tensor_names.cpp +++ b/src/frontends/onnx/tests/onnx_tensor_names.cpp @@ -6,6 +6,7 @@ #include "common_test_utils/test_case.hpp" #include "common_test_utils/test_control.hpp" #include "gtest/gtest.h" +#include "ngraph/file_util.hpp" #include "ngraph/ngraph.hpp" #include "onnx_import/onnx.hpp" #include "onnx_import/onnx_utils.hpp" diff --git a/src/plugins/intel_gna/tests/deprecated/helpers/tests_file_utils.cpp b/src/plugins/intel_gna/tests/deprecated/helpers/tests_file_utils.cpp index 234f817a81844f..d01b246c3712e9 100644 --- a/src/plugins/intel_gna/tests/deprecated/helpers/tests_file_utils.cpp +++ b/src/plugins/intel_gna/tests/deprecated/helpers/tests_file_utils.cpp @@ -17,9 +17,13 @@ #ifdef _WIN32 // Copied from linux libc sys/stat.h: +#ifndef S_ISREG # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif +#ifndef S_ISDIR # define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) #endif +#endif using namespace ::testing; using namespace std; diff --git a/src/tests/test_utils/common_test_utils/CMakeLists.txt b/src/tests/test_utils/common_test_utils/CMakeLists.txt index cda76943a4f82b..86418a9f4fdb7c 100644 --- a/src/tests/test_utils/common_test_utils/CMakeLists.txt +++ b/src/tests/test_utils/common_test_utils/CMakeLists.txt @@ -22,7 +22,7 @@ function(add_common_utils ADD_TARGET_NAME) ROOT ${CMAKE_CURRENT_SOURCE_DIR} EXCLUDED_SOURCE_PATHS ${TARGET_EXCLUDED_SOURCE_PATHS} - ADD_CPPLINT + ADD_CLANG_FORMAT DEVELOPER_PACKAGE tests LINK_LIBRARIES diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/all_close.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/all_close.hpp index 5ccdd762d88a08..db55bc3bc57583 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/all_close.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/all_close.hpp @@ -21,8 +21,8 @@ namespace utils { /// \returns true if shapes match and for all elements, |a_i-b_i| <= atol + rtol*|b_i|. template typename std::enable_if::value, ::testing::AssertionResult>::type all_close( - const T * const a, - const T * const b, + const T* const a, + const T* const b, size_t size, T rtol = static_cast(1e-5), T atol = static_cast(1e-8)) { @@ -52,8 +52,8 @@ typename std::enable_if::value, ::testing::AssertionRe /// \returns true if shapes match and for all elements, |a_i-b_i| <= atol + rtol*|b_i|. template typename std::enable_if::value, ::testing::AssertionResult>::type all_close( - const T * const a, - const T * const b, + const T* const a, + const T* const b, size_t size, T rtol = static_cast(1e-5), T atol = static_cast(1e-8)) { @@ -70,7 +70,6 @@ typename std::enable_if::value, ::testing::AssertionResult>: return rc ? 
::testing::AssertionSuccess() : ar_fail; } - /// \brief Same as numpy.allclose /// \param a First tensor to compare /// \param b Second tensor to compare @@ -89,7 +88,6 @@ typename std::enable_if::value, ::testing::AssertionRe return all_close(a.data(), b.data(), a.size(), rtol, atol); } - /// \brief Same as numpy.allclose /// \param a First tensor to compare /// \param b Second tensor to compare diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp index 5d0fea09708c11..cc45a47d779d57 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp @@ -4,19 +4,19 @@ #pragma once -#include "openvino/core/partial_shape.hpp" - #include #include +#include #include #include #include #include #include -#include #include #include +#include "openvino/core/partial_shape.hpp" + namespace ov { namespace test { namespace utils { diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp index 2fc62746137a37..eacad438e30dfb 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/data_utils.hpp @@ -5,24 +5,20 @@ #pragma once #include -#include #include +#include #include "common_test_utils/common_utils.hpp" #include "gtest/gtest.h" #include "ie_blob.h" #include "openvino/core/type/element_type_traits.hpp" #include "openvino/runtime/tensor.hpp" -#include "ngraph/type/bfloat16.hpp" -#include "ngraph/type/float16.hpp" - namespace ov { namespace test { namespace utils { -OPENVINO_SUPPRESS_DEPRECATED_START -inline void fill_data(float *data, size_t size, size_t duty_ratio = 10) { +inline void fill_data(float* data, size_t size, size_t duty_ratio = 10) { for (size_t i = 0; i < size; i++) { if ((i / duty_ratio) % 2 == 1) { data[i] = 0.0f; @@ -54,7 +50,9 @@ inline std::vector generate_float_numbers(std::size_t vec_len, float min, * @param blob tensor to fill in * @param values src tensor which should be broadcast */ -void fill_data_with_broadcast(InferenceEngine::Blob::Ptr &blob, InferenceEngine::Blob::Ptr &values); +OPENVINO_SUPPRESS_DEPRECATED_START +void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine::Blob::Ptr& values); +OPENVINO_SUPPRESS_DEPRECATED_END void fill_data_with_broadcast(ov::Tensor& tensor, ov::Tensor& values); /** @@ -64,7 +62,9 @@ void fill_data_with_broadcast(ov::Tensor& tensor, ov::Tensor& values); * @param axis Axis to apply values * @param values data to broadcast */ -void fill_data_with_broadcast(InferenceEngine::Blob::Ptr &blob, size_t axis, std::vector values); +OPENVINO_SUPPRESS_DEPRECATED_START +void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, size_t axis, std::vector values); +OPENVINO_SUPPRESS_DEPRECATED_END void fill_data_with_broadcast(ov::Tensor& tensor, size_t axis, std::vector values); /** * Make a view blob with new shape. It will reinterpret original tensor data as a tensor with new shape. 
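// Illustrative usage sketch (not part of the patch): how the ov::Tensor overload of
// fill_data_with_broadcast declared in this header is typically called from a test.
// The element type, shape, axis and values below are hypothetical examples.
//
//   ov::Tensor blob(ov::element::f32, ov::Shape{1, 3, 4, 4});
//   std::vector<float> per_channel{0.5f, 1.0f, 1.5f};  // one value per channel
//   // Broadcast the three values along axis 1 (the channel dimension), so every
//   // HxW plane of a given channel is filled with the same constant.
//   ov::test::utils::fill_data_with_broadcast(blob, 1, per_channel);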
@@ -76,22 +76,31 @@ void fill_data_with_broadcast(ov::Tensor& tensor, size_t axis, std::vector -inline void fill_roi_raw_ptr(T* data, size_t data_size, const uint32_t range, const int32_t height, const int32_t width, const float omega, - const bool is_roi_max_mode, const int32_t seed = 1) { +template +inline void fill_roi_raw_ptr(T* data, + size_t data_size, + const uint32_t range, + const int32_t height, + const int32_t width, + const float omega, + const bool is_roi_max_mode, + const int32_t seed = 1) { std::default_random_engine random(seed); std::uniform_int_distribution distribution(0, range); @@ -129,23 +138,33 @@ inline void fill_roi_raw_ptr(T* data, size_t data_size, const uint32_t range, co } } -template -inline void -fill_data_roi(InferenceEngine::Blob::Ptr &blob, const uint32_t range, const int height, const int width, const float omega, - const bool is_roi_max_mode, const int seed = 1, void (*propGenerator)(InferenceEngine::Blob::Ptr &) = nullptr) { +OPENVINO_SUPPRESS_DEPRECATED_START +template +inline void fill_data_roi(InferenceEngine::Blob::Ptr& blob, + const uint32_t range, + const int height, + const int width, + const float omega, + const bool is_roi_max_mode, + const int seed = 1, + void (*propGenerator)(InferenceEngine::Blob::Ptr&) = nullptr) { if (propGenerator != nullptr) { propGenerator(blob); return; } using T = typename InferenceEngine::PrecisionTrait::value_type; - auto *data = blob->buffer().as(); + auto* data = blob->buffer().as(); fill_roi_raw_ptr(data, blob->size(), range, height, width, omega, is_roi_max_mode, seed); } -template -inline void -fill_data_roi(ov::runtime::Tensor& tensor, const uint32_t range, const int height, const int width, const float omega, - const bool is_roi_max_mode, const int seed = 1) { +template +inline void fill_data_roi(ov::runtime::Tensor& tensor, + const uint32_t range, + const int height, + const int width, + const float omega, + const bool is_roi_max_mode, + const int seed = 1) { using T = typename InferenceEngine::PrecisionTrait::value_type; auto* data = static_cast(tensor.data()); std::default_random_engine random(seed); @@ -184,11 +203,15 @@ fill_data_roi(ov::runtime::Tensor& tensor, const uint32_t range, const int heigh data[i + 4] = static_cast(max_y); } } +OPENVINO_SUPPRESS_DEPRECATED_END -template -void inline -fill_data_random(T *pointer, std::size_t size, const uint32_t range = 10, double_t start_from = 0, const int32_t k = 1, - const int seed = 1) { +template +void inline fill_data_random(T* pointer, + std::size_t size, + const uint32_t range = 10, + double_t start_from = 0, + const int32_t k = 1, + const int seed = 1) { if (range == 0) { for (std::size_t i = 0; i < size; i++) { pointer[i] = static_cast(start_from); @@ -197,7 +220,7 @@ fill_data_random(T *pointer, std::size_t size, const uint32_t range = 10, double } testing::internal::Random random(seed); - const uint32_t k_range = k * range; // range with respect to k + const uint32_t k_range = k * range; // range with respect to k random.Generate(k_range); if (start_from < 0 && !std::is_signed::value) { @@ -245,10 +268,10 @@ void inline fill_random_unique_sequence(T* rawBlobDataPtr, while (elems.size() != size) { auto value = static_cast(dist(generator)); value /= static_cast(k); - if (std::is_same::value) { - elems.insert(static_cast(ngraph::float16(value).to_bits())); - } else if (std::is_same::value) { - elems.insert(static_cast(ngraph::bfloat16(value).to_bits())); + if (std::is_same::value) { + elems.insert(static_cast(ov::float16(value).to_bits())); + } else if 
(std::is_same::value) { + elems.insert(static_cast(ov::bfloat16(value).to_bits())); } else { elems.insert(static_cast(value)); } @@ -266,7 +289,11 @@ void inline fill_random_unique_sequence(T* rawBlobDataPtr, * - With k = 2 numbers resolution will 1/2 so outputs only .0 or .50 * - With k = 4 numbers resolution will 1/4 so outputs only .0 .25 .50 0.75 and etc. */ -void fill_tensor_random(ov::Tensor& tensor, const double range = 10, const double start_from = 0, const int32_t k = 1, const int seed = 1); +void fill_tensor_random(ov::Tensor& tensor, + const double range = 10, + const double start_from = 0, + const int32_t k = 1, + const int seed = 1); /** @brief Fill blob with random data. * @@ -278,11 +305,15 @@ void fill_tensor_random(ov::Tensor& tensor, const double range = 10, const doubl * - With k = 2 numbers resolution will 1/2 so outputs only .0 or .50 * - With k = 4 numbers resolution will 1/4 so outputs only .0 .25 .50 0.75 and etc. */ -template -void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, - const int32_t k = 1, const int seed = 1) { +OPENVINO_SUPPRESS_DEPRECATED_START +template +void inline fill_data_random(InferenceEngine::Blob::Ptr& blob, + const uint32_t range = 10, + int32_t start_from = 0, + const int32_t k = 1, + const int seed = 1) { using T = typename InferenceEngine::PrecisionTrait::value_type; - auto *rawBlobDataPtr = blob->buffer().as(); + auto* rawBlobDataPtr = blob->buffer().as(); if (PRC == InferenceEngine::Precision::U4 || PRC == InferenceEngine::Precision::I4 || PRC == InferenceEngine::Precision::BIN) { fill_data_random(rawBlobDataPtr, blob->byteSize(), range, start_from, k, seed); @@ -290,6 +321,7 @@ void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t ra fill_data_random(rawBlobDataPtr, blob->size(), range, start_from, k, seed); } } +OPENVINO_SUPPRESS_DEPRECATED_END /** @brief Fill blob with a sorted sequence of unique elements randomly generated. * @@ -304,14 +336,15 @@ void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t ra * - With k = 2 numbers resolution will 1/2 so outputs only .0 or .50 * - With k = 4 numbers resolution will 1/4 so outputs only .0 .25 .50 0.75 and etc. 
*/ -template -void inline fill_random_unique_sequence(InferenceEngine::Blob::Ptr &blob, +OPENVINO_SUPPRESS_DEPRECATED_START +template +void inline fill_random_unique_sequence(InferenceEngine::Blob::Ptr& blob, uint64_t range, int64_t start_from = 0, const int64_t k = 1, const int32_t seed = 1) { using T = typename InferenceEngine::PrecisionTrait::value_type; - auto *rawBlobDataPtr = blob->buffer().as(); + auto* rawBlobDataPtr = blob->buffer().as(); if (start_from < 0 && !std::is_signed::value) { start_from = 0; @@ -329,16 +362,21 @@ void inline fill_random_unique_sequence(InferenceEngine::Blob::Ptr &blob, auto value = static_cast(dist(generator)); value /= static_cast(k); if (PRC == InferenceEngine::Precision::FP16) { - elems.insert(static_cast(ngraph::float16(value).to_bits())); + elems.insert(static_cast(ov::float16(value).to_bits())); } else { elems.insert(static_cast(value)); } } std::copy(elems.begin(), elems.end(), rawBlobDataPtr); } +OPENVINO_SUPPRESS_DEPRECATED_END -template -void inline fill_data_ptr_consistently(T* data, size_t size, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1) { +template +void inline fill_data_ptr_consistently(T* data, + size_t size, + const uint32_t range = 10, + int32_t start_from = 0, + const int32_t k = 1) { int64_t value = start_from; const int64_t maxValue = start_from + range; for (size_t i = 0; i < size; i++) { @@ -351,51 +389,57 @@ void inline fill_data_ptr_consistently(T* data, size_t size, const uint32_t rang } } -template -void inline fill_data_consistently(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, +OPENVINO_SUPPRESS_DEPRECATED_START +template +void inline fill_data_consistently(InferenceEngine::Blob::Ptr& blob, + const uint32_t range = 10, + int32_t start_from = 0, const int32_t k = 1) { using T = typename InferenceEngine::PrecisionTrait::value_type; - auto *rawBlobDataPtr = blob->buffer().as(); + auto* rawBlobDataPtr = blob->buffer().as(); if (start_from < 0 && !std::is_signed::value) { start_from = 0; } fill_data_ptr_consistently(rawBlobDataPtr, blob->size(), range, start_from, k); } -template -void inline -fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k, - const int seed = 1) { +template +void inline fill_data_random_float(InferenceEngine::Blob::Ptr& blob, + const uint32_t range, + int32_t start_from, + const int32_t k, + const int seed = 1) { using T = typename InferenceEngine::PrecisionTrait::value_type; std::default_random_engine random(seed); // 1/k is the resolution of the floating point numbers std::uniform_int_distribution distribution(k * start_from, k * (start_from + range)); - auto *rawBlobDataPtr = blob->buffer().as(); + auto* rawBlobDataPtr = blob->buffer().as(); for (size_t i = 0; i < blob->size(); i++) { auto value = static_cast(distribution(random)); value /= static_cast(k); if (PRC == InferenceEngine::Precision::FP16) { - rawBlobDataPtr[i] = static_cast(ngraph::float16(value).to_bits()); + rawBlobDataPtr[i] = static_cast(ov::float16(value).to_bits()); } else if (PRC == InferenceEngine::Precision::BF16) { - rawBlobDataPtr[i] = static_cast(ngraph::bfloat16(value).to_bits()); + rawBlobDataPtr[i] = static_cast(ov::bfloat16(value).to_bits()); } else { rawBlobDataPtr[i] = static_cast(value); } } } -template +template void inline fill_data_ptr_normal_random_float(T* data, - size_t size, - const float mean, - const float stddev, - const int seed = 1) { + size_t size, + const float mean, + const float 
stddev, + const int seed = 1) { std::default_random_engine random(seed); std::normal_distribution<> normal_d{mean, stddev}; for (size_t i = 0; i < size; i++) { auto value = static_cast(normal_d(random)); - if (typeid(T) == typeid(typename InferenceEngine::PrecisionTrait::value_type)) { + if (typeid(T) == + typeid(typename InferenceEngine::PrecisionTrait::value_type)) { data[i] = static_cast(ov::float16(value).to_bits()); } else { data[i] = static_cast(value); @@ -403,26 +447,26 @@ void inline fill_data_ptr_normal_random_float(T* data, } } -template -void inline fill_data_normal_random_float(InferenceEngine::Blob::Ptr &blob, +template +void inline fill_data_normal_random_float(InferenceEngine::Blob::Ptr& blob, const float mean, const float stddev, const int seed = 1) { using T = typename InferenceEngine::PrecisionTrait::value_type; - auto *rawBlobDataPtr = blob->buffer().as(); + auto* rawBlobDataPtr = blob->buffer().as(); fill_data_ptr_normal_random_float(rawBlobDataPtr, blob->size(), mean, stddev, seed); } -template -void inline fill_data_float_array(InferenceEngine::Blob::Ptr &blob, const T values[], const size_t size) { +template +void inline fill_data_float_array(InferenceEngine::Blob::Ptr& blob, const T values[], const size_t size) { using Type = typename InferenceEngine::PrecisionTrait::value_type; - auto *rawBlobDataPtr = blob->buffer().as(); + auto* rawBlobDataPtr = blob->buffer().as(); for (size_t i = 0; i < std::min(size, blob->size()); i++) { auto value = values[i]; if (typeid(Type) == typeid(typename InferenceEngine::PrecisionTrait::value_type)) { - rawBlobDataPtr[i] = static_cast(ngraph::float16(value).to_bits()); + rawBlobDataPtr[i] = static_cast(ov::float16(value).to_bits()); } else { rawBlobDataPtr[i] = static_cast(value); @@ -430,8 +474,8 @@ void inline fill_data_float_array(InferenceEngine::Blob::Ptr &blob, const T valu } } -template<> -void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, +template <> +void inline fill_data_random(InferenceEngine::Blob::Ptr& blob, const uint32_t range, int32_t start_from, const int32_t k, @@ -439,45 +483,43 @@ void inline fill_data_random(InferenceEngine:: fill_data_random_float(blob, range, start_from, k, seed); } -template<> -void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, +template <> +void inline fill_data_random(InferenceEngine::Blob::Ptr& blob, const uint32_t range, int32_t start_from, - const int32_t k, const int seed) { + const int32_t k, + const int seed) { fill_data_random_float(blob, range, start_from, k, seed); } -template<> -void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, +template <> +void inline fill_data_random(InferenceEngine::Blob::Ptr& blob, const uint32_t range, int32_t start_from, - const int32_t k, const int seed) { + const int32_t k, + const int seed) { fill_data_random_float(blob, range, start_from, k, seed); } +OPENVINO_SUPPRESS_DEPRECATED_END -template -typename std::enable_if::value, T>::type -inline ie_abs(const T &val) { +template +typename std::enable_if::value, T>::type inline ie_abs(const T& val) { return std::abs(val); } -template -typename std::enable_if::value, T>::type -inline ie_abs(const T &val) { +template +typename std::enable_if::value, T>::type inline ie_abs(const T& val) { return val; } -inline ngraph::bfloat16 ie_abs(const ngraph::bfloat16 &val) { - return ngraph::bfloat16::from_bits(val.to_bits() & 0x7FFF); +inline ov::bfloat16 ie_abs(const ov::bfloat16& val) { + return ov::bfloat16::from_bits(val.to_bits() & 0x7FFF); } -inline ngraph::float16 ie_abs(const 
ngraph::float16 &val) { - return ngraph::float16::from_bits(val.to_bits() & 0x7FFF); +inline ov::float16 ie_abs(const ov::float16& val) { + return ov::float16::from_bits(val.to_bits() & 0x7FFF); } -OPENVINO_SUPPRESS_DEPRECATED_END - } // namespace utils } // namespace test } // namespace ov - diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp index ee0f7a7f854095..6c5e66faabe4a0 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/file_utils.hpp @@ -4,33 +4,33 @@ #pragma once -#include +#include + #include +#include #include #include -#include +#include "common_test_utils/common_utils.hpp" #include "common_test_utils/test_constants.hpp" #include "common_test_utils/w_dirent.h" -#include "common_test_utils/common_utils.hpp" - +#include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/iplugin.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" -#include "openvino/runtime/internal_properties.hpp" #ifdef _WIN32 -#include -#define rmdir(dir) _rmdir(dir) +# include +# define rmdir(dir) _rmdir(dir) #else // _WIN32 -#include +# include #endif // _WIN32 namespace ov { namespace test { namespace utils { -template +template inline std::string to_string_c_locale(T value) { std::stringstream val_stream; val_stream.imbue(std::locale("C")); @@ -38,25 +38,26 @@ inline std::string to_string_c_locale(T value) { return val_stream.str(); } -inline std::string makePath(const std::string &folder, const std::string &file) { - if (folder.empty()) return file; +inline std::string makePath(const std::string& folder, const std::string& file) { + if (folder.empty()) + return file; return folder + FileSeparator + file; } -inline long long fileSize(const char *fileName) { +inline long long fileSize(const char* fileName) { std::ifstream in(fileName, std::ios_base::binary | std::ios_base::ate); return in.tellg(); } -inline long long fileSize(const std::string &fileName) { +inline long long fileSize(const std::string& fileName) { return fileSize(fileName.c_str()); } -inline bool fileExists(const char *fileName) { +inline bool fileExists(const char* fileName) { return fileSize(fileName) >= 0; } -inline bool fileExists(const std::string &fileName) { +inline bool fileExists(const std::string& fileName) { return fileExists(fileName.c_str()); } @@ -72,7 +73,7 @@ inline void removeFile(const std::string& path) { } } -inline void removeIRFiles(const std::string &xmlFilePath, const std::string &binFileName) { +inline void removeIRFiles(const std::string& xmlFilePath, const std::string& binFileName) { if (fileExists(xmlFilePath)) { std::remove(xmlFilePath.c_str()); } @@ -87,8 +88,8 @@ inline void removeIRFiles(const std::string &xmlFilePath, const std::string &bin // < 0 - error // >= 0 - count of removed files inline int removeFilesWithExt(std::string path, std::string ext) { - struct dirent *ent; - DIR *dir = opendir(path.c_str()); + struct dirent* ent; + DIR* dir = opendir(path.c_str()); int ret = 0; if (dir != nullptr) { while ((ent = readdir(dir)) != NULL) { @@ -114,8 +115,8 @@ inline int removeFilesWithExt(std::string path, std::string ext) { // Return value: // vector of strings representing file paths inline std::vector listFilesWithExt(const std::string& path, const std::string& ext) { - struct dirent *ent; - DIR *dir = 
opendir(path.c_str()); + struct dirent* ent; + DIR* dir = opendir(path.c_str()); std::vector res; if (dir != nullptr) { while ((ent = readdir(dir)) != NULL) { @@ -131,11 +132,11 @@ inline std::vector listFilesWithExt(const std::string& path, const return res; } -inline int removeDir(const std::string &path) { +inline int removeDir(const std::string& path) { return rmdir(path.c_str()); } -inline bool directoryExists(const std::string &path) { +inline bool directoryExists(const std::string& path) { struct stat sb; if (stat(path.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) { @@ -145,7 +146,6 @@ inline bool directoryExists(const std::string &path) { return false; } - inline void directoryFileListRecursive(const std::string& name, std::vector& file_list) { struct CloseDir { void operator()(DIR* d) const noexcept { @@ -156,7 +156,7 @@ inline void directoryFileListRecursive(const std::string& name, std::vector; Dir directory(opendir(name.c_str())); - struct dirent *entire; + struct dirent* entire; if (directory) { const std::string current_dir{"."}; const std::string parent_dir{".."}; @@ -219,7 +219,7 @@ inline std::vector getFileListByPatternRecursive(const std::vector< }; std::vector result; - for (auto &&folderPath : folderPaths) { + for (auto&& folderPath : folderPaths) { if (!directoryExists(folderPath)) { std::string msg = "Input directory (" + folderPath + ") doesn't not exist!"; throw std::runtime_error(msg); diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/matcher.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/matcher.hpp index 52e54ef36537c4..648f6accdb4009 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/matcher.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/matcher.hpp @@ -12,8 +12,7 @@ class TestMatcher : public ov::pass::pattern::Matcher { public: TestMatcher() = default; - bool match_value(const ov::Output& pattern_value, - const ov::Output& graph_value) override { + bool match_value(const ov::Output& pattern_value, const ov::Output& graph_value) override { if (ov::is_type(pattern_value.get_node_shared_ptr())) { bool result = pattern_value == graph_value; if (result) { @@ -28,7 +27,7 @@ class TestMatcher : public ov::pass::pattern::Matcher { public: bool match(const std::shared_ptr& pattern_node, const std::shared_ptr& graph_node) { OPENVINO_ASSERT(pattern_node && graph_node); // the same condition throws an exception in the - // non-test version of `match` + // non-test version of `match` OPENVINO_DEBUG << "Starting match pattern = " << pattern_node->get_name() << " , graph_node = " << graph_node->get_name(); diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/ndarray.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/ndarray.hpp index a4306c2befa6ca..6fbfd1663b5856 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/ndarray.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/ndarray.hpp @@ -12,7 +12,7 @@ #include "openvino/core/shape.hpp" -namespace ngraph { +namespace ov { namespace test { namespace init { // Recursively define types for N-deep initializer lists @@ -73,7 +73,8 @@ typename std::enable_if<(N > 1), void>::type fill_shape(ov::Shape& shape, const } template -typename std::enable_if<(N > 1), void>::type check_shape(const ov::Shape& shape, const NestedInitializerList& inits) { +typename std::enable_if<(N > 1), void>::type check_shape(const ov::Shape& shape, + const 
NestedInitializerList& inits) { if (shape.at(shape.size() - N) != inits.size()) { throw std::invalid_argument("Initializers do not match shape"); } @@ -188,4 +189,4 @@ class NDArray : public NDArrayBase { } }; } // namespace test -} // namespace ngraph +} // namespace ov diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp index 75404d0a4f82ed..5bb12e821ad4c4 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_tensor_utils.hpp @@ -9,13 +9,12 @@ namespace ov { namespace test { namespace utils { -ov::Tensor create_and_fill_tensor( - const ov::element::Type element_type, - const ov::Shape &shape, - const uint32_t range = 10, - const double_t start_from = 0, - const int32_t resolution = 1, - const int seed = 1); +ov::Tensor create_and_fill_tensor(const ov::element::Type element_type, + const ov::Shape& shape, + const uint32_t range = 10, + const double_t start_from = 0, + const int32_t resolution = 1, + const int seed = 1); template static ov::runtime::Tensor create_tensor(const ov::element::Type& element_type, @@ -23,37 +22,33 @@ static ov::runtime::Tensor create_tensor(const ov::element::Type& element_type, const std::vector& values, const size_t size = 0) { const size_t real_size = size ? size : values.size() * sizeof(T) / element_type.size(); - ov::runtime::Tensor tensor { element_type, shape }; + ov::runtime::Tensor tensor{element_type, shape}; std::memcpy(tensor.data(), values.data(), std::min(real_size * element_type.size(), sizeof(T) * values.size())); return tensor; } -ov::runtime::Tensor create_and_fill_tensor_unique_sequence( - const ov::element::Type element_type, - const ov::Shape& shape, - const int32_t start_from = 0, - const int32_t resolution = 1, - const int seed = 1); - -ov::runtime::Tensor create_and_fill_tensor_normal_distribution( - const ov::element::Type element_type, - const ov::Shape& shape, - const float mean, - const float stddev, - const int seed = 1); - -ov::runtime::Tensor create_and_fill_tensor_consistently( - const ov::element::Type element_type, - const ov::Shape& shape, - const uint32_t range, - const int32_t start_from, - const int32_t resolution); - -void compare( - const ov::Tensor &expected, - const ov::Tensor &actual, - const double abs_threshold = std::numeric_limits::max(), - const double rel_threshold = std::numeric_limits::max()); +ov::runtime::Tensor create_and_fill_tensor_unique_sequence(const ov::element::Type element_type, + const ov::Shape& shape, + const int32_t start_from = 0, + const int32_t resolution = 1, + const int seed = 1); + +ov::runtime::Tensor create_and_fill_tensor_normal_distribution(const ov::element::Type element_type, + const ov::Shape& shape, + const float mean, + const float stddev, + const int seed = 1); + +ov::runtime::Tensor create_and_fill_tensor_consistently(const ov::element::Type element_type, + const ov::Shape& shape, + const uint32_t range, + const int32_t start_from, + const int32_t resolution); + +void compare(const ov::Tensor& expected, + const ov::Tensor& actual, + const double abs_threshold = std::numeric_limits::max(), + const double rel_threshold = std::numeric_limits::max()); } // namespace utils } // namespace test } // namespace ov diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_test_utils.hpp 
b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_test_utils.hpp index cf645581fa5cfc..87d12fd0cee2ac 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_test_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/ov_test_utils.hpp @@ -5,11 +5,11 @@ #pragma once #include "common_test_utils/graph_comparator.hpp" +#include "common_test_utils/test_common.hpp" #include "openvino/core/dimension.hpp" #include "openvino/core/model.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/pass.hpp" -#include "common_test_utils/test_common.hpp" #include "transformations/init_node_info.hpp" #define DYN ov::Dimension::dynamic() diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_case.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_case.hpp index eaa5790c30a948..2d95881e4a755d 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_case.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_case.hpp @@ -9,7 +9,6 @@ #include "common_test_utils/all_close.hpp" #include "common_test_utils/all_close_f.hpp" #include "common_test_utils/test_tools.hpp" -#include "ngraph/file_util.hpp" #include "openvino/runtime/core.hpp" #include "openvino/util/file_util.hpp" @@ -29,7 +28,7 @@ class TestCase { OPENVINO_ASSERT(input_pshape.compatible(shape), "Provided input shape ", shape, - " is not compatible with nGraph function's expected input shape ", + " is not compatible with OpenVINO model's expected input shape ", input_pshape, " for input ", m_input_index); diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_common.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_common.hpp index 708405c77d096b..019025c16f2a76 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_common.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_common.hpp @@ -14,7 +14,7 @@ namespace ov { namespace test { namespace utils { class PostgreSQLLink; -} //namespace utils +} // namespace utils class TestsCommon : virtual public ::testing::Test { /// \brief Holds a pointer on PostgreSQL interface implementation (see postgres_link.hpp). 
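The reworked ov_tensor_utils.hpp declarations above (create_and_fill_tensor, compare) are the entry points most of the refactored tests call directly. The following is a minimal usage sketch only: the shape, the tolerances, and the reference tensor are placeholders chosen for illustration, not values taken from this patch, and the fill parameters simply repeat the defaults shown in the declaration.

#include "common_test_utils/ov_tensor_utils.hpp"
#include "openvino/runtime/tensor.hpp"

void fill_and_compare_sketch() {
    // Random f32 data using the default fill parameters from the declaration above
    // (range = 10, start_from = 0, resolution = 1, seed = 1).
    ov::Tensor input = ov::test::utils::create_and_fill_tensor(ov::element::f32,
                                                               ov::Shape{1, 3, 16, 16},
                                                               /* range = */ 10,
                                                               /* start_from = */ 0,
                                                               /* resolution = */ 1,
                                                               /* seed = */ 1);
    ov::Tensor reference = input;  // placeholder for an actual reference result
    // The default thresholds are std::numeric_limits<double>::max(), which effectively
    // disables the element-wise check, so explicit tolerances are passed here.
    ov::test::utils::compare(reference, input, 1e-4, 1e-4);
}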
diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp index 8b7740912bcb6e..70778266d8f1b2 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_constants.hpp @@ -28,22 +28,22 @@ const char DEVICE_SUFFIX_SEPARATOR = '.'; const unsigned int maxFileNameLength = 140; #ifdef _WIN32 - #if defined(__MINGW32__) || defined(__MINGW64__) - const char pre[] = "lib"; - #else - const char pre[] = ""; - #endif - const char ext[] = ".dll"; - const char FileSeparator[] = "\\"; +# if defined(__MINGW32__) || defined(__MINGW64__) +const char pre[] = "lib"; +# else +const char pre[] = ""; +# endif +const char ext[] = ".dll"; +const char FileSeparator[] = "\\"; #else - #if defined __APPLE__ - const char pre[] = "lib"; - const char ext[] = ".so"; - #else - const char pre[] = "lib"; - const char ext[] = ".so"; - #endif - const char FileSeparator[] = "/"; +# if defined __APPLE__ +const char pre[] = "lib"; +const char ext[] = ".so"; +# else +const char pre[] = "lib"; +const char ext[] = ".so"; +# endif +const char FileSeparator[] = "/"; #endif } // namespace utils diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_control.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_control.hpp index 3d30451ca37397..277813dc0c8038 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_control.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_control.hpp @@ -22,31 +22,35 @@ std::string combine_test_backend_and_case(const std::string& backend_name, const #define OPENVINO_GTEST_TEST_(backend_name, test_case_name, test_name, parent_class, parent_id) \ class OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name) : public parent_class { \ - public: \ + public: \ OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)() {} \ - \ - private: \ - void TestBody() override; \ - static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_; \ + \ + private: \ + void TestBody() override; \ + static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_; \ GTEST_DISALLOW_COPY_AND_ASSIGN_(OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)); \ - }; \ - \ + }; \ + \ ::testing::TestInfo* const OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)::test_info_ = \ - ::testing::internal::MakeAndRegisterTestInfo( \ - ::ov::combine_test_backend_and_case(#backend_name, #test_case_name).c_str(), \ - ::ov::prepend_disabled(#backend_name, #test_name, s_manifest).c_str(), \ - nullptr, \ - nullptr, \ - ::testing::internal::CodeLocation(__FILE__, __LINE__), \ - (parent_id), \ - parent_class::SetUpTestCase, \ - parent_class::TearDownTestCase, \ - new ::testing::internal::TestFactoryImpl< \ + ::testing::internal::MakeAndRegisterTestInfo( \ + ::ov::combine_test_backend_and_case(#backend_name, #test_case_name).c_str(), \ + ::ov::prepend_disabled(#backend_name, #test_name, s_manifest).c_str(), \ + nullptr, \ + nullptr, \ + ::testing::internal::CodeLocation(__FILE__, __LINE__), \ + (parent_id), \ + parent_class::SetUpTestCase, \ + parent_class::TearDownTestCase, \ + new ::testing::internal::TestFactoryImpl< \ OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)>); \ void 
OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)::TestBody() #define OPENVINO_TEST(test_case_name, test_name) \ - OPENVINO_GTEST_TEST_(test_case_name, test_case_name, test_name, ::testing::Test, ::testing::internal::GetTestTypeId()) + OPENVINO_GTEST_TEST_(test_case_name, \ + test_case_name, \ + test_name, \ + ::testing::Test, \ + ::testing::internal::GetTestTypeId()) // OPENVINO_TEST_F facilitates the use of the same configuration parameters for multiple // unit tests similar to the original TEST_F, but with the introduction of a new 0th @@ -68,10 +72,10 @@ std::string combine_test_backend_and_case(const std::string& backend_name, const // (rather than the BACKENDNAME.* that worked before the use of OPENVINO_TEST_F) #define OPENVINO_TEST_F(backend_name, test_fixture, test_name) \ OPENVINO_GTEST_TEST_(backend_name, \ - test_fixture, \ - test_name, \ - test_fixture, \ - ::testing::internal::GetTypeId()) + test_fixture, \ + test_name, \ + test_fixture, \ + ::testing::internal::GetTypeId()) // OPENVINO_TEST_P combined with OPENVINO_INSTANTIATE_TEST_SUITE_P facilate the generation // of value parameterized tests (similar to the original TEST_P and INSTANTIATE_TEST_SUITE_P @@ -81,10 +85,10 @@ std::string combine_test_backend_and_case(const std::string& backend_name, const // Start by defining a class derived from ::testing::TestWithParam, which you'll pass // for the test_case_name parameter. // Then use OPENVINO_INSTANTIATE_TEST_SUITE_P to define each generation of test cases (see below). -#define OPENVINO_TEST_P(backend_name, test_case_name, test_name) \ - class OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name) : public test_case_name { \ +#define OPENVINO_TEST_P(backend_name, test_case_name, test_name) \ + class OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name) : public test_case_name { \ public: \ - OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)() {} \ + OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)() {} \ void TestBody() override; \ \ private: \ @@ -95,16 +99,16 @@ std::string combine_test_backend_and_case(const std::string& backend_name, const ::testing::internal::CodeLocation(__FILE__, __LINE__)) \ ->AddTestPattern( \ #backend_name "/" #test_case_name, \ - ::ov::prepend_disabled(#backend_name "/" #test_case_name, #test_name, s_manifest).c_str(), \ + ::ov::prepend_disabled(#backend_name "/" #test_case_name, #test_name, s_manifest).c_str(), \ new ::testing::internal::TestMetaFactory< \ - OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)>()); \ + OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)>()); \ return 0; \ } \ static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \ - GTEST_DISALLOW_COPY_AND_ASSIGN_(OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)); \ + GTEST_DISALLOW_COPY_AND_ASSIGN_(OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)); \ }; \ - int OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)::gtest_registering_dummy_ = \ - OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)::AddToRegistry(); \ + int OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)::gtest_registering_dummy_ = \ + OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)::AddToRegistry(); \ void OPENVINO_GTEST_TEST_CLASS_NAME_(backend_name, test_case_name, test_name)::TestBody() // Use 
OPENVINO_INSTANTIATE_TEST_SUITE_P to create a generated set of test case variations. @@ -140,7 +144,7 @@ std::string combine_test_backend_and_case(const std::string& backend_name, const // the filter to run all the tests for a given backend should be: // --gtest_filter=BACKENDNAME*.* // (rather than the BACKENDNAME.* that worked before the use of OPENVINO_TEST_P) -#define OPENVINO_INSTANTIATE_TEST_SUITE_P(backend_name, prefix, test_suite_name, generator) \ +#define OPENVINO_INSTANTIATE_TEST_SUITE_P(backend_name, prefix, test_suite_name, generator) \ static ::testing::internal::ParamGenerator \ gtest_##prefix##backend_name##test_suite_name##_EvalGenerator_() { \ return generator; \ diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp index f96ae67fc3a89f..ef538e13723094 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp @@ -8,34 +8,11 @@ namespace ov { namespace test { namespace utils { -enum ComparisonTypes { - EQUAL, - NOT_EQUAL, - IS_FINITE, - IS_INF, - IS_NAN, - LESS, - LESS_EQUAL, - GREATER, - GREATER_EQUAL -}; +enum ComparisonTypes { EQUAL, NOT_EQUAL, IS_FINITE, IS_INF, IS_NAN, LESS, LESS_EQUAL, GREATER, GREATER_EQUAL }; -enum ConversionTypes { - CONVERT, - CONVERT_LIKE -}; +enum ConversionTypes { CONVERT, CONVERT_LIKE }; -enum ReductionType { - Mean, - Max, - Min, - Prod, - Sum, - LogicalOr, - LogicalAnd, - L1, - L2 -}; +enum ReductionType { Mean, Max, Min, Prod, Sum, LogicalOr, LogicalAnd, L1, L2 }; } // namespace utils } // namespace test diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_tools.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_tools.hpp index c152c453e5dd3d..61960de31c8c52 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_tools.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_tools.hpp @@ -4,12 +4,11 @@ #pragma once -#include #include #include +#include #include - /// \brief Reads a binary file to a vector. /// /// \param[in] path The path where the file is located. 
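The comment block above describes how OPENVINO_TEST_P and OPENVINO_INSTANTIATE_TEST_SUITE_P are meant to be combined. The snippet below is an illustrative sketch only: the fixture name, backend name, and parameter values are hypothetical, and it assumes the translation unit defines the s_manifest string that the disabled-test filtering in these macros references.

#include <gtest/gtest.h>

#include "common_test_utils/test_control.hpp"

// Hypothetical fixture; any class derived from ::testing::TestWithParam<T> works here.
class SampleParamTest : public ::testing::TestWithParam<int> {};

OPENVINO_TEST_P(INTERPRETER, SampleParamTest, param_is_non_negative) {
    EXPECT_GE(GetParam(), 0);
}

// Registers INTERPRETER/SampleParamTest.param_is_non_negative for each generated value;
// all tests for the backend can then be selected with --gtest_filter=INTERPRETER*.*
// as the comment above notes.
OPENVINO_INSTANTIATE_TEST_SUITE_P(INTERPRETER, smoke, SampleParamTest, ::testing::Values(1, 2, 3));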
diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/unicode_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/unicode_utils.hpp index 33833ce74b9b75..9b72fa448ee8b3 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/unicode_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/unicode_utils.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include -#include #include +#include +#include +#include #include "common_test_utils/common_utils.hpp" #include "common_test_utils/w_dirent.h" @@ -20,11 +20,11 @@ namespace ov { namespace test { namespace utils { -inline void fixSlashes(std::string &str) { +inline void fixSlashes(std::string& str) { std::replace(str.begin(), str.end(), '/', '\\'); } -inline void fixSlashes(std::wstring &str) { +inline void fixSlashes(std::wstring& str) { std::replace(str.begin(), str.end(), L'/', L'\\'); } @@ -33,15 +33,15 @@ inline std::wstring stringToWString(std::string input) { } inline bool copyFile(std::wstring source_path, std::wstring dest_path) { -#ifndef _WIN32 +# ifndef _WIN32 std::ifstream source(ov::util::wstring_to_string(source_path), std::ios::binary); std::ofstream dest(ov::util::wstring_to_string(dest_path), std::ios::binary); -#else +# else fixSlashes(source_path); fixSlashes(dest_path); std::ifstream source(source_path.c_str(), std::ios::binary); std::ofstream dest(dest_path.c_str(), std::ios::binary); -#endif +# endif bool result = source && dest; std::istreambuf_iterator begin_source(source); std::istreambuf_iterator end_source; @@ -70,17 +70,18 @@ inline std::wstring addUnicodePostfixToPath(std::string source_path, std::wstrin inline void removeFile(std::wstring path) { int result = 0; if (!path.empty()) { -#ifdef _WIN32 +# ifdef _WIN32 result = _wremove(path.c_str()); -#else +# else result = remove(ov::util::wstring_to_string(path).c_str()); -#endif +# endif } (void)result; } inline bool endsWith(const std::wstring& source, const std::wstring& expectedSuffix) { - return expectedSuffix.size() <= source.size() && source.compare(source.size() - expectedSuffix.size(), expectedSuffix.size(), expectedSuffix) == 0; + return expectedSuffix.size() <= source.size() && + source.compare(source.size() - expectedSuffix.size(), expectedSuffix.size(), expectedSuffix) == 0; } // Removes all files with extension=ext from the given directory @@ -89,9 +90,9 @@ inline bool endsWith(const std::wstring& source, const std::wstring& expectedSuf // >= 0 - count of removed files inline int removeFilesWithExt(std::wstring path, std::wstring ext) { int ret = 0; -#ifdef _WIN32 - struct _wdirent *ent; - _WDIR *dir = _wopendir(path.c_str()); +# ifdef _WIN32 + struct _wdirent* ent; + _WDIR* dir = _wopendir(path.c_str()); if (dir != nullptr) { while ((ent = _wreaddir(dir)) != NULL) { auto file = ::FileUtils::makePath(path, std::wstring(ent->wd_name)); @@ -108,11 +109,11 @@ inline int removeFilesWithExt(std::wstring path, std::wstring ext) { } _wclosedir(dir); } -#else - struct dirent *ent; +# else + struct dirent* ent; auto path_mb = ov::util::wstring_to_string(path); auto ext_mb = ov::util::wstring_to_string(ext); - DIR *dir = opendir(path_mb.c_str()); + DIR* dir = opendir(path_mb.c_str()); if (dir != nullptr) { while ((ent = readdir(dir)) != NULL) { std::string file = ::FileUtils::makePath(path_mb, std::string(ent->d_name)); @@ -129,34 +130,34 @@ inline int removeFilesWithExt(std::wstring path, std::wstring ext) { } closedir(dir); } -#endif +# endif return ret; } inline int 
removeDir(std::wstring path) { int result = 0; if (!path.empty()) { -#ifdef _WIN32 +# ifdef _WIN32 result = _wrmdir(path.c_str()); -#else +# else result = rmdir(ov::util::wstring_to_string(path).c_str()); -#endif +# endif } return result; } -inline bool directoryExists(const std::wstring &path) { -#ifdef _WIN32 +inline bool directoryExists(const std::wstring& path) { +# ifdef _WIN32 struct _stat64i32 sb; if (_wstat(path.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) { return true; } -#else +# else struct stat sb; if (stat(ov::util::wstring_to_string(path).c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) { return true; } -#endif +# endif return false; } diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/w_dirent.h b/src/tests/test_utils/common_test_utils/include/common_test_utils/w_dirent.h index c8d34bc7632fd5..60ebac35450adc 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/w_dirent.h +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/w_dirent.h @@ -6,50 +6,52 @@ #if defined(_WIN32) -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN_UNDEF -#endif - -#ifndef NOMINMAX -# define NOMINMAX -# define NOMINMAX_UNDEF -#endif - -#if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_) -# define _X86_ -#endif - -#if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_) -# define _AMD64_ -#endif - -#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_) -# define _ARM_ -#endif - -#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_) -# define _ARM64_ -#endif - -#include -#include -#include -#include -#include -#include +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN_UNDEF +# endif + +# ifndef NOMINMAX +# define NOMINMAX +# define NOMINMAX_UNDEF +# endif + +# if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_) +# define _X86_ +# endif + +# if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_) +# define _AMD64_ +# endif + +# if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_) +# define _ARM_ +# endif + +# if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_) +# define _ARM64_ +# endif + +// clang-format off +# include +# include +# include +# include +# include +# include +// clang-format on // Copied from linux libc sys/stat.h: -#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +# define S_ISREG(m) (((m)&S_IFMT) == S_IFREG) +# define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) struct dirent { - char *d_name; + char* d_name; - explicit dirent(const wchar_t *wsFilePath) { + explicit dirent(const wchar_t* wsFilePath) { size_t i; auto slen = wcslen(wsFilePath); - d_name = static_cast(malloc(slen + 1)); + d_name = static_cast(malloc(slen + 1)); wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen); } ~dirent() { @@ -60,22 +62,23 @@ struct dirent { class DIR { WIN32_FIND_DATAA FindFileData; HANDLE hFind; - dirent *next; + dirent* next; - static inline bool endsWith(const std::string &src, const char *with) { + static inline bool endsWith(const std::string& src, const char* with) { int wl = static_cast(strlen(with)); int so = static_cast(src.length()) - wl; - if (so < 0) return false; + if (so < 0) + return false; return 0 == strncmp(with, &src[so], wl); } public: - DIR(const DIR &other) = delete; - DIR(DIR &&other) = delete; - DIR& operator=(const DIR &other) = delete; - DIR& operator=(DIR &&other) = delete; + DIR(const DIR& other) = delete; + DIR(DIR&& other) = delete; + DIR& 
operator=(const DIR& other) = delete; + DIR& operator=(DIR&& other) = delete; - explicit DIR(const char *dirPath) : next(nullptr) { + explicit DIR(const char* dirPath) : next(nullptr) { std::string ws = dirPath; if (endsWith(ws, "\\")) ws += "*"; @@ -86,7 +89,8 @@ class DIR { } ~DIR() { - if (!next) delete next; + if (!next) + delete next; next = nullptr; FindClose(hFind); } @@ -96,10 +100,12 @@ class DIR { } dirent* nextEnt() { - if (next != nullptr) delete next; + if (next != nullptr) + delete next; next = nullptr; - if (!FindFileData.dwReserved0) return nullptr; + if (!FindFileData.dwReserved0) + return nullptr; wchar_t wbuf[4096]; @@ -112,11 +118,11 @@ class DIR { }; struct _wdirent { - wchar_t *wd_name; + wchar_t* wd_name; - explicit _wdirent(const wchar_t *wsFilePath) { + explicit _wdirent(const wchar_t* wsFilePath) { auto slen = wcslen(wsFilePath); - wd_name = static_cast(malloc(sizeof(wchar_t) * (slen + 1))); + wd_name = static_cast(malloc(sizeof(wchar_t) * (slen + 1))); wcscpy_s(wd_name, slen + 1, wsFilePath); } ~_wdirent() { @@ -127,22 +133,23 @@ struct _wdirent { class _WDIR { WIN32_FIND_DATAW FindFileData; HANDLE hFind; - _wdirent *next; + _wdirent* next; - static inline bool endsWith(const std::wstring &src, const wchar_t *with) { + static inline bool endsWith(const std::wstring& src, const wchar_t* with) { int wl = static_cast(wcslen(with)); int so = static_cast(src.length()) - wl; - if (so < 0) return false; + if (so < 0) + return false; return 0 == wcsncmp(with, &src[so], wl); } public: - _WDIR(const _WDIR &other) = delete; - _WDIR(_WDIR &&other) = delete; - _WDIR& operator=(const _WDIR &other) = delete; - _WDIR& operator=(_WDIR &&other) = delete; + _WDIR(const _WDIR& other) = delete; + _WDIR(_WDIR&& other) = delete; + _WDIR& operator=(const _WDIR& other) = delete; + _WDIR& operator=(_WDIR&& other) = delete; - explicit _WDIR(const wchar_t *dirPath) : next(nullptr) { + explicit _WDIR(const wchar_t* dirPath) : next(nullptr) { std::wstring ws = dirPath; if (endsWith(ws, L"\\")) ws += L"*"; @@ -153,7 +160,8 @@ class _WDIR { } ~_WDIR() { - if (!next) delete next; + if (!next) + delete next; next = nullptr; FindClose(hFind); } @@ -163,10 +171,12 @@ class _WDIR { } _wdirent* nextEnt() { - if (next != nullptr) delete next; + if (next != nullptr) + delete next; next = nullptr; - if (!FindFileData.dwReserved0) return nullptr; + if (!FindFileData.dwReserved0) + return nullptr; std::wstring buf(FindFileData.cFileName); next = new _wdirent(buf.c_str()); @@ -175,8 +185,7 @@ class _WDIR { } }; - -static DIR* opendir(const char *dirPath) { +static DIR* opendir(const char* dirPath) { auto dp = new DIR(dirPath); if (!dp->isValid()) { delete dp; @@ -185,7 +194,7 @@ static DIR* opendir(const char *dirPath) { return dp; } -static _WDIR* _wopendir(const wchar_t *dirPath) { +static _WDIR* _wopendir(const wchar_t* dirPath) { auto dp = new _WDIR(dirPath); if (!dp->isValid()) { delete dp; @@ -194,35 +203,35 @@ static _WDIR* _wopendir(const wchar_t *dirPath) { return dp; } -static struct dirent* readdir(DIR *dp) { +static struct dirent* readdir(DIR* dp) { return dp->nextEnt(); } -static struct _wdirent* _wreaddir(_WDIR *dp) { +static struct _wdirent* _wreaddir(_WDIR* dp) { return dp->nextEnt(); } -static void closedir(DIR *dp) { +static void closedir(DIR* dp) { delete dp; } -static void _wclosedir(_WDIR *dp) { +static void _wclosedir(_WDIR* dp) { delete dp; } -#ifdef WIN32_LEAN_AND_MEAN_UNDEF -# undef WIN32_LEAN_AND_MEAN -# undef WIN32_LEAN_AND_MEAN_UNDEF -#endif +# ifdef WIN32_LEAN_AND_MEAN_UNDEF +# 
undef WIN32_LEAN_AND_MEAN +# undef WIN32_LEAN_AND_MEAN_UNDEF +# endif -#ifdef NOMINMAX_UNDEF -# undef NOMINMAX_UNDEF -# undef NOMINMAX -#endif +# ifdef NOMINMAX_UNDEF +# undef NOMINMAX_UNDEF +# undef NOMINMAX +# endif #else -#include -#include +# include +# include #endif diff --git a/src/tests/test_utils/common_test_utils/src/all_close.cpp b/src/tests/test_utils/common_test_utils/src/all_close.cpp index 75cd70e06ffa13..abd77f62b94ce3 100644 --- a/src/tests/test_utils/common_test_utils/src/all_close.cpp +++ b/src/tests/test_utils/common_test_utils/src/all_close.cpp @@ -15,29 +15,23 @@ ::testing::AssertionResult all_close(const ov::Tensor& a, const ov::Tensor& b, f return ::testing::AssertionFailure() << "Cannot compare tensors with different element types"; } -#define all_close_ov_type(type)\ - case ov::element::type:\ - return all_close::value_type>(a, b, \ - static_cast::value_type>(rtol), \ - static_cast::value_type>(atol));\ +#define all_close_ov_type(type) \ + case ov::element::type: \ + return all_close::value_type>( \ + a, \ + b, \ + static_cast::value_type>(rtol), \ + static_cast::value_type>(atol)); switch (a.get_element_type()) { - all_close_ov_type(u8) - all_close_ov_type(u16) - all_close_ov_type(u32) - all_close_ov_type(u64) - all_close_ov_type(i8) - all_close_ov_type(i16) - all_close_ov_type(i32) - all_close_ov_type(i64) - // all_close_ov_type(bf16) - // all_close_ov_type(f16) - all_close_ov_type(f32) - all_close_ov_type(f64) - all_close_ov_type(boolean) - default: - return ::testing::AssertionFailure() - << "Cannot compare tensors with unsupported element type: " << a.get_element_type(); + all_close_ov_type(u8) all_close_ov_type(u16) all_close_ov_type(u32) all_close_ov_type(u64) all_close_ov_type(i8) + all_close_ov_type(i16) all_close_ov_type(i32) all_close_ov_type(i64) + // all_close_ov_type(bf16) + // all_close_ov_type(f16) + all_close_ov_type(f32) all_close_ov_type(f64) all_close_ov_type(boolean) default + : return ::testing::AssertionFailure() + << "Cannot compare tensors with unsupported element type: " + << a.get_element_type(); } } } // namespace utils diff --git a/src/tests/test_utils/common_test_utils/src/all_close_f.cpp b/src/tests/test_utils/common_test_utils/src/all_close_f.cpp index f87956dd526836..7f25c578f04234 100644 --- a/src/tests/test_utils/common_test_utils/src/all_close_f.cpp +++ b/src/tests/test_utils/common_test_utils/src/all_close_f.cpp @@ -8,8 +8,6 @@ #include "openvino/core/type/element_type_traits.hpp" #include "precomp.hpp" -#include "common_test_utils/float_util.hpp" - namespace ov { namespace test { namespace utils { @@ -153,10 +151,7 @@ bool close_f(double a, double b, int tolerance_bits, double min_signal) { return (distance <= tolerance) || (distance == DOUBLE_BELOW_MIN_SIGNAL); } -std::vector float_distances(const float* const a, - const float* const b, - size_t size, - float min_signal) { +std::vector float_distances(const float* const a, const float* const b, size_t size, float min_signal) { std::vector distances(size); for (size_t i = 0; i < size; ++i) { distances[i] = float_distance(a[i], b[i], min_signal); @@ -165,10 +160,7 @@ std::vector float_distances(const float* const a, return distances; } -std::vector float_distances(const double* const a, - const double* const b, - size_t size, - double min_signal) { +std::vector float_distances(const double* const a, const double* const b, size_t size, double min_signal) { std::vector distances(size); for (size_t i = 0; i < size; ++i) { distances[i] = float_distance(a[i], b[i], min_signal); @@ 
-480,12 +472,8 @@ ::testing::AssertionResult all_close_f(const std::vector& a, return all_close_f(a.data(), b.data(), a.size(), tolerance_bits, min_signal); } - -template -::testing::AssertionResult all_close_f(const ov::Tensor& a, - const ov::Tensor& b, - int tolerance_bits, - float min_signal) { +template +::testing::AssertionResult all_close_f(const ov::Tensor& a, const ov::Tensor& b, int tolerance_bits, float min_signal) { if (a.get_size() != b.get_size()) { return ::testing::AssertionFailure() << "a.size() != b.size() for all_close_f comparison."; } @@ -493,38 +481,37 @@ ::testing::AssertionResult all_close_f(const ov::Tensor& a, return ::testing::AssertionSuccess() << "No elements to compare"; } - return all_close_f(static_cast(a.data()), static_cast(b.data()), a.get_size(), tolerance_bits, min_signal); + return all_close_f(static_cast(a.data()), + static_cast(b.data()), + a.get_size(), + tolerance_bits, + min_signal); } - -::testing::AssertionResult all_close_f(const ov::Tensor& a, - const ov::Tensor& b, - int tolerance_bits, - float min_signal) { - if (a.get_element_type() != b.get_element_type()) { +::testing::AssertionResult all_close_f(const ov::Tensor& a, const ov::Tensor& b, int tolerance_bits, float min_signal) { + if (a.get_element_type() != b.get_element_type()) { return ::testing::AssertionFailure() << "Cannot compare tensors with different element types"; } -#define all_close_f_ov_type(type)\ - case ov::element::type:\ - return all_close_f::value_type>(a, b, tolerance_bits, min_signal);\ +#define all_close_f_ov_type(type) \ + case ov::element::type: \ + return all_close_f::value_type>(a, b, tolerance_bits, min_signal); switch (a.get_element_type()) { - // all_close_f_ov_type(u8) - // all_close_f_ov_type(u16) - // all_close_f_ov_type(u32) - // all_close_f_ov_type(u64) - // all_close_f_ov_type(i8) - // all_close_f_ov_type(i16) - // all_close_f_ov_type(i32) - // all_close_f_ov_type(i64) - // all_close_f_ov_type(bf16) - // all_close_f_ov_type(f16) - all_close_f_ov_type(f32) - all_close_f_ov_type(f64) - default: - return ::testing::AssertionFailure() - << "Cannot compare tensors with unsupported element type: " << a.get_element_type(); + // all_close_f_ov_type(u8) + // all_close_f_ov_type(u16) + // all_close_f_ov_type(u32) + // all_close_f_ov_type(u64) + // all_close_f_ov_type(i8) + // all_close_f_ov_type(i16) + // all_close_f_ov_type(i32) + // all_close_f_ov_type(i64) + // all_close_f_ov_type(bf16) + // all_close_f_ov_type(f16) + all_close_f_ov_type(f32) all_close_f_ov_type(f64) default + : return ::testing::AssertionFailure() + << "Cannot compare tensors with unsupported element type: " + << a.get_element_type(); } } diff --git a/src/tests/test_utils/common_test_utils/src/data_utils.cpp b/src/tests/test_utils/common_test_utils/src/data_utils.cpp index 9fa59cb9cdf7af..9991b976b467f6 100644 --- a/src/tests/test_utils/common_test_utils/src/data_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/data_utils.cpp @@ -4,10 +4,9 @@ #include "common_test_utils/data_utils.hpp" +#include "blob_factory.hpp" #include "debug.h" // to allow putting vector into exception string stream - #include "ie_blob.h" -#include "blob_factory.hpp" #include "openvino/core/deprecated.hpp" #include "openvino/core/type/element_type_traits.hpp" #include "openvino/runtime/tensor.hpp" @@ -29,8 +28,10 @@ bool isDenseBlob(const InferenceEngine::Blob::Ptr& blob) { IE_ASSERT(dims.size() == strs.size()) << " isDenseBlob: inconsistent tensor descriptor"; auto size = dims.size(); - if (size == 0) return true; 
- if (size == 1) return strs[0] == 1; + if (size == 0) + return true; + if (size == 1) + return strs[0] == 1; for (auto i = size - 1; i > 0; i--) { if (strs[i - 1] != strs[i - 1] * dims[i]) @@ -40,20 +41,44 @@ bool isDenseBlob(const InferenceEngine::Blob::Ptr& blob) { return true; } -template -void copy_7D(void *src_raw_ptr, std::vector &src_str, void *dst_raw_ptr, std::vector &dst_str, std::vector &dims) { +template +void copy_7D(void* src_raw_ptr, + std::vector& src_str, + void* dst_raw_ptr, + std::vector& dst_str, + std::vector& dims) { auto src_ptr = static_cast(src_raw_ptr); auto dst_ptr = static_cast(dst_raw_ptr); - for (size_t d0 = 0; d0 < dims[0]; d0++) { auto src_ptr_0 = src_ptr + src_str[0]*d0; auto dst_ptr_0 = dst_ptr + dst_str[0]*d0; - for (size_t d1 = 0; d1 < dims[1]; d1++) { auto src_ptr_1 = src_ptr_0 + src_str[1]*d1; auto dst_ptr_1 = dst_ptr_0 + dst_str[1]*d1; - for (size_t d2 = 0; d2 < dims[2]; d2++) { auto src_ptr_2 = src_ptr_1 + src_str[2]*d2; auto dst_ptr_2 = dst_ptr_1 + dst_str[2]*d2; - for (size_t d3 = 0; d3 < dims[3]; d3++) { auto src_ptr_3 = src_ptr_2 + src_str[3]*d3; auto dst_ptr_3 = dst_ptr_2 + dst_str[3]*d3; - for (size_t d4 = 0; d4 < dims[4]; d4++) { auto src_ptr_4 = src_ptr_3 + src_str[4]*d4; auto dst_ptr_4 = dst_ptr_3 + dst_str[4]*d4; - for (size_t d5 = 0; d5 < dims[5]; d5++) { auto src_ptr_5 = src_ptr_4 + src_str[5]*d5; auto dst_ptr_5 = dst_ptr_4 + dst_str[5]*d5; - for (size_t d6 = 0; d6 < dims[6]; d6++) { auto src_ptr_6 = src_ptr_5 + src_str[6]*d6; auto dst_ptr_6 = dst_ptr_5 + dst_str[6]*d6; - *dst_ptr_6 = *src_ptr_6; - }}}}}}} + for (size_t d0 = 0; d0 < dims[0]; d0++) { + auto src_ptr_0 = src_ptr + src_str[0] * d0; + auto dst_ptr_0 = dst_ptr + dst_str[0] * d0; + for (size_t d1 = 0; d1 < dims[1]; d1++) { + auto src_ptr_1 = src_ptr_0 + src_str[1] * d1; + auto dst_ptr_1 = dst_ptr_0 + dst_str[1] * d1; + for (size_t d2 = 0; d2 < dims[2]; d2++) { + auto src_ptr_2 = src_ptr_1 + src_str[2] * d2; + auto dst_ptr_2 = dst_ptr_1 + dst_str[2] * d2; + for (size_t d3 = 0; d3 < dims[3]; d3++) { + auto src_ptr_3 = src_ptr_2 + src_str[3] * d3; + auto dst_ptr_3 = dst_ptr_2 + dst_str[3] * d3; + for (size_t d4 = 0; d4 < dims[4]; d4++) { + auto src_ptr_4 = src_ptr_3 + src_str[4] * d4; + auto dst_ptr_4 = dst_ptr_3 + dst_str[4] * d4; + for (size_t d5 = 0; d5 < dims[5]; d5++) { + auto src_ptr_5 = src_ptr_4 + src_str[5] * d5; + auto dst_ptr_5 = dst_ptr_4 + dst_str[5] * d5; + for (size_t d6 = 0; d6 < dims[6]; d6++) { + auto src_ptr_6 = src_ptr_5 + src_str[6] * d6; + auto dst_ptr_6 = dst_ptr_5 + dst_str[6] * d6; + *dst_ptr_6 = *src_ptr_6; + } + } + } + } + } + } + } } void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine::Blob::Ptr& values) { @@ -82,7 +107,7 @@ void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine: IE_ASSERT(compatible); - auto fill_strides_like_plain = [] (ov::Shape dims) { + auto fill_strides_like_plain = [](ov::Shape dims) { ov::Shape str(dims.size()); if (str.empty()) return str; @@ -90,12 +115,14 @@ void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine: str.back() = 1; // stride[i] = stride[i+1]*d[i+1] - std::transform(dims.rbegin(), dims.rend() - 1, str.rbegin(), str.rbegin() + 1, - [] (size_t d, size_t s) { return d * s; }); + std::transform(dims.rbegin(), dims.rend() - 1, str.rbegin(), str.rbegin() + 1, [](size_t d, size_t s) { + return d * s; + }); // zeroing broadcast dimension equal 1 - std::transform(str.begin(), str.end(), dims.begin(), str.begin(), - [] (size_t s, size_t d) { 
return d == 1 ? 0 : s; }); + std::transform(str.begin(), str.end(), dims.begin(), str.begin(), [](size_t s, size_t d) { + return d == 1 ? 0 : s; + }); return str; }; @@ -103,7 +130,7 @@ void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine: SizeVector src_strides = fill_strides_like_plain(src_dims); SizeVector dst_strides = fill_strides_like_plain(dst_dims); - auto get_data = [] (InferenceEngine::Blob::Ptr &blob) { + auto get_data = [](InferenceEngine::Blob::Ptr& blob) { auto mem_blob = dynamic_cast(blob.get()); auto mem = mem_blob->rwmap(); return mem.as(); @@ -113,30 +140,30 @@ void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine: auto src_ptr = get_data(values); switch (blob->getTensorDesc().getPrecision()) { - case InferenceEngine::Precision::U64: - case InferenceEngine::Precision::I64: - copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); - break; - case InferenceEngine::Precision::FP32: - case InferenceEngine::Precision::I32: - copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); - break; - case InferenceEngine::Precision::I16: - case InferenceEngine::Precision::U16: - case InferenceEngine::Precision::FP16: - case InferenceEngine::Precision::BF16: - copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); - break; - case InferenceEngine::Precision::U8: - case InferenceEngine::Precision::I8: - copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); - break; - default: - IE_THROW() << "Unsupported precision by fill_data_with_broadcast function"; + case InferenceEngine::Precision::U64: + case InferenceEngine::Precision::I64: + copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); + break; + case InferenceEngine::Precision::FP32: + case InferenceEngine::Precision::I32: + copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); + break; + case InferenceEngine::Precision::I16: + case InferenceEngine::Precision::U16: + case InferenceEngine::Precision::FP16: + case InferenceEngine::Precision::BF16: + copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); + break; + case InferenceEngine::Precision::U8: + case InferenceEngine::Precision::I8: + copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); + break; + default: + IE_THROW() << "Unsupported precision by fill_data_with_broadcast function"; } } -template +template void copy_with_convert(InferenceEngine::Blob::Ptr& src_blob, InferenceEngine::Blob::Ptr& dst_blob) { using SRC_TYPE = typename InferenceEngine::PrecisionTrait::value_type; using DST_TYPE = typename InferenceEngine::PrecisionTrait::value_type; @@ -151,7 +178,8 @@ void copy_with_convert(InferenceEngine::Blob::Ptr& src_blob, InferenceEngine::Bl std::copy(src_ptr, src_ptr + src_size, dst_ptr); } -InferenceEngine::Blob::Ptr make_with_precision_convert(InferenceEngine::Blob::Ptr& blob, InferenceEngine::Precision prc) { +InferenceEngine::Blob::Ptr make_with_precision_convert(InferenceEngine::Blob::Ptr& blob, + InferenceEngine::Precision prc) { IE_ASSERT(isDenseBlob(blob)); auto td = blob->getTensorDesc(); td.setPrecision(prc); @@ -159,11 +187,22 @@ InferenceEngine::Blob::Ptr make_with_precision_convert(InferenceEngine::Blob::Pt auto new_blob = make_blob_with_precision(td); new_blob->allocate(); -#define CASE(_PRC) case InferenceEngine::Precision::_PRC: \ - copy_with_convert (blob, new_blob); break +#define CASE(_PRC) \ + case InferenceEngine::Precision::_PRC: \ + copy_with_convert(blob, new_blob); \ + break switch (prc) { - CASE(FP32); CASE(I64); CASE(U64); CASE(I32); 
CASE(U32); CASE(I16); CASE(U16); CASE(I8); CASE(U8); - default: IE_THROW() << "Unsupported precision case"; + CASE(FP32); + CASE(I64); + CASE(U64); + CASE(I32); + CASE(U32); + CASE(I16); + CASE(U16); + CASE(I8); + CASE(U8); + default: + IE_THROW() << "Unsupported precision case"; } #undef CASE @@ -188,7 +227,8 @@ void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, size_t axis, std fill_data_with_broadcast(blob, values_blob); } -InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &blob, InferenceEngine::SizeVector new_shape) { +InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr& blob, + InferenceEngine::SizeVector new_shape) { using InferenceEngine::TensorDesc; auto new_size = std::accumulate(new_shape.begin(), new_shape.end(), 1, std::multiplies()); IE_ASSERT(new_size == blob->size()); @@ -197,19 +237,19 @@ InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &b auto orig_mem = orig_mem_blob->rwmap(); auto orig_ptr = orig_mem.as(); - auto new_tdesc = TensorDesc(blob->getTensorDesc().getPrecision(), new_shape, TensorDesc::getLayoutByDims(new_shape)); + auto new_tdesc = + TensorDesc(blob->getTensorDesc().getPrecision(), new_shape, TensorDesc::getLayoutByDims(new_shape)); auto new_blob = make_blob_with_precision(new_tdesc, orig_ptr); return new_blob; } -size_t byte_size(const InferenceEngine::TensorDesc &tdesc) { +size_t byte_size(const InferenceEngine::TensorDesc& tdesc) { auto prc = tdesc.getPrecision(); auto dims = tdesc.getDims(); return prc.size() * std::accumulate(std::begin(dims), std::end(dims), (size_t)1, std::multiplies()); } OPENVINO_SUPPRESS_DEPRECATED_END - void fill_data_with_broadcast(ov::Tensor& tensor, ov::Tensor& values) { constexpr size_t MAX_N_DIMS = 7; // Suppose it's enough @@ -235,7 +275,7 @@ void fill_data_with_broadcast(ov::Tensor& tensor, ov::Tensor& values) { OPENVINO_ASSERT(compatible); - auto fill_strides_like_plain = [] (ov::Shape dims) { + auto fill_strides_like_plain = [](ov::Shape dims) { ov::Shape str(dims.size()); if (str.empty()) return str; @@ -243,12 +283,14 @@ void fill_data_with_broadcast(ov::Tensor& tensor, ov::Tensor& values) { str.back() = 1; // stride[i] = stride[i+1]*d[i+1] - std::transform(dims.rbegin(), dims.rend() - 1, str.rbegin(), str.rbegin() + 1, - [] (size_t d, size_t s) { return d * s; }); + std::transform(dims.rbegin(), dims.rend() - 1, str.rbegin(), str.rbegin() + 1, [](size_t d, size_t s) { + return d * s; + }); // zeroing broadcast dimension equal 1 - std::transform(str.begin(), str.end(), dims.begin(), str.begin(), - [] (size_t s, size_t d) { return d == 1 ? 0 : s; }); + std::transform(str.begin(), str.end(), dims.begin(), str.begin(), [](size_t s, size_t d) { + return d == 1 ? 
0 : s; + }); return str; }; @@ -261,30 +303,30 @@ void fill_data_with_broadcast(ov::Tensor& tensor, ov::Tensor& values) { using namespace ov::element; switch (tensor.get_element_type()) { - case u64: - case i64: - copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); - break; - case f32: - case i32: - copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); - break; - case i16: - case u16: - case f16: - case bf16: - copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); - break; - case u8: - case i8: - copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); - break; - default: - OPENVINO_THROW("Unsupported precision by fill_data_with_broadcast function"); + case u64: + case i64: + copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); + break; + case f32: + case i32: + copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); + break; + case i16: + case u16: + case f16: + case bf16: + copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); + break; + case u8: + case i8: + copy_7D(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); + break; + default: + OPENVINO_THROW("Unsupported precision by fill_data_with_broadcast function"); } } -template::type = 0> +template ::type = 0> void copy_tensor_with_convert(const ov::Tensor& src_tensor, ov::Tensor& dst_tensor) { using SRC_TYPE = typename ov::fundamental_type_for; using DST_TYPE = typename ov::fundamental_type_for; @@ -296,12 +338,14 @@ void copy_tensor_with_convert(const ov::Tensor& src_tensor, ov::Tensor& dst_tens auto dst_ptr = dst_tensor.data(); - auto converter = [] (SRC_TYPE value) {return static_cast(value);}; + auto converter = [](SRC_TYPE value) { + return static_cast(value); + }; std::transform(src_ptr, src_ptr + src_size, dst_ptr, converter); } -template::type = 0> +template ::type = 0> void copy_tensor_with_convert(const ov::Tensor& src_tensor, ov::Tensor& dst_tensor) { src_tensor.copy_to(dst_tensor); } @@ -310,30 +354,46 @@ ov::Tensor make_tensor_with_precision_convert(const ov::Tensor& tensor, ov::elem ov::Tensor new_tensor(prc, tensor.get_shape()); auto src_prc = tensor.get_element_type(); -#define CASE0(SRC_PRC, DST_PRC) case ov::element::DST_PRC : \ - copy_tensor_with_convert (tensor, new_tensor); break; - -#define CASE(SRC_PRC) \ - case ov::element::SRC_PRC: \ - switch (prc) { \ - CASE0(SRC_PRC, bf16) \ - CASE0(SRC_PRC, f16) \ - CASE0(SRC_PRC, f32) \ - CASE0(SRC_PRC, f64) \ - CASE0(SRC_PRC, i8) \ - CASE0(SRC_PRC, i16) \ - CASE0(SRC_PRC, i32) \ - CASE0(SRC_PRC, i64) \ - CASE0(SRC_PRC, u8) \ - CASE0(SRC_PRC, u16) \ - CASE0(SRC_PRC, u32) \ - CASE0(SRC_PRC, u64) \ - default: OPENVINO_THROW("Unsupported precision case: ", prc.c_type_string()); \ - } break; +#define CASE0(SRC_PRC, DST_PRC) \ + case ov::element::DST_PRC: \ + copy_tensor_with_convert(tensor, new_tensor); \ + break; + +#define CASE(SRC_PRC) \ + case ov::element::SRC_PRC: \ + switch (prc) { \ + CASE0(SRC_PRC, bf16) \ + CASE0(SRC_PRC, f16) \ + CASE0(SRC_PRC, f32) \ + CASE0(SRC_PRC, f64) \ + CASE0(SRC_PRC, i8) \ + CASE0(SRC_PRC, i16) \ + CASE0(SRC_PRC, i32) \ + CASE0(SRC_PRC, i64) \ + CASE0(SRC_PRC, u8) \ + CASE0(SRC_PRC, u16) \ + CASE0(SRC_PRC, u32) \ + CASE0(SRC_PRC, u64) \ + default: \ + OPENVINO_THROW("Unsupported precision case: ", prc.c_type_string()); \ + } \ + break; switch (src_prc) { - CASE(f64); CASE(f32); CASE(f16); CASE(bf16); CASE(i64); CASE(u64); CASE(i32); CASE(u32); CASE(i16); CASE(u16); CASE(i8); CASE(u8); - default: OPENVINO_THROW("Unsupported precision case: ", src_prc.c_type_string()); + CASE(f64); 
+ CASE(f32); + CASE(f16); + CASE(bf16); + CASE(i64); + CASE(u64); + CASE(i32); + CASE(u32); + CASE(i16); + CASE(u16); + CASE(i8); + CASE(u8); + default: + OPENVINO_THROW("Unsupported precision case: ", src_prc.c_type_string()); } #undef CASE0 #undef CASE @@ -356,44 +416,61 @@ void fill_data_with_broadcast(ov::Tensor& tensor, size_t axis, std::vector -void fill_tensor_random(ov::Tensor& tensor, const uint32_t range, const int32_t start_from, const int32_t k, const int seed) { +template +void fill_tensor_random(ov::Tensor& tensor, + const uint32_t range, + const int32_t start_from, + const int32_t k, + const int seed) { using T = typename ov::element_type_traits
::value_type; - auto *rawBlobDataPtr = static_cast(tensor.data()); - if (DT == ov::element::u4 || DT == ov::element::i4 || - DT == ov::element::u1) { + auto* rawBlobDataPtr = static_cast(tensor.data()); + if (DT == ov::element::u4 || DT == ov::element::i4 || DT == ov::element::u1) { fill_data_random(rawBlobDataPtr, tensor.get_byte_size(), range, start_from, k, seed); } else { fill_data_random(rawBlobDataPtr, tensor.get_size(), range, start_from, k, seed); } } -template -void fill_tensor_random_float(ov::Tensor& tensor, const double range, const double start_from, const int32_t k, const int seed) { +template +void fill_tensor_random_float(ov::Tensor& tensor, + const double range, + const double start_from, + const int32_t k, + const int seed) { using T = typename ov::element_type_traits
::value_type; std::default_random_engine random(seed); // 1/k is the resolution of the floating point numbers std::uniform_real_distribution distribution(k * start_from, k * (start_from + range)); - auto *rawBlobDataPtr = static_cast(tensor.data()); + auto* rawBlobDataPtr = static_cast(tensor.data()); for (size_t i = 0; i < tensor.get_size(); i++) { auto value = static_cast(distribution(random)); value /= static_cast(k); if (DT == ov::element::Type_t::f16) { - rawBlobDataPtr[i] = static_cast(ngraph::float16(value).to_bits()); + rawBlobDataPtr[i] = static_cast(ov::float16(value).to_bits()); } else if (DT == ov::element::Type_t::bf16) { - rawBlobDataPtr[i] = static_cast(ngraph::bfloat16(value).to_bits()); + rawBlobDataPtr[i] = static_cast(ov::bfloat16(value).to_bits()); } else { rawBlobDataPtr[i] = static_cast(value); } } } -void fill_tensor_random(ov::Tensor& tensor, const double range, const double start_from, const int32_t k, const int seed) { +void fill_tensor_random(ov::Tensor& tensor, + const double range, + const double start_from, + const int32_t k, + const int seed) { auto element_type = tensor.get_element_type(); -#define CASE(X) case X: fill_tensor_random(tensor, static_cast(range), static_cast(start_from), k, seed); break; -#define CASE_FLOAT(X) case X: fill_tensor_random_float(tensor, range, start_from, k, seed); break; +#define CASE(X) \ + case X: \ + fill_tensor_random(tensor, static_cast(range), static_cast(start_from), k, seed); \ + break; +#define CASE_FLOAT(X) \ + case X: \ + fill_tensor_random_float(tensor, range, start_from, k, seed); \ + break; switch (element_type) { CASE_FLOAT(ov::element::f64) @@ -411,8 +488,8 @@ void fill_tensor_random(ov::Tensor& tensor, const double range, const double sta CASE(ov::element::i16) CASE(ov::element::i32) CASE(ov::element::i64) - default: - OPENVINO_THROW("Wrong precision specified: ", element_type); + default: + OPENVINO_THROW("Wrong precision specified: ", element_type); } #undef CASE #undef CASE_FLOAT diff --git a/src/tests/test_utils/common_test_utils/src/file_utils.cpp b/src/tests/test_utils/common_test_utils/src/file_utils.cpp index 0b8fcfcba5e810..dc653f9940c2b2 100644 --- a/src/tests/test_utils/common_test_utils/src/file_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/file_utils.cpp @@ -7,20 +7,20 @@ #include "precomp.hpp" #ifdef __APPLE__ -# include +# include #endif #ifdef _WIN32 -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include -# include -# include +# ifndef NOMINMAX +# define NOMINMAX +# endif +# include +# include +# include #else -# include -# include -# include +# include +# include +# include #endif namespace ov { @@ -35,7 +35,7 @@ std::string getExecutableDirectory() { #elif defined(__APPLE__) Dl_info info; dladdr(reinterpret_cast(getExecutableDirectory), &info); - const char * buffer = info.dli_fname; + const char* buffer = info.dli_fname; int len = std::strlen(buffer); #else char buffer[PATH_MAX]; @@ -51,7 +51,7 @@ std::string getExecutableDirectory() { std::string getCurrentWorkingDir() { std::string path; #ifdef _WIN32 - char * buffer = _getcwd(NULL, 0); + char* buffer = _getcwd(NULL, 0); if (buffer != NULL) { path = std::string(buffer); free(buffer); diff --git a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp index c95ba61ffbc7e1..0100f8f1416ff7 100644 --- a/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp +++ b/src/tests/test_utils/common_test_utils/src/graph_comparator.cpp @@ -895,6 +895,7 @@ 
void check_rt_info(const std::shared_ptr& f) { namespace attributes { namespace detail { +OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccessor& adapter) { if (auto inputs = ov::as_type>(&adapter)) { insert(name, inputs->get()); @@ -922,6 +923,7 @@ void ReadAndStoreAttributes::on_adapter(const std::string& name, ov::ValueAccess adapter.get_type_info().name + "']"; } } +OPENVINO_SUPPRESS_DEPRECATED_END template void ReadAndCompareAttributes::verify(const std::string& name, const AttrValue& attr_value) { if (should_return()) { @@ -940,6 +942,7 @@ void ReadAndCompareAttributes::verify(const std::string& name, const AttrValue& } } +OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndCompareAttributes::verify_mem_buf(const std::string& name, const std::shared_ptr& buffer) { if (should_return()) { @@ -958,6 +961,7 @@ void ReadAndCompareAttributes::verify_mem_buf(const std::string& name, return; } } +OPENVINO_SUPPRESS_DEPRECATED_END void ReadAndCompareAttributes::verify_function(const std::string& name, ModelAccessor& adapter) { if (should_return()) { @@ -976,6 +980,7 @@ void ReadAndCompareAttributes::verify_function(const std::string& name, ModelAcc } } +OPENVINO_SUPPRESS_DEPRECATED_START void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueAccessor& adapter) { if (auto inputs = ov::as_type>(&adapter)) { verify(name, inputs->get()); @@ -1000,6 +1005,7 @@ void ReadAndCompareAttributes::verify_others(const std::string& name, ov::ValueA adapter.get_type_info().name + "']"; } } +OPENVINO_SUPPRESS_DEPRECATED_END } // namespace detail diff --git a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp index 7ccc2041f90ed3..f455b49abdcb04 100644 --- a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp @@ -11,18 +11,22 @@ namespace ov { namespace test { namespace utils { -ov::Tensor create_and_fill_tensor( - const ov::element::Type element_type, - const ov::Shape& shape, - const uint32_t range, - const double_t start_from, - const int32_t resolution, - const int seed) { +ov::Tensor create_and_fill_tensor(const ov::element::Type element_type, + const ov::Shape& shape, + const uint32_t range, + const double_t start_from, + const int32_t resolution, + const int seed) { auto tensor = ov::Tensor{element_type, shape}; -#define CASE(X) case X: fill_data_random( \ - tensor.data::value_type>(), \ - shape_size(shape), \ - range, start_from, resolution, seed); break; +#define CASE(X) \ + case X: \ + fill_data_random(tensor.data::value_type>(), \ + shape_size(shape), \ + range, \ + start_from, \ + resolution, \ + seed); \ + break; switch (element_type) { CASE(ov::element::Type_t::boolean) CASE(ov::element::Type_t::i8) @@ -37,34 +41,38 @@ ov::Tensor create_and_fill_tensor( CASE(ov::element::Type_t::f16) CASE(ov::element::Type_t::f32) CASE(ov::element::Type_t::f64) - case ov::element::Type_t::u1: - case ov::element::Type_t::i4: - case ov::element::Type_t::u4: - fill_data_random( - static_cast(tensor.data()), - tensor.get_byte_size(), - range, start_from, resolution, seed); break; - default: OPENVINO_THROW("Unsupported element type: ", element_type); + case ov::element::Type_t::u1: + case ov::element::Type_t::i4: + case ov::element::Type_t::u4: + fill_data_random(static_cast(tensor.data()), + tensor.get_byte_size(), + range, + start_from, + resolution, + seed); + break; + default: + 
OPENVINO_THROW("Unsupported element type: ", element_type); } #undef CASE return tensor; } ov::Tensor create_and_fill_tensor_unique_sequence(const ov::element::Type element_type, - const ov::Shape& shape, - const int32_t start_from, - const int32_t resolution, - const int seed) { + const ov::Shape& shape, + const int32_t start_from, + const int32_t resolution, + const int seed) { auto tensor = ov::Tensor{element_type, shape}; auto range = shape_size(shape) * 2; -#define CASE(X) \ - case X: \ +#define CASE(X) \ + case X: \ fill_random_unique_sequence(tensor.data::value_type>(), \ - shape_size(shape), \ - range, \ - start_from, \ - resolution, \ - seed); \ + shape_size(shape), \ + range, \ + start_from, \ + resolution, \ + seed); \ break; switch (element_type) { @@ -85,11 +93,11 @@ ov::Tensor create_and_fill_tensor_unique_sequence(const ov::element::Type elemen case ov::element::Type_t::i4: case ov::element::Type_t::u4: fill_random_unique_sequence(static_cast(tensor.data()), - tensor.get_byte_size(), - range, - start_from, - resolution, - seed); + tensor.get_byte_size(), + range, + start_from, + resolution, + seed); break; default: OPENVINO_THROW("Unsupported element type: ", element_type); @@ -98,17 +106,20 @@ ov::Tensor create_and_fill_tensor_unique_sequence(const ov::element::Type elemen return tensor; } -ov::runtime::Tensor create_and_fill_tensor_normal_distribution( - const ov::element::Type element_type, - const ov::Shape& shape, - const float mean, - const float stddev, - const int seed) { +ov::runtime::Tensor create_and_fill_tensor_normal_distribution(const ov::element::Type element_type, + const ov::Shape& shape, + const float mean, + const float stddev, + const int seed) { auto tensor = ov::runtime::Tensor{element_type, shape}; -#define CASE(X) case X: fill_data_ptr_normal_random_float( \ - tensor.data::value_type>(), \ - shape_size(shape), \ - mean, stddev, seed); break; +#define CASE(X) \ + case X: \ + fill_data_ptr_normal_random_float(tensor.data::value_type>(), \ + shape_size(shape), \ + mean, \ + stddev, \ + seed); \ + break; switch (element_type) { CASE(ov::element::Type_t::boolean) CASE(ov::element::Type_t::i8) @@ -123,28 +134,36 @@ ov::runtime::Tensor create_and_fill_tensor_normal_distribution( CASE(ov::element::Type_t::f16) CASE(ov::element::Type_t::f32) CASE(ov::element::Type_t::f64) - case ov::element::Type_t::u1: - case ov::element::Type_t::i4: - case ov::element::Type_t::u4: - fill_data_ptr_normal_random_float( - static_cast(tensor.data()), - tensor.get_byte_size(), - mean, stddev, seed); break; - default: OPENVINO_THROW("Unsupported element type: ", element_type); + case ov::element::Type_t::u1: + case ov::element::Type_t::i4: + case ov::element::Type_t::u4: + fill_data_ptr_normal_random_float(static_cast(tensor.data()), + tensor.get_byte_size(), + mean, + stddev, + seed); + break; + default: + OPENVINO_THROW("Unsupported element type: ", element_type); } #undef CASE return tensor; } -ov::runtime::Tensor create_and_fill_tensor_consistently( - const ov::element::Type element_type, - const ov::Shape& shape, - const uint32_t range, - const int32_t start_from, - const int32_t resolution) { +ov::runtime::Tensor create_and_fill_tensor_consistently(const ov::element::Type element_type, + const ov::Shape& shape, + const uint32_t range, + const int32_t start_from, + const int32_t resolution) { auto tensor = ov::runtime::Tensor{element_type, shape}; -#define CASE(X) case X: fill_data_ptr_consistently(tensor.data::value_type>(), \ -tensor.get_size(), range, start_from, 
resolution); break; +#define CASE(X) \ + case X: \ + fill_data_ptr_consistently(tensor.data::value_type>(), \ + tensor.get_size(), \ + range, \ + start_from, \ + resolution); \ + break; switch (element_type) { CASE(ov::element::Type_t::boolean) CASE(ov::element::Type_t::i8) @@ -159,13 +178,17 @@ tensor.get_size(), range, start_from, resolution); break; CASE(ov::element::Type_t::f16) CASE(ov::element::Type_t::f32) CASE(ov::element::Type_t::f64) - case ov::element::Type_t::u1: - case ov::element::Type_t::i4: - case ov::element::Type_t::u4: - fill_data_ptr_consistently( - static_cast(tensor.data()), - tensor.get_byte_size(), range, start_from, resolution); break; - default: OPENVINO_THROW("Unsupported element type: ", element_type); + case ov::element::Type_t::u1: + case ov::element::Type_t::i4: + case ov::element::Type_t::u4: + fill_data_ptr_consistently(static_cast(tensor.data()), + tensor.get_byte_size(), + range, + start_from, + resolution); + break; + default: + OPENVINO_THROW("Unsupported element type: ", element_type); } #undef CASE return tensor; @@ -205,16 +228,16 @@ inline double calculate_median(std::vector& abs_values) { auto expected_shape = abs_values.size(); if (expected_shape % 2) { std::nth_element(abs_values.begin(), abs_values.begin() + expected_shape / 2, abs_values.end()); - abs_median = abs_values[expected_shape / 2]; + abs_median = abs_values[expected_shape / 2]; } else { std::nth_element(abs_values.begin(), abs_values.begin() + expected_shape / 2, abs_values.end()); std::nth_element(abs_values.begin(), abs_values.begin() + (expected_shape - 1) / 2, abs_values.end()); - abs_median = (abs_values[(expected_shape - 1) / 2] + abs_values[expected_shape / 2]) / 2.0; + abs_median = (abs_values[(expected_shape - 1) / 2] + abs_values[expected_shape / 2]) / 2.0; } return abs_median; } -template +template void compare(const ov::Tensor& expected, const ov::Tensor& actual, const double abs_threshold_ = std::numeric_limits::max(), @@ -224,7 +247,7 @@ void compare(const ov::Tensor& expected, if (expected_shape != actual_shape) { std::ostringstream out_stream; out_stream << "Expected and actual shape are different: " << expected_shape << " " << actual_shape; - throw std::runtime_error(out_stream.str()); + throw std::runtime_error(out_stream.str()); } if (shape_size(actual_shape) == 0) { return; @@ -281,52 +304,55 @@ void compare(const ov::Tensor& expected, if (!(less_or_equal(abs_error.max, abs_threshold) && less_or_equal(rel_error.max, rel_threshold))) { std::ostringstream out_stream; - out_stream << "abs_max < abs_threshold && rel_max < rel_threshold" << - "\n\t abs_max: " << abs_error.max << - "\n\t\t coordinate " << abs_error.max_coordinate<< - "; abs errors count " << abs_error.count << "; abs mean " << - abs_error.mean << "; abs threshold " << abs_threshold << - "\n\t rel_max: " << rel_error.max << - "\n\t\t coordinate " << rel_error.max_coordinate << - "; rel errors count " << rel_error.count << "; rel mean " << - rel_error.mean << "; rel threshold " << rel_threshold; + out_stream << "abs_max < abs_threshold && rel_max < rel_threshold" + << "\n\t abs_max: " << abs_error.max << "\n\t\t coordinate " << abs_error.max_coordinate + << "; abs errors count " << abs_error.count << "; abs mean " << abs_error.mean << "; abs threshold " + << abs_threshold << "\n\t rel_max: " << rel_error.max << "\n\t\t coordinate " + << rel_error.max_coordinate << "; rel errors count " << rel_error.count << "; rel mean " + << rel_error.mean << "; rel threshold " << rel_threshold; throw 
std::runtime_error(out_stream.str()); } } -void compare( - const ov::Tensor& expected, - const ov::Tensor& actual, - const double abs_threshold, - const double rel_threshold) { -#define CASE0(X, Y) case Y : compare< \ - element_type_traits::value_type, \ - element_type_traits::value_type>( \ - expected, actual, abs_threshold, rel_threshold); break; +void compare(const ov::Tensor& expected, + const ov::Tensor& actual, + const double abs_threshold, + const double rel_threshold) { +#define CASE0(X, Y) \ + case Y: \ + compare::value_type, element_type_traits::value_type>(expected, \ + actual, \ + abs_threshold, \ + rel_threshold); \ + break; -#define CASE(X) \ - case X: \ - switch (actual.get_element_type()) { \ - CASE0(X, ov::element::Type_t::boolean) \ - CASE0(X, ov::element::Type_t::bf16) \ - CASE0(X, ov::element::Type_t::f16) \ - CASE0(X, ov::element::Type_t::f32) \ - CASE0(X, ov::element::Type_t::f64) \ - CASE0(X, ov::element::Type_t::i4) \ - CASE0(X, ov::element::Type_t::i8) \ - CASE0(X, ov::element::Type_t::i16) \ - CASE0(X, ov::element::Type_t::i32) \ - CASE0(X, ov::element::Type_t::i64) \ - CASE0(X, ov::element::Type_t::u1) \ - CASE0(X, ov::element::Type_t::u4) \ - CASE0(X, ov::element::Type_t::u8) \ - CASE0(X, ov::element::Type_t::u16) \ - CASE0(X, ov::element::Type_t::u32) \ - CASE0(X, ov::element::Type_t::u64) \ - default: OPENVINO_THROW("Unsupported element type: ", \ - "expected ", expected.get_element_type(), \ - ", actual ", actual.get_element_type()); \ - } break; +#define CASE(X) \ + case X: \ + switch (actual.get_element_type()) { \ + CASE0(X, ov::element::Type_t::boolean) \ + CASE0(X, ov::element::Type_t::bf16) \ + CASE0(X, ov::element::Type_t::f16) \ + CASE0(X, ov::element::Type_t::f32) \ + CASE0(X, ov::element::Type_t::f64) \ + CASE0(X, ov::element::Type_t::i4) \ + CASE0(X, ov::element::Type_t::i8) \ + CASE0(X, ov::element::Type_t::i16) \ + CASE0(X, ov::element::Type_t::i32) \ + CASE0(X, ov::element::Type_t::i64) \ + CASE0(X, ov::element::Type_t::u1) \ + CASE0(X, ov::element::Type_t::u4) \ + CASE0(X, ov::element::Type_t::u8) \ + CASE0(X, ov::element::Type_t::u16) \ + CASE0(X, ov::element::Type_t::u32) \ + CASE0(X, ov::element::Type_t::u64) \ + default: \ + OPENVINO_THROW("Unsupported element type: ", \ + "expected ", \ + expected.get_element_type(), \ + ", actual ", \ + actual.get_element_type()); \ + } \ + break; switch (expected.get_element_type()) { CASE(ov::element::Type_t::boolean) @@ -345,7 +371,8 @@ void compare( CASE(ov::element::Type_t::u16) CASE(ov::element::Type_t::u32) CASE(ov::element::Type_t::u64) - default: OPENVINO_THROW("Unsupported element type: ", expected.get_element_type()); + default: + OPENVINO_THROW("Unsupported element type: ", expected.get_element_type()); } #undef CASE0 #undef CASE diff --git a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp index ef179e61771186..18af2ec243a1b3 100644 --- a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp @@ -32,8 +32,7 @@ class CopyTensorNamesToRefModel : public ov::pass::ModelPass { } // namespace pass } // namespace ov -TransformationTestsF::TransformationTestsF() - : comparator(FunctionsComparator::no_default()) { +TransformationTestsF::TransformationTestsF() : comparator(FunctionsComparator::no_default()) { m_unh = std::make_shared(); comparator.enable(FunctionsComparator::CmpValues::NODES); comparator.enable(FunctionsComparator::CmpValues::PRECISIONS); 
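The `compare()` utility reformatted above walks both tensors, tracks the largest absolute and relative errors, and throws if either maximum exceeds its threshold; the nested `CASE`/`CASE0` macros merely instantiate that templated check for every combination of expected and actual `ov::element` types. The following is a minimal standalone sketch of the element-wise idea only: the helper name `check_close` and the plain `std::vector<float>` inputs are illustrative and are not part of the OpenVINO test utilities, and the real implementation accumulates max/mean error statistics before the final check rather than throwing per element.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <sstream>
#include <stdexcept>
#include <vector>

// Sketch (assumed names) of the abs/rel threshold comparison performed by the test utility.
void check_close(const std::vector<float>& expected,
                 const std::vector<float>& actual,
                 double abs_threshold,
                 double rel_threshold) {
    if (expected.size() != actual.size())
        throw std::runtime_error("Expected and actual sizes are different");
    for (std::size_t i = 0; i < expected.size(); ++i) {
        const double abs_error = std::fabs(static_cast<double>(expected[i]) - static_cast<double>(actual[i]));
        // Guard the denominator so a zero reference value does not divide by zero.
        const double denom = std::max(std::fabs(static_cast<double>(expected[i])), 1e-12);
        const double rel_error = abs_error / denom;
        if (abs_error > abs_threshold || rel_error > rel_threshold) {
            std::ostringstream out;
            out << "Mismatch at index " << i << ": abs error " << abs_error << " (threshold " << abs_threshold
                << "), rel error " << rel_error << " (threshold " << rel_threshold << ")";
            throw std::runtime_error(out.str());
        }
    }
}

In the patched utility the same body is stamped out through the `CASE0`/`CASE` macros for each `ov::element::Type_t`, which avoids hand-writing the full grid of expected/actual element-type dispatches while keeping the per-type `element_type_traits` instantiation explicit.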
diff --git a/src/tests/test_utils/common_test_utils/src/precomp.hpp b/src/tests/test_utils/common_test_utils/src/precomp.hpp index dbdfa9a360a2ba..9242ac9aa80e7a 100644 --- a/src/tests/test_utils/common_test_utils/src/precomp.hpp +++ b/src/tests/test_utils/common_test_utils/src/precomp.hpp @@ -5,31 +5,31 @@ #pragma once #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include #include #include #include #include +#include #include #include #include +#include #include #include #include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include \ No newline at end of file +#include \ No newline at end of file diff --git a/src/tests/test_utils/common_test_utils/src/test_common.cpp b/src/tests/test_utils/common_test_utils/src/test_common.cpp index 95fc3f1772fd77..0a84aab376d4d6 100644 --- a/src/tests/test_utils/common_test_utils/src/test_common.cpp +++ b/src/tests/test_utils/common_test_utils/src/test_common.cpp @@ -44,18 +44,26 @@ std::string TestsCommon::GetTimestamp() { } std::string TestsCommon::GetTestName() const { - std::string test_name = - ::testing::UnitTest::GetInstance()->current_test_info()->name(); - std::replace_if(test_name.begin(), test_name.end(), - [](char c) { return !std::isalnum(c); }, '_'); + std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + std::replace_if( + test_name.begin(), + test_name.end(), + [](char c) { + return !std::isalnum(c); + }, + '_'); return test_name; } std::string TestsCommon::GetFullTestName() const { - std::string suite_name = - ::testing::UnitTest::GetInstance()->current_test_info()->test_suite_name(); - std::replace_if(suite_name.begin(), suite_name.end(), - [](char c) { return !std::isalnum(c); }, '_'); + std::string suite_name = ::testing::UnitTest::GetInstance()->current_test_info()->test_suite_name(); + std::replace_if( + suite_name.begin(), + suite_name.end(), + [](char c) { + return !std::isalnum(c); + }, + '_'); std::string test_name = GetTestName(); diff --git a/src/tests/test_utils/common_test_utils/src/test_constants.cpp b/src/tests/test_utils/common_test_utils/src/test_constants.cpp index 7018d0283f1548..8270a61dbde724 100644 --- a/src/tests/test_utils/common_test_utils/src/test_constants.cpp +++ b/src/tests/test_utils/common_test_utils/src/test_constants.cpp @@ -8,15 +8,15 @@ namespace ov { namespace test { namespace utils { -const char *DEVICE_AUTO = "AUTO"; -const char *DEVICE_CPU = "CPU"; -const char *DEVICE_GNA = "GNA"; -const char *DEVICE_GPU = "GPU"; -const char *DEVICE_KEEMBAY = "NPU"; -const char *DEVICE_BATCH = "BATCH"; -const char *DEVICE_MULTI = "MULTI"; -const char *DEVICE_TEMPLATE = "TEMPLATE"; -const char *DEVICE_HETERO = "HETERO"; +const char* DEVICE_AUTO = "AUTO"; +const char* DEVICE_CPU = "CPU"; +const char* DEVICE_GNA = "GNA"; +const char* DEVICE_GPU = "GPU"; +const char* DEVICE_KEEMBAY = "NPU"; +const char* DEVICE_BATCH = "BATCH"; +const char* DEVICE_MULTI = "MULTI"; +const char* DEVICE_TEMPLATE = "TEMPLATE"; +const char* DEVICE_HETERO = "HETERO"; } // namespace utils } // namespace test diff --git a/src/tests/test_utils/common_test_utils/src/unicode_utils.cpp b/src/tests/test_utils/common_test_utils/src/unicode_utils.cpp index 4b4feca22ea0c5..534c3c4daef826 100644 --- a/src/tests/test_utils/common_test_utils/src/unicode_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/unicode_utils.cpp @@ -10,16 +10,14 
@@ namespace ov { namespace test { namespace utils { -const std::vector test_unicode_postfix_vector = { - L"unicode_Яㅎあ", - L"ひらがな日本語", - L"大家有天分", - L"עפצקרשתםןףץ", - L"ث خ ذ ض ظ غ", - L"그것이정당하다", - L"АБВГДЕЁЖЗИЙ", - L"СТУФХЦЧШЩЬЮЯ" -}; +const std::vector test_unicode_postfix_vector = {L"unicode_Яㅎあ", + L"ひらがな日本語", + L"大家有天分", + L"עפצקרשתםןףץ", + L"ث خ ذ ض ظ غ", + L"그것이정당하다", + L"АБВГДЕЁЖЗИЙ", + L"СТУФХЦЧШЩЬЮЯ"}; } // namespace utils } // namespace test diff --git a/src/tests/test_utils/common_test_utils/tests/graph_comparator_tests.cpp b/src/tests/test_utils/common_test_utils/tests/graph_comparator_tests.cpp index f9dd8436e78a14..5f87391829d65c 100644 --- a/src/tests/test_utils/common_test_utils/tests/graph_comparator_tests.cpp +++ b/src/tests/test_utils/common_test_utils/tests/graph_comparator_tests.cpp @@ -4,8 +4,7 @@ #include -#include - +#include "common_test_utils/graph_comparator.hpp" #include "openvino/op/add.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/convolution.hpp" @@ -15,24 +14,25 @@ #include "openvino/op/squeeze.hpp" #include "openvino/op/tensor_iterator.hpp" #include "openvino/op/unsqueeze.hpp" +#include "openvino/op/util/variable.hpp" TEST(GraphComparatorTests, AllEnablePositiveCheck) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add}, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); function = function_ref->clone(); } comparator.enable(FunctionsComparator::NAMES) - .enable(FunctionsComparator::NODES) - .enable(FunctionsComparator::CONST_VALUES) - .enable(FunctionsComparator::PRECISIONS) - .enable(FunctionsComparator::ATTRIBUTES) - .enable(FunctionsComparator::RUNTIME_KEYS) - .enable(FunctionsComparator::TENSOR_NAMES); + .enable(FunctionsComparator::NODES) + .enable(FunctionsComparator::CONST_VALUES) + .enable(FunctionsComparator::PRECISIONS) + .enable(FunctionsComparator::ATTRIBUTES) + .enable(FunctionsComparator::RUNTIME_KEYS) + .enable(FunctionsComparator::TENSOR_NAMES); auto res = comparator.compare(function, function_ref); ASSERT_TRUE(res.valid) << res.message; @@ -42,16 +42,16 @@ TEST(GraphComparatorTests, CheckbyDefault) { FunctionsComparator comparator(FunctionsComparator::with_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto input2 = std::make_shared(ngraph::element::i64, ov::Shape{3}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto input2 = std::make_shared(ov::element::i64, ov::Shape{3}); auto add = std::make_shared(input, input2); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input, input2 }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input, input2}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {12}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {12}); auto add = 
std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; @@ -61,18 +61,18 @@ TEST(GraphComparatorTests, CheckResultsNumber) { FunctionsComparator comparator(FunctionsComparator::with_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto input2 = std::make_shared(ngraph::element::i64, ov::Shape{3}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto input2 = std::make_shared(ov::element::i64, ov::Shape{3}); auto add = std::make_shared(input, input2); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input, input2 }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input, input2}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {12}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {12}); auto add = std::make_shared(input, constant); auto result1 = std::make_shared(constant); auto result2 = std::make_shared(add); - function = std::make_shared(ngraph::ResultVector{ result1, result2 }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::ResultVector{result1, result2}, ov::ParameterVector{input}); } auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; @@ -82,25 +82,24 @@ TEST(GraphComparatorTests, NamesCheckPositive) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); input->set_friendly_name("new_name1"); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); constant->set_friendly_name("new_name2"); auto add = std::make_shared(input, constant); add->set_friendly_name("new_name3"); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); input->set_friendly_name("new_name1"); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); constant->set_friendly_name("new_name2"); auto add = std::make_shared(input, constant); add->set_friendly_name("new_name3"); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } - comparator.enable(FunctionsComparator::NAMES) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::NAMES).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_TRUE(res.valid) << res.message; } @@ -109,25 +108,24 @@ TEST(GraphComparatorTests, NamesCheckNegative) { FunctionsComparator 
comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); input->set_friendly_name("new_name1"); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); constant->set_friendly_name("new_name2"); auto add = std::make_shared(input, constant); add->set_friendly_name("new_name3"); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); input->set_friendly_name("new_name1"); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); constant->set_friendly_name("new_name2"); auto add = std::make_shared(input, constant); add->set_friendly_name("new_name3_different"); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } - comparator.enable(FunctionsComparator::NAMES) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::NAMES).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; } @@ -136,16 +134,16 @@ TEST(GraphComparatorTests, ConstCheckWithoutEnable) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {12}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {12}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } comparator.enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); @@ -156,19 +154,18 @@ TEST(GraphComparatorTests, ConstCheckNegative) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ 
input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {12}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {12}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } - comparator.enable(FunctionsComparator::CONST_VALUES) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::CONST_VALUES).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; } @@ -177,15 +174,14 @@ TEST(GraphComparatorTests, TensorNamesCheckNegative) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); function = function_ref->clone(); add->get_input_tensor(0).set_names({"new_name"}); } - comparator.enable(FunctionsComparator::TENSOR_NAMES) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::TENSOR_NAMES).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; } @@ -194,10 +190,10 @@ TEST(GraphComparatorTests, TensorNamesCheckWithoutEnable) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); function = function_ref->clone(); add->get_input_tensor(0).set_names({"new_name"}); } @@ -210,35 +206,36 @@ TEST(GraphComparatorTests, CheckAttributesNegative) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); - auto const_weights = ov::op::v0::Constant::create(ov::element::f16, - ov::Shape{ 1, 3, 3, 3 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 3, 12, 12}); + auto const_weights = ov::op::v0::Constant::create( + ov::element::f16, + ov::Shape{1, 3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}); auto convert_ins1 = std::make_shared(const_weights, ov::element::f32); auto conv = 
std::make_shared(input, convert_ins1, - ov::Strides{ 1, 1 }, - ov::CoordinateDiff{ 1, 1 }, - ov::CoordinateDiff{ 1, 1 }, - ov::Strides{ 1, 1 }); - function_ref = std::make_shared(ngraph::NodeVector{ conv }, ngraph::ParameterVector{ input }); - } - { - auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); - auto const_weights = ov::op::v0::Constant::create(ov::element::f16, - ov::Shape{ 1, 3, 3, 3 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + ov::Strides{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::Strides{1, 1}); + function_ref = std::make_shared(ov::NodeVector{conv}, ov::ParameterVector{input}); + } + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 3, 12, 12}); + auto const_weights = ov::op::v0::Constant::create( + ov::element::f16, + ov::Shape{1, 3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}); auto convert_ins1 = std::make_shared(const_weights, ov::element::f32); auto conv = std::make_shared(input, convert_ins1, - ov::Strides{ 1, 1 }, - ov::CoordinateDiff{ 0, 0 }, - ov::CoordinateDiff{ 0, 0 }, - ov::Strides{ 1, 1 }); - function = std::make_shared(ngraph::NodeVector{ conv }, ngraph::ParameterVector{ input }); - } - comparator.enable(FunctionsComparator::ATTRIBUTES) - .enable(FunctionsComparator::NODES); + ov::Strides{1, 1}, + ov::CoordinateDiff{0, 0}, + ov::CoordinateDiff{0, 0}, + ov::Strides{1, 1}); + function = std::make_shared(ov::NodeVector{conv}, ov::ParameterVector{input}); + } + comparator.enable(FunctionsComparator::ATTRIBUTES).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; } @@ -247,19 +244,18 @@ TEST(GraphComparatorTests, CheckPrecisionsNegative) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::f32, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::f32, {3}, {0}); + auto input = std::make_shared(ov::element::f32, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::f32, {3}, {0}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } - comparator.enable(FunctionsComparator::PRECISIONS) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::PRECISIONS).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; } @@ -268,16 +264,16 @@ TEST(GraphComparatorTests, CheckPrecisionsWithoutEnable) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = 
ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::f32, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::f32, {3}, {0}); + auto input = std::make_shared(ov::element::f32, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::f32, {3}, {0}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } comparator.enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); @@ -288,20 +284,19 @@ TEST(GraphComparatorTests, CheckRTInfo) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); add->get_rt_info()["my_info"] = 42; - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } - comparator.enable(FunctionsComparator::RUNTIME_KEYS) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::RUNTIME_KEYS).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; } @@ -310,20 +305,19 @@ TEST(GraphComparatorTests, CheckRTInfoReverse) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, 
{0}); auto add = std::make_shared(input, constant); add->get_rt_info()["my_info"] = 42; - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } - comparator.enable(FunctionsComparator::RUNTIME_KEYS) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::RUNTIME_KEYS).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_TRUE(res.valid) << res.message; } @@ -332,20 +326,19 @@ TEST(GraphComparatorTests, CheckRTInfoInput) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); add->input(0).get_rt_info()["my_info"] = 42; - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } - comparator.enable(FunctionsComparator::RUNTIME_KEYS) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::RUNTIME_KEYS).enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; } @@ -354,20 +347,19 @@ TEST(GraphComparatorTests, CheckRTInfoOutput) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); add->output(0).get_rt_info()["my_info"] = 42; - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{3}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {3}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{3}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {3}, {0}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } - comparator.enable(FunctionsComparator::RUNTIME_KEYS) - .enable(FunctionsComparator::NODES); + comparator.enable(FunctionsComparator::RUNTIME_KEYS).enable(FunctionsComparator::NODES); auto res = 
comparator.compare(function, function_ref); ASSERT_FALSE(res.valid) << res.message; } @@ -376,29 +368,28 @@ TEST(GraphComparatorTests, CheckTensorIteratorPositive) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto X = std::make_shared(ngraph::element::f32, ngraph::Shape{2, 1, 16}); - auto Y = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 128}); + auto X = std::make_shared(ov::element::f32, ov::Shape{2, 1, 16}); + auto Y = std::make_shared(ov::element::f32, ov::Shape{1, 128}); - auto Xi = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 1, 16}); - auto Yi = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 128}); + auto Xi = std::make_shared(ov::element::f32, ov::Shape{1, 1, 16}); + auto Yi = std::make_shared(ov::element::f32, ov::Shape{1, 128}); // Body - auto axis = ov::op::v0::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); + auto axis = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {0}); auto squeeze = std::make_shared(Xi, axis); - auto w_val = std::vector(384*16, 0); - auto r_val = std::vector(384*128, 0); + auto w_val = std::vector(384 * 16, 0); + auto r_val = std::vector(384 * 128, 0); auto b_val = std::vector(384, 0); - auto W = ov::op::v0::Constant::create(ngraph::element::f32, ngraph::Shape{384, 16}, w_val); - auto R = ov::op::v0::Constant::create(ngraph::element::f32, ngraph::Shape{384, 128}, r_val); - auto B = ov::op::v0::Constant::create(ngraph::element::f32, ngraph::Shape{384}, b_val); + auto W = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{384, 16}, w_val); + auto R = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{384, 128}, r_val); + auto B = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{384}, b_val); auto gru_cell = std::make_shared(squeeze, Yi, W, R, B, 128); auto res_1 = std::make_shared(gru_cell); auto unsqueeze = std::make_shared(gru_cell, axis); auto res_2 = std::make_shared(unsqueeze); - auto body = std::make_shared(ngraph::OutputVector{res_1, res_2}, - ngraph::ParameterVector{Xi, Yi}); + auto body = std::make_shared(ov::OutputVector{res_1, res_2}, ov::ParameterVector{Xi, Yi}); auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); @@ -410,8 +401,7 @@ TEST(GraphComparatorTests, CheckTensorIteratorPositive) { auto out1 = tensor_iterator->get_concatenated_slices(res_2, 0, 1, 1, -1, 0); auto res_ti_1 = std::make_shared(tensor_iterator->output(1)); - function_ref = std::make_shared(ngraph::NodeVector{res_ti_1}, - ngraph::ParameterVector{X, Y}); + function_ref = std::make_shared(ov::NodeVector{res_ti_1}, ov::ParameterVector{X, Y}); function = function_ref->clone(); } comparator.enable(FunctionsComparator::NODES); @@ -433,8 +423,8 @@ std::shared_ptr make_check_loop_model(bool different_body) { auto M_body = std::make_shared(ov::element::f32, ov::PartialShape::dynamic()); auto body_condition = std::make_shared(ov::element::boolean, ov::Shape{1}, true); - auto trip_count = std::make_shared(ngraph::element::i64, ov::Shape{1}, 3); - auto exec_condition = std::make_shared(ngraph::element::boolean, ov::Shape{1}, true); + auto trip_count = std::make_shared(ov::element::i64, ov::Shape{1}, 3); + auto exec_condition = std::make_shared(ov::element::boolean, ov::Shape{1}, true); // Body auto sum = std::make_shared(Xi, Yi); std::shared_ptr Zo; @@ -492,8 +482,8 @@ TEST(GraphComparatorTests, CheckSinksPositive) { auto arg = std::make_shared(ov::element::f32, ov::Shape{1, 1}); auto init_const = 
ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1}, {0}); const std::string variable_name("variable0"); - auto variable = std::make_shared(ngraph::VariableInfo{ov::PartialShape::dynamic(), - ov::element::dynamic, variable_name}); + auto variable = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, variable_name}); auto read = std::make_shared(init_const, variable); auto read2 = std::make_shared(init_const, variable); @@ -505,7 +495,8 @@ TEST(GraphComparatorTests, CheckSinksPositive) { auto res = std::make_shared(add); auto res2 = std::make_shared(add2); - function_ref = std::make_shared(ov::ResultVector({res, res2}), ov::SinkVector({assign, assign2}), + function_ref = std::make_shared(ov::ResultVector({res, res2}), + ov::SinkVector({assign, assign2}), ov::ParameterVector({arg})); function = function_ref->clone(); } @@ -521,8 +512,8 @@ TEST(GraphComparatorTests, CheckSinksNegative) { auto arg = std::make_shared(ov::element::f32, ov::Shape{1, 1}); auto init_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1}, {0}); const std::string variable_name("variable0"); - auto variable = std::make_shared(ngraph::VariableInfo{ov::PartialShape::dynamic(), - ov::element::dynamic, variable_name}); + auto variable = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, variable_name}); auto read = std::make_shared(init_const, variable); auto read2 = std::make_shared(init_const, variable); @@ -534,7 +525,8 @@ TEST(GraphComparatorTests, CheckSinksNegative) { auto res = std::make_shared(add); auto res2 = std::make_shared(add2); - function_ref = std::make_shared(ov::ResultVector({res, res2}), ov::SinkVector({assign, assign2}), + function_ref = std::make_shared(ov::ResultVector({res, res2}), + ov::SinkVector({assign, assign2}), ov::ParameterVector({arg})); } @@ -542,8 +534,8 @@ TEST(GraphComparatorTests, CheckSinksNegative) { auto arg = std::make_shared(ov::element::f32, ov::Shape{1, 1}); auto init_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 1}, {0}); const std::string variable_name("variable_different"); - auto variable = std::make_shared(ngraph::VariableInfo{ov::PartialShape::dynamic(), - ov::element::dynamic, variable_name}); + auto variable = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, variable_name}); auto read = std::make_shared(init_const, variable); auto read2 = std::make_shared(init_const, variable); @@ -555,7 +547,8 @@ TEST(GraphComparatorTests, CheckSinksNegative) { auto res = std::make_shared(add); auto res2 = std::make_shared(add2); - function = std::make_shared(ov::ResultVector({res, res2}), ov::SinkVector({assign, assign2}), + function = std::make_shared(ov::ResultVector({res, res2}), + ov::SinkVector({assign, assign2}), ov::ParameterVector({arg})); } comparator.enable(FunctionsComparator::NODES); @@ -567,10 +560,10 @@ TEST(GraphComparatorTests, DisableCheck) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input }); + 
function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); function = function_ref->clone(); } comparator.enable(FunctionsComparator::NODES); @@ -583,16 +576,16 @@ TEST(GraphComparatorTests, CheckAccuracyPositive) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add}, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add}, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } comparator.enable(FunctionsComparator::ACCURACY); auto res = comparator.compare(function, function_ref); @@ -603,16 +596,16 @@ TEST(GraphComparatorTests, CheckAccuracyNegative) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {12}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {12}); auto add = std::make_shared(input, constant); - function_ref = std::make_shared(ngraph::NodeVector{ add}, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {200}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {200}); auto add = std::make_shared(input, constant); - function = std::make_shared(ngraph::NodeVector{ add}, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{add}, ov::ParameterVector{input}); } comparator.enable(FunctionsComparator::ACCURACY); auto res = comparator.compare(function, function_ref); @@ -623,32 +616,34 @@ TEST(GraphComparatorTests, CheckAccuracyNotEnabled) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); - auto const_weights = ov::op::v0::Constant::create(ov::element::f16, - ov::Shape{ 1, 3, 3, 3 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 3, 12, 12}); + auto const_weights = ov::op::v0::Constant::create( + ov::element::f16, + ov::Shape{1, 3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}); auto convert_ins1 = 
std::make_shared(const_weights, ov::element::f32); auto conv = std::make_shared(input, convert_ins1, - ov::Strides{ 1, 1 }, - ov::CoordinateDiff{ 1, 1 }, - ov::CoordinateDiff{ 1, 1 }, - ov::Strides{ 1, 1 }); - function_ref = std::make_shared(ngraph::NodeVector{ conv }, ngraph::ParameterVector{ input }); - } - { - auto input = std::make_shared(ov::element::f32, ov::Shape{ 1, 3, 12, 12 }); - auto const_weights = ov::op::v0::Constant::create(ov::element::f16, - ov::Shape{ 1, 3, 3, 3 }, - { 1, 9, 3, 4, 5, 6, 7, 8, 9, 1, 12, 3, 9, 5, 0, 7, 8, 9, 1, 2, 12, 4, 9, 6, 7, 8, 9 }); + ov::Strides{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::Strides{1, 1}); + function_ref = std::make_shared(ov::NodeVector{conv}, ov::ParameterVector{input}); + } + { + auto input = std::make_shared(ov::element::f32, ov::Shape{1, 3, 12, 12}); + auto const_weights = ov::op::v0::Constant::create( + ov::element::f16, + ov::Shape{1, 3, 3, 3}, + {1, 9, 3, 4, 5, 6, 7, 8, 9, 1, 12, 3, 9, 5, 0, 7, 8, 9, 1, 2, 12, 4, 9, 6, 7, 8, 9}); auto convert_ins1 = std::make_shared(const_weights, ov::element::f32); auto conv = std::make_shared(input, convert_ins1, - ov::Strides{ 1, 1 }, - ov::CoordinateDiff{ 1, 1 }, - ov::CoordinateDiff{ 1, 1 }, - ov::Strides{ 1, 1 }); - function = std::make_shared(ngraph::NodeVector{ conv }, ngraph::ParameterVector{ input }); + ov::Strides{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::CoordinateDiff{1, 1}, + ov::Strides{1, 1}); + function = std::make_shared(ov::NodeVector{conv}, ov::ParameterVector{input}); } comparator.enable(FunctionsComparator::NODES); auto res = comparator.compare(function, function_ref); @@ -659,20 +654,20 @@ TEST(GraphComparatorTests, CheckConsumersCountPositive) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add_1 = std::make_shared(input, constant); auto add_2 = std::make_shared(input, constant); auto mul = std::make_shared(add_1, add_2); - function_ref = std::make_shared(ngraph::NodeVector{ mul }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{mul}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add_1 = std::make_shared(input, constant); auto add_2 = std::make_shared(input, constant); auto mul = std::make_shared(add_1, add_2); - function = std::make_shared(ngraph::NodeVector{ mul }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{mul}, ov::ParameterVector{input}); } comparator.enable(FunctionsComparator::NODES).enable(FunctionsComparator::CONSUMERS_COUNT); auto res = comparator.compare(function, function_ref); @@ -683,21 +678,21 @@ TEST(GraphComparatorTests, CheckConsumersCountNegative) { FunctionsComparator comparator(FunctionsComparator::no_default()); std::shared_ptr function, function_ref; { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = 
std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add_1 = std::make_shared(input, constant); auto add_2 = std::make_shared(input, constant); auto mul = std::make_shared(add_1, add_2); - function_ref = std::make_shared(ngraph::NodeVector{ mul }, ngraph::ParameterVector{ input }); + function_ref = std::make_shared(ov::NodeVector{mul}, ov::ParameterVector{input}); } { - auto input = std::make_shared(ngraph::element::i64, ov::Shape{1}); - auto constant_1 = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); - auto constant_2 = ov::op::v0::Constant::create(ngraph::element::i64, {1}, {0}); + auto input = std::make_shared(ov::element::i64, ov::Shape{1}); + auto constant_1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); + auto constant_2 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto add_1 = std::make_shared(input, constant_1); auto add_2 = std::make_shared(input, constant_2); auto mul = std::make_shared(add_1, add_2); - function = std::make_shared(ngraph::NodeVector{ mul }, ngraph::ParameterVector{ input }); + function = std::make_shared(ov::NodeVector{mul}, ov::ParameterVector{input}); } comparator.enable(FunctionsComparator::NODES).enable(FunctionsComparator::CONSUMERS_COUNT); auto res = comparator.compare(function, function_ref); From 3be8b58d2a6cad8b8f9f625bce8417f56423af93 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 12 Sep 2023 07:09:45 +0400 Subject: [PATCH 15/31] Update classes func tests (#19663) * Remove legacy classes from functional_test_utils * Fixed code style * Fixed build all for macOS * Suppress warning * Revert old functions for internal plugins --- .../tests/functional/core_threading.cpp | 2 +- .../tests/functional/ov_core_threading.cpp | 5 +- .../subgraphs_dumper/include/utils/node.hpp | 8 +- .../conformance_infra/include/conformance.hpp | 2 +- .../conformance_infra/src/main.cpp | 2 +- .../src/read_ir/read_ir.cpp | 2 +- .../src/behavior/plugin/hetero_synthetic.cpp | 4 +- .../functional/plugin/shared/src/main.cpp | 4 +- .../src/base/layer_test_utils.cpp | 2 +- .../src/base/ov_subgraph.cpp | 2 +- .../src/base/snippets_test_utils.cpp | 2 +- .../src/single_layer/conversion.cpp | 2 +- .../src/single_layer/memory.cpp | 5 +- .../src/subgraph/mul_conv_fusion.cpp | 16 +- .../functional_test_utils/CMakeLists.txt | 2 +- .../functional_test_utils/blob_utils.hpp | 506 ++++++++++-------- .../functional_test_utils/crash_handler.hpp | 6 +- .../functional_test_utils/node_utils.hpp | 8 +- .../functional_test_utils/ov_plugin_cache.hpp | 8 +- .../functional_test_utils/plugin_cache.hpp | 5 +- .../functional_test_utils/precision_utils.hpp | 81 ++- .../skip_tests_config.hpp | 38 +- .../summary/api_summary.hpp | 13 +- .../summary/environment.hpp | 4 +- .../functional_test_utils/summary/op_info.hpp | 4 +- .../summary/op_summary.hpp | 36 +- .../functional_test_utils/summary/summary.hpp | 38 +- .../test_model/test_model.hpp | 33 +- .../src/crash_handler.cpp | 30 +- .../src/ov_plugin_cache.cpp | 22 +- .../src/plugin_cache.cpp | 31 +- .../functional_test_utils/src/precomp.hpp | 17 +- .../src/skip_tests_config.cpp | 24 +- .../src/summary/api_summary.cpp | 104 ++-- .../src/summary/op_info.cpp | 11 +- .../src/summary/op_summary.cpp | 122 +++-- .../src/summary/summary.cpp | 37 +- .../src/test_model/test_model.cpp | 47 +- 38 files changed, 691 insertions(+), 594 deletions(-) diff --git a/src/inference/tests/functional/core_threading.cpp 
b/src/inference/tests/functional/core_threading.cpp index 12d83b591a0004..f3d73bf13423eb 100644 --- a/src/inference/tests/functional/core_threading.cpp +++ b/src/inference/tests/functional/core_threading.cpp @@ -33,7 +33,7 @@ class IECoreThreadingTests : public ::testing::Test { auto prefix = ov::test::utils::generateTestFilePrefix(); modelName = prefix + modelName; weightsName = prefix + weightsName; - FuncTestUtils::TestModel::generateTestModel(modelName, weightsName); + ov::test::utils::generate_test_model(modelName, weightsName); } void TearDown() override { diff --git a/src/inference/tests/functional/ov_core_threading.cpp b/src/inference/tests/functional/ov_core_threading.cpp index c03c1e453e99bc..c935c8952358f1 100644 --- a/src/inference/tests/functional/ov_core_threading.cpp +++ b/src/inference/tests/functional/ov_core_threading.cpp @@ -14,6 +14,7 @@ #include "common_test_utils/file_utils.hpp" #include "common_test_utils/test_assertions.hpp" #include "functional_test_utils/test_model/test_model.hpp" +#include "ie_extension.h" #include "openvino/runtime/core.hpp" #include "openvino/util/file_util.hpp" #ifdef __GLIBC__ @@ -32,7 +33,7 @@ class CoreThreadingTests : public ::testing::Test { auto prefix = ov::test::utils::generateTestFilePrefix(); modelName = prefix + modelName; weightsName = prefix + weightsName; - FuncTestUtils::TestModel::generateTestModel(modelName, weightsName); + ov::test::utils::generate_test_model(modelName, weightsName); } void TearDown() override { @@ -60,10 +61,12 @@ class CoreThreadingTests : public ::testing::Test { void safeAddExtension(ov::Core& core) { try { + OPENVINO_SUPPRESS_DEPRECATED_START auto extension = std::make_shared( ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), std::string("template_extension") + IE_BUILD_POSTFIX)); core.add_extension(extension); + OPENVINO_SUPPRESS_DEPRECATED_END } catch (const ov::Exception& ex) { ASSERT_STR_CONTAINS(ex.what(), "name: custom_opset. 
Opset"); } diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp index cc6eea4809c1f2..6d2412c639651e 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/node.hpp @@ -57,7 +57,7 @@ inline std::string get_node_type(const std::shared_ptr& node) { } static std::map get_max_ops_versions() { - std::map> unique_ops = FuncTestUtils::get_unique_ops(); + std::map> unique_ops = ov::test::utils::get_unique_ops(); std::map max_ops_versions; for (auto op_info : unique_ops) { @@ -78,7 +78,7 @@ static std::map get_last_opset_version_map() { std::string opset_name = std::prev(opset_map.end())->first; const ov::OpSet& opset = std::prev(opset_map.end())->second(); for (const auto& op : opset.get_type_info_set()) { - res[op.name] = FuncTestUtils::get_op_version(op.get_version()); + res[op.name] = ov::test::utils::get_op_version(op.get_version()); } return res; @@ -93,7 +93,7 @@ inline size_t get_node_priority_by_version(const std::shared_ptr& node size_t priority = 1; auto type_info = node->get_type_info(); if (max_ops_versions.count(type_info.name)) { - std::string version_id = FuncTestUtils::get_op_version(type_info.version_id); + std::string version_id = ov::test::utils::get_op_version(type_info.version_id); if (version_id == max_ops_versions[type_info.name]) { priority = 2; if (version_id == last_opset_versions_map[type_info.name]) { @@ -107,4 +107,4 @@ inline size_t get_node_priority_by_version(const std::shared_ptr& node } // namespace subgraph_dumper } // namespace tools -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/conformance.hpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/conformance.hpp index c3a9c64d2320d6..3704f2cb12e28c 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/conformance.hpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/conformance.hpp @@ -90,7 +90,7 @@ static std::set get_element_type_names() { return result; } -static auto unique_ops = FuncTestUtils::get_unique_ops(); +static auto unique_ops = ov::test::utils::get_unique_ops(); static auto element_type_names = get_element_type_names(); inline std::string get_ref_path(const std::string& model_path) { diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp index 81d6a212dad7e7..5e32ef4f9de47c 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp @@ -74,7 +74,7 @@ int main(int argc, char* argv[]) { throw std::runtime_error("Using mutually exclusive arguments: --extend_report and --report_unique_name"); } - FuncTestUtils::SkipTestsConfig::disable_tests_skipping = FLAGS_disable_test_config; + ov::test::utils::disable_tests_skipping = FLAGS_disable_test_config; ov::test::utils::OpSummary::setExtendReport(FLAGS_extend_report); ov::test::utils::OpSummary::setExtractBody(FLAGS_extract_body); ov::test::utils::OpSummary::setSaveReportWithUniqueName(FLAGS_report_unique_name); diff --git 
a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp index 80cd1641013771..d9f161eb632712 100644 --- a/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/op_conformance_runner/src/read_ir/read_ir.cpp @@ -136,7 +136,7 @@ void ReadIRTest::query_model() { } s.setDeviceName(targetDevice); - if (FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { + if (ov::test::utils::current_test_is_disabled()) { s.updateOPsStats(functionRefs, ov::test::utils::PassRate::Statuses::SKIPPED, rel_influence_coef); GTEST_SKIP() << "Disabled test due to configuration" << std::endl; } else { diff --git a/src/tests/functional/plugin/shared/src/behavior/plugin/hetero_synthetic.cpp b/src/tests/functional/plugin/shared/src/behavior/plugin/hetero_synthetic.cpp index aafc3c6b266df5..133db56d94e6be 100644 --- a/src/tests/functional/plugin/shared/src/behavior/plugin/hetero_synthetic.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/plugin/hetero_synthetic.cpp @@ -151,7 +151,7 @@ void HeteroSyntheticTest::SetUp() { } void HeteroSyntheticTest::TearDown() { - if (!FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { + if (!ov::test::utils::current_test_is_disabled()) { for (auto&& pluginName : _registredPlugins) { PluginCache::get().ie()->UnregisterPlugin(pluginName); } @@ -194,7 +194,7 @@ TEST_P(HeteroSyntheticTest, someLayersToMajorPluginOthersToFallback) { auto affinities = SetUpAffinity(); SCOPED_TRACE(affinities); Run(); - if (!FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { + if (!ov::test::utils::current_test_is_disabled()) { ASSERT_NE(nullptr, cnnNetwork.getFunction()); } } diff --git a/src/tests/functional/plugin/shared/src/main.cpp b/src/tests/functional/plugin/shared/src/main.cpp index 2d9295c1318be9..ec0869b1b9f412 100644 --- a/src/tests/functional/plugin/shared/src/main.cpp +++ b/src/tests/functional/plugin/shared/src/main.cpp @@ -11,12 +11,12 @@ #include "set_device_name.hpp" int main(int argc, char *argv[]) { - FuncTestUtils::SkipTestsConfig::disable_tests_skipping = false; + ov::test::utils::disable_tests_skipping = false; bool print_custom_help = false; std::string outputFolderPath("."); for (int i = 0; i < argc; ++i) { if (std::string(argv[i]) == "--disable_tests_skipping") { - FuncTestUtils::SkipTestsConfig::disable_tests_skipping = true; + ov::test::utils::disable_tests_skipping = true; } else if (std::string(argv[i]) == "--extract_body") { ov::test::utils::OpSummary::setExtractBody(true); } else if (std::string(argv[i]) == "--help") { diff --git a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index b53e9e95241336..5e325150357b4d 100644 --- a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -24,7 +24,7 @@ LayerTestsCommon::LayerTestsCommon() : threshold(1e-2f), abs_threshold(-1.f) { } void LayerTestsCommon::Run() { - bool isCurrentTestDisabled = FuncTestUtils::SkipTestsConfig::currentTestIsDisabled(); + bool isCurrentTestDisabled = ov::test::utils::current_test_is_disabled(); ov::test::utils::PassRate::Statuses status = isCurrentTestDisabled ? 
ov::test::utils::PassRate::Statuses::SKIPPED : diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index cbbeccb811c8fa..49c11629432543 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -43,7 +43,7 @@ std::ostream& operator <<(std::ostream& os, const InputShape& inputShape) { void SubgraphBaseTest::run() { is_reported = true; - bool isCurrentTestDisabled = FuncTestUtils::SkipTestsConfig::currentTestIsDisabled(); + bool isCurrentTestDisabled = ov::test::utils::current_test_is_disabled(); ov::test::utils::PassRate::Statuses status = isCurrentTestDisabled ? ov::test::utils::PassRate::Statuses::SKIPPED : diff --git a/src/tests/functional/shared_test_classes/src/base/snippets_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/snippets_test_utils.cpp index dbceeaec585aaa..d6bf5c5b487c42 100644 --- a/src/tests/functional/shared_test_classes/src/base/snippets_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/snippets_test_utils.cpp @@ -9,7 +9,7 @@ namespace ov { namespace test { void SnippetsTestsCommon::validateNumSubgraphs() { - bool isCurrentTestDisabled = FuncTestUtils::SkipTestsConfig::currentTestIsDisabled(); + bool isCurrentTestDisabled = ov::test::utils::current_test_is_disabled(); if (isCurrentTestDisabled) GTEST_SKIP() << "Disabled test due to configuration" << std::endl; diff --git a/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp b/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp index 5f8099c3eb0882..0b1876e4297e85 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp @@ -28,7 +28,7 @@ std::string ConversionLayerTest::getTestCaseName(const testing::TestParamInfo(ngPrc, inputShape); - auto variable_context = VariableContext(); + auto variable_context = ov::op::util::VariableContext(); auto variable_value = std::make_shared(hostTensor); variable_context.set_variable_value(function->get_variable_by_id("v0"), variable_value); eval_context["VariableContext"] = variable_context; @@ -66,7 +67,7 @@ namespace LayerTestsDefinitions { auto &s = ov::test::utils::OpSummary::getInstance(); s.setDeviceName(targetDevice); - if (FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { + if (ov::test::utils::current_test_is_disabled()) { s.updateOPsStats(function, ov::test::utils::PassRate::Statuses::SKIPPED); GTEST_SKIP() << "Disabled test due to configuration" << std::endl; } else { diff --git a/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp b/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp index 3ee7a34be3a11b..1bc3e834641746 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/mul_conv_fusion.cpp @@ -71,9 +71,7 @@ void MulConvFusion::SetUp() { std::shared_ptr conv; if (conv_type == ngraph::opset8::Convolution::get_type_info_static()) { weights = std::make_shared(weights, mul_const); - OPENVINO_SUPPRESS_DEPRECATED_START - weights = ngraph::get_constant_from_source(weights); - OPENVINO_SUPPRESS_DEPRECATED_END + weights = ov::get_constant_from_source(weights); ASSERT_NE(nullptr, weights); conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); 
} else if (conv_type == ngraph::opset8::GroupConvolution::get_type_info_static()) { @@ -84,9 +82,7 @@ void MulConvFusion::SetUp() { auto reshape = std::make_shared(mul_const, ngraph::op::Constant::create(ngraph::element::u64, ngraph::Shape{const_shape.size()}, const_shape), false); weights = std::make_shared(weights, reshape); - OPENVINO_SUPPRESS_DEPRECATED_START - weights = ngraph::get_constant_from_source(weights); - OPENVINO_SUPPRESS_DEPRECATED_END + weights = ov::get_constant_from_source(weights); ASSERT_NE(nullptr, weights); conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); } else if (conv_type == ngraph::opset8::ConvolutionBackpropData::get_type_info_static()) { @@ -96,9 +92,7 @@ void MulConvFusion::SetUp() { auto reshape = std::make_shared(mul_const, ngraph::op::Constant::create(ngraph::element::u64, ngraph::Shape{const_shape.size()}, const_shape), false); weights = std::make_shared(weights, reshape); - OPENVINO_SUPPRESS_DEPRECATED_START - weights = ngraph::get_constant_from_source(weights); - OPENVINO_SUPPRESS_DEPRECATED_END + weights = ov::get_constant_from_source(weights); ASSERT_NE(nullptr, weights); conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); } else if (conv_type == ngraph::opset8::GroupConvolutionBackpropData::get_type_info_static()) { @@ -110,9 +104,7 @@ void MulConvFusion::SetUp() { auto reshape = std::make_shared(mul_const, ngraph::op::Constant::create(ngraph::element::u64, ngraph::Shape{const_shape.size()}, const_shape), false); weights = std::make_shared(weights, reshape); - OPENVINO_SUPPRESS_DEPRECATED_START - weights = ngraph::get_constant_from_source(weights); - OPENVINO_SUPPRESS_DEPRECATED_END + weights = ov::get_constant_from_source(weights); ASSERT_NE(nullptr, weights); conv = std::make_shared(param, weights, strides, pad_begin, pad_end, strides); } else { diff --git a/src/tests/test_utils/functional_test_utils/CMakeLists.txt b/src/tests/test_utils/functional_test_utils/CMakeLists.txt index 15b7bc0167f0df..414055027c3c38 100644 --- a/src/tests/test_utils/functional_test_utils/CMakeLists.txt +++ b/src/tests/test_utils/functional_test_utils/CMakeLists.txt @@ -8,7 +8,7 @@ addIeTarget( NAME ${TARGET_NAME} TYPE STATIC ROOT ${CMAKE_CURRENT_SOURCE_DIR} - ADD_CPPLINT + ADD_CLANG_FORMAT DEVELOPER_PACKAGE tests INCLUDES diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/blob_utils.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/blob_utils.hpp index 9b617911cd1a5d..ceb4ee4aacd072 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/blob_utils.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/blob_utils.hpp @@ -4,31 +4,29 @@ #pragma once +#include +#include #include #include -#include -#include #include +#include -#include #include "blob_factory.hpp" #include "blob_transform.hpp" -#include "ie_compound_blob.h" -#include "precision_utils.h" #include "common_test_utils/data_utils.hpp" #include "common_test_utils/test_constants.hpp" - -#include "openvino/runtime/common.hpp" +#include "ie_compound_blob.h" #include "ie_ngraph_utils.hpp" - +#include "openvino/runtime/common.hpp" +#include "precision_utils.h" namespace FuncTestUtils { namespace Bf16TestUtils { inline short reducePrecisionBitwiseS(const float in); } // namespace Bf16TestUtils -enum CompareType{ +enum CompareType { ABS, REL, ABS_AND_REL // if absolute and relative differences are too high, an exception is thrown @@ -48,10 
+46,14 @@ enum CompareType{ * @param thr2 Second threshold of difference * @param printData A flag if data printing is demanded */ -template -inline void compareRawBuffers(const dType *res, const dType *ref, - size_t resSize, size_t refSize, - CompareType compareType, float thr1 = 0.01, float thr2 = 0.01, +template +inline void compareRawBuffers(const dType* res, + const dType* ref, + size_t resSize, + size_t refSize, + CompareType compareType, + float thr1 = 0.01, + float thr2 = 0.01, bool printData = false) { if (printData) { std::cout << "Reference results: " << std::endl; @@ -67,31 +69,31 @@ inline void compareRawBuffers(const dType *res, const dType *ref, } switch (compareType) { - case CompareType::ABS: - for (size_t i = 0; i < refSize; i++) { - float absDiff = std::abs(res[i] - ref[i]); - ASSERT_LE(absDiff, thr1) << "Relative comparison of values ref: " << ref[i] << " and res: " - << res[i] << " , index in blobs: " << i << " failed!"; - } - break; - case CompareType::REL: - for (size_t i = 0; i < refSize; i++) { - float absDiff = std::abs(res[i] - ref[i]); + case CompareType::ABS: + for (size_t i = 0; i < refSize; i++) { + float absDiff = std::abs(res[i] - ref[i]); + ASSERT_LE(absDiff, thr1) << "Relative comparison of values ref: " << ref[i] << " and res: " << res[i] + << " , index in blobs: " << i << " failed!"; + } + break; + case CompareType::REL: + for (size_t i = 0; i < refSize; i++) { + float absDiff = std::abs(res[i] - ref[i]); + float relDiff = absDiff / std::max(res[i], ref[i]); + ASSERT_LE(relDiff, thr2) << "Relative comparison of values ref: " << ref[i] << " and res: " << res[i] + << " , index in blobs: " << i << " failed!"; + } + break; + case CompareType::ABS_AND_REL: + for (size_t i = 0; i < refSize; i++) { + float absDiff = std::abs(res[i] - ref[i]); + if (absDiff > thr1) { float relDiff = absDiff / std::max(res[i], ref[i]); - ASSERT_LE(relDiff, thr2) << "Relative comparison of values ref: " << ref[i] << " and res: " - << res[i] << " , index in blobs: " << i << " failed!"; + ASSERT_LE(relDiff, thr2) << "Comparison of values ref: " << ref[i] << " and res: " << res[i] + << " , index in blobs: " << i << " failed!"; } - break; - case CompareType::ABS_AND_REL: - for (size_t i = 0; i < refSize; i++) { - float absDiff = std::abs(res[i] - ref[i]); - if (absDiff > thr1) { - float relDiff = absDiff / std::max(res[i], ref[i]); - ASSERT_LE(relDiff, thr2) << "Comparison of values ref: " << ref[i] << " and res: " - << res[i] << " , index in blobs: " << i << " failed!"; - } - } - break; + } + break; } } /** @@ -105,9 +107,11 @@ inline void compareRawBuffers(const dType *res, const dType *ref, * @param thr Threshold of difference, absolute and relative simultaneously * @param printData Flag if data printing is demanded */ -template -inline void compareRawBuffers(const dType *res, const dType *ref, - size_t resSize, size_t refSize, +template +inline void compareRawBuffers(const dType* res, + const dType* ref, + size_t resSize, + size_t refSize, float thr = 0.01, bool printData = false) { compareRawBuffers(res, ref, resSize, refSize, CompareType::ABS_AND_REL, thr, thr, printData); @@ -127,18 +131,24 @@ inline void compareRawBuffers(const dType *res, const dType *ref, * @param thr2 Second threshold of difference * @param printData A flag if data printing is demanded */ -template -inline void compareRawBuffers(const std::vector res, const std::vector ref, - const std::vector &resSizes, const std::vector &refSizes, +template +inline void compareRawBuffers(const std::vector res, + const 
std::vector ref, + const std::vector& resSizes, + const std::vector& refSizes, CompareType compareType, - float thr1 = 0.01, float thr2 = 0.01, bool printData = false) { + float thr1 = 0.01, + float thr2 = 0.01, + bool printData = false) { ASSERT_TRUE(res.size() == ref.size()) << "Reference and Results vector have to be same length"; ASSERT_TRUE(res.size() == resSizes.size()) << "Results vector and elements count vector have to be same length"; ASSERT_TRUE(ref.size() == refSizes.size()) << "Reference vector and elements count vector have to be same length"; for (size_t i = 0; i < res.size(); i++) { - if (printData) std::cout << "BEGIN CHECK BUFFER [" << i << "]" << std::endl; + if (printData) + std::cout << "BEGIN CHECK BUFFER [" << i << "]" << std::endl; compareRawBuffers(res[i], ref[i], resSizes[i], refSizes[i], compareType, thr1, thr2, printData); - if (printData) std::cout << "END CHECK BUFFER [" << i << "]" << std::endl; + if (printData) + std::cout << "END CHECK BUFFER [" << i << "]" << std::endl; } } /** @@ -152,10 +162,13 @@ inline void compareRawBuffers(const std::vector res, const std::vector< * @param thr Threshold of difference, absolute and relative simultaneously * @param printData A flag if data printing is demanded */ -template -inline void compareRawBuffers(const std::vector res, const std::vector ref, - const std::vector &resSizes, const std::vector &refSizes, - float thr = 0.01, bool printData = false) { +template +inline void compareRawBuffers(const std::vector res, + const std::vector ref, + const std::vector& resSizes, + const std::vector& refSizes, + float thr = 0.01, + bool printData = false) { compareRawBuffers(res, ref, resSizes, refSizes, CompareType::ABS_AND_REL, thr, thr, printData); } /** @@ -173,18 +186,24 @@ inline void compareRawBuffers(const std::vector res, const std::vector< * @param thr2 Second threshold of difference * @param printData A flag if data printing is demanded */ -template -inline void compareRawBuffers(const std::vector res, const std::vector> ref, - const std::vector &resSizes, const std::vector &refSizes, +template +inline void compareRawBuffers(const std::vector res, + const std::vector> ref, + const std::vector& resSizes, + const std::vector& refSizes, CompareType compareType, - float thr1 = 0.01, float thr2 = 0.01, bool printData = false) { + float thr1 = 0.01, + float thr2 = 0.01, + bool printData = false) { ASSERT_TRUE(res.size() == ref.size()) << "Reference and Results vector have to be same length"; ASSERT_TRUE(res.size() == resSizes.size()) << "Results vector and elements count vector have to be same length"; ASSERT_TRUE(ref.size() == refSizes.size()) << "Reference vector and elements count vector have to be same length"; for (size_t i = 0; i < res.size(); i++) { - if (printData) std::cout << "BEGIN CHECK BUFFER [" << i << "]" << std::endl; + if (printData) + std::cout << "BEGIN CHECK BUFFER [" << i << "]" << std::endl; compareRawBuffers(res[i], *ref[i], resSizes[i], refSizes[i], compareType, thr1, thr2, printData); - if (printData) std::cout << "END CHECK BUFFER [" << i << "]" << std::endl; + if (printData) + std::cout << "END CHECK BUFFER [" << i << "]" << std::endl; } } /** @@ -198,22 +217,27 @@ inline void compareRawBuffers(const std::vector res, const std::vector< * @param thr Threshold of difference, absolute and relative simultaneously * @param printData A flag if data printing is demanded */ -template -inline void compareRawBuffers(const std::vector res, const std::vector> ref, - const std::vector &resSizes, const 
std::vector &refSizes, - float thr = 0.01, bool printData = false) { +template +inline void compareRawBuffers(const std::vector res, + const std::vector> ref, + const std::vector& resSizes, + const std::vector& refSizes, + float thr = 0.01, + bool printData = false) { compareRawBuffers(res, ref, resSizes, refSizes, CompareType::ABS_AND_REL, thr, thr, printData); } -template -inline void -compareBlobData(const InferenceEngine::Blob::Ptr &res, const InferenceEngine::Blob::Ptr &ref, float max_diff = 0.01, - const std::string &assertDetails = "", bool printData = false) { +template +inline void compareBlobData(const InferenceEngine::Blob::Ptr& res, + const InferenceEngine::Blob::Ptr& ref, + float max_diff = 0.01, + const std::string& assertDetails = "", + bool printData = false) { using dataType = typename InferenceEngine::PrecisionTrait::value_type; - const dataType *res_ptr = res->cbuffer().as(); + const dataType* res_ptr = res->cbuffer().as(); size_t res_size = res->byteSize(); - const dataType *ref_ptr = ref->cbuffer().as(); + const dataType* ref_ptr = ref->cbuffer().as(); size_t ref_size = ref->byteSize(); ASSERT_EQ(res_size, ref_size) << "Comparing blobs have different size. " << assertDetails; @@ -231,25 +255,28 @@ compareBlobData(const InferenceEngine::Blob::Ptr &res, const InferenceEngine::Bl } for (size_t i = 0; i < ref_size / sizeof(dataType); i++) { - auto resVal = PRC == InferenceEngine::Precision::FP16 ? InferenceEngine::PrecisionUtils::f16tof32(static_cast(res_ptr[i])) - : static_cast(res_ptr[i]); - auto refVal = PRC == InferenceEngine::Precision::FP16 ? InferenceEngine::PrecisionUtils::f16tof32(static_cast(ref_ptr[i])) - : static_cast(ref_ptr[i]); + auto resVal = PRC == InferenceEngine::Precision::FP16 + ? InferenceEngine::PrecisionUtils::f16tof32(static_cast(res_ptr[i])) + : static_cast(res_ptr[i]); + auto refVal = PRC == InferenceEngine::Precision::FP16 + ? InferenceEngine::PrecisionUtils::f16tof32(static_cast(ref_ptr[i])) + : static_cast(ref_ptr[i]); float absDiff = std::abs(resVal - refVal); if (absDiff > max_diff) { float relDiff = absDiff / std::max(res_ptr[i], ref_ptr[i]); - ASSERT_LE(relDiff, max_diff) << "Relative comparison of values ref: " << ref_ptr[i] << " and res: " - << res_ptr[i] << " , index in blobs: " << i << " failed!" << assertDetails; + ASSERT_LE(relDiff, max_diff) << "Relative comparison of values ref: " << ref_ptr[i] + << " and res: " << res_ptr[i] << " , index in blobs: " << i << " failed!" + << assertDetails; } } } - -template -inline void -compareBlobData(const std::vector &res, const std::vector &ref, - float max_diff = 0.01, - const std::string &assertDetails = "", bool printData = false) { +template +inline void compareBlobData(const std::vector& res, + const std::vector& ref, + float max_diff = 0.01, + const std::string& assertDetails = "", + bool printData = false) { IE_ASSERT(res.size() == ref.size()) << "Length of comparing and references blobs vector are not equal!" 
<< assertDetails; for (size_t i = 0; i < res.size(); i++) { @@ -261,46 +288,47 @@ compareBlobData(const std::vector &res, const std::v } } -inline void -compareBlobs(const InferenceEngine::Blob::Ptr &res, const InferenceEngine::Blob::Ptr &ref, float max_diff = 0.01, - const std::string &assertDetails = "", bool printData = false) { - ASSERT_EQ(res->byteSize(), ref->byteSize()) << "Blobs have different byteSize(): " - << res->byteSize() << " and " << ref->byteSize(); +inline void compareBlobs(const InferenceEngine::Blob::Ptr& res, + const InferenceEngine::Blob::Ptr& ref, + float max_diff = 0.01, + const std::string& assertDetails = "", + bool printData = false) { + ASSERT_EQ(res->byteSize(), ref->byteSize()) + << "Blobs have different byteSize(): " << res->byteSize() << " and " << ref->byteSize(); ASSERT_EQ(res->getTensorDesc(), ref->getTensorDesc()) << "Blobs have different TensorDesc()"; switch (res->getTensorDesc().getPrecision()) { -#define COMPARE_WITH_REF(TYPE) case TYPE: { \ - FuncTestUtils::compareBlobData(res, \ - ref, \ - max_diff, \ - assertDetails, \ - printData); break; } +#define COMPARE_WITH_REF(TYPE) \ + case TYPE: { \ + FuncTestUtils::compareBlobData(res, ref, max_diff, assertDetails, printData); \ + break; \ + } COMPARE_WITH_REF(InferenceEngine::Precision::FP32); COMPARE_WITH_REF(InferenceEngine::Precision::FP16); COMPARE_WITH_REF(InferenceEngine::Precision::I64); #undef COMPARE_WITH_REF - default: - IE_THROW() << "Precision " << res->getTensorDesc().getPrecision().name() - << " is not covered by FuncTestUtils::compareBlobs() method"; + default: + IE_THROW() << "Precision " << res->getTensorDesc().getPrecision().name() + << " is not covered by FuncTestUtils::compareBlobs() method"; } } -inline void GetComparisonThreshold(InferenceEngine::Precision prc, float &absoluteThreshold, float &relativeThreshold) { +inline void GetComparisonThreshold(InferenceEngine::Precision prc, float& absoluteThreshold, float& relativeThreshold) { switch (prc) { - case InferenceEngine::Precision::FP32: - absoluteThreshold = relativeThreshold = 1e-4f; - break; - case InferenceEngine::Precision::FP16: - absoluteThreshold = relativeThreshold = 1e-2f; - break; - case InferenceEngine::Precision::I16: - case InferenceEngine::Precision::I8: - case InferenceEngine::Precision::U8: - absoluteThreshold = relativeThreshold = 1; - break; - default: - IE_THROW() << "Unhandled precision " << prc << " passed to the GetComparisonThreshold()"; + case InferenceEngine::Precision::FP32: + absoluteThreshold = relativeThreshold = 1e-4f; + break; + case InferenceEngine::Precision::FP16: + absoluteThreshold = relativeThreshold = 1e-2f; + break; + case InferenceEngine::Precision::I16: + case InferenceEngine::Precision::I8: + case InferenceEngine::Precision::U8: + absoluteThreshold = relativeThreshold = 1; + break; + default: + IE_THROW() << "Unhandled precision " << prc << " passed to the GetComparisonThreshold()"; } } @@ -311,9 +339,9 @@ inline float GetComparisonThreshold(InferenceEngine::Precision prc) { } // Copy from net_pass.h -template -inline void convertArrayPrecision(typename InferenceEngine::PrecisionTrait::value_type *dst, - const typename InferenceEngine::PrecisionTrait::value_type *src, +template +inline void convertArrayPrecision(typename InferenceEngine::PrecisionTrait::value_type* dst, + const typename InferenceEngine::PrecisionTrait::value_type* src, size_t nelem) { using dst_type = typename InferenceEngine::PrecisionTrait::value_type; @@ -322,80 +350,82 @@ inline void convertArrayPrecision(typename 
InferenceEngine::PrecisionTrait -inline void -convertArrayPrecision(float *dst, const short *src, - size_t nelem) { +template <> +inline void convertArrayPrecision(float* dst, + const short* src, + size_t nelem) { InferenceEngine::PrecisionUtils::f16tof32Arrays(dst, src, nelem, 1.0f, 0.0f); } -template<> -inline void -convertArrayPrecision(float *dst, const short *src, - size_t nelem) { +template <> +inline void convertArrayPrecision(float* dst, + const short* src, + size_t nelem) { auto srcBf16 = reinterpret_cast(src); for (size_t i = 0; i < nelem; i++) { dst[i] = static_cast(srcBf16[i]); } } -template -inline InferenceEngine::Blob::Ptr convertBlobPrecision(const InferenceEngine::Blob::Ptr &blob) { +template +inline InferenceEngine::Blob::Ptr convertBlobPrecision(const InferenceEngine::Blob::Ptr& blob) { using from_d_type = typename InferenceEngine::PrecisionTrait::value_type; using to_d_type = typename InferenceEngine::PrecisionTrait::value_type; auto tensor_desc = blob->getTensorDesc(); InferenceEngine::Blob::Ptr new_blob = InferenceEngine::make_shared_blob( - InferenceEngine::TensorDesc{PREC_TO, tensor_desc.getDims(), tensor_desc.getLayout()}); + InferenceEngine::TensorDesc{PREC_TO, tensor_desc.getDims(), tensor_desc.getLayout()}); new_blob->allocate(); - auto target = new_blob->buffer().as(); - auto source = blob->buffer().as(); + auto target = new_blob->buffer().as(); + auto source = blob->buffer().as(); convertArrayPrecision(target, source, blob->size()); return new_blob; } // Copy from net_pass.h - -template -inline InferenceEngine::Blob::Ptr copyBlobWithCast(const InferenceEngine::Blob::Ptr &blob) { +template +inline InferenceEngine::Blob::Ptr copyBlobWithCast(const InferenceEngine::Blob::Ptr& blob) { InferenceEngine::Blob::Ptr newBlob; switch (blob->getTensorDesc().getPrecision()) { - case InferenceEngine::Precision::FP32: - newBlob = FuncTestUtils::convertBlobPrecision(blob); - break; - case InferenceEngine::Precision::FP16: - newBlob = FuncTestUtils::convertBlobPrecision(blob); - break; - case InferenceEngine::Precision::I16: - newBlob = FuncTestUtils::convertBlobPrecision(blob); - break; - case InferenceEngine::Precision::I8: - newBlob = FuncTestUtils::convertBlobPrecision(blob); - break; - case InferenceEngine::Precision::U8: - newBlob = FuncTestUtils::convertBlobPrecision(blob); - break; - case InferenceEngine::Precision::I32: - newBlob = FuncTestUtils::convertBlobPrecision(blob); - break; - case InferenceEngine::Precision::BOOL: - newBlob = FuncTestUtils::convertBlobPrecision(blob); - break; - default: - IE_THROW() << "Conversion from blob with precision " << blob->getTensorDesc().getPrecision().name() - << " not implemented yet!"; + case InferenceEngine::Precision::FP32: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; + case InferenceEngine::Precision::FP16: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; + case InferenceEngine::Precision::I16: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; + case InferenceEngine::Precision::I8: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; + case InferenceEngine::Precision::U8: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; + case InferenceEngine::Precision::I32: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; + case InferenceEngine::Precision::BOOL: + newBlob = FuncTestUtils::convertBlobPrecision(blob); + break; + default: + IE_THROW() << "Conversion from blob with precision " << blob->getTensorDesc().getPrecision().name() + << " not 
implemented yet!"; } return newBlob; } -inline InferenceEngine::Blob::Ptr createAndFillBlobFloatNormalDistribution(const InferenceEngine::TensorDesc &td, +inline InferenceEngine::Blob::Ptr createAndFillBlobFloatNormalDistribution(const InferenceEngine::TensorDesc& td, const float mean, const float stddev, const int32_t seed = 1) { InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td); blob->allocate(); switch (td.getPrecision()) { -#define CASE(X) case X: ov::test::utils::fill_data_normal_random_float(blob, mean, stddev, seed); break; +#define CASE(X) \ + case X: \ + ov::test::utils::fill_data_normal_random_float(blob, mean, stddev, seed); \ + break; CASE(InferenceEngine::Precision::FP32) CASE(InferenceEngine::Precision::FP16) CASE(InferenceEngine::Precision::U8) @@ -407,22 +437,25 @@ inline InferenceEngine::Blob::Ptr createAndFillBlobFloatNormalDistribution(const CASE(InferenceEngine::Precision::I32) CASE(InferenceEngine::Precision::BOOL) #undef CASE - default: - IE_THROW() << "Wrong precision specified: " << td.getPrecision().name(); + default: + IE_THROW() << "Wrong precision specified: " << td.getPrecision().name(); } return blob; } -inline InferenceEngine::Blob::Ptr createAndFillBlobFloat(const InferenceEngine::TensorDesc &td, - const uint32_t range = 10, - const int32_t start_from = 0, - const int32_t resolution = 1, - const int32_t seed = 1) { +inline InferenceEngine::Blob::Ptr createAndFillBlobFloat(const InferenceEngine::TensorDesc& td, + const uint32_t range = 10, + const int32_t start_from = 0, + const int32_t resolution = 1, + const int32_t seed = 1) { InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td); blob->allocate(); switch (td.getPrecision()) { -#define CASE(X) case X: ov::test::utils::fill_data_random_float(blob, range, start_from, resolution, seed); break; +#define CASE(X) \ + case X: \ + ov::test::utils::fill_data_random_float(blob, range, start_from, resolution, seed); \ + break; CASE(InferenceEngine::Precision::FP32) CASE(InferenceEngine::Precision::FP16) CASE(InferenceEngine::Precision::U8) @@ -434,20 +467,23 @@ inline InferenceEngine::Blob::Ptr createAndFillBlobFloat(const InferenceEngine:: CASE(InferenceEngine::Precision::I32) CASE(InferenceEngine::Precision::BOOL) #undef CASE - default: - IE_THROW() << "Wrong precision specified: " << td.getPrecision().name(); + default: + IE_THROW() << "Wrong precision specified: " << td.getPrecision().name(); } return blob; } -template -inline InferenceEngine::Blob::Ptr createAndFillBlobWithFloatArray(const InferenceEngine::TensorDesc &td, +template +inline InferenceEngine::Blob::Ptr createAndFillBlobWithFloatArray(const InferenceEngine::TensorDesc& td, const T values[], const int size) { InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td); blob->allocate(); switch (td.getPrecision()) { -#define CASE(X) case X: ov::test::utils::fill_data_float_array(blob, values, size); break; +#define CASE(X) \ + case X: \ + ov::test::utils::fill_data_float_array(blob, values, size); \ + break; CASE(InferenceEngine::Precision::FP32) CASE(InferenceEngine::Precision::FP16) CASE(InferenceEngine::Precision::U8) @@ -459,21 +495,24 @@ inline InferenceEngine::Blob::Ptr createAndFillBlobWithFloatArray(const Inferenc CASE(InferenceEngine::Precision::I32) CASE(InferenceEngine::Precision::BOOL) #undef CASE - default: - IE_THROW() << "Wrong precision specified: " << td.getPrecision().name(); + default: + IE_THROW() << "Wrong precision specified: " << td.getPrecision().name(); } return blob; } -inline 
InferenceEngine::Blob::Ptr createAndFillBlob(const InferenceEngine::TensorDesc &td, - const uint32_t range = 10, - const int32_t start_from = 0, - const int32_t resolution = 1, - const int seed = 1) { +inline InferenceEngine::Blob::Ptr createAndFillBlob(const InferenceEngine::TensorDesc& td, + const uint32_t range = 10, + const int32_t start_from = 0, + const int32_t resolution = 1, + const int seed = 1) { InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td); blob->allocate(); switch (td.getPrecision()) { -#define CASE(X) case X: ov::test::utils::fill_data_random(blob, range, start_from, resolution, seed); break; +#define CASE(X) \ + case X: \ + ov::test::utils::fill_data_random(blob, range, start_from, resolution, seed); \ + break; CASE(InferenceEngine::Precision::FP64) CASE(InferenceEngine::Precision::FP32) CASE(InferenceEngine::Precision::FP16) @@ -491,21 +530,23 @@ inline InferenceEngine::Blob::Ptr createAndFillBlob(const InferenceEngine::Tenso CASE(InferenceEngine::Precision::BIN) CASE(InferenceEngine::Precision::BOOL) #undef CASE - default: - IE_THROW() << "Wrong precision specified: " << td.getPrecision().name(); + default: + IE_THROW() << "Wrong precision specified: " << td.getPrecision().name(); } return blob; } -inline InferenceEngine::Blob::Ptr createAndFillBlobConsistently( - const InferenceEngine::TensorDesc &td, - const uint32_t range, - const int32_t start_from, - const int32_t resolution) { +inline InferenceEngine::Blob::Ptr createAndFillBlobConsistently(const InferenceEngine::TensorDesc& td, + const uint32_t range, + const int32_t start_from, + const int32_t resolution) { InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td); blob->allocate(); switch (td.getPrecision()) { -#define CASE(X) case X: ov::test::utils::fill_data_consistently(blob, range, start_from, resolution); break; +#define CASE(X) \ + case X: \ + ov::test::utils::fill_data_consistently(blob, range, start_from, resolution); \ + break; CASE(InferenceEngine::Precision::FP32) CASE(InferenceEngine::Precision::FP16) CASE(InferenceEngine::Precision::U8) @@ -523,17 +564,19 @@ inline InferenceEngine::Blob::Ptr createAndFillBlobConsistently( return blob; } -inline InferenceEngine::Blob::Ptr createAndFillBlobUniqueSequence( - const InferenceEngine::TensorDesc &td, - const int32_t start_from = 0, - const int32_t resolution = 1, - const int32_t seed = 1) { +inline InferenceEngine::Blob::Ptr createAndFillBlobUniqueSequence(const InferenceEngine::TensorDesc& td, + const int32_t start_from = 0, + const int32_t resolution = 1, + const int32_t seed = 1) { InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td); blob->allocate(); auto shape = td.getDims(); auto range = std::accumulate(begin(shape), end(shape), uint64_t(1), std::multiplies()) * 2; switch (td.getPrecision()) { -#define CASE(X) case X: ov::test::utils::fill_random_unique_sequence(blob, range, start_from, resolution, seed); break; +#define CASE(X) \ + case X: \ + ov::test::utils::fill_random_unique_sequence(blob, range, start_from, resolution, seed); \ + break; CASE(InferenceEngine::Precision::FP32) CASE(InferenceEngine::Precision::FP16) CASE(InferenceEngine::Precision::U8) @@ -569,7 +612,7 @@ inline InferenceEngine::Blob::Ptr convertBlobLayout(const InferenceEngine::Blob: return out; } -template +template inline void fillInputsBySinValues(dType* data, size_t size) { if (std::is_same::value) { for (size_t i = 0; i < size; i++) { @@ -598,9 +641,9 @@ inline int fillInputsBySinValues(InferenceEngine::Blob::Ptr blob) { namespace 
Bf16TestUtils { #if defined __GNUC__ -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wstrict-aliasing" -# pragma GCC diagnostic ignored "-Wuninitialized" +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wstrict-aliasing" +# pragma GCC diagnostic ignored "-Wuninitialized" #endif inline float reducePrecisionBitwise(const float in) { @@ -625,7 +668,7 @@ inline short reducePrecisionBitwiseS(const float in) { } #if defined __GNUC__ -# pragma GCC diagnostic pop +# pragma GCC diagnostic pop #endif } // namespace Bf16TestUtils @@ -666,55 +709,56 @@ inline InferenceEngine::Blob::Ptr createBlobByType(const InferenceEngine::Tensor for (size_t i = 0; i < subBlobsNum; i++) { subBlobs.push_back(createAndFillBlob(subBlobDesc)); } - return blobType == BlobType::Batched ? InferenceEngine::make_shared_blob(subBlobs) : - InferenceEngine::make_shared_blob(subBlobs); + return blobType == BlobType::Batched + ? InferenceEngine::make_shared_blob(subBlobs) + : InferenceEngine::make_shared_blob(subBlobs); } -// TODO: ocl + remote -// case BlobType::Remote: -// return InferenceEngine::as(createAndFillBlob(td)); + // TODO: ocl + remote + // case BlobType::Remote: + // return InferenceEngine::as(createAndFillBlob(td)); default: IE_THROW() << "Test does not support the blob kind"; } } -inline bool checkLayout(InferenceEngine::Layout layout, const std::vector &inputShapes) { +inline bool checkLayout(InferenceEngine::Layout layout, const std::vector& inputShapes) { bool check = false; switch (layout) { - case InferenceEngine::Layout::SCALAR: - check = inputShapes.size() == 0; - break; - case InferenceEngine::Layout::C: - check = 1 == inputShapes.size(); - break; - case InferenceEngine::Layout::BLOCKED: - case InferenceEngine::Layout::ANY: - check = true; - break; - case InferenceEngine::Layout::GOIDHW: - check = 6 == inputShapes.size(); - break; - case InferenceEngine::Layout::NCDHW: - case InferenceEngine::Layout::NDHWC: - case InferenceEngine::Layout::OIDHW: - case InferenceEngine::Layout::GOIHW: - check = 5 == inputShapes.size(); - break; - case InferenceEngine::Layout::OIHW: - case InferenceEngine::Layout::NCHW: - case InferenceEngine::Layout::NHWC: - check = 4 == inputShapes.size(); - break; - case InferenceEngine::Layout::CHW: - case InferenceEngine::Layout::HWC: - check = 3 == inputShapes.size(); - break; - case InferenceEngine::Layout::CN: - case InferenceEngine::Layout::NC: - case InferenceEngine::Layout::HW: - check = 2 == inputShapes.size(); - break; - default: - break; + case InferenceEngine::Layout::SCALAR: + check = inputShapes.size() == 0; + break; + case InferenceEngine::Layout::C: + check = 1 == inputShapes.size(); + break; + case InferenceEngine::Layout::BLOCKED: + case InferenceEngine::Layout::ANY: + check = true; + break; + case InferenceEngine::Layout::GOIDHW: + check = 6 == inputShapes.size(); + break; + case InferenceEngine::Layout::NCDHW: + case InferenceEngine::Layout::NDHWC: + case InferenceEngine::Layout::OIDHW: + case InferenceEngine::Layout::GOIHW: + check = 5 == inputShapes.size(); + break; + case InferenceEngine::Layout::OIHW: + case InferenceEngine::Layout::NCHW: + case InferenceEngine::Layout::NHWC: + check = 4 == inputShapes.size(); + break; + case InferenceEngine::Layout::CHW: + case InferenceEngine::Layout::HWC: + check = 3 == inputShapes.size(); + break; + case InferenceEngine::Layout::CN: + case InferenceEngine::Layout::NC: + case InferenceEngine::Layout::HW: + check = 2 == inputShapes.size(); + break; + default: + break; } return check; } diff --git 
a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp index 2da5a6efe8b9cd..21837ee1789658 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp @@ -5,12 +5,11 @@ #pragma once #include +#include +#include #include "common_test_utils/common_utils.hpp" -#include -#include - namespace ov { namespace test { namespace utils { @@ -24,6 +23,7 @@ class CrashHandler { private: static unsigned int MAX_TEST_WORK_TIME; static bool IGNORE_CRASH; + public: CrashHandler(CONFORMANCE_TYPE type = CONFORMANCE_TYPE::op); ~CrashHandler(); diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/node_utils.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/node_utils.hpp index 4bcc8ca9f74bf1..ed2649def3b6f1 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/node_utils.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/node_utils.hpp @@ -7,7 +7,9 @@ #include "openvino/opsets/opset.hpp" -namespace FuncTestUtils { +namespace ov { +namespace test { +namespace utils { inline std::string get_op_version(std::string version_full_name) { std::string op_version(version_full_name); @@ -38,4 +40,6 @@ static std::map> get_unique_ops() { return res; } -} // namespace FuncTestUtils \ No newline at end of file +} // namespace utils +} // namespace test +} // namespace ov diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/ov_plugin_cache.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/ov_plugin_cache.hpp index 16692f09ec4862..5f651729c0118e 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/ov_plugin_cache.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/ov_plugin_cache.hpp @@ -16,15 +16,15 @@ namespace utils { class PluginCache { public: - std::shared_ptr core(const std::string &deviceToCheck = std::string()); + std::shared_ptr core(const std::string& deviceToCheck = std::string()); - static PluginCache &get(); + static PluginCache& get(); void reset(); - PluginCache(const PluginCache &) = delete; + PluginCache(const PluginCache&) = delete; - PluginCache &operator=(const PluginCache &) = delete; + PluginCache& operator=(const PluginCache&) = delete; private: PluginCache(); diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/plugin_cache.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/plugin_cache.hpp index 13012023fd4a01..bdecdd2079a5a9 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/plugin_cache.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/plugin_cache.hpp @@ -4,15 +4,14 @@ #pragma once +#include #include #include #include -#include - class PluginCache { public: - std::shared_ptr ie(const std::string &deviceToCheck = std::string()); + std::shared_ptr ie(const std::string& deviceToCheck = std::string()); static PluginCache& get(); diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/precision_utils.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/precision_utils.hpp index 
1b7814478c2fd6..ca2a3163f919e3 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/precision_utils.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/precision_utils.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include #include "ie_precision.hpp" @@ -17,45 +16,45 @@ namespace PrecisionUtils { inline ::ngraph::element::Type convertIE2nGraphPrc(const InferenceEngine::Precision& precision) { InferenceEngine::Precision::ePrecision pType = precision; switch (pType) { - case InferenceEngine::Precision::UNSPECIFIED: - return ::ngraph::element::Type(::ngraph::element::Type_t::undefined); - case InferenceEngine::Precision::FP64: - return ::ngraph::element::Type(::ngraph::element::Type_t::f64); - case InferenceEngine::Precision::FP32: - return ::ngraph::element::Type(::ngraph::element::Type_t::f32); - case InferenceEngine::Precision::FP16: - return ::ngraph::element::Type(::ngraph::element::Type_t::f16); - case InferenceEngine::Precision::BF16: - return ::ngraph::element::Type(::ngraph::element::Type_t::bf16); - case InferenceEngine::Precision::U4: - return ::ngraph::element::Type(::ngraph::element::Type_t::u4); - case InferenceEngine::Precision::I4: - return ::ngraph::element::Type(::ngraph::element::Type_t::i4); - case InferenceEngine::Precision::U8: - return ::ngraph::element::Type(::ngraph::element::Type_t::u8); - case InferenceEngine::Precision::I8: - return ::ngraph::element::Type(::ngraph::element::Type_t::i8); - case InferenceEngine::Precision::U16: - return ::ngraph::element::Type(::ngraph::element::Type_t::u16); - case InferenceEngine::Precision::I16: - return ::ngraph::element::Type(::ngraph::element::Type_t::i16); - case InferenceEngine::Precision::U32: - return ::ngraph::element::Type(::ngraph::element::Type_t::u32); - case InferenceEngine::Precision::I32: - return ::ngraph::element::Type(::ngraph::element::Type_t::i32); - case InferenceEngine::Precision::I64: - return ::ngraph::element::Type(::ngraph::element::Type_t::i64); - case InferenceEngine::Precision::U64: - return ::ngraph::element::Type(::ngraph::element::Type_t::u64); - case InferenceEngine::Precision::BOOL: - return ::ngraph::element::Type(::ngraph::element::Type_t::boolean); - case InferenceEngine::Precision::BIN: - return ::ngraph::element::Type(::ngraph::element::Type_t::u1); - case InferenceEngine::Precision::Q78: - case InferenceEngine::Precision::MIXED: - case InferenceEngine::Precision::CUSTOM: - default: - IE_THROW() << "Incorrect precision!"; + case InferenceEngine::Precision::UNSPECIFIED: + return ::ngraph::element::Type(::ngraph::element::Type_t::undefined); + case InferenceEngine::Precision::FP64: + return ::ngraph::element::Type(::ngraph::element::Type_t::f64); + case InferenceEngine::Precision::FP32: + return ::ngraph::element::Type(::ngraph::element::Type_t::f32); + case InferenceEngine::Precision::FP16: + return ::ngraph::element::Type(::ngraph::element::Type_t::f16); + case InferenceEngine::Precision::BF16: + return ::ngraph::element::Type(::ngraph::element::Type_t::bf16); + case InferenceEngine::Precision::U4: + return ::ngraph::element::Type(::ngraph::element::Type_t::u4); + case InferenceEngine::Precision::I4: + return ::ngraph::element::Type(::ngraph::element::Type_t::i4); + case InferenceEngine::Precision::U8: + return ::ngraph::element::Type(::ngraph::element::Type_t::u8); + case InferenceEngine::Precision::I8: + return ::ngraph::element::Type(::ngraph::element::Type_t::i8); + case InferenceEngine::Precision::U16: + return 
::ngraph::element::Type(::ngraph::element::Type_t::u16); + case InferenceEngine::Precision::I16: + return ::ngraph::element::Type(::ngraph::element::Type_t::i16); + case InferenceEngine::Precision::U32: + return ::ngraph::element::Type(::ngraph::element::Type_t::u32); + case InferenceEngine::Precision::I32: + return ::ngraph::element::Type(::ngraph::element::Type_t::i32); + case InferenceEngine::Precision::I64: + return ::ngraph::element::Type(::ngraph::element::Type_t::i64); + case InferenceEngine::Precision::U64: + return ::ngraph::element::Type(::ngraph::element::Type_t::u64); + case InferenceEngine::Precision::BOOL: + return ::ngraph::element::Type(::ngraph::element::Type_t::boolean); + case InferenceEngine::Precision::BIN: + return ::ngraph::element::Type(::ngraph::element::Type_t::u1); + case InferenceEngine::Precision::Q78: + case InferenceEngine::Precision::MIXED: + case InferenceEngine::Precision::CUSTOM: + default: + IE_THROW() << "Incorrect precision!"; } } diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/skip_tests_config.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/skip_tests_config.hpp index f325b5cf8b5fa4..8f76752cd6d8d0 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/skip_tests_config.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/skip_tests_config.hpp @@ -4,28 +4,40 @@ #pragma once -#include -#include -#include - #include +#include +#include +#include std::vector disabledTestPatterns(); -namespace FuncTestUtils { -namespace SkipTestsConfig { +namespace ov { +namespace test { +namespace utils { extern bool disable_tests_skipping; -bool currentTestIsDisabled(); +bool current_test_is_disabled(); + +} // namespace utils +} // namespace test +} // namespace ov + +// TODO: Remove after migration of internal components +namespace FuncTestUtils { +namespace SkipTestsConfig { + +inline bool currentTestIsDisabled() { + return ov::test::utils::current_test_is_disabled(); +} } // namespace SkipTestsConfig } // namespace FuncTestUtils -#define SKIP_IF_CURRENT_TEST_IS_DISABLED() \ -{ \ - if (FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { \ - GTEST_SKIP() << "Disabled test due to configuration" << std::endl; \ - } \ -} +#define SKIP_IF_CURRENT_TEST_IS_DISABLED() \ + { \ + if (ov::test::utils::current_test_is_disabled()) { \ + GTEST_SKIP() << "Disabled test due to configuration" << std::endl; \ + } \ + } diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/api_summary.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/api_summary.hpp index d71808c6a08027..37f113cd1bacdf 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/api_summary.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/api_summary.hpp @@ -24,17 +24,17 @@ class ApiSummary; class ApiSummaryDestroyer { private: - ApiSummary *p_instance; + ApiSummary* p_instance; public: ~ApiSummaryDestroyer(); - void initialize(ApiSummary *p); + void initialize(ApiSummary* p); }; class ApiSummary : public virtual Summary { private: - static ApiSummary *p_instance; + static ApiSummary* p_instance; std::map> apiStats; static const std::map apiInfo; ov_entity getOvEntityByName(const std::string& name); @@ -45,14 +45,15 @@ class ApiSummary : public virtual Summary { friend class ApiSummaryDestroyer; public: - static ApiSummary 
&getInstance(); + static ApiSummary& getInstance(); inline void getStatisticFromReport(const std::string& filePath); - std::map> getApiStats() { return apiStats; } + std::map> getApiStats() { + return apiStats; + } void updateStat(ov_entity, const std::string& device, PassRate::Statuses, double rel_influence_coef = 1); void saveReport() override; }; - } // namespace utils } // namespace test } // namespace ov diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/environment.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/environment.hpp index 3a43a1041f68a5..dd2d3594849f6c 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/environment.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/environment.hpp @@ -6,10 +6,8 @@ #include -#include "ngraph/ngraph.hpp" - -#include "functional_test_utils/summary/op_summary.hpp" #include "functional_test_utils/summary/api_summary.hpp" +#include "functional_test_utils/summary/op_summary.hpp" namespace ov { namespace test { diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp index bb132562984d09..df4377d5cf9ad4 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp @@ -12,11 +12,11 @@ namespace functional { // todo: reuse in summary std::string get_node_version(const std::shared_ptr& node, const std::string& postfix = ""); + } // namespace functional } // namespace test } // namespace ov - // todo: remove these structure after remove old subgraphs dumper namespace LayerTestsUtils { @@ -45,4 +45,4 @@ struct OPInfo { OPInfo() = default; }; -} // namespace LayerTestsUtils +} // namespace LayerTestsUtils diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp index ed9557e5474d34..4dc2d08b2d2ce9 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp @@ -4,11 +4,10 @@ #pragma once -#include "summary.hpp" - -#include "openvino/opsets/opset.hpp" #include "openvino/openvino.hpp" +#include "openvino/opsets/opset.hpp" #include "openvino/opsets/opset10.hpp" +#include "summary.hpp" namespace ov { namespace test { @@ -18,16 +17,17 @@ class OpSummary; class OpSummaryDestroyer { private: - OpSummary *p_instance; + OpSummary* p_instance; + public: ~OpSummaryDestroyer(); - void initialize(OpSummary *p); + void initialize(OpSummary* p); }; class OpSummary : public virtual Summary { private: - static OpSummary *p_instance; + static OpSummary* p_instance; static bool extractBody; std::map opsStats = {}; @@ -39,21 +39,29 @@ class OpSummary : public virtual Summary { friend class OpSummaryDestroyer; public: - static OpSummary &getInstance(); + static OpSummary& getInstance(); - std::map getOPsStats() { return opsStats; } + std::map getOPsStats() { + return opsStats; + } - static void setExtractBody(bool val) { extractBody = val; } - static bool getExtractBody() { return extractBody; } + static void setExtractBody(bool val) { 
+ extractBody = val; + } + static bool getExtractBody() { + return extractBody; + } std::map getStatisticFromReport(); void saveReport() override; - void updateOPsStats(const std::shared_ptr &model, const PassRate::Statuses &status, double rel_influence_coef = 1); - void updateOPsImplStatus(const std::shared_ptr &model, const bool implStatus); + void updateOPsStats(const std::shared_ptr& model, + const PassRate::Statuses& status, + double rel_influence_coef = 1); + void updateOPsImplStatus(const std::shared_ptr& model, const bool implStatus); - void updateOPsStats(const ov::NodeTypeInfo &op, const PassRate::Statuses &status, double rel_influence_coef = 1); - void updateOPsImplStatus(const ov::NodeTypeInfo &op, const bool implStatus); + void updateOPsStats(const ov::NodeTypeInfo& op, const PassRate::Statuses& status, double rel_influence_coef = 1); + void updateOPsImplStatus(const ov::NodeTypeInfo& op, const bool implStatus); }; } // namespace utils diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/summary.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/summary.hpp index a3d6934e1a768c..71ea95a791b8f7 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/summary.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/summary.hpp @@ -4,26 +4,18 @@ #pragma once -#include #include +#include -#include "openvino/openvino.hpp" - -#include "common_test_utils/test_constants.hpp" #include "common_test_utils/common_utils.hpp" +#include "common_test_utils/test_constants.hpp" namespace ov { namespace test { namespace utils { struct PassRate { - enum Statuses { - PASSED, - FAILED, - SKIPPED, - CRASHED, - HANGED - }; + enum Statuses { PASSED, FAILED, SKIPPED, CRASHED, HANGED }; unsigned long passed = 0; unsigned long failed = 0; unsigned long skipped = 0; @@ -36,7 +28,13 @@ struct PassRate { PassRate() = default; - PassRate(unsigned long p, unsigned long f, unsigned long s, unsigned long c, unsigned long h, double rel_p = 0, double rel_a = 0); + PassRate(unsigned long p, + unsigned long f, + unsigned long s, + unsigned long c, + unsigned long h, + double rel_p = 0, + double rel_a = 0); void setImplementationStatus(bool implStatus); @@ -57,7 +55,7 @@ class Summary { static bool isHangReported; static bool extendReport; static bool saveReportWithUniqueName; - static const char *outputFolder; + static const char* outputFolder; Summary() = default; @@ -68,13 +66,17 @@ class Summary { std::string getDeviceName() const; - // #define IE_TEST_DEBUG #ifdef IE_TEST_DEBUG - void saveDebugReport(const char* className, const char* opName, unsigned long passed, unsigned long failed, - unsigned long skipped, unsigned long crashed, unsigned long hanged); -#endif //IE_TEST_DEBUG + void saveDebugReport(const char* className, + const char* opName, + unsigned long passed, + unsigned long failed, + unsigned long skipped, + unsigned long crashed, + unsigned long hanged); +#endif // IE_TEST_DEBUG virtual void saveReport() {} @@ -89,7 +91,7 @@ class Summary { static void setSaveReportTimeout(size_t val); static size_t getSaveReportTimeout(); - static void setOutputFolder(const std::string &val); + static void setOutputFolder(const std::string& val); }; } // namespace utils diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/test_model/test_model.hpp 
b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/test_model/test_model.hpp index 9145cb86ff2c2d..a1c0f8621d57af 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/test_model/test_model.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/test_model/test_model.hpp @@ -7,25 +7,26 @@ #include #include -#include "inference_engine.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/type/element_type.hpp" -namespace FuncTestUtils { -namespace TestModel { +namespace ov { +namespace test { +namespace utils { /** * @brief generates IR files (XML and BIN files) with the test model. - * Passed reference vector is filled with CNN layers to validate after the network reading. - * @param modelPath used to serialize the generated network - * @param weightsPath used to serialize the generated weights - * @param netPrc precision of the generated network - * @param inputDims dims on the input layer of the generated network + * Passed reference vector is filled with OpenVINO operations to validate after the network reading. + * @param model_path used to serialize the generated network + * @param weights_path used to serialize the generated weights + * @param input_type input element type of the generated model + * @param input_shape dims on the input layer of the generated model */ -void generateTestModel(const std::string &modelPath, - const std::string &weightsPath, - const InferenceEngine::Precision &netPrc = InferenceEngine::Precision::FP32, - const InferenceEngine::SizeVector &inputDims = {1, 3, 227, 227}); +void generate_test_model(const std::string& model_path, + const std::string& weights_path, + const ov::element::Type& input_type = ov::element::f32, + const ov::PartialShape& input_shape = {1, 3, 227, 227}); -const char incorrect_input_name[] = "incorrect_input_name"; - -} // namespace TestModel -} // namespace FuncTestUtils +} // namespace utils +} // namespace test +} // namespace ov diff --git a/src/tests/test_utils/functional_test_utils/src/crash_handler.cpp b/src/tests/test_utils/functional_test_utils/src/crash_handler.cpp index fcd790dccec8ac..25769df69e3476 100644 --- a/src/tests/test_utils/functional_test_utils/src/crash_handler.cpp +++ b/src/tests/test_utils/functional_test_utils/src/crash_handler.cpp @@ -2,27 +2,28 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "functional_test_utils/summary/op_summary.hpp" -#include "functional_test_utils/summary/api_summary.hpp" - #include "functional_test_utils/crash_handler.hpp" -#include + #include +#include + +#include "functional_test_utils/summary/api_summary.hpp" +#include "functional_test_utils/summary/op_summary.hpp" namespace ov { namespace test { namespace utils { #if defined(__APPLE__) - typedef sig_t sighandler; +typedef sig_t sighandler; #elif defined(_WIN32) -#ifdef __GNUC__ - typedef __p_sig_fn_t sighandler; -#else - typedef _crt_signal_t sighandler; -#endif +# ifdef __GNUC__ +typedef __p_sig_fn_t sighandler; +# else +typedef _crt_signal_t sighandler; +# endif #else - typedef sighandler_t sighandler; +typedef sighandler_t sighandler; #endif // enviroment to restore in case of crash @@ -51,7 +52,7 @@ CrashHandler::CrashHandler(CONFORMANCE_TYPE type) { #endif if (!CrashHandler::IGNORE_CRASH) { - auto &s = ov::test::utils::OpSummary::getInstance(); + auto& s = ov::test::utils::OpSummary::getInstance(); s.saveReport(); std::abort(); } @@ -73,7 +74,8 @@ CrashHandler::CrashHandler(CONFORMANCE_TYPE type) { if (type == 
CONFORMANCE_TYPE::api) { crashHandler = [](int errCode) { - std::cerr << "Unexpected application crash with code: " << errCode << ". Program will aborted." << std::endl; + std::cerr << "Unexpected application crash with code: " << errCode << ". Program will aborted." + << std::endl; // reset custom signal handler to avoid infinit loop // if for some reasons sigsetjmp will not be available @@ -85,7 +87,7 @@ CrashHandler::CrashHandler(CONFORMANCE_TYPE type) { signal(SIGFPE, SIG_DFL); signal(SIGALRM, SIG_DFL); #endif - auto &s = ov::test::utils::ApiSummary::getInstance(); + auto& s = ov::test::utils::ApiSummary::getInstance(); s.saveReport(); std::abort(); }; diff --git a/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp b/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp index 69977f8d1410af..5d32d1a7b6abda 100644 --- a/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp +++ b/src/tests/test_utils/functional_test_utils/src/ov_plugin_cache.cpp @@ -3,8 +3,6 @@ // #include "functional_test_utils/ov_plugin_cache.hpp" -#include "common_test_utils/file_utils.hpp" -#include "openvino/util/file_util.hpp" #include @@ -12,13 +10,16 @@ #include #include +#include "common_test_utils/file_utils.hpp" +#include "openvino/util/file_util.hpp" + namespace ov { namespace test { namespace utils { namespace { class TestListener : public testing::EmptyTestEventListener { public: - void OnTestEnd(const testing::TestInfo &testInfo) override { + void OnTestEnd(const testing::TestInfo& testInfo) override { if (auto testResult = testInfo.result()) { if (testResult->Failed()) { PluginCache::get().reset(); @@ -28,12 +29,12 @@ class TestListener : public testing::EmptyTestEventListener { }; } // namespace -PluginCache &PluginCache::get() { +PluginCache& PluginCache::get() { static PluginCache instance; return instance; } -std::shared_ptr PluginCache::core(const std::string &deviceToCheck) { +std::shared_ptr PluginCache::core(const std::string& deviceToCheck) { std::lock_guard lock(g_mtx); if (std::getenv("DISABLE_PLUGIN_CACHE") != nullptr) { #ifndef NDEBUG @@ -58,7 +59,9 @@ std::shared_ptr PluginCache::core(const std::string &deviceToCheck) { try { std::string pluginName = "openvino_template_plugin"; pluginName += IE_BUILD_POSTFIX; - ov_core->register_plugin(ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), pluginName), "TEMPLATE"); + ov_core->register_plugin( + ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), pluginName), + "TEMPLATE"); } catch (...) 
{ } @@ -66,8 +69,7 @@ std::shared_ptr PluginCache::core(const std::string &deviceToCheck) { auto properties = ov_core->get_property(deviceToCheck, ov::supported_properties); if (std::find(properties.begin(), properties.end(), ov::available_devices) != properties.end()) { - auto availableDevices = - ov_core->get_property(deviceToCheck, ov::available_devices); + auto availableDevices = ov_core->get_property(deviceToCheck, ov::available_devices); if (availableDevices.empty()) { std::cerr << "No available devices for " << deviceToCheck << std::endl; @@ -77,7 +79,7 @@ std::shared_ptr PluginCache::core(const std::string &deviceToCheck) { #ifndef NDEBUG std::cout << "Available devices for " << deviceToCheck << ":" << std::endl; - for (const auto &device : availableDevices) { + for (const auto& device : availableDevices) { std::cout << " " << device << std::endl; } #endif @@ -97,7 +99,7 @@ void PluginCache::reset() { } PluginCache::PluginCache() { - auto &listeners = testing::UnitTest::GetInstance()->listeners(); + auto& listeners = testing::UnitTest::GetInstance()->listeners(); listeners.Append(new TestListener); } } // namespace utils diff --git a/src/tests/test_utils/functional_test_utils/src/plugin_cache.cpp b/src/tests/test_utils/functional_test_utils/src/plugin_cache.cpp index c0db3b5ae036c5..99fff0a5ef90e7 100644 --- a/src/tests/test_utils/functional_test_utils/src/plugin_cache.cpp +++ b/src/tests/test_utils/functional_test_utils/src/plugin_cache.cpp @@ -2,22 +2,23 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "common_test_utils/test_constants.hpp" #include "functional_test_utils/plugin_cache.hpp" -#include "functional_test_utils/ov_plugin_cache.hpp" -#include "common_test_utils/file_utils.hpp" + +#include #include +#include #include -#include -#include +#include "common_test_utils/file_utils.hpp" +#include "common_test_utils/test_constants.hpp" +#include "functional_test_utils/ov_plugin_cache.hpp" #include "openvino/util/file_util.hpp" namespace { class TestListener : public testing::EmptyTestEventListener { public: - void OnTestEnd(const testing::TestInfo &testInfo) override { + void OnTestEnd(const testing::TestInfo& testInfo) override { if (auto testResult = testInfo.result()) { if (testResult->Failed()) { PluginCache::get().reset(); @@ -27,12 +28,12 @@ class TestListener : public testing::EmptyTestEventListener { }; } // namespace -PluginCache &PluginCache::get() { +PluginCache& PluginCache::get() { static PluginCache instance; return instance; } -std::shared_ptr PluginCache::ie(const std::string &deviceToCheck) { +std::shared_ptr PluginCache::ie(const std::string& deviceToCheck) { std::lock_guard lock(g_mtx); if (std::getenv("DISABLE_PLUGIN_CACHE") != nullptr) { #ifndef NDEBUG @@ -57,8 +58,11 @@ std::shared_ptr PluginCache::ie(const std::string &device try { std::string pluginName = "openvino_template_plugin"; pluginName += IE_BUILD_POSTFIX; - ie_core->RegisterPlugin(ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), pluginName), "TEMPLATE"); - } catch (...) {} + ie_core->RegisterPlugin( + ov::util::make_plugin_library_name(ov::test::utils::getExecutableDirectory(), pluginName), + "TEMPLATE"); + } catch (...) 
{ + } if (!deviceToCheck.empty()) { std::vector metrics; @@ -69,7 +73,8 @@ std::shared_ptr PluginCache::ie(const std::string &device metrics = {ie_core->GetMetric(deviceToCheck, METRIC_KEY(SUPPORTED_METRICS)).as()}; } if (std::find(metrics.begin(), metrics.end(), METRIC_KEY(AVAILABLE_DEVICES)) != metrics.end()) { - auto availableDevices = ie_core->GetMetric(deviceToCheck, METRIC_KEY(AVAILABLE_DEVICES)).as>(); + auto availableDevices = + ie_core->GetMetric(deviceToCheck, METRIC_KEY(AVAILABLE_DEVICES)).as>(); if (availableDevices.empty()) { std::cerr << "No available devices for " << deviceToCheck << std::endl; @@ -79,7 +84,7 @@ std::shared_ptr PluginCache::ie(const std::string &device #ifndef NDEBUG std::cout << "Available devices for " << deviceToCheck << ":" << std::endl; - for (const auto &device : availableDevices) { + for (const auto& device : availableDevices) { std::cout << " " << device << std::endl; } #endif @@ -99,6 +104,6 @@ void PluginCache::reset() { } PluginCache::PluginCache() { - auto &listeners = testing::UnitTest::GetInstance()->listeners(); + auto& listeners = testing::UnitTest::GetInstance()->listeners(); listeners.Append(new TestListener); } diff --git a/src/tests/test_utils/functional_test_utils/src/precomp.hpp b/src/tests/test_utils/functional_test_utils/src/precomp.hpp index ba3cb93434004e..0578805a4b9230 100644 --- a/src/tests/test_utils/functional_test_utils/src/precomp.hpp +++ b/src/tests/test_utils/functional_test_utils/src/precomp.hpp @@ -6,11 +6,12 @@ #include -#include -#include -#include - #include +#include +#include +#include +#include +#include #include #include #include @@ -28,8 +29,6 @@ #include #include -#include -#include -#include -#include -#include +#include "openvino/core/type/float16.hpp" +#include "openvino/op/ops.hpp" +#include "openvino/openvino.hpp" diff --git a/src/tests/test_utils/functional_test_utils/src/skip_tests_config.cpp b/src/tests/test_utils/functional_test_utils/src/skip_tests_config.cpp index b14d4ba5585b2e..cf73d27f1408c7 100644 --- a/src/tests/test_utils/functional_test_utils/src/skip_tests_config.cpp +++ b/src/tests/test_utils/functional_test_utils/src/skip_tests_config.cpp @@ -2,25 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "functional_test_utils/skip_tests_config.hpp" + #include +#include #include "common_test_utils/file_utils.hpp" -#include "functional_test_utils/skip_tests_config.hpp" -namespace FuncTestUtils { -namespace SkipTestsConfig { +namespace ov { +namespace test { +namespace utils { bool disable_tests_skipping = false; -bool currentTestIsDisabled() { +bool current_test_is_disabled() { if (disable_tests_skipping) return false; - const auto fullName = ::testing::UnitTest::GetInstance()->current_test_info()->test_case_name() - + std::string(".") + ::testing::UnitTest::GetInstance()->current_test_info()->name(); + const auto fullName = ::testing::UnitTest::GetInstance()->current_test_info()->test_case_name() + std::string(".") + + ::testing::UnitTest::GetInstance()->current_test_info()->name(); - for (const auto &pattern : disabledTestPatterns()) { + for (const auto& pattern : disabledTestPatterns()) { std::regex re(pattern); if (std::regex_match(fullName, re)) return true; @@ -28,5 +30,7 @@ bool currentTestIsDisabled() { return false; } -} // namespace SkipTestsConfig -} // namespace FuncTestUtils + +} // namespace utils +} // namespace test +} // namespace ov diff --git a/src/tests/test_utils/functional_test_utils/src/summary/api_summary.cpp 
b/src/tests/test_utils/functional_test_utils/src/summary/api_summary.cpp index 99b2dd7a101832..6a7978ce4d4e26 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/api_summary.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/api_summary.cpp @@ -2,34 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "functional_test_utils/summary/api_summary.hpp" + #include -#include "functional_test_utils/summary/api_summary.hpp" #include "common_test_utils/file_utils.hpp" using namespace ov::test::utils; #ifdef _WIN32 -# define getpid _getpid +# define getpid _getpid #endif -ApiSummary *ApiSummary::p_instance = nullptr; +ApiSummary* ApiSummary::p_instance = nullptr; ApiSummaryDestroyer ApiSummary::destroyer; const std::map ApiSummary::apiInfo({ - { ov_entity::ov_infer_request, "ov_infer_request"}, - { ov_entity::ov_plugin, "ov_plugin"}, - { ov_entity::ov_compiled_model, "ov_compiled_model"}, - { ov_entity::ie_infer_request, "ie_infer_request"}, - { ov_entity::ie_plugin, "ie_plugin"}, - { ov_entity::ie_executable_network, "ie_executable_network"}, - { ov_entity::undefined, "undefined"}, + {ov_entity::ov_infer_request, "ov_infer_request"}, + {ov_entity::ov_plugin, "ov_plugin"}, + {ov_entity::ov_compiled_model, "ov_compiled_model"}, + {ov_entity::ie_infer_request, "ie_infer_request"}, + {ov_entity::ie_plugin, "ie_plugin"}, + {ov_entity::ie_executable_network, "ie_executable_network"}, + {ov_entity::undefined, "undefined"}, }); ApiSummaryDestroyer::~ApiSummaryDestroyer() { delete p_instance; } -void ApiSummaryDestroyer::initialize(ApiSummary *p) { +void ApiSummaryDestroyer::initialize(ApiSummary* p) { p_instance = p; } @@ -39,7 +40,7 @@ ApiSummary::ApiSummary() : apiStats() { isHangReported = false; } -ApiSummary &ApiSummary::getInstance() { +ApiSummary& ApiSummary::getInstance() { if (!p_instance) { p_instance = new ApiSummary(); destroyer.initialize(p_instance); @@ -47,9 +48,13 @@ ApiSummary &ApiSummary::getInstance() { return *p_instance; } -void ApiSummary::updateStat(ov_entity entity, const std::string& target_device, PassRate::Statuses status, double rel_influence_coef) { +void ApiSummary::updateStat(ov_entity entity, + const std::string& target_device, + PassRate::Statuses status, + double rel_influence_coef) { if (apiStats.empty()) { - std::string outputFilePath = outputFolder + std::string(ov::test::utils::FileSeparator) + reportFilename + ov::test::utils::REPORT_EXTENSION; + std::string outputFilePath = outputFolder + std::string(ov::test::utils::FileSeparator) + reportFilename + + ov::test::utils::REPORT_EXTENSION; const bool fileExists = ov::test::utils::fileExists(outputFilePath); if (extendReport && !isReported && fileExists) { getStatisticFromReport(outputFilePath); @@ -77,31 +82,31 @@ void ApiSummary::updateStat(ov_entity entity, const std::string& target_device, return; } switch (status) { - case PassRate::Statuses::SKIPPED: { - cur_stat[real_device].skipped++; - break; - } - case PassRate::Statuses::PASSED: { - if (!cur_stat[real_device].isImplemented) { - cur_stat[real_device].isImplemented = true; - } - cur_stat[real_device].passed++; - cur_stat[real_device].rel_passed += rel_influence_coef; - break; - } - case PassRate::Statuses::HANGED: { - cur_stat[real_device].hanged++; - isHangReported = true; - break; - } - case PassRate::Statuses::FAILED: { - cur_stat[real_device].failed++; - break; + case PassRate::Statuses::SKIPPED: { + cur_stat[real_device].skipped++; + break; + } + case PassRate::Statuses::PASSED: { + if 
(!cur_stat[real_device].isImplemented) { + cur_stat[real_device].isImplemented = true; } - case PassRate::Statuses::CRASHED: - cur_stat[real_device].crashed++; - isCrashReported = true; - break; + cur_stat[real_device].passed++; + cur_stat[real_device].rel_passed += rel_influence_coef; + break; + } + case PassRate::Statuses::HANGED: { + cur_stat[real_device].hanged++; + isHangReported = true; + break; + } + case PassRate::Statuses::FAILED: { + cur_stat[real_device].failed++; + break; + } + case PassRate::Statuses::CRASHED: + cur_stat[real_device].crashed++; + isCrashReported = true; + break; } } @@ -122,7 +127,7 @@ void ApiSummary::getStatisticFromReport(const std::string& filePath) { pugi::xml_node resultsNode = root.child("results"); pugi::xml_node currentDeviceNode = resultsNode.child(deviceName.c_str()); - for (auto &entityNode : currentDeviceNode.children()) { + for (auto& entityNode : currentDeviceNode.children()) { std::string entityName = entityNode.name(); ov_entity entity = getOvEntityByName(entityName); for (const auto& realDeviceNode : entityNode.children()) { @@ -157,7 +162,7 @@ void ApiSummary::saveReport() { std::string outputFilePath = outputFolder + std::string(ov::test::utils::FileSeparator) + filename; - auto &summary = ApiSummary::getInstance(); + auto& summary = ApiSummary::getInstance(); auto stats = summary.getApiStats(); pugi::xml_document doc; @@ -165,12 +170,12 @@ void ApiSummary::saveReport() { const bool fileExists = ov::test::utils::fileExists(outputFilePath); time_t rawtime; - struct tm *timeinfo; + struct tm* timeinfo; char timeNow[80]; time(&rawtime); // cpplint require to use localtime_r instead which is not available in C++11 - timeinfo = localtime(&rawtime); // NOLINT + timeinfo = localtime(&rawtime); // NOLINT strftime(timeNow, sizeof(timeNow), "%d-%m-%Y %H:%M:%S", timeinfo); @@ -178,7 +183,7 @@ void ApiSummary::saveReport() { if (fileExists) { doc.load_file(outputFilePath.c_str()); root = doc.child("report"); - //Ugly but shorter than to write predicate for find_atrribute() to update existing one + // Ugly but shorter than to write predicate for find_atrribute() to update existing one root.remove_attribute("timestamp"); root.append_attribute("timestamp").set_value(timeNow); @@ -191,16 +196,16 @@ void ApiSummary::saveReport() { } pugi::xml_node opsNode = root.append_child("api_list"); - for (const auto &api : apiInfo) { + for (const auto& api : apiInfo) { std::string name = api.second; pugi::xml_node entry = opsNode.append_child(name.c_str()); - (void) entry; + (void)entry; } pugi::xml_node resultsNode = root.child("results"); pugi::xml_node currentDeviceNode = resultsNode.append_child(summary.deviceName.c_str()); std::unordered_set opList; - for (const auto &stat_entity : stats) { + for (const auto& stat_entity : stats) { pugi::xml_node currentEntity = currentDeviceNode.append_child(apiInfo.at(stat_entity.first).c_str()); for (const auto& stat_device : stat_entity.second) { pugi::xml_node entry = currentEntity.append_child(stat_device.first.c_str()); @@ -211,8 +216,10 @@ void ApiSummary::saveReport() { entry.append_attribute("crashed").set_value(static_cast(stat_device.second.crashed)); entry.append_attribute("hanged").set_value(static_cast(stat_device.second.hanged)); entry.append_attribute("passrate").set_value(stat_device.second.getPassrate()); - entry.append_attribute("relative_passed").set_value(static_cast(stat_device.second.rel_passed)); - entry.append_attribute("relative_all").set_value(static_cast(stat_device.second.rel_all)); + 
entry.append_attribute("relative_passed") + .set_value(static_cast(stat_device.second.rel_passed)); + entry.append_attribute("relative_all") + .set_value(static_cast(stat_device.second.rel_all)); entry.append_attribute("relative_passrate").set_value(stat_device.second.getRelPassrate()); } } @@ -230,4 +237,3 @@ void ApiSummary::saveReport() { isReported = true; } } - diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp index cd302e15b9a86f..0082cba61a343f 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp @@ -15,7 +15,7 @@ std::string get_node_version(const std::shared_ptr& node, const std::s std::string opset_name = "opset"; auto pos = opset_version.find(opset_name); if (pos != std::string::npos) { - op_name += "-" + opset_version.substr(pos + opset_name.size()); + op_name += "-" + opset_version.substr(pos + opset_name.size()); } if (!postfix.empty()) { op_name += "_" + postfix; @@ -33,9 +33,10 @@ ModelInfo::ModelInfo(size_t _op_cnt, const std::map& _model : unique_op_cnt(_op_cnt), model_paths(_model_paths) {} - -PortInfo::PortInfo(double min, double max, bool convert_to_const) : min(min), max(max), - convert_to_const(convert_to_const) {} +PortInfo::PortInfo(double min, double max, bool convert_to_const) + : min(min), + max(max), + convert_to_const(convert_to_const) {} PortInfo::PortInfo() { min = std::numeric_limits::min(); @@ -48,4 +49,4 @@ OPInfo::OPInfo(const std::string& source_model, const std::string& model_path, s ports_info = {}; } -} // namespace LayerTestsUtils +} // namespace LayerTestsUtils diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp index 85e5ef3f544ad0..cd7e502ed9c90b 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp @@ -2,21 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "functional_test_utils/summary/op_summary.hpp" +#include #include - -#include "functional_test_utils/summary/op_summary.hpp" #include "common_test_utils/file_utils.hpp" using namespace ov::test::utils; #ifdef _WIN32 -# define getpid _getpid +# define getpid _getpid #endif -OpSummary *OpSummary::p_instance = nullptr; +OpSummary* OpSummary::p_instance = nullptr; bool OpSummary::extractBody = false; OpSummaryDestroyer OpSummary::destroyer; @@ -24,7 +23,7 @@ OpSummaryDestroyer::~OpSummaryDestroyer() { delete p_instance; } -void OpSummaryDestroyer::initialize(OpSummary *p) { +void OpSummaryDestroyer::initialize(OpSummary* p) { p_instance = p; } @@ -32,7 +31,7 @@ OpSummary::OpSummary() { reportFilename = ov::test::utils::OP_REPORT_FILENAME; } -OpSummary &OpSummary::getInstance() { +OpSummary& OpSummary::getInstance() { if (!p_instance) { p_instance = new OpSummary(); destroyer.initialize(p_instance); @@ -40,12 +39,14 @@ OpSummary &OpSummary::getInstance() { return *p_instance; } -void OpSummary::updateOPsStats(const ov::NodeTypeInfo &op, const PassRate::Statuses &status, double rel_influence_coef) { +void OpSummary::updateOPsStats(const ov::NodeTypeInfo& op, + const PassRate::Statuses& status, + double rel_influence_coef) { auto it = opsStats.find(op); if (opsStats.find(op) == opsStats.end()) { opsStats.insert({op, PassRate()}); } - auto &passrate = opsStats[op]; + auto& passrate 
= opsStats[op]; if (isCrashReported) { isCrashReported = false; if (passrate.crashed > 0) @@ -58,33 +59,33 @@ void OpSummary::updateOPsStats(const ov::NodeTypeInfo &op, const PassRate::Statu return; } switch (status) { - case PassRate::PASSED: - if (!passrate.isImplemented) { - passrate.isImplemented = true; - } - passrate.passed++; - passrate.rel_passed += rel_influence_coef; - break; - case PassRate::FAILED: - passrate.failed++; - break; - case PassRate::SKIPPED: - passrate.skipped++; - break; - case PassRate::CRASHED: { - passrate.crashed++; - isCrashReported = true; - break; - } - case PassRate::HANGED: { - passrate.hanged++; - isHangReported = true; - break; + case PassRate::PASSED: + if (!passrate.isImplemented) { + passrate.isImplemented = true; } + passrate.passed++; + passrate.rel_passed += rel_influence_coef; + break; + case PassRate::FAILED: + passrate.failed++; + break; + case PassRate::SKIPPED: + passrate.skipped++; + break; + case PassRate::CRASHED: { + passrate.crashed++; + isCrashReported = true; + break; + } + case PassRate::HANGED: { + passrate.hanged++; + isHangReported = true; + break; + } } } -void OpSummary::updateOPsImplStatus(const ov::NodeTypeInfo &op, const bool implStatus) { +void OpSummary::updateOPsImplStatus(const ov::NodeTypeInfo& op, const bool implStatus) { auto it = opsStats.find(op); if (it != opsStats.end()) { if (!it->second.isImplemented && implStatus) { @@ -119,7 +120,7 @@ std::map OpSummary::getStatisticFromReport() { pugi::xml_node resultsNode = root.child("results"); pugi::xml_node currentDeviceNode = resultsNode.child(deviceName.c_str()); std::map oldOpsStat; - for (auto &child : currentDeviceNode.children()) { + for (auto& child : currentDeviceNode.children()) { std::string entry = child.name(); auto p = std::stoi(child.attribute("passed").value()); auto f = std::stoi(child.attribute("failed").value()); @@ -134,12 +135,12 @@ std::map OpSummary::getStatisticFromReport() { return oldOpsStat; } -void OpSummary::updateOPsStats(const std::shared_ptr &model, const PassRate::Statuses &status, double k) { +void OpSummary::updateOPsStats(const std::shared_ptr& model, const PassRate::Statuses& status, double k) { if (model->get_parameters().empty()) { return; } bool isFunctionalGraph = false, isReportConvert = true; - for (const auto &op : model->get_ordered_ops()) { + for (const auto& op : model->get_ordered_ops()) { if (!std::dynamic_pointer_cast(op) && !std::dynamic_pointer_cast(op) && !std::dynamic_pointer_cast(op)) { @@ -154,10 +155,11 @@ void OpSummary::updateOPsStats(const std::shared_ptr &model, const Pa } } - for (const auto &op : model->get_ordered_ops()) { + for (const auto& op : model->get_ordered_ops()) { if ((std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) && isFunctionalGraph) { + std::dynamic_pointer_cast(op)) && + isFunctionalGraph) { continue; } // todo: remove w/a to provide correct convert reporting after merge CVS-110714 @@ -193,12 +195,12 @@ void OpSummary::updateOPsStats(const std::shared_ptr &model, const Pa } } -void OpSummary::updateOPsImplStatus(const std::shared_ptr &model, const bool implStatus) { +void OpSummary::updateOPsImplStatus(const std::shared_ptr& model, const bool implStatus) { if (model->get_parameters().empty()) { return; } bool isFunctionalGraph = false; - for (const auto &op : model->get_ordered_ops()) { + for (const auto& op : model->get_ordered_ops()) { if (!std::dynamic_pointer_cast(op) && !std::dynamic_pointer_cast(op) && !std::dynamic_pointer_cast(op)) { @@ 
-207,10 +209,11 @@ void OpSummary::updateOPsImplStatus(const std::shared_ptr &model, con } } - for (const auto &op : model->get_ordered_ops()) { + for (const auto& op : model->get_ordered_ops()) { if ((std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || - std::dynamic_pointer_cast(op)) && isFunctionalGraph) { + std::dynamic_pointer_cast(op)) && + isFunctionalGraph) { continue; } else if (std::dynamic_pointer_cast(op)) { updateOPsImplStatus(op->get_type_info(), implStatus); @@ -229,15 +232,21 @@ void OpSummary::updateOPsImplStatus(const std::shared_ptr &model, con } #ifdef IE_TEST_DEBUG -void Summary::saveDebugReport(const char* className, const char* opName, unsigned long passed, unsigned long failed, - unsigned long skipped, unsigned long crashed, unsigned long hanged) { +void Summary::saveDebugReport(const char* className, + const char* opName, + unsigned long passed, + unsigned long failed, + unsigned long skipped, + unsigned long crashed, + unsigned long hanged) { std::string outputFilePath = "./part_report.txt"; std::ofstream file; file.open(outputFilePath, std::ios_base::app); - file << className << ' ' << opName << ' ' << passed << ' ' << failed << ' ' << skipped << ' ' << crashed << ' ' << hanged << '\n'; + file << className << ' ' << opName << ' ' << passed << ' ' << failed << ' ' << skipped << ' ' << crashed << ' ' + << hanged << '\n'; file.close(); } -#endif //IE_TEST_DEBUG +#endif // IE_TEST_DEBUG void OpSummary::saveReport() { if (isReported) { @@ -262,10 +271,10 @@ void OpSummary::saveReport() { std::string outputFilePath = outputFolder + std::string(ov::test::utils::FileSeparator) + filename; std::map opsInfo; - for (const auto &opset_pair : get_available_opsets()) { + for (const auto& opset_pair : get_available_opsets()) { std::string opset_version = opset_pair.first; const ov::OpSet& opset = opset_pair.second(); - const auto &type_info_set = opset.get_type_info_set(); + const auto& type_info_set = opset.get_type_info_set(); for (const auto& type_info : type_info_set) { auto it = opsInfo.find(type_info); std::string op_version = getOpVersion(opset_version); @@ -278,7 +287,7 @@ void OpSummary::saveReport() { } } - auto &summary = OpSummary::getInstance(); + auto& summary = OpSummary::getInstance(); auto stats = summary.getOPsStats(); pugi::xml_document doc; @@ -286,12 +295,12 @@ void OpSummary::saveReport() { const bool fileExists = ov::test::utils::fileExists(outputFilePath); time_t rawtime; - struct tm *timeinfo; + struct tm* timeinfo; char timeNow[80]; time(&rawtime); // cpplint require to use localtime_r instead which is not available in C++11 - timeinfo = localtime(&rawtime); // NOLINT + timeinfo = localtime(&rawtime); // NOLINT strftime(timeNow, sizeof(timeNow), "%d-%m-%Y %H:%M:%S", timeinfo); @@ -299,7 +308,7 @@ void OpSummary::saveReport() { if (fileExists) { doc.load_file(outputFilePath.c_str()); root = doc.child("report"); - //Ugly but shorter than to write predicate for find_atrribute() to update existing one + // Ugly but shorter than to write predicate for find_atrribute() to update existing one root.remove_attribute("timestamp"); root.append_attribute("timestamp").set_value(timeNow); @@ -312,7 +321,7 @@ void OpSummary::saveReport() { } pugi::xml_node opsNode = root.append_child("ops_list"); - for (const auto &op : opsInfo) { + for (const auto& op : opsInfo) { std::string name = std::string(op.first.name) + "-" + getOpVersion(op.first.version_id); opsNode.append_child(name.c_str()).append_attribute("opsets").set_value(op.second.c_str()); } @@ 
-320,7 +329,7 @@ void OpSummary::saveReport() { pugi::xml_node resultsNode = root.child("results"); pugi::xml_node currentDeviceNode = resultsNode.append_child(summary.deviceName.c_str()); std::unordered_set opList; - for (const auto &it : stats) { + for (const auto& it : stats) { std::string name = std::string(it.first.name) + "-" + getOpVersion(it.first.version_id); opList.insert(name); pugi::xml_node entry = currentDeviceNode.append_child(name.c_str()); @@ -338,7 +347,7 @@ void OpSummary::saveReport() { if (extendReport && fileExists) { auto opStataFromReport = summary.getStatisticFromReport(); - for (auto &item : opStataFromReport) { + for (auto& item : opStataFromReport) { pugi::xml_node entry; if (opList.find(item.first) == opList.end()) { entry = currentDeviceNode.append_child(item.first.c_str()); @@ -364,9 +373,8 @@ void OpSummary::saveReport() { auto rel_all = std::stoi(entry.attribute("relative_all").value()) + item.second.rel_all; PassRate obj(p, f, s, c, h, rel_passed, rel_all); - (implStatus || obj.isImplemented) - ? entry.attribute("implemented").set_value(true) - : entry.attribute("implemented").set_value(false); + (implStatus || obj.isImplemented) ? entry.attribute("implemented").set_value(true) + : entry.attribute("implemented").set_value(false); entry.attribute("passed").set_value(static_cast(obj.passed)); entry.attribute("failed").set_value(static_cast(obj.failed)); entry.attribute("skipped").set_value(static_cast(obj.skipped)); diff --git a/src/tests/test_utils/functional_test_utils/src/summary/summary.cpp b/src/tests/test_utils/functional_test_utils/src/summary/summary.cpp index c2d39e35c5b9b2..7f42ecaff57285 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/summary.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/summary.cpp @@ -8,7 +8,13 @@ namespace ov { namespace test { namespace utils { -PassRate::PassRate(unsigned long p, unsigned long f, unsigned long s, unsigned long c, unsigned long h, double rel_p, double rel_a) { +PassRate::PassRate(unsigned long p, + unsigned long f, + unsigned long s, + unsigned long c, + unsigned long h, + double rel_p, + double rel_a) { passed = p; failed = f; skipped = s; @@ -60,17 +66,30 @@ void Summary::setReportFilename(const std::string& val) { reportFilename = val.c_str(); } -void Summary::setExtendReport(bool val) { extendReport = val; } -bool Summary::getExtendReport() { return extendReport; } - -void Summary::setSaveReportWithUniqueName(bool val) { saveReportWithUniqueName = val; } -bool Summary::getSaveReportWithUniqueName() { return saveReportWithUniqueName; } +void Summary::setExtendReport(bool val) { + extendReport = val; +} +bool Summary::getExtendReport() { + return extendReport; +} -void Summary::setSaveReportTimeout(size_t val) { saveReportTimeout = val; } -size_t Summary::getSaveReportTimeout() { return saveReportTimeout; } +void Summary::setSaveReportWithUniqueName(bool val) { + saveReportWithUniqueName = val; +} +bool Summary::getSaveReportWithUniqueName() { + return saveReportWithUniqueName; +} -void Summary::setOutputFolder(const std::string &val) { outputFolder = val.c_str(); } +void Summary::setSaveReportTimeout(size_t val) { + saveReportTimeout = val; +} +size_t Summary::getSaveReportTimeout() { + return saveReportTimeout; +} +void Summary::setOutputFolder(const std::string& val) { + outputFolder = val.c_str(); +} } // namespace utils } // namespace test diff --git a/src/tests/test_utils/functional_test_utils/src/test_model/test_model.cpp 
b/src/tests/test_utils/functional_test_utils/src/test_model/test_model.cpp index 099f90ff9f81a4..7a29f37f6b3120 100644 --- a/src/tests/test_utils/functional_test_utils/src/test_model/test_model.cpp +++ b/src/tests/test_utils/functional_test_utils/src/test_model/test_model.cpp @@ -2,39 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include -#include - #include "functional_test_utils/test_model/test_model.hpp" -#include "functional_test_utils/precision_utils.hpp" -#include -#include + +#include "ngraph_functions/subgraph_builders.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/pass/manager.hpp" #include "openvino/pass/serialize.hpp" -#include "ie_ngraph_utils.hpp" -namespace FuncTestUtils { -namespace TestModel { +namespace ov { +namespace test { +namespace utils { -/** - * @brief generates IR files (XML and BIN files) with the test model. - * Passed reference vector is filled with CNN layers to validate after the network reading. - * @param modelPath used to serialize the generated network - * @param weightsPath used to serialize the generated weights - * @param netPrc precision of the generated network - * @param inputDims dims on the input layer of the generated network - */ -void generateTestModel(const std::string &modelPath, - const std::string &weightsPath, - const InferenceEngine::Precision &netPrc, - const InferenceEngine::SizeVector &inputDims) { - ngraph::pass::Manager manager; - manager.register_pass(modelPath, weightsPath); - manager.run_passes(ngraph::builder::subgraph::makeConvPoolRelu( - inputDims, InferenceEngine::details::convertPrecision(netPrc))); +void generate_test_model(const std::string& model_path, + const std::string& weights_path, + const ov::element::Type& input_type, + const ov::PartialShape& input_shape) { + ov::pass::Manager manager; + manager.register_pass(model_path, weights_path); + manager.run_passes(ngraph::builder::subgraph::makeConvPoolRelu(input_shape.to_shape(), input_type)); } -} // namespace TestModel -} // namespace FuncTestUtils +} // namespace utils +} // namespace test +} // namespace ov From 6a1d680f9072dda369247a750f476b5b638b402e Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 12 Sep 2023 07:34:45 +0400 Subject: [PATCH 16/31] Partially fixed github issue 18274 (#19758) --- src/frontends/common/src/manager.cpp | 29 +- src/plugins/intel_cpu/src/cpu_types.cpp | 398 ++++++++++++------------ 2 files changed, 220 insertions(+), 207 deletions(-) diff --git a/src/frontends/common/src/manager.cpp b/src/frontends/common/src/manager.cpp index fa917dadb1caad..35df484c2cab26 100644 --- a/src/frontends/common/src/manager.cpp +++ b/src/frontends/common/src/manager.cpp @@ -20,10 +20,20 @@ class FrontEndManager::Impl { std::mutex m_loading_mutex; std::vector m_plugins; - /// \brief map of shared object per frontend - static std::unordered_map> m_shared_objects_map; - /// \brief Mutex to guard access the shared object map - static std::mutex m_shared_objects_map_mutex; + // Note, static methods below are required to create an order of initialization of static variables + // e.g. 
if users (not encouraged) created ov::Model globally, we need to ensure proper order of initialization + + /// \return map of shared object per frontend + static std::unordered_map>& get_shared_objects_map() { + static std::unordered_map> shared_objects_map; + return shared_objects_map; + } + + /// \return Mutex to guard access the shared object map + static std::mutex& get_shared_objects_mutex() { + static std::mutex shared_objects_map_mutex; + return shared_objects_map_mutex; + } public: Impl() { @@ -37,8 +47,8 @@ class FrontEndManager::Impl { fe_obj->m_shared_object = std::make_shared(plugin.get_so_pointer()); fe_obj->m_actual = plugin.get_creator().m_creator(); - std::lock_guard guard(m_shared_objects_map_mutex); - m_shared_objects_map.emplace(plugin.get_creator().m_name, fe_obj->m_shared_object); + std::lock_guard guard(get_shared_objects_mutex()); + get_shared_objects_map().emplace(plugin.get_creator().m_name, fe_obj->m_shared_object); return fe_obj; } @@ -128,8 +138,8 @@ class FrontEndManager::Impl { } static void shutdown() { - std::lock_guard guard(m_shared_objects_map_mutex); - m_shared_objects_map.clear(); + std::lock_guard guard(get_shared_objects_mutex()); + get_shared_objects_map().clear(); } private: @@ -224,9 +234,6 @@ class FrontEndManager::Impl { } }; -std::unordered_map> FrontEndManager::Impl::m_shared_objects_map{}; -std::mutex FrontEndManager::Impl::m_shared_objects_map_mutex{}; - FrontEndManager::FrontEndManager() : m_impl(new Impl()) {} FrontEndManager::FrontEndManager(FrontEndManager&&) noexcept = default; diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 14528718b3c24b..03fbe1a9923b7a 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -9,204 +9,210 @@ namespace ov { namespace intel_cpu { -const InferenceEngine::details::caseless_unordered_map type_to_name_tbl = { - { "Constant", Type::Input }, - { "Parameter", Type::Input }, - { "Result", Type::Output }, - { "Eye", Type::Eye }, - { "Convolution", Type::Convolution }, - { "GroupConvolution", Type::Convolution }, - { "MatMul", Type::MatMul }, - { "FullyConnected", Type::FullyConnected }, - { "MaxPool", Type::Pooling }, - { "AvgPool", Type::Pooling }, - { "AdaptiveMaxPool", Type::AdaptivePooling}, - { "AdaptiveAvgPool", Type::AdaptivePooling}, - { "Add", Type::Eltwise }, - { "IsFinite", Type::Eltwise }, - { "IsInf", Type::Eltwise }, - { "IsNaN", Type::Eltwise }, - { "Subtract", Type::Eltwise }, - { "Multiply", Type::Eltwise }, - { "Divide", Type::Eltwise }, - { "SquaredDifference", Type::Eltwise }, - { "Maximum", Type::Eltwise }, - { "Minimum", Type::Eltwise }, - { "Mod", Type::Eltwise }, - { "FloorMod", Type::Eltwise }, - { "Power", Type::Eltwise }, - { "PowerStatic", Type::Eltwise }, - { "Equal", Type::Eltwise }, - { "NotEqual", Type::Eltwise }, - { "Greater", Type::Eltwise }, - { "GreaterEqual", Type::Eltwise }, - { "Less", Type::Eltwise }, - { "LessEqual", Type::Eltwise }, - { "LogicalAnd", Type::Eltwise }, - { "LogicalOr", Type::Eltwise }, - { "LogicalXor", Type::Eltwise }, - { "LogicalNot", Type::Eltwise }, - { "Relu", Type::Eltwise }, - { "LeakyRelu", Type::Eltwise }, - { "Gelu", Type::Eltwise }, - { "Elu", Type::Eltwise }, - { "Tanh", Type::Eltwise }, - { "Sigmoid", Type::Eltwise }, - { "Abs", Type::Eltwise }, - { "Sqrt", Type::Eltwise }, - { "Clamp", Type::Eltwise }, - { "Exp", Type::Eltwise }, - { "SwishCPU", Type::Eltwise }, - { "HSwish", Type::Eltwise }, - { "Mish", Type::Eltwise }, - { "HSigmoid", 
Type::Eltwise }, - { "Round", Type::Eltwise }, - { "PRelu", Type::Eltwise }, - { "Erf", Type::Eltwise }, - { "SoftPlus", Type::Eltwise }, - { "SoftSign", Type::Eltwise }, - { "Select", Type::Eltwise}, - { "Log", Type::Eltwise }, - { "Reshape", Type::Reshape }, - { "Squeeze", Type::Reshape }, - { "Unsqueeze", Type::Reshape }, - { "ShapeOf", Type::ShapeOf }, - { "NonZero", Type::NonZero }, - { "Softmax", Type::Softmax }, - { "Reorder", Type::Reorder }, - { "BatchToSpace", Type::BatchToSpace }, - { "SpaceToBatch", Type::SpaceToBatch }, - { "DepthToSpace", Type::DepthToSpace }, - { "SpaceToDepth", Type::SpaceToDepth }, - { "Roll", Type::Roll }, - { "LRN", Type::Lrn }, - { "Split", Type::Split }, - { "VariadicSplit", Type::Split }, - { "Concat", Type::Concatenation }, - { "ConvolutionBackpropData", Type::Deconvolution }, - { "GroupConvolutionBackpropData", Type::Deconvolution }, - { "StridedSlice", Type::StridedSlice }, - { "Slice", Type::StridedSlice }, - { "Tile", Type::Tile }, - { "ROIAlign", Type::ROIAlign }, - { "ROIPooling", Type::ROIPooling }, - { "PSROIPooling", Type::PSROIPooling }, - { "DeformablePSROIPooling", Type::PSROIPooling }, - { "Pad", Type::Pad }, - { "Transpose", Type::Transpose }, - { "LSTMCell", Type::RNNCell }, - { "GRUCell", Type::RNNCell }, - { "AUGRUCell", Type::RNNCell }, - { "RNNCell", Type::RNNCell }, - { "LSTMSequence", Type::RNNSeq }, - { "GRUSequence", Type::RNNSeq }, - { "AUGRUSequence", Type::RNNSeq }, - { "RNNSequence", Type::RNNSeq }, - { "FakeQuantize", Type::FakeQuantize }, - { "BinaryConvolution", Type::BinaryConvolution }, - { "DeformableConvolution", Type::DeformableConvolution }, - { "TensorIterator", Type::TensorIterator }, - { "Loop", Type::TensorIterator }, - { "ReadValue", Type::MemoryInput}, // for construction from name ctor, arbitrary name is used - { "Assign", Type::MemoryOutput }, // for construction from layer ctor - { "Convert", Type::Convert }, - { "NV12toRGB", Type::ColorConvert }, - { "NV12toBGR", Type::ColorConvert }, - { "I420toRGB", Type::ColorConvert }, - { "I420toBGR", Type::ColorConvert }, - { "MVN", Type::MVN}, - { "NormalizeL2", Type::NormalizeL2}, - { "ScatterUpdate", Type::ScatterUpdate}, - { "ScatterElementsUpdate", Type::ScatterElementsUpdate}, - { "ScatterNDUpdate", Type::ScatterNDUpdate}, - { "Interpolate", Type::Interpolate}, - { "ReduceL1", Type::Reduce}, - { "ReduceL2", Type::Reduce}, - { "ReduceLogicalAnd", Type::Reduce}, - { "ReduceLogicalOr", Type::Reduce}, - { "ReduceMax", Type::Reduce}, - { "ReduceMean", Type::Reduce}, - { "ReduceMin", Type::Reduce}, - { "ReduceProd", Type::Reduce}, - { "ReduceSum", Type::Reduce}, - { "ReduceLogSum", Type::Reduce}, - { "ReduceLogSumExp", Type::Reduce}, - { "ReduceSumSquare", Type::Reduce}, - { "Broadcast", Type::Broadcast}, - { "EmbeddingSegmentsSum", Type::EmbeddingSegmentsSum}, - { "EmbeddingBagPackedSum", Type::EmbeddingBagPackedSum}, - { "EmbeddingBagOffsetsSum", Type::EmbeddingBagOffsetsSum}, - { "Gather", Type::Gather}, - { "GatherElements", Type::GatherElements}, - { "GatherND", Type::GatherND}, - { "GridSample", Type::GridSample}, - { "OneHot", Type::OneHot}, - { "RegionYolo", Type::RegionYolo}, - { "ShuffleChannels", Type::ShuffleChannels}, - { "DFT", Type::DFT}, - { "IDFT", Type::DFT}, - { "RDFT", Type::RDFT}, - { "IRDFT", Type::RDFT}, - { "Abs", Type::Math}, - { "Acos", Type::Math}, - { "Acosh", Type::Math}, - { "Asin", Type::Math}, - { "Asinh", Type::Math}, - { "Atan", Type::Math}, - { "Atanh", Type::Math}, - { "Ceil", Type::Math}, - { "Ceiling", Type::Math}, - { "Cos", 
Type::Math}, - { "Cosh", Type::Math}, - { "Floor", Type::Math}, - { "HardSigmoid", Type::Math}, - { "If", Type::If}, - { "Neg", Type::Math}, - { "Reciprocal", Type::Math}, - { "Selu", Type::Math}, - { "Sign", Type::Math}, - { "Sin", Type::Math}, - { "Sinh", Type::Math}, - { "SoftPlus", Type::Math}, - { "Softsign", Type::Math}, - { "Tan", Type::Math}, - { "CTCLoss", Type::CTCLoss}, - { "Bucketize", Type::Bucketize}, - { "CTCGreedyDecoder", Type::CTCGreedyDecoder}, - { "CTCGreedyDecoderSeqLen", Type::CTCGreedyDecoderSeqLen}, - { "CumSum", Type::CumSum}, - { "DetectionOutput", Type::DetectionOutput}, - { "ExperimentalDetectronDetectionOutput", Type::ExperimentalDetectronDetectionOutput}, - { "LogSoftmax", Type::LogSoftmax}, - { "TopK", Type::TopK}, - { "GatherTree", Type::GatherTree}, - { "GRN", Type::GRN}, - { "Range", Type::Range}, - { "Proposal", Type::Proposal}, - { "ReorgYolo", Type::ReorgYolo}, - { "ReverseSequence", Type::ReverseSequence}, - { "ExperimentalDetectronTopKROIs", Type::ExperimentalDetectronTopKROIs}, - { "ExperimentalDetectronROIFeatureExtractor", Type::ExperimentalDetectronROIFeatureExtractor}, - { "ExperimentalDetectronPriorGridGenerator", Type::ExperimentalDetectronPriorGridGenerator}, - { "ExperimentalDetectronGenerateProposalsSingleImage", Type::ExperimentalDetectronGenerateProposalsSingleImage}, - { "GenerateProposals", Type::GenerateProposals}, - { "ExtractImagePatches", Type::ExtractImagePatches}, - { "NonMaxSuppression", Type::NonMaxSuppression}, - { "NonMaxSuppressionIEInternal", Type::NonMaxSuppression}, - { "MatrixNms", Type::MatrixNms}, - { "MulticlassNms", Type::MulticlassNms}, - { "MulticlassNmsIEInternal", Type::MulticlassNms}, - { "Reference", Type::Reference}, - { "Subgraph", Type::Subgraph}, - { "PriorBox", Type::PriorBox}, - { "PriorBoxClustered", Type::PriorBoxClustered}, - { "Interaction", Type::Interaction}, - { "MHA", Type::MHA}, - { "Unique", Type::Unique}, - { "Ngram", Type::Ngram} -}; +using TypeToNameMap = InferenceEngine::details::caseless_unordered_map; + +static const TypeToNameMap& get_type_to_name_tbl() { + static const TypeToNameMap type_to_name_tbl = { + { "Constant", Type::Input }, + { "Parameter", Type::Input }, + { "Result", Type::Output }, + { "Eye", Type::Eye }, + { "Convolution", Type::Convolution }, + { "GroupConvolution", Type::Convolution }, + { "MatMul", Type::MatMul }, + { "FullyConnected", Type::FullyConnected }, + { "MaxPool", Type::Pooling }, + { "AvgPool", Type::Pooling }, + { "AdaptiveMaxPool", Type::AdaptivePooling}, + { "AdaptiveAvgPool", Type::AdaptivePooling}, + { "Add", Type::Eltwise }, + { "IsFinite", Type::Eltwise }, + { "IsInf", Type::Eltwise }, + { "IsNaN", Type::Eltwise }, + { "Subtract", Type::Eltwise }, + { "Multiply", Type::Eltwise }, + { "Divide", Type::Eltwise }, + { "SquaredDifference", Type::Eltwise }, + { "Maximum", Type::Eltwise }, + { "Minimum", Type::Eltwise }, + { "Mod", Type::Eltwise }, + { "FloorMod", Type::Eltwise }, + { "Power", Type::Eltwise }, + { "PowerStatic", Type::Eltwise }, + { "Equal", Type::Eltwise }, + { "NotEqual", Type::Eltwise }, + { "Greater", Type::Eltwise }, + { "GreaterEqual", Type::Eltwise }, + { "Less", Type::Eltwise }, + { "LessEqual", Type::Eltwise }, + { "LogicalAnd", Type::Eltwise }, + { "LogicalOr", Type::Eltwise }, + { "LogicalXor", Type::Eltwise }, + { "LogicalNot", Type::Eltwise }, + { "Relu", Type::Eltwise }, + { "LeakyRelu", Type::Eltwise }, + { "Gelu", Type::Eltwise }, + { "Elu", Type::Eltwise }, + { "Tanh", Type::Eltwise }, + { "Sigmoid", Type::Eltwise }, + { "Abs", 
Type::Eltwise }, + { "Sqrt", Type::Eltwise }, + { "Clamp", Type::Eltwise }, + { "Exp", Type::Eltwise }, + { "SwishCPU", Type::Eltwise }, + { "HSwish", Type::Eltwise }, + { "Mish", Type::Eltwise }, + { "HSigmoid", Type::Eltwise }, + { "Round", Type::Eltwise }, + { "PRelu", Type::Eltwise }, + { "Erf", Type::Eltwise }, + { "SoftPlus", Type::Eltwise }, + { "SoftSign", Type::Eltwise }, + { "Select", Type::Eltwise}, + { "Log", Type::Eltwise }, + { "Reshape", Type::Reshape }, + { "Squeeze", Type::Reshape }, + { "Unsqueeze", Type::Reshape }, + { "ShapeOf", Type::ShapeOf }, + { "NonZero", Type::NonZero }, + { "Softmax", Type::Softmax }, + { "Reorder", Type::Reorder }, + { "BatchToSpace", Type::BatchToSpace }, + { "SpaceToBatch", Type::SpaceToBatch }, + { "DepthToSpace", Type::DepthToSpace }, + { "SpaceToDepth", Type::SpaceToDepth }, + { "Roll", Type::Roll }, + { "LRN", Type::Lrn }, + { "Split", Type::Split }, + { "VariadicSplit", Type::Split }, + { "Concat", Type::Concatenation }, + { "ConvolutionBackpropData", Type::Deconvolution }, + { "GroupConvolutionBackpropData", Type::Deconvolution }, + { "StridedSlice", Type::StridedSlice }, + { "Slice", Type::StridedSlice }, + { "Tile", Type::Tile }, + { "ROIAlign", Type::ROIAlign }, + { "ROIPooling", Type::ROIPooling }, + { "PSROIPooling", Type::PSROIPooling }, + { "DeformablePSROIPooling", Type::PSROIPooling }, + { "Pad", Type::Pad }, + { "Transpose", Type::Transpose }, + { "LSTMCell", Type::RNNCell }, + { "GRUCell", Type::RNNCell }, + { "AUGRUCell", Type::RNNCell }, + { "RNNCell", Type::RNNCell }, + { "LSTMSequence", Type::RNNSeq }, + { "GRUSequence", Type::RNNSeq }, + { "AUGRUSequence", Type::RNNSeq }, + { "RNNSequence", Type::RNNSeq }, + { "FakeQuantize", Type::FakeQuantize }, + { "BinaryConvolution", Type::BinaryConvolution }, + { "DeformableConvolution", Type::DeformableConvolution }, + { "TensorIterator", Type::TensorIterator }, + { "Loop", Type::TensorIterator }, + { "ReadValue", Type::MemoryInput}, // for construction from name ctor, arbitrary name is used + { "Assign", Type::MemoryOutput }, // for construction from layer ctor + { "Convert", Type::Convert }, + { "NV12toRGB", Type::ColorConvert }, + { "NV12toBGR", Type::ColorConvert }, + { "I420toRGB", Type::ColorConvert }, + { "I420toBGR", Type::ColorConvert }, + { "MVN", Type::MVN}, + { "NormalizeL2", Type::NormalizeL2}, + { "ScatterUpdate", Type::ScatterUpdate}, + { "ScatterElementsUpdate", Type::ScatterElementsUpdate}, + { "ScatterNDUpdate", Type::ScatterNDUpdate}, + { "Interpolate", Type::Interpolate}, + { "ReduceL1", Type::Reduce}, + { "ReduceL2", Type::Reduce}, + { "ReduceLogicalAnd", Type::Reduce}, + { "ReduceLogicalOr", Type::Reduce}, + { "ReduceMax", Type::Reduce}, + { "ReduceMean", Type::Reduce}, + { "ReduceMin", Type::Reduce}, + { "ReduceProd", Type::Reduce}, + { "ReduceSum", Type::Reduce}, + { "ReduceLogSum", Type::Reduce}, + { "ReduceLogSumExp", Type::Reduce}, + { "ReduceSumSquare", Type::Reduce}, + { "Broadcast", Type::Broadcast}, + { "EmbeddingSegmentsSum", Type::EmbeddingSegmentsSum}, + { "EmbeddingBagPackedSum", Type::EmbeddingBagPackedSum}, + { "EmbeddingBagOffsetsSum", Type::EmbeddingBagOffsetsSum}, + { "Gather", Type::Gather}, + { "GatherElements", Type::GatherElements}, + { "GatherND", Type::GatherND}, + { "GridSample", Type::GridSample}, + { "OneHot", Type::OneHot}, + { "RegionYolo", Type::RegionYolo}, + { "ShuffleChannels", Type::ShuffleChannels}, + { "DFT", Type::DFT}, + { "IDFT", Type::DFT}, + { "RDFT", Type::RDFT}, + { "IRDFT", Type::RDFT}, + { "Abs", Type::Math}, + { 
"Acos", Type::Math}, + { "Acosh", Type::Math}, + { "Asin", Type::Math}, + { "Asinh", Type::Math}, + { "Atan", Type::Math}, + { "Atanh", Type::Math}, + { "Ceil", Type::Math}, + { "Ceiling", Type::Math}, + { "Cos", Type::Math}, + { "Cosh", Type::Math}, + { "Floor", Type::Math}, + { "HardSigmoid", Type::Math}, + { "If", Type::If}, + { "Neg", Type::Math}, + { "Reciprocal", Type::Math}, + { "Selu", Type::Math}, + { "Sign", Type::Math}, + { "Sin", Type::Math}, + { "Sinh", Type::Math}, + { "SoftPlus", Type::Math}, + { "Softsign", Type::Math}, + { "Tan", Type::Math}, + { "CTCLoss", Type::CTCLoss}, + { "Bucketize", Type::Bucketize}, + { "CTCGreedyDecoder", Type::CTCGreedyDecoder}, + { "CTCGreedyDecoderSeqLen", Type::CTCGreedyDecoderSeqLen}, + { "CumSum", Type::CumSum}, + { "DetectionOutput", Type::DetectionOutput}, + { "ExperimentalDetectronDetectionOutput", Type::ExperimentalDetectronDetectionOutput}, + { "LogSoftmax", Type::LogSoftmax}, + { "TopK", Type::TopK}, + { "GatherTree", Type::GatherTree}, + { "GRN", Type::GRN}, + { "Range", Type::Range}, + { "Proposal", Type::Proposal}, + { "ReorgYolo", Type::ReorgYolo}, + { "ReverseSequence", Type::ReverseSequence}, + { "ExperimentalDetectronTopKROIs", Type::ExperimentalDetectronTopKROIs}, + { "ExperimentalDetectronROIFeatureExtractor", Type::ExperimentalDetectronROIFeatureExtractor}, + { "ExperimentalDetectronPriorGridGenerator", Type::ExperimentalDetectronPriorGridGenerator}, + { "ExperimentalDetectronGenerateProposalsSingleImage", Type::ExperimentalDetectronGenerateProposalsSingleImage}, + { "GenerateProposals", Type::GenerateProposals}, + { "ExtractImagePatches", Type::ExtractImagePatches}, + { "NonMaxSuppression", Type::NonMaxSuppression}, + { "NonMaxSuppressionIEInternal", Type::NonMaxSuppression}, + { "MatrixNms", Type::MatrixNms}, + { "MulticlassNms", Type::MulticlassNms}, + { "MulticlassNmsIEInternal", Type::MulticlassNms}, + { "Reference", Type::Reference}, + { "Subgraph", Type::Subgraph}, + { "PriorBox", Type::PriorBox}, + { "PriorBoxClustered", Type::PriorBoxClustered}, + { "Interaction", Type::Interaction}, + { "MHA", Type::MHA}, + { "Unique", Type::Unique}, + { "Ngram", Type::Ngram} + }; + return type_to_name_tbl; +} Type TypeFromName(const std::string& type) { + const TypeToNameMap & type_to_name_tbl = get_type_to_name_tbl(); auto itType = type_to_name_tbl.find(type); if (type_to_name_tbl.end() != itType) { return itType->second; From 016c7dea8aa52585ca585486b8a93bc0f418ab14 Mon Sep 17 00:00:00 2001 From: Fang Xu Date: Tue, 12 Sep 2023 11:23:23 +0530 Subject: [PATCH 17/31] update oneTBB with https://github.com/oneapi-src/oneTBB/releases/tag/v2021.2.3 (#19639) --- cmake/dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index e0f17f3160df51..0f9683377e18e9 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -116,10 +116,10 @@ function(ov_download_tbb) elseif(LINUX AND X86_64 AND OV_GLIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.1-lin-canary.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.3-lin.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "3a2c2ec79b3cce7e6a2484754ba6f029fa968db2eefc6659540792b7db8fea0c" + SHA256 "f3f2edd8e7875b02220f11ab5b201411d5af6822e525e8da5444b4a666514e8b" USE_NEW_LOCATION TRUE) elseif(YOCTO_AARCH64) RESOLVE_DEPENDENCY(TBB From 8e0d8dd36b52f5d1cd8ed94a1c3550694d8f35e4 Mon Sep 17 00:00:00 2001 From: 
Oleksii Khovan Date: Tue, 12 Sep 2023 08:18:04 +0200 Subject: [PATCH 18/31] [GPU] Pad-12 (#19083) * GPU primitive and kernel changes to support Pad-12 * Exclude Pad-12 from GPU transformations pipeline * add unit tests * add single-layet test for Pad-12 --- .../intel_gpu/plugin/primitives_list.hpp | 3 + .../include/intel_gpu/primitives/border.hpp | 23 +- src/plugins/intel_gpu/src/graph/border.cpp | 36 ++-- .../intel_gpu/src/graph/impls/ocl/border.cpp | 6 +- .../cl_kernels/border_gpu_ref.cl | 116 +++++------ .../kernels/border/border_kernel_base.h | 9 +- src/plugins/intel_gpu/src/plugin/ops/pad.cpp | 17 +- .../src/plugin/transformations_pipeline.cpp | 2 + .../single_layer_tests/pad.cpp | 63 ++++++ .../tests/unit/test_cases/border_gpu_test.cpp | 197 +++++++++++++++++- .../shared/include/single_layer_tests/pad.hpp | 4 + .../shared_test_classes/single_layer/pad.hpp | 19 ++ .../src/single_layer/pad.cpp | 2 +- .../include/ngraph_functions/builders.hpp | 6 +- .../ngraph_functions/src/pad.cpp | 30 ++- 15 files changed, 429 insertions(+), 104 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index c36997481aa747..7dd03dc685e02a 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -256,6 +256,9 @@ REGISTER_FACTORY(v10, Unique); REGISTER_FACTORY(v11, Interpolate); REGISTER_FACTORY(v11, TopK); +// ------------------------------ Supported v12 ops ----------------------------- // +REGISTER_FACTORY(v12, Pad); + // --------------------------- Supported internal ops --------------------------- // REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal); REGISTER_FACTORY(internal, GenerateProposalsIEInternal); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/border.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/border.hpp index def9b956fffe21..4d0768c2f13c66 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/border.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/border.hpp @@ -10,17 +10,17 @@ namespace cldnn { /// @brief Adds border around input. /// -/// @details Applies border of specified type around input data. The size of output data is increased +/// @details Applies border of specified type around input data. The size of output data is increased or decreased /// by @c pads_begin and by @c pads_end. /// @n /// @n@b Requirements: -/// @n - @c pads_begin and @c pads_end must be non-negative on all dimensions and compatible +/// @n - @c pads_begin and @c pads_end must be compatible /// with size of input (describe the same dimensions). /// @n - For @c PadMode equal to @c SYMMETRIC, @c pads_begin and @c pads_end /// must be lower than or equal to size of input on corresponding dimension (for all dimensions) /// @n - For @c PadMode equal to @c REFLECT, @c pads_begin and @c pads_end /// must be lower than size of input on corresponding dimension (for all dimensions) -/// @n Breaking any of this conditions will cause exeption throw. +/// @n Breaking any of this conditions will cause exception throw. struct border : public primitive_base { CLDNN_DECLARE_PRIMITIVE(border) @@ -40,12 +40,13 @@ struct border : public primitive_base { /// @param id An identifier of new primitive. /// @param inputs An identifier list of primitives which are not constant input. 
/// @param non_constant_input_mask Bit mask whether inputs are non-constant or not - /// @param pads_begin Sizes of border that needs to be added from left + /// @param pads_begin Sizes of border that needs to be added (or removed) from left /// (in X dimension) and from top (in Y dimension). - /// @param pads_end Sizes of border that needs to be added from right + /// @param pads_end Sizes of border that needs to be added (or removed) from right /// (in X dimension) and from bottom (in Y dimension). /// @param pad_mode Value of elements which is used for paddings /// @param pad_value Pad's value in case of PadMode::CONSTANT + /// @param allow_negative_pad Allow negative values in pads_begin and pad_end to remove borders /// @param output_padding Optional padding for output from primitive. border(const primitive_id& id, const std::vector& inputs, @@ -54,12 +55,14 @@ struct border : public primitive_base { const ov::CoordinateDiff& pads_end = {}, const ov::op::PadMode pad_mode = ov::op::PadMode::CONSTANT, const float pad_value = 0.0f, + const bool allow_negative_pad = false, const padding& output_padding = padding()) : primitive_base(id, inputs, {output_padding}), pads_begin(pads_begin), pads_end(pads_end), pad_mode(pad_mode), pad_value(pad_value), + allow_negative_pad(allow_negative_pad), non_constant_input_mask(non_constant_input_mask) {} /// @brief Sizes of border that needs to be added from left (in X dimension) and from top (in Y dimension). @@ -69,7 +72,9 @@ struct border : public primitive_base { /// @brief Type of border that needs to be added to the input. ov::op::PadMode pad_mode = ov::op::PadMode::CONSTANT; /// @brief Border value that is used in constant mode. - float pad_value = 0.0f; + float pad_value{0.0}; + /// @brief Allow negative values in pads_begin and pad_end. + bool allow_negative_pad{false}; /// @brief Bit mask whether input is non-constant or not. Position is defined at PAD_NON_CONST_INPUT. 
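As an aside to the constructor documented above, here is a minimal usage sketch of the extended `cldnn::border` primitive. It is not part of the patch: the primitive id `"pad"`, the input id `"input"`, the `in_layout` variable, and the pad sizes are all hypothetical and chosen only to illustrate mixing positive and negative pads, which the new `allow_negative_pad` flag permits.

```cpp
// Illustrative only: a border primitive that trims one row at the top and two
// columns at the right while padding elsewhere. Negative entries in the pads
// require allow_negative_pad = true, otherwise validation rejects them.
cldnn::topology topology;
topology.add(cldnn::input_layout("input", in_layout));   // in_layout: some bfyx f32 layout (assumed)
topology.add(cldnn::border("pad",
                           {cldnn::input_info("input")},
                           0,                                 // non_constant_input_mask: pads are compile-time constants
                           ov::CoordinateDiff{0, 0, -1, 2},   // pads_begin (b, f, y, x)
                           ov::CoordinateDiff{0, 0, 1, -2},   // pads_end   (b, f, y, x)
                           ov::op::PadMode::CONSTANT,
                           0.0f,                              // pad_value
                           true));                            // allow_negative_pad
```

The unit tests added later in this patch (`border_test::SetUp` and the `negative_pads` suite) build the primitive in essentially the same way.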
int32_t non_constant_input_mask = 0; @@ -79,6 +84,7 @@ struct border : public primitive_base { seed = hash_range(seed, pads_end.begin(), pads_end.end()); seed = hash_combine(seed, pad_mode); seed = hash_combine(seed, pad_value); + seed = hash_combine(seed, allow_negative_pad); seed = hash_combine(seed, non_constant_input_mask); return seed; } @@ -92,7 +98,8 @@ struct border : public primitive_base { return pads_begin == rhs_casted.pads_begin && pads_end == rhs_casted.pads_end && pad_mode == rhs_casted.pad_mode && - pad_value == rhs_casted.pad_value; + pad_value == rhs_casted.pad_value && + allow_negative_pad == rhs_casted.allow_negative_pad; } void save(BinaryOutputBuffer& ob) const override { @@ -102,6 +109,7 @@ struct border : public primitive_base { ob << make_data(&pad_mode, sizeof(ov::op::PadMode)); ob << pad_value; ob << non_constant_input_mask; + ob << allow_negative_pad; } void load(BinaryInputBuffer& ib) override { @@ -111,6 +119,7 @@ struct border : public primitive_base { ib >> make_data(&pad_mode, sizeof(ov::op::PadMode)); ib >> pad_value; ib >> non_constant_input_mask; + ib >> allow_negative_pad; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/border.cpp b/src/plugins/intel_gpu/src/graph/border.cpp index 2e662781b01d4a..97d8bbc30a64db 100644 --- a/src/plugins/intel_gpu/src/graph/border.cpp +++ b/src/plugins/intel_gpu/src/graph/border.cpp @@ -107,6 +107,7 @@ std::string border_inst::to_string(border_node const& node) { border_info.add("pads_end", desc->pads_end); border_info.add("pad mode", desc->pad_mode); border_info.add("pad value", std::to_string(desc->pad_value)); + border_info.add("negative_pad", std::to_string(desc->allow_negative_pad)); node_info->add("border info", border_info); @@ -122,23 +123,24 @@ border_inst::typed_primitive_inst(network& network, border_node const& node) : p } const auto& input_sizes = input_layout.get_dims(); - auto pad_mode = argument->pad_mode; - - // Check if sizes of border are in proper range. - CLDNN_ERROR_BOOL(node.id(), - "pads_begin border sizes", - std::any_of(argument->pads_begin.begin(), argument->pads_begin.end(), - [](std::ptrdiff_t pad) { - return pad < 0; - }), - "Invalid border size: negative value"); - CLDNN_ERROR_BOOL(node.id(), - "pads_end border sizes", - std::any_of(argument->pads_end.begin(), argument->pads_end.end(), - [](std::ptrdiff_t pad) { - return pad < 0; - }), - "Invalid border size: negative value"); + const auto pad_mode = argument->pad_mode; + const bool allow_negative_pad = argument->allow_negative_pad; + + const auto check_negative_pad = [](std::ptrdiff_t pad) { + return pad < 0; + }; + + if (!allow_negative_pad) { + // Check if sizes of border are in proper range. 
+ CLDNN_ERROR_BOOL(node.id(), + "pads_begin border sizes", + std::any_of(argument->pads_begin.begin(), argument->pads_begin.end(), check_negative_pad), + "Invalid border size: negative value"); + CLDNN_ERROR_BOOL(node.id(), + "pads_end border sizes", + std::any_of(argument->pads_end.begin(), argument->pads_end.end(), check_negative_pad), + "Invalid border size: negative value"); + } if (pad_mode == ov::op::PadMode::SYMMETRIC) { bool valid_pads = true; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp index 9265f345e259fe..97096e72720cd6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/border.cpp @@ -46,7 +46,7 @@ struct border_impl : typed_primitive_impl_ocl { begin_vec.insert(begin_vec.end(), zeros_to_add, 0); } std::vector pads_begin(begin_vec.begin(), begin_vec.end()); - params.lt_sizes = convert_dim_vector(tensor(pads_format, pads_begin, 0)); + params.lt_sizes = convert_dim_vector(tensor(pads_format, pads_begin, 0)); } else { params.begin_type = kernel_selector::base_params::ArgType::Input; @@ -65,7 +65,7 @@ struct border_impl : typed_primitive_impl_ocl { end_vec.insert(end_vec.end(), zeros_to_add, 0); } std::vector pads_end(end_vec.begin(), end_vec.end()); - params.rb_sizes = convert_dim_vector(tensor(pads_format, pads_end, 0)); + params.rb_sizes = convert_dim_vector(tensor(pads_format, pads_end, 0)); } else { params.end_type = kernel_selector::base_params::ArgType::Input; @@ -100,6 +100,8 @@ struct border_impl : typed_primitive_impl_ocl { OPENVINO_ASSERT(false, "[GPU] Encountered unhandled enum case: PadMode during translation to kernel selector enumeration."); } + params.allow_negative_pad = primitive->allow_negative_pad; + return {params, optional_params}; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/border_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/border_gpu_ref.cl index 6eccd3e3546118..97298456773ce7 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/border_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/border_gpu_ref.cl @@ -19,19 +19,19 @@ KERNEL(border_gpu_ref)( __global OUTPUT_TYPE* output) { #ifdef BEGIN_TYPE - const uint begin_b = begin[0]; - const uint begin_f = begin[1]; + const int begin_b = begin[0]; + const int begin_f = begin[1]; uint begin_offset = 2; #if INPUT0_DIMS == 6 - const uint begin_w = begin[begin_offset]; + const int begin_w = begin[begin_offset]; begin_offset += 1; #endif #if INPUT0_DIMS >= 5 - const uint begin_z = begin[begin_offset]; + const int begin_z = begin[begin_offset]; begin_offset += 1; #endif - const uint begin_y = begin[begin_offset]; - const uint begin_x = begin[begin_offset + 1]; + const int begin_y = begin[begin_offset]; + const int begin_x = begin[begin_offset + 1]; #else const uint begin_b = LT_SIZES_BATCH_NUM; const uint begin_f = LT_SIZES_FEATURE_NUM; @@ -46,19 +46,19 @@ KERNEL(border_gpu_ref)( #endif #ifdef END_TYPE - const uint end_b = end[0]; - const uint end_f = end[1]; + const int end_b = end[0]; + const int end_f = end[1]; uint end_offset = 2; #if INPUT0_DIMS == 6 - const uint end_w = end[end_offset]; + const int end_w = end[end_offset]; end_offset += 1; #endif #if INPUT0_DIMS >= 5 - const uint end_z = end[end_offset]; + const int end_z = end[end_offset]; end_offset += 1; #endif - const uint end_y = end[end_offset]; - const uint end_x = end[end_offset + 1]; + const int end_y = end[end_offset]; + const int end_x = 
end[end_offset + 1]; #else const uint end_b = RB_SIZES_BATCH_NUM; const uint end_f = RB_SIZES_FEATURE_NUM; @@ -74,65 +74,65 @@ KERNEL(border_gpu_ref)( // [CONSTEXPR] // Border sizes(left-top): - const uint blt_sb = begin_b; - const uint blt_sf = begin_f; - const uint blt_sy = begin_y; - const uint blt_sx = begin_x; + const int blt_sb = begin_b; + const int blt_sf = begin_f; + const int blt_sy = begin_y; + const int blt_sx = begin_x; #if INPUT0_DIMS == 6 - const uint blt_sw = begin_w; + const int blt_sw = begin_w; #else - const uint blt_sw = 0; + const int blt_sw = 0; #endif #if INPUT0_DIMS >= 5 - const uint blt_sz = begin_z; + const int blt_sz = begin_z; #else - const uint blt_sz = 0; + const int blt_sz = 0; #endif // Border sizes(right-bottom): - const uint brb_sb = end_b; - const uint brb_sf = end_f; - const uint brb_sy = end_y; - const uint brb_sx = end_x; + const int brb_sb = end_b; + const int brb_sf = end_f; + const int brb_sy = end_y; + const int brb_sx = end_x; #if INPUT0_DIMS == 6 - const uint brb_sw = end_w; + const int brb_sw = end_w; #else - const uint brb_sw = 0; + const int brb_sw = 0; #endif #if INPUT0_DIMS >= 5 - const uint brb_sz = end_z; + const int brb_sz = end_z; #else - const uint brb_sz = 0; + const int brb_sz = 0; #endif // Input sizes: - const uint in_sx = INPUT0_SIZE_X; - const uint in_sy = INPUT0_SIZE_Y; - const uint in_sz = INPUT0_SIZE_Z; - const uint in_sw = INPUT0_SIZE_W; - const uint in_sf = INPUT0_FEATURE_NUM; - const uint in_sb = INPUT0_BATCH_NUM; + const int in_sx = INPUT0_SIZE_X; + const int in_sy = INPUT0_SIZE_Y; + const int in_sz = INPUT0_SIZE_Z; + const int in_sw = INPUT0_SIZE_W; + const int in_sf = INPUT0_FEATURE_NUM; + const int in_sb = INPUT0_BATCH_NUM; // Input limits (exclusive; tested on output position): - const uint in_lx = in_sx + blt_sx; - const uint in_ly = in_sy + blt_sy; - const uint in_lz = in_sz + blt_sz; - const uint in_lw = in_sw + blt_sw; - const uint in_lf = in_sf + blt_sf; - const uint in_lb = in_sb + blt_sb; + const int in_lx = in_sx + blt_sx; + const int in_ly = in_sy + blt_sy; + const int in_lz = in_sz + blt_sz; + const int in_lw = in_sw + blt_sw; + const int in_lf = in_sf + blt_sf; + const int in_lb = in_sb + blt_sb; - const uint out_xz = (uint) get_global_id(0); - const uint out_yw = (uint) get_global_id(1); - const uint out_fb = (uint) get_global_id(2); + const int out_xz = get_global_id(0); + const int out_yw = get_global_id(1); + const int out_fb = get_global_id(2); - const uint out_f = out_fb % OUTPUT_FEATURE_NUM; - const uint out_b = out_fb / OUTPUT_FEATURE_NUM; + const int out_f = out_fb % OUTPUT_FEATURE_NUM; + const int out_b = out_fb / OUTPUT_FEATURE_NUM; - const uint out_x = out_xz % OUTPUT_SIZE_X; - const uint out_z = out_xz / OUTPUT_SIZE_X; + const int out_x = out_xz % OUTPUT_SIZE_X; + const int out_z = out_xz / OUTPUT_SIZE_X; - const uint out_y = out_yw % OUTPUT_SIZE_Y; - const uint out_w = out_yw / OUTPUT_SIZE_Y; + const int out_y = out_yw % OUTPUT_SIZE_Y; + const int out_w = out_yw / OUTPUT_SIZE_Y; #ifdef BORDER_TYPE_CONSTANT #ifdef BORDER_VALUE_TYPE @@ -148,14 +148,14 @@ KERNEL(border_gpu_ref)( out_f >= blt_sf & out_f < in_lf & out_b >= blt_sb & out_b < in_lb) { - const uint in_x = out_x - blt_sx; - const uint in_y = out_y - blt_sy; - const uint in_z = out_z - blt_sz; - const uint in_w = out_w - blt_sw; - const uint in_f = out_f - blt_sf; - const uint in_b = out_b - blt_sb; - - const uint in_pos = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR in_b, in_f, in_w, in_z, in_y, in_x); + const int in_x = out_x - 
blt_sx; + const int in_y = out_y - blt_sy; + const int in_z = out_z - blt_sz; + const int in_w = out_w - blt_sw; + const int in_f = out_f - blt_sf; + const int in_b = out_b - blt_sb; + + const int in_pos = FUNC_CALL(get_input_index)(OPTIONAL_SHAPE_INFO_TENSOR in_b, in_f, in_w, in_z, in_y, in_x); in_val = input[in_pos]; } #elif defined BORDER_TYPE_EDGE @@ -192,6 +192,6 @@ KERNEL(border_gpu_ref)( #error Unsupported border type. #endif - const uint out_pos = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR out_b, out_f, out_w, out_z, out_y, out_x); + const int out_pos = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR out_b, out_f, out_w, out_z, out_y, out_x); output[out_pos] = in_val; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/border/border_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/border/border_kernel_base.h index aa295a7c35bc9b..0fb98d9bc23c07 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/border/border_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/border/border_kernel_base.h @@ -12,16 +12,19 @@ namespace kernel_selector { // border_params //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct border_params : public base_params { - DimTensor<> lt_sizes; - DimTensor<> rb_sizes; + DimTensor lt_sizes; + DimTensor rb_sizes; BorderType b_type; float border_value; + bool allow_negative_pad; ArgType begin_type; ArgType end_type; ArgType pad_value_type; - border_params() : base_params(KernelType::BORDER), b_type(BorderType::CONSTANT), border_value(0.0f), + + border_params() : base_params(KernelType::BORDER), b_type(BorderType::CONSTANT), + border_value(0.0f), allow_negative_pad(false), begin_type(ArgType::Constant), end_type(ArgType::Constant), pad_value_type(ArgType::Constant) {} ParamsKey GetParamsKey() const override { diff --git a/src/plugins/intel_gpu/src/plugin/ops/pad.cpp b/src/plugins/intel_gpu/src/plugin/ops/pad.cpp index 18a5225dac6848..af894693a02586 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/pad.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/pad.cpp @@ -13,7 +13,7 @@ namespace ov { namespace intel_gpu { -static void CreatePadOp(ProgramBuilder& p, const std::shared_ptr& op) { +static void CreatePadOpInternal(ProgramBuilder& p, const std::shared_ptr& op, bool allow_negative_pad) { validate_inputs_count(op, {3, 4}); auto inputs = p.GetInputInfo(op); std::string layerName = layer_type_name_ID(op); @@ -56,18 +56,27 @@ static void CreatePadOp(ProgramBuilder& p, const std::shared_ptrget_pad_mode(), - pad_value); + pad_value, + allow_negative_pad); + p.add_primitive(*op, borderPrim); +} + +static void CreatePadOp(ProgramBuilder& p, const std::shared_ptr& op) { + CreatePadOpInternal(p, op, false); +} - p.add_primitive(*op, tilePrim); +static void CreatePadOp(ProgramBuilder& p, const std::shared_ptr& op) { + CreatePadOpInternal(p, op, true); } REGISTER_FACTORY_IMPL(v1, Pad); +REGISTER_FACTORY_IMPL(v12, Pad); } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index d10eb959395b30..a569404fb1cdee 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -102,6 +102,7 @@ #include "transformations/op_conversions/convert_shapeof3.hpp" #include "transformations/op_conversions/convert_topk11_downgrade.hpp" 
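For context on what the new v12 factory registered above receives, the following is a hedged sketch of a model containing a Pad-12 node with negative pads; the function name `make_negative_pad_model`, the input shape, and the pad values are illustrative and not taken from the patch.

```cpp
#include <openvino/openvino.hpp>
#include <openvino/opsets/opset12.hpp>

// Illustrative only: a Pad-12 node whose pads_begin/pads_end contain negative
// entries; on GPU this is expected to map to cldnn::border with
// allow_negative_pad = true via CreatePadOpInternal shown earlier.
std::shared_ptr<ov::Model> make_negative_pad_model() {
    auto data = std::make_shared<ov::opset12::Parameter>(ov::element::f32, ov::Shape{1, 3, 10, 10});
    auto pads_begin = ov::opset12::Constant::create(ov::element::i64, {4}, {0, 0, -1, 2});
    auto pads_end   = ov::opset12::Constant::create(ov::element::i64, {4}, {0, 0, 2, -1});
    auto pad_value  = ov::opset12::Constant::create(ov::element::f32, {}, {0.0f});
    auto pad = std::make_shared<ov::opset12::Pad>(data, pads_begin, pads_end, pad_value,
                                                  ov::op::PadMode::CONSTANT);
    return std::make_shared<ov::Model>(ov::OutputVector{pad->output(0)}, ov::ParameterVector{data});
}
```

Such a model would previously have required downgrading Pad-12 to Pad-1 (rejecting negative pads); with this patch the ConvertPad12ToPad1 pass is registered only as part of the common pipeline, while negative pads are handled natively by the border primitive.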
#include "transformations/op_conversions/eye_decomposition.hpp" +#include "transformations/op_conversions/convert_pad12_downgrade.hpp" #include "transformations/convert_precision.hpp" #include "transformations/init_node_info.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" @@ -269,6 +270,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); precisions_map int_convert_precision_map { {ov::element::i64, ov::element::i32}, diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/pad.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/pad.cpp index dc469ca8cf4d66..e207e8911e8247 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/pad.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/pad.cpp @@ -21,6 +21,7 @@ const std::vector argPadValue = {0.f, 1.f, 2.f, -1.f}; const std::vector padMode = { ngraph::helpers::PadMode::EDGE, ngraph::helpers::PadMode::REFLECT, + ngraph::helpers::PadMode::SYMMETRIC }; INSTANTIATE_TEST_SUITE_P(smoke_Pad2DConst, @@ -82,4 +83,66 @@ INSTANTIATE_TEST_SUITE_P(smoke_Pad4D, testing::Values(ov::test::utils::DEVICE_GPU)), PadLayerTest::getTestCaseName); +const std::vector> padsBegin2DMixed = {{0, 0}, {1, 1}, {-2, 0}, {0, 3}, {2, -2}}; +const std::vector> padsEnd2DMixed = {{0, 0}, {1, 1}, {0, 1}, {-3, -2}, {2, -1}}; + +INSTANTIATE_TEST_SUITE_P(smoke_Pad2DConst, + PadLayerTest12, + testing::Combine(testing::ValuesIn(padsEnd2DMixed), + testing::ValuesIn(padsEnd2D), + testing::ValuesIn(argPadValue), + testing::Values(ngraph::helpers::PadMode::CONSTANT), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(std::vector{13, 5}), + testing::Values(ov::test::utils::DEVICE_GPU)), + PadLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Pad2D, + PadLayerTest12, + testing::Combine(testing::ValuesIn(padsBegin2DMixed), + testing::ValuesIn(padsEnd2DMixed), + testing::Values(-333), + testing::ValuesIn(padMode), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(std::vector{13, 5}), + testing::Values(ov::test::utils::DEVICE_GPU)), + PadLayerTest::getTestCaseName); + +const std::vector> padsBegin4DMixed = {{0, 0, 0, 0}, {0, 3, 0, 0}, {0, 0, 0, 1}, {0, 0, -1, 1}, {2, 0, 0, 0}, {0, 3, 0, -1}}; +const std::vector> padsEnd4DMixed = {{0, 0, 0, 0}, {0, 3, 0, 0}, {1, 0, 0, 0}, {0, 0, 0, 2}, {1, -3, 0, 0}, {0, 3, 0, -1}}; + +INSTANTIATE_TEST_SUITE_P(smoke_Pad4DConst, + PadLayerTest12, + testing::Combine(testing::ValuesIn(padsBegin4DMixed), + testing::ValuesIn(padsEnd4DMixed), + testing::ValuesIn(argPadValue), + testing::Values(ngraph::helpers::PadMode::CONSTANT), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(std::vector{3, 5, 10, 11}), + testing::Values(ov::test::utils::DEVICE_GPU)), + PadLayerTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Pad4D, + PadLayerTest12, + 
testing::Combine(testing::ValuesIn(padsBegin4DMixed), + testing::ValuesIn(padsEnd4DMixed), + testing::Values(-333), + testing::ValuesIn(padMode), + testing::ValuesIn(netPrecisions), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Layout::ANY), + testing::Values(std::vector{3, 5, 10, 11}), + testing::Values(ov::test::utils::DEVICE_GPU)), + PadLayerTest::getTestCaseName); + } // namespace diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/border_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/border_gpu_test.cpp index 2d5a1b631e4d26..a8f30a0da42795 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/border_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/border_gpu_test.cpp @@ -16,6 +16,7 @@ using namespace cldnn; using namespace ::tests; +namespace { template static std::vector generate_rnd_real_input( const std::vector sizes, @@ -55,6 +56,7 @@ using border_test_param = std::tuple, // shape in std::array, // coord diff lt std::array, // coord diff rb + bool, // allow negative pads bool>; // is_caching_test template @@ -65,11 +67,12 @@ class border_test : public ::testing::TestWithParam> { T pad_value; format::type fmt; std::array sh_in, cd_lt, cd_rb, sh_out; + bool allow_negative_pads; bool is_caching_test; void SetUp() override { ::testing::TestWithParam>::SetUp(); rg.set_seed(GET_SUITE_NAME); - std::tie(pad_mode, pad_value, fmt, sh_in, cd_lt, cd_rb, is_caching_test) = this->GetParam(); + std::tie(pad_mode, pad_value, fmt, sh_in, cd_lt, cd_rb, allow_negative_pads, is_caching_test) = this->GetParam(); sh_out = {sh_in[0] + cd_lt[0] + cd_rb[0], sh_in[1] + cd_lt[1] + cd_rb[1], sh_in[2] + cd_lt[2] + cd_rb[2], @@ -88,7 +91,8 @@ class border_test : public ::testing::TestWithParam> { ov::CoordinateDiff(cd_lt.begin(), cd_lt.end()), ov::CoordinateDiff(cd_rb.begin(), cd_rb.end()), pad_mode, - pad_value), + pad_value, + allow_negative_pads), reorder("output", input_info("border"), cldnn::format::bfyx, T_dt)); cldnn::network::ptr target_network = get_network(engine, target_topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); target_network->set_input_data("input", input); @@ -103,7 +107,8 @@ class border_test : public ::testing::TestWithParam> { ov::CoordinateDiff(cd_lt.begin(), cd_lt.end()), ov::CoordinateDiff(cd_rb.begin(), cd_rb.end()), pad_mode, - pad_value)); + pad_value, + allow_negative_pads)); cldnn::network base_network(engine, base_topology, get_test_default_config(engine)); base_network.set_input_data("input", input); @@ -123,6 +128,7 @@ INSTANTIATE_TEST_SUITE_P(border_test_i8, testing::Values(std::array{2, 3, 4, 5}), testing::Values(std::array{1, 2, 3, 4}), testing::Values(std::array{1, 1, 1, 1}), + testing::Values(false), testing::Values(false))); using border_test_u8 = border_test; TEST_P(border_test_u8, border_test_u8) {} @@ -134,6 +140,7 @@ INSTANTIATE_TEST_SUITE_P(border_test_u8, testing::Values(std::array{2, 3, 4, 5}), testing::Values(std::array{1, 2, 3, 4}), testing::Values(std::array{1, 1, 1, 1}), + testing::Values(false), testing::Values(false))); using border_test_i32 = border_test; TEST_P(border_test_i32, border_test_i32) {} @@ -145,7 +152,19 @@ INSTANTIATE_TEST_SUITE_P(border_test_i32, testing::Values(std::array{2, 3, 4, 5}), testing::Values(std::array{1, 2, 3, 4}), testing::Values(std::array{1, 1, 1, 1}), + testing::Values(false), testing::Values(false))); +INSTANTIATE_TEST_SUITE_P(negative_pads, + 
border_test_i32, + testing::Combine(testing::Values(PAD_MODES), + testing::Values(-333), + testing::Values(format::type::b_fs_yx_fsv16), + testing::Values(std::array{6, 8, 7, 11}), + testing::ValuesIn({std::array{-1, -2, -2, -3}, std::array{-1, 3, 4, -3}}), + testing::ValuesIn({std::array{-1, -2, -2, -1}, std::array{2, -3, 3, -2}}), + testing::Values(true), + testing::Values(false))); + using border_test_f16 = border_test; TEST_P(border_test_f16, border_test_f16) {} INSTANTIATE_TEST_SUITE_P(border_test_f16, @@ -156,6 +175,7 @@ INSTANTIATE_TEST_SUITE_P(border_test_f16, testing::Values(std::array{2, 3, 4, 5}), testing::Values(std::array{1, 2, 3, 4}), testing::Values(std::array{1, 1, 1, 1}), + testing::Values(false), testing::Values(false))); INSTANTIATE_TEST_SUITE_P(export_import, border_test_f16, @@ -165,6 +185,7 @@ INSTANTIATE_TEST_SUITE_P(export_import, testing::Values(std::array{2, 3, 4, 5}), testing::Values(std::array{1, 2, 3, 4}), testing::Values(std::array{1, 1, 1, 1}), + testing::Values(false), testing::Values(true))); using border_test_f32 = border_test; TEST_P(border_test_f32, border_test_f32) {} @@ -176,6 +197,7 @@ INSTANTIATE_TEST_SUITE_P(border_test_f32, testing::Values(std::array{2, 3, 4, 5}), testing::Values(std::array{1, 2, 3, 4}), testing::Values(std::array{1, 1, 1, 1}), + testing::Values(false), testing::Values(false))); INSTANTIATE_TEST_SUITE_P(bsv16fsv16_reorder, @@ -186,6 +208,7 @@ INSTANTIATE_TEST_SUITE_P(bsv16fsv16_reorder, testing::Values(std::array{2, 3, 4, 5}), testing::Values(std::array{1, 2, 3, 4}), testing::Values(std::array{1, 1, 1, 1}), + testing::Values(false), testing::Values(false))); TEST(border_gpu, bsv16fsv16_without_reorder) { @@ -1636,3 +1659,171 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant_dynamic) { } } } + +struct border_dynamic_test_param { + ov::op::PadMode mode; + std::array in_shape; + std::array lt; + std::array rb; +}; + +class border_dynamic_test : public ::testing::TestWithParam { +public: + void SetUp() override { + ::testing::TestWithParam::SetUp(); + + const border_dynamic_test_param p = this->GetParam(); + + mode = p.mode; + in_size_b = p.in_shape[0]; + in_size_f = p.in_shape[1]; + in_size_y = p.in_shape[2]; + in_size_x = p.in_shape[3]; + + blt_size_b = p.lt[0]; + blt_size_f = p.lt[1]; + blt_size_y = p.lt[2]; + blt_size_x = p.lt[3]; + + brb_size_b = p.rb[0]; + brb_size_f = p.rb[1]; + brb_size_y = p.rb[2]; + brb_size_x = p.rb[3]; + + out_size_b = in_size_b + blt_size_b + brb_size_b; + out_size_f = in_size_f + blt_size_f + brb_size_f; + out_size_y = in_size_y + blt_size_y + brb_size_y; + out_size_x = in_size_x + blt_size_x + brb_size_x; + + auto& engine = get_test_engine(); + + const auto input_layout_dynamic = layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx}; + const auto input_layout_static = layout{ov::PartialShape{in_size_b, in_size_f, in_size_y, in_size_x}, data_types::f32, format::bfyx}; + const auto input = engine.allocate_memory(input_layout_static); + const auto pads_begin = engine.allocate_memory({{4}, data_types::i32, format::bfyx}); + const auto pads_end = engine.allocate_memory({{4}, data_types::i32, format::bfyx}); + + set_values(pads_begin, {blt_size_b, blt_size_f, blt_size_y, blt_size_x}); + set_values(pads_end, {brb_size_b, brb_size_f, brb_size_y, brb_size_x}); + + constexpr auto pad_value = -333.0f; + + topology topology; + topology.add(input_layout("input", input_layout_dynamic)); + topology.add(data("pads_begin", pads_begin)); + topology.add(data("pads_end", pads_end)); + 
topology.add(border("output", + {input_info("input"), input_info("pads_begin"), input_info("pads_end")}, + cldnn::border::PAD_NON_CONST_INPUT::BEGIN | + cldnn::border::PAD_NON_CONST_INPUT::END, + std::vector{}, + std::vector{}, + mode, + pad_value, + true)); + + const std::vector sizes{ static_cast(in_size_b), static_cast(in_size_f), + static_cast(in_size_y), static_cast(in_size_x) }; + const std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); + set_values(input, input_data); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); + network.set_input_data("input", input); + + const auto inst = network.get_primitive("output"); + const auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + const auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "output"); + + const auto output = outputs.at("output").get_memory(); + const cldnn::mem_lock output_ptr(output, get_test_stream()); + + const auto expected_size = out_size_b * out_size_f * out_size_y * out_size_x; + ASSERT_EQ(output_ptr.size(), expected_size); + + for (auto b = 0; b < out_size_b; ++b) { + for (auto f = 0; f < out_size_f; ++f) { + for (auto y = 0; y < out_size_y; ++y) { + for (auto x = 0; x < out_size_x; ++x) { + const auto output_off = ((b * out_size_f + f) * out_size_y + y) * out_size_x + x; + ASSERT_GE(output_off, 0); + + if (mode == ov::op::PadMode::CONSTANT) { + if (b < blt_size_b || b >= out_size_b - brb_size_b || + f < blt_size_f || f >= out_size_f - brb_size_f || + y < blt_size_y || y >= out_size_y - brb_size_y || + x < blt_size_x || x >= out_size_x - brb_size_x) { + ASSERT_EQ(output_ptr[output_off], pad_value); + } else { + const auto input_off = (((b - blt_size_b) * in_size_f + f - blt_size_f) * in_size_y + y - blt_size_y) * in_size_x + x - blt_size_x; // BFYX + ASSERT_GE(input_off, 0); + ASSERT_EQ(output_ptr[output_off], input_data[input_off]); + } + } else { + int in_b, in_f, in_y, in_x; + CalcInIndices(b, f, y, x, in_b, in_f, in_y, in_x); + const auto input_off = ((in_b * in_size_f + in_f) * in_size_y + in_y) * in_size_x + in_x; + ASSERT_GE(input_off, 0); + ASSERT_EQ(output_ptr[output_off], input_data[input_off]); + } + } + } + } + } + } + +private: + void CalcInIndices(const int b, const int f, const int y, const int x, int& in_b, int& in_f, int& in_y, int& in_x) { + switch (mode) { + case ov::op::PadMode::REFLECT: { + in_b = (b >= blt_size_b && b < out_size_b - brb_size_b) ? b - blt_size_b : (b < blt_size_b ? blt_size_b - b : in_size_b + out_size_b - brb_size_b - 2 - b); + in_f = (f >= blt_size_f && f < out_size_f - brb_size_f) ? f - blt_size_f : (f < blt_size_f ? blt_size_f - f : in_size_f + out_size_f - brb_size_f - 2 - f); + in_y = (y >= blt_size_y && y < out_size_y - brb_size_y) ? y - blt_size_y : (y < blt_size_y ? blt_size_y - y : in_size_y + out_size_y - brb_size_y - 2 - y); + in_x = (x >= blt_size_x && x < out_size_x - brb_size_x) ? x - blt_size_x : (x < blt_size_x ? blt_size_x - x : in_size_x + out_size_x - brb_size_x - 2 - x); + break; + } + case ov::op::PadMode::SYMMETRIC: { + in_b = (b >= blt_size_b && b < out_size_b - brb_size_b) ? b - blt_size_b : (b < blt_size_b ? blt_size_b - 1 - b : in_size_b + out_size_b - brb_size_b - 1 - b); + in_f = (f >= blt_size_f && f < out_size_f - brb_size_f) ? f - blt_size_f : (f < blt_size_f ? 
blt_size_f - 1 - f : in_size_f + out_size_f - brb_size_f - 1 - f); + in_y = (y >= blt_size_y && y < out_size_y - brb_size_y) ? y - blt_size_y : (y < blt_size_y ? blt_size_y - 1 - y : in_size_y + out_size_y - brb_size_y - 1 - y); + in_x = (x >= blt_size_x && x < out_size_x - brb_size_x) ? x - blt_size_x : (x < blt_size_x ? blt_size_x - 1 - x : in_size_x + out_size_x - brb_size_x - 1 - x); + break; + } + case ov::op::PadMode::EDGE: { + in_b = (b >= blt_size_b && b < out_size_b - brb_size_b) ? b - blt_size_b : (b < blt_size_b ? 0 : in_size_b - 1); + in_f = (f >= blt_size_f && f < out_size_f - brb_size_f) ? f - blt_size_f : (f < blt_size_f ? 0 : in_size_f - 1); + in_y = (y >= blt_size_y && y < out_size_y - brb_size_y) ? y - blt_size_y : (y < blt_size_y ? 0 : in_size_y - 1); + in_x = (x >= blt_size_x && x < out_size_x - brb_size_x) ? x - blt_size_x : (x < blt_size_x ? 0 : in_size_x - 1); + break; + } + default: { + throw std::runtime_error("Invalid PadMode"); + } + } + } + + ov::op::PadMode mode; + int in_size_b, in_size_f, in_size_y, in_size_x; + int blt_size_b, blt_size_f, blt_size_y, blt_size_x; + int brb_size_b, brb_size_f, brb_size_y, brb_size_x; + int out_size_b, out_size_f, out_size_y, out_size_x; +}; + +const std::vector dynamic_params { + {ov::op::PadMode::CONSTANT, {2, 3, 5, 4}, {-1, 2, -2, 3}, {2, -1, 3, -2}}, + {ov::op::PadMode::EDGE, {3, 4, 6, 5}, {-1, 1, -3, 2}, {3, -1, 1, -3}}, + {ov::op::PadMode::REFLECT, {3, 4, 6, 5}, {-1, 1, -3, 2}, {2, -1, 2, -3}}, + {ov::op::PadMode::SYMMETRIC, {2, 3, 5, 4}, {-1, 2, -2, 3}, {2, -1, 3, -2}} + }; +TEST_P(border_dynamic_test, border_dynamic_test) {} +INSTANTIATE_TEST_SUITE_P(border_dynamic_test, + border_dynamic_test, + ::testing::ValuesIn(dynamic_params)); +}; // namespace + diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/pad.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/pad.hpp index a58f05ca27a6a7..8bdf9a3d2c283c 100644 --- a/src/tests/functional/plugin/shared/include/single_layer_tests/pad.hpp +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/pad.hpp @@ -12,4 +12,8 @@ TEST_P(PadLayerTest, CompareWithRefs) { Run(); } +TEST_P(PadLayerTest12, CompareWithRefs) { + Run(); +} + } // namespace LayerTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/pad.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/pad.hpp index ff9a9f2712bd9e..613a0659b24314 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/pad.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/pad.hpp @@ -33,6 +33,25 @@ class PadLayerTest : public testing::WithParamInterface, protected: void SetUp() override; + virtual std::shared_ptr CreatePadOp(const ngraph::Output& data, + const std::vector& padsBegin, + const std::vector& padsEnd, + float argPadValue, + ngraph::helpers::PadMode padMode) const { + const auto pad = ngraph::builder::makePad(data, padsBegin, padsEnd, argPadValue, padMode, false); + return pad; + } }; +class PadLayerTest12 : public PadLayerTest { +protected: + std::shared_ptr CreatePadOp(const ngraph::Output& data, + const std::vector& padsBegin, + const std::vector& padsEnd, + float argPadValue, + ngraph::helpers::PadMode padMode) const override { + const auto pad = ngraph::builder::makePad(data, padsBegin, padsEnd, argPadValue, padMode, true); + return pad; + } +}; } // namespace LayerTestsDefinitions diff --git 
a/src/tests/functional/shared_test_classes/src/single_layer/pad.cpp b/src/tests/functional/shared_test_classes/src/single_layer/pad.cpp index 6f796483b2b229..2c92716bed8eba 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/pad.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/pad.cpp @@ -46,7 +46,7 @@ void PadLayerTest::SetUp() { ov::ParameterVector params{std::make_shared(ngPrc, ov::Shape(inputShape))}; auto paramOuts = ngraph::helpers::convert2OutputVector( ngraph::helpers::castOps2Nodes(params)); - auto pad = ngraph::builder::makePad(paramOuts[0], padsBegin, padsEnd, argPadValue, padMode); + auto pad = CreatePadOp(paramOuts[0], padsBegin, padsEnd, argPadValue, padMode); ngraph::ResultVector results{std::make_shared(pad)}; function = std::make_shared(results, params, "pad"); } diff --git a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp index d3f4b5165890b9..b4ce38a5921ebc 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp +++ b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp @@ -521,13 +521,15 @@ std::shared_ptr makePad(const ngraph::Output& data, const std::vector& padsBegin, const std::vector& padsEnd, float argPadValue, - ngraph::helpers::PadMode padMode); + ngraph::helpers::PadMode padMode, + const bool allow_negative_pad = false); std::shared_ptr makePad(const ov::Output& in, const ov::Output& beginNode, const ov::Output& endNode, const ov::Output& valueNode, - ngraph::helpers::PadMode padMode); + ngraph::helpers::PadMode padMode, + const bool allow_negative_pad = false); std::shared_ptr makeBatchNormInference(const ngraph::Output& data, double epsilon); diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/pad.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/pad.cpp index 962353afebbc91..8b53059a023011 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/src/pad.cpp +++ b/src/tests/ngraph_helpers/ngraph_functions/src/pad.cpp @@ -13,7 +13,8 @@ std::shared_ptr makePad(const ngraph::Output& data, const std::vector& padsBegin, const std::vector& padsEnd, float argPadValue, - ngraph::helpers::PadMode padMode) { + ngraph::helpers::PadMode padMode, + const bool allow_negative_pad) { ngraph::op::PadMode pad_mode; switch (padMode) { case ngraph::helpers::PadMode::CONSTANT: @@ -37,14 +38,20 @@ std::shared_ptr makePad(const ngraph::Output& data, auto pads_end = std::make_shared(ngraph::element::i64, ngraph::Shape{padsEnd.size()}, padsEnd.data()); auto arg_pad_value = std::make_shared(data.get_element_type(), ngraph::Shape{}, &argPadValue); - return std::make_shared(data, pads_begin, pads_end, arg_pad_value, pad_mode); + + if (allow_negative_pad) { + return std::make_shared(data, pads_begin, pads_end, arg_pad_value, pad_mode); + } else { + return std::make_shared(data, pads_begin, pads_end, arg_pad_value, pad_mode); + } } std::shared_ptr makePad(const ov::Output& in, const ov::Output& beginNode, const ov::Output& endNode, const ov::Output& valueNode, - ngraph::helpers::PadMode padMode) { + ngraph::helpers::PadMode padMode, + const bool allow_negative_pad) { ngraph::op::PadMode pad_mode; switch (padMode) { case ngraph::helpers::PadMode::CONSTANT: @@ -62,10 +69,19 @@ std::shared_ptr makePad(const ov::Output& in, default: throw std::runtime_error("Can't create layer for this pad mode"); } - if (valueNode.get_node_shared_ptr() == nullptr) - return 
std::make_shared(in, beginNode, endNode, pad_mode); - else - return std::make_shared(in, beginNode, endNode, valueNode, pad_mode); + if (valueNode.get_node_shared_ptr() == nullptr) { + if (allow_negative_pad) { + return std::make_shared(in, beginNode, endNode, pad_mode); + } else { + return std::make_shared(in, beginNode, endNode, pad_mode); + } + } else { + if (allow_negative_pad) { + return std::make_shared(in, beginNode, endNode, valueNode, pad_mode); + } else { + return std::make_shared(in, beginNode, endNode, valueNode, pad_mode); + } + } } } // namespace builder From 47fe50ca35305d0f24e4e34c2f84d571c4730694 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 12 Sep 2023 11:13:59 +0400 Subject: [PATCH 19/31] [GPU] 2.0 plugin api impl (#18920) --- .../c/tests/ov_remote_context_test.cpp | 8 +- .../openvino/runtime/isync_infer_request.hpp | 2 +- .../dev_api/openvino/runtime/make_tensor.hpp | 2 +- src/inference/dev_api/remote_utils.hpp | 184 +++ .../openvino/runtime/intel_gpu/ocl/dx.hpp | 2 +- .../openvino/runtime/intel_gpu/ocl/ocl.hpp | 5 + .../openvino/runtime/intel_gpu/ocl/va.hpp | 2 +- src/inference/src/dev/converter_utils.cpp | 76 +- src/inference/src/dev/converter_utils.hpp | 3 +- src/inference/src/dev/make_tensor.cpp | 136 +- .../intel_gpu/docs/basic_data_structures.md | 10 +- src/plugins/intel_gpu/docs/gpu_debug_utils.md | 30 +- .../intel_gpu/docs/simplified_workflow.md | 160 +-- .../include/intel_gpu/graph/network.hpp | 2 +- .../intel_gpu/graph/serialization/utils.hpp | 36 +- .../intel_gpu/plugin/async_infer_request.hpp | 24 +- .../plugin/async_infer_request_legacy.hpp | 33 - .../include/intel_gpu/plugin/common_utils.hpp | 149 +-- .../intel_gpu/plugin/compiled_model.hpp | 83 +- .../include/intel_gpu/plugin/graph.hpp | 93 +- .../intel_gpu/plugin/infer_request.hpp | 106 -- .../intel_gpu/plugin/infer_request_legacy.hpp | 115 -- .../intel_gpu/plugin/legacy_api_helper.hpp | 1 - .../intel_gpu/plugin/legacy_remote_blob.hpp | 145 ++ .../plugin/legacy_remote_context.hpp | 152 +++ .../include/intel_gpu/plugin/plugin.hpp | 78 +- .../intel_gpu/plugin/program_builder.hpp | 96 +- .../intel_gpu/plugin/remote_allocators.hpp | 80 +- .../include/intel_gpu/plugin/remote_blob.hpp | 176 --- .../intel_gpu/plugin/remote_context.hpp | 170 +-- .../intel_gpu/plugin/remote_tensor.hpp | 84 ++ .../intel_gpu/plugin/sync_infer_request.hpp | 109 ++ .../intel_gpu/plugin/variable_state.hpp | 37 +- .../intel_gpu/primitives/convert_color.hpp | 11 +- .../include/intel_gpu/runtime/file_util.hpp | 19 + .../include/intel_gpu/runtime/layout.hpp | 2 +- .../intel_gpu/src/graph/convert_color.cpp | 25 +- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- .../src/graph/impls/ocl/convert_color.cpp | 6 +- .../impls/onednn/primitive_onednn_base.h | 4 +- src/plugins/intel_gpu/src/graph/network.cpp | 16 +- src/plugins/intel_gpu/src/graph/program.cpp | 10 +- .../intel_gpu/src/graph/program_node.cpp | 10 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 12 +- src/plugins/intel_gpu/src/graph/unique.cpp | 2 - .../cl_kernels/convert_color_ref.cl | 53 +- .../cl_kernels/reorder_data.cl | 4 +- .../kernel_selector_common.cpp | 1 + .../convert_color_kernel_base.cpp | 2 +- .../convert_color_kernel_ref.cpp | 4 +- .../src/plugin/async_infer_request.cpp | 45 +- .../src/plugin/async_infer_request_legacy.cpp | 51 - .../intel_gpu/src/plugin/compiled_model.cpp | 446 +++--- src/plugins/intel_gpu/src/plugin/graph.cpp | 349 ++--- .../intel_gpu/src/plugin/infer_request.cpp | 1052 --------------- .../src/plugin/infer_request_legacy.cpp | 1190 
----------------- .../intel_gpu/src/plugin/ops/condition.cpp | 26 +- .../src/plugin/ops/convert_color.cpp | 19 +- src/plugins/intel_gpu/src/plugin/ops/loop.cpp | 17 +- .../intel_gpu/src/plugin/ops/parameter.cpp | 185 +-- .../intel_gpu/src/plugin/ops/result.cpp | 80 +- .../src/plugin/ops/tensor_iterator.cpp | 6 +- .../intel_gpu/src/plugin/ops/transpose.cpp | 39 - src/plugins/intel_gpu/src/plugin/plugin.cpp | 691 ++++------ .../intel_gpu/src/plugin/program_builder.cpp | 299 +---- .../src/plugin/remote_allocators.cpp | 63 +- .../intel_gpu/src/plugin/remote_blob.cpp | 300 ----- .../intel_gpu/src/plugin/remote_context.cpp | 342 ++--- .../intel_gpu/src/plugin/remote_tensor.cpp | 272 ++++ .../src/plugin/sync_infer_request.cpp | 815 +++++++++++ .../transformations/einsum_decomposition.cpp | 70 +- .../src/plugin/transformations_pipeline.cpp | 10 +- .../intel_gpu/src/plugin/variable_state.cpp | 87 +- .../intel_gpu/src/runtime/file_util.cpp | 18 + .../intel_gpu/src/runtime/kernels_cache.cpp | 4 +- .../intel_gpu/src/runtime/ocl/ocl_engine.cpp | 4 +- .../intel_gpu/src/runtime/ocl/ocl_memory.cpp | 12 +- .../functional/behavior/memory_dyn_batch.cpp | 5 + .../gpu_remote_tensor_tests.cpp | 74 +- .../skip_tests_config.cpp | 53 +- ...mic_smoke_test_shape_of_reduce_reshape.cpp | 2 +- .../passes/test_module_fusing_reorder.cpp | 20 +- .../unit/test_cases/cl_mem_input_test.cpp | 257 +--- .../test_cases/convert_color_gpu_test.cpp | 154 ++- .../unit/test_cases/serialization_test.cpp | 61 - .../proxy/dev_api/openvino/proxy/plugin.hpp | 4 +- src/plugins/proxy/src/infer_request.cpp | 9 +- src/plugins/proxy/src/remote_context.cpp | 2 +- src/plugins/proxy/src/remote_tensor.cpp | 25 +- src/plugins/proxy/src/remote_tensor.hpp | 2 +- 90 files changed, 3416 insertions(+), 6316 deletions(-) create mode 100644 src/inference/dev_api/remote_utils.hpp delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request_legacy.hpp delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_remote_blob.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_remote_context.hpp delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/remote_blob.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/runtime/file_util.hpp delete mode 100644 src/plugins/intel_gpu/src/plugin/async_infer_request_legacy.cpp delete mode 100644 src/plugins/intel_gpu/src/plugin/infer_request.cpp delete mode 100644 src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp delete mode 100644 src/plugins/intel_gpu/src/plugin/remote_blob.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/remote_tensor.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp create mode 100644 src/plugins/intel_gpu/src/runtime/file_util.cpp delete mode 100644 src/plugins/intel_gpu/tests/unit/test_cases/serialization_test.cpp diff --git a/src/bindings/c/tests/ov_remote_context_test.cpp b/src/bindings/c/tests/ov_remote_context_test.cpp index abfd96d8d62970..95795aa2464dba 100644 --- a/src/bindings/c/tests/ov_remote_context_test.cpp +++ b/src/bindings/c/tests/ov_remote_context_test.cpp @@ -360,9 +360,9 @@ TEST_P(ov_remote_context_ocl, 
create_remote_tensor_nv12_from_ocl_image2D) { const int height = 480; const int width = 640; ov_shape_t shape_y = {0, nullptr}; - int64_t dims_y[4] = {1, 1, height, width}; + int64_t dims_y[4] = {1, height, width, 1}; ov_shape_t shape_uv = {0, nullptr}; - int64_t dims_uv[4] = {1, 2, height / 2, width / 2}; + int64_t dims_uv[4] = {1, height / 2, width / 2, 2}; cl_int err; cl_image_format image_format; @@ -555,9 +555,9 @@ TEST_P(ov_remote_context_ocl, remote_tensor_nv12_inference) { EXPECT_NE(nullptr, context); ov_shape_t shape_y = {0, nullptr}; - int64_t dims_y[4] = {1, 1, height, width}; + int64_t dims_y[4] = {1, height, width, 1}; ov_shape_t shape_uv = {0, nullptr}; - int64_t dims_uv[4] = {1, 2, height / 2, width / 2}; + int64_t dims_uv[4] = {1, height / 2, width / 2, 2}; cl_int err; cl_image_format image_format; diff --git a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp index 2d32c1f036fac4..ed15438de2eb83 100644 --- a/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp +++ b/src/inference/dev_api/openvino/runtime/isync_infer_request.hpp @@ -151,12 +151,12 @@ class OPENVINO_RUNTIME_API ISyncInferRequest : public IInferRequest { const std::function& tensor)>& allocate_callback); std::unordered_map, std::vector>> m_batched_tensors; + ov::SoPtr& get_tensor_ptr(const ov::Output& port) const; private: std::shared_ptr m_compiled_model; // Mutable to return reference to ov::Tensor mutable std::unordered_map, ov::SoPtr> m_tensors; - ov::SoPtr& get_tensor_ptr(const ov::Output& port) const; /** * @brief Finds input or output port diff --git a/src/inference/dev_api/openvino/runtime/make_tensor.hpp b/src/inference/dev_api/openvino/runtime/make_tensor.hpp index e41ebd3688fa5c..2e5d771c7d98e7 100644 --- a/src/inference/dev_api/openvino/runtime/make_tensor.hpp +++ b/src/inference/dev_api/openvino/runtime/make_tensor.hpp @@ -67,7 +67,7 @@ OPENVINO_RUNTIME_API ov::SoPtr get_tensor_impl(const ov::Tensor& te IE_SUPPRESS_DEPRECATED_START /** @cond INTERNAL */ -ov::SoPtr make_tensor(const std::shared_ptr& tensor); +ov::SoPtr make_tensor(const std::shared_ptr& tensor, bool unwrap = false); const InferenceEngine::Blob* get_hardware_blob(const InferenceEngine::Blob* blob); InferenceEngine::Blob* get_hardware_blob(InferenceEngine::Blob* blob); diff --git a/src/inference/dev_api/remote_utils.hpp b/src/inference/dev_api/remote_utils.hpp new file mode 100644 index 00000000000000..6dc389981218fd --- /dev/null +++ b/src/inference/dev_api/remote_utils.hpp @@ -0,0 +1,184 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ie_ngraph_utils.hpp" +#include "ie_remote_blob.hpp" +#include "ie_remote_context.hpp" +#include "openvino/runtime/iremote_context.hpp" + +namespace ov { +namespace legacy_convert { + +INFERENCE_ENGINE_API_CPP(ov::SoPtr) +convert_remote_context(const std::shared_ptr& context); +INFERENCE_ENGINE_API_CPP(ie::Blob*) get_hardware_blob(ie::Blob* blob); + +class INFERENCE_ENGINE_API_CLASS(TensorHolder) { +public: + TensorHolder(ov::SoPtr tensor) : _tensor(tensor) {} + + const ov::SoPtr& get_tensor() const { + return _tensor; + } + +private: + ov::SoPtr _tensor; +}; + +} // namespace legacy_convert + +/** + * @brief Tensor what contains InferenceEngine::RemoteBlob inside + * Blob owns the memory + */ +class INFERENCE_ENGINE_API_CLASS(RemoteBlobTensor) : public IRemoteTensor { + mutable element::Type m_type; + mutable Shape m_shape; + mutable Strides 
m_strides; + mutable ov::AnyMap m_properties; + mutable std::string m_dev_name; + +public: + std::shared_ptr blob; + + RemoteBlobTensor(const InferenceEngine::RemoteBlob::Ptr& blob) : blob{blob} { + OPENVINO_ASSERT(blob); + m_shape = blob->getTensorDesc().getBlockingDesc().getBlockDims(); + } + + const element::Type& get_element_type() const override { + m_type = InferenceEngine::details::convertPrecision(blob->getTensorDesc().getPrecision()); + return m_type; + } + + void set_shape(ov::Shape shape) override { + blob->setShape({shape.begin(), shape.end()}); + } + + const Shape& get_shape() const override { + m_shape = blob->getTensorDesc().getBlockingDesc().getBlockDims(); + return m_shape; + } + + const Strides& get_strides() const override { + OPENVINO_ASSERT(get_element_type().bitwidth() >= 8, + "Could not get strides for types with bitwidths less then 8 bit. Tensor type: ", + get_element_type()); + const auto& element_strides = blob->getTensorDesc().getBlockingDesc().getStrides(); + const size_t elem_size = get_element_type().size(); + m_strides.clear(); + m_strides.resize(element_strides.size()); + std::transform(element_strides.begin(), element_strides.end(), m_strides.begin(), [&elem_size](size_t stride) { + return stride * elem_size; + }); + return m_strides; + } + + size_t get_size() const override { + return blob->size(); + } + + size_t get_byte_size() const override { + return blob->byteSize(); + } + + const AnyMap& get_properties() const override { + m_properties = blob->getParams(); + return m_properties; + } + + const std::string& get_device_name() const override { + m_dev_name = blob->getDeviceName(); + return m_dev_name; + } +}; + +/** + * @brief Create InferenceEngine::RemoteBlob from the Tensor + */ +class INFERENCE_ENGINE_API_CLASS(TensorRemoteBlob) : public ie::RemoteBlob, public ov::legacy_convert::TensorHolder { +public: + TensorRemoteBlob(const ov::SoPtr& tensor, ie::TensorDesc desc) + : ie::RemoteBlob{desc}, + ov::legacy_convert::TensorHolder(tensor) { + OPENVINO_ASSERT(this->get_tensor()); + } + std::shared_ptr cast_tensor() const { + auto remote = std::dynamic_pointer_cast(get_tensor()._ptr); + OPENVINO_ASSERT(remote); + return remote; + } + AnyMap getParams() const override { + return cast_tensor()->get_properties(); + } + std::string getDeviceName() const noexcept override { + try { + return cast_tensor()->get_device_name(); + } catch (...) 
{ + return {}; + } + } + std::shared_ptr getContext() const noexcept override { + return {}; + } + + void allocate() noexcept override {} + bool deallocate() noexcept override { + return true; + } + ie::LockedMemory buffer() noexcept override { + return {nullptr, nullptr, 0}; + } + ie::LockedMemory cbuffer() const noexcept override { + return {nullptr, nullptr, 0}; + } + ie::LockedMemory rwmap() noexcept override { + return {nullptr, nullptr, 0}; + } + ie::LockedMemory rmap() const noexcept override { + return {nullptr, nullptr, 0}; + } + ie::LockedMemory wmap() noexcept override { + return {nullptr, nullptr, 0}; + } + const std::shared_ptr& getAllocator() const noexcept override { + return m_allocator; + } + void* getHandle() const noexcept override { + return nullptr; + } + + using TensorHolder::get_tensor; + +private: + std::shared_ptr m_allocator; +}; + +} // namespace ov + +namespace InferenceEngine { + +class INFERENCE_ENGINE_API_CLASS(IRemoteContextWrapper) : public ov::IRemoteContext { +private: + std::shared_ptr m_context; + mutable std::string m_name; + mutable ov::AnyMap m_params; + +public: + IRemoteContextWrapper(const std::shared_ptr& context) : m_context(context) {} + virtual ~IRemoteContextWrapper() = default; + const std::shared_ptr& get_context(); + const std::string& get_device_name() const override; + + const ov::AnyMap& get_property() const override; + + ov::SoPtr create_tensor(const ov::element::Type& type, + const ov::Shape& shape, + const ov::AnyMap& params = {}) override; + ov::SoPtr create_host_tensor(const ov::element::Type type, const ov::Shape& shape) override; +}; + +} // namespace InferenceEngine diff --git a/src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp b/src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp index 1e50b0dbd61440..70caec5ec389f9 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/ocl/dx.hpp @@ -139,7 +139,7 @@ class D3DContext : public ClContext { * @param target_tile_id Desired tile id within given context for multi-tile system. 
Default value (-1) means * that root device should be used */ - D3DContext(Core& core, ID3D11Device* device, int target_tile_id = -1) : ClContext(core, (cl_context) nullptr) { + D3DContext(Core& core, ID3D11Device* device, int target_tile_id = -1) : ClContext() { // clang-format off AnyMap context_params = { {ov::intel_gpu::context_type.name(), ov::intel_gpu::ContextType::VA_SHARED}, diff --git a/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp b/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp index ff7d1b2b1290bc..f35f7531a9a128 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp @@ -177,6 +177,11 @@ class ClContext : public RemoteContext { */ static constexpr const char* device_name = "GPU"; + /** + * @brief Default constructor which can be used in derived classes to avoid multiple create_context() calls + */ + ClContext() = default; + public: // Needed to make create_tensor overloads from base class visible for user using RemoteContext::create_tensor; diff --git a/src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp b/src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp index 91415dae081566..402abe223bca09 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/ocl/va.hpp @@ -104,7 +104,7 @@ class VAContext : public ClContext { * @param target_tile_id Desired tile id within given context for multi-tile system. Default value (-1) means * that root device should be used */ - VAContext(Core& core, VADisplay device, int target_tile_id = -1) : ClContext(core, (cl_context) nullptr) { + VAContext(Core& core, VADisplay device, int target_tile_id = -1) : ClContext() { AnyMap context_params = {{ov::intel_gpu::context_type.name(), ov::intel_gpu::ContextType::VA_SHARED}, {ov::intel_gpu::va_device.name(), static_cast(device)}, {ov::intel_gpu::tile_id.name(), target_tile_id}}; diff --git a/src/inference/src/dev/converter_utils.cpp b/src/inference/src/dev/converter_utils.cpp index fb55228e5917db..a957c032611990 100644 --- a/src/inference/src/dev/converter_utils.cpp +++ b/src/inference/src/dev/converter_utils.cpp @@ -215,7 +215,7 @@ class IVariableStateInternalWrapper : public InferenceEngine::IVariableStateInte } void SetState(const InferenceEngine::Blob::Ptr& newState) override { - m_state->set_state(ov::make_tensor(newState)); + m_state->set_state(ov::make_tensor(newState, true)); } InferenceEngine::Blob::CPtr GetState() const override { @@ -542,7 +542,7 @@ class IInferRequestInternalWrapper : public InferenceEngine::IInferRequestIntern void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override { try { - m_request->set_tensor(find_port(name), ov::make_tensor(data)); + m_request->set_tensor(find_port(name), ov::make_tensor(data, true)); } catch (const ov::Exception& ex) { const std::string what = ex.what(); if (what.find("Failed to set tensor") != std::string::npos) { @@ -556,7 +556,7 @@ class IInferRequestInternalWrapper : public InferenceEngine::IInferRequestIntern try { std::vector> tensors; for (const auto& blob : blobs) { - tensors.emplace_back(ov::make_tensor(blob)); + tensors.emplace_back(ov::make_tensor(blob, true)); } m_request->set_tensors(find_port(name), tensors); } catch (const ov::Exception& ex) { @@ -860,50 +860,40 @@ ov::SoPtr<::ov::IAsyncInferRequest> ov::legacy_convert::convert_infer_request( } namespace InferenceEngine { +const std::shared_ptr& 
IRemoteContextWrapper::get_context() { + return m_context; +} -class IRemoteContextWrapper : public ov::IRemoteContext { -private: - std::shared_ptr m_context; - mutable std::string m_name; - mutable ov::AnyMap m_params; - -public: - IRemoteContextWrapper(const std::shared_ptr& context) : m_context(context) {} - virtual ~IRemoteContextWrapper() = default; - const std::shared_ptr& get_context() { - return m_context; - } - const std::string& get_device_name() const override { - m_name = m_context->getDeviceName(); - return m_name; - } +const std::string& IRemoteContextWrapper::get_device_name() const { + m_name = m_context->getDeviceName(); + return m_name; +} - const ov::AnyMap& get_property() const override { - m_params = m_context->getParams(); - return m_params; - } +const ov::AnyMap& IRemoteContextWrapper::get_property() const { + m_params = m_context->getParams(); + return m_params; +} - ov::SoPtr create_tensor(const ov::element::Type& type, - const ov::Shape& shape, - const ov::AnyMap& params = {}) override { - InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(type), - shape, - InferenceEngine::TensorDesc::getLayoutByDims(shape)); - auto blob = m_context->CreateBlob(desc, params); - blob->allocate(); - auto tensor = ov::make_tensor(blob); - return {std::dynamic_pointer_cast(tensor._ptr), tensor._so}; - } +ov::SoPtr IRemoteContextWrapper::create_tensor(const ov::element::Type& type, + const ov::Shape& shape, + const ov::AnyMap& params) { + InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(type), + shape, + InferenceEngine::TensorDesc::getLayoutByDims(shape)); + auto blob = m_context->CreateBlob(desc, params); + blob->allocate(); + auto tensor = ov::make_tensor(blob); + return {std::dynamic_pointer_cast(tensor._ptr), tensor._so}; +} - ov::SoPtr create_host_tensor(const ov::element::Type type, const ov::Shape& shape) override { - InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(type), - shape, - InferenceEngine::TensorDesc::getLayoutByDims(shape)); - auto blob = m_context->CreateHostBlob(desc); - blob->allocate(); - return ov::make_tensor(blob); - } -}; +ov::SoPtr IRemoteContextWrapper::create_host_tensor(const ov::element::Type type, const ov::Shape& shape) { + InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(type), + shape, + InferenceEngine::TensorDesc::getLayoutByDims(shape)); + auto blob = m_context->CreateHostBlob(desc); + blob->allocate(); + return ov::make_tensor(blob); +} } // namespace InferenceEngine diff --git a/src/inference/src/dev/converter_utils.hpp b/src/inference/src/dev/converter_utils.hpp index e6cd57f7b39ccb..d121f5a4fa9ac3 100644 --- a/src/inference/src/dev/converter_utils.hpp +++ b/src/inference/src/dev/converter_utils.hpp @@ -15,6 +15,7 @@ #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/iremote_context.hpp" +#include "remote_utils.hpp" namespace ov { namespace legacy_convert { @@ -40,11 +41,9 @@ ov::SoPtr<::ov::IAsyncInferRequest> convert_infer_request( const std::string& plugin_name = ""); std::shared_ptr convert_remote_context(const ov::SoPtr& context); -ov::SoPtr convert_remote_context(const std::shared_ptr& context); std::vector convert_extension(const std::vector& exts); std::vector convert_extension(const std::vector& exts); } // namespace legacy_convert } // namespace ov - diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp index 
2e319c04c5f397..ddd0ea293966c0 100644 --- a/src/inference/src/dev/make_tensor.cpp +++ b/src/inference/src/dev/make_tensor.cpp @@ -11,6 +11,7 @@ #include "ie_remote_blob.hpp" #include "openvino/runtime/iremote_tensor.hpp" #include "openvino/runtime/properties.hpp" +#include "remote_utils.hpp" #ifdef PROXY_PLUGIN_ENABLED # include "openvino/proxy/plugin.hpp" #endif @@ -371,131 +372,6 @@ class BlobTensor : public ITensor { } }; -/** - * @brief Tensor what contains InferenceEngine::RemoteBlob inside - * Blob owns the memory - */ -class RemoteBlobTensor : public IRemoteTensor { - mutable element::Type m_type; - mutable Shape m_shape; - mutable Strides m_strides; - mutable ov::AnyMap m_properties; - mutable std::string m_dev_name; - -public: - std::shared_ptr blob; - - RemoteBlobTensor(const InferenceEngine::RemoteBlob::Ptr& blob) : blob{blob} { - OPENVINO_ASSERT(blob); - m_shape = blob->getTensorDesc().getBlockingDesc().getBlockDims(); - } - - const element::Type& get_element_type() const override { - m_type = InferenceEngine::details::convertPrecision(blob->getTensorDesc().getPrecision()); - return m_type; - } - - void set_shape(ov::Shape shape) override { - blob->setShape({shape.begin(), shape.end()}); - } - - const Shape& get_shape() const override { - m_shape = blob->getTensorDesc().getBlockingDesc().getBlockDims(); - return m_shape; - } - - const Strides& get_strides() const override { - OPENVINO_ASSERT(get_element_type().bitwidth() >= 8, - "Could not get strides for types with bitwidths less then 8 bit. Tensor type: ", - get_element_type()); - const auto& element_strides = blob->getTensorDesc().getBlockingDesc().getStrides(); - const size_t elem_size = get_element_type().size(); - m_strides.clear(); - m_strides.resize(element_strides.size()); - std::transform(element_strides.begin(), element_strides.end(), m_strides.begin(), [&elem_size](size_t stride) { - return stride * elem_size; - }); - return m_strides; - } - - size_t get_size() const override { - return blob->size(); - } - - size_t get_byte_size() const override { - return blob->byteSize(); - } - - const AnyMap& get_properties() const override { - m_properties = blob->getParams(); - return m_properties; - } - - const std::string& get_device_name() const override { - m_dev_name = blob->getDeviceName(); - return m_dev_name; - } -}; - -/** - * @brief Create InferenceEngine::RemoteBlob from the Tensor - */ -class TensorRemoteBlob : public ie::RemoteBlob { -public: - TensorRemoteBlob(const ov::SoPtr& tensor, ie::TensorDesc desc) : ie::RemoteBlob{desc}, tensor{tensor} { - OPENVINO_ASSERT(this->tensor); - } - std::shared_ptr cast_tensor() const { - auto remote = std::dynamic_pointer_cast(tensor._ptr); - OPENVINO_ASSERT(remote); - return remote; - } - AnyMap getParams() const override { - return cast_tensor()->get_properties(); - } - std::string getDeviceName() const noexcept override { - try { - return cast_tensor()->get_device_name(); - } catch (...) 
{ - return {}; - } - } - std::shared_ptr getContext() const noexcept override { - return {}; - } - - void allocate() noexcept override {} - bool deallocate() noexcept override { - return true; - } - ie::LockedMemory buffer() noexcept override { - return {nullptr, nullptr, 0}; - } - ie::LockedMemory cbuffer() const noexcept override { - return {nullptr, nullptr, 0}; - } - ie::LockedMemory rwmap() noexcept override { - return {nullptr, nullptr, 0}; - } - ie::LockedMemory rmap() const noexcept override { - return {nullptr, nullptr, 0}; - } - ie::LockedMemory wmap() noexcept override { - return {nullptr, nullptr, 0}; - } - const std::shared_ptr& getAllocator() const noexcept override { - return m_allocator; - } - void* getHandle() const noexcept override { - return nullptr; - } - - ov::SoPtr tensor; - -private: - std::shared_ptr m_allocator; -}; - /** * @brief Create InferenceEngine::TBlob from the tensor * @@ -530,15 +406,17 @@ class TensorMemoryBlob : public ie::TBlob { ov::SoPtr tensor; }; -ov::SoPtr make_tensor(const std::shared_ptr& blob) { +ov::SoPtr make_tensor(const std::shared_ptr& blob, bool unwrap) { #define ELSE_IF(type) \ else if (auto tblob = dynamic_cast*>(blob.get())) { \ return tblob->tensor; \ } if (blob == nullptr) { return {}; + } else if (unwrap && std::dynamic_pointer_cast(blob) != nullptr) { + return std::dynamic_pointer_cast(blob)->get_tensor(); } else if (auto remote_blob = std::dynamic_pointer_cast(blob)) { - return remote_blob->tensor; + return remote_blob->get_tensor(); } else if (auto remote_blob = std::dynamic_pointer_cast(blob)) { return {std::make_shared(remote_blob), nullptr}; } @@ -564,7 +442,7 @@ ov::SoPtr make_tensor(const std::shared_ptr& blob) { ie::Blob* get_hardware_blob(ie::Blob* blob) { #ifdef PROXY_PLUGIN_ENABLED if (auto remote_blob = dynamic_cast(blob)) { - const auto& tensor = ov::proxy::get_hardware_tensor(remote_blob->tensor); + const auto& tensor = ov::proxy::get_hardware_tensor(remote_blob->get_tensor()); if (auto blob_tensor = std::dynamic_pointer_cast(tensor._ptr)) { return blob_tensor->blob.get(); } else if (auto blob_tensor = std::dynamic_pointer_cast(tensor._ptr)) { @@ -579,7 +457,7 @@ ie::Blob* get_hardware_blob(ie::Blob* blob) { const ie::Blob* get_hardware_blob(const ie::Blob* blob) { #ifdef PROXY_PLUGIN_ENABLED if (auto remote_blob = dynamic_cast(blob)) { - const auto& tensor = ov::proxy::get_hardware_tensor(remote_blob->tensor); + const auto& tensor = ov::proxy::get_hardware_tensor(remote_blob->get_tensor()); if (auto blob_tensor = std::dynamic_pointer_cast(tensor._ptr)) { return blob_tensor->blob.get(); } else if (auto blob_tensor = std::dynamic_pointer_cast(tensor._ptr)) { diff --git a/src/plugins/intel_gpu/docs/basic_data_structures.md b/src/plugins/intel_gpu/docs/basic_data_structures.md index a11f8ab666ab6a..d615d80328f3f8 100644 --- a/src/plugins/intel_gpu/docs/basic_data_structures.md +++ b/src/plugins/intel_gpu/docs/basic_data_structures.md @@ -4,10 +4,10 @@ ```mermaid -classDiagram +classDiagram direction LR pooling --<| primitive_base -convolution --<| primitive_base +convolution --<| primitive_base class primitive_base{<>} primitive_base --<| primitive primitive --o program_node @@ -78,7 +78,7 @@ A more detailed description of each component is described in the sections below ## primitive ```cpp -struct primitive { +struct primitive { ... 
const primitive_id id; const primitive_type* type; @@ -96,7 +96,7 @@ An example creation of a `arg_max_min` primitive: cldnn::arg_max_min top_k_prim = cldnn::arg_max_min("top_k", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, "", padding(), data_types::f32); ``` -In GPU plugin, the *primitives* are converted from ngraph [operations](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/src/plugin/ops). +In GPU plugin, the *primitives* are converted from OpenVINO [operations](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/src/plugin/ops). ## topology ```cpp @@ -200,7 +200,7 @@ class primitive_inst { std::vector _intermediates_memory; event::ptr execute(const std::vector& events); - memory::ptr allocate_output(); + memory::ptr allocate_output(); ... }; ``` diff --git a/src/plugins/intel_gpu/docs/gpu_debug_utils.md b/src/plugins/intel_gpu/docs/gpu_debug_utils.md index 5bfd49d3254e30..942a2309fb412e 100644 --- a/src/plugins/intel_gpu/docs/gpu_debug_utils.md +++ b/src/plugins/intel_gpu/docs/gpu_debug_utils.md @@ -39,7 +39,7 @@ Behavior when both versions are specified is not defined. Some options also allow multiple prefixes: `OV` and `OV_GPU`. `OV` prefix is intended to be used for options common for all OpenVINO components. When an option is set twice with different prefixes, then `OV_GPU` has higher priority. -### List of parameters +### List of parameters This is a part of the full list. To get all parameters, see OV_GPU_Help result. @@ -66,12 +66,12 @@ This is a part of the full list. To get all parameters, see OV_GPU_Help result. The execution graph (also known as a runtime graph) is a device-specific graph after all transformations applied by the plugin. It is a very useful feature for performance analysis and it allows finding a source of performance regressions quickly. The execution graph can be retrieved from the plugin -using `GetExecGraphInfo()` method of `InferenceEngine::ExecutableNetwork` and then serialized as usual IR: +using `get_runtime_model()` method of `ov::CompiledModel` and then serialized as usual IR: ```cpp - ExecutableNetwork exeNetwork; + ov::CompiledModel compiled_model; // Load some model into the plugin - CNNNetwork execGraphInfo = exeNetwork.GetExecGraphInfo(); - execGraphInfo.serialize("/path/to/serialized/exec/graph.xml"); + std::shared_ptr runtime_model = compiled_model.get_runtime_model(); + ov::serialize(runtime_model, "/path/to/serialized/exec/graph.xml"); ``` The capability to retrieve the execution graph and store it on the disk is integrated into `benchmark_app`. The execution graph can be simply dumped @@ -116,14 +116,14 @@ Most of the data here is very handy for performance analysis. For example, for e This graph can be visualized using Netron tool and all these properties can be analyzed there. -> **NOTE**: execution time collection for each primitive requires `CONFIG_KEY(PERF_COUNT)` to be enabled (`benchmark_app` does it automatically). Therefore, the overall model execution time is usually much worse in such use cases. +> **NOTE**: execution time collection for each primitive requires `ov::enable_profiling` to be enabled (`benchmark_app` does it automatically). Therefore, the overall model execution time is usually much worse in such use cases. 
## Performance counters This feature is a simplified version of the execution graph as it provides much less information, but it might be more suitable for quick analysis and some kind of processing with scripts. -Performance counters can be retrieved from each `InferenceEngine::InferRequest` object using `getPerformanceCounts()` method. This feature is also integrated +Performance counters can be retrieved from each `ov::InferRequest` object using `get_profiling_info()` method. This feature is also integrated into `benchmark_app` and the counters can be printed to count using `-pc` parameter. The format looks as follows: @@ -149,14 +149,6 @@ So it allows you to quickly check the execution time of some operation on the de * You can dump graphs with `OV_GPU_DumpGraphs` of debug config. For the usage of debug config, see the [link](#debug-config). -* Alternatively, you can also enable the dumps from the application source code: -clDNN plugin has the special internal config option - `graph_dumps_dir`, which can be set from the user app via plugin config: -```cpp -Core ie; -std::map device_config; -device_config[CLDNN_CONFIG_KEY(GRAPH_DUMPS_DIR)] = "/some/existing/path/"; -ie.SetConfig(device_config, "GPU"); -``` For each stage, it dumps: ``` @@ -175,14 +167,6 @@ Since *Intel_GPU* source tree contains only *templates* of the OpenCL™ kernels * You can use `OV_GPU_DumpSources` of debug config. For the usage of debug config, see [link](#debug-config). -* You can also dump OpenCL source code by changing OpenVINO source code: -clDNN plugin has the special internal config option - `sources_dumps_dir`, which can be set from the user app via plugin config: -```cpp -Core ie; -std::map device_config; -device_config[CLDNN_CONFIG_KEY(SOURCES_DUMPS_DIR)] = "/some/existing/path/"; -ie.SetConfig(device_config, "GPU"); -``` When this key is enabled, the plugin dumps multiple files with the following names: ``` diff --git a/src/plugins/intel_gpu/docs/simplified_workflow.md b/src/plugins/intel_gpu/docs/simplified_workflow.md index c00f829aadbce5..482284a2d0dbcf 100644 --- a/src/plugins/intel_gpu/docs/simplified_workflow.md +++ b/src/plugins/intel_gpu/docs/simplified_workflow.md @@ -3,69 +3,67 @@ The simplified workflow in the GPU plugin is shown in the diagram below (click it for higher resolution): ```mermaid -classDiagram -class `intel_gpu::Plugin` {Inference Engine plugin -implementation for GPU} -class `intel_gpu::CompiledModel` {Device specific network -representation that can be executed} -class `intel_gpu::InferRequestAsync` { -Inference request for specific executable network. 
-Wrapper for input and output memory} -class `intel_gpu::TransformationPipeline` {Set of ngraph-based transformations -configured by GPU plugin} -`Core::compile_model()` --> `intel_gpu::CompiledModel` -`CompiledModel::create_infer_request()` -->`intel_gpu::InferRequestAsync` -`InferRequest::start_async()` --> `intel_gpu::network` -`intel_gpu::Plugin` --|> `InferenceEngine::InferencePluginInternal` +classDiagram + +%% Public API classes +class `ov::CompiledModel` +class `ov::InferRequest` +class `ov::Core` + +%% Plugin API interface %% +class `ov::IPlugin` +class `ov::ICompiledModel` +class `ov::IAsyncInferRequest` +class `ov::ISyncInferRequest` + +%% Plugin API Impl %% +class `intel_gpu::Plugin` { OpenVINO plugin implementation for GPU } +class `intel_gpu::CompiledModel` +class `intel_gpu::AsyncInferRequest` { Asynchronous version of infer request } +class `intel_gpu::SyncInferRequest` { Inference request for specific executable network. Wrapper for input and output memory } + +`intel_gpu::Plugin` --|> `ov::IPlugin` +`intel_gpu::CompiledModel` --|> `ov::ICompiledModel` +`intel_gpu::SyncInferRequest` --|> `ov::ISyncInferRequest` +`intel_gpu::AsyncInferRequest` --|> `ov::IAsyncInferRequest` `intel_gpu::Plugin` --> `intel_gpu::CompiledModel` : Create -`intel_gpu::CompiledModel` --|> `InferenceEngine::ExecutableNetworkThreadSafeDefault` -`intel_gpu::CompiledModel` --> `intel_gpu::InferRequestAsync` : Create +`intel_gpu::SyncInferRequest` "1" --* "1" `intel_gpu::AsyncInferRequest` + +%% Plugin implementation details %% +class `intel_gpu::TransformationPipeline` {Set of ngraph-based transformations configured by GPU plugin } +class `intel_gpu::Graph` { Per stream copy of compiled graph with independent memory } +class `intel_gpu::ProgramBuilder` { Object for operations semantic translation and graph compilation } + +`ov::Core` --> `intel_gpu::CompiledModel` : compile_model() +`ov::CompiledModel` -->`intel_gpu::AsyncInferRequest` : create_infer_request() +`ov::InferRequest` --> `intel_gpu::network` : start_async() +`intel_gpu::CompiledModel` --> `intel_gpu::AsyncInferRequest` : Create `intel_gpu::TransformationPipeline` --> `ov::Model` `intel_gpu::TransformationPipeline` --> `intel_gpu::CompiledModel` -`InferenceEngine::InferRequestInternal` -class `intel_gpu::Graph` {Per stream copy of -compiled graph with -independent memory} `intel_gpu::Graph` "1..N" --* `intel_gpu::CompiledModel` -class `intel_gpu::ProgramBuilder` {Object for operations -semantic translation and -graph compilation} `intel_gpu::CompiledModel` --> `intel_gpu::ProgramBuilder` : Create `intel_gpu::ProgramBuilder` "1" --o "N" `intel_gpu::Graph` + class `intel_gpu::convolution` {convolution operation descriptor} -class `intel_gpu::data` {Primitive representing -constant data in a topology} +class `intel_gpu::data` {Primitive representing constant data in a topology } class `intel_gpu::input_layout` {Represents dynamic input data} class `intel_gpu::primitive_base` {<>} `intel_gpu::convolution` ..<| `intel_gpu::primitive_base` `intel_gpu::data` ..<| `intel_gpu::primitive_base` `intel_gpu::input_layout` ..<| `intel_gpu::primitive_base` `Any other primitive` ..<| `intel_gpu::primitive_base` -class `intel_gpu::topology` { -Set of primitives. Each primitive -knows operation parameters, -it's inputs and outputs} -class `intel_gpu::program` { -Class that contains compiled topology. 
-All kernels are selected, -memory dependencies are resolved, -the only missing thing - memory for intermediate buffers} +class `intel_gpu::topology` { Set of primitives. Each primitive knows operation parameters, it's inputs and outputs } +class `intel_gpu::program` { Class that contains compiled topology. All kernels are selected, memory dependencies are resolved, the only missing thing - memory for intermediate buffers } `intel_gpu::primitive_base` "0..N" --o `intel_gpu::topology` `intel_gpu::program` --> `intel_gpu::topology` `intel_gpu::ProgramBuilder` --> `intel_gpu::topology` : Create `intel_gpu::ProgramBuilder` --> `intel_gpu::program` : Create -class `intel_gpu::program_node` {Base class for representation of a single graph node} -class `intel_gpu::primitive_impl` { -<> -Base class for representation of a single graph node} -class `intel_gpu::typed_primitive_onednn_impl` {Implementations that use oneDNN library} -class `oneDNN library` {statically linked into GPU plugin} -class `intel_gpu::typed_primitive_ocl_impl` {OCL implementations that use -kernels from kernel_selector} -class `intel_gpu::kernel_selector` { -module that stores OCL kernels -for primitives and has embed some -rules for optimal kernel selection} +class `intel_gpu::program_node` { Base class for representation of a single graph node } +class `intel_gpu::primitive_impl` { <> Base class for representation of a single graph node } +class `intel_gpu::typed_primitive_onednn_impl` {Implementations that use oneDNN library } +class `oneDNN library` { statically linked into GPU plugin } +class `intel_gpu::typed_primitive_ocl_impl` { OCL implementations that use kernels from kernel_selector } +class `intel_gpu::kernel_selector` { module that stores OCL kernels for primitives and has embed some rules for optimal kernel selection } `intel_gpu::program_node` --o `intel_gpu::program` `intel_gpu::primitive_impl` --o `intel_gpu::program_node` `intel_gpu::typed_primitive_onednn_impl` ..<| `intel_gpu::primitive_impl` @@ -73,26 +71,18 @@ rules for optimal kernel selection} `intel_gpu::typed_primitive_ocl_impl` ..> `intel_gpu::kernel_selector` `intel_gpu::typed_primitive_onednn_impl` --> `oneDNN bridge` : Use `intel_gpu::typed_primitive_onednn_impl` ..> `oneDNN library` -class `intel_gpu::build_options` {Set of options for graph compilations} -class `intel_gpu::pass_manager` {Helper to run graph transformations} -class `intel_gpu::base_pass` { -<> -Base class for graph transformations} +class `intel_gpu::build_options` { Set of options for graph compilations } +class `intel_gpu::pass_manager` { Helper to run graph transformations } +class `intel_gpu::base_pass` { <> Base class for graph transformations} `intel_gpu::program` --> `intel_gpu::build_options` `intel_gpu::program` --> `intel_gpu::pass_manager` : Use `intel_gpu::program` --> `intel_gpu::base_pass` : Use `intel_gpu::pass_manager` --> `intel_gpu::base_pass` : Run -class `intel_gpu::prepare_primitive_fusing` { -Pass that fuses multiple operations into single node} -class `intel_gpu::prepare_quantization` { -Pass that prepares models for low precision execution} -class `intel_gpu::reorder_inputs` { -Pass that is responsible for layout/impl selection} -class `intel_gpu::compile_graph` { -Pass that selects and creates -best implementation for each primitive} -class `intel_gpu::remove_redundant_reorders` { -Pass that optimizes reorders in the graph} +class `intel_gpu::prepare_primitive_fusing` { Pass that fuses multiple operations into single node } +class 
`intel_gpu::prepare_quantization` { Pass that prepares models for low precision execution } +class `intel_gpu::reorder_inputs` { Pass that is responsible for layout/impl selection } +class `intel_gpu::compile_graph` { Pass that selects and creates best implementation for each primitive } +class `intel_gpu::remove_redundant_reorders` { Pass that optimizes reorders in the graph } `intel_gpu::prepare_primitive_fusing`--|> `intel_gpu::base_pass` `intel_gpu::prepare_quantization`--|> `intel_gpu::base_pass` `intel_gpu::reorder_inputs`--|> `intel_gpu::base_pass` @@ -100,31 +90,23 @@ Pass that optimizes reorders in the graph} `intel_gpu::layout_optimizer`--|> `intel_gpu::base_pass` `intel_gpu::remove_redundant_reorders`--|> `intel_gpu::base_pass` `intel_gpu::reorder_inputs`--> `intel_gpu::layout_optimizer` : Use -class `intel_gpu::network` { -A program with allocated memory. -Can be executed on the device} -`intel_gpu::InferRequestAsync` --> `intel_gpu::network` : Set input/output memory and run execution -`intel_gpu::network` --> `intel_gpu::InferRequestAsync` : Return inference result -class `intel_gpu::tensor` {Size of memory buffer} -class `intel_gpu::format` {Order of elements in memory} -class `intel_gpu::data_type` {elements precision} -class `intel_gpu::memory_pool` { -Object that tracks memory allocations -and tries to reuse memory buffers} -class `intel_gpu::layout` {Memory descriptor} -class `intel_gpu::memory` {GPU memory object} -class `intel_gpu::stream` { -Abstraction for queue. -Knows how to submit kernels and - provide some synchronization capabilities} -class `intel_gpu::event` {Synchronization primitive} -class `intel_gpu::kernel` {Holds kernel handle} -class `intel_gpu::engine` {Engine for specific device, -responsible for memory allocations} -class `intel_gpu::device` {Holds context/device handles for selected backend} -class `intel_gpu::device_info` {Storage for device capabilities and info} -class `intel_gpu::engine_configuration` {Options for engine} -class `intel_gpu::device_query` {Detects available devices for given backend} +class `intel_gpu::network` { A program with allocated memory.Can be executed on the device } +`intel_gpu::AsyncInferRequest` --> `intel_gpu::network` : Set input/output memory and run execution +`intel_gpu::network` --> `intel_gpu::AsyncInferRequest` : Return inference result +class `intel_gpu::tensor` { Size of memory buffer } +class `intel_gpu::format` { Order of elements in memory } +class `intel_gpu::data_type` { elements precision } +class `intel_gpu::memory_pool` { Object that tracks memory allocations and tries to reuse memory buffers } +class `intel_gpu::layout` { Memory descriptor } +class `intel_gpu::memory` { GPU memory object } +class `intel_gpu::stream` { Abstraction for queue. 
Knows how to submit kernels and provide some synchronization } +class `intel_gpu::event` { Synchronization primitive } +class `intel_gpu::kernel` { Holds kernel handle } +class `intel_gpu::engine` { Engine for specific device, responsible for memory allocations } +class `intel_gpu::device` { Holds context/device handles for selected backend } +class `intel_gpu::device_info` { Storage for device capabilities and info } +class `intel_gpu::engine_configuration` { Options for engine } +class `intel_gpu::device_query` { Detects available devices for given backend } `intel_gpu::tensor` --o `intel_gpu::layout` `intel_gpu::format` --o `intel_gpu::layout` `intel_gpu::data_type` --o `intel_gpu::layout` @@ -142,8 +124,6 @@ class `intel_gpu::device_query` {Detects available devices for given backend} `intel_gpu::device` --o `intel_gpu::engine` `intel_gpu::device_info` --o `intel_gpu::device` `intel_gpu::device_query` --> `intel_gpu::device` -`OCL Implementation of Runtime`..<| `Runtime module API & common` -`SYCL/L0 Implementation of Runtime (POC)`..<| `Runtime module API & common` ``` ## See also @@ -152,4 +132,4 @@ class `intel_gpu::device_query` {Detects available devices for given backend} * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) * [OpenVINO GPU Plugin](../README.md) - * [Developer documentation](../../../../docs/dev/index.md) \ No newline at end of file + * [Developer documentation](../../../../docs/dev/index.md) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index acddfd0a462867..1474543428a7b7 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -244,7 +244,7 @@ struct network { using variables_state_info_map = std::map; void set_variables_state_info(const std::string& variable_id, const cldnn::layout& layout); - + const variables_state_info_map& get_variables_state_info() const; const ExecutionConfig& get_config() const { return _config; } ShapePredictor& get_shape_predictor() { return *_shape_predictor; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/utils.hpp index 9c77b1b66f6df3..ae912fa9c7519c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/utils.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/utils.hpp @@ -6,43 +6,11 @@ #define RUN_ALL_MODEL_CACHING_TESTS +#include +#include #include -#include "openvino/core/deprecated.hpp" -#include "ie/ie_common.h" namespace cldnn { -class serial_util { -public: - OPENVINO_SUPPRESS_DEPRECATED_START - static InferenceEngine::Layout layout_from_string(const std::string& name) { - static const std::unordered_map layouts = { - { "ANY", InferenceEngine::Layout::ANY }, - { "NCHW", InferenceEngine::Layout::NCHW }, - { "NHWC", InferenceEngine::Layout::NHWC }, - { "NCDHW", InferenceEngine::Layout::NCDHW }, - { "NDHWC", InferenceEngine::Layout::NDHWC }, - { "OIHW", InferenceEngine::Layout::OIHW }, - { "GOIHW", InferenceEngine::Layout::GOIHW }, - { "OIDHW", InferenceEngine::Layout::OIDHW }, - { "GOIDHW", InferenceEngine::Layout::GOIDHW }, - { "SCALAR", InferenceEngine::Layout::SCALAR }, - { "C", InferenceEngine::Layout::C }, - { "CHW", InferenceEngine::Layout::CHW }, - { "HWC", InferenceEngine::Layout::HWC }, - { "HW", InferenceEngine::Layout::HW }, - { "NC", InferenceEngine::Layout::NC }, - { "CN", 
InferenceEngine::Layout::CN }, - { "BLOCKED", InferenceEngine::Layout::BLOCKED } - }; - auto it = layouts.find(name); - if (it != layouts.end()) { - return it->second; - } - OPENVINO_THROW("Unknown layout with name '", name, "'"); - } - OPENVINO_SUPPRESS_DEPRECATED_END -}; - class membuf : public std::streambuf { public: membuf() : _pos(0) { } diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp index 1dc1fba49d4591..b2fcb8087ad793 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp @@ -4,29 +4,29 @@ #pragma once +#include "openvino/runtime/iasync_infer_request.hpp" +#include "intel_gpu/plugin/sync_infer_request.hpp" #include #include -#include -#include "intel_gpu/plugin/infer_request.hpp" namespace ov { namespace intel_gpu { -class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault { +class AsyncInferRequest : public ov::IAsyncInferRequest { public: - using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault; - AsyncInferRequest(const InferRequest::Ptr &inferRequest, - const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, - const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor); + using Parent = ov::IAsyncInferRequest; + AsyncInferRequest(const std::shared_ptr& infer_request, + const std::shared_ptr& task_executor, + const std::shared_ptr& wait_executor, + const std::shared_ptr& callback_executor); - ~AsyncInferRequest(); + ~AsyncInferRequest() override; - void StartAsync_ThreadUnsafe() override; + void start_async() override; private: - InferRequest::Ptr _inferRequest; - InferenceEngine::ITaskExecutor::Ptr _waitExecutor; + std::shared_ptr m_infer_request; + std::shared_ptr m_wait_executor; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request_legacy.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request_legacy.hpp deleted file mode 100644 index 261efe73748eef..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request_legacy.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include "intel_gpu/plugin/infer_request_legacy.hpp" - -namespace ov { -namespace intel_gpu { - -class AsyncInferRequestLegacy : public InferenceEngine::AsyncInferRequestThreadSafeDefault { -public: - using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault; - AsyncInferRequestLegacy(const InferRequestLegacy::Ptr &inferRequest, - const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, - const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor); - - ~AsyncInferRequestLegacy(); - - void StartAsync_ThreadUnsafe() override; - -private: - InferRequestLegacy::Ptr _inferRequest; - InferenceEngine::ITaskExecutor::Ptr _waitExecutor; -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp index 767d60e7df3625..cd6a1a0e44d3b2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp @@ -4,16 +4,26 @@ 
#pragma once -#include +#include #include "intel_gpu/runtime/layout.hpp" #include "openvino/core/layout.hpp" -#include "openvino/core/deprecated.hpp" - -#include "ngraph/type/element_type.hpp" +#include "openvino/core/type/element_type.hpp" namespace ov { namespace intel_gpu { +enum class TensorType { + BT_EMPTY, + BT_BUF_INTERNAL, + BT_BUF_SHARED, + BT_USM_SHARED, + BT_USM_HOST_INTERNAL, + BT_USM_DEVICE_INTERNAL, + BT_IMG_SHARED, + BT_SURF_SHARED, + BT_DX_BUF_SHARED, +}; + #define TensorValue(val) static_cast(val) inline cldnn::tensor tensor_from_dims(const ov::Shape& dims, int def = 1) { @@ -29,120 +39,24 @@ inline cldnn::tensor tensor_from_dims(const ov::Shape& dims, int def = 1) { } } -OPENVINO_SUPPRESS_DEPRECATED_START -inline cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p) { - switch (p) { - case InferenceEngine::Precision::I16: - case InferenceEngine::Precision::U16: - case InferenceEngine::Precision::FP32: - case InferenceEngine::Precision::FP64: - return cldnn::data_types::f32; - case InferenceEngine::Precision::FP16: - return cldnn::data_types::f16; - case InferenceEngine::Precision::U8: - return cldnn::data_types::u8; - case InferenceEngine::Precision::I8: - return cldnn::data_types::i8; - case InferenceEngine::Precision::I32: - case InferenceEngine::Precision::U32: - case InferenceEngine::Precision::U64: - return cldnn::data_types::i32; - case InferenceEngine::Precision::I64: - return cldnn::data_types::i64; - case InferenceEngine::Precision::BIN: - return cldnn::data_types::bin; - case InferenceEngine::Precision::BOOL: - return cldnn::data_types::i8; - default: - IE_THROW(ParameterMismatch) - << "The plugin does not support " << p.name() << " precision"; - } -} - -inline InferenceEngine::Precision PrecisionFromDataType(cldnn::data_types dt) { - switch (dt) { - case cldnn::data_types::bin: - return InferenceEngine::Precision::ePrecision::BIN; - case cldnn::data_types::u8: - return InferenceEngine::Precision::ePrecision::U8; - case cldnn::data_types::i8: - return InferenceEngine::Precision::ePrecision::I8; - case cldnn::data_types::f16: - return InferenceEngine::Precision::ePrecision::FP16; - case cldnn::data_types::f32: - return InferenceEngine::Precision::ePrecision::FP32; - case cldnn::data_types::i32: - return InferenceEngine::Precision::ePrecision::I32; - case cldnn::data_types::i64: - return InferenceEngine::Precision::ePrecision::I64; - default: - OPENVINO_THROW("The plugin does not support ", cldnn::data_type_traits::name(dt), " data type"); - } -} - -inline cldnn::format FormatFromLayout(InferenceEngine::Layout l) { - switch (l) { - // TODO: change 6d case once new layout added in IE - case InferenceEngine::Layout::BLOCKED: - return cldnn::format::bfwzyx; - case InferenceEngine::Layout::NCDHW: - return cldnn::format::bfzyx; - case InferenceEngine::Layout::NCHW: - case InferenceEngine::Layout::NC: - case InferenceEngine::Layout::CHW: - case InferenceEngine::Layout::C: - case InferenceEngine::Layout::SCALAR: - return cldnn::format::bfyx; - case InferenceEngine::Layout::NHWC: - return cldnn::format::byxf; - default: - IE_THROW(ParameterMismatch) << "The plugin does not support " << l << " layout"; - } -} - -inline cldnn::format FormatFromTensorDesc(InferenceEngine::TensorDesc desc) { - switch (desc.getLayout()) { - case InferenceEngine::Layout::BLOCKED: { - if (desc.getDims().size() == 6) - return cldnn::format::bfwzyx; - else if (desc.getDims().size() == 5) - return cldnn::format::bfzyx; - else if (desc.getDims().size() <= 4) - return 
cldnn::format::bfyx; - } - case InferenceEngine::Layout::NCDHW: - return cldnn::format::bfzyx; - case InferenceEngine::Layout::NCHW: - case InferenceEngine::Layout::NC: - case InferenceEngine::Layout::CHW: - case InferenceEngine::Layout::C: - case InferenceEngine::Layout::SCALAR: - return cldnn::format::bfyx; - case InferenceEngine::Layout::NHWC: - return cldnn::format::byxf; - default: - IE_THROW(ParameterMismatch) - << "The plugin does not support " << desc.getLayout() << " layout"; - } +inline cldnn::layout make_layout(const ov::element::Type type, const ov::Shape& shape) { + return cldnn::layout{ov::PartialShape{shape}, + cldnn::element_type_to_data_type(type), + cldnn::format::get_default_format(shape.size())}; } -inline cldnn::format ImageFormatFromLayout(InferenceEngine::Layout l) { - switch (l) { - // currently, nv12 is the only supported image layout - case InferenceEngine::Layout::BLOCKED: - case InferenceEngine::Layout::NCDHW: - case InferenceEngine::Layout::NCHW: - case InferenceEngine::Layout::NC: - case InferenceEngine::Layout::CHW: - case InferenceEngine::Layout::C: - case InferenceEngine::Layout::NHWC: - return cldnn::format::nv12; - default: - IE_THROW(ParameterMismatch) - << "The plugin does not support " << l << " image layout"; +inline ov::element::Type convert_to_supported_device_type(ov::element::Type et) { + switch (et) { + case ov::element::f64: + case ov::element::i16: + case ov::element::u16: + return ov::element::f32; + case ov::element::u64: + case ov::element::u32: + return ov::element::i32; + default: return et; } } -OPENVINO_SUPPRESS_DEPRECATED_END /// WA: Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES. inline void ForceExit() { @@ -156,4 +70,11 @@ inline void ForceExit() { } } // namespace intel_gpu + +inline std::ostream& operator<<(std::ostream& os, const ov::AnyMap& params) { + for (const auto& p : params) { + os << p.first << " : " << p.second.as() << std::endl; + } + return os; +} } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp index b1422495e8baa0..dc93df5adf52c9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp @@ -4,51 +4,70 @@ #pragma once +#include "openvino/runtime/icompiled_model.hpp" +#include "intel_gpu/plugin/graph.hpp" +#include "intel_gpu/plugin/plugin.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/runtime/execution_config.hpp" + #include #include #include #include #include #include -#include "ie_blob.h" -#include "cpp/ie_cnn_network.h" -#include -#include "intel_gpu/plugin/graph.hpp" -#include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/runtime/execution_config.hpp" namespace ov { namespace intel_gpu { -class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault { +class CompiledModel : public ov::ICompiledModel { public: - typedef std::shared_ptr Ptr; - - CompiledModel(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config, - InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr); - CompiledModel(cldnn::BinaryInputBuffer& ib, InferenceEngine::RemoteContext::Ptr context, const ExecutionConfig& config, - InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr); - - void Export(std::ostream& 
networkModel) override; - std::shared_ptr GetExecGraphInfo() override; - InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; - InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs) override; - InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(const std::vector>& inputs, - const std::vector>& outputs) override; - template - InferenceEngine::IInferRequestInternal::Ptr GetInferRequestImpl(const std::vector>& inputs, - const std::vector>& outputs); - InferenceEngine::Parameter GetMetric(const std::string &name) const override; - InferenceEngine::Parameter GetConfig(const std::string &name) const override; - std::shared_ptr GetContext() const override; + using Ptr = std::shared_ptr; - std::vector> m_graphs; - InferenceEngine::RemoteContext::Ptr m_context; + CompiledModel(std::shared_ptr model, + const std::shared_ptr& plugin, + RemoteContextImpl::Ptr context, + const ExecutionConfig& config); + CompiledModel(cldnn::BinaryInputBuffer ib, + const std::shared_ptr& plugin, + RemoteContextImpl::Ptr context, + const ExecutionConfig& config); + + std::shared_ptr create_infer_request() const override; + std::shared_ptr create_sync_infer_request() const override; + + void export_model(std::ostream& model) const override; + + std::shared_ptr get_runtime_model() const override; + + ov::Any get_property(const std::string& name) const override; + + void set_property(const ov::AnyMap& properties) override { + OPENVINO_ASSERT_HELPER(::ov::NotImplemented, + "", + false, + "Not Implemented: ", + "CompiledModel::set_property is not supported by this plugin!"); + }; + + const std::vector>& outputs() const override { return m_outputs; } + const std::vector>& inputs() const override { return m_inputs;} + + bool is_new_api() const { return std::static_pointer_cast(get_plugin())->is_new_api(); } + RemoteContextImpl::Ptr get_context_impl() const {return m_context; } + const std::vector>& get_graphs() const; + std::shared_ptr get_graph(size_t n) const; + +private: + RemoteContextImpl::Ptr m_context; ExecutionConfig m_config; - InferenceEngine::ITaskExecutor::Ptr m_taskExecutor; - InferenceEngine::ITaskExecutor::Ptr m_waitExecutor; - InferenceEngine::CNNNetwork m_network; + std::shared_ptr m_wait_executor; + std::shared_ptr m_model; + std::string m_model_name; + std::vector> m_inputs; + std::vector> m_outputs; + std::vector> m_graphs; + bool m_loaded_from_cache; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp index 5d056f480063e0..28bea9b80ff331 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp @@ -8,6 +8,12 @@ # define NOMINMAX #endif +#include "intel_gpu/graph/network.hpp" +#include "intel_gpu/graph/topology.hpp" +#include "intel_gpu/plugin/custom_layer.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/program_builder.hpp" + #include #include #include @@ -15,74 +21,48 @@ #include #include #include -#include "ie_blob.h" -#include "cpp/ie_cnn_network.h" - -#include "intel_gpu/graph/network.hpp" -#include "intel_gpu/graph/topology.hpp" - -#include -#include "intel_gpu/plugin/custom_layer.hpp" -#include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/plugin/remote_blob.hpp" -#include "intel_gpu/plugin/program_builder.hpp" +#include namespace ov { 
namespace intel_gpu { -class Graph { +class Graph final { public: + using Ptr = std::shared_ptr; enum class Stage : uint32_t { PREPROC = 1, EXECUTE = 2, POSTPROC = 4 }; - typedef std::shared_ptr Ptr; - using variable_states_map = std::map>; - - Graph(InferenceEngine::CNNNetwork& network, - const RemoteContextImpl::Ptr& context, - const ExecutionConfig& config, - uint16_t stream_id = 0, - InferenceEngine::InputsDataMap* inputs = nullptr, - InferenceEngine::OutputsDataMap* outputs = nullptr); - Graph(cldnn::BinaryInputBuffer& ib, - const RemoteContextImpl::Ptr& context, - const ExecutionConfig& config, - uint16_t stream_id = 0, - InferenceEngine::InputsDataMap* inputs = nullptr, - InferenceEngine::OutputsDataMap* outputs = nullptr); - explicit Graph(std::shared_ptr graph, uint16_t stream_id = 0); - void Export(cldnn::BinaryOutputBuffer &ob); - std::shared_ptr GetExecGraphInfo(); - - bool IsLoaded() const; - - std::map GetPerformanceCounts() const; - void UpdatePerfStatistics(); + + Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id = 0); + Graph(cldnn::BinaryInputBuffer& ib, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id = 0); + Graph(std::shared_ptr graph, uint16_t stream_id = 0); + + void export_model(cldnn::BinaryOutputBuffer &ob); + std::shared_ptr get_runtime_model(); + + bool is_loaded() const; + + std::vector get_profiling_info() const; + void update_profiling_info(); cldnn::engine& get_engine() const { return m_context->get_engine(); } const ExecutionConfig& get_config() const { return m_config; } - size_t GetMaxDynamicBatchSize() const { return m_config.get_property(ov::intel_gpu::max_dynamic_batch);} - const std::map& GetInputLayouts() const { return m_program->GetInputLayouts(); } - const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); } - const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); } - variable_states_map AllocateVariablesMemories(); - std::map> GetInputDynBatchDims() { return m_program->m_input_batch_dim; } - std::map GetOutputDynBatchDims() { return m_program->m_output_batch_dim; } - size_t GetNetworksCount() const { return m_networks.size(); } - std::shared_ptr GetNetwork(size_t idx = 0) const; - InferenceEngine::SizeVector GetOutputSize(std::string outName) const; - std::string MapOutputName(std::string outName) const; - std::string getName() const { return m_networkName; } + const std::map& get_input_layouts() const { return m_input_layouts; } + std::shared_ptr get_network() const; + + std::string out_name_to_internal(std::string out_port_name) const; + void wait(Stage stage_mask) { std::unique_lock lock(m_infer_mutex); - m_cv.wait(lock, [&] { + m_cv.wait(lock, [&stage_mask, this] { return (m_state & (uint32_t)stage_mask) == 0; }); m_state |= (uint32_t)stage_mask; } + void notify(Stage stage_mask) { { std::lock_guard lock(m_infer_mutex); @@ -94,30 +74,25 @@ class Graph { bool use_external_queue() const; -protected: +private: RemoteContextImpl::Ptr m_context; - std::shared_ptr m_program; - std::string m_networkName; ExecutionConfig m_config; uint16_t m_stream_id; - uint32_t m_state; + uint32_t m_state = 0; std::condition_variable m_cv; std::mutex m_infer_mutex; - std::vector> m_networks; + std::shared_ptr m_network; std::map primitiveIDs; std::map> prevPrimitiveIDs; std::map> perfMap; std::vector profilingIDs; - std::map outputDims; + std::map m_input_layouts; - 
std::shared_ptr BuildNetwork(std::shared_ptr program); - void Build(); - void UpdateLayersMaps(); - std::shared_ptr GetExecGraphInfoByPrimitivesInfo(std::vector& pi, - bool filter_const_primitives = true); + void build(std::shared_ptr program); + std::shared_ptr get_runtime_model(std::vector& pi, bool filter_const_primitives = true); }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp deleted file mode 100644 index 58ccb21444e8a8..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include -#include "intel_gpu/plugin/graph.hpp" -#include - -namespace ov { -namespace intel_gpu { - -class CompiledModel; - -class InferRequest : public InferenceEngine::IInferRequestInternal { -public: - using Ptr = std::shared_ptr; - // make sure all blobs and cldnn::memory objects - // are in place and valid - void checkBlobs() override; - void InferImpl() override; - - std::map GetPerformanceCounts() const override; - - InferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs, - const std::shared_ptr& execNetwork); - InferRequest(const std::vector>& inputs, - const std::vector>& outputs, - const std::shared_ptr& execNetwork); - - InferRequest(const InferRequest &) = delete; - - virtual ~InferRequest() = default; - - InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override; - void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override; - void SetBlobs(const std::string& name, const std::vector &data) override; - - std::vector> QueryState() override; - void SetGraph(std::shared_ptr graph); - void EnableProfiling() { m_useProfiling = true; } - void EnableStreams() { m_useStreams = true; } - - void setup_stream_graph(); - void enqueue_notify(); - void wait_notify(); - - void enqueue(); - void wait(); - - bool use_external_queue() const { return m_useExternalQueue; } - void enable_external_queue() { m_useExternalQueue = true; } - -private: - // This blob is used for outputs processing if output data type convertion or padding handling is needed - InferenceEngine::Blob::Ptr intermediate_output_blob = nullptr; - InferenceEngine::BlobMap users_blobs_matching; - InferenceEngine::BlobMap _deviceOutputs; - std::map inputsMap; - std::map outputsMap; - - std::map> inputTensorsMap; - - bool m_useProfiling = false; - bool m_useStreams = false; - bool m_useExternalQueue = false; - std::shared_ptr m_graph; - InferenceEngine::gpu::ClContext::Ptr m_context = nullptr; - - InferenceEngine::IStreamsExecutor* streamExecutor = nullptr; - - void prepare_input(const cldnn::primitive_id &inputName, InferenceEngine::Blob::Ptr &inputBlob, - std::vector& dependencies); - void prepare_output(const cldnn::primitive_id& outputName, InferenceEngine::Blob::Ptr& outputBlob, - std::vector& dependencies); - void allocate_dev_mem_if_needed(InferenceEngine::BlobMap& device_mems, InferenceEngine::Blob::Ptr& user_blob, - const cldnn::primitive_id& blob_name, const cldnn::layout& layout, - const bool need_lockable_mem = false); - - InferenceEngine::Blob::Ptr create_host_blob(const InferenceEngine::TensorDesc& desc, bool is_dynamic); - InferenceEngine::Blob::Ptr create_device_blob(const InferenceEngine::TensorDesc& desc); - - void 
copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, std::vector& events); - - template::value || - std::is_same::value>::type> - InferenceEngine::Blob::Ptr create_remote_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, - const BlobType mem_type, void* mem_ptr = nullptr); - InferenceEngine::Blob::Ptr create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem); - void allocate_inputs(); - void allocate_outputs(); - - void set_input(const std::string& name, const InferenceEngine::Blob::Ptr& data); - void set_output(const std::string& name, const InferenceEngine::Blob::Ptr& data); - InferenceEngine::Blob::Ptr reinterpret_device_blob(InferenceEngine::Blob::Ptr data, const InferenceEngine::TensorDesc& new_desc); - - std::map internal_outputs; -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp deleted file mode 100644 index 8be7708db94b0b..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request_legacy.hpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include -#include "intel_gpu/plugin/graph.hpp" -#include - -namespace ov { -namespace intel_gpu { - -struct buf_info { - size_t buf_offset; - size_t buf_size; -}; - -class CompiledModel; - -class InferRequestLegacy : public InferenceEngine::IInferRequestInternal { -public: - using Ptr = std::shared_ptr; - // make sure all blobs and cldnn::memory objects - // are in place and valid - void checkBlobs() override; - void InferImpl() override; - - std::map GetPerformanceCounts() const override; - - InferRequestLegacy(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs, - const std::shared_ptr& execNetwork); - InferRequestLegacy(const std::vector>& inputs, - const std::vector>& outputs, - const std::shared_ptr& execNetwork); - - InferRequestLegacy(const InferRequestLegacy &) = delete; - - virtual ~InferRequestLegacy() = default; - - InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override; - void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override; - void SetBlobs(const std::string& name, const std::vector &data) override; - - void SetBatch(int batch = -1); - std::vector> QueryState() override; - void SetGraph(std::shared_ptr graph); - void EnableProfiling() { m_useProfiling = true; } - void EnableStreams() { m_useStreams = true; } - - void setup_stream_graph(); - void preprocess_notify(); - void enqueue_notify(); - void wait_notify(); - - void preprocess(); - void enqueue(); - void wait(); - - void preprocess_dynamic(); - void enqueue_dynamic(); - void wait_dynamic(); - - bool use_external_queue() const { return m_useExternalQueue; } - void enable_external_queue() { m_useExternalQueue = true; } - -private: - InferenceEngine::BlobMap _deviceOutputs; - std::map inputsMap; - std::map outputsMap; - - std::map> inputTensorsMap; - - bool m_useProfiling = false; - bool m_useStreams = false; - bool m_useExternalQueue = false; - std::shared_ptr m_graph; - InferenceEngine::gpu::ClContext::Ptr m_context = nullptr; - - // dynamic batch stuff - std::map> batchInputs; - std::map> batchOutputs; - InferenceEngine::IStreamsExecutor* streamExecutor = nullptr; - - 
void prepare_input(const cldnn::primitive_id &inputName, InferenceEngine::Blob::Ptr &inputBlob, - std::vector& dependencies); - void prepare_output(const cldnn::primitive_id& outputName, InferenceEngine::Blob::Ptr& outputBlob); - - InferenceEngine::Blob::Ptr create_host_blob(const InferenceEngine::TensorDesc& desc, - std::shared_ptr alloc = nullptr); - InferenceEngine::Blob::Ptr create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout); - - void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr); - void copy_input_data(std::shared_ptr network, const cldnn::primitive_id &inputName, - const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, - buf_info* bi = nullptr); - - InferenceEngine::Blob::Ptr create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem); - void allocate_inputs(); - void allocate_outputs(); - void allocate_inputs_dynamic(); - void allocate_outputs_dynamic(); - - std::map internal_outputs; - std::vector> internal_outputs_dynamic; - Graph::variable_states_map variables_states_; - int m_curBatch = -1; -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp index b6752b56a433f8..546dffd866d1e5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_api_helper.hpp @@ -4,7 +4,6 @@ #include "intel_gpu/runtime/execution_config.hpp" #include "ie_metric_helpers.hpp" -#include #include "ie_plugin_config.hpp" #include "gpu/gpu_config.hpp" #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_remote_blob.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_remote_blob.hpp new file mode 100644 index 00000000000000..574394cb6c2c1d --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_remote_blob.hpp @@ -0,0 +1,145 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/plugin/remote_tensor.hpp" +#include + +#ifndef NOMINMAX +# define NOMINMAX +#endif + +#ifdef _WIN32 +# include +#else +# include +#endif + +#include +#include +#include + +namespace ov { +namespace intel_gpu { + +class RemoteAllocator : public InferenceEngine::IAllocator { +protected: + friend class RemoteTensorImpl; + RemoteTensorImpl* m_tensor = nullptr; + + mutable std::mutex locked_mutex; + mutable size_t locked_counter = 0; + mutable std::unique_ptr> locked_holder = nullptr; + mutable void* _handle = nullptr; + +public: + using Ptr = std::shared_ptr; + + RemoteAllocator(RemoteTensorImpl* tensor) : m_tensor(tensor) { } + /** + * @brief Maps handle to heap memory accessible by any memory manipulation routines. 
+ * @return Generic pointer to memory + */ + void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { + try { + std::lock_guard locker(locked_mutex); + if (locked_counter == 0) { + auto mem = m_tensor->get_original_memory(); + auto& stream = mem->get_engine()->get_service_stream(); + locked_holder = std::unique_ptr>(new cldnn::mem_lock(mem, stream)); + _handle = reinterpret_cast(locked_holder->data()); + } + locked_counter++; + + return _handle; + } catch (std::exception&) { + return nullptr; + } + }; + /** + * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. + * The multiple sequential mappings of the same handle are suppose to get the same + * result while there isn't a ref counter supported. + */ + void unlock(void* handle) noexcept override { + std::lock_guard locker(locked_mutex); + locked_counter--; + if (locked_counter == 0) + locked_holder.reset(); + } + /** + * @brief Allocates memory + * @param size The size in bytes to allocate + * @return Handle to the allocated resource + */ + void* alloc(size_t size) noexcept override { return nullptr; } + /** + * @brief Releases handle and all associated memory resources which invalidates the handle. + * @return false if handle cannot be released, otherwise - true. + */ + bool free(void* handle) noexcept override { return true; } +}; + +template +class TypedRemoteBlob : public TpublicAPI, public ov::legacy_convert::TensorHolder { +public: + using Ptr = std::shared_ptr; + + explicit TypedRemoteBlob(std::shared_ptr impl, InferenceEngine::gpu::ClContext::Ptr context) + : TpublicAPI(InferenceEngine::TensorDesc(InferenceEngine::details::convertPrecision(impl->get_element_type()), + impl->get_shape(), + InferenceEngine::TensorDesc::getLayoutByDims(impl->get_shape()))) + , ov::legacy_convert::TensorHolder({impl, nullptr}) + , m_impl(impl) + , m_context(context) + , m_allocator(std::make_shared(impl.get())) { } + + void allocate() noexcept override { } + bool deallocate() noexcept override { return true; } + InferenceEngine::ParamMap getParams() const override { return m_impl->get_properties(); } + + std::string getDeviceName() const noexcept override { return m_impl->get_device_name(); } + + std::shared_ptr getContext() const noexcept override { return m_context; } + InferenceEngine::LockedMemory buffer() noexcept override { + return InferenceEngine::LockedMemory(m_allocator.get(), m_impl.get(), 0); + } + InferenceEngine::LockedMemory cbuffer() const noexcept override { + return InferenceEngine::LockedMemory(m_allocator.get(), m_impl.get(), 0); + } + InferenceEngine::LockedMemory rwmap() noexcept override { + return InferenceEngine::LockedMemory(m_allocator.get(), m_impl.get(), 0); + } + InferenceEngine::LockedMemory rmap() const noexcept override { + return InferenceEngine::LockedMemory(m_allocator.get(), m_impl.get(), 0); + } + InferenceEngine::LockedMemory wmap() noexcept override { + return InferenceEngine::LockedMemory(m_allocator.get(), m_impl.get(), 0); + } + void setShape(const InferenceEngine::SizeVector& dims) override { m_impl->set_shape(dims); } + + +private: + const std::shared_ptr &getAllocator() const noexcept override { return m_allocator; } + void *getHandle() const noexcept override { return m_handle; } + + std::shared_ptr m_impl; + std::shared_ptr m_context; + void* m_handle = nullptr; + std::shared_ptr m_allocator; +}; + +using RemoteCLbuffer = TypedRemoteBlob; +using RemoteUSMbuffer = TypedRemoteBlob; +using RemoteCLImage2D = TypedRemoteBlob; 
+#ifdef _WIN32 +using RemoteD3DBuffer = TypedRemoteBlob; +using RemoteD3DSurface = TypedRemoteBlob; +#else +using RemoteVASurface = TypedRemoteBlob; +#endif + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_remote_context.hpp new file mode 100644 index 00000000000000..30a2b4948b1744 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/legacy_remote_context.hpp @@ -0,0 +1,152 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/runtime/lru_cache.hpp" +#include "intel_gpu/plugin/common_utils.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/legacy_remote_blob.hpp" + +#include +#include +#include +#include + +#ifndef NOMINMAX +# define NOMINMAX +#endif + +#ifdef _WIN32 +# include +#else +# include +#endif + +#include +#include +#include +#include + +namespace ov { +namespace intel_gpu { + + +// Template class below is needed to allow proper cast of user contexts +// We have the following public classes hierarchy: +// RemoteContext +// | +// ClContext +// | | +// VAContext D3DContext +// So our implementation must allow casting of context object to proper type user type (ClContext, VAContext or D3DContext) +// Thus we introduce this template which have 3 instances with different base classes: +// RemoteContext +// | +// ---------- ClContext ----------- +// | | | +// VAContext | D3DContext +// | | | +// RemoteVAContext RemoteCLContext RemoteD3DContext +// +// All these context types are just thin wrappers that calls common context internal impl (RemoteContextImpl) +template +class TypedRemoteContext : public PublicContextType { +public: + using Ptr = std::shared_ptr; + + explicit TypedRemoteContext(std::shared_ptr impl) : m_impl(impl) {} + TypedRemoteContext(std::string device_name, std::vector devices) + : m_impl(std::make_shared(device_name, devices)) {} + TypedRemoteContext(const std::map& known_contexts, const InferenceEngine::ParamMap& params) + : m_impl(std::make_shared(known_contexts, params)) {} + + InferenceEngine::ParamMap getParams() const override { return m_impl->get_property(); } + std::string getDeviceName() const noexcept override { return m_impl->get_device_name(); } + InferenceEngine::MemoryBlob::Ptr CreateHostBlob(const InferenceEngine::TensorDesc& desc) override { + auto new_tensor = m_impl->create_host_tensor(InferenceEngine::details::convertPrecision(desc.getPrecision()), ov::Shape(desc.getDims())); + return std::dynamic_pointer_cast(make_blob_with_precision(desc, new_tensor->data())); + } + InferenceEngine::RemoteBlob::Ptr CreateBlob(const InferenceEngine::TensorDesc& desc, const InferenceEngine::ParamMap& params = {}) override { + auto new_tensor = m_impl->create_tensor(InferenceEngine::details::convertPrecision(desc.getPrecision()), ov::Shape(desc.getDims()), params); + auto tensor_impl = std::dynamic_pointer_cast(new_tensor._ptr); + OPENVINO_ASSERT(tensor_impl, "[GPU] Unexpected tensor impl type"); + auto mem_type = tensor_impl->get_properties().at(ov::intel_gpu::shared_mem_type.name()).as(); + if (mem_type == ov::intel_gpu::SharedMemType::OCL_BUFFER) { + return std::make_shared(tensor_impl, std::dynamic_pointer_cast(this->shared_from_this())); + } else if (mem_type == ov::intel_gpu::SharedMemType::USM_DEVICE_BUFFER || + mem_type 
== ov::intel_gpu::SharedMemType::USM_HOST_BUFFER || + mem_type == ov::intel_gpu::SharedMemType::USM_USER_BUFFER) { + return std::make_shared(tensor_impl, std::dynamic_pointer_cast(this->shared_from_this())); + } else if (mem_type == ov::intel_gpu::SharedMemType::OCL_IMAGE2D) { + return std::make_shared(tensor_impl, std::dynamic_pointer_cast(this->shared_from_this())); +#ifdef _WIN32 + } else if (mem_type == ov::intel_gpu::SharedMemType::DX_BUFFER) { + return std::make_shared(tensor_impl, std::dynamic_pointer_cast(this->shared_from_this())); + } else if (mem_type == ov::intel_gpu::SharedMemType::VA_SURFACE) { + return std::make_shared(tensor_impl, std::dynamic_pointer_cast(this->shared_from_this())); +#else + } else if (mem_type == ov::intel_gpu::SharedMemType::VA_SURFACE) { + return std::make_shared(tensor_impl, std::dynamic_pointer_cast(this->shared_from_this())); +#endif + } + OPENVINO_THROW("[GPU] CreateBlob error: Unsupported memory type: ", mem_type); + } + + RemoteContextImpl::Ptr get_impl() { return m_impl; } + +private: + std::shared_ptr m_impl; +}; + +using RemoteCLContext = TypedRemoteContext; +#ifdef _WIN32 +using RemoteD3DContext = TypedRemoteContext; +#else +using RemoteVAContext = TypedRemoteContext; +#endif + +inline ov::SoPtr wrap_if_old_api(std::shared_ptr new_impl, bool is_new_api) { + if (is_new_api) { + return new_impl; + } else { + auto remote_properties = new_impl->get_property(); + auto context_type = remote_properties.at(ov::intel_gpu::context_type.name()).as(); + if (context_type == ov::intel_gpu::ContextType::OCL) { + return ov::legacy_convert::convert_remote_context(std::make_shared(new_impl)); + } else if (context_type == ov::intel_gpu::ContextType::VA_SHARED) { + #ifdef _WIN32 + return ov::legacy_convert::convert_remote_context(std::make_shared(new_impl)); + #else + return ov::legacy_convert::convert_remote_context(std::make_shared(new_impl)); + #endif + } + } + OPENVINO_THROW("[GPU] Unexpected context parameters"); +} + + +inline RemoteContextImpl::Ptr get_context_impl(ov::SoPtr ptr) { + if (auto wrapper = std::dynamic_pointer_cast(ptr._ptr)) { + auto legacy_context = wrapper->get_context(); + if (auto legacy_context_impl = std::dynamic_pointer_cast(legacy_context)) { + return legacy_context_impl->get_impl(); +#ifdef _WIN32 + } else if (auto legacy_context_impl = std::dynamic_pointer_cast(legacy_context)) { + return legacy_context_impl->get_impl(); +#else + } else if (auto legacy_context_impl = std::dynamic_pointer_cast(legacy_context)) { + return legacy_context_impl->get_impl(); +#endif + } + } + auto casted = std::dynamic_pointer_cast(ptr._ptr); + OPENVINO_ASSERT(casted, "[GPU] Invalid remote context type. 
Can't cast to ov::intel_gpu::RemoteContext type"); + return casted; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 774ed4fba23941..d92cd695f4b569 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -4,70 +4,64 @@ #pragma once +#include "openvino/runtime/iplugin.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/runtime/engine.hpp" #include #include #include -#include "intel_gpu/runtime/engine.hpp" -#include -#include -#include "intel_gpu/plugin/remote_context.hpp" namespace ov { namespace intel_gpu { -class Plugin : public InferenceEngine::IInferencePlugin { - struct impl; - std::shared_ptr _impl; - - std::string default_device_id = "0"; - // key: device_id, value: cldnn device - std::map device_map; +class Plugin : public ov::IPlugin { +private: + std::string m_default_device_id = "0"; + std::map m_device_map; std::map m_configs_map; - mutable std::map m_default_contexts; + mutable std::map> m_default_contexts; mutable std::once_flag m_default_contexts_once; - std::map get_default_contexts() const; + std::map> get_default_contexts() const; - InferenceEngine::CNNNetwork clone_and_transform_model(const InferenceEngine::CNNNetwork& network, - const ExecutionConfig& config) const; + std::shared_ptr clone_and_transform_model(const std::shared_ptr& network, const ExecutionConfig& config) const; void transform_model(std::shared_ptr& model, const ExecutionConfig& config) const; - void register_primitives(); - std::string get_device_id_from_config(const std::map& config) const; - std::string get_device_id(const std::map& config) const; - RemoteCLContext::Ptr get_default_context(const std::string& device_id) const; + void register_primitives() const; + std::string get_device_id_from_config(const ov::AnyMap& config) const; + std::string get_device_id(const ov::AnyMap& config) const; + std::shared_ptr get_default_context(const std::string& device_id) const; + std::vector get_caching_properties() const; std::vector get_supported_properties() const; std::vector get_supported_internal_properties() const; std::vector get_device_capabilities(const cldnn::device_info& info) const; - uint32_t get_optimal_batch_size(const std::map& options) const; - uint32_t get_max_batch_size(const std::map& options) const; + uint32_t get_optimal_batch_size(const ov::AnyMap& options) const; + uint32_t get_max_batch_size(const ov::AnyMap& options) const; - ov::AnyMap preprocess_config(const std::map& orig_config) const; + ov::AnyMap preprocess_config(const ov::AnyMap& orig_config) const; + bool is_metric(const std::string& name) const; + ov::Any get_metric(const std::string& name, const ov::AnyMap& arguments) const; public: Plugin(); - InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, - const std::map &config) override; - - InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, - const std::shared_ptr &context, - const std::map &config) override; - - void SetConfig(const std::map &config) override; - InferenceEngine::Parameter GetConfig(const std::string& name, const std::map& options) const override; - InferenceEngine::Parameter GetMetric(const std::string& name, const std::map& options) const override; - InferenceEngine::QueryNetworkResult QueryNetwork(const 
InferenceEngine::CNNNetwork& network, - const std::map& config) const override; - InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& networkModel, - const std::map& config) override; - InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config) override; - - std::shared_ptr CreateContext(const InferenceEngine::ParamMap& params) override; - std::shared_ptr GetDefaultContext(const InferenceEngine::ParamMap& params) override; + std::shared_ptr compile_model(const std::shared_ptr& model, + const ov::AnyMap& properties) const override; + std::shared_ptr compile_model(const std::shared_ptr& model, + const ov::AnyMap& properties, + const ov::SoPtr& context) const override; + + void set_property(const ov::AnyMap& properties) override; + ov::Any get_property(const std::string& name, const ov::AnyMap& arguments) const override; + std::shared_ptr import_model(std::istream& model, const ov::AnyMap& properties) const override; + std::shared_ptr import_model(std::istream& model, + const ov::SoPtr& context, + const ov::AnyMap& properties) const override; + ov::SupportedOpsMap query_model(const std::shared_ptr& model, + const ov::AnyMap& properties) const override; + ov::SoPtr create_context(const ov::AnyMap& remote_properties) const override; + ov::SoPtr get_default_context(const ov::AnyMap& remote_properties) const override; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp index 6d3b40c6f8022c..5bd2d3b4dfcfaf 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program_builder.hpp @@ -13,8 +13,6 @@ #include "intel_gpu/graph/topology.hpp" #include "intel_gpu/graph/program.hpp" -#include - #include #include #include @@ -59,38 +57,28 @@ std::string layer_type_lower(const std::shared_ptr& op); std::string layer_type_name_ID(const std::shared_ptr& op); struct PerfCounter { - InferenceEngine::InferenceEngineProfileInfo::LayerStatus status; - bool isCPU; - uint64_t realTime_uSec; - uint64_t cpu_uSec; - uint32_t num; - std::string layerType; - std::string parentPrimitive; + ov::ProfilingInfo::Status status = ov::ProfilingInfo::Status::NOT_RUN; + bool isCPU = false; + uint64_t realTime_uSec = 0; + uint64_t cpu_uSec = 0; + uint32_t num = 0; + std::string layerType = ""; + std::string parentPrimitive = ""; -public: - PerfCounter() - : status(InferenceEngine::InferenceEngineProfileInfo::NOT_RUN) - , isCPU(false) - , realTime_uSec(0) - , cpu_uSec(0) - , num(0) {} + PerfCounter() = default; long long realTime_avg() const { return (num == 0) ? 0 : realTime_uSec / num; } long long cpu_avg() const { return (num == 0) ? 
0 : cpu_uSec / num; } }; -class ProgramBuilder { +class ProgramBuilder final { public: - ProgramBuilder(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config, + ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, bool createTopologyOnly = false, bool partialBuild = false, - InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr, std::shared_ptr task_executor = nullptr, bool innerProgram = false); - ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config, - InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr); + ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config); static const cldnn::primitive_id m_preProcessTag; - static const cldnn::primitive_id m_meanValuesTag; - static const cldnn::primitive_id m_workaroundTag; static const cldnn::primitive_id m_preCustomLayerTag; static const cldnn::primitive_id m_postCustomLayerTag; @@ -100,38 +88,27 @@ class ProgramBuilder { std::vector profiling_ids; - std::map outputDims; std::map inputLayouts; using BlobCacheKey = std::pair>; std::map blobMemCache; - CustomLayerMap m_custom_layers; - int m_max_batch; - int m_curBatch; - std::map> m_input_batch_dim; - std::map m_output_batch_dim; + std::shared_ptr get_compiled_program() const; + std::shared_ptr get_topology() const { return m_topology; } - std::shared_ptr GetCompiledProgram(int program_id = 0); - const std::map& GetInputLayouts() const { return inputLayouts; } - InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; } - InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; } + const std::map& get_input_layouts() const { return inputLayouts; } cldnn::engine& get_engine() const { return m_engine; } const ExecutionConfig& get_config() const { return m_config; } - int GetMaxBatchSizeForSingleProgram(); - bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr& op); - bool IsDynBatchModel(const std::shared_ptr& model, - std::map& shapes, - std::map>& batch_dim); + bool is_op_supported(const std::shared_ptr& op); // Profiling utils void init_profile_info(const cldnn::primitive& prim); // Graph construction helpers - std::vector GetInputInfo(const std::shared_ptr& op) const; + std::vector GetInputInfo(const std::shared_ptr& op) const; - using factory_t = std::function&)>; - using factories_map_t = std::map; + using factory_t = std::function&)>; + using factories_map_t = std::map; template static void RegisterFactory(factory_t func) { @@ -142,13 +119,12 @@ class ProgramBuilder { } template::value>::type> - void add_primitive(const ngraph::Node& op, PType prim, std::vector aliases = {}) { + void add_primitive(const ov::Node& op, PType prim, std::vector aliases = {}) { add_primitive(op, std::static_pointer_cast(std::make_shared(prim)), std::move(aliases)); } - void add_primitive(const ngraph::Node& op, std::shared_ptr prim, std::vector aliases = {}); + void add_primitive(const ov::Node& op, std::shared_ptr prim, std::vector aliases = {}); - std::shared_ptr GetTopology() const { return m_topology; } using variables_state_info_map = std::map>; @@ -157,21 +133,20 @@ class ProgramBuilder { const variables_state_info_map& GetVariablesStatesInfo() const { return m_variablesStateInfo; } bool use_new_shape_infer() const { return allow_new_shape_infer; } - bool requires_new_shape_infer(const ngraph::Node& op) const; + bool 
requires_new_shape_infer(const ov::Node& op) const; - std::shared_ptr get_task_executor() { return m_task_executor; } + std::shared_ptr get_task_executor() const { return m_task_executor; } private: static factories_map_t factories_map; - std::vector> m_programs; + std::shared_ptr m_program; ExecutionConfig m_config; cldnn::engine& m_engine; static std::mutex m_mutex; std::shared_ptr m_topology; - InferenceEngine::InputsDataMap m_networkInputs; - InferenceEngine::OutputsDataMap m_networkOutputs; variables_state_info_map m_variablesStateInfo; + CustomLayerMap m_custom_layers; bool allow_new_shape_infer = false; @@ -182,31 +157,28 @@ class ProgramBuilder { void EnableQueryMode() { queryMode = true; } void DisableQueryMode() { queryMode = false; } - void PrepareBuild(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs); - void CleanupBuild(); + void prepare_build(); + void cleanup_build(); // TODO(eunsoo): remove createTopolpgyOnly argument and add another method to create topology from ngraph function - std::shared_ptr BuildProgram(const std::vector>& ops, - InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs, - bool createTopologyOnly = false, bool partialBuild = false, bool innerProgram = false); + std::shared_ptr build(const std::vector>& ops, + bool createTopologyOnly = false, bool partialBuild = false, bool innerProgram = false); - void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op); - void ChangeInputBatch(int batch); + void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op); }; -void CreateCustomOp(ProgramBuilder& p, const std::shared_ptr& node, CustomLayerPtr customLayer); -void CreateUnaryEltwiseOp(ProgramBuilder& p, const std::shared_ptr& node, +void CreateCustomOp(ProgramBuilder& p, const std::shared_ptr& node, CustomLayerPtr customLayer); +void CreateUnaryEltwiseOp(ProgramBuilder& p, const std::shared_ptr& node, cldnn::activation_func func, cldnn::activation_additional_params params); void CreateElementwiseOp(ProgramBuilder& p, - const std::shared_ptr& node, + const std::shared_ptr& node, cldnn::eltwise_mode mode, std::vector coefficients = {}, bool pythondiv = true); -bool IsNodeOnConstPath(const std::shared_ptr& node); +bool IsNodeOnConstPath(const std::shared_ptr& node); -void validate_inputs_count(const std::shared_ptr& op, std::vector possible_inputs_count); +void validate_inputs_count(const std::shared_ptr& op, std::vector possible_inputs_count); inline bool ends_with(const std::string& value, const std::string& suffix) { if (suffix.size() > value.size()) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp index ffbbf1a0f1fc1a..877c2c707f1791 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp @@ -4,95 +4,39 @@ #pragma once -#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/runtime/so_ptr.hpp" -#include -#include #include -#include namespace ov { namespace intel_gpu { -class RemoteBlobImpl; +class RemoteTensorImpl; +class RemoteContextImpl; -class RemoteAllocator : public InferenceEngine::IAllocator { -protected: - friend class RemoteBlobImpl; - std::atomic_flag _lock; - std::map m_lockedBlobs; - - void regLockedBlob(void* handle, const RemoteBlobImpl* blob); - -public: - using Ptr = std::shared_ptr; - - 
RemoteAllocator() { _lock.clear(std::memory_order_relaxed); } - /** - * @brief Maps handle to heap memory accessible by any memory manipulation routines. - * @return Generic pointer to memory - */ - void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return handle; }; - /** - * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. - * The multiple sequential mappings of the same handle are suppose to get the same - * result while there isn't a ref counter supported. - */ - void unlock(void* handle) noexcept override; - /** - * @brief Allocates memory - * @param size The size in bytes to allocate - * @return Handle to the allocated resource - */ - void* alloc(size_t size) noexcept override { return nullptr; } - /** - * @brief Releases handle and all associated memory resources which invalidates the handle. - * @return false if handle cannot be released, otherwise - true. - */ - bool free(void* handle) noexcept override { return true; } - - void lock() { - while (_lock.test_and_set(std::memory_order_acquire)) {} - } - - void unlock() { - _lock.clear(std::memory_order_release); - } -}; - -class USMHostAllocator : public InferenceEngine::IAllocator { -protected: - InferenceEngine::gpu::USMBlob::Ptr _usm_host_blob = nullptr; - InferenceEngine::gpu::ClContext::Ptr _context = nullptr; +class USMHostAllocator final { +private: + ov::SoPtr _usm_host_tensor = { nullptr, nullptr }; + std::shared_ptr _context = nullptr; public: using Ptr = std::shared_ptr; - USMHostAllocator(InferenceEngine::gpu::ClContext::Ptr context) : _context(context) { } - /** - * @brief Maps handle to heap memory accessible by any memory manipulation routines. - * @return Generic pointer to memory - */ - void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override; - - /** - * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. - * The multiple sequential mappings of the same handle are suppose to get the same - * result while there isn't a ref counter supported. - */ - void unlock(void* handle) noexcept override; + explicit USMHostAllocator(std::shared_ptr context) : _context(context) { } /** * @brief Allocates memory * @param size The size in bytes to allocate * @return Handle to the allocated resource */ - void* alloc(size_t size) noexcept override; + void* allocate(const size_t bytes, const size_t alignment = alignof(max_align_t)) noexcept; /** * @brief Releases handle and all associated memory resources which invalidates the handle. * @return false if handle cannot be released, otherwise - true. 
*/ - bool free(void* handle) noexcept override; + bool deallocate(void* handle, const size_t bytes, size_t alignment = alignof(max_align_t)) noexcept; + + bool is_equal(const USMHostAllocator& other) const; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_blob.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_blob.hpp deleted file mode 100644 index d7c04ab142e342..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_blob.hpp +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (C) 2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "intel_gpu/runtime/memory.hpp" -#include "intel_gpu/runtime/engine.hpp" -#include "intel_gpu/plugin/common_utils.hpp" - -#ifndef NOMINMAX -# define NOMINMAX -#endif - -#ifdef _WIN32 -# include -#else -# include -#endif - -#include -#include -#include - -namespace ov { -namespace intel_gpu { -class RemoteContextImpl; - -class RemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter { - friend class RemoteAllocator; -public: - explicit RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, - cldnn::stream& stream, - const cldnn::layout& layout, - cldnn::shared_handle mem = nullptr, - cldnn::shared_surface surf = 0, - uint32_t plane = 0, - BlobType mem_type = BlobType::BT_BUF_INTERNAL); - - void allocate(); - bool deallocate() noexcept; - InferenceEngine::ParamMap getParams() const; - std::string getDeviceName() const noexcept; - std::shared_ptr getContext() const noexcept; - InferenceEngine::LockedMemory buffer() noexcept; - InferenceEngine::LockedMemory cbuffer() const noexcept; - InferenceEngine::LockedMemory rwmap() noexcept; - InferenceEngine::LockedMemory rmap() const noexcept; - InferenceEngine::LockedMemory wmap() noexcept; - const std::shared_ptr &getAllocator() const noexcept; - void *getHandle() const noexcept { return _handle; } - - void reinterpret(const cldnn::layout& new_layout); - - bool is_allocated() const noexcept; - bool is_locked() const noexcept; - cldnn::memory::ptr get_memory() { - auto engine = m_memory_object->get_engine(); - return engine->reinterpret_buffer(*m_memory_object, m_layout); - } - cldnn::memory::ptr get_original_memory() { - return m_memory_object; - } - void setShape(const InferenceEngine::SizeVector& dims); - -protected: - std::shared_ptr m_allocator; - InferenceEngine::gpu::ClContext::Ptr m_context; - cldnn::stream& m_stream; - - // constructor stuff - cldnn::shared_handle m_mem; - cldnn::shared_surface m_surf; - - uint32_t m_plane; - cldnn::layout m_layout; - BlobType m_mem_type; - size_t m_hash; - - cldnn::memory::ptr m_memory_object; - - mutable std::mutex lockedMutex; - mutable size_t lockedCounter; - mutable std::unique_ptr> lockedHolder; - mutable void* _handle; - - void lock() const; - void unlock() const; - - bool is_shared() const; - bool supports_caching() const; -}; - -template -class TypedRemoteBlob : public TpublicAPI { -public: - using Ptr = std::shared_ptr; - - explicit TypedRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context, - cldnn::stream& stream, - const InferenceEngine::TensorDesc& desc, - const cldnn::layout& layout, - cldnn::shared_handle mem = nullptr, - cldnn::shared_surface surf = 0, - uint32_t plane = 0, - BlobType mem_type = BlobType::BT_BUF_INTERNAL) - : TpublicAPI(desc) - , _impl(context, stream, layout, mem, surf, plane, mem_type) {} - - void allocate() noexcept override { - try { - if (!_impl.is_allocated()) - _impl.allocate(); - } catch (...) 
{} - } - bool deallocate() noexcept override { return _impl.deallocate(); } - InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); } - std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); } - std::shared_ptr getContext() const noexcept override { return _impl.getContext(); } - InferenceEngine::LockedMemory buffer() noexcept override { return _impl.buffer(); } - InferenceEngine::LockedMemory cbuffer() const noexcept override { return _impl.cbuffer(); } - InferenceEngine::LockedMemory rwmap() noexcept override { return _impl.rwmap(); } - InferenceEngine::LockedMemory rmap() const noexcept override { return _impl.rmap(); } - InferenceEngine::LockedMemory wmap()noexcept override { return _impl.wmap(); } - RemoteBlobImpl* getImpl() { return &_impl; } - void setShape(const InferenceEngine::SizeVector& dims) override { _impl.setShape(dims); } - -protected: - const std::shared_ptr &getAllocator() const noexcept override { return _impl.getAllocator(); } - void *getHandle() const noexcept override { return _impl.getHandle(); } - RemoteBlobImpl _impl; -}; - -using RemoteCLbuffer = TypedRemoteBlob; -using RemoteUSMbuffer = TypedRemoteBlob; -using RemoteCLImage2D = TypedRemoteBlob; -#ifdef _WIN32 -using RemoteD3DBuffer = TypedRemoteBlob; -using RemoteD3DSurface = TypedRemoteBlob; -#else -using RemoteVASurface = TypedRemoteBlob; -#endif - -inline RemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) { -#ifdef _WIN32 - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } -#else - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } -#endif - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } - { - auto ptr = blobPtr->as(); - if (ptr) return ptr->getImpl(); - } - return nullptr; -} - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index 72881bc471f278..131f769bb41cd5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -4,25 +4,23 @@ #pragma once -#include "intel_gpu/runtime/memory.hpp" -#include "intel_gpu/runtime/engine.hpp" -#include "intel_gpu/runtime/lru_cache.hpp" -#include "intel_gpu/plugin/common_utils.hpp" - -#include -#include -#include -#include - #ifndef NOMINMAX # define NOMINMAX #endif #ifdef _WIN32 # include +# include #else # include +# include #endif +#include "openvino/runtime/iremote_context.hpp" + +#include "intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/runtime/lru_cache.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include #include @@ -32,150 +30,50 @@ namespace ov { namespace intel_gpu { -enum class BlobType { - BT_EMPTY, - BT_BUF_INTERNAL, - BT_BUF_SHARED, - BT_USM_SHARED, - BT_USM_HOST_INTERNAL, - BT_USM_DEVICE_INTERNAL, - BT_IMG_SHARED, - BT_SURF_SHARED, - BT_DX_BUF_SHARED, -}; - -template -Result extract_object(const InferenceEngine::ParamMap& params, const std::string& key) { - auto itrHandle = params.find(key); - OPENVINO_ASSERT(itrHandle != params.end(), "[GPU] No parameter ", key, " found in ParamsMap"); - return itrHandle->second.as(); -} - -class RemoteContextImpl { +class RemoteContextImpl : public ov::IRemoteContext { public: 
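The extract_object<> helper removed in this hunk pulled a typed value out of an InferenceEngine::ParamMap. With the 2.0 API the same lookup can be written directly against ov::AnyMap; a minimal sketch under that assumption (the helper name is illustrative, not taken from the patch):

    template <typename T>
    T extract_from_any_map(const ov::AnyMap& params, const std::string& key) {
        auto it = params.find(key);  // ov::AnyMap is a std::map<std::string, ov::Any>
        OPENVINO_ASSERT(it != params.end(), "[GPU] No parameter ", key, " found in map");
        return it->second.as<T>();   // typed extraction via ov::Any::as<T>()
    }
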
- enum ContextType { - OCL, - DEV_SHARED - }; - using Ptr = std::shared_ptr; - using CPtr = std::shared_ptr; - RemoteContextImpl(std::string device_name, std::vector devices); - RemoteContextImpl(const std::vector& known_contexts, const InferenceEngine::ParamMap& params); + RemoteContextImpl(const std::string& device_name, std::vector devices); + RemoteContextImpl(const std::map& known_contexts, const ov::AnyMap& params); + + const std::string& get_device_name() const override; + + const ov::AnyMap& get_property() const override; + ov::SoPtr create_host_tensor(const ov::element::Type type, const ov::Shape& shape) override; + ov::SoPtr create_tensor(const ov::element::Type& type, const ov::Shape& shape, const ov::AnyMap& params) override; - InferenceEngine::ParamMap get_params() const; - std::string get_device_name() const noexcept; - InferenceEngine::MemoryBlob::Ptr create_host_blob(InferenceEngine::gpu::ClContext::Ptr public_context, const InferenceEngine::TensorDesc& desc); - InferenceEngine::RemoteBlob::Ptr create_blob(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc, - const InferenceEngine::ParamMap& params = {}); cldnn::engine& get_engine() { return *m_engine; } - InferenceEngine::gpu_handle_param get_external_queue() const { return m_external_queue; } + ov::intel_gpu::gpu_handle_param get_external_queue() const { return m_external_queue; } cldnn::memory::ptr try_get_cached_memory(size_t hash); void add_to_cache(size_t hash, cldnn::memory::ptr memory); private: - std::string get_device_name(const std::vector& known_contexts, - const cldnn::device::ptr current_device); - InferenceEngine::RemoteBlob::Ptr reuse_surface(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc, - const InferenceEngine::ParamMap& params); - InferenceEngine::RemoteBlob::Ptr reuse_memory(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc, - cldnn::shared_handle mem, - BlobType blob_type); - InferenceEngine::RemoteBlob::Ptr create_buffer(InferenceEngine::gpu::ClContext::Ptr public_context, const InferenceEngine::TensorDesc& desc); - InferenceEngine::RemoteBlob::Ptr create_usm(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc, - BlobType alloc_type); - void check_if_shared(); + std::shared_ptr get_this_shared_ptr(); + + std::string get_device_name(const std::map& known_contexts, const cldnn::device::ptr current_device) const; + std::shared_ptr reuse_surface(const ov::element::Type type, const ov::Shape& shape, const ov::AnyMap& params); + std::shared_ptr reuse_memory(const ov::element::Type type, const ov::Shape& shape, cldnn::shared_handle mem, TensorType tensor_type); + std::shared_ptr create_buffer(const ov::element::Type type, const ov::Shape& shape); + std::shared_ptr create_usm(const ov::element::Type type, const ov::Shape& shape, TensorType alloc_type); + void check_if_shared() const; + + void init_properties(); std::shared_ptr m_engine; - InferenceEngine::gpu_handle_param m_va_display; - InferenceEngine::gpu_handle_param m_external_queue; - static const size_t cache_capacity = 100; + ov::intel_gpu::gpu_handle_param m_va_display = nullptr; + ov::intel_gpu::gpu_handle_param m_external_queue = nullptr; - ContextType m_type; + ContextType m_type = ContextType::OCL; std::string m_device_name = ""; - const std::string m_plugin_name; - cldnn::LruCache m_memory_cache; + static const size_t cache_capacity = 100; + cldnn::LruCache 
m_memory_cache = cldnn::LruCache(cache_capacity); std::mutex m_cache_mutex; -}; - -// Template class below is needed to allow proper cast of user contexts -// We have the following public classes hierarchy: -// RemoteContext -// | -// ClContext -// | | -// VAContext D3DContext -// So our implementation must allow casting of context object to proper type user type (ClContext, VAContext or D3DContext) -// Thus we introduce this template which have 3 instances with different base classes: -// RemoteContext -// | -// ---------- ClContext ----------- -// | | | -// VAContext | D3DContext -// | | | -// RemoteVAContext RemoteCLContext RemoteD3DContext -// -// All these context types are just thin wrappers that calls common context internal impl (RemoteContextImpl) -template -class TypedRemoteContext : public PublicContextType { -public: - using Ptr = std::shared_ptr; - - TypedRemoteContext(std::string device_name, std::vector devices) - : m_impl(std::make_shared(device_name, devices)) {} - TypedRemoteContext(const std::vector& known_contexts, const InferenceEngine::ParamMap& params) - : m_impl(std::make_shared(known_contexts, params)) {} - - InferenceEngine::ParamMap getParams() const override { return m_impl->get_params(); } - std::string getDeviceName() const noexcept override { return m_impl->get_device_name(); } - InferenceEngine::MemoryBlob::Ptr CreateHostBlob(const InferenceEngine::TensorDesc& desc) override { - return m_impl->create_host_blob(std::dynamic_pointer_cast(this->shared_from_this()), desc); - } - InferenceEngine::RemoteBlob::Ptr CreateBlob(const InferenceEngine::TensorDesc& desc, const InferenceEngine::ParamMap& params = {}) override { - return m_impl->create_blob(std::dynamic_pointer_cast(this->shared_from_this()), desc, params); - } - - RemoteContextImpl::Ptr get_impl() { return m_impl; } -private: - std::shared_ptr m_impl; + ov::AnyMap properties; }; -using RemoteCLContext = TypedRemoteContext; -#ifdef _WIN32 -using RemoteD3DContext = TypedRemoteContext; -#else -using RemoteVAContext = TypedRemoteContext; -#endif - -inline std::shared_ptr get_context_impl(InferenceEngine::gpu::ClContext::Ptr context) { - OPENVINO_ASSERT(context != nullptr, "[GPU] Couldn't get impl from invalid context object"); -#ifdef _WIN32 - if (auto ptr = context->as()) - return ptr->get_impl(); -#else - if (auto ptr = context->as()) - return ptr->get_impl(); -#endif - if (auto ptr = context->as()) - return ptr->get_impl(); - - OPENVINO_ASSERT(false, "[GPU] Couldn't get context impl from public context object."); -} - -inline std::shared_ptr get_context_impl(InferenceEngine::RemoteContext::Ptr context) { - OPENVINO_ASSERT(context != nullptr, "[GPU] Couldn't get impl from invalid context object"); - auto casted = std::dynamic_pointer_cast(context); - OPENVINO_ASSERT(casted != nullptr, "[GPU] Couldn't get context impl: Context type is not ClContext or it's derivatives"); - return get_context_impl(casted); -} - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp new file mode 100644 index 00000000000000..939c7b89784fc9 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp @@ -0,0 +1,84 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#ifndef NOMINMAX +# define NOMINMAX +#endif + +#ifdef _WIN32 +# include +#else +# include +#endif +#include "openvino/runtime/iremote_tensor.hpp" + +#include 
"intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/plugin/common_utils.hpp" + +#include +#include +#include + +namespace ov { +namespace intel_gpu { +class RemoteContextImpl; + +class RemoteTensorImpl : public ov::IRemoteTensor { + friend class RemoteAllocator; +public: + RemoteTensorImpl(std::shared_ptr context, + const ov::Shape& shape, + const ov::element::Type& element_type, + TensorType mem_type = TensorType::BT_BUF_INTERNAL, + cldnn::shared_handle mem = nullptr, + cldnn::shared_surface surf = 0, + uint32_t plane = 0); + + ~RemoteTensorImpl() override; + const AnyMap& get_properties() const override; + const std::string& get_device_name() const override; + + void set_shape(ov::Shape shape) override; + const ov::element::Type& get_element_type() const override; + const ov::Shape& get_shape() const override; + const ov::Strides& get_strides() const override; + + void allocate(); + bool deallocate() noexcept; + + bool is_allocated() const noexcept; + bool is_surface() const noexcept; + cldnn::memory::ptr get_memory() const; + cldnn::memory::ptr get_original_memory() const; + + std::shared_ptr get_context() const; + +private: + std::shared_ptr m_context; + + ov::element::Type m_element_type; + ov::Shape m_shape; + ov::Strides m_strides{}; + ov::AnyMap m_properties; + + cldnn::memory::ptr m_memory_object = nullptr; + cldnn::layout m_layout; + TensorType m_mem_type; + + cldnn::shared_handle m_mem; + cldnn::shared_surface m_surf; + uint32_t m_plane; + size_t m_hash = 0; + + bool is_shared() const; + bool supports_caching() const; + void update_strides(); + void init_properties(); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp new file mode 100644 index 00000000000000..1fd6d035dd48af --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp @@ -0,0 +1,109 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/isync_infer_request.hpp" +#include "intel_gpu/plugin/graph.hpp" +#include "intel_gpu/plugin/remote_tensor.hpp" + +#include +#include +#include +#include +#include + +namespace ov { +namespace intel_gpu { + +class CompiledModel; + +enum class TensorOwner : uint8_t { + USER = 0, + PLUGIN = 1 +}; + +struct TensorWrapper { + std::shared_ptr ptr; + TensorOwner owner; +}; + +class SyncInferRequest : public ov::ISyncInferRequest { +public: + using Ptr = std::shared_ptr; + + explicit SyncInferRequest(const std::shared_ptr& compiled_model); + SyncInferRequest(const SyncInferRequest &) = delete; + ~SyncInferRequest() override = default; + + void infer() override; + std::vector get_profiling_info() const override; + std::vector> query_state() const override; + + void set_tensor(const ov::Output& port, const ov::SoPtr& tensor) override; + void set_tensors_impl(const ov::Output port, const std::vector>& tensors) override; + + ov::SoPtr get_tensor(const ov::Output& port) const override; + + void set_task_executor(const std::shared_ptr& task_executor); + void setup_stream_graph(); + void enqueue_notify(); + void wait_notify(); + + void enqueue(); + void wait(); + + bool use_external_queue() const { return m_use_external_queue; } + +private: + void check_tensors() const override; + + std::unordered_map m_user_inputs; + std::unordered_map m_user_outputs; + + std::unordered_map m_plugin_inputs; 
+ std::unordered_map m_plugin_outputs; + + std::unordered_map> m_input_ports_map; + std::unordered_map> m_output_ports_map; + std::unordered_map m_output_names_map; + + std::map m_internal_outputs; + + std::shared_ptr m_graph; + RemoteContextImpl::Ptr m_context = nullptr; + std::shared_ptr m_stream_executor = nullptr; + bool m_enable_profiling = false; + bool m_use_external_queue = false; + + std::vector prepare_input(const std::string& name, const ov::Output& port, const TensorWrapper& user_tensor_wrapper); + std::vector prepare_output(const std::string& name, const ov::Output& port, const TensorWrapper& user_tensor_wrapper); + std::vector prepare_batched_input(const std::string& name, + const ov::Output& port, + const std::vector>& user_tensors); + + TensorWrapper create_or_share_device_tensor(const TensorWrapper& user_tensor_wrapper, + const std::string& name, + const ov::PartialShape& pshape, + ov::element::Type element_type, + bool need_lockable_mem) const; + std::shared_ptr reinterpret_device_tensor(std::shared_ptr tensor, const ov::Shape new_shape) const; + std::shared_ptr create_host_tensor(const ov::PartialShape& port_shape, const ov::element::Type& port_element_type) const; + std::shared_ptr create_device_tensor(const ov::Shape& pshape, ov::element::Type element_type, + bool need_lockable_memory = false, void* mem_ptr = nullptr) const; + std::shared_ptr create_shared_device_tensor(const ov::Shape& pshape, ov::element::Type element_type, void* usm_host_mem) const; + + void allocate_inputs(); + void allocate_outputs(); + void allocate_states(); + void allocate_input(const ov::Output& port, const std::string& name); + void allocate_output(const ov::Output& port, const std::string& name); + cldnn::event::ptr copy_output_data(cldnn::memory::ptr src, const ov::ITensor& dst) const; + + void init_mappings(bool is_legacy_api); + bool is_batched_input(const ov::Output& port) const; +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp index 99cd902519ed3a..2661abb284452e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp @@ -3,45 +3,24 @@ // #pragma once -#include +#include "openvino/runtime/ivariable_state.hpp" #include "intel_gpu/plugin/graph.hpp" #include namespace ov { namespace intel_gpu { -class VariableState : public InferenceEngine::IVariableStateInternal { +class VariableState : public ov::IVariableState { public: - VariableState(const std::string& name, const std::vector& states, - cldnn::engine& engine, int currentBatch); + VariableState(const std::string& name, cldnn::network::VariableState::Ptr states, cldnn::engine& engine); - /** - * @brief Reset internal variable state for relevant infer request, to a value specified as - * default for according `ReadValue` node - */ - void Reset() override; - - /** - * @brief Sets the new state for the next inference - * @param newState A new state - */ - void SetState(const InferenceEngine::Blob::Ptr &newState) override; - - /** - * @brief Returns the value of the variable state. 
- * @return The value of the variable state - */ - InferenceEngine::Blob::CPtr GetState() const override; - -protected: - InferenceEngine::SizeVector AggregateShape(const cldnn::layout &layout); - void IterateOverStates(std::function f) const; + void reset() override; + void set_state(const ov::SoPtr& state) override; + const ov::SoPtr& get_state() const override; private: - int currentBatch_; - std::vector states_; - InferenceEngine::TensorDesc desc_; - cldnn::engine& engine_; + cldnn::network::VariableState::Ptr m_variable_state; + cldnn::engine& m_engine; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/convert_color.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/convert_color.hpp index f1cd6e19340152..aa09a09ee722c6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/convert_color.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/convert_color.hpp @@ -36,24 +36,20 @@ struct convert_color : public primitive_base { /// @param input_color_format Color to convert from. /// @param output_color_format Color to convert to. /// @param mem_type Memory type. - /// @param output_layout Requested memory layout. convert_color(const primitive_id& id, const std::vector& inputs, const color_format input_color_format, const color_format output_color_format, const memory_type mem_type, - const layout& output_layout, const padding& output_padding = padding()) : primitive_base(id, inputs, {output_padding}), input_color_format(input_color_format), output_color_format(output_color_format), - mem_type(mem_type), - output_layout(output_layout) {} + mem_type(mem_type) {} color_format input_color_format = color_format::RGB; color_format output_color_format = color_format::RGB; memory_type mem_type = memory_type::buffer; - layout output_layout; size_t hash() const override { size_t seed = primitive::hash(); @@ -71,8 +67,7 @@ struct convert_color : public primitive_base { return input_color_format == rhs_casted.input_color_format && output_color_format == rhs_casted.output_color_format && - mem_type == rhs_casted.mem_type && - output_layout == rhs_casted.output_layout; + mem_type == rhs_casted.mem_type; } void save(BinaryOutputBuffer& ob) const override { @@ -80,7 +75,6 @@ struct convert_color : public primitive_base { ob << make_data(&input_color_format, sizeof(color_format)); ob << make_data(&output_color_format, sizeof(color_format)); ob << make_data(&mem_type, sizeof(memory_type)); - ob << output_layout; } void load(BinaryInputBuffer& ib) override { @@ -88,7 +82,6 @@ struct convert_color : public primitive_base { ib >> make_data(&input_color_format, sizeof(color_format)); ib >> make_data(&output_color_format, sizeof(color_format)); ib >> make_data(&mem_type, sizeof(memory_type)); - ib >> output_layout; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/file_util.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/file_util.hpp new file mode 100644 index 00000000000000..ef2e201f099542 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/file_util.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/util/file_util.hpp" + +namespace ov { +namespace intel_gpu { + +// Version of save_binary that don't trow an exception if attempt to open file fails +void save_binary(const std::string& path, std::vector binary); + +} // namespace intel_gpu +} // namespace ov diff --git 
a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index 5b68100232f25f..5313c35bce1f56 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -235,7 +235,7 @@ inline data_types element_type_to_data_type(ov::element::Type t) { case ov::element::Type_t::i64: return cldnn::data_types::i64; case ov::element::Type_t::boolean: - return cldnn::data_types::i8; + return cldnn::data_types::u8; case ov::element::Type_t::u1: return cldnn::data_types::bin; default: diff --git a/src/plugins/intel_gpu/src/graph/convert_color.cpp b/src/plugins/intel_gpu/src/graph/convert_color.cpp index 8e9bc54dd58f81..d6252b62b0f1d5 100644 --- a/src/plugins/intel_gpu/src/graph/convert_color.cpp +++ b/src/plugins/intel_gpu/src/graph/convert_color.cpp @@ -11,9 +11,30 @@ namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(convert_color) -layout convert_color_inst::calc_output_layout(convert_color_node const& node, kernel_impl_params const& impl_param) { +layout convert_color_inst::calc_output_layout(convert_color_node const& /* node */, kernel_impl_params const& impl_param) { auto desc = impl_param.typed_desc(); - return desc->output_layout; + + auto src_fmt = desc->input_color_format; + auto dst_fmt = desc->output_color_format; + auto dst_is_rgb_or_bgr = dst_fmt == convert_color::color_format::BGR || + dst_fmt == convert_color::color_format::RGB; + auto inputs_count = desc->input_size(); + bool single_plane_input = inputs_count == 1; + const size_t h_dim = 1; + const size_t c_dim = 3; + if ((src_fmt == convert_color::color_format::NV12 || src_fmt == convert_color::color_format::I420) && dst_is_rgb_or_bgr) { + auto out_layout = impl_param.get_input_layout(0); + out_layout.format = format::bfyx; + auto out_shape = out_layout.get_partial_shape(); + out_shape[c_dim] = 3; + if (single_plane_input) { + out_shape[h_dim] = out_shape[h_dim] * 2 / 3; + } + out_layout.set_partial_shape(out_shape); + + return out_layout; + } + OPENVINO_THROW("[GPU] Unsupported color format combinations"); } std::string convert_color_inst::to_string(convert_color_node const& node) { diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index f103738baeb864..13d81321281061 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -122,7 +122,7 @@ std::vector crop_inst::calc_output_layouts(const crop_node& /*node*/, co // update split offsets if (is_output_static) { auto p_param = const_cast(&impl_param); - InferenceEngine::SizeVector startOffset(p_param->input_layouts[0].get_partial_shape().size()); + ov::Shape startOffset(p_param->input_layouts[0].get_partial_shape().size()); auto input_shape = p_param->input_layouts[0].get_partial_shape(); auto dims = p_param->input_layouts[0].get_partial_shape().size(); for (int32_t prev = 0; prev < desc->output_idx; prev++) { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp index 95b2441710f4fc..0135953263dc7e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/convert_color.cpp @@ -54,9 +54,9 @@ attach_convert_color_impl::attach_convert_color_impl() { std::make_tuple(data_types::f32, format::nv12), std::make_tuple(data_types::f16, format::nv12), std::make_tuple(data_types::u8, format::nv12), - std::make_tuple(data_types::f32, 
format::byxf), - std::make_tuple(data_types::f16, format::byxf), - std::make_tuple(data_types::u8, format::byxf), + std::make_tuple(data_types::f32, format::bfyx), + std::make_tuple(data_types::f16, format::bfyx), + std::make_tuple(data_types::u8, format::bfyx), }); } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 790ac4798cb971..92f68bf3105413 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -10,10 +10,10 @@ #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/runtime/memory.hpp" +#include "intel_gpu/runtime/file_util.hpp" #include "to_string_utils.h" #include "register.hpp" #include "utils.hpp" -#include "openvino/util/file_util.hpp" #include "runtime/ocl/ocl_event.hpp" #include "quantize_inst.h" @@ -368,7 +368,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { { std::lock_guard lock(cacheAccessMutex); - ov::util::save_binary(generate_cache_path_from_key(config, key), cache); + ov::intel_gpu::save_binary(generate_cache_path_from_key(config, key), cache); } } else { _prim = PrimType(_pd, cache); diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index f1a0c6b0773a27..eb971ae9410e5f 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -768,8 +768,7 @@ event::ptr network::set_input_data(const primitive_id& id, memory::ptr data) { primitive_inst = find_primitive(id); - if (primitive_inst == nullptr) - throw std::runtime_error("topology doesn't contain primitive:" + id); + OPENVINO_ASSERT(primitive_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id); if (primitive_inst->type() != input_layout::type_id()) { CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input"); @@ -911,8 +910,7 @@ std::vector network::set_output_memory(const primitive_id& id, memor std::vector ret_ev; p_inst = find_primitive(id); - if (!p_inst) - throw std::runtime_error("topology doesn't contain primitive: " + id); + OPENVINO_ASSERT(p_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id); auto iter = std::find(_outputs.begin(), _outputs.end(), p_inst); if (iter == _outputs.end()) @@ -927,7 +925,11 @@ std::vector network::set_output_memory(const primitive_id& id, memor } for (auto& prim : o_iter->second) { - ret_ev.push_back(prim->set_output_memory(eng.reinterpret_buffer(*mem_new, prim->output_memory().get_layout()), false)); + auto mem = mem_new; + if (!prim->is_dynamic() && mem_new && prim->output_memory_ptr()) + mem = eng.reinterpret_buffer(*mem_new, prim->output_memory().get_layout()); + + ret_ev.push_back(prim->set_output_memory(mem)); if (!_reset_arguments && (prim->type() != cldnn::data::type_id() && !(prim->type() == cldnn::mutable_data::type_id() && prim->dependencies().empty()))) { prim->set_arguments(); @@ -1722,6 +1724,10 @@ void network::allocate_variables_memories() { } } +const cldnn::network::variables_state_info_map& network::get_variables_state_info() const { + return _variables_state_info; +} + void network::set_variables_state_info(const std::string& variable_id, const cldnn::layout& layout) { auto it = _variables_state_info.find(variable_id); if (it == _variables_state_info.end()) { diff --git a/src/plugins/intel_gpu/src/graph/program.cpp 
b/src/plugins/intel_gpu/src/graph/program.cpp index bc55f8e376380f..ab454e6bc6b9d2 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/runtime/system_conf.hpp" + #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/graph/program.hpp" -#include - #include "auto_tuner.h" #include "layout_optimizer.h" #include "pass_manager.h" @@ -105,12 +105,12 @@ using namespace cldnn; using namespace ov::intel_gpu; static void adjust_num_cores(ov::threading::IStreamsExecutor::Config& config) { - if (InferenceEngine::getAvailableCoresTypes().size() == 1) { + if (ov::get_available_cores_types().size() == 1) { return; } - const auto total_num_cores = InferenceEngine::getNumberOfLogicalCPUCores(); - const auto total_num_big_cores = InferenceEngine::getNumberOfLogicalCPUCores(true); + const auto total_num_cores = ov::get_number_of_logical_cpu_cores(); + const auto total_num_big_cores = ov::get_number_of_logical_cpu_cores(true); const auto total_num_little_cores = total_num_cores - total_num_big_cores; auto core_type = config._threadPreferredCoreType; diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 2fbe366df35013..62c11e2f7e8066 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -117,7 +117,11 @@ std::unique_ptr program_node::desc_to_json() const { s << get_preferred_impl_type(); node_info->add("preferred impl", s.str()); - node_info->add("output layout", output_layouts[0].to_short_string()); + json_composite output_layouts_desc; + for (size_t i = 0; i < output_layouts.size(); i++) { + output_layouts_desc.add(std::to_string(i), output_layouts[i].to_short_string()); + } + node_info->add("output layouts", output_layouts_desc); node_info->add("constant", bool_to_str(constant)); node_info->add("in data flow", bool_to_str(data_flow)); @@ -168,7 +172,9 @@ std::unique_ptr program_node::desc_to_json() const { if (empty) { empty = false; } - deps_ptrs.push_back(std::to_string(reinterpret_cast(itr->first))); + auto ptr = std::to_string(reinterpret_cast(itr->first)); + auto port = std::to_string(itr->second); + deps_ptrs.push_back(ptr + "(" + port + ")"); itr++; } if (deps_ptrs.empty()) { diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 8847f5f17a98fe..9f5bb6615325c9 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -28,10 +28,16 @@ layout reorder_inst::calc_output_layout(reorder_node const& node, kernel_impl_pa } if (ifmt.is_nv12() && !desc->has_surface_input()) { - auto data_size = tensor{ input_layout.batch(), input_layout.feature() * 3, - input_layout.spatial(0), input_layout.spatial(1) }; + const size_t h_dim = 1; + const size_t c_dim = 3; + + auto out_shape = input_layout.get_partial_shape(); + out_shape[c_dim] = 3; + if (desc->input_size() == 1) + out_shape[h_dim] = out_shape[h_dim] * 2 / 3; + if (ofmt != ifmt) - return layout(odt, ofmt, data_size, op); + return layout(out_shape, odt, ofmt, op); CLDNN_ERROR_MESSAGE(desc->id, "No image_nv12 to image_nv12 reorder is supported"); } else if (ofmt.is_winograd() && ifmt.is_winograd()) { diff --git a/src/plugins/intel_gpu/src/graph/unique.cpp 
b/src/plugins/intel_gpu/src/graph/unique.cpp index 38f91b2515d1d7..93e739d422f04c 100644 --- a/src/plugins/intel_gpu/src/graph/unique.cpp +++ b/src/plugins/intel_gpu/src/graph/unique.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/unique.hpp" - #include #include diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convert_color_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convert_color_ref.cl index dfeda35f894a1d..0394a52a55b7e3 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convert_color_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convert_color_ref.cl @@ -4,6 +4,10 @@ #include "include/batch_headers/fetch_data.cl" + +#define IMAGE_W INPUT0_SIZE_Y +#define IMAGE_H (INPUT0_FEATURE_NUM * 2 / 3) + #if defined(CONVERT_FROM_NV12) || defined(CONVERT_FROM_I420) #ifdef BUFFER_MEM KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input1, @@ -19,22 +23,22 @@ KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input1, const uint y = get_global_id(1); const uint x = get_global_id(2); - float Y = input1[GET_DATA_INDEX(INPUT0, b, 0, y, x)]; + float Y = input1[GET_DATA_INDEX(INPUT0, b, y, x, 0)]; #if INPUTS_COUNT == 3 - float U = input2[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)]; - float V = input3[GET_DATA_INDEX(INPUT2, b, 0, y / 2, x / 2)]; + float U = input2[GET_DATA_INDEX(INPUT1, b, y / 2, x / 2, 0)]; + float V = input3[GET_DATA_INDEX(INPUT2, b, y / 2, x / 2, 0)]; #elif INPUTS_COUNT == 2 - float U = input2[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)]; - float V = input2[GET_DATA_INDEX(INPUT1, b, 1, y / 2, x / 2)]; + float U = input2[GET_DATA_INDEX(INPUT1, b, y / 2, x / 2, 0)]; + float V = input2[GET_DATA_INDEX(INPUT1, b, y / 2, x / 2, 1)]; #else // Single plane - uint input_uv_offset = INPUT0_SIZE_X * INPUT0_SIZE_Y / 3 * 2; + uint input_uv_offset = IMAGE_W * IMAGE_H; #ifdef CONVERT_FROM_NV12 - float U = input1[GET_DATA_INDEX(INPUT0, b, 0, y / 2, (x / 2) * 2) + input_uv_offset]; - float V = input1[GET_DATA_INDEX(INPUT0, b, 1, y / 2, (x / 2) * 2) + input_uv_offset]; + float U = input1[GET_DATA_INDEX(INPUT0, b, (y / 2), (x / 2) * 2, 0) + input_uv_offset]; + float V = input1[GET_DATA_INDEX(INPUT0, b, (y / 2), (x / 2) * 2, 0) + input_uv_offset + 1]; #else - float U = input1[GET_DATA_INDEX(INPUT0, b, 0, 0, x / 2 + (y / 2)*(INPUT0_Y_PITCH / 2)) + input_uv_offset]; - float V = input1[GET_DATA_INDEX(INPUT0, b, 0, 0, x / 2 + (y / 2)*(INPUT0_Y_PITCH / 2)) + 5 * input_uv_offset / 4]; + float U = input1[GET_DATA_INDEX(INPUT0, b, 0, x / 2 + (y / 2)*(INPUT0_FEATURE_PITCH / 2), 0) + input_uv_offset]; + float V = input1[GET_DATA_INDEX(INPUT0, b, 0, x / 2 + (y / 2)*(INPUT0_FEATURE_PITCH / 2), 0) + 5 * input_uv_offset / 4]; #endif #endif @@ -53,13 +57,13 @@ KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input1, #endif #ifdef CONVERT_TO_RGB - output[OUTPUT_GET_INDEX(b, 0, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS); - output[OUTPUT_GET_INDEX(b, 1, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS); - output[OUTPUT_GET_INDEX(b, 2, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 0)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 1)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 2)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS); #else // BGR - output[OUTPUT_GET_INDEX(b, 0, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS); - 
output[OUTPUT_GET_INDEX(b, 1, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS); - output[OUTPUT_GET_INDEX(b, 2, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 0)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 1)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 2)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS); #endif } #endif @@ -92,9 +96,8 @@ KERNEL(convert_color_ref)(read_only image2d_t input1, float Ucomponent = mad(UV.x, 255.0f, -128.f); float Vcomponent = mad(UV.y, 255.0f, -128.f); #else // Single plane - uint input_y_offset = INPUT0_SIZE_Y / 3 * 2; - float4 U = read_imagef(input1, (int2)((x / 2) * 2, y / 2 + input_y_offset)); - float4 V = read_imagef(input1, (int2)((x / 2) * 2 + 1, y / 2 + input_y_offset)); + float4 U = read_imagef(input1, (int2)((x / 2) * 2, y / 2 + IMAGE_H)); + float4 V = read_imagef(input1, (int2)((x / 2) * 2 + 1, y / 2 + IMAGE_H)); float Ucomponent = mad(U.x, 255.0f, -128.f); float Vcomponent = mad(V.x, 255.0f, -128.f); #endif @@ -110,13 +113,13 @@ KERNEL(convert_color_ref)(read_only image2d_t input1, #endif #ifdef CONVERT_TO_RGB - output[OUTPUT_GET_INDEX(b, 0, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS); - output[OUTPUT_GET_INDEX(b, 1, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS); - output[OUTPUT_GET_INDEX(b, 2, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 0)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 1)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 2)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS); #else // BGR - output[OUTPUT_GET_INDEX(b, 0, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS); - output[OUTPUT_GET_INDEX(b, 1, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS); - output[OUTPUT_GET_INDEX(b, 2, y, x)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 0)] = ACTIVATION(TO_OUTPUT_TYPE(B), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 1)] = ACTIVATION(TO_OUTPUT_TYPE(G), ACTIVATION_PARAMS); + output[OUTPUT_GET_INDEX(b, y, x, 2)] = ACTIVATION(TO_OUTPUT_TYPE(R), ACTIVATION_PARAMS); #endif } #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data.cl index fd321205389a67..da52d5af2da5b5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data.cl @@ -86,7 +86,7 @@ KERNEL (reorder_data)( #if defined INPUT0_LAYOUT_NV12 && !SURFACE_INPUT const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP; - float4 colorVYU = read_imagef(input, sampler, (int2)(x, y)); + float4 colorVYU = read_imagef(input, sampler, (int2)(y, f)); float Ycomponent = mad(colorVYU.s1, 296.82f, -18.624f); float Ucomponent = mad(colorVYU.s2, 255.0f, -128.f); @@ -128,7 +128,7 @@ KERNEL (reorder_data)( res = MEAN_OP(res, mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv.s0, msv.s1, /*msv.s2, msv.s3, msv.s4,msv.s5,*/ msv.s6, msv.s7)]); #endif #elif SURFACE_INPUT - float4 Y = read_imagef(input, (int2)(x, y)); + float4 Y = read_imagef(input, (int2)(y, f)); float Ycomponent = mad(Y.x, 296.82f, -18.624f); float res = clamp(Ycomponent, 0.f, 255.f); #else diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp index 94193c045f78bd..2538197d08eed7 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.cpp @@ -156,6 +156,7 @@ std::string toString(WeightsType wType) { case WeightsType::F16: return "F16"; case WeightsType::F32: return "F32"; case WeightsType::INT8: return "INT8"; + case WeightsType::UINT8: return "UINT8"; default: return ""; } } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convert_color/convert_color_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convert_color/convert_color_kernel_base.cpp index 971728f3acd399..a52c5594ea7ddc 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convert_color/convert_color_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convert_color/convert_color_kernel_base.cpp @@ -28,7 +28,7 @@ CommonDispatchData ConvertColorKernelBase::SetDefault(const convert_color_params auto in_layout = params.inputs[0].GetLayout(); auto out_layout = params.outputs[0].GetLayout(); - dispatchData.gws = { out.Batch().v, out.Y().v, out.X().v }; + dispatchData.gws = { out.Batch().v, out.Feature().v, out.Y().v }; dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo, in_layout, out_layout); return dispatchData; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convert_color/convert_color_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convert_color/convert_color_kernel_ref.cpp index 050ef33d88ed02..e207b936302c1d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convert_color/convert_color_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convert_color/convert_color_kernel_ref.cpp @@ -19,8 +19,8 @@ ParamsKey ConvertColorKernelRef::GetSupportedKey() const { k.EnableOutputDataType(Datatype::UINT8); k.EnableInputLayout(DataLayout::nv12); - k.EnableInputLayout(DataLayout::byxf); - k.EnableOutputLayout(DataLayout::byxf); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfyx); k.EnableDifferentTypes(); k.EnableTensorOffset(); diff --git a/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp index 88f3083c7cea71..2cc67ec8f0c61d 100644 --- a/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp @@ -9,40 +9,33 @@ namespace ov { namespace intel_gpu { -AsyncInferRequest::AsyncInferRequest(const InferRequest::Ptr &inferRequest, - const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, - const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) - : AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) { - _pipeline = {}; - - if (!_inferRequest->use_external_queue()) { - _pipeline.push_back({taskExecutor, - [this] { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::PreprocessingAndStartPipeline"); - _inferRequest->setup_stream_graph(); - _inferRequest->enqueue(); - _inferRequest->wait(); - } }); - } else { - _pipeline.push_back({ _waitExecutor, +AsyncInferRequest::AsyncInferRequest(const std::shared_ptr& infer_request, + const std::shared_ptr& task_executor, + const 
std::shared_ptr& wait_executor, + const std::shared_ptr& callback_executor) + : ov::IAsyncInferRequest(infer_request, task_executor, callback_executor) + , m_infer_request(infer_request) + , m_wait_executor(wait_executor) { + m_infer_request->set_task_executor(task_executor); + if (infer_request->use_external_queue()) { + m_pipeline.clear(); + m_pipeline.emplace_back(wait_executor, [this] { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::WaitPipeline"); - _inferRequest->wait_notify(); - } }); + m_infer_request->wait_notify(); + }); } } - -void AsyncInferRequest::StartAsync_ThreadUnsafe() { - if (_inferRequest->use_external_queue()) { - _inferRequest->setup_stream_graph(); - _inferRequest->enqueue_notify(); +void AsyncInferRequest::start_async() { + if (m_infer_request->use_external_queue()) { + m_infer_request->setup_stream_graph(); + m_infer_request->enqueue_notify(); } - Parent::StartAsync_ThreadUnsafe(); + Parent::start_async(); } AsyncInferRequest::~AsyncInferRequest() { - StopAndWait(); + stop_and_wait(); } } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/async_infer_request_legacy.cpp b/src/plugins/intel_gpu/src/plugin/async_infer_request_legacy.cpp deleted file mode 100644 index 80242244851113..00000000000000 --- a/src/plugins/intel_gpu/src/plugin/async_infer_request_legacy.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/plugin/async_infer_request_legacy.hpp" -#include "intel_gpu/runtime/itt.hpp" -#include - -namespace ov { -namespace intel_gpu { - -AsyncInferRequestLegacy::AsyncInferRequestLegacy(const InferRequestLegacy::Ptr &inferRequest, - const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, - const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) - : AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) { - _pipeline = {}; - - if (!_inferRequest->use_external_queue()) { - _pipeline.push_back({taskExecutor, - [this] { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::PreprocessingAndStartPipeline"); - _inferRequest->setup_stream_graph(); - _inferRequest->preprocess(); - _inferRequest->enqueue(); - _inferRequest->wait(); - } }); - } else { - _pipeline.push_back({ _waitExecutor, - [this] { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::WaitPipeline"); - _inferRequest->wait_notify(); - } }); - } -} - -void AsyncInferRequestLegacy::StartAsync_ThreadUnsafe() { - if (_inferRequest->use_external_queue()) { - _inferRequest->setup_stream_graph(); - _inferRequest->preprocess_notify(); - _inferRequest->enqueue_notify(); - } - Parent::StartAsync_ThreadUnsafe(); -} - -AsyncInferRequestLegacy::~AsyncInferRequestLegacy() { - StopAndWait(); -} - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 2aa67be800c7e6..3b8581a1e2e34e 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -3,27 +3,22 @@ // #include "intel_gpu/plugin/legacy_api_helper.hpp" +#include "intel_gpu/plugin/legacy_remote_context.hpp" #include "openvino/pass/serialize.hpp" +#include "openvino/runtime/iplugin.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" +#include 
"openvino/util/common_util.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include "intel_gpu/graph/serialization/layout_serializer.hpp" #include "intel_gpu/graph/serialization/string_serializer.hpp" #include "intel_gpu/graph/serialization/utils.hpp" #include "intel_gpu/graph/serialization/vector_serializer.hpp" -#include "intel_gpu/plugin/graph.hpp" #include "intel_gpu/runtime/itt.hpp" -#include "intel_gpu/plugin/infer_request.hpp" +#include "intel_gpu/plugin/graph.hpp" #include "intel_gpu/plugin/compiled_model.hpp" #include "intel_gpu/plugin/async_infer_request.hpp" -#include "intel_gpu/plugin/async_infer_request_legacy.hpp" - -#include -#include "threading/ie_cpu_streams_executor.hpp" -#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" -#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" -#include "ie_icore.hpp" #include #include @@ -32,214 +27,175 @@ #include #include -#include - -using namespace InferenceEngine; -using namespace InferenceEngine::details; - namespace ov { namespace intel_gpu { -CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, - InferenceEngine::RemoteContext::Ptr context, - const ExecutionConfig& config, - InferenceEngine::InputsDataMap* inputs, - InferenceEngine::OutputsDataMap* outputs) : - InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr { - if (config.get_property(ov::internal::exclusive_async_requests)) { - //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior - return executorManager()->getExecutor("GPU"); - } else if (config.get_property(ov::num_streams) > 1) { - return std::make_shared( - IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); - } else { - return std::make_shared( - IStreamsExecutor::Config{"Intel GPU plugin executor", 1}); - } - }()}, - m_context(context), - m_config(config), - m_taskExecutor{ _taskExecutor }, - m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })), - m_network(network) { - auto graph_base = std::make_shared(network, get_context_impl(m_context), m_config, 0, inputs, outputs); - for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { - auto graph = n == 0 ? 
graph_base : std::make_shared(graph_base, n); - m_graphs.push_back(graph); +namespace { +std::shared_ptr create_task_executor(const std::shared_ptr& plugin, const ExecutionConfig& config) { + if (config.get_property(ov::internal::exclusive_async_requests)) { + //exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior + return plugin->get_executor_manager()->get_executor("GPU"); + } else { + return std::make_shared( + ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); } } - -CompiledModel::CompiledModel(cldnn::BinaryInputBuffer& ib, - InferenceEngine::RemoteContext::Ptr context, - const ExecutionConfig& config, - InferenceEngine::InputsDataMap* inputs, - InferenceEngine::OutputsDataMap* outputs) : - InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr { - if (config.get_property(ov::internal::exclusive_async_requests)) { - //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior - return executorManager()->getExecutor("GPU"); - } else if (config.get_property(ov::num_streams) > 1) { - return std::make_shared( - IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); - } else { - return std::make_shared( - IStreamsExecutor::Config{"Intel GPU plugin executor", 1}); - } - }()}, - m_context(context), - m_config(config), - m_taskExecutor{ _taskExecutor }, - m_waitExecutor(executorManager()->getIdleCPUStreamsExecutor({ "GPUWaitExecutor" })) { - auto context_impl = get_context_impl(m_context); - - auto pos = ib.tellg(); +} // namespace + +CompiledModel::CompiledModel(std::shared_ptr model, + const std::shared_ptr& plugin, + RemoteContextImpl::Ptr context, + const ExecutionConfig& config) + : ov::ICompiledModel(model, + plugin, + wrap_if_old_api(context, plugin->is_new_api()), + create_task_executor(plugin, config)) + , m_context(context) + , m_config(config) + , m_wait_executor(std::make_shared(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"})) + , m_model(model->is_dynamic() ? model : nullptr) + , m_model_name(model->get_friendly_name()) + , m_inputs(ov::ICompiledModel::inputs()) + , m_outputs(ov::ICompiledModel::outputs()) + , m_loaded_from_cache(false) { + auto graph_base = std::make_shared(model, m_context, m_config, 0); for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { - ib.seekg(pos); - auto graph = std::make_shared(ib, context_impl, m_config, n, inputs, outputs); + auto graph = n == 0 ? 
graph_base : std::make_shared(graph_base, n); m_graphs.push_back(graph); } } -template -IInferRequestInternal::Ptr CompiledModel::GetInferRequestImpl(const std::vector>& inputs, - const std::vector>& outputs) { - auto ptr = std::make_shared(inputs, outputs, std::static_pointer_cast(shared_from_this())); - if (m_config.get_property(ov::num_streams) > 1) - ptr->EnableStreams(); - if (m_config.get_property(ov::enable_profiling)) - ptr->EnableProfiling(); - if (m_graphs.front()->use_external_queue()) - ptr->enable_external_queue(); - ptr->SetGraph(m_graphs.front()); - - return ptr; -} - -IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(InputsDataMap networkInputs, - OutputsDataMap networkOutputs) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl"); - auto ptr = std::make_shared(networkInputs, networkOutputs, - std::static_pointer_cast(shared_from_this())); - if (m_config.get_property(ov::num_streams) > 1) - ptr->EnableStreams(); - if (m_config.get_property(ov::enable_profiling)) - ptr->EnableProfiling(); - if (m_graphs.front()->use_external_queue()) - ptr->enable_external_queue(); - ptr->SetGraph(m_graphs.front()); - - return ptr; -} - -IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(const std::vector>& inputs, - const std::vector>& outputs) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl"); - if (m_graphs.front()->GetMaxDynamicBatchSize() > 1) - return GetInferRequestImpl(inputs, outputs); - else - return GetInferRequestImpl(inputs, outputs); -} +CompiledModel::CompiledModel(cldnn::BinaryInputBuffer ib, + const std::shared_ptr& plugin, + RemoteContextImpl::Ptr context, + const ExecutionConfig& config) + : ov::ICompiledModel(nullptr, + plugin, + wrap_if_old_api(context, plugin->is_new_api()), + create_task_executor(plugin, config)) + , m_context(context) + , m_config(config) + , m_wait_executor(std::make_shared(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"})) + , m_model(nullptr) + , m_model_name("") + , m_loaded_from_cache(true) { + { + size_t num_params; + ib >> num_params; + + for (size_t idx = 0; idx < num_params; ++idx) { + std::string param_name; + ib >> param_name; + ov::element::Type param_element_type; + std::string str_element_type; + ib >> str_element_type; + std::stringstream oss(str_element_type); + oss >> param_element_type; + ov::PartialShape param_shape; + ib >> param_shape; + std::unordered_set param_names; + size_t num_names; + ib >> num_names; + for (size_t i = 0; i < num_names; ++i) { + std::string name; + ib >> name; + param_names.emplace(name); + } -IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequest"); - InferenceEngine::IInferRequestInternal::Ptr internalRequest; - if (m_graphs.empty()) { - OPENVINO_THROW("[GPU] Model not loaded"); + auto new_param = std::make_shared(param_element_type, param_shape); + new_param->set_friendly_name(param_name); + new_param->set_element_type(param_element_type); + new_param->output(0).get_tensor().set_names(param_names); + new_param->validate_and_infer_types(); + m_inputs.push_back(new_param->output(0)); + } } - for (auto& graph : m_graphs) { - if (graph == nullptr) { - OPENVINO_THROW("[GPU] Model not loaded"); - } + { + size_t num_results; + ib >> num_results; + + for (size_t idx = 0; idx < num_results; ++idx) { + ov::element::Type fake_element_type; + std::string str_element_type; + ib >> 
str_element_type; + std::stringstream oss(str_element_type); + oss >> fake_element_type; + + ov::PartialShape fake_shape; + ib >> fake_shape; + + std::string fake_name; + ib >> fake_name; + + std::string param_name; + ib >> param_name; + + std::unordered_set param_names; + size_t num_names; + ib >> num_names; + for (size_t i = 0; i < num_names; ++i) { + std::string name; + ib >> name; + param_names.emplace(name); + } + + auto fake_param = std::make_shared(fake_element_type, fake_shape); + fake_param->set_friendly_name(fake_name); + fake_param->validate_and_infer_types(); - if (!graph->IsLoaded()) { - OPENVINO_THROW("[GPU] Model not loaded: no networks created"); + auto new_result = std::make_shared(fake_param); + new_result->set_friendly_name(param_name); + new_result->output(0).get_tensor().set_names(param_names); + new_result->validate_and_infer_types(); + m_outputs.push_back(new_result->output(0)); } } - bool is_legacy = false; - if (this->_plugin && _plugin->IsNewAPI()) { - internalRequest = CreateInferRequestImpl(_parameters, _results); - if (std::dynamic_pointer_cast(internalRequest)) - is_legacy = true; - } - if (!internalRequest) { - internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs); - is_legacy = true; - } - internalRequest->setPointerToExecutableNetworkInternal(shared_from_this()); - if (is_legacy) { - return std::make_shared(std::static_pointer_cast(internalRequest), - m_taskExecutor, - m_waitExecutor, - _callbackExecutor); + auto pos = ib.tellg(); + for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { + ib.seekg(pos); + auto graph = std::make_shared(ib, context, m_config, 0); + m_graphs.push_back(graph); } - return std::make_shared(std::static_pointer_cast(internalRequest), - m_taskExecutor, - m_waitExecutor, - _callbackExecutor); +} + +std::shared_ptr CompiledModel::create_infer_request() const { + auto sync_request = create_sync_infer_request(); + auto async_infer_request = std::make_shared(std::static_pointer_cast(sync_request), + get_task_executor(), + m_wait_executor, + get_callback_executor()); + return async_infer_request; } // Cache blob format: -// [ ConstInputsDataMap / ConstOutputsDataMap ] +// [ is_dynamic flag ] // [ ov::Node::Input/ ov::Node::Output ] // [ ov::intel_gpu::Graph ] -void CompiledModel::Export(std::ostream& networkModel) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::Export"); - if (m_graphs.empty()) - OPENVINO_THROW("[GPU] Model not loaded"); +void CompiledModel::export_model(std::ostream& model) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); + OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); - cldnn::BinaryOutputBuffer ob(networkModel); + cldnn::BinaryOutputBuffer ob(model); - // InputsInfo and OutputsInfo for CNNNetwork - { - ob << GetInputsInfo().size(); - - for (const auto& in : GetInputsInfo()) { - ob << in.first; - std::string precision(in.second->getPrecision().name()); - ob << precision; - std::stringstream ss; - ss << in.second->getInputData()->getLayout(); - ob << ss.str(); - ob << in.second->getTensorDesc().getDims(); - } - - ob << GetOutputsInfo().size(); - - for (const auto& out : GetOutputsInfo()) { - ob << out.first; - std::string precision(out.second->getPrecision().name()); - ob << precision; - std::stringstream ss; - ss << out.second->getLayout(); - ob << ss.str(); - ob << out.second->getTensorDesc().getDims(); - } - } + bool is_dynamic = get_graph(0)->get_network()->is_dynamic(); + ob << is_dynamic; // 
Inputs { - const std::vector>& const_params = getInputs(); - ob << const_params.size(); + const auto& params = inputs(); + ob << params.size(); - for (const auto& param : const_params) { - auto new_param = ov::as_type_ptr(param); - ov::element::Type param_element_type = new_param->get_element_type(); - - const std::string& param_name = new_param->get_friendly_name(); - const ov::PartialShape& param_shape = new_param->get_partial_shape(); - const ov::Layout& param_layout = new_param->get_layout(); - const auto& param_names = new_param->output(0).get_tensor().get_names(); - - ob << param_name; + for (const auto& param : params) { std::stringstream ss; - ss << param_element_type; + ss << param.get_element_type(); + + ob << param.get_node()->get_friendly_name(); ob << ss.str(); - ob << param_shape; - ob << param_layout.to_string(); - ob << param_names.size(); - for (const auto& name : param_names) { + ob << param.get_partial_shape(); + ob << param.get_names().size(); + for (const auto& name : param.get_names()) { ob << name; } } @@ -247,66 +203,44 @@ void CompiledModel::Export(std::ostream& networkModel) { // Outputs { - std::vector> const_results = getOutputs(); - ob << const_results.size(); - - for (const auto& param : const_results) { - auto new_param = ov::as_type_ptr(param); - ov::element::Type fake_element_type = new_param->get_input_element_type(0); - - const std::string& fake_name = new_param->get_input_node_ptr(0)->get_friendly_name(); - const std::string& param_name = new_param->get_friendly_name(); - const ov::PartialShape& fake_shape = new_param->get_input_partial_shape(0); - const ov::Layout& param_layout = new_param->get_layout(); - const auto& param_names = new_param->output(0).get_tensor().get_names(); - + const auto& results = outputs(); + ob << results.size(); + for (const auto& param : results) { std::stringstream ss; - ss << fake_element_type; + ss << param.get_element_type(); + ob << ss.str(); - ob << fake_shape; - ob << fake_name; - ob << param_name; - ob << param_layout.to_string(); - ob << param_names.size(); - for (const auto& name : param_names) { + ob << param.get_partial_shape(); + ob << param.get_node()->get_input_node_ptr(0)->get_friendly_name(); + ob << param.get_node()->get_friendly_name(); + ob << param.get_names().size(); + for (const auto& name : param.get_names()) { ob << name; } } } - if (m_graphs.front()->GetNetwork()->is_dynamic()) { - ob << true; - ov::pass::StreamSerialize serializer(networkModel, {}, ov::pass::Serialize::Version::UNSPECIFIED); - serializer.run_on_model(std::const_pointer_cast(m_network.getFunction())); + if (is_dynamic) { + ov::pass::StreamSerialize serializer(model, {}, ov::pass::Serialize::Version::UNSPECIFIED); + serializer.run_on_model(m_model); } else { - ob << false; - m_graphs.front()->Export(ob); + get_graph(0)->export_model(ob); } } -std::shared_ptr CompiledModel::GetExecGraphInfo() { - if (m_graphs.empty()) - OPENVINO_THROW("[GPU] Model not loaded"); - - return m_graphs.front()->GetExecGraphInfo(); +std::shared_ptr CompiledModel::get_runtime_model() const { + return get_graph(0)->get_runtime_model(); } - -InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) const { - auto actual_name = name; - if (LegacyAPIHelper::is_legacy_property({name, nullptr}, _plugin->IsNewAPI())) { - actual_name = LegacyAPIHelper::convert_legacy_property({name, nullptr}).first; - } - - auto val = m_config.get_property(actual_name); - if (LegacyAPIHelper::is_legacy_property({name, nullptr}, _plugin->IsNewAPI())) { - val = 
LegacyAPIHelper::convert_to_legacy_property({actual_name, val}).second; - } - - return val; +const std::vector>& CompiledModel::get_graphs() const { + return m_graphs; +} +std::shared_ptr CompiledModel::get_graph(size_t n) const { + OPENVINO_ASSERT(m_graphs.size() >= n, "[GPU] Invalid graph idx: ", n, ". Only ", m_graphs.size(), " were created"); + return m_graphs[n]; } -InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) const { +ov::Any CompiledModel::get_property(const std::string& name) const { if (name == ov::supported_properties) { return decltype(ov::supported_properties)::value_type { // Metrics @@ -333,17 +267,20 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con ov::PropertyName{ov::execution_devices.name(), PropertyMutability::RO} }; } else if (name == ov::model_name) { - OPENVINO_ASSERT(!m_graphs.empty()); - return decltype(ov::model_name)::value_type {m_graphs[0]->getName()}; + return decltype(ov::model_name)::value_type {m_model_name}; + } else if (name == ov::loaded_from_cache) { + return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache}; + OPENVINO_SUPPRESS_DEPRECATED_START } else if (name == METRIC_KEY(SUPPORTED_METRICS)) { - std::vector metrics; - metrics.push_back(METRIC_KEY(NETWORK_NAME)); - metrics.push_back(METRIC_KEY(SUPPORTED_METRICS)); - metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); - metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)); + static const std::vector metrics { + METRIC_KEY(NETWORK_NAME), + METRIC_KEY(SUPPORTED_METRICS), + METRIC_KEY(SUPPORTED_CONFIG_KEYS), + METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS), + }; IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { - static const std::vector configKeys { + static const std::vector config_keys { CONFIG_KEY(MODEL_PRIORITY), CONFIG_KEY(PERFORMANCE_HINT), CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS), @@ -360,21 +297,40 @@ InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) con GPU_CONFIG_KEY(MAX_NUM_THREADS), GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING), }; - IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); + IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, config_keys); + OPENVINO_SUPPRESS_DEPRECATED_END } else if (name == ov::optimal_number_of_infer_requests) { unsigned int nr = m_config.get_property(ov::num_streams); if (m_config.get_property(ov::hint::performance_mode) != ov::hint::PerformanceMode::LATENCY) nr *= 2; return decltype(ov::optimal_number_of_infer_requests)::value_type {nr}; } else if (name == ov::execution_devices) { - return decltype(ov::execution_devices)::value_type{m_context->getDeviceName()}; - } else { - OPENVINO_THROW("[GPU] Unsupported CompiledModel property: ", name); + return decltype(ov::execution_devices)::value_type{m_context->get_device_name()}; + } + + auto actual_name = name; + if (LegacyAPIHelper::is_legacy_property({name, nullptr}, is_new_api())) { + actual_name = LegacyAPIHelper::convert_legacy_property({name, nullptr}).first; } + + auto val = m_config.get_property(actual_name); + if (LegacyAPIHelper::is_legacy_property({name, nullptr}, is_new_api())) { + val = LegacyAPIHelper::convert_to_legacy_property({actual_name, val}).second; + } + + return val; } -std::shared_ptr CompiledModel::GetContext() const { - return m_context; +std::shared_ptr CompiledModel::create_sync_infer_request() const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::create_sync_infer_request"); + 
OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded"); + + for (auto& graph : m_graphs) { + OPENVINO_ASSERT(graph != nullptr, "[GPU] Model not loaded: graph is nullptr"); + OPENVINO_ASSERT(graph->is_loaded(), "[GPU] Model not loaded: invalid graph"); + } + + return std::make_shared(std::static_pointer_cast(shared_from_this())); } } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp index 460b85245b0aab..e4fe70d52b54b4 100644 --- a/src/plugins/intel_gpu/src/plugin/graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -2,6 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/layout.hpp" +#include "openvino/runtime/threading/executor_manager.hpp" +#include "openvino/runtime/exec_model_info.hpp" +#include "openvino/pass/serialize.hpp" + #include "intel_gpu/graph/network.hpp" #include "intel_gpu/graph/serialization/binary_buffer.hpp" #include "intel_gpu/graph/serialization/map_serializer.hpp" @@ -14,10 +19,6 @@ #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/plugin/graph.hpp" #include "intel_gpu/plugin/simple_math.hpp" -#include "intel_gpu/plugin/infer_request.hpp" - -#include "openvino/runtime/threading/executor_manager.hpp" -#include "openvino/runtime/exec_model_info.hpp" #include #include @@ -31,36 +32,29 @@ #include #include -using namespace InferenceEngine; -using namespace InferenceEngine::details; - namespace ov { namespace intel_gpu { -Graph::Graph(InferenceEngine::CNNNetwork& network, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id, - InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs) +Graph::Graph(std::shared_ptr model, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id) : m_context(context) - , m_networkName(network.getName()) , m_config(config) - , m_stream_id(stream_id) - , m_state(0) { - m_program = std::make_shared(network, get_engine(), config, false, false, inputs, outputs); - if (m_program->m_max_batch > 1) - m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch)); - Build(); + , m_stream_id(stream_id) { + auto program_builder = std::make_shared(model, get_engine(), config, false, false); + m_config = program_builder->get_config(); + + build(program_builder->get_compiled_program()); + + primitiveIDs = program_builder->primitive_ids; + prevPrimitiveIDs = program_builder->prevPrimitiveIDs; + profilingIDs = program_builder->profiling_ids; + perfMap = program_builder->perfMap; + m_input_layouts = program_builder->get_input_layouts(); } -Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id, - InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs) +Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context, const ExecutionConfig& config, uint16_t stream_id) : m_context(context) , m_config(config) - , m_stream_id(stream_id) - , m_state(0) { - m_program = std::make_shared(get_engine(), config, inputs, outputs); - ib >> m_program->m_max_batch; - if (m_program->m_max_batch > 1) - m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch)); - + , m_stream_id(stream_id) { bool need_onednn_engine = false; ib >> need_onednn_engine; if (need_onednn_engine) { @@ -71,12 +65,7 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context #endif // ENABLE_ONEDNN_FOR_GPU } - 
ib >> m_program->inputLayouts; - ProgramBuilder::variables_state_info_map variablesStateInfoMap; - ib >> variablesStateInfoMap; - for (const auto& variablesStateInfo : variablesStateInfoMap) { - m_program->AddVariableStateInfo(variablesStateInfo.first, *variablesStateInfo.second.begin()); - } + ib >> m_input_layouts; ib >> primitiveIDs; ib >> prevPrimitiveIDs; ib >> profilingIDs; @@ -89,71 +78,53 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, const RemoteContextImpl::Ptr& context perfMap[prim_id].first = prim_id; auto& perfEntry = perfMap[prim_id].second; ib >> perfEntry.layerType; - ib >> cldnn::make_data(&perfEntry.status, sizeof(InferenceEngine::InferenceEngineProfileInfo::LayerStatus)); + ib >> cldnn::make_data(&perfEntry.status, sizeof(ov::ProfilingInfo::Status)); perfEntry.cpu_uSec = perfEntry.realTime_uSec = 0; ib >> perfEntry.isCPU; ib >> perfEntry.parentPrimitive; } } - ib >> outputDims; - size_t num_networks; - ib >> num_networks; - for (uint32_t i = 0; i < num_networks; ++i) { - m_networks.emplace_back(std::make_shared(ib, get_engine().create_stream(config), get_engine(), m_stream_id == 0, i)); - } + m_network = std::make_shared(ib, get_engine().create_stream(config), get_engine(), m_stream_id == 0, 0); } Graph::Graph(std::shared_ptr graph, uint16_t stream_id) : m_context(graph->m_context) - , m_program(graph->m_program) - , m_networkName(graph->m_networkName) , m_config(graph->m_config) , m_stream_id(stream_id) - , m_state(0) { - Build(); + , primitiveIDs(graph->primitiveIDs) + , prevPrimitiveIDs(graph->prevPrimitiveIDs) + , perfMap(graph->perfMap) + , profilingIDs(graph->profilingIDs) + , m_input_layouts(graph->m_input_layouts) { + build(graph->get_network()->get_program()); } -void Graph::UpdateLayersMaps() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::UpdateLayersMaps"); - primitiveIDs = m_program->primitive_ids; - prevPrimitiveIDs = m_program->prevPrimitiveIDs; - profilingIDs = m_program->profiling_ids; - perfMap = m_program->perfMap; - outputDims = m_program->outputDims; -} +void Graph::build(std::shared_ptr program) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::build"); -void Graph::Build() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::Build"); - UpdateLayersMaps(); - - if (GetMaxDynamicBatchSize() > 1) { - int m_bv_sz = m_program->GetMaxBatchSizeForSingleProgram(); - for (int b = m_bv_sz - 1; b >= 0; b--) { - auto network = BuildNetwork(m_program->GetCompiledProgram(b)); - m_networks.insert(m_networks.begin(), network); - } + auto external_queue = m_context->get_external_queue(); + if (external_queue) { + OPENVINO_ASSERT(m_config.get_property(ov::num_streams) == 1, "[GPU] Throughput streams can't be used with shared queue!"); + const auto &engine = program->get_engine(); + m_network = std::make_shared(program, engine.create_stream(m_config, external_queue), m_stream_id); } else { - auto network = BuildNetwork(m_program->GetCompiledProgram()); - m_networks.emplace_back(network); + m_network = std::make_shared(program, m_stream_id); } GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(!debug_config->dry_run_path.empty()) { - CNNNetwork net(GetExecGraphInfo()); - net.serialize(debug_config->dry_run_path); + ov::pass::Serialize(debug_config->dry_run_path, "").run_on_model(get_runtime_model()); exit(0); } - GPU_DEBUG_IF(!debug_config->dump_graphs.empty() && m_stream_id == 0) { static int net_id = 0; - auto steps_info = GetNetwork()->get_optimizer_passes_info(); + auto steps_info = get_network()->get_optimizer_passes_info(); 
size_t step_idx = 0; for (auto& step : steps_info) { - CNNNetwork net(GetExecGraphInfoByPrimitivesInfo(step.second, true)); - net.serialize(debug_config->dump_graphs + std::to_string(net_id) + "_" + - std::to_string(step_idx) + "_" + step.first + "_graph.xml"); + auto xml_path = debug_config->dump_graphs + std::to_string(net_id) + "_" + std::to_string(step_idx) + "_" + step.first + "_graph.xml"; + ov::pass::Serialize(xml_path, "").run_on_model(get_runtime_model(step.second, true)); step_idx++; } net_id++; @@ -164,73 +135,24 @@ bool Graph::use_external_queue() const { return m_context->get_external_queue() != nullptr; } -std::shared_ptr Graph::BuildNetwork(std::shared_ptr program) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::BuildNetwork"); - std::shared_ptr network = nullptr; - - auto externalQueue = m_context->get_external_queue(); - if (externalQueue) { - if (m_config.get_property(ov::num_streams) != 1) - OPENVINO_THROW("Throughput streams can't be used with shared queue!\n"); - auto &engine = m_program->get_engine(); - network = std::make_shared(program, engine.create_stream(m_config, externalQueue), m_stream_id); - } else { - network = std::make_shared(program, m_stream_id); - } - - return network; -} - -Graph::variable_states_map Graph::AllocateVariablesMemories() { - Graph::variable_states_map states {}; - const auto& memStatesInfo = m_program->GetVariablesStatesInfo(); - OPENVINO_ASSERT(memStatesInfo.empty() || !GetNetwork()->is_dynamic(), "[GPU] Dynamic shapes are not supported yet for stateful models"); - for (const auto& memStateInfo : memStatesInfo) { - std::vector orderedLayouts {memStateInfo.second.begin(), memStateInfo.second.end()}; - std::sort(orderedLayouts.begin(), orderedLayouts.end(), [](cldnn::layout& first, cldnn::layout& second) { - return first.batch() < second.batch(); - }); - std::vector memoryStates; - memoryStates.reserve(orderedLayouts.size()); - for (const auto& layout : orderedLayouts) - memoryStates.push_back(std::make_shared(get_engine().allocate_memory(layout, false))); - states.insert({memStateInfo.first, memoryStates }); - } - return states; -} - -std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::vector& primitives_info, - bool filter_const_primitives) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo"); +std::shared_ptr Graph::get_runtime_model(std::vector& primitives_info, bool filter_const_primitives) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::get_runtime_model"); if (m_config.get_property(ov::enable_profiling)) { try { // Update may throw an exception for step-by-step runtime graph dump, // since network->get_executed_primitives() method can't be called before network execution - UpdatePerfStatistics(); + update_profiling_info(); } catch (std::exception&) { } } - std::map> node2layer; - - ngraph::ResultVector results; - ngraph::ParameterVector params; - ngraph::NodeVector nodes; - - auto data_type_to_precision = [](cldnn::data_types dt) { - switch (dt) { - case cldnn::data_types::bin: return Precision::BIN; - case cldnn::data_types::f32: return Precision::FP32; - case cldnn::data_types::f16: return Precision::FP16; - case cldnn::data_types::i32: return Precision::I32; - case cldnn::data_types::i64: return Precision::I64; - case cldnn::data_types::u8: return Precision::U8; - case cldnn::data_types::i8: return Precision::I8; - default: return Precision::UNSPECIFIED; - } - }; + std::map> node2layer; + + ov::ResultVector results; + ov::ParameterVector params; + 
ov::NodeVector nodes; - // TODO: Adjust output layer names to be aligned with ngraph and add new ops + // TODO: Adjust output layer names to be aligned with ov and add new ops auto to_IE_type_name = [](const std::string& cldnn_name) -> std::string{ static std::map type_n2l { { "activation", "Activation" }, @@ -319,7 +241,7 @@ std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::v return std::string((it+1), name.end()); }; - auto extIdMap = GetNetwork()->get_ext_id_mapping(); + auto extIdMap = get_network()->get_ext_id_mapping(); auto find_origin_layers = [&](const std::string& name) -> std::vector { if (extIdMap.find(name) == extIdMap.end()) { @@ -329,7 +251,7 @@ std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::v }; auto get_inputs = [&] (const cldnn::primitive_info& prim_info) { - ngraph::OutputVector inputs; + ov::OutputVector inputs; auto& deps = prim_info.c_dependencies; @@ -355,21 +277,21 @@ std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::v return inputs; }; - auto create_ngraph_node = [&](const cldnn::primitive_info& prim_info) { + auto create_ov_node = [&](const cldnn::primitive_info& prim_info) { const auto& user_ids = prim_info.c_users; size_t output_size = user_ids.size(); bool is_output = user_ids.empty(); auto out_et = cldnn::data_type_to_element_type(prim_info.output_layout.data_type); auto out_pshape = prim_info.output_layout.get_partial_shape(); - std::shared_ptr return_node; + std::shared_ptr return_node; if (prim_info.type_id == "input_layout") { - auto param = std::make_shared(out_et, out_pshape); + auto param = std::make_shared(out_et, out_pshape); params.push_back(param); return_node = param; // create additional result node if parameter is output without post reorder if (is_output) { - results.emplace_back(std::make_shared(return_node->get_default_output())); + results.emplace_back(std::make_shared(return_node->get_default_output())); } } else { return_node = std::make_shared(get_inputs(prim_info), output_size); @@ -378,7 +300,7 @@ std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::v nodes.push_back(return_node); node2layer[prim_info.original_id] = return_node; return_node->set_output_type(0, out_et, out_pshape); - results.emplace_back(std::make_shared(return_node->get_default_output())); + results.emplace_back(std::make_shared(return_node->get_default_output())); } else { size_t port = 0; for (auto& usr_id : user_ids) { @@ -400,14 +322,12 @@ std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::v results.back()->set_friendly_name(layerName + "_result"); std::map info; - Precision prec = data_type_to_precision(prim_info.output_layout.data_type); - Precision inference_precision = data_type_to_precision(prim_info.runtime_precision); - info[ov::exec_model_info::OUTPUT_PRECISIONS] = prec.name(); + info[ov::exec_model_info::OUTPUT_PRECISIONS] = cldnn::data_type_to_element_type(prim_info.output_layout.data_type).get_type_name(); info[ov::exec_model_info::LAYER_TYPE] = to_IE_type_name(prim_info.type_id); info[ov::exec_model_info::OUTPUT_LAYOUTS] = prim_info.layout_str; info[ov::exec_model_info::EXECUTION_ORDER] = std::to_string(prim_info.exec_id); info[ov::exec_model_info::IMPL_TYPE] = prim_info.kernel_id; - info[ov::exec_model_info::RUNTIME_PRECISION] = inference_precision.name(); + info[ov::exec_model_info::RUNTIME_PRECISION] = cldnn::data_type_to_element_type(prim_info.runtime_precision).get_type_name(); std::vector originalNames{find_origin_layers(prim_info.original_id)}; for (auto& fused_id : prim_info.c_fused_ids) { @@ 
-493,20 +413,17 @@ std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::v } } - create_ngraph_node(pi); + create_ov_node(pi); } - return std::make_shared(results, params, "runtime_gpu_graph"); + return std::make_shared(results, params, "runtime_gpu_graph"); } // Cache blob format: // [ ov::intel_gpu::ProgramBuilder::inputLayouts ] // [ ov::intel_gpu::Graph::primitiveIDs ] -// [ ov::intel_gpu::Graph::outputDims ] // [ cldnn::network ] -void Graph::Export(cldnn::BinaryOutputBuffer &ob) { - ob << m_program->m_max_batch; - +void Graph::export_model(cldnn::BinaryOutputBuffer &ob) { bool need_onednn_engine = false; #ifdef ENABLE_ONEDNN_FOR_GPU try { @@ -518,8 +435,7 @@ void Graph::Export(cldnn::BinaryOutputBuffer &ob) { #endif // ENABLE_ONEDNN_FOR_GPU ob << need_onednn_engine; - ob << m_program->inputLayouts; - ob << m_program->GetVariablesStatesInfo(); + ob << m_input_layouts; ob << primitiveIDs; ob << prevPrimitiveIDs; ob << profilingIDs; @@ -528,31 +444,23 @@ void Graph::Export(cldnn::BinaryOutputBuffer &ob) { for (auto& perf_item : perfMap) { ob << perf_item.first; ob << perf_item.second.second.layerType; - ob << cldnn::make_data(&perf_item.second.second.status, sizeof(InferenceEngine::InferenceEngineProfileInfo::LayerStatus)); + ob << cldnn::make_data(&perf_item.second.second.status, sizeof(ov::ProfilingInfo::Status)); ob << perf_item.second.second.isCPU; ob << perf_item.second.second.parentPrimitive; } } - ob << outputDims; - ob << m_networks.size(); - for (const auto& net : m_networks) { - net->save(ob); - } + m_network->save(ob); } -std::shared_ptr Graph::GetExecGraphInfo() { - auto primitives_info = GetNetwork()->get_primitives_info(); - return GetExecGraphInfoByPrimitivesInfo(primitives_info, true); +std::shared_ptr Graph::get_runtime_model() { + auto primitives_info = get_network()->get_primitives_info(); + return get_runtime_model(primitives_info, true); } -void Graph::UpdatePerfStatistics() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::UpdatePerfStatistics"); - if (GetNetworksCount() == 0) { - return; - } - +void Graph::update_profiling_info() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::update_profiling_info"); // Collect timings auto collectTimings = [](cldnn::instrumentation::profiling_info& cldnnInfo, PerfCounter& pc) { for (auto &interval : cldnnInfo.intervals) { @@ -572,8 +480,8 @@ void Graph::UpdatePerfStatistics() { } }; - std::map executedPrimitives = GetNetwork()->get_executed_primitives(); - auto allPrimitives = GetNetwork()->get_all_primitives(); + std::map executedPrimitives = get_network()->get_executed_primitives(); + auto allPrimitives = get_network()->get_all_primitives(); // Get profiling info for all layers for (auto &profiledID : profilingIDs) { @@ -586,7 +494,7 @@ void Graph::UpdatePerfStatistics() { // Change status if layer wasn't executed by cldnn engine if (execIter == executedPrimitives.end()) { if (perfCount.num == 0) { - perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT; + perfCount.status = ov::ProfilingInfo::Status::OPTIMIZED_OUT; } continue; } @@ -615,22 +523,21 @@ void Graph::UpdatePerfStatistics() { } } -bool Graph::IsLoaded() const { - return GetNetwork() != nullptr; +bool Graph::is_loaded() const { + return get_network() != nullptr; } -std::map Graph::GetPerformanceCounts() const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetPerformanceCounts"); - std::map result; +std::vector Graph::get_profiling_info() const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, 
"Graph::get_profiling_info"); + std::map result; bool combinePrimByIRLayers = false; - unsigned i = 0; - auto allIds = GetNetwork()->get_all_primitive_org_ids(); - auto executedPrimitives = GetNetwork()->get_executed_primitives(); - auto primitivesInfo = GetNetwork()->get_primitives_info(); - auto extIdMap = GetNetwork()->get_ext_id_mapping(); + auto allIds = get_network()->get_all_primitive_org_ids(); + auto executedPrimitives = get_network()->get_executed_primitives(); + auto primitivesInfo = get_network()->get_primitives_info(); + auto extIdMap = get_network()->get_ext_id_mapping(); std::map implementation_info; - if (GetNetwork()->get_program() == nullptr) { + if (get_network()->get_program() == nullptr) { for (auto& pi : primitivesInfo) { implementation_info[pi.original_id] = pi.kernel_id; } @@ -674,14 +581,12 @@ std::map Graph::GetPer auto& extPerfEntry = result[layerName]; - memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type)); if (perfCounter.isCPU) { - static const std::string cpuExecType("CPU"); - cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU + extPerfEntry.exec_type = "CPU"; } else { std::string impl; - if (GetNetwork()->get_program() != nullptr) { - impl = GetNetwork()->get_implementation_info(primId); + if (get_network()->get_program() != nullptr) { + impl = get_network()->get_implementation_info(primId); } else { if (implementation_info.find(primId) != implementation_info.end()) { impl = implementation_info[primId]; @@ -689,13 +594,13 @@ std::map Graph::GetPer impl = "undef"; } } - impl.copy(extPerfEntry.exec_type, impl.length()); + extPerfEntry.exec_type = impl; } - extPerfEntry.execution_index = i++; extPerfEntry.status = perfCounter.status; - extPerfEntry.cpu_uSec = perfCounter.cpu_avg(); - extPerfEntry.realTime_uSec = perfCounter.realTime_avg(); + extPerfEntry.cpu_time = std::chrono::microseconds(perfCounter.cpu_avg()); + extPerfEntry.real_time = std::chrono::microseconds(perfCounter.realTime_avg()); + extPerfEntry.node_name = layerName; if (combinePrimByIRLayers) { std::string kernelId = ""; @@ -706,8 +611,8 @@ std::map Graph::GetPer const auto &pc = iter->second.second; if (id != primId && pc.parentPrimitive == primId) { - extPerfEntry.cpu_uSec += pc.cpu_avg(); - extPerfEntry.realTime_uSec += pc.realTime_avg(); + extPerfEntry.cpu_time += std::chrono::microseconds(pc.cpu_avg()); + extPerfEntry.real_time += std::chrono::microseconds(pc.realTime_avg()); if (pc.realTime_avg() > kernelTime) { kernelTime = pc.realTime_avg(); kernelId = id; @@ -716,12 +621,11 @@ std::map Graph::GetPer } } if (!kernelId.empty()) { - std::string impl_info = GetNetwork()->get_implementation_info(kernelId); - std::memcpy(extPerfEntry.exec_type, &impl_info[0], impl_info.length()); + extPerfEntry.exec_type = get_network()->get_implementation_info(kernelId); } } - getUpperCaseName(perfCounter.layerType).copy(extPerfEntry.layer_type, perfCounter.layerType.length()); + extPerfEntry.node_type = getUpperCaseName(perfCounter.layerType); return true; }; @@ -769,25 +673,20 @@ std::map Graph::GetPer auto& extPerfEntry = result[layerName]; if (pi.is_cpu) { - static const std::string cpuExecType("CPU"); - memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type)); - cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU + extPerfEntry.exec_type = "CPU"; } else { - std::string impl = pi.kernel_id; - impl.copy(extPerfEntry.exec_type, impl.length()); + extPerfEntry.exec_type = pi.kernel_id; } - 
getUpperCaseName(pi.type_id).copy(extPerfEntry.layer_type, pi.type_id.length()); - extPerfEntry.execution_index = i++; - extPerfEntry.status = InferenceEngineProfileInfo::LayerStatus::EXECUTED; - extPerfEntry.cpu_uSec = cpuTime; - extPerfEntry.realTime_uSec = deviceTime; + extPerfEntry.node_type = getUpperCaseName(pi.type_id); + extPerfEntry.node_name = pi.original_id; + extPerfEntry.status = ov::ProfilingInfo::Status::EXECUTED; + extPerfEntry.cpu_time = std::chrono::microseconds(cpuTime); + extPerfEntry.real_time = std::chrono::microseconds(deviceTime); if (pi.type_id == "input_layout") { - const std::string input_string = "Input"; - const std::string undef_string = "undef"; - input_string.copy(extPerfEntry.layer_type, 256); - undef_string.copy(extPerfEntry.exec_type, 256); + extPerfEntry.node_type = "Input"; + extPerfEntry.exec_type = "undef"; } } } @@ -809,33 +708,33 @@ std::map Graph::GetPer auto second_res = result.find(getClearName(p.second)); if (first_res != result.end() && second_res != result.end() && first_res != second_res) { - std::swap(first_res->second.cpu_uSec, second_res->second.cpu_uSec); - std::swap(first_res->second.realTime_uSec, second_res->second.realTime_uSec); + std::swap(first_res->second.cpu_time, second_res->second.cpu_time); + std::swap(first_res->second.real_time, second_res->second.real_time); std::swap(first_res->second.status, second_res->second.status); std::swap(first_res->second.exec_type, second_res->second.exec_type); - std::swap(first_res->second.execution_index, second_res->second.execution_index); } } - return result; -} -std::shared_ptr Graph::GetNetwork(size_t idx) const { - if (idx >= GetNetworksCount()) - OPENVINO_THROW("Unable to find network with id=", idx, ". Stored networks count: ", GetNetworksCount()); - - return m_networks[idx]; + std::vector res; + for (auto& kv : result) { + res.push_back(kv.second); + } + return res; } +std::shared_ptr Graph::get_network() const { + return m_network; +} -std::string Graph::MapOutputName(std::string outName) const { - auto networkOutputsIDs = GetNetwork()->get_output_ids(); - auto allPrimitiveIds = GetNetwork()->get_all_primitives(); +std::string Graph::out_name_to_internal(std::string out_port_name) const { + auto networkOutputsIDs = get_network()->get_output_ids(); + auto allPrimitiveIds = get_network()->get_all_primitives(); // Find correct output ID. Start with name stored in IR. - if (primitiveIDs.find(outName) == primitiveIDs.end()) { - OPENVINO_THROW("output with name ", outName, " was not found in primitiveIDs"); + if (primitiveIDs.find(out_port_name) == primitiveIDs.end()) { + OPENVINO_THROW("output with name ", out_port_name, " was not found in primitiveIDs"); } - std::string outputID = primitiveIDs.at(outName); + std::string outputID = primitiveIDs.at(out_port_name); while (std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), outputID) == networkOutputsIDs.end()) { // If current ID isn't found in cldnn network outputs, get previous primitive id and try again. 
auto prim = allPrimitiveIds.find(outputID); @@ -852,17 +751,5 @@ std::string Graph::MapOutputName(std::string outName) const { return outputID; } -InferenceEngine::SizeVector Graph::GetOutputSize(std::string outName) const { - auto res_output = outputDims.find(outName); - - InferenceEngine::SizeVector sz; - if (res_output != outputDims.end()) - sz = res_output->second; - else - sz = outputDims.at(primitiveIDs.at(outName)); - - return sz; -} - } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/infer_request.cpp b/src/plugins/intel_gpu/src/plugin/infer_request.cpp deleted file mode 100644 index 51ccf5b701a493..00000000000000 --- a/src/plugins/intel_gpu/src/plugin/infer_request.cpp +++ /dev/null @@ -1,1052 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include "intel_gpu/plugin/infer_request.hpp" -#include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/plugin/remote_allocators.hpp" -#include "intel_gpu/plugin/compiled_model.hpp" -#include "intel_gpu/plugin/variable_state.hpp" -#include "intel_gpu/runtime/itt.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" -#include "openvino/core/preprocess/input_tensor_info.hpp" -#include - -using namespace InferenceEngine; - -namespace { - -const char str_device_output_unsupported_blob[] = "Device output is of an unsupported blob type."; -const char str_input_not_allocated[] = "Input data was not allocated."; -const char str_output_not_allocated[] = "Output data was not allocated."; - -template -void convertAndCopy(const InferenceEngine::Blob* src, dst_t* dst) { - if (!dst) { - return; - } - auto t_blob = dynamic_cast*>(src); - if (!t_blob) { - OPENVINO_THROW("input type is ", src->getTensorDesc().getPrecision(), " but input is not ", typeid(src_t).name()); - } - - const src_t* srcPtr = t_blob->readOnly(); - if (!srcPtr) { - OPENVINO_THROW(str_input_not_allocated); - } - for (size_t i = 0; i < t_blob->size(); i++) - dst[i] = srcPtr[i]; -} - -template -void copy_result_to_output_blob(InferenceEngine::Blob::Ptr src, InferenceEngine::Blob::Ptr dst, const cldnn::layout& src_layout) { - auto locked_src = src->buffer(); - auto src_ptr = locked_src.as(); - OPENVINO_ASSERT(src_ptr, "[GPU] Invalid source blob"); - - auto locked_dst = dst->buffer(); - auto dst_ptr = locked_dst.as(); - OPENVINO_ASSERT(dst_ptr, "[GPU] Invalid output blob"); - - if (src_layout.data_padding) { - auto size = src_layout.get_tensor(); - for (int64_t b = 0; b < size.batch[0]; b++) { - for (int64_t f = 0; f < size.feature[0]; f++) { - for (int64_t w = 0; w < size.spatial[3]; w++) { - for (int64_t z = 0; z < size.spatial[2]; z++) { - for (int64_t y = 0; y < size.spatial[1]; y++) { - for (int64_t x = 0; x < size.spatial[0]; x++) { - *dst_ptr++ = src_ptr[src_layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))]; - } - } - } - } - } - } - } else { - size_t n = dst->size(); - for (size_t i = 0; i < n; i++) { - dst_ptr[i] = src_ptr[i]; - } - } -} - -inline void checkAlloc(const Blob::Ptr& blob, const std::string& err_str) { - bool not_allocated = false; - if (!blob->is()) { - not_allocated = (blob->buffer() == nullptr); - } else { - not_allocated = !ov::intel_gpu::getBlobImpl(blob->as())->is_allocated(); - } - if (not_allocated) { - OPENVINO_THROW(err_str); - } -} - -void checkInputBlob(const Blob::Ptr &blob, - const std::string &name, - const InputInfo::Ptr foundInput) { - const std::string strNotMatched("The input blob size is 
not equal to the network input size"); - - if (!blob) { - OPENVINO_THROW(str_input_not_allocated); - } - - SizeVector dims = foundInput->getTensorDesc().getDims(); - size_t refSize = foundInput->getTensorDesc().getLayout() != SCALAR - ? details::product(dims) - : 1; - - if (refSize != blob->size()) { - OPENVINO_THROW(strNotMatched + ": got ", blob->size(), " expecting ", refSize); - } - - checkAlloc(blob, str_input_not_allocated); -} - -void checkOutputBlob(const Blob::Ptr &blob, - const std::string &name, - const DataPtr foundOutput) { - const std::string strNotMatched("The output blob size is not equal to the network output size"); - - if (!blob) { - OPENVINO_THROW(str_output_not_allocated); - } - SizeVector dims = foundOutput->getTensorDesc().getDims(); - size_t refSize = foundOutput->getTensorDesc().getLayout() != SCALAR - ? details::product(dims) - : 1; - - if (refSize != blob->size()) { - OPENVINO_THROW(strNotMatched + ": got ", blob->size(), " expecting ", refSize); - } - - checkAlloc(blob, str_output_not_allocated); -} - -bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) { - uint8_t* bufferMem = nullptr; - if (memPtr->get_allocation_type() == cldnn::allocation_type::usm_host) { - bufferMem = reinterpret_cast(memPtr->get_internal_params().mem); - } - return bufferMem == hostPtr; -} -} // namespace - -namespace ov { -namespace intel_gpu { - -// ----------------------------------------------------------------------------------------- // -// ---------------------------- IE API impl ------------------------------------------------ // -// ----------------------------------------------------------------------------------------- // -Blob::Ptr InferRequest::GetBlob(const std::string& name) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::GetBlob"); - Blob::Ptr data; - InputInfo::Ptr foundInput; - DataPtr foundOutput; - bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput); - auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name); - bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - - if (is_input) { - data = _inputs[name]; - if (!isDynamic) - checkInputBlob(data, name, foundInput); - } else { - data = _outputs[name]; - if (!isDynamic) { - checkOutputBlob(data, name, foundOutput); - } - } - return data; -} - -void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetBlob"); - - // perform all common checks first - if (name.empty()) { - OPENVINO_THROW("Failed to set blob with empty name"); - } - if (!data) - OPENVINO_THROW("Failed to set empty blob with name: \'", name, "\'"); - - if (inputTensorsMap.find(name) != inputTensorsMap.end()) { - inputTensorsMap.erase(name); - } - - InputInfo::Ptr foundInput; - DataPtr foundOutput; - auto blobDesc = data->getTensorDesc(); - - bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput); - const TensorDesc& desc = is_input - ? foundInput->getTensorDesc() - : foundOutput->getTensorDesc(); - - if (desc.getPrecision() != blobDesc.getPrecision()) { - OPENVINO_THROW("Failed to set Blob with precision not corresponding to user ", (is_input ? "input" : "output"), " precision"); - } - - size_t netReqBinSize = std::accumulate(desc.getDims().begin(), desc.getDims().end(), - desc.getPrecision().size(), - std::multiplies()); - auto node = is_input ? 
findInputByNodeName(name) : findOutputByNodeName(name); - bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - - size_t dataSize = data->size(); - if (0 == dataSize && !isDynamic) { - OPENVINO_THROW("Input data is empty. Input name: \'", name, "\'"); - } - - size_t dataBinSize = dataSize * data->element_size(); - if (!isDynamic && dataBinSize != netReqBinSize) { - OPENVINO_THROW("Incorrect binary data size for ", (is_input ? "input" : "output"), - " blob with name: \'", name, "\' ", - "Current: ", dataBinSize, " Required: ", netReqBinSize); - } - - if (is_input) { - set_input(name, data); - } else { - set_output(name, data); - } -} - -void InferRequest::set_input(const std::string& name, const Blob::Ptr& data) { - auto remote_ptr = data->as(); - bool is_remote = remote_ptr != nullptr; - - if (is_remote) { - _deviceInputs[name] = data; - _inputs[name] = data; - } else { - OPENVINO_ASSERT(data->buffer().as() != nullptr, str_input_not_allocated, " Input name: \'", name, "\'"); - _inputs[name] = data; - } -} - -void InferRequest::set_output(const std::string& name, const Blob::Ptr& data) { - auto remote_ptr = data->as(); - bool is_remote = remote_ptr != nullptr; - - auto node = findOutputByNodeName(name); - bool isDynamic = node && node->get_output_partial_shape(0).is_dynamic(); - - if (is_remote) { - _deviceOutputs[name] = data; - } else { - if (!isDynamic) { - if (data->buffer() == nullptr) - OPENVINO_THROW(str_output_not_allocated, " Output name: \'", name, "\'"); - } - } - _outputs[name] = data; -} - -void InferRequest::SetBlobs(const std::string& name, const std::vector& blobs) { - if (blobs.size() == 1) { - SetBlob(name, blobs[0]); - return; - } - - if (name.empty()) { - OPENVINO_THROW("Failed to set blobs with empty name"); - } - if (blobs.empty()) { - OPENVINO_THROW("Failed to set empty blobs with name: \'", name, "\'"); - } - bool empty_data = std::any_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->size() == 0; - }); - if (empty_data) { - OPENVINO_THROW("At least one of the input blobs is empty. Input name: \'", name, "\'"); - } - - bool is_buffer = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - bool is_surface = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - bool is_remote = is_buffer || is_surface; - - bool is_host = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - is_host &= !is_remote; - - if (!is_host && !is_remote) { - OPENVINO_THROW("Incorrect input blobs. 
All blobs must be of the same type"); - } - - InputInfo::Ptr foundInput; - DataPtr foundOutput; - bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput); - - if (!is_input) { - OPENVINO_THROW("SetBlobs method doesn't support outputs"); - } - - const TensorDesc& desc = foundInput->getTensorDesc(); - - size_t dataBinSize = blobs.front()->size() * blobs.front()->element_size() * blobs.size(); - size_t netReqBinSize = std::accumulate(desc.getDims().begin(), desc.getDims().end(), - desc.getPrecision().size(), - std::multiplies()); - if (dataBinSize != netReqBinSize) { - OPENVINO_THROW("Incorrect binary data size for input blobs with name: \'", name, "\' ", "Current: ", dataBinSize, " Required: ", netReqBinSize); - } - - if (is_surface) { - for (size_t i = 0; i < blobs.size(); ++i) { - std::string new_name = name + "_" + std::to_string(i); - - if (_inputs.find(new_name) != _inputs.end()) { - _inputs.erase(new_name); - } - } - } else { - if (_inputs.find(name) != _inputs.end()) { - _inputs.erase(name); - } - } - - inputTensorsMap[name] = blobs; -} - -void InferRequest::checkBlobs() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::checkBlobs"); - for (auto const &input : _inputs) { - InputInfo::Ptr foundInput = nullptr; - auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), - [&](const std::pair &pair) { - return pair.first == input.first; - }); - if (foundInputPair != std::end(_networkInputs)) { - foundInput = foundInputPair->second; - } else { - OPENVINO_THROW("Failed to find input with name: \'", input.first, "\'"); - } - auto node = findInputByNodeName(input.first); - bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - if (!is_dynamic) - checkInputBlob(input.second, input.first, foundInput); - } - for (auto const &output : _outputs) { - DataPtr foundOutput = nullptr; - auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs), - [&](const std::pair &pair) { - return pair.first == output.first; - }); - if (foundOutputPair != std::end(_networkOutputs)) { - foundOutput = foundOutputPair->second; - } else { - OPENVINO_THROW("Failed to find output with name: \'", output.first, "\'"); - } - auto node = findOutputByNodeName(output.first); - bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - if (!is_dynamic) - checkOutputBlob(output.second, output.first, foundOutput); - } -} - -void InferRequest::SetGraph(std::shared_ptr graph) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetGraph"); - m_graph = graph; - - OPENVINO_ASSERT(m_graph != nullptr, "[GPU] Model not loaded"); - - allocate_inputs(); - allocate_outputs(); - m_graph->GetNetwork()->allocate_variables_memories(); -} - -InferRequest::InferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs, - const CompiledModel::Ptr& execNetwork) - : IInferRequestInternal(networkInputs, networkOutputs) { - OPENVINO_ASSERT(nullptr != execNetwork); - streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); - m_context = std::dynamic_pointer_cast(execNetwork->GetContext()); - OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequest: wrong context type"); -} - -InferRequest::InferRequest(const std::vector>& inputs, - const std::vector>& outputs, - const CompiledModel::Ptr& execNetwork) - : IInferRequestInternal(inputs, outputs) { - OPENVINO_ASSERT(nullptr != execNetwork); - streamExecutor = 
dynamic_cast(execNetwork->m_taskExecutor.get()); - m_context = std::dynamic_pointer_cast(execNetwork->GetContext()); - OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequest: wrong context type"); -} - -// ----------------------------------------------------------------------------------------- // -// ---------------------------- internal pipeline stages ----------------------------------- // -// ----------------------------------------------------------------------------------------- // -void InferRequest::enqueue_notify() { - m_graph->wait(Graph::Stage::EXECUTE); - enqueue(); -} - -void InferRequest::enqueue() { - // set input and output memory from request blob maps - // into the network object primitives - std::vector dependencies; - for (const auto& inputTensor : inputTensorsMap) { - const std::string name = inputTensor.first; - const auto& blobs = inputTensor.second; - - auto blobsDesc = blobs.front()->getTensorDesc(); - blobsDesc.getDims().front() = blobs.size(); - - bool is_surface = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - bool is_buffer = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - bool is_remote = is_buffer || is_surface; - - if (is_surface) { - for (size_t i = 0; i < blobs.size(); ++i) { - std::string new_name = name + "_" + std::to_string(i); - _inputs[new_name] = blobs[i]; - _deviceInputs[new_name] = blobs[i]; - } - } else { - uint8_t* dst = nullptr; - if (_deviceInputs.find(name) != _deviceInputs.end()) { - if (_deviceInputs[name]->getTensorDesc() == blobsDesc) { - dst = _deviceInputs[name]->buffer().as(); - } - } - if (dst == nullptr) { - cldnn::layout layout(DataTypeFromPrecision(blobsDesc.getPrecision()), - FormatFromTensorDesc(blobsDesc), - tensor_from_dims(blobsDesc.getDims())); - - auto mergedBlobs = create_remote_blob(blobsDesc, layout, BlobType::BT_BUF_INTERNAL); - dst = mergedBlobs->buffer().as(); - - _inputs[name] = mergedBlobs; - if (is_remote) { - _deviceInputs[name] = mergedBlobs; - } - } - - for (auto& blob : blobs) { - const uint8_t* src = blob->cbuffer().as(); - std::copy(src, src + blob->byteSize(), dst); - dst += blob->byteSize(); - } - } - } - - for (auto& item : _inputs) { - std::string inputName = item.first; - Blob::Ptr& inputBlob = item.second; - prepare_input(inputName, inputBlob, dependencies); - } - - auto networkPtr = m_graph->GetNetwork(); - networkPtr->assign_variables_memories(); - - for (auto& item : _outputs) { - std::string outputName = item.first; - Blob::Ptr& outputBlob = item.second; - prepare_output(outputName, outputBlob, dependencies); - } - - internal_outputs.clear(); - internal_outputs = networkPtr->execute(dependencies); - - // If dump layers path is set, only runs first inference. - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { - GPU_DEBUG_INFO << "Only run first inference to dump layers." << std::endl; - exit(0); - } -} - -void InferRequest::wait_notify() { - wait(); - m_graph->notify(Graph::Stage::EXECUTE); -} - -void InferRequest::wait() { - if (internal_outputs.empty()) { - OPENVINO_THROW("Inference was not started!\n"); - } - // wait for completion & collect outputs as requested by the model - // for in_order_queue, it is enough to call finish only once - bool do_sync_per_output = (m_graph->GetNetwork()->get_stream().get_queue_type() == QueueTypes::in_order) ? 
false : true; - if (!do_sync_per_output) - m_graph->GetNetwork()->get_stream().finish(); - std::vector copy_events; - for (auto& no : _networkOutputs) { - // In dynamic case, graph API must be used to retrieve outputID - // because it does not create outputsMap during SetGraph - std::string outputID = outputsMap.empty() ? m_graph->MapOutputName(no.first) : outputsMap.at(no.first); - auto outputMemory = internal_outputs.at(outputID).get_memory(do_sync_per_output); - auto outputLayout = internal_outputs.at(outputID).get_layout(); - if (outputMemory) - outputMemory = m_graph->get_engine().reinterpret_buffer(*outputMemory, outputLayout); - - bool need_output_update = false; - if (outputLayout.bytes_count() == 0 || _outputs.find(no.first) == _outputs.end() || - (outputMemory && _outputs.at(no.first)->byteSize() != outputMemory->size())) { - need_output_update = true; - } - - if (need_output_update) { - auto node = findOutputByNodeName(no.first); - auto out_partial_shape = node->get_output_partial_shape(0); - auto mem_dims = outputLayout.get_shape(); - size_t out_rank = out_partial_shape.size(); - auto precision = InferenceEngine::Precision::FP32; - auto dims = SizeVector(mem_dims.begin(), mem_dims.end()); - if (static_cast(out_rank) < static_cast(dims.size())) { - for (size_t i = out_rank; i < dims.size(); i++) { - if (dims[i] != 1) - OPENVINO_THROW("[GPU] Unexpected out shape"); - } - dims.resize(out_rank); - } - auto layout_by_rank = [](size_t rank) { - switch (rank) { - case 5: return InferenceEngine::Layout::NCDHW; - case 4: return InferenceEngine::Layout::NCHW; - case 2: return InferenceEngine::Layout::NC; - default: return InferenceEngine::Layout::BLOCKED; - } - }; - auto layout = layout_by_rank(out_rank); - auto tensorDesc = InferenceEngine::TensorDesc(precision, dims, layout); - if (_outputs.find(no.first) == _outputs.end()) { - _outputs[no.first] = create_host_blob(tensorDesc, false); - } else { - _outputs[no.first]->setShape(dims); - } - } - Blob::Ptr bptr = _outputs[no.first]; - - // mapping remote blobs not needed - - // let the user take care of them explicitly - if (!bptr->is() && outputMemory) { - bool same_mem = false; - { - auto dst_lock = bptr->cbuffer(); - auto dst_ptr = dst_lock.as(); - same_mem = same_host_mem(outputMemory, dst_ptr); - } - if (!same_mem && outputMemory->size()) { - copy_output_data(outputMemory, bptr, copy_events); - } - } - } - // wait for copy event - if (copy_events.size() > 0) { - if (m_graph->GetNetwork()->get_stream().get_queue_type() == QueueTypes::in_order) { - // wait only the last one - m_graph->GetNetwork()->get_stream().wait_for_events({copy_events.back()}); - } else { - m_graph->GetNetwork()->get_stream().wait_for_events(copy_events); - } - } - // finally collect profiling info - if (m_useProfiling) { - m_graph->UpdatePerfStatistics(); - } -} - -// ----------------------------------------------------------------------------------------- // -// ---------------------------- internal utils --------- ----------------------------------- // -// ----------------------------------------------------------------------------------------- // -void InferRequest::setup_stream_graph() { - int streamID = 0; - auto& streamGraphs = static_cast(_exeNetwork.get())->m_graphs; - if (nullptr != streamExecutor) { - streamID = streamExecutor->GetStreamId(); - auto numGraphs = streamGraphs.size(); - streamID = streamID % numGraphs; - } - m_graph = streamGraphs[streamID]; -} - -Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, bool is_dynamic) { - 
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::create_host_blob"); - // Disable USM usage as USMHostAllocator may fail for attempt to allocate 0 bytes - // If we add WA for such case to avoid driver call, then deallocate method will return false and Blob::setShape call will throw an exception - bool use_usm = m_graph->get_engine().use_unified_shared_memory() && !is_dynamic; - auto alloc = use_usm ? std::make_shared(m_context) : CreateDefaultAllocator(); - auto blob = make_blob_with_precision(desc, alloc); - blob->allocate(); - return blob; -} - -template -InferenceEngine::Blob::Ptr InferRequest::create_remote_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, - const BlobType mem_type, void* mem_ptr) { - auto blob = std::make_shared(m_context, - m_graph->GetNetwork()->get_stream(), - desc, - layout, - mem_ptr, - 0, - 0, - mem_type); - OPENVINO_ASSERT(blob, "[GPU] Failed to allocate remote blob"); - blob->allocate(); - return blob; -} - -template InferenceEngine::Blob::Ptr InferRequest::create_remote_blob(const InferenceEngine::TensorDesc&, const cldnn::layout&, - const BlobType, void*); -template InferenceEngine::Blob::Ptr InferRequest::create_remote_blob(const InferenceEngine::TensorDesc&, const cldnn::layout&, - const BlobType, void*); - -Blob::Ptr InferRequest::create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem) { - auto blob = create_remote_blob(desc, layout, BlobType::BT_USM_SHARED, usm_host_mem); - OPENVINO_ASSERT(blob, "[GPU] Failed to allocate shared host <-> device blob"); - return blob; -} - -void InferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, std::vector& copy_events) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::copy_output_data"); - auto is_convert_needed = [](const Precision& prc) { - const std::vector convert_needed = { Precision::I16, Precision::U16, Precision::FP64, - Precision::U32, Precision::U64 }; - const std::vector convert_not_needed = { Precision::FP32, Precision::FP16, Precision::I64, Precision::I32, - Precision::I8, Precision::U8, Precision::BOOL }; - - if (std::find(convert_needed.begin(), convert_needed.end(), prc) != convert_needed.end()) - return true; - else if (std::find(convert_not_needed.begin(), convert_not_needed.end(), prc) != convert_not_needed.end()) - return false; - else - OPENVINO_ASSERT(false, "[GPU] Plugin does not support output ", prc, " precision"); - }; - - const auto convert_needed = is_convert_needed(dst->getTensorDesc().getPrecision()); - const auto src_layout = src->get_layout(); - auto& stream = m_graph->GetNetwork()->get_stream(); - - if (convert_needed || src_layout.data_padding) { - if (!intermediate_output_blob || intermediate_output_blob->byteSize() < src_layout.bytes_count()) { - auto desc = TensorDesc(Precision::U8, - SizeVector{src_layout.bytes_count()}, - InferenceEngine::Layout::C); - intermediate_output_blob = create_host_blob(desc, src_layout.is_dynamic()); - } - - OPENVINO_ASSERT(intermediate_output_blob, "[GPU] Intermediate blob for outputs precessing is not allocated"); - - auto ev = src->copy_to(stream, intermediate_output_blob->buffer(), false); - copy_events.push_back(ev); - - switch (dst->getTensorDesc().getPrecision()) { - #define CASE(PRC, SRC_DT, DST_DT) \ - case PRC: copy_result_to_output_blob (intermediate_output_blob, dst, src_layout); break; - CASE(Precision::FP64, float, double) - CASE(Precision::FP32, float, float) - CASE(Precision::FP16, uint16_t, uint16_t) 
- CASE(Precision::I64, int64_t, int64_t) - CASE(Precision::I32, int32_t, int32_t) - CASE(Precision::I16, float, int16_t) - CASE(Precision::I8, int8_t, int8_t) - CASE(Precision::U16, float, uint16_t) - CASE(Precision::U32, int32_t, uint32_t) - CASE(Precision::U64, int32_t, uint64_t) - CASE(Precision::U8, uint8_t, uint8_t) - CASE(Precision::BOOL, int8_t, int8_t) - #undef CASE - default: break; - } - } else { - auto dst_ptr = dst->buffer().as(); - auto ev = src->copy_to(stream, dst_ptr, false); - copy_events.push_back(ev); - } -} - -void InferRequest::allocate_inputs() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs"); - auto inputLayouts = m_graph->GetInputLayouts(); - - // allocate inputs - for (auto& ni : _networkInputs) { - std::string name = ni.first; - const TensorDesc& desc = ni.second->getTensorDesc(); - - bool is_nv12_input = false; - - auto parameter = std::find_if(_parameters.begin(), _parameters.end(), [&](const std::shared_ptr& node) { - return node->get_friendly_name() == name; - }); - - if (parameter != _parameters.end()) { - if (parameter->get()->output(0).get_rt_info().count(ov::preprocess::TensorInfoMemoryType::get_type_info_static())) { - std::string mem_type = parameter->get()->output(0).get_rt_info().at(ov::preprocess::TensorInfoMemoryType::get_type_info_static()) - .as().value; - if (mem_type.find(GPU_CONFIG_KEY(SURFACE)) != std::string::npos) { - is_nv12_input = true; - } - } - } - - if (!is_nv12_input) { - auto litr = inputLayouts.find(name); - OPENVINO_ASSERT(litr != inputLayouts.end(), "[GPU] Input layout for ", name, " is not found"); - const auto input_layout = litr->second; - - GPU_DEBUG_LOG << "[" << name << ": input blob]" << std::endl; - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { - TensorDesc desc_fp32 = desc; - desc_fp32.setPrecision(Precision::FP32); - _inputs[name] = create_host_blob(desc, input_layout.is_dynamic()); - if (input_layout.is_static()) - _deviceInputs[name] = create_device_blob(desc_fp32); - } else { - _inputs[name] = create_host_blob(desc, input_layout.is_dynamic()); - // Pre-allocate device input only if USM is not supported; in other case it will be allocated - // in prepare_input() function later - if (input_layout.is_static() && !m_graph->get_engine().use_unified_shared_memory()) { - _deviceInputs[name] = create_device_blob(desc); - } - } - } - } -} - -void InferRequest::allocate_outputs() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs"); - - // allocate outputs - for (auto& no : _networkOutputs) { - std::string outputID = m_graph->MapOutputName(no.first); - const cldnn::layout output_layout = m_graph->GetNetwork()->get_node_output_layout(outputID); - TensorDesc desc = no.second->getTensorDesc(); - // Due to some reason TensorDesc in InferRequest contains wrong dims - // while ExecutableNetwork contains proper ones. Thus replace dims with once from exec network - // Can be removed once 76176 is resolved. 
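// [Editor's illustrative sketch, not part of the patch] The CASE table above and the
// convertAndCopy helper earlier in this file both reduce to a per-element static_cast between
// the device-side and user-visible precisions (e.g. FP32 on the device, I16/U16 in the user
// blob). A standalone, hypothetical equivalent of that inner loop:
#include <cstddef>
#include <cstdint>

template <typename SrcT, typename DstT>
static void convert_and_copy_elements(const SrcT* src, DstT* dst, std::size_t count) {
    for (std::size_t i = 0; i < count; ++i)
        dst[i] = static_cast<DstT>(src[i]);   // widening or narrowing conversion per element
}

// Usage corresponding to CASE(Precision::I16, float, int16_t): the device buffer holds float,
// the user's output blob holds int16_t, so convert_and_copy_elements<float, int16_t>(...) narrows
// each value on the way back to the host.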
- if (output_layout.is_static()) - desc.setDims(m_graph->GetOutputSize(no.first)); - - GPU_DEBUG_LOG << "[" << no.first << ": output blob]" << std::endl; - - outputsMap[no.first] = outputID; - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16 || - desc.getPrecision() == Precision::U32 || desc.getPrecision() == Precision::U64 || - desc.getPrecision() == Precision::FP64) { - TensorDesc device_blob_desc = desc; - - if (desc.getPrecision() == Precision::U32 || desc.getPrecision() == Precision::U64) - device_blob_desc.setPrecision(Precision::I32); - else - device_blob_desc.setPrecision(Precision::FP32); - - _outputs[no.first] = create_host_blob(desc, output_layout.is_dynamic()); - if (output_layout.is_static()) - _deviceOutputs[no.first] = create_device_blob(device_blob_desc); - } else { - _outputs[no.first] = create_host_blob(desc, output_layout.is_dynamic()); - // Pre-allocate device output only if USM is not supported; in other case it will be allocated - // in prepare_output() function later - if (output_layout.is_static() && !m_graph->get_engine().use_unified_shared_memory()) { - _deviceOutputs[no.first] = create_device_blob(desc); - } - } - } -} - -void InferRequest::InferImpl() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::InferImpl"); - setup_stream_graph(); - std::lock_guard lk(m_graph->get_mutex()); - enqueue(); - wait(); -} - -std::map InferRequest::GetPerformanceCounts() const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::GetPerformanceCounts"); - if (!m_useProfiling) { - OPENVINO_THROW("Performance counters were not enabled"); - } else { - return m_graph->GetPerformanceCounts(); - } -} - -void InferRequest::allocate_dev_mem_if_needed(InferenceEngine::BlobMap& device_mems, InferenceEngine::Blob::Ptr& user_blob, - const cldnn::primitive_id& blob_name, const cldnn::layout& layout, bool need_lockable_mem) { - const auto input_ptr = static_cast(user_blob->cbuffer()); - const auto alloc_type = m_graph->get_engine().detect_usm_allocation_type(input_ptr); - const auto is_usm_host = alloc_type == cldnn::allocation_type::usm_host; - const auto has_device_blob = device_mems.find(blob_name) != device_mems.end(); - bool can_skip_allocation = false; - - if (has_device_blob) { - auto impl = getBlobImpl(device_mems[blob_name]->as()); - - OPENVINO_ASSERT(impl, str_device_output_unsupported_blob); - OPENVINO_ASSERT(impl->is_allocated(), str_input_not_allocated); - - auto impl_mem = impl->get_memory(); - auto src_ptr = user_blob->cbuffer().as(); - // If device mem already exists, we can reuse blob if buffer has usm_host type and points to the same memory, - // so we don't need to allocate new memory - can_skip_allocation |= same_host_mem(impl_mem, src_ptr); - // Or if blob has any type except usm_host - in that case explicit copy will be performed anyway - // Or if blob has usm_host type and lockable memory is expected by impl - can_skip_allocation |= need_lockable_mem ? impl_mem->get_allocation_type() == cldnn::allocation_type::usm_host - : impl_mem->get_allocation_type() != cldnn::allocation_type::usm_host; - // In case of lockable memory we need to keep updated device's usm_host memory buffer with - // user's blob to avoid incorrect behaviour if user will call set_blob() with - // the following sequence (usm_host, system_host, usm_host, system_host...) 
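// [Editor's illustrative sketch, not part of the patch] The can_skip_allocation checks in
// allocate_dev_mem_if_needed() around this point reduce to a single predicate; restated here
// over plain flags, with a hypothetical helper name:
static bool can_reuse_device_blob(bool same_host_ptr,      // device memory already wraps the user's pointer
                                  bool is_usm_host_alloc,  // existing device memory has usm_host type
                                  bool need_lockable_mem,  // the impl expects CPU-lockable memory
                                  bool same_user_blob) {   // cached user blob matches the current one
    bool can_skip = same_host_ptr;
    can_skip |= need_lockable_mem ? is_usm_host_alloc      // lockable impls can keep reusing usm_host
                                  : !is_usm_host_alloc;    // otherwise an explicit copy happens anyway
    if (need_lockable_mem)
        can_skip &= same_user_blob;                        // keep usm_host contents in sync with the user's blob
    return can_skip;
}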
- if (need_lockable_mem) - can_skip_allocation &= users_blobs_matching.find(blob_name) != users_blobs_matching.end() - && users_blobs_matching[blob_name] == user_blob; - } - - if (!can_skip_allocation) { - if (is_usm_host) { - // For USM case we create host blob using custom USM host allocator - // and then create shared device blob on top of this buffer - device_mems[blob_name] = create_shared_device_blob(user_blob->getTensorDesc(), layout, user_blob->buffer().as()); - } else if (need_lockable_mem) { - device_mems[blob_name] = - create_remote_blob(user_blob->getTensorDesc(), layout, BlobType::BT_USM_HOST_INTERNAL); - } else { - device_mems[blob_name] = create_device_blob(user_blob->getTensorDesc()); - } - users_blobs_matching[blob_name] = user_blob; - } -} - -void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob, - std::vector& dependencies) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_input"); - auto inputLayoutItr = m_graph->GetInputLayouts().find(inputName); - OPENVINO_ASSERT(inputLayoutItr != m_graph->GetInputLayouts().end(), "[GPU] Input name mismatch"); - - auto input_layout = inputLayoutItr->second; - auto& prec = inputBlob->getTensorDesc().getPrecision(); - auto remote_ptr = inputBlob->as(); - auto& stream = m_graph->GetNetwork()->get_stream(); - const bool is_dev_input = remote_ptr != nullptr; - const bool can_use_usm = m_graph->get_engine().use_unified_shared_memory(); - - auto conv_to_supported_prec = [](Precision::ePrecision prec) { - switch (prec) { - case Precision::I16: - case Precision::U16: - case Precision::FP64: - return Precision::FP32; - case Precision::U64: - case Precision::U32: - return Precision::I32; - default: return prec; - } - }; - - auto _nw_ptr = m_graph->GetNetwork(); - if (input_layout.is_dynamic()) { - bool has_device_blob = _deviceInputs.find(inputName) != _deviceInputs.end(); - bool should_allocate_device_blob = !has_device_blob; - if (has_device_blob) { - auto device_blob = _deviceInputs.at(inputName)->as(); - auto blob = getBlobImpl(device_blob); - if (blob->get_original_memory()->size() < inputBlob->byteSize()) { - should_allocate_device_blob = true; - } - } - - auto& sp = _nw_ptr->get_shape_predictor(); - const auto& tensor_desc = inputBlob->getTensorDesc(); - auto dt_size = cldnn::data_type_traits::size_of(DataTypeFromPrecision(tensor_desc.getPrecision())); - auto current_shape = ov::Shape(tensor_desc.getDims()); - auto prealloc_info = sp.predict_preallocation_shape(inputName, current_shape, dt_size, !should_allocate_device_blob); - - if (should_allocate_device_blob) { - auto preallocation_shape = prealloc_info.second; - auto can_preallocate_buffer = prealloc_info.first && - sp.can_preallocate(ov::shape_size(preallocation_shape) * dt_size); - - if (can_preallocate_buffer) { - auto new_tensor_desc = tensor_desc; - new_tensor_desc.setDims(preallocation_shape); - auto device_blob = create_device_blob(new_tensor_desc); - _deviceInputs[inputName] = reinterpret_device_blob(device_blob, inputBlob->getTensorDesc()); - } else { - _deviceInputs[inputName] = create_device_blob(tensor_desc); - } - } else { - _deviceInputs[inputName] = reinterpret_device_blob(_deviceInputs[inputName], inputBlob->getTensorDesc()); - } - } else if (input_layout.is_static() && !is_dev_input && can_use_usm) { - allocate_dev_mem_if_needed(_deviceInputs, inputBlob, inputName, input_layout, (conv_to_supported_prec(prec) != prec)); - } - OPENVINO_ASSERT(_deviceInputs.find(inputName) != _deviceInputs.end(), "[GPU] 
Couldn't find device blob allocated for ", inputName, " input"); - auto reqBlob = _deviceInputs.at(inputName)->as(); - const cldnn::primitive_id internalName = "parameter:" + inputName; - - switch (prec) { - case Precision::FP64: - case Precision::FP32: - case Precision::FP16: - case Precision::I8: - case Precision::U8: - case Precision::BOOL: - case Precision::I16: - case Precision::U16: - case Precision::I32: - case Precision::U32: - case Precision::U64: - case Precision::I64: { - auto impl = getBlobImpl(is_dev_input ? - remote_ptr : - reqBlob); - if (!impl->is_allocated()) { - OPENVINO_THROW(str_input_not_allocated); - } - auto inputMem = impl->get_memory(); - - auto input_layout = m_graph->GetInputLayouts().find(inputName); - if (input_layout != m_graph->GetInputLayouts().end()) { - if (input_layout->second != inputMem->get_layout() && input_layout->second.is_static()) { - inputMem = m_graph->GetNetwork()->get_engine().reinterpret_buffer(*inputMem, input_layout->second); - } - } - - if (!is_dev_input) { - Precision conv_prec = conv_to_supported_prec(prec); - // TODO: Remove this checks once 95363 issue is solved - if (conv_prec != prec && conv_prec == Precision::FP32) { - // GPU plugin doesn't support I16 input precision, - // so have to convert input data to fp32 precision - cldnn::mem_lock ptr{ inputMem, stream }; - if (prec == Precision::I16) { - convertAndCopy(inputBlob.get(), ptr.data()); - } else if (prec == Precision::U16) { - convertAndCopy(inputBlob.get(), ptr.data()); - } else { - convertAndCopy(inputBlob.get(), ptr.data()); - } - } else if (conv_prec != prec && conv_prec == Precision::I32) { - cldnn::mem_lock ptr{ inputMem, stream }; - if (prec == Precision::U64) { - convertAndCopy(inputBlob.get(), ptr.data()); - } else { - convertAndCopy(inputBlob.get(), ptr.data()); - } - } else { - auto src_lock = inputBlob->cbuffer(); - auto src_ptr = src_lock.as(); - if (!same_host_mem(inputMem, src_ptr)) { - auto ev = inputMem->copy_from(stream, src_ptr, false); - dependencies.push_back(ev); - } - } - } - dependencies.push_back(_nw_ptr->set_input_data(internalName, inputMem)); - break; - } - default: - OPENVINO_THROW("Unsupported input precision ", prec); - } -} - -void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob, std::vector& dependencies) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_output"); - const auto output_id = outputsMap.at(outputName); - const auto output_layout = m_graph->GetNetwork()->get_node_output_layout(output_id); - const bool is_static = output_layout.is_static(); - const bool can_use_usm = m_graph->get_engine().use_unified_shared_memory(); - auto remote_ptr = outputBlob->as(); - const bool is_dev_input = remote_ptr != nullptr; - - if (is_static && can_use_usm && !is_dev_input) { - auto is_cpu_impl = m_graph->GetNetwork()->is_cpu_impl(output_id); - allocate_dev_mem_if_needed(_deviceOutputs, outputBlob, outputName, output_layout, is_cpu_impl); - } - - OPENVINO_ASSERT(!is_static || _deviceOutputs.find(outputName) != _deviceOutputs.end(), - "[GPU] Couldn't find device blob allocated for ", outputName, " output"); - // Missing output in _deviceOutputs means that the network is dynamic and outputs couldn't be pre-allocated - if (_deviceOutputs.find(outputName) == _deviceOutputs.end()) - return; - Blob::Ptr reqBlob = _deviceOutputs.at(outputName); - cldnn::primitive_id internalName = outputsMap[outputName]; - auto _nw_ptr = m_graph->GetNetwork(); - auto output_blob_ptr = (reqBlob != outputBlob && 
is_dev_input) - ? remote_ptr - : reqBlob->as(); - auto impl = getBlobImpl(output_blob_ptr); - if (!impl->is_allocated()) { - OPENVINO_THROW(str_output_not_allocated); - } - auto outputMem = impl->get_memory(); - for (auto o_ev : _nw_ptr->set_output_memory(internalName, outputMem)) { - dependencies.push_back(o_ev); - } -} - -InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc) { - auto format = FormatFromLayout(desc.getLayout()); - auto dt = DataTypeFromPrecision(desc.getPrecision()); - ov::PartialShape shape(desc.getDims()); - - // Currently, clDeviceMemAllocINTEL returns memory address allocated to other input blob if the current blob is empty - // W/A for this issue: - // Allocate with non-empty shape and then reinterprete with original shape - for (auto &i : shape) { - if (i == 0) - i = 1; - } - - auto l = cldnn::layout(shape, dt, format); - - if (m_graph->get_engine().use_unified_shared_memory()) { - auto blob = create_remote_blob(desc, l, BlobType::BT_USM_DEVICE_INTERNAL); - return reinterpret_device_blob(blob, desc); - } else { - return create_remote_blob(desc, l, BlobType::BT_BUF_INTERNAL); - } -} - -std::vector> InferRequest::QueryState() { - std::vector> ret{}; - const auto& variable_states = m_graph->GetNetwork()->get_variable_memories(); - for (const auto& pair : variable_states) { - std::vector states { pair.second }; - ret.push_back(std::make_shared(pair.first, states, m_graph->get_engine(), -1)); - } - return ret; -} - -Blob::Ptr InferRequest::reinterpret_device_blob(Blob::Ptr data, const TensorDesc& new_desc) { - auto format = FormatFromLayout(new_desc.getLayout()); - auto dt = DataTypeFromPrecision(new_desc.getPrecision()); - ov::PartialShape shape(new_desc.getDims()); - - auto l = cldnn::layout(std::move(shape), dt, format); - - auto remote_blob = data->as(); - if (!remote_blob) - OPENVINO_THROW("Invalid blob used for reinterpretation"); - - remote_blob->setShape(new_desc.getDims()); - - auto impl = getBlobImpl(remote_blob); - impl->reinterpret(l); - - return data; -} - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp b/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp deleted file mode 100644 index 9c0811398d858e..00000000000000 --- a/src/plugins/intel_gpu/src/plugin/infer_request_legacy.cpp +++ /dev/null @@ -1,1190 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include -#include "intel_gpu/plugin/infer_request_legacy.hpp" -#include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/plugin/remote_blob.hpp" -#include "intel_gpu/plugin/remote_allocators.hpp" -#include "intel_gpu/plugin/compiled_model.hpp" -#include "intel_gpu/runtime/itt.hpp" -#include "intel_gpu/plugin/variable_state.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" -#include "openvino/core/preprocess/input_tensor_info.hpp" -#include -#include - -using namespace InferenceEngine; - -namespace { - -const char fp32_suffix[] = "_fp32"; -const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing"; -const char str_input_not_allocated[] = "Input data was not allocated."; -const char str_output_not_allocated[] = "Output data was not allocated."; -const char str_host_mem_not_allocated[] = "Failed to allocate host memory."; -const char str_device_mem_not_allocated[] = "Failed to allocate device memory."; -const char 
str_shared_mem_not_allocated[] = "Failed to allocate shared memory."; - -template -void convertAndCopy(const InferenceEngine::Blob* src, dst_t* dst) { - if (!dst) { - return; - } - auto t_blob = dynamic_cast*>(src); - if (!t_blob) { - IE_THROW() << "input type is " << src->getTensorDesc().getPrecision() << " but input is not " - << typeid(src_t).name(); - } - - const src_t* srcPtr = t_blob->readOnly(); - if (!srcPtr) { - IE_THROW(NotAllocated) << str_input_not_allocated; - } - for (size_t i = 0; i < t_blob->size(); i++) - dst[i] = srcPtr[i]; -} - -template -void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, ov::intel_gpu::buf_info* bi, cldnn::stream& stream) { - size_t n = (bi == nullptr) ? dst->size() : bi->buf_size; - size_t offset = (bi == nullptr) ? 0 : bi->buf_offset; - - auto layout = src->get_layout(); - auto size = layout.get_tensor(); - - auto locked_dst = dst->buffer(); - auto dst_ptr = locked_dst.as(); - if (dst_ptr == nullptr) { - IE_THROW() << "Invalid output blob"; - } - cldnn::mem_lock src_lock{ src, stream }; - src_dt* src_ptr = src_lock.data(); - dst_ptr += offset; - - if (layout.data_padding) { - for (int64_t b = 0; b < size.batch[0]; b++) { - for (int64_t f = 0; f < size.feature[0]; f++) { - for (int64_t w = 0; w < size.spatial[3]; w++) { - for (int64_t z = 0; z < size.spatial[2]; z++) { - for (int64_t y = 0; y < size.spatial[1]; y++) { - for (int64_t x = 0; x < size.spatial[0]; x++) { - *dst_ptr++ = src_ptr[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))]; - } - } - } - } - } - } - } else { - for (size_t i = 0; i < n; i++) { - dst_ptr[i] = src_ptr[i]; - } - } -} - -inline void checkAlloc(const Blob::Ptr& blob, const std::string& err_str) { - bool not_allocated = false; - if (!blob->is()) { - not_allocated = (blob->buffer() == nullptr); - } else { - not_allocated = !ov::intel_gpu::getBlobImpl(blob->as())->is_allocated(); - } - if (not_allocated) { - IE_THROW(NotAllocated) << err_str; - } -} - -void checkInputBlob(const Blob::Ptr &blob, - const std::string &name, - const InputInfo::Ptr foundInput, - bool nv12_two_inputs = false) { - const std::string strNotMatched("The input blob size is not equal to the network input size"); - - if (!blob) { - IE_THROW(NotAllocated) << str_input_not_allocated; - } - - SizeVector dims = foundInput->getTensorDesc().getDims(); - - size_t refSize = foundInput->getTensorDesc().getLayout() != SCALAR - ? details::product(dims) - : 1; - - if (refSize != blob->size()) { - IE_THROW() << strNotMatched + ": got " << blob->size() << " expecting " << refSize; - } - - checkAlloc(blob, str_input_not_allocated); -} - -void checkOutputBlob(const Blob::Ptr &blob, - const std::string &name, - const DataPtr foundOutput) { - const std::string strNotMatched("The output blob size is not equal to the network output size"); - - if (!blob) { - IE_THROW(NotAllocated) << str_output_not_allocated; - } - SizeVector dims = foundOutput->getTensorDesc().getDims(); - size_t refSize = foundOutput->getTensorDesc().getLayout() != SCALAR - ? 
details::product(dims) - : 1; - - if (refSize != blob->size()) { - IE_THROW() << strNotMatched + ": got " << blob->size() << " expecting " << refSize; - } - - checkAlloc(blob, str_output_not_allocated); -} - -bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) { - uint8_t* bufferMem = nullptr; - if (memPtr->get_allocation_type() == cldnn::allocation_type::usm_host) { - bufferMem = reinterpret_cast(memPtr->get_internal_params().mem); - } - return bufferMem == hostPtr; -} -} // namespace - -namespace ov { -namespace intel_gpu { - -// ----------------------------------------------------------------------------------------- // -// ---------------------------- IE API impl ------------------------------------------------ // -// ----------------------------------------------------------------------------------------- // -Blob::Ptr InferRequestLegacy::GetBlob(const std::string& name) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::GetBlob"); - Blob::Ptr data; - InputInfo::Ptr foundInput; - DataPtr foundOutput; - bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput); - auto node = is_input ? findInputByNodeName(name) : findOutputByNodeName(name); - bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - - if (is_input) { - // ROI blob is returned only if it was set previously. Otherwise default blob is returned. - auto it = _preProcData.find(name); - if (it != _preProcData.end()) { - data = it->second->getRoiBlob(); - } else { - data = _inputs[name]; - if (!isDynamic) - checkInputBlob(data, name, foundInput); - } - } else { - data = _outputs[name]; - if (isDynamic) { - if (m_graph->GetMaxDynamicBatchSize() > 1) { - SizeVector outDims = data->getTensorDesc().getDims(); - outDims[m_graph->GetOutputDynBatchDims()[name]] = m_curBatch; - data->getTensorDesc().setDims(outDims); - } - } else { - checkOutputBlob(data, name, foundOutput); - } - } - return data; -} - -void InferRequestLegacy::SetBlob(const std::string& name, const Blob::Ptr& data) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::SetBlob"); - - // perform all common checks first - if (name.empty()) { - IE_THROW(NotFound) << "Failed to set blob with empty name"; - } - if (!data) - IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'"; - - size_t dataSize = data->size(); - if (0 == dataSize) { - IE_THROW() << "Input data is empty. Input name: \'" << name << "\'"; - } - if (inputTensorsMap.find(name) != inputTensorsMap.end()) { - inputTensorsMap.erase(name); - } - const bool compoundBlobPassed = data->is(); - - InputInfo::Ptr foundInput; - DataPtr foundOutput; - auto blobDesc = data->getTensorDesc(); - - bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput); - const TensorDesc& desc = is_input - ? foundInput->getTensorDesc() - : foundOutput->getTensorDesc(); - - if (desc.getPrecision() != blobDesc.getPrecision()) { - IE_THROW(ParameterMismatch) << "Failed to set Blob with precision not corresponding to user " - << (is_input ? "input" : "output") << " precision"; - } - - size_t dataBinSize = dataSize * data->element_size(); - size_t netReqBinSize = std::accumulate(desc.getDims().begin(), desc.getDims().end(), - desc.getPrecision().size(), - std::multiplies()); - bool preProcResize = false; - auto node = is_input ? 
findInputByNodeName(name) : findOutputByNodeName(name); - bool isDynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - if (is_input) { - preProcResize = foundInput->getPreProcess().getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE; - const auto inputColorFormat = foundInput->getPreProcess().getColorFormat(); - preProcResize |= (inputColorFormat != ColorFormat::RAW) && (inputColorFormat != ColorFormat::BGR); - } - - if (!isDynamic && - dataBinSize != netReqBinSize && !compoundBlobPassed && !preProcResize) { - IE_THROW() << "Incorrect binary data size for " << (is_input ? "input" : "output") << - " blob with name: \'" << name << "\' " << - "Current: " << dataBinSize << " Required: " << netReqBinSize; - } - - auto remote_ptr = data->as(); - bool is_remote = remote_ptr != nullptr; - if (is_remote) { - auto impl = getBlobImpl(remote_ptr); - if (!impl->is_allocated()) { - impl->allocate(); - } - } - if (is_input) { - if (is_remote) { - _deviceInputs[name] = data; - _inputs[name] = data; - } - - if (!is_remote) { - if (preProcessingRequired(foundInput, data)) { - // Stores the given blob as ROI blob. It will be used to fill in network input - // during pre-processing - if (_inputs[name]->is()) { - Blob::Ptr inputHostBlob = create_host_blob(desc); - _inputs[name] = inputHostBlob; - } - _preProcData[name] = CreatePreprocDataHelper(); - _preProcData[name]->isApplicable(data, _inputs[name]); - _preProcData[name]->setRoiBlob(data); - } else { - if (compoundBlobPassed) { - IE_THROW(NotImplemented) << cannot_set_compound; - } - if (isDynamic) { - // extract new batch size from blob - if (m_graph->GetMaxDynamicBatchSize() > 1) { - const auto batch_idx = m_graph->GetInputDynBatchDims()[name].first; - if (batch_idx >= 0) - SetBatch(static_cast(blobDesc.getDims()[batch_idx])); - } - } else { - size_t blobSize = desc.getLayout() != SCALAR - ? details::product(desc.getDims()) - : 1; - if (dataSize != blobSize) { - IE_THROW() << "Input blob size is not equal to network input size (" - << dataSize << "!=" << blobSize << ")."; - } - } - - if (data->buffer() == nullptr) - IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'"; - _inputs[name] = data; - } - } - } else { - if (compoundBlobPassed) { - IE_THROW(NotImplemented) << cannot_set_compound; - } - - if (is_remote) { - _deviceOutputs[name] = data; - } else { - if (!isDynamic) { - size_t outputSize = desc.getLayout() != SCALAR - ? details::product(desc.getDims()) - : 1; - if (dataSize != outputSize) { - IE_THROW() << "Output blob size is not equal to network output size (" << dataSize - << "!=" << outputSize << ")."; - } - if (data->buffer() == nullptr) - IE_THROW(NotAllocated) << str_output_not_allocated << " Output name: \'" << name << "\'"; - } - } - _outputs[name] = data; - } -} - -void InferRequestLegacy::SetBlobs(const std::string& name, const std::vector& blobs) { - if (blobs.size() == 1) { - SetBlob(name, blobs[0]); - return; - } - - if (name.empty()) { - IE_THROW(NotFound) << "Failed to set blobs with empty name"; - } - if (blobs.empty()) { - IE_THROW(NotAllocated) << "Failed to set empty blobs with name: \'" << name << "\'"; - } - bool empty_data = std::any_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->size() == 0; - }); - if (empty_data) { - IE_THROW() << "At least one of the input blobs is empty. 
Input name: \'" << name << "\'"; - } - - bool is_compound = std::any_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - if (is_compound) { - IE_THROW(NotImplemented) << cannot_set_compound; - } - - bool is_buffer = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - bool is_surface = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - bool is_remote = is_buffer || is_surface; - - bool is_host = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - is_host &= !is_remote; - - if (!is_host && !is_remote) { - IE_THROW() << "Incorrect input blobs. All blobs must be of the same type"; - } - - InputInfo::Ptr foundInput; - DataPtr foundOutput; - bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput); - - if (!is_input) { - IE_THROW() << "SetBlobs method doesn't support outputs"; - } - - const TensorDesc& desc = foundInput->getTensorDesc(); - - size_t dataBinSize = blobs.front()->size() * blobs.front()->element_size() * blobs.size(); - size_t netReqBinSize = std::accumulate(desc.getDims().begin(), desc.getDims().end(), - desc.getPrecision().size(), - std::multiplies()); - bool preProcResize = false; - if (is_input) { - preProcResize = foundInput->getPreProcess().getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE; - const auto inputColorFormat = foundInput->getPreProcess().getColorFormat(); - preProcResize |= (inputColorFormat != ColorFormat::RAW) && (inputColorFormat != ColorFormat::BGR); - } - if (dataBinSize != netReqBinSize && !preProcResize) { - IE_THROW() << "Incorrect binary data size for input blobs with name: \'" << name << "\' " << - "Current: " << dataBinSize << " Required: " << netReqBinSize; - } - - if (is_surface) { - for (size_t i = 0; i < blobs.size(); ++i) { - std::string new_name = name + "_" + std::to_string(i); - - if (_inputs.find(new_name) != _inputs.end()) { - _inputs.erase(new_name); - } - } - } else { - if (_inputs.find(name) != _inputs.end()) { - _inputs.erase(name); - } - } - - if (is_remote) { - for (auto& blob : blobs) { - auto impl = getBlobImpl(blob->as()); - if (!impl->is_allocated()) { - impl->allocate(); - } - } - } - inputTensorsMap[name] = blobs; -} - -void InferRequestLegacy::checkBlobs() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::checkBlobs"); - for (auto const &input : _inputs) { - InputInfo::Ptr foundInput = nullptr; - auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), - [&](const std::pair &pair) { - return pair.first == input.first; - }); - if (foundInputPair != std::end(_networkInputs)) { - foundInput = foundInputPair->second; - } else { - IE_THROW(NotFound) << "Failed to find input with name: \'" << input.first << "\'"; - } - auto node = findInputByNodeName(input.first); - bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - if (!is_dynamic) - checkInputBlob(input.second, input.first, foundInput, m_graph->get_config().get_property(ov::intel_gpu::nv12_two_inputs)); - } - for (auto const &output : _outputs) { - DataPtr foundOutput = nullptr; - auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs), - [&](const std::pair &pair) { - return pair.first == output.first; - }); - if (foundOutputPair != std::end(_networkOutputs)) { - foundOutput = foundOutputPair->second; - } else { - IE_THROW(NotFound) << "Failed to find output with name: \'" << 
output.first << "\'"; - } - auto node = findOutputByNodeName(output.first); - bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - if (!is_dynamic) - checkOutputBlob(output.second, output.first, foundOutput); - } -} - -void InferRequestLegacy::SetGraph(std::shared_ptr graph) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::SetGraph"); - m_graph = graph; - - if (m_graph == nullptr) { - IE_THROW(NetworkNotLoaded); - } - - if (m_graph->GetMaxDynamicBatchSize() > 1) { - SetBatch(static_cast(m_graph->GetMaxDynamicBatchSize())); - allocate_inputs_dynamic(); - allocate_outputs_dynamic(); - } else { - allocate_inputs(); - allocate_outputs(); - variables_states_ = m_graph->AllocateVariablesMemories(); - } -} - -void InferRequestLegacy::SetBatch(int new_batch) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::SetBatch"); - - OPENVINO_ASSERT(new_batch > 0 && static_cast(new_batch) <= m_graph->GetMaxDynamicBatchSize(), - "[GPU] Invalid dynamic batch size ", new_batch, " for this request. ", - "Got: ", new_batch, ". ", - "Expected value in range [1;", m_graph->GetMaxDynamicBatchSize(), "]"); - - if (new_batch == m_curBatch) - return; - - batchInputs.clear(); - batchOutputs.clear(); - - // tune expected inputs - for (auto& input : m_graph->GetNetworkInputs()) { - auto sz = input.second->getTensorDesc().getDims(); - const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first; - if (batch_idx >= 0) - sz[batch_idx] = 1; - - size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies()); - std::vector in_buf; - - size_t offset = 0; - size_t bsz = single_batch; - - // calculate metadata for input buffers - for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) { - unsigned int mask = 1 << nb; - - buf_info ib = { offset, bsz }; - in_buf.push_back(ib); - - if (new_batch & mask) - offset += bsz; - bsz <<= 1; - } - - batchInputs[input.first] = in_buf; - } - - // tune expected outputs - for (auto& no : m_graph->GetNetworkOutputs()) { - auto sz = no.second->getTensorDesc().getDims(); - const auto batch_idx = m_graph->GetInputDynBatchDims()[no.first].first; - if (batch_idx >= 0) - sz[batch_idx] = 1; - size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies()); - std::vector out_buf; - - size_t offset = 0; - size_t bsz = single_batch; - // calculate metadata for output buffers - for (uint32_t nb = 0; nb < m_graph->GetNetworksCount(); nb++) { - uint32_t mask = 1 << nb; - - buf_info ob = { offset, bsz }; - out_buf.push_back(ob); - - if (new_batch & mask) - offset += bsz; - - bsz <<= 1; - } - - batchOutputs[no.first] = out_buf; - } - variables_states_ = m_graph->AllocateVariablesMemories(); - - m_curBatch = new_batch; -} - -InferRequestLegacy::InferRequestLegacy(InputsDataMap networkInputs, OutputsDataMap networkOutputs, - const CompiledModel::Ptr& execNetwork) - : IInferRequestInternal(networkInputs, networkOutputs) { - IE_ASSERT(nullptr != execNetwork); - streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); - m_context = std::dynamic_pointer_cast(execNetwork->GetContext()); - OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequestLegacy: wrong context type"); -} - -InferRequestLegacy::InferRequestLegacy(const std::vector>& inputs, - const std::vector>& outputs, - const CompiledModel::Ptr& execNetwork) - : IInferRequestInternal(inputs, outputs) { - IE_ASSERT(nullptr != execNetwork); - streamExecutor = 
dynamic_cast(execNetwork->m_taskExecutor.get()); - m_context = std::dynamic_pointer_cast(execNetwork->GetContext()); - OPENVINO_ASSERT(m_context != nullptr, "[GPU] Can't initialize context of InferRequestLegacy: wrong context type"); -} - -// ----------------------------------------------------------------------------------------- // -// ---------------------------- internal pipeline stages ----------------------------------- // -// ----------------------------------------------------------------------------------------- // -void InferRequestLegacy::preprocess_notify() { - m_graph->wait(Graph::Stage::PREPROC); - if (m_graph->GetMaxDynamicBatchSize() > 1) { - preprocess_dynamic(); - } else { - execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP - } - m_graph->notify(Graph::Stage::PREPROC); -} - -void InferRequestLegacy::preprocess() { - if (m_graph->GetMaxDynamicBatchSize() > 1) { - preprocess_dynamic(); - } else { - execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP - } -} - -void InferRequestLegacy::enqueue_notify() { - m_graph->wait(Graph::Stage::EXECUTE); - enqueue(); -} - -void InferRequestLegacy::enqueue() { - if (m_graph->GetMaxDynamicBatchSize() > 1) { - enqueue_dynamic(); - return; - } - - // set input and output memory from request blob maps - // into the network object primitives - std::vector dependencies; - - for (const auto& inputTensor : inputTensorsMap) { - const std::string name = inputTensor.first; - const auto& blobs = inputTensor.second; - - auto blobsDesc = blobs.front()->getTensorDesc(); - blobsDesc.getDims().front() = blobs.size(); - - bool is_surface = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - bool is_buffer = std::all_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { - return blob->is(); - }); - bool is_remote = is_buffer || is_surface; - - if (is_surface) { - for (size_t i = 0; i < blobs.size(); ++i) { - std::string new_name = name + "_" + std::to_string(i); - _inputs[new_name] = blobs[i]; - _deviceInputs[new_name] = blobs[i]; - } - } else { - uint8_t* dst = nullptr; - if (_deviceInputs.find(name) != _deviceInputs.end()) { - if (_deviceInputs[name]->getTensorDesc() == blobsDesc) { - dst = _deviceInputs[name]->buffer().as(); - } - } - if (dst == nullptr) { - cldnn::layout layout(DataTypeFromPrecision(blobsDesc.getPrecision()), - FormatFromTensorDesc(blobsDesc), - tensor_from_dims(blobsDesc.getDims())); - - auto mergedBlobs = std::make_shared(m_context, - m_graph->GetNetwork()->get_stream(), - blobsDesc, - layout); - mergedBlobs->allocate(); - dst = mergedBlobs->buffer().as(); - - _inputs[name] = mergedBlobs; - if (is_remote) { - _deviceInputs[name] = mergedBlobs; - } - } - - for (auto& blob : blobs) { - const uint8_t* src = blob->cbuffer().as(); - std::copy(src, src + blob->byteSize(), dst); - dst += blob->byteSize(); - } - } - } - - for (auto& item : _inputs) { - std::string inputName = item.first; - Blob::Ptr& inputBlob = item.second; - - // regular blob - prepare_input(inputName, inputBlob, dependencies); - } - - cldnn::network::variables_states_map variables_states; - for (auto &variable_state_pair : variables_states_) - variables_states.insert({ variable_state_pair.first, variable_state_pair.second[0] }); - - auto networkPtr = m_graph->GetNetwork(); - - networkPtr->assign_variables_memories(std::move(variables_states)); - - for (auto& item : _outputs) { - std::string outputName = item.first; - Blob::Ptr& 
outputBlob = item.second; - prepare_output(outputName, outputBlob); - } - - internal_outputs.clear(); - internal_outputs = networkPtr->execute(dependencies); - - // If dump layers path is set, only runs first inference. - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { - GPU_DEBUG_INFO << "Only run first inference to dump layers." << std::endl; - exit(0); - } -} - -void InferRequestLegacy::wait_notify() { - wait(); - m_graph->notify(Graph::Stage::EXECUTE); -} - -void InferRequestLegacy::wait() { - if (m_graph->GetMaxDynamicBatchSize() > 1) { - wait_dynamic(); - return; - } - - if (internal_outputs.empty()) { - IE_THROW() << "Inference was not started!\n"; - } - - // wait for completion & collect outputs as requested by the model - for (auto& no : _networkOutputs) { - Blob::Ptr bptr = _outputs[no.first]; - std::string outputID = outputsMap.at(no.first); - auto outputMemory = internal_outputs.at(outputID).get_memory(); - - // mapping remote blobs not needed - - // let the user take care of them explicitly - if (!bptr->is()) { - bool same_mem = false; - { - auto dst_lock = bptr->cbuffer(); - auto dst_ptr = dst_lock.as(); - same_mem = same_host_mem(outputMemory, dst_ptr); - } - if (!same_mem) { - copy_output_data(outputMemory, bptr); - } - } - } - - // finally collect profiling info - if (m_useProfiling) { - m_graph->UpdatePerfStatistics(); - } -} - -void InferRequestLegacy::preprocess_dynamic() { - // execute input pre-processing. - execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP -} - -void InferRequestLegacy::enqueue_dynamic() { - internal_outputs_dynamic.clear(); - auto numNets = m_graph->GetNetworksCount(); - internal_outputs_dynamic.resize(numNets); - - // set up exection and put all graphs into driver queue - for (unsigned nb = 0; nb < numNets; nb++) { - unsigned int mask = 1 << nb; - - if (m_curBatch & mask) { - for (auto& item : _inputs) { - const cldnn::primitive_id& inputName = item.first; - const Blob::Ptr inputBlob = item.second; - - auto inputLayout = m_graph->GetInputLayouts().at(inputName); - auto new_size = inputLayout.get_tensor(); - new_size.batch[0] = mask; - inputLayout.set_tensor(new_size); - copy_input_data(m_graph->GetNetwork(nb), inputName, inputLayout, *inputBlob, &batchInputs[inputName][nb]); - } - - cldnn::network::variables_states_map variables_states; - for (auto &variable_state_pair : variables_states_) - variables_states.insert({ variable_state_pair.first, variable_state_pair.second[nb] }); - - auto networkPtr = m_graph->GetNetwork(nb); - - networkPtr->assign_variables_memories(std::move(variables_states)); - - internal_outputs_dynamic[nb] = networkPtr->execute(); - } - } -} - -void InferRequestLegacy::wait_dynamic() { - if (internal_outputs_dynamic.empty()) { - IE_THROW() << "Inference was not started!\n"; - } - - // now try to get execution results - for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) { - unsigned int mask = 1 << nb; - - if (m_curBatch & mask) { - for (auto& no : _networkOutputs) { - std::string outputID = outputsMap.at(no.first); - auto outputMemory = internal_outputs_dynamic[nb].at(outputID).get_memory(); - Blob::Ptr bptr = _outputs[no.first]; - - copy_output_data(outputMemory, std::move(bptr), &batchOutputs[no.first][nb]); - } - } - } -} - -// ----------------------------------------------------------------------------------------- // -// ---------------------------- internal utils --------- 
----------------------------------- // -// ----------------------------------------------------------------------------------------- // -void InferRequestLegacy::setup_stream_graph() { - int streamID = 0; - auto& streamGraphs = static_cast(_exeNetwork.get())->m_graphs; - if (nullptr != streamExecutor) { - streamID = streamExecutor->GetStreamId(); - auto numGraphs = streamGraphs.size(); - streamID = streamID % numGraphs; - } - m_graph = streamGraphs[streamID]; - // in case of dynamic batch, check all input blobs and set new batch - if (m_graph->GetMaxDynamicBatchSize() > 1) { - for (auto& input : _networkInputs) { - auto node = findInputByNodeName(input.first); - bool is_dynamic = (node && node->get_output_partial_shape(0).is_dynamic()); - if (!is_dynamic) - continue; - // extract new batch size from blob - const auto batch_idx = m_graph->GetInputDynBatchDims()[input.first].first; - if (batch_idx >= 0) { - SetBatch(static_cast(_inputs[input.first]->getTensorDesc().getDims()[batch_idx])); - break; - } - } - } -} - -Blob::Ptr InferRequestLegacy::create_host_blob(const TensorDesc& desc, std::shared_ptr alloc) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::create_host_blob"); - auto blob = make_blob_with_precision(desc, alloc ? alloc : CreateDefaultAllocator()); - blob->allocate(); - checkAlloc(blob, str_host_mem_not_allocated); - return blob; -} - -Blob::Ptr InferRequestLegacy::create_shared_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, void* usm_host_mem) { - auto blob = std::make_shared(m_context, - m_graph->GetNetwork()->get_stream(), - desc, - layout, - usm_host_mem, - 0, - 0, - BlobType::BT_USM_SHARED); - if (!blob) - IE_THROW(NotAllocated) << "Failed to allocate shared host <-> device blob"; - blob->allocate(); - checkAlloc(blob, str_shared_mem_not_allocated); - return blob; -} - -void InferRequestLegacy::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::copy_output_data"); - auto& stream = m_graph->GetNetwork()->get_stream(); - switch (dst->getTensorDesc().getPrecision()) { - case Precision::FP64: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::FP32: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::FP16: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::I64: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::I32: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::I16: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::I8: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::U16: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::U32: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::U64: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::U8: copyResultToOutputBlob(src, dst, bi, stream); break; - default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision"; - } -} - -void InferRequestLegacy::copy_input_data(std::shared_ptr network, - const cldnn::primitive_id &inputName, - const cldnn::layout& inputLayout, - const Blob &inputBlob, buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::copy_input_data"); - - size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; - - cldnn::primitive_id internalName = "parameter:" + inputName; - auto locked = inputBlob.cbuffer(); - switch (inputBlob.getTensorDesc().getPrecision()) { - case Precision::FP32: { - float* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I32: { - int32_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I64: { - int64_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::FP16: { - uint16_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I8: { - int8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::U8: { - uint8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::BOOL: { - uint8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - default: - IE_THROW() << "The plugin does not support input " << inputBlob.getTensorDesc().getPrecision() << " precision"; - } -} - -void InferRequestLegacy::allocate_inputs() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::allocate_inputs"); - auto inputLayouts = m_graph->GetInputLayouts(); - - // allocate inputs - for (auto& ni : _networkInputs) { - std::string name = ni.first; - const TensorDesc& desc = ni.second->getTensorDesc(); - - bool is_nv12_input = false; - - auto parameter = std::find_if(_parameters.begin(), _parameters.end(), [&](const std::shared_ptr& node) { - return node->get_friendly_name() == name; - }); - - if (parameter != _parameters.end()) { - if (parameter->get()->output(0).get_rt_info().count(ov::preprocess::TensorInfoMemoryType::get_type_info_static())) { - std::string mem_type = parameter->get()->output(0).get_rt_info().at(ov::preprocess::TensorInfoMemoryType::get_type_info_static()) - .as().value; - if (mem_type.find(GPU_CONFIG_KEY(SURFACE)) != std::string::npos) { - is_nv12_input = true; - } - } - } - - if (!is_nv12_input) { - auto litr = inputLayouts.find(name); - if (litr == inputLayouts.end()) { - IE_THROW() << "Input layout for " << name << " is not found"; - } - - GPU_DEBUG_LOG << "[" << name << ": input blob]" << std::endl; - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { - TensorDesc desc_fp32 = desc; - desc_fp32.setPrecision(Precision::FP32); - auto blobPtr = create_device_blob(desc_fp32, litr->second); - _deviceInputs[name] = std::move(blobPtr); - Blob::Ptr inputBlob = create_host_blob(desc); - _inputs[name] = inputBlob; - } else { - if (m_graph->get_engine().use_unified_shared_memory()) { - // For USM case we create host blob using custom USM host allocator - // and then create shared device blob on top of this buffer - auto host_blob = create_host_blob(desc, std::make_shared(m_context)); - _inputs[name] = host_blob; - _deviceInputs[name] = create_shared_device_blob(desc, 
litr->second, host_blob->buffer().as()); - } else { - _inputs[name] = create_host_blob(desc); - _deviceInputs[name] = create_device_blob(desc, litr->second); - } - } - } - } -} - -void InferRequestLegacy::allocate_inputs_dynamic() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::allocate_inputs_dynamic"); - // allocate inputs - for (auto &input : m_graph->GetNetworkInputs()) { - InputInfo::Ptr ni = _networkInputs.at(input.first); - TensorDesc desc = input.second->getTensorDesc(); - - Blob::Ptr inputBlob = create_host_blob(desc); - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { - desc.setPrecision(Precision::FP32); - auto fp32inputBlob = InferenceEngine::make_shared_blob(desc); - fp32inputBlob->allocate(); - _inputs[input.first + fp32_suffix] = fp32inputBlob; - } - _inputs[input.first] = inputBlob; - } -} - -void InferRequestLegacy::allocate_outputs() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::allocate_outputs"); - // allocate outputs - for (auto& no : _networkOutputs) { - std::string outputID = m_graph->MapOutputName(no.first); - const cldnn::layout output_layout = m_graph->GetNetwork()->get_output_memory(outputID)->get_layout(); - TensorDesc desc = no.second->getTensorDesc(); - // Due to some reason TensorDesc in InferRequest contains wrong dims - // while ExecutableNetwork contains proper ones. Thus replace dims with once from exec network - // Can be removed once 76176 is resolved. - desc.setDims(m_graph->GetOutputSize(no.first)); - - GPU_DEBUG_LOG << "[" << no.first << ": output blob]" << std::endl; - - outputsMap[no.first] = outputID; - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16 || - desc.getPrecision() == Precision::U32 || desc.getPrecision() == Precision::U64 || - desc.getPrecision() == Precision::FP64) { - TensorDesc device_blob_desc = desc; - - if (desc.getPrecision() == Precision::U32 || desc.getPrecision() == Precision::U64) - device_blob_desc.setPrecision(Precision::I32); - else - device_blob_desc.setPrecision(Precision::FP32); - - auto host_blob = create_host_blob(desc); - _outputs[no.first] = std::move(host_blob); - auto device_blob = create_device_blob(device_blob_desc, output_layout); - _deviceOutputs[no.first] = device_blob; - } else { - if (m_graph->get_engine().use_unified_shared_memory()) { - // For USM case we create host blob using custom USM host allocator - // and then create shared device blob on top of this buffer - auto host_blob = create_host_blob(desc, std::make_shared(m_context)); - _outputs[no.first] = host_blob; - _deviceOutputs[no.first] = create_shared_device_blob(desc, output_layout, host_blob->buffer().as()); - } else { - _outputs[no.first] = create_host_blob(desc); - _deviceOutputs[no.first] = create_device_blob(desc, output_layout); - } - } - } -} - -void InferRequestLegacy::allocate_outputs_dynamic() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::allocate_outputs_dynamic"); - // allocate outputs - for (auto& no : m_graph->GetNetworkOutputs()) { - std::string outputID = m_graph->MapOutputName(no.first); - DataPtr oi = no.second; - TensorDesc desc = oi->getTensorDesc(); - - Blob::Ptr outputBlob = create_host_blob(desc); - _outputs[no.first] = outputBlob; - outputsMap[no.first] = std::move(outputID); - } -} - -void InferRequestLegacy::InferImpl() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::InferImpl"); - setup_stream_graph(); - std::lock_guard 
lk(m_graph->get_mutex()); - preprocess(); - enqueue(); - wait(); -} - -std::map InferRequestLegacy::GetPerformanceCounts() const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::GetPerformanceCounts"); - if (!m_useProfiling) { - IE_THROW() << "Performance counters were not enabled"; - } else { - return m_graph->GetPerformanceCounts(); - } -} - -void InferRequestLegacy::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob, - std::vector& dependencies) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::prepare_input"); - auto inputLayoutItr = m_graph->GetInputLayouts().find(inputName); - if (inputLayoutItr == m_graph->GetInputLayouts().end()) { - IE_THROW() << "Input name mismatch."; - } - auto reqBlob = _deviceInputs.at(inputName)->as(); - auto _nw_ptr = m_graph->GetNetwork(); - cldnn::primitive_id internalName = "parameter:" + inputName; - const auto& prec = inputBlob->getTensorDesc().getPrecision(); - auto remote_ptr = inputBlob->as(); - auto& stream = m_graph->GetNetwork()->get_stream(); - bool is_dev_input = remote_ptr != nullptr; - - switch (prec) { - case Precision::FP64: - case Precision::FP32: - case Precision::FP16: - case Precision::I8: - case Precision::U8: - case Precision::BOOL: - case Precision::I16: - case Precision::U16: - case Precision::I32: - case Precision::U32: - case Precision::U64: - case Precision::I64: { - auto impl = getBlobImpl(is_dev_input ? - remote_ptr : - reqBlob); - if (!impl->is_allocated()) { - IE_THROW() << str_input_not_allocated; - } - auto inputMem = impl->get_memory(); - - auto input_layout = m_graph->GetInputLayouts().find(inputName); - if (input_layout != m_graph->GetInputLayouts().end()) { - if (input_layout->second.format != inputMem->get_layout().format) { - inputMem = m_graph->GetNetwork()->get_engine().reinterpret_buffer(*inputMem, input_layout->second); - } - } - - if (!is_dev_input) { - if (prec == Precision::I16 || prec == Precision::U16 || prec == Precision::FP64) { - // GPU plugin doesn't support I16 input precision, - // so have to convert input data to fp32 precision - cldnn::mem_lock ptr{ inputMem, stream }; - if (prec == Precision::I16) { - convertAndCopy(inputBlob.get(), ptr.data()); - } else if (prec == Precision::U16) { - convertAndCopy(inputBlob.get(), ptr.data()); - } else { - convertAndCopy(inputBlob.get(), ptr.data()); - } - } else if (prec == Precision::U64 || prec == Precision::U32) { - cldnn::mem_lock ptr{ inputMem, stream }; - if (prec == Precision::U64) { - convertAndCopy(inputBlob.get(), ptr.data()); - } else { - convertAndCopy(inputBlob.get(), ptr.data()); - } - } else { - auto src_lock = inputBlob->cbuffer(); - auto src_ptr = src_lock.as(); - if (!same_host_mem(inputMem, src_ptr)) { - auto ev = inputMem->copy_from(stream, src_ptr); - dependencies.push_back(ev); - } - } - } - _nw_ptr->set_input_data(internalName, inputMem); - break; - } - default: - IE_THROW() << "Unsupported input precision " << prec; - } -} - -void InferRequestLegacy::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequestLegacy::prepare_output"); - Blob::Ptr reqBlob = _deviceOutputs.at(outputName); - cldnn::primitive_id internalName = outputsMap[outputName]; - auto _nw_ptr = m_graph->GetNetwork(); - auto remote_ptr = outputBlob->as(); - auto output_blob_ptr = (reqBlob != outputBlob && remote_ptr != nullptr) - ? 
remote_ptr - : reqBlob->as(); - auto impl = getBlobImpl(output_blob_ptr); - if (!impl->is_allocated()) { - IE_THROW(NotAllocated) << str_output_not_allocated; - } - auto outputMem = impl->get_memory(); - _nw_ptr->set_output_memory(internalName, std::move(outputMem)); -} - -InferenceEngine::Blob::Ptr InferRequestLegacy::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) { - if (m_graph->get_engine().use_unified_shared_memory()) { - auto blobPtr = std::make_shared(m_context, - m_graph->GetNetwork()->get_stream(), - desc, - layout, - nullptr, - 0, - 0, - BlobType::BT_USM_HOST_INTERNAL); - getBlobImpl(blobPtr.get())->allocate(); - checkAlloc(blobPtr, str_device_mem_not_allocated); - return blobPtr; - } else { - auto blobPtr = std::make_shared(m_context, - m_graph->GetNetwork()->get_stream(), - desc, - layout); - getBlobImpl(blobPtr.get())->allocate(); - checkAlloc(blobPtr, str_device_mem_not_allocated); - return blobPtr; - } -} - -std::vector> InferRequestLegacy::QueryState() { - std::vector> ret{}; - ret.reserve(variables_states_.size()); - for (const auto& pair : variables_states_) - ret.push_back(std::make_shared(pair.first, pair.second, m_graph->get_engine(), m_curBatch)); - return ret; -} - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp index d7a9649eaf532e..fde2e65d62c474 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/condition.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/condition.cpp @@ -5,8 +5,6 @@ #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/primitives/condition.hpp" -#include "ie_ngraph_utils.hpp" - namespace ov { namespace intel_gpu { @@ -16,33 +14,13 @@ const size_t idx_false = 1; static cldnn::condition::branch gen_branch(ProgramBuilder& p, const std::shared_ptr& op, size_t idx) { cldnn::condition::branch branch; const auto& internal_body = (idx == idx_true)? op->get_then_body() : op->get_else_body(); - InferenceEngine::CNNNetwork body_network(internal_body); - { - // CNNNetwork change the input/output data type to fp32 when input/output data type is fp16 - // To run internal body, rollback input/output data to original one. 
- size_t tidx = 0; - auto& model_inputs = internal_body->get_parameters(); - for (auto& in : body_network.getInputsInfo()) { - auto input_data_type = InferenceEngine::details::convertPrecision(model_inputs[tidx++]->get_output_tensor(0).get_element_type()); - if (in.second->getPrecision() != input_data_type) - in.second->setPrecision(input_data_type); - } - - tidx = 0; - for (auto& out : body_network.getOutputsInfo()) { - const auto& model_output = internal_body->get_output_op(tidx++); - auto output_data_type = InferenceEngine::details::convertPrecision(model_output->get_output_tensor(0).get_element_type()); - if (out.second->getPrecision() != output_data_type) - out.second->setPrecision(output_data_type); - } - } auto config = p.get_config(); config.set_property(ov::intel_gpu::max_dynamic_batch(1)); config.set_property(ov::intel_gpu::allow_new_shape_infer(op->is_dynamic())); - ProgramBuilder prog(body_network, p.get_engine(), config, false, false, nullptr, nullptr, p.get_task_executor(), true); - branch.inner_program = prog.GetCompiledProgram(); + ProgramBuilder prog(internal_body, p.get_engine(), config, false, false, p.get_task_executor(), true); + branch.inner_program = prog.get_compiled_program(); auto& input_map = branch.input_map; auto external_inputs = p.GetInputInfo(op); diff --git a/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp b/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp index ecad76f5ef7023..e70e30c15a7df9 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp @@ -23,11 +23,7 @@ static void CreateCommonConvertColorOp(ProgramBuilder& p, const std::shared_ptr< auto inputs = p.GetInputInfo(op); std::string layerName = layer_type_name_ID(op); - auto outDatatype = cldnn::element_type_to_data_type(op->get_input_element_type(0)); - auto outShape = tensor_from_dims(op->get_output_shape(0)); - outShape = { outShape.sizes()[0], outShape.sizes()[2], outShape.sizes()[3], outShape.sizes()[1] }; - - auto out_layout = cldnn::layout(outDatatype, cldnn::format::byxf, outShape); + auto batch = op->get_input_partial_shape(0)[0]; auto memory_type = cldnn::convert_color::memory_type::buffer; if (op->get_input_node_ptr(0)->output(0).get_rt_info().count(ov::preprocess::TensorInfoMemoryType::get_type_info_static())) { @@ -38,25 +34,21 @@ static void CreateCommonConvertColorOp(ProgramBuilder& p, const std::shared_ptr< } } - if (outShape.batch[0] > 1 && memory_type == cldnn::convert_color::memory_type::image) { + if (batch.is_static() && batch.get_length() > 1 && memory_type == cldnn::convert_color::memory_type::image) { std::vector convert_color_names; - for (int b = 0; b < outShape.batch[0]; ++b) { + for (int64_t b = 0; b < batch.get_length(); ++b) { cldnn::primitive::input_info_arr batched_inputs; for (size_t i = 0; i < inputs.size(); ++i) { batched_inputs.emplace_back(cldnn::input_info(inputs[i].pid + "_" + std::to_string(b), inputs[i].idx)); } cldnn::primitive_id batched_prim_id = layerName + "_" + std::to_string(b); convert_color_names.emplace_back(cldnn::input_info(batched_prim_id)); - auto new_shape = outShape; - new_shape.batch[0] = 1; - out_layout.set_tensor(new_shape); p.add_primitive(*op, cldnn::convert_color(batched_prim_id, batched_inputs, from_color, to_color, - memory_type, - out_layout)); + memory_type)); } p.add_primitive(*op, cldnn::concatenation(layerName, convert_color_names, 0)); } else { @@ -64,8 +56,7 @@ static void CreateCommonConvertColorOp(ProgramBuilder& p, const std::shared_ptr< 
inputs, from_color, to_color, - memory_type, - out_layout)); + memory_type)); } } diff --git a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp index 0f73c551c84622..f44dddb26ba0e6 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/loop.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/loop.cpp @@ -16,8 +16,6 @@ #include "intel_gpu/primitives/reorder.hpp" #include "intel_gpu/graph/topology.hpp" -#include "ie_ngraph_utils.hpp" - #include #include @@ -54,9 +52,6 @@ static void CreateLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { const auto& body_inputs = op->get_function()->get_parameters(); const auto& body_outputs = op->get_function()->get_results(); - InferenceEngine::CNNNetwork body_network(op->get_function()); - auto networkInputs = body_network.getInputsInfo(); - auto networkOutputs = body_network.getOutputsInfo(); // Set special body ports: current_iteration input , execution condition output auto special_body_ports = op->get_special_body_ports(); @@ -65,23 +60,17 @@ static void CreateLoopOp(ProgramBuilder& p, const std::shared_ptr& op) { auto current_iteration_input = body_inputs.at(special_body_ports.current_iteration_input_idx); body_current_iteration_id = layer_type_name_ID(current_iteration_input); std::string input_name = ov::op::util::create_ie_output_name(current_iteration_input); - const auto networkInput = networkInputs.at(input_name); - auto precision = InferenceEngine::details::convertPrecision(current_iteration_input->get_element_type()); - networkInput->setPrecision(precision); } cldnn::primitive_id body_execution_condition_id; if (special_body_ports.body_condition_output_idx >= 0) { auto body_condition_output = body_outputs.at(special_body_ports.body_condition_output_idx)->get_input_node_shared_ptr(0); body_execution_condition_id = layer_type_name_ID(body_condition_output); - std::string output_name = ov::op::util::create_ie_output_name(body_condition_output); - const auto networkOutput = networkOutputs.at(output_name); - networkOutput->setPrecision(InferenceEngine::Precision::I64); } - // get body topology from ngraph function - ProgramBuilder body_program(body_network, p.get_engine(), p.get_config(), true); - auto body_topology = *body_program.GetTopology(); + // get body topology from ov::Model + ProgramBuilder body_program(op->get_function(), p.get_engine(), p.get_config(), true); + auto body_topology = *body_program.get_topology(); // setup input_primitive_maps/ output_primitive_maps and back_edges std::vector input_primitive_maps; diff --git a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp index 144f3c3f01a1f8..63e0decb32fac8 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp @@ -19,122 +19,21 @@ #include "intel_gpu/primitives/data.hpp" #include "intel_gpu/primitives/concatenation.hpp" -using namespace InferenceEngine; - namespace ov { namespace intel_gpu { static void CreateParameterOp(ProgramBuilder& p, const std::shared_ptr& op) { - auto networkInputs = p.GetNetworkInputs(); - OPENVINO_ASSERT(networkInputs.find(op->get_friendly_name()) != networkInputs.end(), - "[GPU] Can't find input ", op->get_friendly_name(), " in InputsDataMap"); - - auto inputInfo = networkInputs.at(op->get_friendly_name()); - // first create and add the input layout - const auto inputDesc = inputInfo->getTensorDesc(); - InferenceEngine::Layout l = inputDesc.getLayout(); - InferenceEngine::Precision ip = 
inputDesc.getPrecision(); - auto input_pshape = op->get_partial_shape(); - if (!p.use_new_shape_infer()) { - if (input_pshape.size() < 4) { - input_pshape.insert(input_pshape.end(), 4 - input_pshape.size(), ov::Dimension(1)); - } - if (p.m_max_batch > 1) { - input_pshape[0] = ov::Dimension(p.m_curBatch); - } + if (!p.use_new_shape_infer() && input_pshape.size() < 4) { + input_pshape.insert(input_pshape.end(), 4 - input_pshape.size(), ov::Dimension(1)); } - cldnn::format inputFormat = cldnn::format::get_default_format(input_pshape.size()); - std::vector default_order(input_pshape.size()); - std::iota(default_order.begin(), default_order.end(), 0); - // For legacy API we need to handle NHWC as well, so check non default order - if (inputDesc.getBlockingDesc().getOrder() != default_order) { - inputFormat = FormatFromLayout(l); - } + cldnn::format input_format = cldnn::format::get_default_format(input_pshape.size()); + auto element_type = cldnn::element_type_to_data_type(convert_to_supported_device_type(op->get_output_element_type(0))); // look at the expected color format of this input - auto inputName = layer_type_name_ID(op); - auto preProcess = inputInfo->getPreProcess(); - size_t meanChannels = preProcess.getNumberOfChannels(); - cldnn::layout networkInputLayout(input_pshape, - cldnn::element_type_to_data_type(op->get_output_element_type(0)), - inputFormat); - cldnn::primitive_id meanBlobID = inputName + ProgramBuilder::m_meanValuesTag; - std::vector meanValues; - - if ((meanChannels > 0) && - (meanChannels != static_cast(networkInputLayout.feature()))) { - OPENVINO_THROW("Mismatched mean values channels in input ", inputName); - } - - switch (preProcess.getMeanVariant()) { - case NONE: - case MEAN_VALUE: { - if (meanChannels > 0) { - for (size_t c = 0; c < meanChannels; c++) { - if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10) - OPENVINO_THROW("not supporting stdScale yet in input ", inputName); - meanValues.push_back(preProcess[c]->meanValue); - } - } - break; - } - case MEAN_IMAGE: { - OPENVINO_ASSERT(meanChannels); - // first merge all mean values to a single blob - // todo make sure mean blob precision is the same as the input precision - auto meanDims = input_pshape; - // overwrite batches with 1 - switch (meanDims.size()) { - case 4: meanDims[0] = 1; - break; - default: - OPENVINO_THROW("Missing batch dimensions in input image"); - } - const TensorDesc desc(Precision::FP32, meanDims.to_shape(), TensorDesc::getLayoutByDims(meanDims.to_shape())); - TBlob meanBlob(desc); - meanBlob.allocate(); - auto meanBlobData = meanBlob.data(); - for (size_t c = 0; c < meanChannels; c++) { - if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10) - OPENVINO_THROW("not supporting stdScale yet in input ", inputName); - auto channelMeanBlob = std::dynamic_pointer_cast>(preProcess[c]->meanData); - auto channelSize = channelMeanBlob->size(); - auto channelBlobData = channelMeanBlob->data(); - for (size_t i = 0; i < channelSize; i++) { - meanBlobData[(c * channelSize) + i] = channelBlobData[i]; - } - } - // then create a data primitive for the mean values - auto meanBlobPtr = std::make_shared>(meanBlob); - - // mean values will use external format (sub in the input format before convert to new format) - cldnn::tensor meanBlobTensor(networkInputLayout.get_tensor()); - meanBlobTensor.batch[0] = 1; // mean values have no batches - cldnn::layout meanBlobLayout(cldnn::data_types::f32, cldnn::format::bfyx, meanBlobTensor); - - auto data = static_cast(meanBlobPtr->buffer()); - - auto bufIter = 
p.blobMemCache.find(std::make_pair(data, meanDims.to_shape())); - if (bufIter != p.blobMemCache.end()) { - meanBlobID = bufIter->second; - } else { - auto mem = p.get_engine().allocate_memory(meanBlobLayout, false); - cldnn::mem_lock tmpPointer{ mem, p.get_engine().get_service_stream() }; - auto buf = tmpPointer.data(); - auto bufSize = meanBlobLayout.bytes_count(); - - std::memcpy(&buf[0], &data[0], bufSize); - - p.add_primitive(*op, cldnn::data(meanBlobID, mem)); - p.blobMemCache[std::make_pair(data, meanDims.to_shape())] = meanBlobID; - } - break; - } - default: OPENVINO_THROW("Invalid mean variant in input ", inputName); - break; - } + auto input_name = layer_type_name_ID(op); + cldnn::layout input_layout(input_pshape, element_type, input_format); auto is_convert_color_type = [](const std::shared_ptr &node) { return ov::is_type(node) || @@ -182,80 +81,44 @@ static void CreateParameterOp(ProgramBuilder& p, const std::shared_ptr surfaces_inputs; for (size_t i = 0; i < batch; ++i) { if (batch > 1) suffix = "_" + std::to_string(i); - std::string batched_name = inputName + suffix; - p.inputLayouts.insert({ inputInfo->name() + suffix, networkInputLayout }); - p.add_primitive(*op, cldnn::input_layout(batched_name, networkInputLayout)); + std::string batched_name = input_name + suffix; + p.inputLayouts.insert({ op->get_friendly_name() + suffix, input_layout }); + p.add_primitive(*op, cldnn::input_layout(batched_name, input_layout)); - auto reorder_layout = networkInputLayout; + auto reorder_layout = input_layout; reorder_layout.format = cldnn::format::bfyx; - auto preprocessPrimID = "reorder:" + inputName + ProgramBuilder::m_preProcessTag + suffix; - auto reorder = cldnn::reorder(preprocessPrimID, + auto reorder_name = "reorder:" + input_name + ProgramBuilder::m_preProcessTag + suffix; + auto reorder = cldnn::reorder(reorder_name, cldnn::input_info(batched_name), reorder_layout); reorder.input_mem_type = cldnn::reorder::memory_type::surface; p.add_primitive(*op, reorder); - surfaces_inputs.push_back(cldnn::input_info(preprocessPrimID)); + surfaces_inputs.emplace_back(reorder_name); } if (batch > 1 && !is_convert_color_input) - p.add_primitive(*op, cldnn::concatenation(inputName, surfaces_inputs, 0)); + p.add_primitive(*op, cldnn::concatenation(input_name, surfaces_inputs, 0)); else - p.primitive_ids[inputName] = "reorder:" + inputName + ProgramBuilder::m_preProcessTag; - } else if (is_convert_color_input) { - networkInputLayout.format = cldnn::format::byxf; - - networkInputLayout.set_partial_shape({ input_pshape[0], input_pshape[3], input_pshape[1], input_pshape[2] }); - - p.inputLayouts.insert({ inputInfo->name(), networkInputLayout }); - p.add_primitive(*op, cldnn::input_layout(inputName, networkInputLayout)); + p.primitive_ids[input_name] = "reorder:" + input_name + ProgramBuilder::m_preProcessTag; } else { - auto preprocessPrimID = "reorder:" + inputName + ProgramBuilder::m_preProcessTag; - cldnn::layout inputLayout(networkInputLayout); - auto network_input_data_type = DataTypeFromPrecision(ip); - inputLayout.data_type = network_input_data_type; - p.inputLayouts.insert({ inputInfo->name(), inputLayout }); + auto reorder_name = "reorder:" + input_name + ProgramBuilder::m_preProcessTag; + p.inputLayouts.insert({ op->get_friendly_name(), input_layout }); - p.add_primitive(*op, cldnn::input_layout(inputName, inputLayout)); + p.add_primitive(*op, cldnn::input_layout(input_name, input_layout)); - switch (preProcess.getMeanVariant()) { - case NONE: { - // If mean value is not specified and the 
data type does not change, do not add post reorder - if (network_input_data_type != networkInputLayout.data_type || connected_to_quantize(op)) { - p.add_primitive(*op, cldnn::reorder(preprocessPrimID, - cldnn::input_info(inputName), - networkInputLayout, - meanValues, - cldnn::reorder_mean_mode::none), {inputName}); - } - break; - } - case MEAN_VALUE: { - p.add_primitive(*op, cldnn::reorder(preprocessPrimID, - cldnn::input_info(inputName), - networkInputLayout, - meanValues, - cldnn::reorder_mean_mode::subtract), {inputName}); - break; - } - case MEAN_IMAGE: { - p.add_primitive(*op, cldnn::reorder(preprocessPrimID, - cldnn::input_info(inputName), - networkInputLayout, - meanBlobID, - cldnn::reorder_mean_mode::subtract), {inputName}); - break; - } - default: OPENVINO_THROW("Invalid mean variant in input ", inputName); - break; + if (connected_to_quantize(op)) { + // Techically this reorder is not needed, but for some reason it impacts layout propagation logic + // TODO: Remove it and fix layout assignment & propagation passes + p.add_primitive(*op, cldnn::reorder(reorder_name, cldnn::input_info(input_name), input_layout), {input_name}); } } } diff --git a/src/plugins/intel_gpu/src/plugin/ops/result.cpp b/src/plugins/intel_gpu/src/plugin/ops/result.cpp index 109d405cc4ef3b..e8bb0ae2c60cb4 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/result.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/result.cpp @@ -12,88 +12,36 @@ #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/primitives/reorder.hpp" -using namespace InferenceEngine; - namespace ov { namespace intel_gpu { static void CreateResultOp(ProgramBuilder& p, const std::shared_ptr& op) { - OutputsDataMap networkOutputs = p.GetNetworkOutputs(); validate_inputs_count(op, {1}); auto prev = op->get_input_node_shared_ptr(0); - NGRAPH_SUPPRESS_DEPRECATED_START - auto inputID = ov::descriptor::get_ov_tensor_legacy_name(op->get_input_source_output(0).get_tensor()); - NGRAPH_SUPPRESS_DEPRECATED_END - if (inputID.empty()) { - inputID = prev->get_friendly_name(); + OPENVINO_SUPPRESS_DEPRECATED_START + auto input_id = ov::descriptor::get_ov_tensor_legacy_name(op->get_input_source_output(0).get_tensor()); + OPENVINO_SUPPRESS_DEPRECATED_END + if (input_id.empty()) { + input_id = prev->get_friendly_name(); if (prev->get_output_size() > 1) { - inputID += "." + std::to_string(op->get_input_source_output(0).get_index()); + input_id += "." 
+ std::to_string(op->get_input_source_output(0).get_index()); } } - auto it = networkOutputs.find(inputID); - OPENVINO_ASSERT(it != networkOutputs.end(), "[GPU] Can't find output ", inputID, " in OutputsDataMap"); - std::string originalOutName = it->first; - DataPtr outputData = it->second; - auto inputs = p.GetInputInfo(op); - const auto outputDesc = outputData->getTensorDesc(); - auto outputlayout = outputDesc.getLayout(); - - if (ov::is_type(prev) || - ov::is_type(prev) || - ov::is_type(prev) || - ov::is_type(prev)) { - outputlayout = NHWC; - } - // TODO: add precision check once there's an outputInfo object - if (outputlayout != NCHW && - // TODO: change 6d case once new layout added in IE - outputlayout != BLOCKED && - outputlayout != NCDHW && - outputlayout != NHWC && - outputlayout != CHW && - outputlayout != NC && - outputlayout != C && - outputlayout != SCALAR) { - OPENVINO_THROW("[GPU] Unsupported layout (", outputlayout, ") in output: ", originalOutName); - } auto out_rank = op->get_output_partial_shape(0).size(); auto out_format = cldnn::format::get_default_format(out_rank); - std::vector default_order(out_rank); - std::iota(default_order.begin(), default_order.end(), 0); - // For legacy API we need to handle NHWC as well, so check non default order - if (outputlayout == NHWC) { - out_format = FormatFromLayout(outputlayout); - } - auto outLayerName = layer_type_name_ID(op); - auto inputDataType = cldnn::element_type_to_data_type(op->get_input_element_type(0)); - Precision precision = outputData->getPrecision(); - auto outputDataType = DataTypeFromPrecision(precision); - cldnn::input_info outputID = inputs[0]; + auto out_primitive_name = layer_type_name_ID(op); + auto out_data_type = cldnn::element_type_to_data_type(convert_to_supported_device_type(op->get_input_element_type(0))); - // Even for result op, if reorder only performs type conversion, reorder is created in truncation mode - if (inputDataType != outputDataType) { - auto reorder_primitive = cldnn::reorder(outLayerName, - outputID, - out_format, - outputDataType, - std::vector(), - cldnn::reorder_mean_mode::subtract, - cldnn::padding(), - true); - p.add_primitive(*op, reorder_primitive, {originalOutName}); - } else { - auto reorder_primitive = cldnn::reorder(outLayerName, - outputID, - out_format, - outputDataType); - p.add_primitive(*op, reorder_primitive, {originalOutName}); - } - p.outputDims[originalOutName] = outputDesc.getDims(); - p.prevPrimitiveIDs[outLayerName] = {originalOutName}; + auto reorder_primitive = cldnn::reorder(out_primitive_name, + inputs[0], + out_format, + out_data_type); + p.add_primitive(*op, reorder_primitive, {input_id, op->get_friendly_name()}); + p.prevPrimitiveIDs[out_primitive_name] = {input_id}; } REGISTER_FACTORY_IMPL(v0, Result); diff --git a/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp b/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp index e6506e9f7fa66b..21c7d3a8167a91 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/tensor_iterator.cpp @@ -49,10 +49,8 @@ static cldnn::mutable_data CreateAdditionalOutputData(ProgramBuilder &p, const s static void CreateTensorIteratorOp(ProgramBuilder &p, const std::shared_ptr &op) { auto inputs = p.GetInputInfo(op); - // get body topology from ngraph function - InferenceEngine::CNNNetwork body_network(op->get_body()); - ProgramBuilder body_program(body_network, p.get_engine(), p.get_config(), true); - auto body_topology = *body_program.GetTopology(); + 
ProgramBuilder body_program(op->get_body(), p.get_engine(), p.get_config(), true); + auto body_topology = *body_program.get_topology(); // setup input_primitive_maps/ output_primitive_maps and back_edges const auto& loop_input_descs = op->get_input_descriptions(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp b/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp index 00837c3601d42e..ae7daf2b3992ce 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp @@ -4,16 +4,10 @@ #include "openvino/op/transpose.hpp" #include "openvino/op/constant.hpp" -#include "openvino/op/convert.hpp" -#include "openvino/op/nv12_to_rgb.hpp" -#include "openvino/op/nv12_to_bgr.hpp" -#include "openvino/op/i420_to_rgb.hpp" -#include "openvino/op/i420_to_bgr.hpp" #include "intel_gpu/plugin/program_builder.hpp" #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/primitives/permute.hpp" -#include "intel_gpu/primitives/reorder.hpp" namespace ov { namespace intel_gpu { @@ -30,39 +24,6 @@ static void CreateTransposeOp(ProgramBuilder& p, const std::shared_ptrcast_vector(); } - auto is_convert_color_type_impl = [](const std::shared_ptr &node) { - return ov::is_type(node) || - ov::is_type(node) || - ov::is_type(node) || - ov::is_type(node); - }; - - auto is_convert_color_type = [&is_convert_color_type_impl](const std::shared_ptr &node) { - if (ngraph::is_type(node)) { - return is_convert_color_type_impl(node->get_input_node_shared_ptr(0)); - } - return is_convert_color_type_impl(node); - }; - - // Handle Transpose operation related to ConvertColor operation: - // In case of ConvertColor operation we have NHWC (byxf) input format which should be converted to - // NCHW (bfyx) by this Permute, so we replace Permute with Reorder (to bfyx) primitve - auto input = op->get_input_size() > 0 ? op->get_input_node_shared_ptr(0) : nullptr; - // Handle the case ConvertColor -> FakeQuantize -> Permute - auto input1 = input ? (input->get_input_size() > 0 ? 
input->get_input_node_shared_ptr(0) : nullptr) : nullptr; - if (((input && is_convert_color_type(input)) || (input1 && is_convert_color_type(input1))) - && order == std::vector{0, 3, 1, 2}) { - auto precision = input->get_element_type(); - auto reorder_prim = cldnn::reorder(layerName, - inputs[0], - cldnn::format::bfyx, - cldnn::element_type_to_data_type(precision), - std::vector(), - cldnn::reorder_mean_mode::none); - p.add_primitive(*op, reorder_prim); - return; - } - int rank = std::max(4, static_cast(op->get_input_partial_shape(0).size())); if (order.empty()) { // if order size is less than 4 - fill the rest with just copy diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 579a47b1923814..55f4f5e7a42065 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -14,11 +14,16 @@ #include #include "intel_gpu/plugin/legacy_api_helper.hpp" +#include "intel_gpu/plugin/legacy_remote_context.hpp" +#include "openvino/core/deprecated.hpp" +#include "openvino/pass/visualize_tree.hpp" +#include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "openvino/core/dimension_tracker.hpp" #include "openvino/pass/manager.hpp" +#include "openvino/runtime/properties.hpp" #include "openvino/util/common_util.hpp" #include "intel_gpu/graph/serialization/layout_serializer.hpp" @@ -45,10 +50,6 @@ #undef DEVICE_TYPE #endif -using namespace InferenceEngine; -using namespace InferenceEngine::gpu; -using namespace InferenceEngine::details; - using ms = std::chrono::duration>; using Time = std::chrono::high_resolution_clock; @@ -65,15 +66,15 @@ namespace intel_gpu { #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY -void Plugin::register_primitives() { +void Plugin::register_primitives() const { #define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name) #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY } -ov::AnyMap Plugin::preprocess_config(const std::map& orig_config) const { +ov::AnyMap Plugin::preprocess_config(const ov::AnyMap& orig_config) const { // We can skip this conversion for new API once all meta plugins don't try to use legacy configs/metrics for new API internally - auto config = LegacyAPIHelper::convert_legacy_properties(orig_config, IsNewAPI()); + auto config = LegacyAPIHelper::convert_legacy_properties(orig_config, is_new_api()); // Code below is WA for issue 100498 auto hint_it = std::find_if(orig_config.begin(), orig_config.end(), [](const std::pair& kv) { @@ -81,31 +82,31 @@ ov::AnyMap Plugin::preprocess_config(const std::map& o }); if (hint_it != orig_config.end()) { - config[ov::hint::performance_mode.name()] = ov::util::from_string(hint_it->second, ov::hint::performance_mode); + config[ov::hint::performance_mode.name()] = ov::util::from_string(hint_it->second.as(), ov::hint::performance_mode); } return config; } -std::string Plugin::get_device_id_from_config(const std::map& config) const { - std::string device_id; - if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) { - device_id = config.at(PluginConfigParams::KEY_DEVICE_ID); +std::string Plugin::get_device_id_from_config(const ov::AnyMap& config) const { + std::string id; + if (config.find(ov::device::id.name()) != config.end()) { + id = config.at(ov::device::id.name()).as(); } - return device_id; + return id; } -std::string Plugin::get_device_id(const 
std::map& config) const { - std::string device_id = default_device_id; - if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) { - device_id = config.at(PluginConfigParams::KEY_DEVICE_ID); +std::string Plugin::get_device_id(const ov::AnyMap& config) const { + std::string id = m_default_device_id; + if (config.find(ov::device::id.name()) != config.end()) { + id = config.at(ov::device::id.name()).as(); } - return device_id; + return id; } void Plugin::transform_model(std::shared_ptr& model, const ExecutionConfig& config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::transform_model"); - auto deviceInfo = device_map.at(config.get_property(ov::device::id))->get_info(); + auto deviceInfo = m_device_map.at(config.get_property(ov::device::id))->get_info(); TransformationsPipeline transformations(config, deviceInfo); auto start = Time::now(); @@ -113,29 +114,48 @@ void Plugin::transform_model(std::shared_ptr& model, const ExecutionC GPU_DEBUG_LOG << "Transformations time: " << std::chrono::duration_cast(Time::now() - start).count() << " ms" << std::endl; } -InferenceEngine::CNNNetwork Plugin::clone_and_transform_model(const InferenceEngine::CNNNetwork& network, - const ExecutionConfig& config) const { +std::shared_ptr Plugin::clone_and_transform_model(const std::shared_ptr& model, const ExecutionConfig& config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::clone_and_transform_model"); + GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_DEFINE_MEM_LOGGER("Plugin::clone_and_transform_model"); - CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network); - - auto nGraphFunc = clonedNetwork.getFunction(); - if (nGraphFunc) { - transform_model(nGraphFunc, config); - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { - auto path_base = debug_config->dump_graphs + "/" + network.getName() + "_" + "transformed_func"; - ov::pass::Serialize(path_base + ".xml", path_base + ".bin").run_on_model(nGraphFunc); - } + + auto cloned_model = model->clone(); + OPENVINO_ASSERT(cloned_model != nullptr, "[GPU] Failed to clone model!"); + + GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { + auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name(); + ov::pass::Serialize(path_base + ".xml", path_base + ".bin").run_on_model(cloned_model); + ov::pass::VisualizeTree(path_base + ".dot").run_on_model(cloned_model); } - return clonedNetwork; + + transform_model(cloned_model, config); + + // Transformations for some reason may drop output tensor names, so here we copy those from the original model + auto new_results = cloned_model->get_results(); + auto old_results = model->get_results(); + OPENVINO_ASSERT(new_results.size() == old_results.size(), "[GPU] Unexpected outputs count change in transformed model", + "Before: ", old_results.size(), " After: ", new_results.size()); + for (size_t i = 0; i < model->get_results().size(); i++) { + auto new_res = new_results[i]; + auto old_res = old_results[i]; + + new_res->output(0).set_names(old_res->output(0).get_names()); + new_res->set_friendly_name(old_res->get_friendly_name()); + } + + GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) { + auto path_base = debug_config->dump_graphs + "/" + cloned_model->get_name() + "_" + "transformed_func"; + ov::pass::Serialize(path_base + ".xml", path_base + ".bin").run_on_model(cloned_model); + ov::pass::VisualizeTree(path_base + "_transformed.dot").run_on_model(cloned_model); + } + return cloned_model; } -std::map 
Plugin::get_default_contexts() const { +std::map Plugin::get_default_contexts() const { std::call_once(m_default_contexts_once, [this]() { // Create default context - for (auto& device : device_map) { - auto ctx = std::make_shared(GetName() + "." + device.first, std::vector{ device.second }); + for (auto& device : m_device_map) { + auto ctx = std::make_shared(get_device_name() + "." + device.first, std::vector{ device.second }); m_default_contexts.insert({device.first, ctx}); } }); @@ -143,72 +163,41 @@ std::map Plugin::get_default_contexts() const } Plugin::Plugin() { - _pluginName = "GPU"; + set_device_name("GPU"); register_primitives(); - // try loading gpu engine and get info from it - { - // Set OCL runtime which should be always available - cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl); - device_map = device_query.get_available_devices(); - // Set default configs for each device - for (auto& device : device_map) { - m_configs_map.insert({device.first, ExecutionConfig(ov::device::id(device.first))}); - } - } -} + // Set OCL runtime which should be always available + cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl); + m_device_map = device_query.get_available_devices(); -auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) { - for (const auto& ii : _networkInputs) { - auto input_precision = ii.second->getTensorDesc().getPrecision(); - if (input_precision != InferenceEngine::Precision::FP16 && - input_precision != InferenceEngine::Precision::FP32 && - input_precision != InferenceEngine::Precision::FP64 && - input_precision != InferenceEngine::Precision::U8 && - input_precision != InferenceEngine::Precision::I8 && - input_precision != InferenceEngine::Precision::I16 && - input_precision != InferenceEngine::Precision::U16 && - input_precision != InferenceEngine::Precision::I32 && - input_precision != InferenceEngine::Precision::U32 && - input_precision != InferenceEngine::Precision::I64 && - input_precision != InferenceEngine::Precision::U64 && - input_precision != InferenceEngine::Precision::BOOL) { - OPENVINO_THROW("Input image format ", input_precision, " is not supported yet..."); - } + // Set default configs for each device + for (const auto& device : m_device_map) { + m_configs_map.insert({device.first, ExecutionConfig(ov::device::id(device.first))}); } -}; - -IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, - const std::map &orig_config) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl"); - // verification of supported input - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - check_inputs(_networkInputs); +} +std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model"); std::string device_id = get_device_id(orig_config); auto context = get_default_context(device_id); - OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id); + OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(preprocess_config(orig_config)); - config.apply_user_properties(context->get_impl()->get_engine().get_device_info()); + 
config.apply_user_properties(context->get_engine().get_device_info()); - auto transformedNetwork = clone_and_transform_model(network, config); + auto transformed_model = clone_and_transform_model(model, config); { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork"); - CompiledModel::Ptr exeNetwork = std::make_shared(transformedNetwork, context, config); - return exeNetwork; + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::compile_model::CreateCompiledModel"); + return std::make_shared(transformed_model, shared_from_this(), context, config); } } -IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, - const InferenceEngine::RemoteContext::Ptr &context, - const std::map &orig_config) { - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - check_inputs(_networkInputs); - +std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, + const ov::AnyMap& orig_config, + const ov::SoPtr& context) const { auto context_impl = get_context_impl(context); auto device_id = ov::DeviceIDParser{context_impl->get_device_name()}.get_device_id(); @@ -218,69 +207,53 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine config.set_user_property(preprocess_config(orig_config)); config.apply_user_properties(context_impl->get_engine().get_device_info()); - auto transformedNetwork = clone_and_transform_model(network, config); - return std::make_shared(transformedNetwork, context, config); + auto transformed_model = clone_and_transform_model(model, config); + return std::make_shared(transformed_model, shared_from_this(), context_impl, config); } -InferenceEngine::RemoteContext::Ptr Plugin::CreateContext(const AnyMap& params) { - if (params.empty()) { - return get_default_context(default_device_id); - } - - std::vector known_contexts; - for (auto& c : get_default_contexts()) { - known_contexts.push_back(c.second->get_impl()); - } - std::string context_type = extract_object(params, GPU_PARAM_KEY(CONTEXT_TYPE)); - - if (GPU_PARAM_VALUE(OCL) == context_type) { - return std::make_shared(known_contexts, params); - } else if (GPU_PARAM_VALUE(VA_SHARED) == context_type) { -#ifdef _WIN32 - return std::make_shared(known_contexts, params); -#else - return std::make_shared(known_contexts, params); -#endif +ov::SoPtr Plugin::create_context(const ov::AnyMap& remote_properties) const { + if (remote_properties.empty()) { + return get_default_context(m_default_device_id); } - - OPENVINO_ASSERT(false, "[GPU] Unsupported context type passed to CreateContext method: ", context_type); + return wrap_if_old_api(std::make_shared(get_default_contexts(), remote_properties), is_new_api()); } -RemoteCLContext::Ptr Plugin::get_default_context(const std::string& device_id) const { +std::shared_ptr Plugin::get_default_context(const std::string& device_id) const { auto contexts = get_default_contexts(); OPENVINO_ASSERT(contexts.count(device_id), "[GPU] Context was not initialized for ", device_id, " device"); return contexts.at(device_id); } -InferenceEngine::RemoteContext::Ptr Plugin::GetDefaultContext(const AnyMap& params) { - std::string device_id = default_device_id; +ov::SoPtr Plugin::get_default_context(const AnyMap& params) const { + std::string device_id = m_default_device_id; if (params.find(CONFIG_KEY(DEVICE_ID)) != params.end()) device_id = params.at(CONFIG_KEY(DEVICE_ID)).as(); - return get_default_context(device_id); + auto default_ctx = 
get_default_context(device_id); + return wrap_if_old_api(get_default_context(device_id), is_new_api()); } -void Plugin::SetConfig(const std::map &config) { - auto update_config = [this](ExecutionConfig& config, const std::map& user_config) { +void Plugin::set_property(const ov::AnyMap &config) { + auto update_config = [this](ExecutionConfig& config, const ov::AnyMap& user_config) { config.set_user_property(preprocess_config(user_config)); // Check that custom layers config can be loaded if (user_config.find(ov::intel_gpu::config_file.name()) != user_config.end()) { CustomLayerMap custom_layers; - auto custom_layers_config = user_config.at(ov::intel_gpu::config_file.name()); + auto custom_layers_config = user_config.at(ov::intel_gpu::config_file.name()).as(); CustomLayer::LoadFromFile(custom_layers_config, custom_layers, custom_layers_config.empty()); } }; if (config.find(ov::internal::config_device_id.name()) != config.end()) { - std::string device_id = config.at(ov::internal::config_device_id.name()); + std::string device_id = config.at(ov::internal::config_device_id.name()).as(); auto config_for_device = config; config_for_device.erase(ov::internal::config_device_id.name()); update_config(m_configs_map.at(device_id), config_for_device); } else { std::string device_id = get_device_id_from_config(config); if (!device_id.empty()) { - default_device_id = device_id; + m_default_device_id = device_id; update_config(m_configs_map.at(device_id), config); } else { for (auto& conf : m_configs_map) { @@ -290,80 +263,45 @@ void Plugin::SetConfig(const std::map &config) { } } -QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network, - const std::map& orig_config) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::QueryNetwork"); - QueryNetworkResult res; +ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::query_model"); + ov::SupportedOpsMap res; std::string device_id = get_device_id(orig_config); - auto ctx = get_default_context(device_id)->get_impl(); + auto ctx = get_default_context(device_id); ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(preprocess_config(orig_config)); config.apply_user_properties(ctx->get_engine().get_device_info()); ProgramBuilder prog(ctx->get_engine(), config); - bool dyn_shape_batch_found = false; - - auto model = network.getFunction(); - OPENVINO_ASSERT(model != nullptr, "[GPU] Only ngraph-based models are supported!"); - - auto supported = GetSupportedNodes(model, - [&](std::shared_ptr& model) { - std::map shapes; - std::map> batch_dim; - dyn_shape_batch_found = prog.IsDynBatchModel(model, shapes, batch_dim); - transform_model(model, config); - }, - [&](std::shared_ptr node) { - if (node->is_dynamic()) { - if (!dyn_shape_batch_found) - return false; - - auto pshape = node->get_output_partial_shape(0); - if (pshape.rank().is_dynamic() || pshape.size() > cldnn::layout::max_rank()) - return false; - - int dynCount = 0; - int64_t batch_idx = -1; - for (size_t i = 0; i < pshape.size(); i++) { - if (pshape[i].is_dynamic()) { - dynCount++; - if (batch_idx < 0) { - batch_idx = i; - } - } - } - - if (dynCount != 1) - return false; // more than one dimension is dynamic - int64_t max_batch = pshape[batch_idx].get_max_length(); - if (max_batch <= 1) - return false; - - return true; - } - return prog.IsOpSupported(network, node); - }); - - for (auto&& layerName : supported) { - 
res.supportedLayersMap.emplace(layerName, ctx->get_device_name()); + auto supported = ov::get_supported_nodes(model, + [&config,this](std::shared_ptr& model) { + std::map shapes; + std::map> batch_dim; + transform_model(model, config); + }, + [&prog](std::shared_ptr node) { + return prog.is_op_supported(node); + }); + + for (auto&& op_name : supported) { + res.emplace(op_name, ctx->get_device_name()); } return res; } -InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& networkModel, - const std::map& config) { +std::shared_ptr Plugin::import_model(std::istream& model, const ov::AnyMap& config) const { std::string device_id = get_device_id(config); auto context = get_default_context(device_id); - return ImportNetwork(networkModel, context, config); + return import_model(model, { context, nullptr }, config); } -InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& orig_config) { +std::shared_ptr Plugin::import_model(std::istream& model, + const ov::SoPtr& context, + const ov::AnyMap& orig_config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork"); auto context_impl = get_context_impl(context); @@ -374,196 +312,90 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istr config.apply_user_properties(context_impl->get_engine().get_device_info()); { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::ImportNetwork::CreateExeNetwork"); - cldnn::BinaryInputBuffer ib(networkModel, context_impl->get_engine()); - - InputsDataMap inputs; - OutputsDataMap outputs; - std::vector> new_params; - std::vector> new_results; - - // InputsInfo and OutputsInfor for CNNNetwork - { - size_t inputSize; - ib >> inputSize; - - for (size_t idx = 0; idx < inputSize; ++idx) { - std::string name; - std::string precision; - std::string layout; - InferenceEngine::SizeVector dims; - ib >> name; - ib >> precision; - ib >> layout; - ib >> dims; - - DataPtr input = std::make_shared(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout)); - input->setDims(dims); - InputInfo::Ptr infoNew = std::make_shared(); - infoNew->setInputData(input); - inputs.emplace(std::make_pair(name, infoNew)); - } - - size_t outputSize; - ib >> outputSize; - - for (size_t idx = 0; idx < outputSize; ++idx) { - std::string name; - std::string precision; - std::string layout; - InferenceEngine::SizeVector dims; - ib >> name; - ib >> precision; - ib >> layout; - ib >> dims; - - DataPtr output = std::make_shared(name, Precision::FromStr(precision), cldnn::serial_util::layout_from_string(layout)); - output->setDims(dims); - outputs.emplace(std::make_pair(name, output)); - } - } + cldnn::BinaryInputBuffer ib(model, context_impl->get_engine()); - { - size_t num_params; - ib >> num_params; - - for (size_t idx = 0; idx < num_params; ++idx) { - std::string param_name; - ib >> param_name; - ov::element::Type param_element_type; - std::string str_element_type; - ib >> str_element_type; - std::stringstream oss(str_element_type); - oss >> param_element_type; - ov::PartialShape param_shape; - ib >> param_shape; - std::string str_layout; - ib >> str_layout; - ov::Layout param_layout(str_layout); - std::unordered_set param_names; - size_t num_names; - ib >> num_names; - for (size_t i = 0; i < num_names; ++i) { - std::string name; - ib >> name; - param_names.emplace(name); - } - - auto new_param = std::make_shared(param_element_type, 
param_shape); - new_param->set_friendly_name(param_name); - new_param->set_element_type(param_element_type); - new_param->set_layout(param_layout); - new_param->output(0).get_tensor().set_names(param_names); - new_param->validate_and_infer_types(); - new_params.emplace_back(new_param); - } - } - - { - size_t num_results; - ib >> num_results; - - for (size_t idx = 0; idx < num_results; ++idx) { - ov::element::Type fake_element_type; - std::string str_element_type; - ib >> str_element_type; - std::stringstream oss(str_element_type); - oss >> fake_element_type; - - ov::PartialShape fake_shape; - ib >> fake_shape; - - std::string fake_name; - ib >> fake_name; - - std::string param_name; - ib >> param_name; - - std::string str_layout; - ib >> str_layout; - ov::Layout param_layout(str_layout); - - std::unordered_set param_names; - size_t num_names; - ib >> num_names; - for (size_t i = 0; i < num_names; ++i) { - std::string name; - ib >> name; - param_names.emplace(name); - } - - auto fake_param = std::make_shared(fake_element_type, fake_shape); - fake_param->set_friendly_name(fake_name); - fake_param->validate_and_infer_types(); - - auto new_result = std::make_shared(fake_param); - new_result->set_friendly_name(param_name); - new_result->set_layout(param_layout); - new_result->output(0).get_tensor().set_names(param_names); - new_result->validate_and_infer_types(); - new_results.emplace_back(new_result); - } - } - - CompiledModel::Ptr exeNetwork; + CompiledModel::Ptr compiled_model; bool is_dynamic; ib >> is_dynamic; if (is_dynamic) { std::string xmlString, xmlInOutString; - InferenceEngine::Blob::Ptr dataBlob; + ov::Tensor data_tensor; ov::pass::StreamSerialize::DataHeader hdr = {}; - networkModel.read(reinterpret_cast(&hdr), sizeof hdr); + model.read(reinterpret_cast(&hdr), sizeof hdr); // read blob content - networkModel.seekg(hdr.consts_offset); + model.seekg(hdr.consts_offset); if (hdr.consts_size) { - dataBlob = InferenceEngine::make_shared_blob( - InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, {hdr.consts_size}, InferenceEngine::Layout::C)); - dataBlob->allocate(); - networkModel.read(dataBlob->buffer(), hdr.consts_size); + data_tensor = ov::Tensor(ov::element::u8, {hdr.consts_size}); + model.read(static_cast(data_tensor.data()), hdr.consts_size); } // read XML content - networkModel.seekg(hdr.model_offset); + model.seekg(hdr.model_offset); xmlString.resize(hdr.model_size); - networkModel.read(const_cast(xmlString.c_str()), hdr.model_size); + model.read(&xmlString[0], hdr.model_size); - auto transformedNetwork = GetCore()->ReadNetwork(xmlString, std::move(dataBlob), true); - exeNetwork = std::make_shared(transformedNetwork, context, config, &inputs, &outputs); + auto transformed_model = get_core()->read_model(xmlString, data_tensor, true); + compiled_model = std::make_shared(transformed_model, shared_from_this(), context_impl, config); } else { - exeNetwork = std::make_shared(ib, context, config, &inputs, &outputs); - exeNetwork->SetPointerToPlugin(shared_from_this()); + compiled_model = std::make_shared(ib, shared_from_this(), context_impl, config); } - exeNetwork->setNetworkInputs(inputs); - exeNetwork->setNetworkOutputs(outputs); - exeNetwork->setInputs(new_params); - exeNetwork->setOutputs(new_results); - return exeNetwork; + return compiled_model; } } -Parameter Plugin::GetConfig(const std::string& name, const std::map& options) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetConfig"); - OPENVINO_ASSERT(!device_map.empty(), "[GPU] Can't get ", 
name, " property as no supported devices found or an error happened during devices query.\n" - "[GPU] Please check OpenVINO documentation for GPU drivers setup guide.\n"); - std::string device_id = default_device_id; +ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::get_property"); + + // The metrics below don't depend on the device ID, so we should handle those + // earler than querying actual ID to avoid exceptions when no devices are found + if (name == ov::supported_properties) { + return decltype(ov::supported_properties)::value_type {get_supported_properties()}; + } else if (ov::internal::supported_properties == name) { + return decltype(ov::internal::supported_properties)::value_type{get_supported_internal_properties()}; + } else if (name == ov::available_devices) { + std::vector available_devices = { }; + for (auto const& dev : m_device_map) + available_devices.push_back(dev.first); + return decltype(ov::available_devices)::value_type {available_devices}; + } else if (name == ov::internal::caching_properties) { + return decltype(ov::internal::caching_properties)::value_type(get_caching_properties()); + } + + OPENVINO_SUPPRESS_DEPRECATED_START + if (name == METRIC_KEY(SUPPORTED_METRICS)) { + IE_SET_METRIC_RETURN(SUPPORTED_METRICS, LegacyAPIHelper::get_supported_metrics()); + } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { + IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, LegacyAPIHelper::get_supported_configs()); + } else if (name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) { + IE_SET_METRIC_RETURN(IMPORT_EXPORT_SUPPORT, true); + } + OPENVINO_SUPPRESS_DEPRECATED_END + + OPENVINO_ASSERT(!m_device_map.empty(), "[GPU] Can't get ", name, " property as no supported devices found or an error happened during devices query.\n" + "[GPU] Please check OpenVINO documentation for GPU drivers setup guide.\n"); + + if (is_metric(name)) { + return get_metric(name, options); + } + + std::string device_id = m_default_device_id; if (options.find(ov::device::id.name()) != options.end()) { device_id = options.find(ov::device::id.name())->second.as(); } - - OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] GetConfig: Couldn't find config for GPU with id ", device_id); + OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] get_property: Couldn't find config for GPU with id ", device_id); const auto& c = m_configs_map.at(device_id); auto actual_name = name; - if (LegacyAPIHelper::is_legacy_property({name, nullptr}, IsNewAPI())) { + if (LegacyAPIHelper::is_legacy_property({name, nullptr}, is_new_api())) { actual_name = LegacyAPIHelper::convert_legacy_property({name, nullptr}).first; } auto val = c.get_property(actual_name); - if (LegacyAPIHelper::is_legacy_property({name, nullptr}, IsNewAPI())) { + if (LegacyAPIHelper::is_legacy_property({name, nullptr}, is_new_api())) { val = LegacyAPIHelper::convert_to_legacy_property({actual_name, val}).second; } @@ -587,61 +419,56 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s return ret_str; }; -Parameter Plugin::GetMetric(const std::string& name, const std::map& options) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetMetric"); +bool Plugin::is_metric(const std::string& name) const { + auto all_properties = get_supported_properties(); + auto internal_properties = get_supported_internal_properties(); + auto caching_properties = get_caching_properties(); + auto 
legacy_metrics = LegacyAPIHelper::get_supported_metrics(); + auto legacy_configs = LegacyAPIHelper::get_supported_configs(); + all_properties.emplace_back(ov::internal::supported_properties.name(), ov::PropertyMutability::RO); + all_properties.insert(all_properties.end(), internal_properties.begin(), internal_properties.end()); + all_properties.insert(all_properties.end(), caching_properties.begin(), caching_properties.end()); + for (auto& m : legacy_metrics) { + all_properties.emplace_back(m, ov::PropertyMutability::RO); + } + for (auto& c : legacy_configs) { + all_properties.emplace_back(c, ov::PropertyMutability::RW); + } + auto it = std::find(all_properties.begin(), all_properties.end(), name); + OPENVINO_ASSERT(it != all_properties.end(), "[GPU] Property ", name, " is not in a list of supported properties"); + + return !it->is_mutable(); +} + +ov::Any Plugin::get_metric(const std::string& name, const ov::AnyMap& options) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::get_metric"); GPU_DEBUG_GET_INSTANCE(debug_config); OPENVINO_SUPPRESS_DEPRECATED_START - // The metrics below don't depend on the device ID, so we should handle those - // earler than querying actual ID to avoid exceptions when no devices are found - if (name == ov::supported_properties) { - return decltype(ov::supported_properties)::value_type {get_supported_properties()}; - } else if (ov::internal::supported_properties == name) { - return decltype(ov::internal::supported_properties)::value_type{get_supported_internal_properties()}; - } else if (name == METRIC_KEY(SUPPORTED_METRICS)) { - IE_SET_METRIC_RETURN(SUPPORTED_METRICS, LegacyAPIHelper::get_supported_metrics()); - } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { - IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, LegacyAPIHelper::get_supported_configs()); - } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) { - std::vector availableDevices = { }; - for (auto const& dev : device_map) - availableDevices.push_back(dev.first); - return decltype(ov::available_devices)::value_type {availableDevices}; - } else if (name == ov::internal::caching_properties) { - std::vector cachingProperties; - cachingProperties.push_back(ov::PropertyName(ov::device::architecture.name(), PropertyMutability::RO)); - cachingProperties.push_back(ov::PropertyName(ov::intel_gpu::execution_units_count.name(), PropertyMutability::RO)); - cachingProperties.push_back(ov::PropertyName(ov::intel_gpu::driver_version.name(), PropertyMutability::RO)); - cachingProperties.push_back(ov::PropertyName(ov::hint::inference_precision.name(), PropertyMutability::RW)); - cachingProperties.push_back(ov::PropertyName(ov::hint::execution_mode.name(), PropertyMutability::RW)); - return decltype(ov::internal::caching_properties)::value_type(cachingProperties); - } else if (name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) { - IE_SET_METRIC_RETURN(IMPORT_EXPORT_SUPPORT, true); - } - auto device_id = GetConfig(ov::device::id.name(), options).as(); + auto device_id = get_property(ov::device::id.name(), options).as(); - auto iter = device_map.find(std::to_string(cldnn::device_query::device_id)); - if (iter == device_map.end()) - iter = device_map.find(device_id); - if (iter == device_map.end()) - iter = device_map.begin(); + auto iter = m_device_map.find(std::to_string(cldnn::device_query::device_id)); + if (iter == m_device_map.end()) + iter = m_device_map.find(device_id); + if (iter == m_device_map.end()) + iter = m_device_map.begin(); auto device = iter->second; auto device_info = device->get_info(); - 
bool is_new_api = IsNewAPI(); if (name == ov::intel_gpu::device_total_mem_size) { return decltype(ov::intel_gpu::device_total_mem_size)::value_type {device_info.max_global_mem_size}; } else if (name == ov::device::type) { - if (is_new_api) { + if (is_new_api()) { auto dev_type = device_info.dev_type == cldnn::device_type::discrete_gpu ? ov::device::Type::DISCRETE : ov::device::Type::INTEGRATED; return decltype(ov::device::type)::value_type {dev_type}; } else { - auto dev_type = device_info.dev_type == cldnn::device_type::discrete_gpu ? Metrics::DeviceType::discrete : Metrics::DeviceType::integrated; + auto dev_type = device_info.dev_type == cldnn::device_type::discrete_gpu ? InferenceEngine::Metrics::DeviceType::discrete + : InferenceEngine::Metrics::DeviceType::integrated; IE_SET_METRIC_RETURN(DEVICE_TYPE, dev_type); } } else if (name == ov::device::gops) { - if (is_new_api) { + if (is_new_api()) { std::map gops; gops[element::i8] = device->get_gops(cldnn::data_types::i8); gops[element::u8] = device->get_gops(cldnn::data_types::u8); @@ -689,11 +516,11 @@ Parameter Plugin::GetMetric(const std::string& name, const std::mapget_impl(); + const auto& ctx = get_default_context(device_id); return decltype(ov::intel_gpu::memory_statistics)::value_type {ctx->get_engine().get_memory_statistics()}; } else if (name == METRIC_KEY(MAX_BATCH_SIZE) || name == ov::max_batch_size) { - return decltype(ov::max_batch_size)::value_type {static_cast(get_max_batch_size(options))}; + return decltype(ov::max_batch_size)::value_type {get_max_batch_size(options)}; } else if (name == ov::intel_gpu::driver_version) { return decltype(ov::intel_gpu::driver_version)::value_type {device_info.driver_version}; } else if (name == ov::intel_gpu::device_id) { @@ -718,6 +545,18 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map Plugin::get_caching_properties() const { + static const std::vector caching_properties = { + ov::PropertyName{ov::device::architecture.name(), PropertyMutability::RO}, + ov::PropertyName{ov::intel_gpu::execution_units_count.name(), PropertyMutability::RO}, + ov::PropertyName{ov::intel_gpu::driver_version.name(), PropertyMutability::RO}, + ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RW}, + }; + + return caching_properties; +} + std::vector Plugin::get_supported_properties() const { static const std::vector supported_properties = { // Metrics @@ -771,30 +610,30 @@ std::vector Plugin::get_supported_internal_properties() const std::vector Plugin::get_device_capabilities(const cldnn::device_info& info) const { std::vector capabilities; - capabilities.push_back(ov::device::capability::FP32); - capabilities.push_back(ov::device::capability::BIN); - if (!IsNewAPI()) - capabilities.push_back(METRIC_VALUE(BATCHED_BLOB)); + capabilities.emplace_back(ov::device::capability::FP32); + capabilities.emplace_back(ov::device::capability::BIN); + if (!is_new_api()) + capabilities.emplace_back(METRIC_VALUE(BATCHED_BLOB)); if (info.supports_fp16) - capabilities.push_back(ov::device::capability::FP16); + capabilities.emplace_back(ov::device::capability::FP16); if (info.supports_imad || info.supports_immad) - capabilities.push_back(ov::device::capability::INT8); + capabilities.emplace_back(ov::device::capability::INT8); if (info.supports_immad) - capabilities.push_back(ov::intel_gpu::capability::HW_MATMUL); - capabilities.push_back(ov::device::capability::EXPORT_IMPORT); + 
capabilities.emplace_back(ov::intel_gpu::capability::HW_MATMUL); + capabilities.emplace_back(ov::device::capability::EXPORT_IMPORT); return capabilities; } -uint32_t Plugin::get_max_batch_size(const std::map& options) const { +uint32_t Plugin::get_max_batch_size(const ov::AnyMap& options) const { GPU_DEBUG_GET_INSTANCE(debug_config); - auto device_id = GetConfig(ov::device::id.name(), options).as(); - auto context = get_default_contexts().at(device_id)->get_impl(); + auto device_id = get_property(ov::device::id.name(), options).as(); + auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); const auto& config = m_configs_map.at(device_id); uint32_t n_streams = static_cast(config.get_property(ov::num_streams)); uint64_t occupied_device_mem = 0; - auto statistic_result = GetMetric(ov::intel_gpu::memory_statistics.name(), options).as>(); + auto statistic_result = get_metric(ov::intel_gpu::memory_statistics.name(), options).as>(); auto occupied_usm_dev = statistic_result.find("usm_device_current"); if (occupied_usm_dev != statistic_result.end()) { occupied_device_mem = occupied_usm_dev->second; @@ -811,21 +650,26 @@ uint32_t Plugin::get_max_batch_size(const std::map& opti return static_cast(max_batch_size); } - auto it_streams = options.find("GPU_THROUGHPUT_STREAMS") != options.end() ? options.find("GPU_THROUGHPUT_STREAMS") : - options.find(ov::num_streams.name()) != options.end() ? options.find(ov::num_streams.name()) : - options.end(); - if (it_streams != options.end()) { - if (it_streams->second.is()) { - n_streams = it_streams->second.as(); - } else if (it_streams->second.is()) { - n_streams = it_streams->second.as(); - } else if (it_streams->second.is()) { - auto n_streams_str = it_streams->second.as(); - if (n_streams_str != CONFIG_VALUE(GPU_THROUGHPUT_AUTO) && - n_streams_str != util::to_string(ov::streams::AUTO)) { + const uint32_t default_streams_for_tput = 2; + if (options.count(ov::num_streams.name()) > 0) { + auto streams = options.at(ov::num_streams.name()).as(); + if (streams == ov::streams::AUTO) { + n_streams = std::max(default_streams_for_tput, device_info.num_ccs); + } else { + n_streams = static_cast(streams.num); + } + } else if (options.count(CONFIG_KEY(GPU_THROUGHPUT_STREAMS)) > 0) { + auto streams = options.at(CONFIG_KEY(GPU_THROUGHPUT_STREAMS)); + if (streams.is()) { + n_streams = streams.as(); + } else if (streams.is()) { + n_streams = streams.as(); + } else if (streams.is()) { + auto n_streams_str = streams.as(); + if (n_streams_str != CONFIG_VALUE(GPU_THROUGHPUT_AUTO)) { OPENVINO_THROW("[GPU_MAX_BATCH_SIZE] bad casting: GPU_THROUGHPUT_STREAMS should be either of uint32_t type or \"GPU_THROUGHPUT_AUTO\""); } - n_streams = std::max(/* config.GetDefaultNStreamsForThroughputMode() */2u, device_info.num_ccs); + n_streams = std::max(default_streams_for_tput, device_info.num_ccs); } else { OPENVINO_THROW("[GPU_MAX_BATCH_SIZE] bad casting: GPU_THROUGHPUT_STREAMS should be either of uint32_t type or \"GPU_THROUGHPUT_AUTO\""); } @@ -846,18 +690,17 @@ uint32_t Plugin::get_max_batch_size(const std::map& opti } } - std::shared_ptr model; + std::shared_ptr model; auto model_param = options.find(ov::hint::model.name())->second; - if (model_param.is>()) { - model = model_param.as>(); + if (model_param.is>()) { + model = model_param.as>(); } else { OPENVINO_THROW("[GPU_MAX_BATCH_SIZE] ov::hint::model should be std::shared_ptr type"); } - InferenceEngine::CNNNetwork network(model); size_t base_batch_size = 16; // 
empirically decided for DG1 - auto& engine = get_default_context(device_id)->get_impl()->get_engine(); + auto& engine = get_default_context(device_id)->get_engine(); std::shared_ptr program; @@ -866,19 +709,17 @@ uint32_t Plugin::get_max_batch_size(const std::map& opti base_batch_size = (user_specified_base_batch_size != base_batch_size) ? user_specified_base_batch_size : base_batch_size; } - auto cloned_network = InferenceEngine::details::cloneNetwork(network); - auto inputs_info = cloned_network.getInputsInfo(); - ICNNNetwork::InputShapes new_shapes; + auto cloned_model = model->clone(); try { std::set> batched_inputs; - auto function = InferenceEngine::details::cloneNetwork(cloned_network).getFunction(); + auto tmp_model = cloned_model->clone(); ov::pass::Manager m; m.register_pass(); m.register_pass(true, false); - m.run_passes(function); - const auto& params = function->get_parameters(); + m.run_passes(tmp_model); + const auto& params = tmp_model->get_parameters(); for (size_t input_id = 0; input_id < params.size(); input_id++) { const auto& input = params[input_id]; const auto& shape = input->get_partial_shape(); @@ -907,20 +748,22 @@ uint32_t Plugin::get_max_batch_size(const std::map& opti } try { - ICNNNetwork::InputShapes shapes = cloned_network.getInputShapes(); + std::map shapes; + for (auto& param : cloned_model->get_parameters()) { + shapes[ov::op::util::get_ie_output_name(param->output(0))] = param->get_output_partial_shape(0); + } for (const auto& input : batched_inputs) shapes[input.first][input.second] = base_batch_size; - cloned_network.reshape(shapes); + cloned_model->reshape(shapes); } catch (...) { GPU_DEBUG_INFO << "[MAX_BATCH_SIZE] Error at reshape to " << base_batch_size << std::endl; return static_cast(max_batch_size); } - auto nGraphFunc = cloned_network.getFunction(); TransformationsPipeline transformations(config, device_info); - transformations.apply(nGraphFunc); - program = std::make_shared(cloned_network, engine, config, false, true); - std::pair device_memory_usage = program->GetCompiledProgram(0)->get_estimated_device_mem_usage(); + transformations.apply(cloned_model); + program = std::make_shared(cloned_model, engine, config, false, true); + std::pair device_memory_usage = program->get_compiled_program()->get_estimated_device_mem_usage(); if (device_memory_usage.first == static_cast(-1L) && device_memory_usage.second == static_cast(-1L)) { return static_cast(max_batch_size); } @@ -937,9 +780,9 @@ uint32_t Plugin::get_max_batch_size(const std::map& opti return static_cast(max_batch_size); } -uint32_t Plugin::get_optimal_batch_size(const std::map& options) const { - auto device_id = GetConfig(ov::device::id.name(), options).as(); - auto context = get_default_contexts().at(device_id)->get_impl(); +uint32_t Plugin::get_optimal_batch_size(const ov::AnyMap& options) const { + auto device_id = get_property(ov::device::id.name(), options).as(); + auto context = get_default_contexts().at(device_id); const auto& device_info = context->get_engine().get_device_info(); auto next_pow_of_2 = [] (float x) { return pow(2, ceil(std::log(x)/std::log(2))); @@ -952,9 +795,9 @@ uint32_t Plugin::get_optimal_batch_size(const std::map& GPU_DEBUG_INFO << "[OPTIMAL_BATCH_SIZE] ov::hint::model is not set: return 1" << std::endl; return static_cast(1); } - std::shared_ptr model; + std::shared_ptr model; try { - model = model_param->second.as>(); + model = model_param->second.as>(); } catch (...) 
{ OPENVINO_THROW("[OPTIMAL_BATCH_SIZE] ov::hint::model should be std::shared_ptr type"); } @@ -986,15 +829,15 @@ uint32_t Plugin::get_optimal_batch_size(const std::map& << ", L3_cache_size is (MB): " << float(L3_cache_size) / 1024 / 1024 << std::endl; } auto config = m_configs_map.at(device_id); - auto networkCloned = clone_and_transform_model(CNNNetwork(model), config); - ov::MemBandwidthPressure memPressure = ov::MemBandwidthPressureTolerance(networkCloned.getFunction(), L3_cache_size); + auto cloned_model = clone_and_transform_model(model, config); + ov::MemBandwidthPressure memPressure = ov::MemBandwidthPressureTolerance(cloned_model, L3_cache_size); uint32_t batch = 1; if (memPressure.max_mem_tolerance != ov::MemBandwidthPressure::UNKNOWN) batch = std::max(1.0, 16 * closest_pow_of_2(memPressure.max_mem_tolerance)); - std::map options_for_max_batch; + ov::AnyMap options_for_max_batch; options_for_max_batch[ov::hint::model.name()] = model; - options_for_max_batch["GPU_THROUGHPUT_STREAMS"] = CONFIG_VALUE(GPU_THROUGHPUT_AUTO); - auto max_batch_size = GetMetric(ov::max_batch_size.name(), options_for_max_batch).as(); + options_for_max_batch[ov::num_streams.name()] = ov::streams::AUTO; + auto max_batch_size = get_metric(ov::max_batch_size.name(), options_for_max_batch).as(); uint32_t closest = closest_pow_of_2(max_batch_size); batch = std::min(closest, batch); batch = std::min(256u, batch); //batch 256 is a max @@ -1008,5 +851,5 @@ uint32_t Plugin::get_optimal_batch_size(const std::map& } // namespace intel_gpu } // namespace ov -static const Version version = { {2, 1}, CI_BUILD_NUMBER, "Intel GPU plugin" }; -IE_DEFINE_PLUGIN_CREATE_FUNCTION(ov::intel_gpu::Plugin, version) +static const ov::Version version = { CI_BUILD_NUMBER, "Intel GPU plugin" }; +OV_DEFINE_PLUGIN_CREATE_FUNCTION(ov::intel_gpu::Plugin, version) diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 5be3e518c3e08b..231ee37252a2a5 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -25,14 +25,10 @@ #endif -using namespace InferenceEngine; -using namespace InferenceEngine::details; - namespace ov { namespace intel_gpu { const cldnn::primitive_id ProgramBuilder::m_preProcessTag("_cldnn_input_preprocess"); -const cldnn::primitive_id ProgramBuilder::m_meanValuesTag("_cldnn_mean_values"); const cldnn::primitive_id ProgramBuilder::m_preCustomLayerTag("_cldnn_custom_preprocess"); const cldnn::primitive_id ProgramBuilder::m_postCustomLayerTag("_cldnn_custom_postprocess"); ProgramBuilder::factories_map_t ProgramBuilder::factories_map = {}; @@ -57,93 +53,15 @@ std::string layer_type_name_ID(const std::shared_ptr& op) { return layer_type_name_ID(op.get()); } -void ProgramBuilder::ChangeInputBatch(int batch) { - m_curBatch = batch; -} - -auto getParamName = [](const std::shared_ptr& param) -> std::string { - const auto& names = param->get_output_tensor(0).get_names(); - if (!names.empty()) - return *names.begin(); - else - return param->get_friendly_name(); -}; - -// detect the only supported dynamic shape case - -// exactly one dimension is dynamic in input params with defined min/max interval -bool ProgramBuilder::IsDynBatchModel(const std::shared_ptr& model, - std::map& shapes, - std::map>& batch_dim) { - for (const auto& param : model->get_parameters()) { - auto pname = getParamName(param); - batch_dim[pname] = { -1, -1 }; - if (param->get_output_partial_shape(0).rank().is_dynamic()) { - 
return false; - } - ov::PartialShape pshape = param->get_output_partial_shape(0); - bool only_batch_dynamic = pshape.size() && pshape[0].is_dynamic(); - for (size_t i = 1; i < pshape.size(); i++) { - if (pshape[i].is_dynamic()) { - // only support 0th dimension for legacy dynamic batch - return false; - } - } - if (only_batch_dynamic) { - int64_t max_b = pshape[0].get_max_length(); - if (max_b > 1) { - batch_dim[pname].first = 0; - batch_dim[pname].second = max_b; - pshape[0] = 1; - } else { - // unbounded dynamic shape should be handled with new dynamic shape path - return false; - } - } - shapes[pname] = pshape; - } - if (batch_dim.empty()) - return false; - - bool dyn_shape_batch_found = false; - // detect 1st dyn dim, mark it and continue - auto bitr = batch_dim.begin(); - dyn_shape_batch_found = (bitr->second.first == 0); - auto batch_val_1st = bitr->second.second; - bitr++; - for (; bitr != batch_dim.end(); bitr++) { - if (bitr->second.first == 0) { - if (bitr->second.second != batch_val_1st) { - dyn_shape_batch_found = false; - break; - } else { - dyn_shape_batch_found = true; - } - } else { - return false; - } - } - return dyn_shape_batch_found; -} - -ProgramBuilder::ProgramBuilder(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config, - bool createTopologyOnly, bool partialBuild, - InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs, - std::shared_ptr task_executor, bool innerProgram) - : m_curBatch(-1) - , m_config(config) +ProgramBuilder::ProgramBuilder(std::shared_ptr model, cldnn::engine& engine, const ExecutionConfig& config, + bool create_topology_only, bool partial_build, + std::shared_ptr task_executor, bool is_inner_program) + : m_config(config) , m_engine(engine) , queryMode(false) , m_task_executor(task_executor) { if (m_task_executor == nullptr) m_task_executor = cldnn::program::make_task_executor(m_config); - // Extract inputs/outputs info from CNNNetwork - auto networkInputs = (inputs != nullptr) ? *inputs : network.getInputsInfo(); - auto networkOutputs = (outputs != nullptr) ? 
*outputs : network.getOutputsInfo(); - - auto func = network.getFunction(); - if (!func) { - OPENVINO_THROW("Function pointer inside CNNNetwork is nullptr"); - } // locate global custom kernel config // and auto-load kernels from it @@ -175,186 +93,28 @@ ProgramBuilder::ProgramBuilder(InferenceEngine::CNNNetwork& network, cldnn::engi auto custom_layers_config = m_config.get_property(ov::intel_gpu::config_file); CustomLayer::LoadFromFile(custom_layers_config, m_custom_layers, custom_layers_config.empty()); - auto ops = func->get_ordered_ops(); + auto ops = model->get_ordered_ops(); - bool dyn_shape_batch_found = false; - std::map shapes; - std::map> batch_dim; - - dyn_shape_batch_found = IsDynBatchModel(func, shapes, batch_dim); - if (dyn_shape_batch_found) { - m_config.set_property(ov::intel_gpu::max_dynamic_batch(batch_dim.begin()->second.second)); - } - - int m_bv_sz = GetMaxBatchSizeForSingleProgram(); - m_max_batch = static_cast(m_config.get_property(ov::intel_gpu::max_dynamic_batch)); - - // Do not apply dynamic batch for inner program (only single batch is allowed) - if (!innerProgram && (dyn_shape_batch_found || m_max_batch > 1)) { - // compile log2 networks to serve dynamic batch requests - for (int b = m_bv_sz - 1; b >= 0; b--) { - inputLayouts.clear(); - outputDims.clear(); - primitive_ids.clear(); - blobMemCache.clear(); - - auto new_batch = 1U << static_cast(b); - ChangeInputBatch(new_batch); - - // clone the source model, find the batch dim - // and reshape the model to next batch size - auto new_func = func->clone(); - std::map, ov::PartialShape> new_shapes; - for (const auto& param : new_func->get_parameters()) { - ov::PartialShape pshape = param->get_output_partial_shape(0); - - auto pname = getParamName(param); - auto batch_idx = batch_dim[pname].first; - - if (batch_idx >= 0) { - auto pshape = shapes[pname]; - pshape[batch_idx] = new_batch; - new_shapes[param->output(0)] = pshape; - } - } - new_func->reshape(new_shapes); - { - auto deviceInfo = engine.get_device_info(); - TransformationsPipeline transformations(m_config, deviceInfo); - transformations.apply(new_func); - } - - // reshape network input/output maps accordingly - // for correct network compilation - for (auto& new_input : new_func->inputs()) { - auto iname = new_input.get_node()->get_friendly_name(); - auto it = networkInputs.find(iname); - if (it != networkInputs.end()) { - auto shape = new_input.get_shape(); - auto l = it->second->getTensorDesc().getLayout(); - it->second->getInputData()->reshape(shape, l); - } - } - - for (auto& new_output : new_func->outputs()) { - auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name(); - auto it = networkOutputs.find(iname); - if (it != networkOutputs.end()) { - auto shape = new_output.get_shape(); - auto l = it->second->getTensorDesc().getLayout(); - it->second->reshape(shape, l); - } - } - m_programs.insert(m_programs.begin(), BuildProgram(new_func->get_ordered_ops(), networkInputs, networkOutputs, - createTopologyOnly, partialBuild)); - } - { - // recompute maximal dynamic batch inputs/outputs for infer request - // and store them into internal maps - // same operations as above, but for maximum batch - auto new_func = func->clone(); - std::map, ov::PartialShape> new_shapes; - for (const auto& param : new_func->get_parameters()) { - ov::PartialShape pshape = param->get_output_partial_shape(0); - - auto pname = getParamName(param); - auto batch_idx = batch_dim[pname].first; - - if (batch_idx >= 0) { - auto 
pshape = shapes[pname]; - pshape[batch_idx] = m_max_batch; - new_shapes[param->output(0)] = pshape; - } - } - new_func->reshape(new_shapes); - - for (auto& new_input : new_func->inputs()) { - auto iname = new_input.get_node()->get_friendly_name(); - auto it = networkInputs.find(iname); - if (it != networkInputs.end()) { - auto shape = new_input.get_shape(); - auto l = it->second->getTensorDesc().getLayout(); - it->second->getInputData()->reshape(shape, l); - } - } - - for (auto& new_output : new_func->outputs()) { - auto iname = new_output.get_node_shared_ptr()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name(); - auto it = networkOutputs.find(iname); - if (it != networkOutputs.end()) { - auto shape = new_output.get_shape(); - auto l = it->second->getTensorDesc().getLayout(); - SizeVector old_shape = it->second->getTensorDesc().getDims(); - it->second->reshape(shape, l); - // detect changed output batch dimension - SizeVector new_shape = it->second->getTensorDesc().getDims(); - for (int64_t i = 0; i < static_cast(old_shape.size()); i++) { - if (old_shape[i] != new_shape[i]) { - m_output_batch_dim[iname] = i; - break; - } - } - } - } - m_networkInputs = networkInputs; - m_networkOutputs = networkOutputs; - m_input_batch_dim = batch_dim; - } - } else { - m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly, partialBuild, innerProgram)); - } + m_program = build(ops, create_topology_only, partial_build, is_inner_program); } -ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config, - InferenceEngine::InputsDataMap* inputs, InferenceEngine::OutputsDataMap* outputs) - : m_max_batch(1) - , m_curBatch(-1) - , m_config(config) +ProgramBuilder::ProgramBuilder(cldnn::engine& engine, const ExecutionConfig& config) + : m_config(config) , m_engine(engine) , queryMode(false) { m_task_executor = cldnn::program::make_task_executor(m_config); - if (inputs != nullptr) - m_networkInputs = *inputs; - if (outputs != nullptr) - m_networkOutputs = *outputs; -} - -int ProgramBuilder::GetMaxBatchSizeForSingleProgram() { - auto max_dynamic_batch = m_config.get_property(ov::intel_gpu::max_dynamic_batch); - if (max_dynamic_batch > 1) { - // calculate number of networks necessary based on binary log - unsigned int tmp = static_cast(max_dynamic_batch); - unsigned int mask = 1U << 31; - unsigned int ldigit = 31; - - while (!(tmp & mask)) { - mask >>= 1; - ldigit--; - } - - return ldigit + 1; - } - - return 0; } -std::shared_ptr ProgramBuilder::GetCompiledProgram(int program_id) { - if (program_id >= static_cast(m_programs.size())) - OPENVINO_THROW("Invalid program ID"); - - return m_programs[program_id]; +std::shared_ptr ProgramBuilder::get_compiled_program() const { + return m_program; } -void ProgramBuilder::PrepareBuild(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs) { +void ProgramBuilder::prepare_build() { m_topology.reset(new cldnn::topology()); - m_networkInputs = networkInputs; - m_networkOutputs = networkOutputs; } -void ProgramBuilder::CleanupBuild() { +void ProgramBuilder::cleanup_build() { m_topology.reset(); - m_networkInputs.clear(); - m_networkOutputs.clear(); #if defined(__unix__) && !defined(__ANDROID__) // NOTE: In linux, without malloc_trim, an amount of the memory used by compilation is not being returned to system thought they are freed. 
// (It is at least 500 MB when we perform parallel compilation) @@ -365,12 +125,9 @@ void ProgramBuilder::CleanupBuild() { #endif } -std::shared_ptr ProgramBuilder::BuildProgram(const std::vector>& ops, - InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs, - bool createTopologyOnly, bool partialBuild, bool innerProgram) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::BuildProgram"); - // std::cout << "BuildProgram " << createTopologyOnly << ", " << partialBuild << ", " << innerProgram << std::endl; +std::shared_ptr ProgramBuilder::build(const std::vector>& ops, + bool create_topology_only, bool partial_build, bool is_inner_program) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::build"); // In the case of inner program, allow_new_shape_infer flag is setted by outside of program. // So, do not check allow_new_shape_infer for inner program build for (const auto& op : ops) { @@ -380,22 +137,22 @@ std::shared_ptr ProgramBuilder::BuildProgram(const std::vector ProgramBuilder::BuildProgram(const std::vector& op) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::IsOpSupported"); +bool ProgramBuilder::is_op_supported(const std::shared_ptr& op) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "ProgramBuilder::is_op_supported"); cldnn::topology topology; try { // Query mode disables checks that input primitives are created, - // as IsOpSupported method is called for each operation separately + // as is_op_supported method is called for each operation separately // So we just ensure that inputs count is valid for given operation EnableQueryMode(); // Creating topology object for each operation is supposed to be more time-consuming than @@ -425,14 +182,14 @@ bool ProgramBuilder::IsOpSupported(const InferenceEngine::CNNNetwork& network, c // add any ugly macro/templates to apply single function to multiple cases. // 2. We also check parameters of each operation, which means we have more // reliable results of QueryNetwork call. 
- PrepareBuild(network.getInputsInfo(), network.getOutputsInfo()); + prepare_build(); allow_new_shape_infer = requires_new_shape_infer(*op); CreateSingleLayerPrimitive(topology, op); - CleanupBuild(); + cleanup_build(); DisableQueryMode(); } catch (std::exception&) { // Exception means that an operation or some of it's parameters are not supported - CleanupBuild(); + cleanup_build(); return false; } @@ -505,7 +262,7 @@ void ProgramBuilder::init_profile_info(const cldnn::primitive& prim) { perfMap[prim.id].first = prim.id; auto& perfEntry = perfMap[prim.id].second; perfEntry.layerType = prim.origin_op_type_name; - perfEntry.status = InferenceEngine::InferenceEngineProfileInfo::LayerStatus::EXECUTED; + perfEntry.status = ov::ProfilingInfo::Status::EXECUTED; perfEntry.cpu_uSec = perfEntry.realTime_uSec = 0; perfEntry.isCPU = false; perfEntry.parentPrimitive = prim.origin_op_name; diff --git a/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp b/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp index 61e079b027442f..fe9ff746f04283 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp @@ -2,67 +2,36 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "intel_gpu/plugin/remote_allocators.hpp" -#include "intel_gpu/plugin/remote_blob.hpp" - -using namespace InferenceEngine; -using namespace InferenceEngine::gpu; -using namespace InferenceEngine::details; +#include "intel_gpu/plugin/remote_tensor.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include namespace ov { namespace intel_gpu { -void RemoteAllocator::regLockedBlob(void* handle, const RemoteBlobImpl* blob) { - std::lock_guard locker(*this); - auto iter = m_lockedBlobs.find(handle); - if (iter == m_lockedBlobs.end()) { - m_lockedBlobs.emplace(handle, blob); - } -} - -void RemoteAllocator::unlock(void* handle) noexcept { - std::lock_guard locker(*this); - auto iter = m_lockedBlobs.find(handle); - if (iter != m_lockedBlobs.end()) { - iter->second->unlock(); - m_lockedBlobs.erase(iter); - } -} - -void* USMHostAllocator::lock(void* handle, InferenceEngine::LockOp) noexcept { - if (!_usm_host_blob) - return nullptr; - try { - return _usm_host_blob->get(); - } catch (...) { - return nullptr; - } -}; - -void USMHostAllocator::unlock(void* handle) noexcept {} - -void* USMHostAllocator::alloc(size_t size) noexcept { +void* USMHostAllocator::allocate(const size_t bytes, const size_t /* alignment */) noexcept { try { - auto td = TensorDesc(Precision::U8, SizeVector{size}, InferenceEngine::Layout::C); - ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER)}}; - _usm_host_blob = std::dynamic_pointer_cast(_context->CreateBlob(td, params)); - _usm_host_blob->allocate(); - if (!getBlobImpl(_usm_host_blob.get())->is_allocated()) { - return nullptr; + ov::AnyMap params = { ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::USM_HOST_BUFFER) }; + _usm_host_tensor = _context->create_tensor(ov::element::u8, {bytes}, params); + if (auto casted = std::dynamic_pointer_cast(_usm_host_tensor._ptr)) { + return casted->get_original_memory()->get_internal_params().mem; } - return _usm_host_blob->get(); - } catch (...) { + return nullptr; + } catch (std::exception&) { return nullptr; } } -bool USMHostAllocator::free(void* handle) noexcept { +bool USMHostAllocator::deallocate(void* /* handle */, const size_t /* bytes */, size_t /* alignment */) noexcept { try { - _usm_host_blob = nullptr; - } catch(...) 
{ } + _usm_host_tensor = {nullptr, nullptr}; + } catch (std::exception&) { } return true; } +bool USMHostAllocator::is_equal(const USMHostAllocator& other) const { + return other._usm_host_tensor != nullptr && _usm_host_tensor != nullptr && other._usm_host_tensor._ptr == _usm_host_tensor._ptr; +} } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/remote_blob.cpp b/src/plugins/intel_gpu/src/plugin/remote_blob.cpp deleted file mode 100644 index ecc072a7e5bd9d..00000000000000 --- a/src/plugins/intel_gpu/src/plugin/remote_blob.cpp +++ /dev/null @@ -1,300 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/plugin/remote_blob.hpp" -#include "intel_gpu/plugin/remote_allocators.hpp" -#include "intel_gpu/plugin/plugin.hpp" -#include "intel_gpu/runtime/itt.hpp" - -using namespace InferenceEngine; -using namespace InferenceEngine::gpu; -using namespace InferenceEngine::details; - -namespace ov { -namespace intel_gpu { - -RemoteBlobImpl::RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, - cldnn::stream& stream, - const cldnn::layout& layout, - cldnn::shared_handle mem, - cldnn::shared_surface surf, - uint32_t plane, - BlobType mem_type) - : m_allocator(std::make_shared()) - , m_context(context) - , m_stream(stream) - , m_mem(mem) - , m_surf(surf) - , m_plane(plane) - , m_layout(layout) - , m_mem_type(mem_type) - , m_hash(0) - , m_memory_object(nullptr) - , lockedCounter(0) - , lockedHolder(nullptr) - , _handle(nullptr) { - if (supports_caching()) { - m_hash = cldnn::hash_combine(0, m_mem); - m_hash = cldnn::hash_combine(m_hash, m_surf); - m_hash = cldnn::hash_combine(m_hash, plane); - m_hash = cldnn::hash_combine(m_hash, static_cast::type>(layout.format)); - m_hash = cldnn::hash_combine(m_hash, static_cast::type>(layout.data_type)); - for (auto& d : layout.get_shape()) { - m_hash = cldnn::hash_combine(m_hash, d); - } - } -} - -AnyMap RemoteBlobImpl::getParams() const { - OPENVINO_ASSERT(is_allocated(), "[GPU] Can't get RemoteBlob params as blob wasn't allocated properly"); - auto params = m_memory_object->get_internal_params(); - - switch (m_mem_type) { - case BlobType::BT_BUF_INTERNAL: - case BlobType::BT_BUF_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; - case BlobType::BT_USM_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_USER_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; - case BlobType::BT_USM_HOST_INTERNAL: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; - case BlobType::BT_USM_DEVICE_INTERNAL: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_DEVICE_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; -#ifdef _WIN32 - case BlobType::BT_DX_BUF_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(VA_DEVICE), params.user_device }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface } - }; -#endif - case BlobType::BT_IMG_SHARED: - return{ - { 
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem } - }; - case BlobType::BT_SURF_SHARED: - return{ - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) }, - { GPU_PARAM_KEY(OCL_CONTEXT), params.context }, - { GPU_PARAM_KEY(VA_DEVICE), params.user_device }, - { GPU_PARAM_KEY(MEM_HANDLE), params.mem }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), params.surface }, - { GPU_PARAM_KEY(VA_PLANE), params.plane } - }; - default: - OPENVINO_THROW("Unsupported shared object type ", static_cast(m_mem_type)); - } -} - -void RemoteBlobImpl::setShape(const SizeVector& dims) { - if (ov::shape_size(dims) > m_memory_object->count()) { - OPENVINO_ASSERT(!is_shared(), "Cannot call setShape for Blobs created on top of preallocated memory if shape was increased."); - if (!deallocate()) { - OPENVINO_THROW("Cannot deallocate blob while an attempt to enlarge blob area in setShape."); - } - - m_layout.set_partial_shape(ov::PartialShape{dims}); - - allocate(); - } -} - -bool RemoteBlobImpl::deallocate() noexcept { - m_memory_object.reset(); - return m_memory_object == nullptr; -} - -bool RemoteBlobImpl::is_allocated() const noexcept { - return m_memory_object != nullptr; -} - -bool RemoteBlobImpl::is_locked() const noexcept { - return lockedHolder != nullptr; -} - -void RemoteBlobImpl::allocate() { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteBlobImpl::Allocate"); - - auto context = get_context_impl(m_context); - auto enable_caching = supports_caching(); - - if (enable_caching) { - m_memory_object = context->try_get_cached_memory(m_hash); - if (m_memory_object) - return; - } - - auto& engine = context->get_engine(); - - switch (m_mem_type) { - case BlobType::BT_BUF_INTERNAL: { - m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem); - break; - } - case BlobType::BT_USM_HOST_INTERNAL: { - m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host); - break; - } - case BlobType::BT_USM_DEVICE_INTERNAL: { - m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_device); - break; - } - case BlobType::BT_BUF_SHARED: { - m_memory_object = engine.share_buffer(m_layout, m_mem); - break; - } - case BlobType::BT_USM_SHARED: { - m_memory_object = engine.share_usm(m_layout, m_mem); - break; - } -#ifdef _WIN32 - case BlobType::BT_SURF_SHARED: { - m_memory_object = engine.share_surface(m_layout, m_mem, m_plane); - break; - } - case BlobType::BT_DX_BUF_SHARED: { - m_memory_object = engine.share_dx_buffer(m_layout, m_mem); - break; - } -#else - case BlobType::BT_SURF_SHARED: { - m_memory_object = engine.share_surface(m_layout, m_surf, m_plane); - break; - } -#endif - case BlobType::BT_IMG_SHARED: { - m_memory_object = engine.share_image(m_layout, m_mem); - break; - } - default: - m_memory_object.reset(); - } - - if (enable_caching) - context->add_to_cache(m_hash, m_memory_object); -} - -const std::shared_ptr& RemoteBlobImpl::getAllocator() const noexcept { - return m_allocator; -}; - -std::string RemoteBlobImpl::getDeviceName() const noexcept { - return m_context->getDeviceName(); -}; - -std::shared_ptr RemoteBlobImpl::getContext() const noexcept { - return m_context; -} - -void RemoteBlobImpl::reinterpret(const cldnn::layout& new_layout) { - OPENVINO_ASSERT(m_memory_object->size() >= new_layout.bytes_count(), - "[GPU] Can't reinterpret blob to the size bigger than allocated memory buffer", - " (", m_memory_object->size(), " vs ", 
new_layout.bytes_count(), ")"); - m_layout = new_layout; -} - -void RemoteBlobImpl::lock() const { - if (!is_allocated()) { - OPENVINO_THROW("[GPU] Remote blob can't be locked as it's not allocated"); - } - - std::lock_guard locker(lockedMutex); - if (lockedCounter == 0) { - lockedHolder = std::unique_ptr>(new cldnn::mem_lock(m_memory_object, m_stream)); - auto ptr = lockedHolder->data(); - _handle = reinterpret_cast(ptr); - auto casted_allocator = std::dynamic_pointer_cast(m_allocator); - OPENVINO_ASSERT(casted_allocator, "[GPU] Invalid remote allocator type"); - casted_allocator->regLockedBlob(_handle, this); - } - lockedCounter++; -} - -void RemoteBlobImpl::unlock() const { - std::lock_guard locker(lockedMutex); - lockedCounter--; - if (lockedCounter == 0) - lockedHolder.reset(); -} - -LockedMemory RemoteBlobImpl::buffer() noexcept { - try { - lock(); - return LockedMemory(m_allocator.get(), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -LockedMemory RemoteBlobImpl::cbuffer() const noexcept { - try { - lock(); - return LockedMemory(m_allocator.get(), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -LockedMemory RemoteBlobImpl::rwmap() noexcept { - try { - lock(); - return LockedMemory(m_allocator.get(), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -LockedMemory RemoteBlobImpl::rmap() const noexcept { - try { - lock(); - return LockedMemory(m_allocator.get(), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -LockedMemory RemoteBlobImpl::wmap() noexcept { - try { - lock(); - return LockedMemory(m_allocator.get(), _handle, 0); - } catch (...) { - return LockedMemory(nullptr, nullptr, 0); - } -} - -bool RemoteBlobImpl::is_shared() const { - return m_mem_type == BlobType::BT_BUF_SHARED || - m_mem_type == BlobType::BT_USM_SHARED || - m_mem_type == BlobType::BT_IMG_SHARED || - m_mem_type == BlobType::BT_SURF_SHARED || - m_mem_type == BlobType::BT_DX_BUF_SHARED; -} - -bool RemoteBlobImpl::supports_caching() const { - return is_shared(); -} - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/remote_context.cpp b/src/plugins/intel_gpu/src/plugin/remote_context.cpp index be6e2ab1bf7d2a..1b932226881db3 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_context.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_context.cpp @@ -2,118 +2,186 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "openvino/runtime/intel_gpu/remote_properties.hpp" +#include "openvino/runtime/make_tensor.hpp" #include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/plugin/remote_blob.hpp" +#include "intel_gpu/plugin/remote_tensor.hpp" #include "intel_gpu/plugin/remote_allocators.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/runtime/device_query.hpp" - -using namespace InferenceEngine; -using namespace InferenceEngine::gpu; -using namespace InferenceEngine::details; +#include namespace ov { namespace intel_gpu { -RemoteContextImpl::RemoteContextImpl(std::string device_name, std::vector devices) - : m_va_display(nullptr) - , m_external_queue(nullptr) - , m_type(ContextType::OCL) - , m_device_name(std::move(device_name)) - , m_memory_cache(cache_capacity) { +namespace { + +template +Type extract_object(const ov::AnyMap& params, const ov::Property& p) { + auto itrHandle = params.find(p.name()); + OPENVINO_ASSERT(itrHandle != params.end(), "[GPU] No parameter ", p.name(), " found in parameters map"); + 
ov::Any res = itrHandle->second; + return res.as(); +} + +} // namespace + +RemoteContextImpl::RemoteContextImpl(const std::string& device_name, std::vector devices) : m_device_name(device_name) { OPENVINO_ASSERT(devices.size() == 1, "[GPU] Currently context can be created for single device only"); - // TODO: Parameterize this based on plugin config and compilation options - auto engine_type = cldnn::engine_types::ocl; - auto runtime_type = cldnn::runtime_types::ocl; + const auto engine_type = cldnn::engine_types::ocl; + const auto runtime_type = cldnn::runtime_types::ocl; m_engine = cldnn::engine::create(engine_type, runtime_type, devices.front()); GPU_DEBUG_LOG << "Initialize RemoteContext for " << m_device_name << " (" << m_engine->get_device_info().dev_name << ")" << std::endl; + init_properties(); } -RemoteContextImpl::RemoteContextImpl(const std::vector& known_contexts, const AnyMap& params) - : m_va_display(nullptr) - , m_external_queue(nullptr) - , m_type(ContextType::OCL) - , m_memory_cache(cache_capacity) { - gpu_handle_param _context_id = nullptr; - gpu_handle_param _va_device = nullptr; +RemoteContextImpl::RemoteContextImpl(const std::map& known_contexts, const AnyMap& params) { + gpu_handle_param context_id = nullptr; int ctx_device_id = 0; int target_tile_id = -1; if (params.size()) { - // parameter map is non-empty - std::string contextTypeStr = extract_object(params, GPU_PARAM_KEY(CONTEXT_TYPE)); + auto ctx_type = extract_object(params, ov::intel_gpu::context_type); - if (GPU_PARAM_VALUE(OCL) == contextTypeStr) { - _context_id = extract_object(params, GPU_PARAM_KEY(OCL_CONTEXT)); + if (ctx_type == ov::intel_gpu::ContextType::OCL) { + context_id = extract_object(params, ov::intel_gpu::ocl_context); + OPENVINO_ASSERT(context_id != nullptr, "[GPU] Can't create shared OCL context as user handle is nullptr! Params:\n", params); - if (params.find(GPU_PARAM_KEY(OCL_QUEUE)) != params.end()) - m_external_queue = extract_object(params, GPU_PARAM_KEY(OCL_QUEUE)); + if (params.find(ov::intel_gpu::ocl_queue.name()) != params.end()) { + m_external_queue = extract_object(params, ov::intel_gpu::ocl_queue); + } - if (params.find(GPU_PARAM_KEY(OCL_CONTEXT_DEVICE_ID)) != params.end()) - ctx_device_id = extract_object(params, GPU_PARAM_KEY(OCL_CONTEXT_DEVICE_ID)); - } else if (GPU_PARAM_VALUE(VA_SHARED) == contextTypeStr) { - m_va_display = _va_device = extract_object(params, GPU_PARAM_KEY(VA_DEVICE)); - m_type = ContextType::DEV_SHARED; + if (params.find(ov::intel_gpu::ocl_context_device_id.name()) != params.end()) + ctx_device_id = extract_object(params, ov::intel_gpu::ocl_context_device_id); + } else if (ctx_type == ov::intel_gpu::ContextType::VA_SHARED) { + m_va_display = extract_object(params, ov::intel_gpu::va_device); + OPENVINO_ASSERT(m_va_display != nullptr, "[GPU] Can't create shared VA/DX context as user handle is nullptr! 
Params:\n", params); + m_type = ContextType::VA_SHARED; } else { - OPENVINO_THROW("Invalid execution context type", contextTypeStr); + OPENVINO_THROW("Invalid execution context type", ctx_type); } - auto tile_id_itr = params.find(GPU_PARAM_KEY(TILE_ID)); - if (tile_id_itr != params.end()) { - target_tile_id = tile_id_itr->second.as(); + if (params.find(ov::intel_gpu::tile_id.name()) != params.end()) { + target_tile_id = extract_object(params, ov::intel_gpu::tile_id); } } - // TODO: Parameterize this based on plugin config and compilation options - auto engine_type = cldnn::engine_types::ocl; - auto runtime_type = cldnn::runtime_types::ocl; + const auto engine_type = cldnn::engine_types::ocl; + const auto runtime_type = cldnn::runtime_types::ocl; + // Use actual runtime and engine types - cldnn::device_query device_query(engine_type, runtime_type, _context_id, _va_device, ctx_device_id, target_tile_id); + cldnn::device_query device_query(engine_type, runtime_type, context_id, m_va_display, ctx_device_id, target_tile_id); auto device_map = device_query.get_available_devices(); - OPENVINO_ASSERT(device_map.size() == 1, "[GPU] Only one device expected in case of context sharing"); + OPENVINO_ASSERT(device_map.size() == 1, "[GPU] Exactly one device expected in case of context sharing, but ", device_map.size(), " found"); m_engine = cldnn::engine::create(engine_type, runtime_type, device_map.begin()->second); m_device_name = get_device_name(known_contexts, m_engine->get_device()); GPU_DEBUG_LOG << "Initialize RemoteContext for " << m_device_name << " (" << m_engine->get_device_info().dev_name << ")" << std::endl; + + init_properties(); } -AnyMap RemoteContextImpl::get_params() const { - AnyMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } }; +void RemoteContextImpl::init_properties() { + properties = { ov::intel_gpu::ocl_context(m_engine->get_user_context()) }; switch (m_type) { - case OCL: - ret[GPU_PARAM_KEY(CONTEXT_TYPE)] = GPU_PARAM_VALUE(OCL); - ret[GPU_PARAM_KEY(OCL_QUEUE)] = static_cast(m_external_queue); + case ContextType::OCL: + properties.insert(ov::intel_gpu::context_type(ov::intel_gpu::ContextType::OCL)); + properties.insert(ov::intel_gpu::ocl_queue(m_external_queue)); break; - case DEV_SHARED: - ret[GPU_PARAM_KEY(CONTEXT_TYPE)] = GPU_PARAM_VALUE(VA_SHARED); - ret[GPU_PARAM_KEY(VA_DEVICE)] = m_va_display; + case ContextType::VA_SHARED: + properties.insert(ov::intel_gpu::context_type(ov::intel_gpu::ContextType::VA_SHARED)); + properties.insert(ov::intel_gpu::va_device(m_va_display)); break; default: - OPENVINO_THROW("Unsupported shared context type ", m_type); + OPENVINO_THROW("[GPU] Unsupported shared context type ", m_type); } +} - return ret; +const ov::AnyMap& RemoteContextImpl::get_property() const { + return properties; +} + +std::shared_ptr RemoteContextImpl::get_this_shared_ptr() { + return std::static_pointer_cast(shared_from_this()); +} + +ov::SoPtr RemoteContextImpl::create_host_tensor(const ov::element::Type type, const ov::Shape& shape) { + if (m_engine->use_unified_shared_memory()) { + USMHostAllocator allocator(get_this_shared_ptr()); + return { ov::make_tensor(type, shape, allocator), nullptr }; + } else { + return { ov::make_tensor(type, shape), nullptr }; + } +} + +ov::SoPtr RemoteContextImpl::create_tensor(const ov::element::Type& type, const ov::Shape& shape, const ov::AnyMap& params) { + if (params.empty()) { + // user wants plugin to allocate tensor by itself and return handle + return { create_buffer(type, shape), nullptr }; + } else { + 
// user will supply shared object handle + auto mem_type = extract_object(params, ov::intel_gpu::shared_mem_type); + + bool is_usm = mem_type == ov::intel_gpu::SharedMemType::USM_HOST_BUFFER || + mem_type == ov::intel_gpu::SharedMemType::USM_DEVICE_BUFFER || + mem_type == ov::intel_gpu::SharedMemType::USM_USER_BUFFER; + + OPENVINO_ASSERT(!is_usm || m_engine->use_unified_shared_memory(), + "[GPU] Can't create USM tensor as USM is not supported (or manually disabled) on current device"); + + if (ov::intel_gpu::SharedMemType::VA_SURFACE == mem_type) { + check_if_shared(); + return { reuse_surface(type, shape, params), nullptr }; + } else if (ov::intel_gpu::SharedMemType::USM_HOST_BUFFER == mem_type) { + return { create_usm(type, shape, TensorType::BT_USM_HOST_INTERNAL), nullptr }; + } else if (ov::intel_gpu::SharedMemType::USM_DEVICE_BUFFER == mem_type) { + return { create_usm(type, shape, TensorType::BT_USM_DEVICE_INTERNAL), nullptr }; + } else { + TensorType tensor_type; + cldnn::shared_handle mem = nullptr; + + if (ov::intel_gpu::SharedMemType::OCL_BUFFER == mem_type) { + tensor_type = TensorType::BT_BUF_SHARED; + mem = extract_object(params, ov::intel_gpu::mem_handle); + } else if (ov::intel_gpu::SharedMemType::USM_USER_BUFFER == mem_type) { + tensor_type = TensorType::BT_USM_SHARED; + mem = extract_object(params, ov::intel_gpu::mem_handle); + } else if (ov::intel_gpu::SharedMemType::OCL_IMAGE2D == mem_type) { + tensor_type = TensorType::BT_IMG_SHARED; + mem = extract_object(params, ov::intel_gpu::mem_handle); +#ifdef _WIN32 + } else if (ov::intel_gpu::SharedMemType::DX_BUFFER == mem_type) { + tensor_type = TensorType::BT_DX_BUF_SHARED; + mem = extract_object(params, ov::intel_gpu::dev_object_handle); + check_if_shared(); +#endif + } else { + OPENVINO_THROW("[GPU] Unsupported shared object type ", mem_type); + } + + return { reuse_memory(type, shape, mem, tensor_type), nullptr }; + } + } } // For external contexts we try to match underlying handles with default contexts created by plugin to find device name -std::string RemoteContextImpl::get_device_name(const std::vector& known_contexts, - const cldnn::device::ptr current_device) { +std::string RemoteContextImpl::get_device_name(const std::map& known_contexts, + const cldnn::device::ptr current_device) const { std::string device_name = "GPU"; for (auto& c : known_contexts) { - if (c->get_engine().get_device()->is_same(current_device)) { - device_name = c->get_device_name(); + if (c.second->get_engine().get_device()->is_same(current_device)) { + device_name = c.second->get_device_name(); break; } } return device_name; } -std::string RemoteContextImpl::get_device_name() const noexcept { +const std::string& RemoteContextImpl::get_device_name() const { return m_device_name; } @@ -130,161 +198,35 @@ void RemoteContextImpl::add_to_cache(size_t hash, cldnn::memory::ptr memory) { m_memory_cache.add(hash, memory); } -InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::reuse_surface(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc, - const InferenceEngine::ParamMap& params) { - using namespace InferenceEngine; - auto& stream = m_engine->get_service_stream(); - uint32_t plane = extract_object(params, GPU_PARAM_KEY(VA_PLANE)); -#ifdef _WIN32 - cldnn::shared_handle surf = extract_object(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); -#else - cldnn::shared_surface surf = extract_object(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); -#endif - - cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()), - 
ImageFormatFromLayout(desc.getLayout()), - tensor_from_dims(desc.getDims())); +std::shared_ptr RemoteContextImpl::reuse_surface(const ov::element::Type type, const ov::Shape& shape, const ov::AnyMap& params) { + uint32_t plane = extract_object(params, ov::intel_gpu::va_plane); #ifdef _WIN32 - auto blob = std::make_shared(public_context, stream, - desc, layout, surf, 0, plane, - BlobType::BT_SURF_SHARED); + cldnn::shared_handle surf = extract_object(params, ov::intel_gpu::dev_object_handle); + return std::make_shared(get_this_shared_ptr(), shape, type, TensorType::BT_SURF_SHARED, surf, 0, plane); #else - auto blob = std::make_shared(public_context, stream, - desc, layout, nullptr, surf, plane, - BlobType::BT_SURF_SHARED); -#endif - - return blob; -} - -InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::reuse_memory(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc, - cldnn::shared_handle mem, - BlobType blob_type) { - auto& stream = m_engine->get_service_stream(); - - cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()), - FormatFromLayout(desc.getLayout()), - tensor_from_dims(desc.getDims())); - - switch (blob_type) { - case BlobType::BT_BUF_SHARED: { - return std::make_shared(public_context, stream, desc, layout, mem, 0, 0, blob_type); - } - case BlobType::BT_USM_SHARED: { - return std::make_shared(public_context, stream, desc, layout, mem, 0, 0, blob_type); - } - case BlobType::BT_IMG_SHARED: { - layout.format = ImageFormatFromLayout(desc.getLayout()); - return std::make_shared(public_context, stream, desc, layout, mem, 0, 0, blob_type); - } -#ifdef _WIN32 - case BlobType::BT_DX_BUF_SHARED: { - return std::make_shared(public_context, stream, desc, layout, mem, 0, 0, blob_type); - } + cldnn::shared_surface surf = extract_object(params, ov::intel_gpu::dev_object_handle); + return std::make_shared(get_this_shared_ptr(), shape, type, TensorType::BT_SURF_SHARED, nullptr, surf, plane); #endif - default: - break; - } - - return nullptr; -} - -InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_buffer(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc) { - cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()), - FormatFromLayout(desc.getLayout()), - tensor_from_dims(desc.getDims())); - auto& stream = m_engine->get_service_stream(); - return std::make_shared(public_context, - stream, - desc, - layout, - nullptr, 0, 0, - BlobType::BT_BUF_INTERNAL); } -InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_usm(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc, - BlobType alloc_type) { - cldnn::layout layout(DataTypeFromPrecision(desc.getPrecision()), - FormatFromLayout(desc.getLayout()), - tensor_from_dims(desc.getDims())); - auto& stream = m_engine->get_service_stream(); - - return std::make_shared(public_context, - stream, - desc, - layout, - nullptr, 0, 0, - alloc_type); +std::shared_ptr RemoteContextImpl::reuse_memory(const ov::element::Type type, + const ov::Shape& shape, + cldnn::shared_handle mem, + TensorType tensor_type) { + return std::make_shared(get_this_shared_ptr(), shape, type, tensor_type, mem); } -void RemoteContextImpl::check_if_shared() { - OPENVINO_ASSERT(m_type == RemoteContextImpl::ContextType::DEV_SHARED, "[GPU] Shared context is required to to share this type of memory"); +std::shared_ptr RemoteContextImpl::create_buffer(const ov::element::Type type, const ov::Shape& shape) { + return 
std::make_shared(get_this_shared_ptr(), shape, type, TensorType::BT_BUF_INTERNAL); } -InferenceEngine::MemoryBlob::Ptr RemoteContextImpl::create_host_blob(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc) { - if (m_engine->use_unified_shared_memory()) - return std::dynamic_pointer_cast(make_blob_with_precision(desc, std::make_shared(public_context))); - else - return std::dynamic_pointer_cast(make_blob_with_precision(desc)); +std::shared_ptr RemoteContextImpl::create_usm(const ov::element::Type type, const ov::Shape& shape, TensorType alloc_type) { + return std::make_shared(get_this_shared_ptr(), shape, type, alloc_type); } -InferenceEngine::RemoteBlob::Ptr RemoteContextImpl::create_blob(InferenceEngine::gpu::ClContext::Ptr public_context, - const InferenceEngine::TensorDesc& desc, - const InferenceEngine::ParamMap& params) { - using namespace InferenceEngine; - if (params.empty()) { - // user wants plugin to allocate blob by itself and return handle - return create_buffer(public_context, desc); - } else { - // user will supply shared object handle - std::string mem_type = extract_object(params, GPU_PARAM_KEY(SHARED_MEM_TYPE)); - - bool is_usm = mem_type == GPU_PARAM_VALUE(USM_HOST_BUFFER) || - mem_type == GPU_PARAM_VALUE(USM_DEVICE_BUFFER) || - mem_type == GPU_PARAM_VALUE(USM_USER_BUFFER); - - OPENVINO_ASSERT(!is_usm || m_engine->use_unified_shared_memory(), - "[GPU] Can't create USM tensor as USM is not supported (or manually disabled) on current device"); - - if (GPU_PARAM_VALUE(VA_SURFACE) == mem_type) { - check_if_shared(); - return reuse_surface(public_context, desc, params); - } else if (GPU_PARAM_VALUE(USM_HOST_BUFFER) == mem_type) { - return create_usm(public_context, desc, BlobType::BT_USM_HOST_INTERNAL); - } else if (GPU_PARAM_VALUE(USM_DEVICE_BUFFER) == mem_type) { - return create_usm(public_context, desc, BlobType::BT_USM_DEVICE_INTERNAL); - } else { - BlobType blob_type; - cldnn::shared_handle mem = nullptr; - - if (GPU_PARAM_VALUE(OCL_BUFFER) == mem_type) { - blob_type = BlobType::BT_BUF_SHARED; - mem = extract_object(params, GPU_PARAM_KEY(MEM_HANDLE)); - } else if (GPU_PARAM_VALUE(USM_USER_BUFFER) == mem_type) { - blob_type = BlobType::BT_USM_SHARED; - mem = extract_object(params, GPU_PARAM_KEY(MEM_HANDLE)); - } else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == mem_type) { - blob_type = BlobType::BT_IMG_SHARED; - mem = extract_object(params, GPU_PARAM_KEY(MEM_HANDLE)); -#ifdef _WIN32 - } else if (GPU_PARAM_VALUE(DX_BUFFER) == mem_type) { - blob_type = BlobType::BT_DX_BUF_SHARED; - mem = extract_object(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); - check_if_shared(); -#endif - } else { - OPENVINO_ASSERT(false, "[GPU] Unsupported shared object type ", mem_type); - } - - return reuse_memory(public_context, desc, mem, blob_type); - } - } +void RemoteContextImpl::check_if_shared() const { + OPENVINO_ASSERT(m_type == ContextType::VA_SHARED, "[GPU] Shared context is required to to share this type of memory"); } } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp new file mode 100644 index 00000000000000..cd164940027be7 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp @@ -0,0 +1,272 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/remote_tensor.hpp" +#include "intel_gpu/plugin/remote_allocators.hpp" +#include 
"intel_gpu/plugin/plugin.hpp" +#include "intel_gpu/runtime/itt.hpp" + +#include + +namespace ov { +namespace intel_gpu { + +RemoteTensorImpl::RemoteTensorImpl(RemoteContextImpl::Ptr context, + const ov::Shape& shape, + const ov::element::Type& element_type, + TensorType mem_type, + cldnn::shared_handle mem, + cldnn::shared_surface surf, + uint32_t plane) + : m_context(context) + , m_element_type(element_type) + , m_shape(shape) + , m_layout(make_layout(element_type, shape)) + , m_mem_type(mem_type) + , m_mem(mem) + , m_surf(surf) + , m_plane(plane) { + if (supports_caching()) { + m_hash = cldnn::hash_combine(0, m_mem); + m_hash = cldnn::hash_combine(m_hash, m_surf); + m_hash = cldnn::hash_combine(m_hash, plane); + m_hash = cldnn::hash_combine(m_hash, m_shape.size()); + m_hash = cldnn::hash_combine(m_hash, element_type.hash()); + for (const auto& d : m_shape) { + m_hash = cldnn::hash_combine(m_hash, d); + } + } + + update_strides(); + allocate(); + init_properties(); +} + +RemoteTensorImpl::~RemoteTensorImpl() { + deallocate(); +} + +const ov::element::Type& RemoteTensorImpl::get_element_type() const { + return m_element_type; +} + +const ov::Shape& RemoteTensorImpl::get_shape() const { + return m_shape; +} + +void RemoteTensorImpl::update_strides() { + if (m_element_type.bitwidth() < 8) + return; + auto& shape = get_shape(); + m_strides.clear(); + if (!shape.empty()) { + m_strides.resize(shape.size()); + m_strides.back() = m_element_type.size(); + std::copy(shape.rbegin(), shape.rend() - 1, m_strides.rbegin() + 1); + std::partial_sum(m_strides.rbegin(), m_strides.rend(), m_strides.rbegin(), std::multiplies()); + } +} + +const ov::Strides& RemoteTensorImpl::get_strides() const { + return m_strides; +} + +const AnyMap& RemoteTensorImpl::get_properties() const { + return m_properties; +} + + void RemoteTensorImpl::set_shape(ov::Shape shape) { + m_layout.set_partial_shape(ov::PartialShape{shape}); + m_shape = shape; + + if (ov::shape_size(shape) > m_memory_object->count()) { + OPENVINO_ASSERT(!is_shared(), "Cannot call setShape for Tensor created on top of preallocated memory if shape was increased."); + if (!deallocate()) { + OPENVINO_THROW("Cannot deallocate tensor while an attempt to enlarge tensor area in setShape."); + } + + allocate(); + } +} + +bool RemoteTensorImpl::deallocate() noexcept { + m_memory_object.reset(); + return m_memory_object == nullptr; +} + +bool RemoteTensorImpl::is_allocated() const noexcept { + return m_memory_object != nullptr; +} + +void RemoteTensorImpl::allocate() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteTensorImpl::Allocate"); + + auto context = std::dynamic_pointer_cast(m_context); + auto enable_caching = supports_caching(); + + if (enable_caching) { + m_memory_object = context->try_get_cached_memory(m_hash); + if (m_memory_object) + return; + } + + auto& engine = context->get_engine(); + + switch (m_mem_type) { + case TensorType::BT_BUF_INTERNAL: { + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem); + break; + } + case TensorType::BT_USM_HOST_INTERNAL: { + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host); + break; + } + case TensorType::BT_USM_DEVICE_INTERNAL: { + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_device); + break; + } + case TensorType::BT_BUF_SHARED: { + m_memory_object = engine.share_buffer(m_layout, m_mem); + break; + } + case TensorType::BT_USM_SHARED: { + m_memory_object = engine.share_usm(m_layout, m_mem); + break; + } 
+#ifdef _WIN32 + case TensorType::BT_SURF_SHARED: { + m_layout.format = cldnn::format::nv12; // Other formats are not supported + m_memory_object = engine.share_surface(m_layout, m_mem, m_plane); + break; + } + case TensorType::BT_DX_BUF_SHARED: { + m_memory_object = engine.share_dx_buffer(m_layout, m_mem); + break; + } +#else + case TensorType::BT_SURF_SHARED: { + m_layout.format = cldnn::format::nv12; // Other formats are not supported + m_memory_object = engine.share_surface(m_layout, m_surf, m_plane); + break; + } +#endif + case TensorType::BT_IMG_SHARED: { + m_layout.format = cldnn::format::nv12; // Other formats are not supported + m_memory_object = engine.share_image(m_layout, m_mem); + break; + } + default: + m_memory_object.reset(); + } + + if (enable_caching) + context->add_to_cache(m_hash, m_memory_object); +} + +const std::string& RemoteTensorImpl::get_device_name() const { + return m_context->get_device_name(); +} + +bool RemoteTensorImpl::is_shared() const { + return m_mem_type == TensorType::BT_BUF_SHARED || + m_mem_type == TensorType::BT_USM_SHARED || + m_mem_type == TensorType::BT_IMG_SHARED || + m_mem_type == TensorType::BT_SURF_SHARED || + m_mem_type == TensorType::BT_DX_BUF_SHARED; +} + +bool RemoteTensorImpl::supports_caching() const { + return is_shared(); +} + +bool RemoteTensorImpl::is_surface() const noexcept { + return m_mem_type == TensorType::BT_SURF_SHARED || + m_mem_type == TensorType::BT_IMG_SHARED || + m_mem_type == TensorType::BT_DX_BUF_SHARED; +} + +cldnn::memory::ptr RemoteTensorImpl::get_memory() const { + auto engine = m_memory_object->get_engine(); + return engine->reinterpret_buffer(*m_memory_object, m_layout); +} + +cldnn::memory::ptr RemoteTensorImpl::get_original_memory() const { + return m_memory_object; +} + +std::shared_ptr RemoteTensorImpl::get_context() const { + return m_context; +} + +void RemoteTensorImpl::init_properties() { + OPENVINO_ASSERT(is_allocated(), "[GPU] Can't initialize RemoteTensorImpl parameters as memory was not allocated"); + auto params = m_memory_object->get_internal_params(); + + switch (m_mem_type) { + case TensorType::BT_BUF_INTERNAL: + case TensorType::BT_BUF_SHARED: + m_properties = { + ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::OCL_BUFFER), + ov::intel_gpu::ocl_context(params.context), + ov::intel_gpu::mem_handle(params.mem), + }; + break; + case TensorType::BT_USM_SHARED: + m_properties = { + ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::USM_USER_BUFFER), + ov::intel_gpu::ocl_context(params.context), + ov::intel_gpu::mem_handle(params.mem), + }; + break; + case TensorType::BT_USM_HOST_INTERNAL: + m_properties = { + ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::USM_HOST_BUFFER), + ov::intel_gpu::ocl_context(params.context), + ov::intel_gpu::mem_handle(params.mem), + }; + break; + case TensorType::BT_USM_DEVICE_INTERNAL: + m_properties = { + ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::USM_DEVICE_BUFFER), + ov::intel_gpu::ocl_context(params.context), + ov::intel_gpu::mem_handle(params.mem), + }; + break; + +#ifdef _WIN32 + case TensorType::BT_DX_BUF_SHARED: + m_properties = { + ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::DX_BUFFER), + ov::intel_gpu::ocl_context(params.context), + ov::intel_gpu::va_device(params.user_device), + ov::intel_gpu::mem_handle(params.mem), + ov::intel_gpu::dev_object_handle(params.surface), + }; + break; +#endif + case TensorType::BT_IMG_SHARED: + m_properties = { + 
ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::OCL_IMAGE2D), + ov::intel_gpu::ocl_context(params.context), + ov::intel_gpu::mem_handle(params.mem), + }; + break; + case TensorType::BT_SURF_SHARED: + m_properties = { + ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::VA_SURFACE), + ov::intel_gpu::ocl_context(params.context), + ov::intel_gpu::va_device(params.user_device), + ov::intel_gpu::mem_handle(params.mem), + ov::intel_gpu::dev_object_handle(params.surface), + ov::intel_gpu::va_plane(params.plane), + }; + break; + default: + OPENVINO_THROW("[GPU] Unsupported shared object type ", static_cast(m_mem_type)); + } +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp new file mode 100644 index 00000000000000..2e6699333070f9 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -0,0 +1,815 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/runtime/make_tensor.hpp" +#include "openvino/core/preprocess/input_tensor_info.hpp" +#include "openvino/core/parallel.hpp" +#include "openvino/op/util/op_types.hpp" +#include "transformations/utils/utils.hpp" + +#include "intel_gpu/plugin/sync_infer_request.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/remote_allocators.hpp" +#include "intel_gpu/plugin/remote_tensor.hpp" +#include "intel_gpu/plugin/compiled_model.hpp" +#include "intel_gpu/plugin/variable_state.hpp" +#include "intel_gpu/runtime/internal_properties.hpp" +#include "intel_gpu/runtime/itt.hpp" +#include "intel_gpu/runtime/debug_configuration.hpp" + +#include +#include +#include +#include +#include +#include +#include + +namespace { + +inline std::string get_port_name(const ov::Output& port, const bool is_legacy_api) { + std::string name; + // TODO: Should use tensor name as the port name, but many legacy tests still use legacy name + // plus sometimes it will get empty tensor name. 
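+    // If no name is set above, fall back to a legacy IE-style name derived from the producing node (parameters use the port itself, results use their input).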
+ if (!is_legacy_api) { + name = {}; + } + if (name.empty()) { + bool is_input = ov::op::util::is_parameter(port.get_node()); + if (is_input) { + name = ov::op::util::get_ie_output_name(port); + } else { + const auto node = port.get_node_shared_ptr(); + name = ov::op::util::get_ie_output_name(node->input_value(0)); + } + } + return name; +} + +template +void convert_any_copy(const src_t* src, dst_t* dst, size_t size) { + OPENVINO_ASSERT(src && dst, "[GPU] Src or Dst ptr is null"); + for (size_t i = 0; i < size; i++) + dst[i] = static_cast(src[i]); +} + +void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_ptr, ov::element::Type dst_et, size_t size) { + if (size == 0) + return; + + if (src_et == dst_et) { + std::memcpy(dst_ptr, src_ptr, size); + return; + } + + #define CASE(s_et, d_et, s_type, d_type) \ + if (src_et == s_et && dst_et == d_et) return convert_any_copy(static_cast(src_ptr), static_cast(dst_ptr), size) + + // For unsupported inputs + CASE(ov::element::f64, ov::element::f32, double, float); + CASE(ov::element::i16, ov::element::f32, int16_t, float); + CASE(ov::element::u16, ov::element::f32, uint16_t, float); + CASE(ov::element::u64, ov::element::i32, uint64_t, int32_t); + CASE(ov::element::i64, ov::element::i32, int64_t, int32_t); + CASE(ov::element::u32, ov::element::i32, uint32_t, int32_t); + + // For unsupported outputs + CASE(ov::element::f32, ov::element::f64, float, double); + CASE(ov::element::i32, ov::element::i64, int32_t, int64_t); + CASE(ov::element::i32, ov::element::u64, int32_t, uint64_t); + CASE(ov::element::i32, ov::element::u32, int32_t, uint32_t); + CASE(ov::element::f32, ov::element::i16, float, int16_t); + CASE(ov::element::f32, ov::element::u16, float, uint16_t); + + // TODO: Need instances below? 
+ CASE(ov::element::u32, ov::element::i64, uint32_t, int64_t); + CASE(ov::element::u32, ov::element::u64, uint32_t, uint64_t); + + OPENVINO_THROW("[GPU] Unsupported element types combination for copy: ", src_et, " -> ", dst_et); +} + +bool is_convert_required(ov::element::Type src_et, ov::element::Type dst_et) { + return src_et != dst_et && !(dst_et == ov::element::boolean && src_et == ov::element::u8); +} + +void convert_and_copy(const cldnn::memory::ptr src, ov::ITensor const* dst, const cldnn::stream& stream) { + auto src_et = cldnn::data_type_to_element_type(src->get_layout().data_type); + auto dst_et = dst->get_element_type(); + + size_t size = ov::shape_size(dst->get_shape()); + + cldnn::mem_lock src_lock(src, stream); + std::unique_ptr> dst_lock = nullptr; + + const void* src_ptr = src_lock.data(); + void* dst_ptr = nullptr; + + if (auto remote = dynamic_cast(dst)) { + auto mem = remote->get_original_memory(); + dst_lock.reset(new cldnn::mem_lock(mem, stream)); + dst_ptr = dst_lock->data(); + } else { + dst_ptr = dst->data(); + } + + return convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size); +} + +void convert_and_copy(const ov::ITensor* src, ov::ITensor const* dst, const cldnn::stream& stream) { + auto src_et = src->get_element_type(); + auto dst_et = dst->get_element_type(); + + size_t size = ov::shape_size(dst->get_shape()); + + const void* src_ptr = nullptr; + void* dst_ptr = nullptr; + + std::unique_ptr> src_lock = nullptr; + std::unique_ptr> dst_lock = nullptr; + + if (auto remote = dynamic_cast(src)) { + auto mem = remote->get_original_memory(); + src_lock.reset(new cldnn::mem_lock(mem, stream)); + src_ptr = src_lock->data(); + } else { + src_ptr = src->data(); + } + + if (auto remote = dynamic_cast(dst)) { + auto mem = remote->get_original_memory(); + dst_lock.reset(new cldnn::mem_lock(mem, stream)); + dst_ptr = dst_lock->data(); + } else { + dst_ptr = dst->data(); + } + + return convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size); +} + +bool same_host_mem(cldnn::memory::cptr memory, const uint8_t* host_ptr) { + const uint8_t* device_ptr = memory->get_allocation_type() == cldnn::allocation_type::usm_host ? 
+ static_cast(memory->get_internal_params().mem) : nullptr; + return device_ptr == host_ptr; +} + +ov::Shape predict_shape(const std::string& name, const ov::Shape current_shape, ov::element::Type element_type, cldnn::ShapePredictor& shape_predictor) { + auto et_size = cldnn::ceil_div(element_type.bitwidth(), 8); + auto prealloc_info = shape_predictor.predict_preallocation_shape(name, current_shape, et_size, false); + const auto& preallocation_shape = prealloc_info.second; + auto can_preallocate_buffer = prealloc_info.first && + shape_predictor.can_preallocate(ov::shape_size(preallocation_shape) * et_size); + if (can_preallocate_buffer) { + return preallocation_shape; + } + + return current_shape; +} + +inline bool all_remote_buffers(const std::vector>& tensors) { + return std::all_of(tensors.begin(), tensors.end(), [](const ov::SoPtr& tensor) { + if (auto remote_ptr = std::dynamic_pointer_cast(tensor._ptr)) { + return !remote_ptr->is_surface(); + } + return false; + }); +} + +inline bool all_remote_surfaces(const std::vector>& tensors) { + return std::all_of(tensors.begin(), tensors.end(), [](const ov::SoPtr& tensor) { + if (auto remote_ptr = std::dynamic_pointer_cast(tensor._ptr)) { + return remote_ptr->is_surface(); + } + return false; + }); +} + +inline bool all_host_tensors(const std::vector>& tensors) { + return std::all_of(tensors.begin(), tensors.end(), [](const ov::SoPtr& tensor) { + return std::dynamic_pointer_cast(tensor._ptr) == nullptr; + }); +} + +} // namespace + +namespace ov { +namespace intel_gpu { + +// ----------------------------------------------------------------------------------------------- // +// ---------------------------- OpenVINO API impl ------------------------------------------------ // +// ----------------------------------------------------------------------------------------------- // + +SyncInferRequest::SyncInferRequest(const std::shared_ptr& compiled_model) + : ov::ISyncInferRequest(compiled_model) + , m_graph(compiled_model->get_graph(0)) + , m_context(std::static_pointer_cast(compiled_model->get_context_impl())) + , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling)) + , m_use_external_queue(m_graph->use_external_queue()) { + bool is_legacy_api = !compiled_model->is_new_api(); + init_mappings(is_legacy_api); + allocate_inputs(); + allocate_outputs(); + allocate_states(); +} + +void SyncInferRequest::infer() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::infer"); + setup_stream_graph(); + std::lock_guard lk(m_graph->get_mutex()); + enqueue(); + wait(); +} + +std::vector SyncInferRequest::get_profiling_info() const { + OPENVINO_ASSERT(m_enable_profiling, "[GPU] Profiling data was not collected: please check that ov::enable_profiling property was set to true"); + return m_graph->get_profiling_info(); +} + +std::vector> SyncInferRequest::query_state() const { + std::vector> ret{}; + const auto& variable_states = m_graph->get_network()->get_variable_memories(); + for (const auto& pair : variable_states) { + ret.emplace_back(std::make_shared(pair.first, pair.second, m_graph->get_engine())); + } + auto expected_states_count = m_graph->get_network()->get_variables_state_info().size(); + OPENVINO_ASSERT(expected_states_count == ret.size(), "[GPU] Mismatch of expected states count (", + expected_states_count, ") and actual size (", ret.size(), ")"); + return ret; +} + +void SyncInferRequest::set_tensor(const ov::Output& port, const ov::SoPtr& tensor) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, 
"SyncInferRequest::set_tensor"); + const auto& compiled_model = std::static_pointer_cast(get_compiled_model()); + const auto name = get_port_name(port, !compiled_model->is_new_api()); + const auto& shape = port.get_partial_shape(); + + OPENVINO_ASSERT(tensor != nullptr, "[GPU] Failed to set empty tensor to port: \'", name, "\'"); + OPENVINO_ASSERT(port.get_element_type() == tensor->get_element_type(), + "[GPU] Mismtach tensor and port type: ", port.get_element_type(), " vs ", tensor->get_element_type()); + OPENVINO_ASSERT(shape.compatible(ov::PartialShape(tensor->get_shape())) || tensor->get_shape() == ov::Shape{0}, + "[GPU] The tensor size is not equal to model, can't set input tensor with name: ", + name, + ", because model input (shape=", + shape, + ") and tensor (shape=", + tensor->get_shape(), + ") are incompatible"); + + bool is_input = ov::op::util::is_parameter(port.get_node()); + + if (is_input) { + m_user_inputs[name] = { tensor._ptr, TensorOwner::USER }; + } else { + m_user_outputs[name] = { tensor._ptr, TensorOwner::USER }; + } + + ov::ISyncInferRequest::set_tensor(port, tensor); +} + +void SyncInferRequest::set_tensors_impl(const ov::Output port, const std::vector>& tensors) { + if (tensors.size() == 1) { + return set_tensor(port, tensors[0]); + } + bool is_input = ov::op::util::is_parameter(port.get_node()); + OPENVINO_ASSERT(is_input, "[GPU] set_tensors_impl is not supported for output port"); + + bool is_remote = all_remote_buffers(tensors) || all_remote_surfaces(tensors); + bool is_host = all_host_tensors(tensors); + + OPENVINO_ASSERT(is_host || is_remote, "[GPU] Incorrect input blobs. All blobs must be of the same type"); + + for (const auto& input : get_inputs()) { + if (input == port) { + m_batched_tensors[input.get_tensor_ptr()] = tensors; + return; + } + } + OPENVINO_THROW("[GPU] Cannot find input tensors for port ", port); +} + +ov::SoPtr SyncInferRequest::get_tensor(const ov::Output& port) const { + bool is_input = ov::op::util::is_parameter(port.get_node()); + const auto& compiled_model = std::static_pointer_cast(get_compiled_model()); + const auto name = get_port_name(port, !compiled_model->is_new_api()); + if (is_input) { + OPENVINO_ASSERT(m_user_inputs.count(name) == 1, "[GPU] Input tensor with name ", name, " is not found"); + return { m_user_inputs.at(name).ptr, nullptr }; + } else { + OPENVINO_ASSERT(m_user_outputs.count(name) == 1, "[GPU] Output tensor with name ", name, " is not found"); + return { m_user_outputs.at(name).ptr, nullptr }; + } +} + +void SyncInferRequest::check_tensors() const { + const auto& inputs = get_compiled_model()->inputs(); + for (size_t i = 0; i < inputs.size(); i++) { + if (!is_batched_input(inputs[i])) + check_tensor(inputs[i], get_tensor_ptr(inputs[i])); + } + const auto& outputs = get_compiled_model()->outputs(); + for (size_t i = 0; i < outputs.size(); i++) { + check_tensor(outputs[i], get_tensor_ptr(outputs[i])); + } +} + +// ----------------------------------------------------------------------------------------- // +// ---------------------------- internal pipeline stages ----------------------------------- // +// ----------------------------------------------------------------------------------------- // +void SyncInferRequest::set_task_executor(const std::shared_ptr& task_executor) { + m_stream_executor = std::dynamic_pointer_cast(task_executor); +} + +void SyncInferRequest::enqueue_notify() { + m_graph->wait(Graph::Stage::EXECUTE); + enqueue(); +} + +void SyncInferRequest::wait_notify() { + wait(); + 
m_graph->notify(Graph::Stage::EXECUTE); +} + +void SyncInferRequest::enqueue() { + // set input and output memory from request blob maps + // into the network object primitives + std::vector dependencies; + + for (const auto& it : m_input_ports_map) { + const auto& name = it.first; + const auto& port = it.second; + + if (m_batched_tensors.count(port.get_tensor_ptr()) > 0) { + auto events = prepare_batched_input(name, port, m_batched_tensors.at(port.get_tensor_ptr())); + std::move(events.begin(), events.end(), std::back_inserter(dependencies)); + } else { + auto events = prepare_input(name, port, m_user_inputs.at(name)); + std::move(events.begin(), events.end(), std::back_inserter(dependencies)); + } + } + + for (const auto& it : m_output_ports_map) { + const auto& name = it.first; + const auto& port = it.second; + + auto events = prepare_output(name, port, m_user_outputs.at(name)); + std::move(events.begin(), events.end(), std::back_inserter(dependencies)); + } + + auto network = m_graph->get_network(); + network->assign_variables_memories(); + + m_internal_outputs.clear(); + m_internal_outputs = network->execute(dependencies); + + // If dump layers path is set, only runs first inference. + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->dump_layers_path.length() > 0 && debug_config->dump_iteration.empty()) { + GPU_DEBUG_INFO << "Only run first inference to dump layers." << std::endl; + exit(0); + } +} + +void SyncInferRequest::wait() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait"); + OPENVINO_ASSERT(!m_internal_outputs.empty(), "[GPU] Inference was not started!\n"); + + // wait for completion & collect outputs as requested by the model + // for in_order_queue, it is enough to call finish only once + bool do_sync_per_output = (m_graph->get_network()->get_stream().get_queue_type() == QueueTypes::in_order) ? 
false : true; + if (!do_sync_per_output) + m_graph->get_network()->get_stream().finish(); + + std::vector copy_events; + + for (const auto& it : m_output_ports_map) { + const auto& name = it.first; + const auto& port = it.second; + cldnn::primitive_id internal_name = m_output_names_map.at(name); + auto output_memory = m_internal_outputs.at(internal_name).get_memory(do_sync_per_output); + auto output_layout = m_internal_outputs.at(internal_name).get_layout(); + + if (output_memory) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait::reinterpret_memory"); + OPENVINO_ASSERT(!output_memory->get_layout().data_padding, "[GPU] Unexpected padding in output buffer"); + output_memory = m_graph->get_engine().reinterpret_buffer(*output_memory, output_layout); + } + + OPENVINO_ASSERT(m_user_outputs.count(name) > 0, "[GPU] Output ", name, " is not found in output tensors map"); + auto output_tensor_wrapper = m_user_outputs.at(name); + auto output_tensor = output_tensor_wrapper.ptr; + auto remote_ptr = std::dynamic_pointer_cast(output_tensor); + bool is_remote = remote_ptr != nullptr; + + bool need_output_update = output_layout.bytes_count() == 0 || (output_memory && output_tensor->get_byte_size() != output_memory->size()); + if (need_output_update) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait::update_output"); + auto mem_shape = output_layout.get_shape(); + // In case of old shape infer we need to shrink the output tensor shape to avoid redundant dimensions that occur due to rank extension + // For new shape infer this shouldn't happen, thus remove that WA once we migrate to ngraph-based shape infer for all cases + if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + OPENVINO_ASSERT(port.get_partial_shape().is_static(), "[GPU] Unexpected dynamic shape for legacy shape inference"); + OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); + mem_shape = port.get_shape(); + } + output_tensor->set_shape(mem_shape); + } + + // mapping remote blobs not needed - + // let the user take care of them explicitly + if (!is_remote && output_memory) { + auto dst_ptr = static_cast(output_tensor->data()); + bool same_mem = same_host_mem(output_memory, dst_ptr); + if (!same_mem && output_memory->size()) { + if (auto ev = copy_output_data(output_memory, *output_tensor)) { + copy_events.push_back(ev); + } + } + } + } + + if (!copy_events.empty()) { + auto& stream = m_graph->get_network()->get_stream(); + if (stream.get_queue_type() == QueueTypes::in_order) { + // wait only for the last one + stream.wait_for_events({copy_events.back()}); + } else { + stream.wait_for_events(copy_events); + } + } + + // finally collect profiling info + if (m_enable_profiling) { + m_graph->update_profiling_info(); + } +} + +// ----------------------------------------------------------------------------------------- // +// ---------------------------- internal utils ---------------------------------------------- // +// ----------------------------------------------------------------------------------------- // +void SyncInferRequest::setup_stream_graph() { + int stream_id = 0; + auto& stream_graphs = std::static_pointer_cast(get_compiled_model())->get_graphs(); + if (nullptr != m_stream_executor) { + stream_id = m_stream_executor->get_stream_id(); + auto num_graphs = stream_graphs.size(); + stream_id = stream_id % num_graphs; + } + m_graph = stream_graphs[stream_id]; +} + +std::shared_ptr 
SyncInferRequest::create_host_tensor(const ov::PartialShape& port_shape, const ov::element::Type& port_element_type) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::create_host_tensor"); + // Disable USM usage as USMHostAllocator may fail when attempting to allocate 0 bytes + // If we add WA for such case to avoid driver call, then deallocate method will return false and Blob::setShape call will throw an exception + bool use_usm = m_graph->get_engine().use_unified_shared_memory() && !port_shape.is_dynamic(); + + auto shape = port_shape.is_static() ? port_shape.to_shape() : ov::Shape(port_shape.size(), 0); + auto usm_allocator = USMHostAllocator(m_context); + return use_usm ? ov::make_tensor(port_element_type, shape, usm_allocator) + : ov::make_tensor(port_element_type, shape); +} + +std::shared_ptr SyncInferRequest::create_device_tensor(const ov::Shape& shape, ov::element::Type element_type, + bool need_lockable_memory, void* mem_ptr) const { + TensorType tensor_type = TensorType::BT_EMPTY; + if (mem_ptr) { + tensor_type = TensorType::BT_USM_SHARED; + } else if (m_graph->get_engine().use_unified_shared_memory()) { + tensor_type = need_lockable_memory ? TensorType::BT_USM_HOST_INTERNAL : TensorType::BT_USM_DEVICE_INTERNAL; + } else { + tensor_type = TensorType::BT_BUF_INTERNAL; + } + + // Currently, clDeviceMemAllocINTEL returns memory address allocated to other input blob if the current blob is empty + // W/A for this issue: + // Allocate with non-empty shape and then reinterpret it with the original shape + auto shape_copy = shape; + for (auto &i : shape_copy) { + if (i == 0) + i = 1; + } + + return std::make_shared(m_context, + shape_copy, + element_type, + tensor_type, + mem_ptr); +} + +std::shared_ptr SyncInferRequest::create_shared_device_tensor(const ov::Shape& shape, ov::element::Type element_type, void* usm_host_mem) const { + return create_device_tensor(shape, element_type, false, usm_host_mem); +} + +TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrapper& user_tensor_wrapper, + const std::string& name, + const ov::PartialShape& port_pshape, + ov::element::Type element_type, + bool need_lockable_mem) const { + auto user_tensor = user_tensor_wrapper.ptr; + auto tensor_shape = user_tensor->get_shape(); + bool is_dynamic = port_pshape.is_dynamic(); + OPENVINO_ASSERT(std::dynamic_pointer_cast(user_tensor) == nullptr, "[GPU] Unexpected remote tensor"); + auto input_ptr = user_tensor->data(); + const auto alloc_type = m_graph->get_engine().detect_usm_allocation_type(input_ptr); + const auto is_usm_host = alloc_type == cldnn::allocation_type::usm_host; + bool can_share = is_usm_host && !is_convert_required(user_tensor->get_element_type(), element_type); + + if (can_share) { + // For USM case we create host blob using custom USM host allocator + // and then create shared device blob on top of this buffer + return { create_shared_device_tensor(tensor_shape, element_type, input_ptr), user_tensor_wrapper.owner }; + } + + auto actual_memory_shape = tensor_shape; + if (is_dynamic) { + auto& shape_predictor = m_graph->get_network()->get_shape_predictor(); + actual_memory_shape = predict_shape(name, tensor_shape, element_type, shape_predictor); + } + + return { create_device_tensor(actual_memory_shape, element_type, need_lockable_mem), TensorOwner::PLUGIN }; +} + +cldnn::event::ptr SyncInferRequest::copy_output_data(cldnn::memory::ptr src, const ov::ITensor& dst) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, 
"SyncInferRequest::copy_output_data"); + OPENVINO_ASSERT(src->count() <= dst.get_size(), + "[GPU] Unexpected elements count of dst tensor: ", + "expected at least ", src->count(), ", but ", + "only ", dst.get_size(), " got"); + + const auto& layout = src->get_layout(); + auto& stream = m_graph->get_network()->get_stream(); + + if (is_convert_required(cldnn::data_type_to_element_type(layout.data_type), dst.get_element_type())) { + convert_and_copy(src, &dst, stream); + return nullptr; + } else { + return src->copy_to(stream, dst.data(), false); + } +} + +void SyncInferRequest::allocate_input(const ov::Output& port, const std::string& name) { + const auto& shape = port.get_partial_shape(); + auto element_type = port.get_element_type(); + + m_user_inputs[name] = { create_host_tensor(shape, element_type), TensorOwner::PLUGIN }; + ov::ISyncInferRequest::set_tensor(port, m_user_inputs.at(name).ptr); +} + +void SyncInferRequest::allocate_output(const ov::Output& port, const std::string& name) { + const auto& shape = port.get_partial_shape(); + auto element_type = port.get_element_type(); + + m_user_outputs[name] = { create_host_tensor(shape, element_type), TensorOwner::PLUGIN }; + ov::ISyncInferRequest::set_tensor(port, m_user_outputs.at(name).ptr); +} + +void SyncInferRequest::allocate_inputs() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::allocate_inputs"); + + for (const auto& it : m_input_ports_map) { + const auto& name = it.first; + const auto& port = it.second; + GPU_DEBUG_LOG << "[init " << name << " input blob]" << std::endl; + + bool is_nv12_input = false; + if (port.get_rt_info().count(ov::preprocess::TensorInfoMemoryType::get_type_info_static())) { + std::string mem_type = port.get_rt_info().at(ov::preprocess::TensorInfoMemoryType::get_type_info_static()) + .as().value; + if (mem_type.find(ov::intel_gpu::memory_type::surface) != std::string::npos) { + is_nv12_input = true; + } + } + + if (!is_nv12_input) { + allocate_input(port, name); + } + } +} + +void SyncInferRequest::allocate_outputs() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::allocate_outputs"); + + // allocate outputs + for (const auto& it : m_output_ports_map) { + const auto& name = it.first; + const auto& port = it.second; + GPU_DEBUG_LOG << "[init " << name << " output blob]" << std::endl; + + allocate_output(port, name); + } +} + +void SyncInferRequest::allocate_states() { + m_graph->get_network()->allocate_variables_memories(); +} + +std::vector SyncInferRequest::prepare_batched_input(const std::string& name, + const ov::Output& port, + const std::vector>& user_tensors) { + std::vector ret_events; + bool is_host = all_host_tensors(user_tensors); + bool is_remote_buffer = all_remote_buffers(user_tensors); + // Host buffers are merged to single tensor + if (is_host || is_remote_buffer) { + auto tmp_shape = user_tensors.at(0)->get_shape(); + auto tmp_et = user_tensors.at(0)->get_element_type(); + tmp_shape[0] = user_tensors.size(); + std::shared_ptr merged_tensor = nullptr; + if (is_host) { + merged_tensor = m_context->create_host_tensor(tmp_et, tmp_shape)._ptr; + auto ptr = static_cast(merged_tensor->data()); + ov::parallel_for(user_tensors.size(), [&](size_t i) { + const auto& tensor = user_tensors.at(i); + std::memcpy(ptr + i * tensor->get_byte_size(), static_cast(tensor->data()), tensor->get_byte_size()); + }); + } else { + const auto& stream = m_graph->get_network()->get_stream(); + merged_tensor = m_context->create_tensor(tmp_et, tmp_shape, {})._ptr; + auto 
merged_memory = std::dynamic_pointer_cast(merged_tensor)->get_memory(); + cldnn::mem_lock dst_lock(merged_memory, stream); + for (size_t i = 0; i < user_tensors.size(); i++) { + auto input_tensor = std::dynamic_pointer_cast(user_tensors[i]._ptr); + cldnn::mem_lock src_lock(input_tensor->get_memory(), stream); + std::memcpy(dst_lock.data() + i * input_tensor->get_byte_size(), src_lock.data(), input_tensor->get_byte_size()); + } + } + + auto events = prepare_input(name, port, {merged_tensor, TensorOwner::PLUGIN}); + std::move(events.begin(), events.end(), std::back_inserter(ret_events)); + } else { + for (size_t i = 0; i < user_tensors.size(); i++) { + auto new_name = name + "_" + std::to_string(i); + auto events = prepare_input(new_name, port, {user_tensors[i]._ptr, TensorOwner::USER}); + std::move(events.begin(), events.end(), std::back_inserter(ret_events)); + } + } + + return ret_events; +} + +std::vector SyncInferRequest::prepare_input(const std::string& name, + const ov::Output& port, + const TensorWrapper& user_tensor_wrapper) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::prepare_input"); + auto pshape = port.get_partial_shape(); + auto is_dynamic = pshape.is_dynamic(); + auto user_tensor = user_tensor_wrapper.ptr; + auto element_type = user_tensor->get_element_type(); + auto remote_ptr = std::dynamic_pointer_cast(user_tensor); + bool is_remote = remote_ptr != nullptr; + + auto network = m_graph->get_network(); + auto& engine = m_graph->get_engine(); + auto& stream = network->get_stream(); + + OPENVINO_ASSERT(pshape.compatible(ov::PartialShape(user_tensor->get_shape())) || is_batched_input(port), + "[GPU] The input tensor size is not equal to model port shape, can't handle input tensor with name: ", + name, + ", because model input (shape=", + pshape, + ") and tensor (shape=", + user_tensor->get_shape(), + ") are incompatible"); + + if (is_remote) { + m_plugin_inputs[name] = user_tensor_wrapper; + } + + auto device_tensor_et = convert_to_supported_device_type(element_type); + bool convert_needed = is_convert_required(element_type, device_tensor_et); + bool update_device_tensor = m_plugin_inputs.count(name) == 0 || (m_plugin_inputs[name].owner == TensorOwner::USER && !is_remote); + + if (update_device_tensor) { + // If device input hasn't been created, then try to use user memory if it's usm_host, or allocate new device buffer + m_plugin_inputs[name] = create_or_share_device_tensor(user_tensor_wrapper, name, pshape, device_tensor_et, convert_needed); + } else if (!is_remote) { + // Device memory has been created on previous iterations. 
Try to reuse whenever it's possible + auto device_tensor_wrapper = m_plugin_inputs.at(name); + auto device_tensor = std::dynamic_pointer_cast(device_tensor_wrapper.ptr); + if (is_dynamic) { + if (device_tensor->get_original_memory()->size() < user_tensor->get_byte_size()) { + auto& shape_predictor = network->get_shape_predictor(); + auto actual_shape = predict_shape(name, user_tensor->get_shape(), device_tensor_et, shape_predictor); + auto new_tensor = create_device_tensor(actual_shape, device_tensor_et, false); + new_tensor->set_shape(user_tensor->get_shape()); + m_plugin_inputs[name] = { new_tensor, TensorOwner::PLUGIN }; + } + } + } + + auto device_tensor = std::dynamic_pointer_cast(m_plugin_inputs.at(name).ptr); + if (is_dynamic) { + OPENVINO_ASSERT(device_tensor->get_original_memory()->size() >= user_tensor->get_size(), + "[GPU] Size of input device tensor (=", + device_tensor->get_original_memory()->size(), + ") is expected to be greater or equal to user tensor (=", + user_tensor->get_size(), + ") in dynamic case for ", name); + // tensor reshape below is expected to work w/o reallocation + device_tensor->set_shape(user_tensor->get_shape()); + } else { + OPENVINO_ASSERT(device_tensor->get_size() == user_tensor->get_size(), + "[GPU] Size of user tensor (=", + user_tensor->get_size(), + ") and device tensor (=", + device_tensor->get_size(), + ") don't match for ", name, + ". Those are expected to be equal in case of static shape of the port"); + } + + auto memory = device_tensor->get_memory(); + // WA to extend shape to ranks expected by legacy shape infer. Remove after full migration to new shape infer + if (!m_graph->get_config().get_property(ov::intel_gpu::allow_new_shape_infer)) { + auto new_layout = memory->get_layout(); + new_layout.set_partial_shape(m_graph->get_input_layouts().at(name).get_shape()); + memory = engine.reinterpret_buffer(*memory, new_layout); + } + + cldnn::event::ptr ret_event = nullptr; + if (!is_remote) { + if (device_tensor->get_element_type() != user_tensor->get_element_type()) { + convert_and_copy(user_tensor.get(), device_tensor.get(), stream); + } else { + auto src_ptr = static_cast(user_tensor->data()); + if (!same_host_mem(memory, src_ptr)) { + ret_event = memory->copy_from(stream, src_ptr, false); + } + } + } + + const cldnn::primitive_id internal_name = "parameter:" + name; + network->set_input_data(internal_name, memory); + + if (ret_event) + return { ret_event }; + else + return {}; +} + +std::vector SyncInferRequest::prepare_output(const std::string& name, + const ov::Output& port, + const TensorWrapper& user_tensor_wrapper) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::prepare_output"); + auto pshape = port.get_partial_shape(); + auto is_dynamic = pshape.is_dynamic(); + auto element_type = port.get_element_type(); + auto user_tensor = user_tensor_wrapper.ptr; + auto remote_ptr = std::dynamic_pointer_cast(user_tensor); + bool is_remote = remote_ptr != nullptr; + + if (user_tensor->get_size() > 0) { + OPENVINO_ASSERT(pshape.compatible(ov::PartialShape(user_tensor->get_shape())), + "[GPU] The output tensor size is not equal to model port shape, can't handle output tensor with name: ", + name, + ", because model output (shape=", + pshape, + ") and tensor (shape=", + user_tensor->get_shape(), + ") are incompatible"); + } + + auto network = m_graph->get_network(); + auto device_tensor_et = convert_to_supported_device_type(element_type); + bool convert_needed = is_convert_required(device_tensor_et, element_type); + 
cldnn::primitive_id internal_name = m_output_names_map.at(name); + if (is_remote && !convert_needed) { + m_plugin_outputs[name] = user_tensor_wrapper; + } + + if (!is_dynamic) { + auto is_cpu_impl = network->is_cpu_impl(internal_name); + bool has_device_buffer = m_plugin_outputs.count(name) > 0; + bool update_device_tensor = !has_device_buffer || + (m_plugin_outputs[name].owner == TensorOwner::USER && !is_remote); + if (update_device_tensor) { + m_plugin_outputs[name] = create_or_share_device_tensor(user_tensor_wrapper, name, pshape, device_tensor_et, is_cpu_impl || convert_needed); + } + } + + // Missing output in _plugin_outputs means that the network is dynamic and outputs couldn't be pre-allocated + if (m_plugin_outputs.find(name) == m_plugin_outputs.end()) + return {}; + + auto output_tensor = std::dynamic_pointer_cast(m_plugin_outputs.at(name).ptr); + auto output_memory = output_tensor->get_memory(); + return network->set_output_memory(internal_name, output_memory); +} + +void SyncInferRequest::init_mappings(bool is_legacy_api) { + for (const auto& in : get_inputs()) { + auto port_name = get_port_name(in, is_legacy_api); + m_input_ports_map[port_name] = in; + } + for (const auto& out : get_outputs()) { + auto port_name = get_port_name(out, is_legacy_api); + m_output_ports_map[port_name] = out; + m_output_names_map[port_name] = m_graph->out_name_to_internal(port_name); + } +} + +bool SyncInferRequest::is_batched_input(const ov::Output& port) const { + return m_batched_tensors.count(port.get_tensor_ptr()) > 0; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/einsum_decomposition.cpp b/src/plugins/intel_gpu/src/plugin/transformations/einsum_decomposition.cpp index ae71a9af2fbb98..6f18b227e6699a 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/einsum_decomposition.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/einsum_decomposition.cpp @@ -36,7 +36,7 @@ std::vector> compute_einsum_path(std::shared_ptr> einsum_path; const size_t num_inputs = einsum_node->get_input_size(); - NGRAPH_CHECK(num_inputs > 0); + OPENVINO_ASSERT(num_inputs > 0); for (size_t input_ind = num_inputs - 1; input_ind > 0; --input_ind) { einsum_path.push_back(std::make_pair(0, input_ind)); } @@ -141,9 +141,9 @@ std::string generate_grouping_subscript(const std::string& input_subscript, cons /// void update_operands(ov::OutputVector& input_nodes, std::vector& input_subscripts, size_t input_ind1, size_t input_ind2, const ov::Output& new_node, const std::string& new_subscript) { - NGRAPH_CHECK(input_ind1 < input_ind2); - NGRAPH_CHECK(input_ind2 < input_nodes.size()); - NGRAPH_CHECK(input_ind2 < input_subscripts.size()); + OPENVINO_ASSERT(input_ind1 < input_ind2); + OPENVINO_ASSERT(input_ind2 < input_nodes.size()); + OPENVINO_ASSERT(input_ind2 < input_subscripts.size()); input_nodes.erase(input_nodes.begin() + input_ind2); input_nodes.erase(input_nodes.begin() + input_ind1); input_nodes.push_back(new_node); @@ -163,7 +163,7 @@ void update_operands(ov::OutputVector& input_nodes, std::vector& in /// \return sub-shape /// ov::Shape compute_sub_shape(const ov::Shape& input_shape, size_t begin, size_t end, bool is_product = false) { - NGRAPH_CHECK(end <= input_shape.size()); + OPENVINO_ASSERT(end <= input_shape.size()); if (end <= begin) { return ov::Shape(); } @@ -260,12 +260,12 @@ LabelDimMap compute_label_dim_map(const ov::Rank& input_rank, static const std::string ellipsis = "..."; const auto labels = 
ov::op::v7::Einsum::extract_labels(input_subscript); const auto static_input_rank = input_rank.is_static(); - NGRAPH_CHECK(static_input_rank || (std::find(labels.begin(), labels.end(), ellipsis) == labels.end()), + OPENVINO_ASSERT(static_input_rank || (std::find(labels.begin(), labels.end(), ellipsis) == labels.end()), "Input rank cannot be dynamic in case of ellipsis in input subscript"); const size_t input_rank_length = static_input_rank ? input_rank.get_length() : labels.size(); - NGRAPH_CHECK(input_rank_length >= labels.size()); + OPENVINO_ASSERT(input_rank_length >= labels.size()); const size_t num_broadcasted_dims = input_rank_length - labels.size() + 1; - NGRAPH_CHECK(num_broadcasted_dims > 0); + OPENVINO_ASSERT(num_broadcasted_dims > 0); LabelDimMap resulted_map; size_t current_dim = 0; @@ -304,8 +304,8 @@ void transpose_input(ov::OutputVector& input_nodes, std::vector& in ov::NodeVector& subgraph_nodes) { // perform sanity check for arguments const auto num_inputs = input_nodes.size(); - NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript."); - NGRAPH_CHECK(input_ind < num_inputs, "Input index is out of range."); + OPENVINO_ASSERT(num_inputs == input_subscripts.size(), "Each input must have own subscript."); + OPENVINO_ASSERT(input_ind < num_inputs, "Input index is out of range."); // generate permutation vector by searching for bijection between input_subscripts // and required_subscript @@ -322,11 +322,11 @@ void transpose_input(ov::OutputVector& input_nodes, std::vector& in const auto& input_node = input_nodes[input_ind]; const auto labels = ov::op::v7::Einsum::extract_labels(input_subscript); const auto required_labels = ov::op::v7::Einsum::extract_labels(required_subscript); - NGRAPH_CHECK(labels.size() == required_labels.size()); + OPENVINO_ASSERT(labels.size() == required_labels.size()); const auto label_dim_map = compute_label_dim_map(input_node.get_partial_shape().rank(), input_subscript); for (const auto& required_label : required_labels) { const auto label_dims_it = label_dim_map.find(required_label); - NGRAPH_CHECK(label_dims_it != label_dim_map.end()); + OPENVINO_ASSERT(label_dims_it != label_dim_map.end()); const auto& label_dims = label_dims_it->second; permutation.insert(permutation.end(), label_dims.begin(), label_dims.end()); } @@ -360,8 +360,8 @@ void reduce_input(EinsumDecomposition *einsum_decompose_ptr, const std::string& output_subscript, size_t input_ind, ov::NodeVector& subgraph_nodes) { // perform sanity check for arguments const auto num_inputs = input_nodes.size(); - NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript."); - NGRAPH_CHECK(input_ind < num_inputs, "Input index is out of range."); + OPENVINO_ASSERT(num_inputs == input_subscripts.size(), "Each input must have own subscript."); + OPENVINO_ASSERT(input_ind < num_inputs, "Input index is out of range."); const auto& input_node = input_nodes[input_ind]; const auto& input_subscript = input_subscripts[input_ind]; @@ -377,7 +377,7 @@ void reduce_input(EinsumDecomposition *einsum_decompose_ptr, // or the output subscript const bool is_dim_reduced = is_dimension_reduced(input_subscripts, output_subscript, label, {input_ind}); - NGRAPH_CHECK(label_dim_map.find(label) != label_dim_map.end()); + OPENVINO_ASSERT(label_dim_map.find(label) != label_dim_map.end()); const auto& label_dims = label_dim_map[label]; // if label is not met, dimension corresponding to the label is to reduce @@ -416,7 +416,7 @@ void 
broadcast_input(ov::OutputVector& inputs, const ov::Shape& reduced_shape, bool is_separate_first, ov::NodeVector& subgraph_nodes) { - NGRAPH_CHECK(input_ind < inputs.size()); + OPENVINO_ASSERT(input_ind < inputs.size()); const auto& input = inputs[input_ind]; ov::Shape new_shape{new_common_shape.begin(), new_common_shape.end()}; @@ -434,7 +434,7 @@ void broadcast_input(ov::OutputVector& inputs, } const auto old_shape_size = old_shape.size(); const auto new_shape_size = new_shape.size(); - NGRAPH_CHECK(old_shape_size <= new_shape_size); + OPENVINO_ASSERT(old_shape_size <= new_shape_size); const auto new_shape_const = ov::op::v0::Constant::create(ov::element::Type_t::i64, ov::Shape {new_shape.size()}, new_shape); const auto broadcast = std::make_shared(input, new_shape_const, ov::op::BroadcastType::NUMPY); @@ -447,13 +447,13 @@ void broadcast_input(ov::OutputVector& inputs, ov::Output build_identity(const ov::Output& input_node, const std::vector& repeated_label_dims, ov::NodeVector& subgraph_nodes) { - NGRAPH_CHECK(repeated_label_dims.size() > 1); + OPENVINO_ASSERT(repeated_label_dims.size() > 1); const auto input_shape = input_node.get_shape(); ov::Shape identity_shape(input_shape.size(), 1); const size_t repeated_label_dim_size = input_shape[repeated_label_dims[0]]; for (const auto dim : repeated_label_dims) { - NGRAPH_CHECK(dim < input_shape.size()); - NGRAPH_CHECK(repeated_label_dim_size == input_shape[dim]); + OPENVINO_ASSERT(dim < input_shape.size()); + OPENVINO_ASSERT(repeated_label_dim_size == input_shape[dim]); identity_shape[dim] = repeated_label_dim_size; } @@ -481,11 +481,11 @@ ov::Output build_multi_identity(EinsumDecomposition* einsum_decompose_ const std::vector& repeated_labels, const LabelDimMap& label_dim_map, ov::NodeVector& subgraph_nodes) { - NGRAPH_CHECK(repeated_labels.size() > 0); + OPENVINO_ASSERT(repeated_labels.size() > 0); const auto get_identity = [&](size_t idx) { const auto repeated_label_dims = label_dim_map.find(repeated_labels[idx]); - NGRAPH_CHECK(repeated_label_dims != label_dim_map.end()); + OPENVINO_ASSERT(repeated_label_dims != label_dim_map.end()); return build_identity(input_node, repeated_label_dims->second, subgraph_nodes); }; @@ -522,11 +522,11 @@ void prepare_diagonal_extraction_data( } const auto dims_it = label_dim_map.find(label); - NGRAPH_CHECK(dims_it != label_dim_map.end()); + OPENVINO_ASSERT(dims_it != label_dim_map.end()); auto dims = dims_it->second; const auto dims_size = dims.size(); - NGRAPH_CHECK(dims_size > 0); + OPENVINO_ASSERT(dims_size > 0); if (label != ellipsis && dims_size > 1) { // repeated label is found @@ -539,7 +539,7 @@ void prepare_diagonal_extraction_data( } resultant_subscript += label; for (const auto dim : dims) { - NGRAPH_CHECK(dim < input_shape.size()); + OPENVINO_ASSERT(dim < input_shape.size()); result_shape.push_back(input_shape[dim]); } } @@ -552,8 +552,8 @@ void extract_diagonal(EinsumDecomposition* einsum_decompose_ptr, ov::NodeVector& subgraph_nodes) { // perform sanity check for arguments const auto num_inputs = inputs.size(); - NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript."); - NGRAPH_CHECK(input_ind < num_inputs, "Input index is out of range."); + OPENVINO_ASSERT(num_inputs == input_subscripts.size(), "Each input must have own subscript."); + OPENVINO_ASSERT(input_ind < num_inputs, "Input index is out of range."); const auto& input_node = inputs[input_ind]; const auto& input_subscript = input_subscripts[input_ind]; @@ -604,19 +604,19 @@ void compute_ranges(const 
ov::Rank& input_rank, size_t common_rank = common_labels.size(); if (std::find(common_labels.begin(), common_labels.end(), ellipsis) != common_labels.end()) { - NGRAPH_CHECK(label_to_dim_map.find(ellipsis) != label_to_dim_map.end()); + OPENVINO_ASSERT(label_to_dim_map.find(ellipsis) != label_to_dim_map.end()); common_rank += label_to_dim_map[ellipsis].size() - 1; } size_t sep_rank = sep_labels.size(); if (std::find(sep_labels.begin(), sep_labels.end(), ellipsis) != sep_labels.end()) { - NGRAPH_CHECK(label_to_dim_map.find(ellipsis) != label_to_dim_map.end()); + OPENVINO_ASSERT(label_to_dim_map.find(ellipsis) != label_to_dim_map.end()); sep_rank += label_to_dim_map[ellipsis].size() - 1; } size_t reduced_rank = reduced_labels.size(); if (std::find(reduced_labels.begin(), reduced_labels.end(), ellipsis) != reduced_labels.end()) { - NGRAPH_CHECK(label_to_dim_map.find(ellipsis) != label_to_dim_map.end()); + OPENVINO_ASSERT(label_to_dim_map.find(ellipsis) != label_to_dim_map.end()); reduced_rank += label_to_dim_map[ellipsis].size() - 1; } @@ -660,8 +660,8 @@ void contract_two_inputs(EinsumDecomposition* einsum_decompose_ptr, // perform sanity check for arguments auto num_inputs = input_nodes.size(); - NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript."); - NGRAPH_CHECK(input_ind2 < num_inputs && input_ind1 != input_ind2, "Incorrect input index is specified."); + OPENVINO_ASSERT(num_inputs == input_subscripts.size(), "Each input must have own subscript."); + OPENVINO_ASSERT(input_ind2 < num_inputs && input_ind1 != input_ind2, "Incorrect input index is specified."); const auto& input_node1 = input_nodes[input_ind1]; const auto& input_node2 = input_nodes[input_ind2]; @@ -746,7 +746,7 @@ void contract_two_inputs(EinsumDecomposition* einsum_decompose_ptr, std::vector unsqueeze_axis1; std::vector unsqueeze_axis2; for (const auto& sep_label2 : separate_labels2) { - NGRAPH_CHECK(label_to_dim_map2.find(sep_label2) != label_to_dim_map2.end()); + OPENVINO_ASSERT(label_to_dim_map2.find(sep_label2) != label_to_dim_map2.end()); const auto label_dims = label_to_dim_map2[sep_label2]; for (size_t dim_ind = 0; dim_ind < label_dims.size(); ++dim_ind) { unsqueeze_axis1.push_back(unsqueeze_dim + static_cast(dim_ind)); @@ -754,7 +754,7 @@ void contract_two_inputs(EinsumDecomposition* einsum_decompose_ptr, ++unsqueeze_dim; } for (const auto& sep_label1 : separate_labels1) { - NGRAPH_CHECK(label_to_dim_map1.find(sep_label1) != label_to_dim_map1.end()); + OPENVINO_ASSERT(label_to_dim_map1.find(sep_label1) != label_to_dim_map1.end()); const auto label_dims = label_to_dim_map1[sep_label1]; for (const auto label_dim : label_dims) { unsqueeze_axis2.push_back(label_dim); @@ -928,7 +928,7 @@ EinsumDecomposition::EinsumDecomposition() { extract_diagonal(this, input_nodes, input_subscripts, 0, subgraph_nodes); // reduce dimensions for the remained input node - NGRAPH_CHECK(input_nodes.size() == 1); + OPENVINO_ASSERT(input_nodes.size() == 1); reduce_input(this, input_nodes, input_subscripts, output_subscript, 0, subgraph_nodes); // transpose dimensions to layout required by the output subscript diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index a569404fb1cdee..e3bfbac3f3efbb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -229,9 +229,11 @@ void TransformationsPipeline::apply(std::shared_ptr func) { 
pass_config->set_callback(is_matmul_output); const bool keep_precision_sensitive_in_fp32_1 = true; + const bool convert_input_output_precision = false; manager.register_pass(fp_convert_precision_map, empty_fuse_map, - keep_precision_sensitive_in_fp32_1); + keep_precision_sensitive_in_fp32_1, + convert_input_output_precision); manager.register_pass(); @@ -283,7 +285,11 @@ void TransformationsPipeline::apply(std::shared_ptr func) { }; manager.register_pass(); - manager.register_pass(int_convert_precision_map); + const bool keep_precision_sensitive_in_fp32_2 = true; + manager.register_pass(int_convert_precision_map, + empty_fuse_map, + keep_precision_sensitive_in_fp32_2, + convert_input_output_precision); pass_config->disable(); diff --git a/src/plugins/intel_gpu/src/plugin/variable_state.cpp b/src/plugins/intel_gpu/src/plugin/variable_state.cpp index f2fb0249d573fc..316fb2253b987e 100644 --- a/src/plugins/intel_gpu/src/plugin/variable_state.cpp +++ b/src/plugins/intel_gpu/src/plugin/variable_state.cpp @@ -1,69 +1,52 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include -#include + +#include "openvino/runtime/make_tensor.hpp" +#include "intel_gpu/plugin/remote_tensor.hpp" +#include "intel_gpu/plugin/variable_state.hpp" +#include "intel_gpu/runtime/layout.hpp" + +#include namespace ov { namespace intel_gpu { -VariableState::VariableState(const std::string &name, - const std::vector &states, - cldnn::engine& engine, int currentBatch) - : InferenceEngine::IVariableStateInternal {name} - , currentBatch_ {currentBatch} - , states_ {states} - , desc_ { - PrecisionFromDataType(states.front()->memory->get_layout().data_type), - AggregateShape(states.front()->memory->get_layout()), - InferenceEngine::Layout::ANY - } - , engine_(engine) { } - -void VariableState::Reset() { - IterateOverStates([](cldnn::network::VariableState &state) { - state.is_set = false; - }); +VariableState::VariableState(const std::string &name, cldnn::network::VariableState::Ptr state, cldnn::engine& engine) + : ov::IVariableState {name} + , m_variable_state(state) + , m_engine(engine) { + auto internal_memory = m_variable_state->memory; + auto internal_layout = internal_memory->get_layout(); + auto et = cldnn::data_type_to_element_type(internal_layout.data_type); + auto shape = internal_layout.get_shape(); + m_state = ov::make_tensor(et, shape); } -void VariableState::SetState(const InferenceEngine::Blob::Ptr &newState) { - auto lock = std::dynamic_pointer_cast(newState)->rmap(); - auto data = lock.as(); - IterateOverStates([&data, this](cldnn::network::VariableState &state) { - state.memory->copy_from(engine_.get_service_stream(), data); - data += state.memory->get_layout().bytes_count(); - state.is_set = true; - }); - engine_.get_service_stream().enqueue_barrier(); +void VariableState::reset() { + m_variable_state->is_set = false; } -InferenceEngine::Blob::CPtr VariableState::GetState() const { - auto blob = make_blob_with_precision(desc_, InferenceEngine::CreateDefaultAllocator()); - blob->allocate(); - auto blobLock = std::dynamic_pointer_cast(blob)->wmap(); - auto data = blobLock.as(); - IterateOverStates([&data, this](cldnn::network::VariableState &state) { - cldnn::mem_lock lock { state.memory, engine_.get_service_stream() }; - std::copy(lock.begin(), lock.end(), data); - data += state.memory->get_layout().bytes_count(); - }); - return blob; +void VariableState::set_state(const ov::SoPtr& state) { + const bool blocking = true; + auto remote_ptr = 
std::dynamic_pointer_cast(state._ptr); + if (remote_ptr != nullptr) { + auto user_memory = remote_ptr->get_memory(); + cldnn::mem_lock lock(user_memory, m_engine.get_service_stream()); + m_variable_state->memory->copy_from(m_engine.get_service_stream(), lock.data(), blocking); + } else { + auto data = state->data(); + m_variable_state->memory->copy_from(m_engine.get_service_stream(), data, blocking); + } + m_variable_state->is_set = true; } -InferenceEngine::SizeVector VariableState::AggregateShape(const cldnn::layout &layout) { - const auto& dims = layout.get_dims(); - InferenceEngine::SizeVector shape {dims.begin(), dims.end()}; - if (currentBatch_ != -1) - shape.front() = currentBatch_; - return shape; -} +const ov::SoPtr& VariableState::get_state() const { + auto internal_memory = m_variable_state->memory; + const bool blocking = true; + internal_memory->copy_to(m_engine.get_service_stream(), m_state->data(), blocking); -void VariableState::IterateOverStates(std::function f) const { - for (size_t i = 0; i < states_.size(); i++) { - auto batch = 1 << i; - if (batch & currentBatch_) - f(*states_[i]); - } + return m_state; } } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/file_util.cpp b/src/plugins/intel_gpu/src/runtime/file_util.cpp new file mode 100644 index 00000000000000..37f9fb1d6d2ae7 --- /dev/null +++ b/src/plugins/intel_gpu/src/runtime/file_util.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/runtime/file_util.hpp" +#include + +namespace ov { +namespace intel_gpu { + +void save_binary(const std::string &path, std::vector binary) { + try { + ov::util::save_binary(path, binary); + } catch (std::runtime_error&) {} +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp index c16fb7e3e0ccd2..148a5f1b7a8d2e 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp @@ -13,7 +13,7 @@ #include "intel_gpu/graph/serialization/string_serializer.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/itt.hpp" -#include "openvino/util/file_util.hpp" +#include "intel_gpu/runtime/file_util.hpp" #ifdef WIN32 #include @@ -272,7 +272,7 @@ void kernels_cache::build_batch(const engine& build_engine, const batch_program& // Bucket size can be changed in get_max_kernels_per_batch() method, but forcing it to 1 will lead to much longer // compile time. 
std::lock_guard lock(cacheAccessMutex); - ov::util::save_binary(cached_bin_name, getProgramBinaries(program)); + ov::intel_gpu::save_binary(cached_bin_name, getProgramBinaries(program)); } } else { cl::Program program(cl_build_engine.get_cl_context(), {cl_build_engine.get_cl_device()}, precompiled_kernels); diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index 1825f4b21081fd..1f6c430207bba4 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -32,7 +32,7 @@ cl::PFN_clCreateFromD3D11Buffer cl::BufferDX::pfn_clCreateFromD3D11Buffer = NULL #ifdef ENABLE_ONEDNN_FOR_GPU #include -#include "openvino/util/file_util.hpp" +#include "intel_gpu/runtime/file_util.hpp" #endif namespace cldnn { @@ -89,7 +89,7 @@ void ocl_engine::create_onednn_engine(const ExecutionConfig& config) { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get())); onednn_cache_blob = dnnl::ocl_interop::get_engine_cache_blob(*_onednn_engine); - ov::util::save_binary(path, onednn_cache_blob); + ov::intel_gpu::save_binary(path, onednn_cache_blob); } else { _onednn_engine = std::make_shared(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get(), onednn_cache_blob)); diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp index fcab83a4be4cbc..67d3e65dc08c5d 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp @@ -176,15 +176,19 @@ gpu_image2d::gpu_image2d(ocl_engine* engine, const layout& layout) type = CL_UNORM_INT8; break; case format::nv12: - _width = layout.spatial(1); - _height = layout.spatial(0); - if (layout.feature() == 2) { + { + // [NHWC] dimensions order + auto shape = layout.get_shape(); + _width = shape[2]; + _height = shape[1]; + if (shape[3] == 2) { order = CL_RG; - } else if (layout.feature() > 2) { + } else if (shape[3] > 2) { CLDNN_ERROR_MESSAGE("2D image allocation", "invalid number of channels in NV12 input image!"); } type = CL_UNORM_INT8; break; + } default: CLDNN_ERROR_MESSAGE("2D image allocation", "unsupported image type!"); } diff --git a/src/plugins/intel_gpu/tests/functional/behavior/memory_dyn_batch.cpp b/src/plugins/intel_gpu/tests/functional/behavior/memory_dyn_batch.cpp index 0eaa4a79240ac4..a91b114809a879 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/memory_dyn_batch.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/memory_dyn_batch.cpp @@ -8,6 +8,7 @@ #include "shared_test_classes/base/ov_subgraph.hpp" #include #include +#include "functional_test_utils/skip_tests_config.hpp" #include "functional_test_utils/ov_plugin_cache.hpp" #include "common_test_utils/common_utils.hpp" @@ -96,6 +97,7 @@ class MemoryDynamicBatch : public ::testing::Test, }; TEST_P(MemoryDynamicBatch, MultipleInferencesOnTheSameInferRequest) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() auto compiledModel = core_->compile_model(model_, ov::test::utils::DEVICE_GPU, { }); auto inferRequest = compiledModel.create_infer_request(); input_ = generateInput(inputShape_); @@ -112,6 +114,7 @@ TEST_P(MemoryDynamicBatch, MultipleInferencesOnTheSameInferRequest) { } TEST_P(MemoryDynamicBatch, ResetVariableState) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() auto compiledModel = core_->compile_model(model_, ov::test::utils::DEVICE_GPU, { }); auto inferRequest 
= compiledModel.create_infer_request(); input_ = generateInput(inputShape_); @@ -129,6 +132,7 @@ TEST_P(MemoryDynamicBatch, ResetVariableState) { } TEST_P(MemoryDynamicBatch, GetVariableState) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() auto compiledModel = core_->compile_model(model_, ov::test::utils::DEVICE_GPU, { }); auto inferRequest = compiledModel.create_infer_request(); input_ = generateInput(inputShape_); @@ -145,6 +149,7 @@ TEST_P(MemoryDynamicBatch, GetVariableState) { } TEST_P(MemoryDynamicBatch, SetVariableState) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() auto compiledModel = core_->compile_model(model_, ov::test::utils::DEVICE_GPU, { }); auto inferRequest = compiledModel.create_infer_request(); input_ = generateInput(inputShape_); diff --git a/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp b/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp index 4a5270d348912d..8df502e7bb3487 100644 --- a/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp +++ b/src/plugins/intel_gpu/tests/functional/remote_blob_tests/gpu_remote_tensor_tests.cpp @@ -7,11 +7,12 @@ #include #include +#include "common_test_utils/test_assertions.hpp" +#include "openvino/core/except.hpp" #include "openvino/runtime/intel_gpu/ocl/ocl.hpp" #include "openvino/runtime/core.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" -#include #include #include #include @@ -877,8 +878,8 @@ TEST_F(OVRemoteTensor_Test, NV12toGray) { // ------------------------------------------------------ // Prepare input data - ov::Tensor fake_image = ov::test::utils::create_and_fill_tensor(ov::element::i8, {1, feature, height, width}, 50, 0, 1); - ov::Tensor fake_image_regular = ov::test::utils::create_and_fill_tensor(ov::element::f32, {1, feature, height, width}); + ov::Tensor fake_image = ov::test::utils::create_and_fill_tensor(ov::element::i8, {1, height, width, feature}, 50, 0, 1); + ov::Tensor fake_image_regular = ov::test::utils::create_and_fill_tensor(ov::element::f32, {1, height, width, feature }); auto image_ptr = static_cast(fake_image.data()); auto image_ptr_regular = static_cast(fake_image_regular.data()); @@ -943,12 +944,13 @@ TEST_F(OVRemoteTensor_Test, NV12toGray) { // ------------------------------------------------------ // regular inference - auto fn_ptr_regular = ngraph::builder::subgraph::makeConvPoolRelu({1, 1, height, width}); + auto fn_ptr_regular = ngraph::builder::subgraph::makeConvPoolRelu({1, feature, height, width}); auto p_reg = PrePostProcessor(fn_ptr_regular); p_reg.input().tensor().set_element_type(ov::element::f32) + .set_layout("NHWC") .set_memory_type(ov::intel_gpu::memory_type::buffer); - p_reg.input().model().set_layout("NHWC"); + p_reg.input().model().set_layout("NCHW"); auto function_regular = p_reg.build(); auto param_input_y_regular = function_regular->get_parameters().at(0); @@ -978,8 +980,8 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_ConvertTranspose) { // ------------------------------------------------------ // Prepare input data - ov::Tensor fake_image_data_y = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 1, height, width}, 50, 0, 1); - ov::Tensor fake_image_data_uv = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 2, height / 2, width / 2}, 256, 0, 1); + ov::Tensor fake_image_data_y = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height, width, 1}, 50, 0, 1); + ov::Tensor fake_image_data_uv = ov::test::utils::create_and_fill_tensor(ov::element::u8, 
{1, height / 2, width / 2, 2}, 256, 0, 1); auto ie = ov::Core(); @@ -991,7 +993,7 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_ConvertTranspose) { auto p = PrePostProcessor(fn_ptr_remote); p.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(SURFACE)); + .set_memory_type(ov::intel_gpu::memory_type::surface); p.input().preprocess().convert_color(ColorFormat::BGR); p.input().model().set_layout("NCHW"); auto function = p.build(); @@ -1056,7 +1058,7 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_ConvertTranspose) { auto p_reg = PrePostProcessor(fn_ptr_regular); p_reg.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(BUFFER)); + .set_memory_type(ov::intel_gpu::memory_type::buffer); p_reg.input().preprocess().convert_color(ColorFormat::BGR); p_reg.input().model().set_layout("NCHW"); auto function_regular = p_reg.build(); @@ -1087,7 +1089,7 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_single_plane) { // ------------------------------------------------------ // Prepare input data - ov::Tensor fake_image_data_yuv = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 1, height * 3 / 2, width}, 50); + ov::Tensor fake_image_data_yuv = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height * 3 / 2, width, 1}, 50); auto ie = ov::Core(); @@ -1099,7 +1101,7 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_single_plane) { auto p = PrePostProcessor(fn_ptr_remote); p.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_SINGLE_PLANE) - .set_memory_type(GPU_CONFIG_KEY(SURFACE)); + .set_memory_type(ov::intel_gpu::memory_type::surface); p.input().preprocess().convert_color(ColorFormat::BGR); p.input().model().set_layout("NCHW"); auto function = p.build(); @@ -1147,7 +1149,7 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_single_plane) { auto p_reg = PrePostProcessor(fn_ptr_regular); p_reg.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_SINGLE_PLANE) - .set_memory_type(GPU_CONFIG_KEY(BUFFER)); + .set_memory_type(ov::intel_gpu::memory_type::buffer); p_reg.input().preprocess().convert_color(ColorFormat::BGR); p_reg.input().model().set_layout("NCHW"); auto function_regular = p_reg.build(); @@ -1177,8 +1179,8 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_two_planes) { // ------------------------------------------------------ // Prepare input data - ov::Tensor fake_image_data_y = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 1, height, width}, 50, 0, 1); - ov::Tensor fake_image_data_uv = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 2, height / 2, width / 2}, 256, 0, 1); + ov::Tensor fake_image_data_y = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height, width, 1}, 50, 0, 1); + ov::Tensor fake_image_data_uv = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height / 2, width / 2, 2}, 256, 0, 1); auto ie = ov::Core(); @@ -1190,7 +1192,7 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_two_planes) { auto p = PrePostProcessor(fn_ptr_remote); p.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(SURFACE)); + .set_memory_type(ov::intel_gpu::memory_type::surface); p.input().preprocess().convert_color(ColorFormat::BGR); p.input().model().set_layout("NCHW"); auto function = p.build(); @@ -1255,7 
+1257,7 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_image_two_planes) { auto p_reg = PrePostProcessor(fn_ptr_regular); p_reg.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(BUFFER)); + .set_memory_type(ov::intel_gpu::memory_type::buffer); p_reg.input().preprocess().convert_color(ColorFormat::BGR); p_reg.input().model().set_layout("NCHW"); auto function_regular = p_reg.build(); @@ -1286,8 +1288,8 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_buffer) { // ------------------------------------------------------ // Prepare input data - ov::Tensor fake_image_data_y = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 1, height, width}, 50, 0, 1); - ov::Tensor fake_image_data_uv = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 2, height / 2, width / 2}, 256, 0, 1); + ov::Tensor fake_image_data_y = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height, width, 1}, 50, 0, 1); + ov::Tensor fake_image_data_uv = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height / 2, width / 2, 2}, 256, 0, 1); auto ie = ov::Core(); @@ -1297,7 +1299,7 @@ TEST_F(OVRemoteTensor_Test, NV12toBGR_buffer) { auto p = PrePostProcessor(fn_ptr_remote); p.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(BUFFER)); + .set_memory_type(ov::intel_gpu::memory_type::buffer); p.input().preprocess().convert_color(ColorFormat::BGR); p.input().model().set_layout("NCHW"); auto function = p.build(); @@ -1389,7 +1391,7 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_image_single_plane) { std::vector fake_image_data_yuv; for (size_t i = 0; i < num_batch; i++) { fake_image_data_yuv.push_back(ov::test::utils::create_and_fill_tensor( - ov::element::u8, {1, 1, height * 3 / 2, width}, 50, 0, 1, static_cast(i))); + ov::element::u8, {1, height * 3 / 2, width, 1}, 50, 0, 1, static_cast(i))); } auto ie = ov::Core(); @@ -1402,7 +1404,7 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_image_single_plane) { auto p = PrePostProcessor(fn_ptr_remote); p.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_SINGLE_PLANE) - .set_memory_type(GPU_CONFIG_KEY(SURFACE)); + .set_memory_type(ov::intel_gpu::memory_type::surface); p.input().preprocess().convert_color(ColorFormat::BGR); p.input().model().set_layout("NCHW"); auto function = p.build(); @@ -1460,7 +1462,7 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_image_single_plane) { auto p_reg = PrePostProcessor(fn_ptr_regular); p_reg.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_SINGLE_PLANE) - .set_memory_type(GPU_CONFIG_KEY(BUFFER)); + .set_memory_type(ov::intel_gpu::memory_type::buffer); p_reg.input().preprocess().convert_color(ColorFormat::BGR); p_reg.input().model().set_layout("NCHW"); auto function_regular = p_reg.build(); @@ -1495,9 +1497,9 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_image_two_planes) { // Prepare input data std::vector fake_image_data_y, fake_image_data_uv; for (size_t i = 0; i < num_batch; i++) { - fake_image_data_y.push_back(ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 1, height, width}, 50, 0, 1, static_cast(i))); + fake_image_data_y.push_back(ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height, width, 1}, 50, 0, 1, static_cast(i))); fake_image_data_uv.push_back(ov::test::utils::create_and_fill_tensor( - ov::element::u8, {1, 2, height / 2, 
width / 2}, 256, 0, 1, static_cast(i))); + ov::element::u8, {1, height / 2, width / 2, 2}, 256, 0, 1, static_cast(i))); } auto ie = ov::Core(); @@ -1510,7 +1512,7 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_image_two_planes) { auto p = PrePostProcessor(fn_ptr_remote); p.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(SURFACE)); + .set_memory_type(ov::intel_gpu::memory_type::surface); p.input().preprocess().convert_color(ColorFormat::BGR); p.input().model().set_layout("NCHW"); auto function = p.build(); @@ -1584,7 +1586,7 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_image_two_planes) { auto p_reg = PrePostProcessor(fn_ptr_regular); p_reg.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(BUFFER)); + .set_memory_type(ov::intel_gpu::memory_type::buffer); p_reg.input().preprocess().convert_color(ColorFormat::BGR); p_reg.input().model().set_layout("NCHW"); auto function_regular = p_reg.build(); @@ -1623,8 +1625,8 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toGray) { std::vector fake_image; std::vector fake_image_regular; for (size_t i = 0; i < num_batch; i++) { - auto tensor_image = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, feature, height, width}, 50, 0, 1, static_cast(i)); - auto tensor_regular = ov::test::utils::create_and_fill_tensor(ov::element::f32, {1, feature, height, width}); + auto tensor_image = ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height, width, feature}, 50, 0, 1, static_cast(i)); + auto tensor_regular = ov::test::utils::create_and_fill_tensor(ov::element::f32, {1, feature, height, width }); auto image_ptr = static_cast(tensor_image.data()); auto image_ptr_regular = static_cast(tensor_regular.data()); // Apply NV12 (Surface) -> Gray conversion for regular blob @@ -1739,9 +1741,9 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_buffer) { // Prepare input data std::vector fake_image_data_y, fake_image_data_uv; for (size_t i = 0; i < num_batch * 2; ++i) { - fake_image_data_y.push_back(ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, 1, height, width}, 50, 0, 1, static_cast(i))); + fake_image_data_y.push_back(ov::test::utils::create_and_fill_tensor(ov::element::u8, {1, height, width, 1}, 50, 0, 1, static_cast(i))); fake_image_data_uv.push_back(ov::test::utils::create_and_fill_tensor( - ov::element::u8, {1, 2, height / 2, width / 2}, 256, 0, 1, static_cast(i))); + ov::element::u8, {1, height / 2, width / 2, 2}, 256, 0, 1, static_cast(i))); } auto ie = ov::Core(); @@ -1754,7 +1756,7 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_buffer) { auto p = PrePostProcessor(fn_ptr_remote); p.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(BUFFER)); + .set_memory_type(ov::intel_gpu::memory_type::buffer); p.input().preprocess().convert_color(ColorFormat::BGR); p.input().model().set_layout("NCHW"); auto function = p.build(); @@ -1856,7 +1858,7 @@ TEST_P(OVRemoteTensorBatched_Test, NV12toBGR_buffer) { auto p_reg = PrePostProcessor(fn_ptr_regular); p_reg.input().tensor().set_element_type(ov::element::u8) .set_color_format(ColorFormat::NV12_TWO_PLANES, {"y", "uv"}) - .set_memory_type(GPU_CONFIG_KEY(BUFFER)); + .set_memory_type(ov::intel_gpu::memory_type::buffer); p_reg.input().preprocess().convert_color(ColorFormat::BGR); 
p_reg.input().model().set_layout("NCHW"); auto function_regular = p_reg.build(); @@ -1931,6 +1933,14 @@ TEST(OVRemoteContextGPU, smoke_CustomContextDeviceNames) { } } +TEST(OVRemoteContextGPU, smoke_CantCreateContextForNullHandle) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + auto core = ov::Core(); + OV_EXPECT_THROW(ov::intel_gpu::ocl::ClContext(core, nullptr, 0), ov::Exception, HasSubstr("Can't create shared OCL context as user handle is nullptr!")); +} + TEST(OVRemoteContextGPU, smoke_RemoteContextPerDevice) { #if defined(ANDROID) GTEST_SKIP(); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 5ab84c9013f8aa..ef9e93a3f4f298 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -9,14 +9,6 @@ std::vector disabledTestPatterns() { return { - // TODO: Issue: 39612 - R"(.*Interpolate.*cubic.*tf_half_pixel_for_nn.*FP16.*)", - // TODO: Issue: 43794 - R"(.*(PreprocessTest).*(SetScalePreProcessSetBlob).*)", - R"(.*(PreprocessTest).*(SetScalePreProcessGetBlob).*)", - R"(.*(PreprocessTest).*(SetMeanValuePreProcessSetBlob).*)", - R"(.*(PreprocessTest).*(SetMeanImagePreProcessSetBlob).*)", - R"(.*(PreprocessTest).*(ReverseInputChannelsPreProcessGetBlob).*)", R"(.*(InferRequestPreprocessDynamicallyInSetBlobTest).*)", // TODO: Issue: 46841 R"(.*(QuantGroupConvBackpropData3D).*)", @@ -39,8 +31,6 @@ std::vector disabledTestPatterns() { R"(.*ConstantResultSubgraphTest.*inPrc=I16.*)", // TODO: Issue: 54194 R"(.*ActivationLayerTest.*SoftPlus.*)", - // need to implement Export / Import - R"(.*IEClassImportExportTestP.*)", R"(.*Behavior.*InferRequestSetBlobByType.*Device=HETERO.*)", // TODO: Issue: 59586, NormalizeL2 output mismatch for empty axes case R"(.*NormalizeL2LayerTest.*axes=\(\).*)", @@ -51,15 +41,11 @@ std::vector disabledTestPatterns() { R"(.*Behavior.*(Multi|Auto).*InferRequestSetBlobByType.*Batched.*)", R"(.*(Multi|Auto).*Behavior.*InferRequestIOBBlobTest.*canProcessDeallocatedOutputBlobAfterGetAndSetBlob.*)", // TODO Issue 100145 - R"(.*Behavior.*InferRequestIOBBlobTest.*canReallocateExternalBlobViaGet.*)", R"(.*Behavior.*OVInferRequestIOTensorTest.*canInferAfterIOBlobReallocation.*)", R"(.*Behavior.*OVInferRequestDynamicTests.*InferUpperBoundNetworkAfterIOTensorsReshaping.*)", R"(.*(Auto|Multi).*Behavior.*IncorrectConfigTests.*CanNotLoadNetworkWithIncorrectConfig.*)", // Not implemented yet: R"(.*Behavior.*ExecutableNetworkBaseTest.*canSetConfigToExecNet.*)", - R"(.*Behavior.*ExecutableNetworkBaseTest.*canExport.*)", - R"(.*OVCompiledModelBaseTest.*CanSetConfigToExecNet.*)", - R"(.*OVCompiledModelBaseTest.*CanSetConfigToExecNetAndCheckConfigAndCheck.*)", // TODO: Issue 67408 R"(.*smoke_LSTMSequenceCommonClip.*LSTMSequenceTest.*CompareWithRefs.*)", // TODO: Issue 114262 @@ -68,17 +54,10 @@ std::vector disabledTestPatterns() { R"(.*EltwiseLayerTest.*OpType=Pow.*NetType=i64.*)", // TODO: Issue: 68712 R"(.*.MatMul.*CompareWithRefs.*IS0=\(1.5\)_IS1=\(1.5\).*transpose_a=0.*transpose_b=1.*CONSTANT.*FP16.*UNSPECIFIED.*UNSPECIFIED.*ANY.*)", - // TODO: Issue 69187 - R"(smoke_PrePostProcess.*cvt_color_nv12.*)", - // TODO: Issue 71215 - R"(smoke_PrePostProcess.*cvt_color_i420.*)", // Unsupported R"(smoke_Behavior/InferRequestSetBlobByType.setInputBlobsByType/BlobType=Batched_Device=GPU_Config=().*)", // TODO: Issue 72624 
R"(smoke_PrePostProcess.*resize_dynamic.*)", - // Issue: CVS-66778 - R"(.*smoke_Auto_BehaviorTests.*DynamicOutputToDynamicInput.*)", - R"(.*smoke_Auto_BehaviorTests.*DynamicInputToDynamicOutput.*)", // Dynamic batch allocates output using upper bound R"(.*smoke_BehaviorTests.*InferUpperBoundNetworkWithGetTensor.*)", // need dynamic shapes @@ -89,16 +68,8 @@ std::vector disabledTestPatterns() { R"(.*smoke.*BehaviorTests.*DynamicInputToDynamicOutput.*)", // Issue: 76197 R"(.*registerPluginsXMLUnicodePath.*)", - // Not supported yet - R"(.*CompileModelCacheTestBase.*ConvBias.*)", - R"(.*CompileModelCacheTestBase.*KSOFunction.*)", - R"(.*LoadNetworkCacheTestBase.*)", - // Issue: 83014 - R"(.*smoke_RemoteBlob.*canInferOnUserQueue.*)", // Issue: CVS-76980 R"(.*smoke_Auto_BehaviorTests.*InferDynamicNetwork/.*)", - // Issue: CVS-86976 - R"(.*smoke_VirtualPlugin_BehaviorTests.*LoadedRemoteContext.*)", // Issue: CVS-88667 - Need to verify hetero interoperability R"(.*nightly_OVClassHeteroExecutableNetworlGetMetricTest.*SUPPORTED_(CONFIG_KEYS|METRICS).*)", // TODO: Issue: 89555 @@ -132,10 +103,10 @@ std::vector disabledTestPatterns() { R"(.*(Auto|Multi).*InferRequestPreprocessTest.*SetPreProcessToInputInfo.*)", R"(.*(Auto|Multi).*InferRequestPreprocessTest.*SetPreProcessToInferRequest.*)", // New plugin work with tensors, so it means that blob in old API can have different pointers - R"(.*(Auto|Multi).*InferRequestIOBBlobTest.*secondCallGetInputDoNotReAllocateData.*)", - R"(.*(Auto|Multi).*InferRequestIOBBlobTest.*secondCallGetOutputDoNotReAllocateData.*)", - R"(.*(Auto|Multi).*InferRequestIOBBlobTest.*secondCallGetInputAfterInferSync.*)", - R"(.*(Auto|Multi).*InferRequestIOBBlobTest.*secondCallGetOutputAfterInferSync.*)", + R"(.*InferRequestIOBBlobTest.*secondCallGetInputDoNotReAllocateData.*)", + R"(.*InferRequestIOBBlobTest.*secondCallGetOutputDoNotReAllocateData.*)", + R"(.*InferRequestIOBBlobTest.*secondCallGetInputAfterInferSync.*)", + R"(.*InferRequestIOBBlobTest.*secondCallGetOutputAfterInferSync.*)", // For some strange reason (bug?) output format cannot have a rank greater than 4 for dynamic shape case, // because it crashes in some random places during "reorder_inputs" pass. 
R"(.*UniqueLayerDynamicGPUTest.*\(\d*\.\d*\.\d*\.\d*\.\d*\).*axis.*)", @@ -143,5 +114,21 @@ std::vector disabledTestPatterns() { // Plugin version was changed to ov::Version R"(.*VersionTest.*pluginCurrentVersionIsCorrect.*)", #endif + // Old API cannot deallocate tensor + R"(.*InferRequestIOBBlobTest.*canProcessDeallocatedOutputBlobAfterGetAndSetBlob.*)", + // Issue: 113704 - Layout information maybe incorrect when covert tensor to blob + R"(.*smoke_.*BehaviorTests/InferRequestPreprocessConversionTest.*NHWC.*)", + // Issue: Disabled due to LPT precision matching issue + R"(.*smoke_.*FakeQuantizeTransformation.*)", + R"(.*smoke_LPT.*ReshapeTransformation.*)", + R"(.*smoke_LPT.*ConvolutionTransformation.*)", + R"(.*smoke_LPT.*MatMulWithConstantTransformation.*)", + R"(.*smoke_LPT.*PullReshapeThroughDequantizationTransformation.*)", + R"(.*smoke_LPT.*ElementwiseBranchSelectionTransformation.*)", + // Dynamic state unsupported for now + R"(.*MemoryDynamicBatch.*)", + // Meta plugins may miss saving HW plugin so handle, thus plugin may be unloaded before all objects are deleted which leads to segfault + // Issue: 118840 + R"(.*OVHoldersTest.*)", }; } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_reduce_reshape.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_reduce_reshape.cpp index dd6d1f26656875..fd6db3767919fe 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_reduce_reshape.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_smoke_test_shape_of_reduce_reshape.cpp @@ -143,7 +143,7 @@ const std::vector> dynInputShapes = { // Input for ShapeOf {{ov::Dimension::dynamic(), ov::Dimension::dynamic(), ov::Dimension::dynamic()}, {{1, 10, 4}, {1, 4, 12}}}, // Input for Add - {{ov::Dimension::dynamic()}, {{1, 10, 4}, {2, 2, 12}}} + {{ov::Dimension::dynamic(), ov::Dimension::dynamic(), ov::Dimension::dynamic()}, {{1, 10, 4}, {2, 2, 12}}} }, // 4D { diff --git a/src/plugins/intel_gpu/tests/unit/passes/test_module_fusing_reorder.cpp b/src/plugins/intel_gpu/tests/unit/passes/test_module_fusing_reorder.cpp index 5f1a0a046478e5..7c36d65fcf9659 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/test_module_fusing_reorder.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/test_module_fusing_reorder.cpp @@ -313,11 +313,10 @@ TEST_P(can_fuse_reorder, surface_input_reorder) { std::tie(req_data_type, req_format) = GetParam(); const auto reorder_prim_id = "surface_input_reorder_prim"; - auto in_size = tensor{1, 1, 8, 8}; auto weights_size = tensor{32, 1, 8, 8}; - auto in_layout = layout(data_types::u8, format::nv12, in_size); + auto in_layout = layout({1, 8, 8, 1}, data_types::u8, format::nv12); // Set data type the same as input's data type - auto reorder_layout = layout(data_types::u8, format::bfyx, in_size); + auto reorder_layout = layout({1, 8, 8, 1}, data_types::u8, format::bfyx); auto input_data = engine.allocate_memory({ in_layout }); auto weights_dt = req_data_type == data_types::u8 ? 
data_types::i8 : req_data_type; @@ -328,9 +327,10 @@ TEST_P(can_fuse_reorder, surface_input_reorder) { auto surface_input_reorder_prim = reorder(reorder_prim_id, input_info("input"), reorder_layout); surface_input_reorder_prim.input_mem_type = reorder::memory_type::surface; auto conv_input_reorder_prim = reorder("reorder_conv", input_info(reorder_prim_id), req_format, req_data_type); - auto conv_prim = cldnn::convolution("conv", input_info("reorder_conv"), "weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false); + auto transpose = permute("permute", input_info("reorder_conv"), {0, 3, 1, 2}); + auto conv_prim = cldnn::convolution("conv", input_info("permute"), "weights", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false); - topology.add(input_layout_prim, weights_data_prim, surface_input_reorder_prim, conv_input_reorder_prim, conv_prim); + topology.add(input_layout_prim, weights_data_prim, surface_input_reorder_prim, conv_input_reorder_prim, transpose, conv_prim); ExecutionConfig cfg = get_test_default_config(engine); cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); @@ -370,11 +370,10 @@ TEST_P(can_fuse_reorder, surface_input_reorder_batched) { const auto reorder_prim_id1 = "surface_input_reorder_prim1"; const auto reorder_prim_id2 = "surface_input_reorder_prim2"; - auto in_size = tensor{1, 1, 8, 8}; auto weights_size = tensor{32, 1, 8, 8}; - auto in_layout = layout(data_types::u8, format::nv12, in_size); + auto in_layout = layout({1, 8, 8, 1}, data_types::u8, format::nv12); // Set data type the same as input's data type - auto reorder_layout = layout(data_types::u8, format::bfyx, in_size); + auto reorder_layout = layout({1, 8, 8, 1}, data_types::u8, format::bfyx); auto input_data = engine.allocate_memory({ in_layout }); auto weights_dt = req_data_type == data_types::u8 ? 
data_types::i8 : req_data_type; @@ -389,10 +388,11 @@ TEST_P(can_fuse_reorder, surface_input_reorder_batched) { surface_input_reorder_prim2.input_mem_type = reorder::memory_type::surface; auto concat = concatenation("concat",{ input_info(reorder_prim_id1),input_info(reorder_prim_id2) }, 0); auto conv_input_reorder_prim = reorder("reorder_conv", input_info("concat"), req_format, req_data_type); - auto conv_prim = cldnn::convolution("conv", input_info("reorder_conv"), "weights" , "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false); + auto transpose = permute("permute", input_info("reorder_conv"), {0, 3, 1, 2}); + auto conv_prim = cldnn::convolution("conv", input_info("permute"), "weights" , "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false); topology.add(input_layout_prim1, input_layout_prim2, weights_data_prim, - surface_input_reorder_prim1, surface_input_reorder_prim2, + surface_input_reorder_prim1, surface_input_reorder_prim2, transpose, conv_input_reorder_prim, concat, conv_prim); ExecutionConfig cfg = get_test_default_config(engine); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/cl_mem_input_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/cl_mem_input_test.cpp index fde46f16995a17..e8a9d4c81c0a7e 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/cl_mem_input_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/cl_mem_input_test.cpp @@ -14,259 +14,6 @@ using namespace cldnn; using namespace ::tests; -typedef std::chrono::high_resolution_clock Time; -typedef std::chrono::nanoseconds ns; -typedef std::chrono::duration> ms; -typedef std::chrono::duration fsec; - -namespace { -std::vector createSampleData(int width, int height) { - int data_size = width * (height + height / 2); - auto data = std::vector(data_size); - srand((unsigned)time(0)); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - data[i * width + j] = (i + j) % 255; - } - } - for (int i = 0; i < height / 2; i++) { - for (int j = 0; j < width; j += 2) { - data[width * height + i * width + j] = (i + j) % 255; - data[width * height + i * width + j + 1] = (i + j) % 255; - } - } - - return data; -} - -std::vector createReferenceData(std::vector data, int width, int height, cldnn::format format) { - auto img = std::vector(width * height * 3); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - int y_comp = data[i * width + j]; - int u_comp = data[width * height + i / 2 * width + ((j >> 1) << 1)]; - int v_comp = data[width * height + i / 2 * width + ((j >> 1) << 1) + 1]; - - float B = (1.164f * (float)(y_comp - 16) + 1.596f * (float)(v_comp - 128)); - float G = (1.164f * (float)(y_comp - 16) - 0.813f * (float)(v_comp - 128) - 0.391f * (u_comp - 128)); - float R = (1.164f * (float)(y_comp - 16) + 2.018f * (float)(u_comp - 128)); - - R = std::min(std::max(R, 0.f), 255.f); - G = std::min(std::max(G, 0.f), 255.f); - B = std::min(std::max(B, 0.f), 255.f); - - if (format == cldnn::format::bfyx) { - img[j + width * i] = R; - img[j + width * i + width * height] = G; - img[j + width * i + width * height * 2] = B; - } else { //byxf - img[3* width*i + 3 * j] = R; - img[3 * width * i + 3 * j + 1] = G; - img[3 * width*i + 3 * j + 2] = B; - } - } - } - - return img; -} -} // namespace - -template -void start_cl_mem_check_2_inputs(bool is_caching_test) { - device_query query(engine_types::ocl, runtime_types::ocl); - auto devices = query.get_available_devices(); - ASSERT_TRUE(!devices.empty()); - - auto iter = devices.find(std::to_string(device_query::device_id)); - auto& device = iter 
!= devices.end() ? iter->second : devices.begin()->second; - auto engine = engine::create(engine_types::ocl, runtime_types::ocl, device); - - auto ocl_instance = std::make_shared(std::dynamic_pointer_cast(device)->get_device()); - int width = 224; - int height = 224; - cl_int err; - - if (!device->get_info().supports_image) - GTEST_SKIP(); - - auto data = createSampleData(width, height); - cl_image_format image_format; - image_format.image_channel_order = CL_R; - image_format.image_channel_data_type = CL_UNORM_INT8; - cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)height, 0, - 0, 0, 0, 0, 0, { nullptr } }; - - cl_mem nv12_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); - checkStatus(err, "Creating nv12 image plane_y failed"); - - image_format.image_channel_order = CL_RG; - image_desc.image_width = width / 2; - image_desc.image_height = height / 2; - image_desc.image_depth = 1; - - cl_mem nv12_image_plane_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); - checkStatus(err, "Creating nv12 image plane_uv failed"); - - size_t origin[3] = { 0, 0, 0 }; - size_t y_region[3] = { (size_t)width, (size_t)height, 1 }; - size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 }; - - err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, nullptr, nullptr); - checkStatus(err, "Writing nv12 image plane_y failed"); - - err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr); - checkStatus(err, "Writing nv12 image plane_uv failed"); - - auto input = input_layout("input", { data_types::i8, format::nv12, {1,1,height,width} }); - auto input2 = input_layout("input2", { data_types::i8, format::nv12, {1,1,height / 2,width / 2} }); - auto output_format = cldnn::format::byxf; - layout output_layout(data_types::f32, output_format, { 1,3,height,width }); - auto input_memory = engine->share_image(input.layout, nv12_image_plane_y); - auto input_memory2 = engine->share_image(input2.layout, nv12_image_plane_uv); - - topology topology; - topology.add(input); - topology.add(input2); - topology.add(reorder("reorder", input_info("input"), input_info("input2"), output_layout)); - - cldnn::network::ptr network = get_network(*engine, topology, get_test_default_config(*engine), get_test_stream_ptr(), is_caching_test); - - network->set_input_data("input", input_memory); - network->set_input_data("input2", input_memory2); - - auto outputs = network->execute(); - - std::vector reference_results = createReferenceData(data, width, height, output_format); - auto output_prim = outputs.begin()->second.get_memory(); - cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - int size = width * height * 3; - for (auto i = 0; i < size; i++) { - ASSERT_NEAR(reference_results[i], output_ptr[i], 1.001f); - } - checkStatus(clReleaseMemObject(nv12_image_plane_uv), "clReleaseMemObject"); - checkStatus(clReleaseMemObject(nv12_image_plane_y), "clReleaseMemObject"); -} - -TEST(cl_mem_check, check_2_inputs) { - start_cl_mem_check_2_inputs(false); -} - -TEST(export_import_cl_mem_check, check_2_inputs) { - start_cl_mem_check_2_inputs(true); -} - -TEST(cl_mem_check, check_input) { - device_query query(engine_types::ocl, runtime_types::ocl); - auto devices = query.get_available_devices(); - ASSERT_TRUE(!devices.empty()); - 
auto iter = devices.find(std::to_string(device_query::device_id)); - auto& device = iter != devices.end() ? iter->second : devices.begin()->second; - auto engine = engine::create(engine_types::ocl, runtime_types::ocl, device); - auto ocl_instance = std::make_shared(std::dynamic_pointer_cast(device)->get_device()); - - if (!device->get_info().supports_intel_planar_yuv) - GTEST_SKIP(); - - int width = 224; - int height = 224; - cl_int err; - - auto data = createSampleData(width, height); - cl_image_format image_format; - image_format.image_channel_order = CL_R; - image_format.image_channel_data_type = CL_UNORM_INT8; - cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)height, 0, - 0, 0, 0, 0, 0, { nullptr } }; - - cl_mem nv12_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); - checkStatus(err, "Creating nv12 image plane_y failed"); - - image_format.image_channel_order = CL_RG; - image_desc.image_width = width / 2; - image_desc.image_height = height / 2; - - cl_mem nv12_image_plane_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); - checkStatus(err, "Creating nv12 image plane_uv failed"); - - size_t origin[3] = { 0, 0, 0 }; - size_t y_region[3] = { (size_t)width, (size_t)height, 1 }; - size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 }; - - err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, nullptr, nullptr); - checkStatus(err, "Writing nv12 image plane_y failed"); - - err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr); - checkStatus(err, "Writing nv12 image plane_uv failed"); - - image_format.image_channel_order = CL_NV12_INTEL; - image_format.image_channel_data_type = CL_UNORM_INT8; - - image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; - image_desc.image_width = width; - image_desc.image_height = height; - image_desc.image_array_size = 0; - image_desc.image_row_pitch = 0; - image_desc.image_slice_pitch = 0; - image_desc.num_mip_levels = 0; - image_desc.num_samples = 0; - image_desc.buffer = NULL; - - cl_mem img = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, - &image_format, &image_desc, NULL, &err); - checkStatus(err, "Creating nv12 image failed"); - - image_desc.image_width = 0; - image_desc.image_height = 0; - image_desc.buffer = img; - image_desc.image_depth = 0; - image_format.image_channel_order = CL_R; - - cl_mem img_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); - checkStatus(err, "Creating nv12 image plane_y failed"); - - image_desc.image_depth = 1; - image_format.image_channel_order = CL_RG; - cl_mem img_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); - checkStatus(err, "Creating nv12 image plane_uv failed"); - - size_t regionY[] = { (size_t)width, (size_t)height, 1 }; - size_t regionUV[] = { (size_t)width / 2, (size_t)height / 2, 1 }; - - err = clEnqueueCopyImage(ocl_instance->_queue.get(), nv12_image_plane_y, img_y, origin, origin, regionY, 0, 0, 0); - checkStatus(err, "clEnqueueCopyImage"); - - cl_event event_out; - err = clEnqueueCopyImage(ocl_instance->_queue.get(), nv12_image_plane_uv, img_uv, origin, origin, regionUV, 0, 0, &event_out); 
- checkStatus(err, "clEnqueueCopyImage"); - - checkStatus(clReleaseMemObject(nv12_image_plane_uv), "clReleaseMemObject"); - checkStatus(clReleaseMemObject(nv12_image_plane_y), "clReleaseMemObject"); - - auto input = input_layout("input", { data_types::i8, format::nv12, {1,1,height,width} }); - auto output_format = cldnn::format::byxf; - layout output_layout(data_types::f32, output_format, { 1,3,height,width }); - auto input_memory = engine->share_image(input.layout, img); - - topology topology; - - topology.add(input); - topology.add(reorder("reorder", input_info("input"), output_layout)); - - network network(*engine, topology, get_test_default_config(*engine)); - network.set_input_data("input", input_memory); - - auto outputs = network.execute(); - - std::vector reference_results = createReferenceData(data, width, height, output_format); - auto output_prim = outputs.begin()->second.get_memory(); - cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - int size = width * height * 3; - for (auto i = 0; i < size; i++) { - ASSERT_NEAR(reference_results[i], output_ptr[i], 1.001f); - } - checkStatus(clReleaseMemObject(img), "clReleaseMemObject"); -} - TEST(cl_mem_check, check_write_access_type) { device_query query(engine_types::ocl, runtime_types::ocl); auto devices = query.get_available_devices(); @@ -285,7 +32,7 @@ TEST(cl_mem_check, check_write_access_type) { std::vector src_buffer(values_count); std::iota(src_buffer.begin(), src_buffer.end(), 0.0f); - cldnn::layout linear_layout = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 1, int32_t(values_count), 1)); + cldnn::layout linear_layout{{ov::Dimension(values_count)}, cldnn::data_types::f32, cldnn::format::bfyx}; auto cldnn_mem_src = engine->allocate_memory(linear_layout, cldnn::allocation_type::cl_mem); { cldnn::mem_lock lock(cldnn_mem_src, *stream); @@ -323,7 +70,7 @@ TEST(cl_mem_check, check_read_access_type) { std::vector src_buffer(values_count); std::iota(src_buffer.begin(), src_buffer.end(), 0.0f); - cldnn::layout linear_layout = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 1, int32_t(values_count), 1)); + cldnn::layout linear_layout{{ov::Dimension(values_count)}, cldnn::data_types::f32, cldnn::format::bfyx}; auto cldnn_mem_src = engine->allocate_memory(linear_layout, cldnn::allocation_type::cl_mem); { cldnn::mem_lock lock(cldnn_mem_src, *stream); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convert_color_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convert_color_gpu_test.cpp index d6b621a40b40a0..75f4ad71d99f9f 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convert_color_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convert_color_gpu_test.cpp @@ -66,8 +66,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp32) { int width = 224; int height = 448; - auto input_y = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, height } }); - auto input_uv = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 2, width / 2 , height / 2 } }); + auto input_y = engine.allocate_memory({ { 1, height, width, 1 }, data_types::f32, format::bfyx }); + auto input_uv = engine.allocate_memory({ { 1, height / 2 , width / 2, 2 }, data_types::f32, format::bfyx}); std::vector input_y_data = rg.generate_random_1d(width * height, 0, 255); std::vector input_uv_data = rg.generate_random_1d(width * height / 2, 0, 255); @@ -75,13 +75,14 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp32) { set_values(input_y, 
input_y_data); set_values(input_uv, input_uv_data); - layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height }); - topology topology; topology.add(input_layout("input_y", input_y->get_layout())); topology.add(input_layout("input_uv", input_uv->get_layout())); - topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::buffer, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input_y"), input_info("input_uv") }, + cldnn::convert_color::color_format::NV12, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::buffer)); network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); @@ -106,8 +107,8 @@ TEST(convert_color, nv12_to_bgr_two_planes_buffer_fp32) { int width = 224; int height = 224; - auto input_y = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, height } }); - auto input_uv = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 2, width / 2 , height / 2 } }); + auto input_y = engine.allocate_memory({ { 1, height, width, 1 }, data_types::f32, format::bfyx }); + auto input_uv = engine.allocate_memory({ { 1, height / 2 , width / 2, 2 }, data_types::f32, format::bfyx}); std::vector input_y_data = rg.generate_random_1d(width * height, 0, 255); std::vector input_uv_data = rg.generate_random_1d(width * height / 2, 0, 255); @@ -115,13 +116,14 @@ TEST(convert_color, nv12_to_bgr_two_planes_buffer_fp32) { set_values(input_y, input_y_data); set_values(input_uv, input_uv_data); - layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height }); - topology topology; topology.add(input_layout("input_y", input_y->get_layout())); topology.add(input_layout("input_uv", input_uv->get_layout())); - topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR, - cldnn::convert_color::memory_type::buffer, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input_y"), input_info("input_uv") }, + cldnn::convert_color::color_format::NV12, + cldnn::convert_color::color_format::BGR, + cldnn::convert_color::memory_type::buffer)); network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); @@ -147,8 +149,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_u8) { int width = 224; int height = 224; - auto input_y = engine.allocate_memory({ data_types::u8, format::byxf, { 1, 1, width, height } }); - auto input_uv = engine.allocate_memory({ data_types::u8, format::byxf, { 1, 2, width / 2 , height / 2 } }); + auto input_y = engine.allocate_memory({ { 1, height, width, 1 }, data_types::u8, format::bfyx }); + auto input_uv = engine.allocate_memory({ { 1, height / 2 , width / 2, 2 }, data_types::u8, format::bfyx}); std::vector input_y_data = rg.generate_random_1d(width * height, 0, 255); std::vector input_uv_data = rg.generate_random_1d(width * height / 2, 0, 255); @@ -156,13 +158,14 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_u8) { set_values(input_y, input_y_data); set_values(input_uv, input_uv_data); - layout output_layout(data_types::u8, cldnn::format::byxf, { 1, 3, width, height }); - topology topology; topology.add(input_layout("input_y", input_y->get_layout())); 
topology.add(input_layout("input_uv", input_uv->get_layout())); - topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::buffer, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input_y"), input_info("input_uv") }, + cldnn::convert_color::color_format::NV12, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::buffer)); network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); @@ -188,8 +191,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) { int width = 224; int height = 224; - auto input_y = engine.allocate_memory({ data_types::f16, format::byxf, { 1, 1, width, height } }); - auto input_uv = engine.allocate_memory({ data_types::f16, format::byxf, { 1, 2, width / 2 , height / 2 } }); + auto input_y = engine.allocate_memory({ { 1, height, width, 1 }, data_types::f16, format::bfyx }); + auto input_uv = engine.allocate_memory({ { 1, height / 2 , width / 2, 2 }, data_types::f16, format::bfyx}); std::vector input_y_data = rg.generate_random_1d(width * height, 0, 255); std::vector input_uv_data = rg.generate_random_1d(width * height / 2, 0, 255); @@ -197,13 +200,14 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) { set_values(input_y, input_y_data); set_values(input_uv, input_uv_data); - layout output_layout(data_types::f16, cldnn::format::byxf, { 1, 3, width, height }); - topology topology; topology.add(input_layout("input_y", input_y->get_layout())); topology.add(input_layout("input_uv", input_uv->get_layout())); - topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::buffer, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input_y"), input_info("input_uv") }, + cldnn::convert_color::color_format::NV12, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::buffer)); network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); @@ -231,18 +235,19 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_fp32) { int height = 448; int input_height = height + height / 2; - auto input = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, input_height } }); + auto input = engine.allocate_memory({{ 1, input_height, width, 1 }, data_types::f32, format::bfyx}); int data_size = width * (height + height / 2); std::vector input_data = rg.generate_random_1d(data_size, 0, 255); set_values(input, input_data); - layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height }); - topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::buffer, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input") }, + cldnn::convert_color::color_format::NV12, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::buffer)); network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -268,18 +273,19 @@ TEST(convert_color, 
nv12_to_rgb_single_plane_buffer_u8) { int height = 448; int input_height = height + height / 2; - auto input = engine.allocate_memory({ data_types::u8, format::byxf, { 1, 1, width, input_height } }); + auto input = engine.allocate_memory({{ 1, input_height, width, 1 }, data_types::u8, format::bfyx}); int data_size = width * (height + height / 2); std::vector input_data = rg.generate_random_1d(data_size, 0, 255); set_values(input, input_data); - layout output_layout(data_types::u8, cldnn::format::byxf, { 1, 3, width, height }); - topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::buffer, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input") }, + cldnn::convert_color::color_format::NV12, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::buffer)); network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -346,18 +352,19 @@ TEST(convert_color, nv12_to_rgb_two_planes_surface_u8) { err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr); checkStatus(err, "Writing nv12 image plane_uv failed"); - auto input = input_layout("input", { data_types::u8, format::nv12, { 1, 1, width, height } }); - auto input2 = input_layout("input2", { data_types::u8, format::nv12, { 1, 2, width / 2, height / 2} }); - auto output_format = cldnn::format::byxf; - layout output_layout(data_types::f32, output_format, { 1, 3, width, height }); + auto input = input_layout("input", { { 1, height, width, 1} , data_types::u8, format::nv12 }); + auto input2 = input_layout("input2", { { 1, height / 2, width / 2, 2}, data_types::u8, format::nv12 }); auto input_memory = engine->share_image(input.layout, nv12_image_plane_y); auto input_memory2 = engine->share_image(input2.layout, nv12_image_plane_uv); topology topology; topology.add(input); topology.add(input2); - topology.add(convert_color("convert_color", { input_info("input"), input_info("input2") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::image, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input"), input_info("input2") }, + cldnn::convert_color::color_format::NV12, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::image)); network network(*engine, topology, get_test_default_config(*engine)); network.set_input_data("input", input_memory); @@ -365,14 +372,14 @@ TEST(convert_color, nv12_to_rgb_two_planes_surface_u8) { auto outputs = network.execute(); - std::vector reference_results(width * height * 3); - createReferenceDataNV12(data.data(), data.data() + height * width, reference_results.data(), - 1, height, width, height * width, height * width / 2, true); + std::vector reference_results(width * height * 3); + createReferenceDataNV12(data.data(), data.data() + height * width, reference_results.data(), + 1, height, width, height * width, height * width / 2, true); auto output_prim = outputs.begin()->second.get_memory(); - cldnn::mem_lock output_ptr(output_prim, get_test_stream()); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (size_t i = 0; i < reference_results.size(); i++) { - 
ASSERT_NEAR(reference_results[i], output_ptr[i], 1.001f); + ASSERT_EQ(reference_results[i], output_ptr[i]); } checkStatus(clReleaseMemObject(nv12_image_plane_uv), "clReleaseMemObject"); checkStatus(clReleaseMemObject(nv12_image_plane_y), "clReleaseMemObject"); @@ -415,29 +422,30 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) { err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image, true, origin, y_region, 0, 0, &input_data[0], 0, nullptr, nullptr); checkStatus(err, "Writing nv12 image failed"); - auto input = input_layout("input", { data_types::u8, format::nv12, { 1, 1, width, input_height } }); - auto output_format = cldnn::format::byxf; - layout output_layout(data_types::f32, output_format, { 1, 3, width, height }); + auto input = input_layout("input", {{ 1, input_height, width, 1 }, data_types::u8, format::nv12}); auto input_memory = engine->share_image(input.layout, nv12_image); topology topology; topology.add(input); - topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::image, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input") }, + cldnn::convert_color::color_format::NV12, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::image)); network network(*engine, topology, get_test_default_config(*engine)); network.set_input_data("input", input_memory); auto outputs = network.execute(); - std::vector reference_results(width * height * 3); - createReferenceDataNV12(input_data.data(), input_data.data() + height * width, reference_results.data(), + std::vector reference_results(width * height * 3); + createReferenceDataNV12(input_data.data(), input_data.data() + height * width, reference_results.data(), 1, height, width, input_height * width, input_height * width, true); auto output_prim = outputs.begin()->second.get_memory(); - cldnn::mem_lock output_ptr(output_prim, get_test_stream()); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (size_t i = 0; i < reference_results.size(); i++) { - ASSERT_NEAR(reference_results[i], output_ptr[i], 1.001f); + ASSERT_EQ(reference_results[i], output_ptr[i]); } checkStatus(clReleaseMemObject(nv12_image), "clReleaseMemObject"); } @@ -498,9 +506,9 @@ TEST(convert_color, i420_to_rgb_three_planes_buffer_fp32) { int width = 224; int height = 448; - auto input_y = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, height } }); - auto input_u = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width / 2 , height / 2 } }); - auto input_v = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width / 2 , height / 2 } }); + auto input_y = engine.allocate_memory({ { 1, height, width, 1 }, data_types::f32, format::bfyx }); + auto input_u = engine.allocate_memory({ { 1, height / 2 , width / 2, 1 }, data_types::f32, format::bfyx }); + auto input_v = engine.allocate_memory({ { 1, height / 2 , width / 2, 1 }, data_types::f32, format::bfyx }); std::vector input_y_data = rg.generate_random_1d(width * height, 0, 255); std::vector input_u_data = rg.generate_random_1d(width * height / 4, 0, 255); @@ -510,14 +518,15 @@ TEST(convert_color, i420_to_rgb_three_planes_buffer_fp32) { set_values(input_u, input_u_data); set_values(input_v, input_v_data); - layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height }); - topology topology; topology.add(input_layout("input_y", 
input_y->get_layout())); topology.add(input_layout("input_u", input_u->get_layout())); topology.add(input_layout("input_v", input_v->get_layout())); - topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_u"), input_info("input_v") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::buffer, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input_y"), input_info("input_u"), input_info("input_v") }, + cldnn::convert_color::color_format::I420, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::buffer)); network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); @@ -590,11 +599,9 @@ void test_convert_color_i420_to_rgb_three_planes_surface_u8(bool is_caching_test err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_v, true, origin, uv_region, 0, 0, &data[width * (height + height / 4)], 0, nullptr, nullptr); checkStatus(err, "Writing i420 image plane_v failed"); - auto input = input_layout("input", { data_types::u8, format::nv12, { 1, 1, width, height } }); - auto input2 = input_layout("input2", { data_types::u8, format::nv12, { 1, 1, width / 2, height / 2 } }); - auto input3 = input_layout("input3", { data_types::u8, format::nv12, { 1, 1, width / 2, height / 2 } }); - auto output_format = cldnn::format::byxf; - layout output_layout(data_types::f32, output_format, { 1, 3, width, height }); + auto input = input_layout("input", { { 1, height, width, 1 }, data_types::u8, format::nv12, }); + auto input2 = input_layout("input2", { { 1, height / 2, width / 2, 1 }, data_types::u8, format::nv12 }); + auto input3 = input_layout("input3", { { 1, height / 2, width / 2, 1 }, data_types::u8, format::nv12 }); auto input_memory = engine->share_image(input.layout, i420_image_plane_y); auto input_memory2 = engine->share_image(input2.layout, i420_image_plane_u); @@ -604,8 +611,11 @@ void test_convert_color_i420_to_rgb_three_planes_surface_u8(bool is_caching_test topology.add(input); topology.add(input2); topology.add(input3); - topology.add(convert_color("convert_color", { input_info("input"), input_info("input2"), input_info("input3") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::image, output_layout)); + topology.add(convert_color("convert_color", + { input_info("input"), input_info("input2"), input_info("input3") }, + cldnn::convert_color::color_format::I420, + cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::image)); cldnn::network::ptr network = get_network(*engine, topology, get_test_default_config(*engine), get_test_stream_ptr(), is_caching_test); @@ -615,14 +625,14 @@ void test_convert_color_i420_to_rgb_three_planes_surface_u8(bool is_caching_test auto outputs = network->execute(); - std::vector reference_results(width * height * 3); - createReferenceDataI420(data.data(), data.data() + height * width, data.data() + width * (height + height / 4), reference_results.data(), + std::vector reference_results(width * height * 3); + createReferenceDataI420(data.data(), data.data() + height * width, data.data() + width * (height + height / 4), reference_results.data(), 1, height, width, height * width, height * width / 2, true); auto output_prim = outputs.begin()->second.get_memory(); - cldnn::mem_lock output_ptr(output_prim, get_test_stream()); + cldnn::mem_lock 
output_ptr(output_prim, get_test_stream()); for (size_t i = 0; i < reference_results.size(); i++) { - ASSERT_NEAR(reference_results[i], output_ptr[i], 1.001f); + ASSERT_EQ(reference_results[i], output_ptr[i]) << " i = " << i; } checkStatus(clReleaseMemObject(i420_image_plane_y), "clReleaseMemObject"); checkStatus(clReleaseMemObject(i420_image_plane_u), "clReleaseMemObject"); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/serialization_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/serialization_test.cpp deleted file mode 100644 index e9217a4b7a44f9..00000000000000 --- a/src/plugins/intel_gpu/tests/unit/test_cases/serialization_test.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_utils.h" -#include "intel_gpu/graph/serialization/binary_buffer.hpp" -#include "intel_gpu/graph/serialization/string_serializer.hpp" -#include "intel_gpu/graph/serialization/utils.hpp" - -using namespace cldnn; -using namespace ::tests; - -struct ie_layout_serialization_test : testing::TestWithParam { - void run_test() { - InferenceEngine::Layout test_layout = GetParam(); - - membuf mem_buf; - { - std::ostream out_mem(&mem_buf); - BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); - - std::stringstream ss; - ss << test_layout; - ob << ss.str(); - } - { - std::istream in_mem(&mem_buf); - BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine()); - - std::string str_layout; - ib >> str_layout; - EXPECT_EQ(cldnn::serial_util::layout_from_string(str_layout), test_layout); - } - } -}; - -TEST_P(ie_layout_serialization_test, basic) { - run_test(); -} - -INSTANTIATE_TEST_SUITE_P( - gpu_serialization, - ie_layout_serialization_test, - testing::Values(InferenceEngine::Layout::ANY, - InferenceEngine::Layout::NCHW, - InferenceEngine::Layout::NHWC, - InferenceEngine::Layout::NCDHW, - InferenceEngine::Layout::NDHWC, - InferenceEngine::Layout::OIHW, - InferenceEngine::Layout::GOIHW, - InferenceEngine::Layout::OIDHW, - InferenceEngine::Layout::GOIDHW, - InferenceEngine::Layout::SCALAR, - InferenceEngine::Layout::C, - InferenceEngine::Layout::CHW, - InferenceEngine::Layout::HWC, - InferenceEngine::Layout::HW, - InferenceEngine::Layout::NC, - InferenceEngine::Layout::CN, - InferenceEngine::Layout::BLOCKED) -); diff --git a/src/plugins/proxy/dev_api/openvino/proxy/plugin.hpp b/src/plugins/proxy/dev_api/openvino/proxy/plugin.hpp index c0bc2164ee4622..251dbf925978a1 100644 --- a/src/plugins/proxy/dev_api/openvino/proxy/plugin.hpp +++ b/src/plugins/proxy/dev_api/openvino/proxy/plugin.hpp @@ -26,7 +26,7 @@ void create_plugin(std::shared_ptr& plugin); * * @return Original remote context */ -const ov::SoPtr& get_hardware_context(const ov::SoPtr& context); +ov::SoPtr get_hardware_context(const ov::SoPtr& context); /** * @brief Get wrapped remote tensor @@ -35,7 +35,7 @@ const ov::SoPtr& get_hardware_context(const ov::SoPtr& get_hardware_tensor(const ov::SoPtr& tensor); +ov::SoPtr get_hardware_tensor(const ov::SoPtr& tensor, bool unwrap = false); } // namespace proxy } // namespace ov diff --git a/src/plugins/proxy/src/infer_request.cpp b/src/plugins/proxy/src/infer_request.cpp index 7bc17b5af9afdd..affa877bb0bf95 100644 --- a/src/plugins/proxy/src/infer_request.cpp +++ b/src/plugins/proxy/src/infer_request.cpp @@ -7,6 +7,7 @@ #include #include +#include "openvino/proxy/plugin.hpp" #include "openvino/runtime/itensor.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/remote_context.hpp" @@ 
-57,7 +58,7 @@ ov::SoPtr ov::proxy::InferRequest::get_tensor(const ov::Output& port, const ov::SoPtr& tensor) { - m_infer_request->set_tensor(port, tensor); + m_infer_request->set_tensor(port, ov::proxy::get_hardware_tensor(tensor, true)); } std::vector> ov::proxy::InferRequest::get_tensors(const ov::Output& port) const { @@ -75,7 +76,11 @@ std::vector> ov::proxy::InferRequest::get_tensors(const o void ov::proxy::InferRequest::set_tensors(const ov::Output& port, const std::vector>& tensors) { - return m_infer_request->set_tensors(port, tensors); + std::vector> hw_tensors; + for (auto& tensor : tensors) { + hw_tensors.push_back(ov::proxy::get_hardware_tensor(tensor, true)); + } + return m_infer_request->set_tensors(port, hw_tensors); } std::vector> ov::proxy::InferRequest::query_state() const { diff --git a/src/plugins/proxy/src/remote_context.cpp b/src/plugins/proxy/src/remote_context.cpp index e1a3080441f5bc..5ce8133f334a2b 100644 --- a/src/plugins/proxy/src/remote_context.cpp +++ b/src/plugins/proxy/src/remote_context.cpp @@ -75,6 +75,6 @@ const ov::SoPtr& ov::proxy::RemoteContext::get_hardware_cont return context; } -const ov::SoPtr& ov::proxy::get_hardware_context(const ov::SoPtr& context) { +ov::SoPtr ov::proxy::get_hardware_context(const ov::SoPtr& context) { return ov::proxy::RemoteContext::get_hardware_context(context); } diff --git a/src/plugins/proxy/src/remote_tensor.cpp b/src/plugins/proxy/src/remote_tensor.cpp index 4ef5f5b3f1bdbe..49dc25fcebd92d 100644 --- a/src/plugins/proxy/src/remote_tensor.cpp +++ b/src/plugins/proxy/src/remote_tensor.cpp @@ -7,6 +7,10 @@ #include #include "openvino/proxy/plugin.hpp" +#include "openvino/runtime/itensor.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include "openvino/runtime/so_ptr.hpp" +#include "remote_utils.hpp" namespace { std::shared_ptr cast_tensor(const ov::SoPtr& tensor) { @@ -59,12 +63,23 @@ const ov::Strides& ov::proxy::RemoteTensor::get_strides() const { return m_tensor->get_strides(); } -const ov::SoPtr& ov::proxy::RemoteTensor::get_hardware_tensor(const ov::SoPtr& tensor) { +ov::SoPtr ov::proxy::RemoteTensor::get_hardware_tensor(const ov::SoPtr& tensor, bool unwrap) { + ov::SoPtr hw_tensor = tensor; if (auto remote_tensor = std::dynamic_pointer_cast(tensor._ptr)) - return remote_tensor->m_tensor; - return tensor; + hw_tensor = remote_tensor->m_tensor; + + if (unwrap) { + if (auto wrapper = std::dynamic_pointer_cast(hw_tensor._ptr)) { + auto blob = ov::get_hardware_blob(wrapper->blob.get()); + if (auto tensor_holder = dynamic_cast(blob)) { + hw_tensor = tensor_holder->get_tensor(); + } + } + } + + return hw_tensor; } -const ov::SoPtr& ov::proxy::get_hardware_tensor(const ov::SoPtr& tensor) { - return ov::proxy::RemoteTensor::get_hardware_tensor(tensor); +ov::SoPtr ov::proxy::get_hardware_tensor(const ov::SoPtr& tensor, bool unwrap) { + return ov::proxy::RemoteTensor::get_hardware_tensor(tensor, unwrap); } diff --git a/src/plugins/proxy/src/remote_tensor.hpp b/src/plugins/proxy/src/remote_tensor.hpp index ca32974494ef7f..62e226245437f7 100644 --- a/src/plugins/proxy/src/remote_tensor.hpp +++ b/src/plugins/proxy/src/remote_tensor.hpp @@ -36,7 +36,7 @@ class RemoteTensor : public ov::IRemoteTensor { const ov::Strides& get_strides() const override; - static const ov::SoPtr& get_hardware_tensor(const ov::SoPtr& tensor); + static ov::SoPtr get_hardware_tensor(const ov::SoPtr& tensor, bool unwrap); private: mutable std::string m_name; From 7bb22b43b3b23fd039618641c33d9ee613e049e6 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij 
Date: Tue, 12 Sep 2023 12:02:01 +0400 Subject: [PATCH 20/31] `TopKLayerTest` to API2.0 (#19738) --- .../single_layer_tests/topk.cpp | 34 +++++------ .../skip_tests_config.cpp | 5 ++ .../shared/include/single_op_tests/topk.hpp | 15 +++++ .../shared_test_classes/single_op/topk.hpp | 33 +++++++++++ .../src/base/utils/generate_inputs.cpp | 29 ++++++++++ .../src/single_op/topk.cpp | 57 +++++++++++++++++++ 6 files changed, 156 insertions(+), 17 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/topk.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/topk.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/topk.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp index 436bfbf7940ab5..9c51f980add024 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/topk.cpp @@ -4,15 +4,15 @@ #include -#include "single_layer_tests/topk.hpp" +#include "single_op_tests/topk.hpp" -using namespace LayerTestsDefinitions; +using ov::test::TopKLayerTest; namespace { -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 +const std::vector model_types = { + ov::element::f32, + ov::element::f16 }; const std::vector axes = { @@ -30,28 +30,28 @@ const std::vector k = { 21 }; -const std::vector modes = { - ngraph::opset4::TopK::Mode::MIN, - ngraph::opset4::TopK::Mode::MAX +const std::vector modes = { + ov::op::v1::TopK::Mode::MIN, + ov::op::v1::TopK::Mode::MAX }; -const std::vector sortTypes = { - ngraph::opset4::TopK::SortType::SORT_INDICES, - ngraph::opset4::TopK::SortType::SORT_VALUES, +const std::vector sort_types = { + ov::op::v1::TopK::SortType::SORT_INDICES, + ov::op::v1::TopK::SortType::SORT_VALUES, }; +const std::vector> input_shape_static = { + {{21, 21, 21, 21}} +}; INSTANTIATE_TEST_SUITE_P(smoke_TopK, TopKLayerTest, ::testing::Combine( ::testing::ValuesIn(k), ::testing::ValuesIn(axes), ::testing::ValuesIn(modes), - ::testing::ValuesIn(sortTypes), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({21, 21, 21, 21})), + ::testing::ValuesIn(sort_types), + ::testing::ValuesIn(model_types), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shape_static)), ::testing::Values(ov::test::utils::DEVICE_CPU)), TopKLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 72d2608dc49799..76bbfcce246754 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -190,6 +190,11 @@ std::vector disabledTestPatterns() { R"(.*smoke_AutoBatching_CPU/AutoBatching_Test_DetectionOutput.*)", // Issue: 117837 R"(.*smoke_4D_out_of_range/GatherInPlaceLayerTestCPU.*_indices=\(\-15\).*)", + // Issue: 120222 + 
R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=1_axis=3_.*_modelType=f16_trgDev=CPU.*)", + R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=7_axis=3_.*_modelType=f16_trgDev=CPU.*)", + R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=18_.*_modelType=f16_trgDev=CPU.*)", + R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=21_.*_sort=value_modelType=f16_trgDev=CPU.*)", }; #if defined(OPENVINO_ARCH_X86) diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/topk.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/topk.hpp new file mode 100644 index 00000000000000..214286ec996802 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/topk.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/topk.hpp" + +namespace ov { +namespace test { +TEST_P(TopKLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/topk.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/topk.hpp new file mode 100644 index 00000000000000..953a2cffe8661d --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/topk.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { +typedef std::tuple< + int64_t, // keepK + int64_t, // axis + ov::op::v1::TopK::Mode, // mode + ov::op::v1::TopK::SortType, // sort + ov::element::Type, // Model type + std::vector, // Input shape + std::string // Target device name +> TopKParams; + +class TopKLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp index 609acf31bf2ab1..369b6dedcd7a68 100644 --- a/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp +++ b/src/tests/functional/shared_test_classes/src/base/utils/generate_inputs.cpp @@ -934,6 +934,35 @@ ov::runtime::Tensor generate(const } } +ov::runtime::Tensor generate(const + std::shared_ptr& node, + size_t port, + const ov::element::Type& elemType, + const ov::Shape& targetShape) { + auto tensor = ov::Tensor{elemType, targetShape}; + size_t size = tensor.get_size(); + int start = - static_cast(size / 2); + std::vector data(size); + std::iota(data.begin(), data.end(), start); + std::mt19937 gen(0); + std::shuffle(data.begin(), data.end(), gen); + + float divisor = size / 10.0; + + if (tensor.get_element_type() == ov::element::f32) { + auto *p = tensor.data(); + for (size_t i = 0; i < size; i++) + p[i] = static_cast(data[i] / divisor); + } else if (tensor.get_element_type() == ov::element::f16) { + auto *p = tensor.data(); + for (size_t i = 0; i < size; i++) + p[i] = static_cast(data[i] / divisor); + } else { + OPENVINO_THROW("Unsupported element type: ", tensor.get_element_type()); + } + return tensor; +} + template ov::runtime::Tensor generateInput(const std::shared_ptr& node, size_t port, diff --git 
a/src/tests/functional/shared_test_classes/src/single_op/topk.cpp b/src/tests/functional/shared_test_classes/src/single_op/topk.cpp new file mode 100644 index 00000000000000..480c8eaaa090a5 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/topk.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/topk.hpp" +#include +#include + +namespace ov { +namespace test { +std::string TopKLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + ov::element::Type model_type; + std::vector input_shapes; + std::string target_device; + int64_t keepK, axis; + ov::op::v1::TopK::Mode mode; + ov::op::v1::TopK::SortType sort; + std::tie(keepK, axis, mode, sort, model_type, input_shapes, target_device) = obj.param; + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < input_shapes.size(); i++) { + result << ov::test::utils::partialShape2str({input_shapes[i].first}) + << (i < input_shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < input_shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < input_shapes.size(); j++) { + result << ov::test::utils::vec2str(input_shapes[j].second[i]) << (j < input_shapes.size() - 1lu ? "_" : ""); + } + result << "}_"; + } + result << "k=" << keepK << "_"; + result << "axis=" << axis << "_"; + result << "mode=" << mode << "_"; + result << "sort=" << sort << "_"; + result << "modelType=" << model_type.to_string() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void TopKLayerTest::SetUp() { + std::vector input_shapes; + ov::element::Type model_type; + int64_t keepK, axis; + ov::op::v1::TopK::Mode mode; + ov::op::v1::TopK::SortType sort; + std::tie(keepK, axis, mode, sort, model_type, input_shapes, targetDevice) = this->GetParam(); + init_input_shapes(input_shapes); + + auto param = std::make_shared(model_type, inputDynamicShapes.front()); + auto k = std::make_shared(ov::element::i64, ov::Shape{}, &keepK); + auto topk = std::make_shared(param, k, axis, mode, sort); + function = std::make_shared(topk->outputs(), ov::ParameterVector{param}, "TopK"); +} +} // namespace test +} // namespace ov From 58546b2ecbaef16f1673c98690f38fdf13f2dc95 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 12 Sep 2023 12:20:49 +0400 Subject: [PATCH 21/31] Use ov_mark_target_as_cc in CPU oneDNN (#19766) --- src/plugins/intel_cpu/thirdparty/onednn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index a1aa20ca8f1946..3110963434d866 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit a1aa20ca8f19465dc2fd18389953ed83798b2fd3 +Subproject commit 3110963434d8662e93b3cdafc9bf6b41235aa602 From 514f9864afc4e6389c1a3e414147f3e8084ae40e Mon Sep 17 00:00:00 2001 From: HenryLin-png <105683113+HenryLin-png@users.noreply.github.com> Date: Tue, 12 Sep 2023 04:22:24 -0400 Subject: [PATCH 22/31] CVS-98205 and CVS-114018 (#18592) * Changed ls calls to /bin/ls, unset python_version before parsing cmd line * Update setupvars.sh Unset all temporary variables --------- Co-authored-by: henry1.lin Co-authored-by: Ilya Lavrenov --- scripts/setupvars/setupvars.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh 
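The commit above swaps bare `ls` calls for `/bin/ls` (presumably so user aliases or shell functions cannot alter the output being parsed) and unsets the helper variables the script creates. The interpreter checks themselves stay delegated to Python one-liners; a minimal stand-alone sketch of that probe, for illustration only and not part of the patch:

    import sys

    # Same information setupvars.sh queries via `$python_interp -c '...'`:
    # pointer width from sys.maxsize and the major.minor version string.
    def interpreter_info():
        bitness = 64 if sys.maxsize > 2 ** 32 else 32
        version = "{}.{}".format(sys.version_info.major, sys.version_info.minor)
        return version, bitness

    if __name__ == "__main__":
        version, bitness = interpreter_info()
        print("python {} ({}-bit)".format(version, bitness))
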
index 66a620b2d39883..591b25e7bfb7f5 100755 --- a/scripts/setupvars/setupvars.sh +++ b/scripts/setupvars/setupvars.sh @@ -35,7 +35,7 @@ if [ -e "$INSTALLDIR/runtime" ]; then export ngraph_DIR=$INSTALLDIR/runtime/cmake export OpenVINO_DIR=$INSTALLDIR/runtime/cmake - system_type=$(ls "$INSTALLDIR/runtime/lib/") + system_type=$(/bin/ls "$INSTALLDIR/runtime/lib/") OV_PLUGINS_PATH=$INSTALLDIR/runtime/lib/$system_type if [[ "$OSTYPE" == "darwin"* ]]; then @@ -56,7 +56,7 @@ if [ -e "$INSTALLDIR/runtime" ]; then fi fi - if ls "$tbb_lib_path"/libtbb* >/dev/null 2>&1; then + if /bin/ls "$tbb_lib_path"/libtbb* >/dev/null 2>&1; then if [[ "$OSTYPE" == "darwin"* ]]; then export DYLD_LIBRARY_PATH=$tbb_lib_path:${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} fi @@ -64,6 +64,7 @@ if [ -e "$INSTALLDIR/runtime" ]; then else echo "[setupvars.sh] WARNING: Directory with TBB libraries is not detected. Please, add TBB libraries to LD_LIBRARY_PATH / DYLD_LIBRARY_PATH manually" fi + unset tbb_lib_path if [ -e "$INSTALLDIR/runtime/3rdparty/tbb/lib/cmake/TBB" ]; then export TBB_DIR=$INSTALLDIR/runtime/3rdparty/tbb/lib/cmake/TBB @@ -77,6 +78,8 @@ if [ -e "$INSTALLDIR/runtime" ]; then echo "[setupvars.sh] WARNING: TBB_DIR directory is not defined automatically by setupvars.sh. Please, set it manually to point to TBBConfig.cmake" fi fi + + unset system_type fi # OpenCV environment @@ -115,18 +118,22 @@ check_python_version () { echo "[setupvars.sh] WARNING: Unsupported Python version ${python_version}. Please install one of Python" \ "${PYTHON_VERSION_MAJOR}.${MIN_REQUIRED_PYTHON_VERSION_MINOR} -" \ "${PYTHON_VERSION_MAJOR}.${MAX_SUPPORTED_PYTHON_VERSION_MINOR} (64-bit) from https://www.python.org/downloads/" + unset python_version return 0 fi + if command -v python"$python_version" > /dev/null 2>&1; then python_interp=python"$python_version" else python_interp=python"$python_version_major" fi python_bitness=$("$python_interp" -c 'import sys; print(64 if sys.maxsize > 2**32 else 32)') + unset python_interp if [ "$python_bitness" != "" ] && [ "$python_bitness" != "64" ] && [ "$OS_NAME" != "Raspbian" ]; then echo "[setupvars.sh] WARNING: 64 bitness for Python $python_version is required" fi + unset python_bitness if [ -n "$python_version" ]; then if [[ -d $INTEL_OPENVINO_DIR/python ]]; then @@ -154,4 +161,11 @@ else check_python_version fi +unset python_version +unset python_version_to_check +unset PYTHON_VERSION_MAJOR +unset MIN_REQUIRED_PYTHON_VERSION_MINOR +unset MAX_SUPPORTED_PYTHON_VERSION_MINOR +unset OS_NAME + echo "[setupvars.sh] OpenVINO environment initialized" From a6bc78dd0f6f3d5580dcb5b64da9380213ff4528 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 12 Sep 2023 10:40:20 +0200 Subject: [PATCH 23/31] [PT FE] Separate tracing and scripting modes (#19676) * [PT FE] Separate scripting and tracing in decoder * Fix convert_model to accept decoder * Some fixes * Fix code style * Fix preprocessor tests * Fix tests * Fix tests * Fix more tests * Fix ovc tests --- .../openvino/frontend/pytorch/ts_decoder.py | 171 +++--------------- .../src/openvino/frontend/pytorch/utils.py | 133 +++++++++++++- .../test_preprocessor.py | 3 +- .../test_mo_convert_pytorch.py | 15 +- .../ovc_python_api_tests/test_pytorch.py | 8 +- .../pytorch_tests/pytorch_layer_test_class.py | 69 +++---- tests/layer_tests/pytorch_tests/test_add.py | 2 +- .../layer_tests/pytorch_tests/test_aliases.py | 13 +- tests/layer_tests/pytorch_tests/test_mul.py | 2 +- .../mo/moc_frontend/pytorch_frontend_utils.py | 5 +- .../moc_frontend/pytorch_frontend_utils.py 
| 5 +- 11 files changed, 214 insertions(+), 212 deletions(-) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index 5ac0f797efef8e..32e62084e89e41 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -7,29 +7,15 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType from openvino.runtime import op, PartialShape, Type as OVType, OVAny -from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map, torch_tensor_to_ov_const +from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map, prepare_example_inputs_and_model, convert_quantized_tensor from openvino.runtime import opset11 as ops import typing import torch -import numpy as np - -wrapper_template = """ -import torch -from typing import * - -class ModelWrapper(torch.nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward(self, {input_sign}): - return self.model({example_input}) -""" class TorchScriptPythonDecoder (Decoder): - def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=None, shared_memory=True): + def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=None, shared_memory=True, skip_freeze=False): Decoder.__init__(self) # We store every decoder created by this decoder so that all them are not deleted until the first decoder is deleted self.m_decoders = [] @@ -38,16 +24,18 @@ def __init__(self, pt_module, graph_element=None, example_input=None, alias_db=N self._input_is_list = False if graph_element is None: try: - pt_module = self._get_scripted_model(pt_module, example_input) + pt_module = self._get_scripted_model(pt_module, example_input, skip_freeze) except Exception as e: if example_input is not None: - msg = "tracing or scripting" - help_msg = "" + msg = "tracing" + help_msg = "Please check correctness of provided 'example_input'. " + "Sometimes models can be converted in scripted mode, please try running " + "conversion without 'example_input'." else: msg = "scripting" - help_msg = "\nTracing sometimes provide better results, please provide valid 'example_input' argument. " + help_msg = "\nTracing sometimes provide better results, please provide valid 'example_input' argument." raise RuntimeError( - f"Couldn't get TorchScript module by {msg}. With exception:\n{e}\n {help_msg}" + f"Couldn't get TorchScript module by {msg}. 
With exception:\n{e}\n{help_msg} " "You can also provide TorchScript module that you obtained" " yourself, please refer to PyTorch documentation: " "https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html.") @@ -82,74 +70,10 @@ def _get_preserved_attributes(model) -> list: preserved_attributes.append(name) return preserved_attributes - def _get_scripted_model(self, pt_module, example_inputs=None): + def _get_scripted_model(self, pt_module, example_inputs=None, skip_freeze=False): import torch import inspect - def process_dict_inputs(inputs, input_params, model): - ordered_inputs = [] - for input_name in input_params: - if input_name in inputs: - ordered_inputs.append(input_name) - - input_signature = list(input_params) - if ordered_inputs == input_signature[:len(ordered_inputs)]: - example_inputs = [inputs[input_name] for input_name in ordered_inputs] - if all([isinstance(inp, torch.Tensor) for inp in example_inputs]): - return {"example_inputs": [inputs[name] for name in ordered_inputs]}, ordered_inputs, model - return {"example_inputs": example_inputs}, ordered_inputs, model - - # PyTorch has some difficulties to trace models with named unordered parameters: - # torch < 2.0.0 supports only positional arguments for tracing - # pytorch == 2.0.0 supports input kwargs tracing, - # but does not support complex nested objects (e. g. tuple of tuples of tensors) - # We will use wrapper for making them positional as workaround. - - input_sign_str = [] - input_params_str = [] - - for input_name in ordered_inputs: - if str(input_params[input_name].annotation).startswith("typing.Union"): - filter_custom_args = [] - for arg in input_params[input_name].annotation.__args__: - str_arg = str(arg) - is_typing = str_arg.startswith("typing.") - is_torch = "torch." 
in str_arg - is_builten = str_arg in (str(int), str(float), str(type(None))) - if not (is_typing or is_torch or is_builten): - continue - filter_custom_args.append(arg) - input_params[input_name].annotation.__args__ = tuple(filter_custom_args) - input_sign_str.append(str(input_params[input_name]).replace("NoneType", "None")) - input_params_str.append(f"{input_name}={input_name}") - - wrapper_class = wrapper_template.format(input_sign=', '.join(input_sign_str), example_input=', '.join(input_params_str)) - result = {} - try: - exec(wrapper_class, result) - - wrapped_model = result["ModelWrapper"](model) - wrapped_model.eval() - # if wrapping failed, it is better to return original model for avoid user confusion regarding error message - except Exception: - wrapped_model = model - - return {"example_inputs": [inputs[name] for name in ordered_inputs]}, ordered_inputs, wrapped_model - - def prepare_example_inputs_and_model(inputs, input_params, model): - input_signature = list(input_params) - if isinstance(inputs, dict): - return process_dict_inputs(inputs, input_params, model) - if isinstance(inputs, list) and len(inputs) == 1 and isinstance(inputs[0], torch.Tensor): - if "typing.List" in str(input_params[input_signature[0]].annotation): - inputs = inputs[0].unsqueeze(0) - self._input_is_list = True - - if isinstance(inputs, torch.Tensor): - inputs = [inputs] - input_signature = input_signature[:len(inputs)] - return {"example_inputs": inputs}, input_signature, model - if isinstance(pt_module, torch.nn.Module): pt_module.eval() input_signature = None @@ -160,32 +84,23 @@ def prepare_example_inputs_and_model(inputs, input_params, model): if example_inputs is None: scripted = torch.jit.script(pt_module) else: - input_parameters, input_signature, pt_module = prepare_example_inputs_and_model(example_inputs, input_params, pt_module) - try: - scripted = torch.jit.trace(pt_module, **input_parameters) - except Exception: - try: - scripted = torch.jit.script(pt_module) - except Exception as se: - try: - scripted = torch.jit.trace(pt_module, **input_parameters, strict=False) - except Exception as te: - raise Exception(f"Tracing failed with exception {te}\nScripting failed with exception: {se}") - skip_freeze = False - for n in scripted.inlined_graph.nodes(): - # TODO: switch off freezing for all traced models - if "quantize" in n.kind(): - # do not freeze quantized models - skip_freeze = True - break - elif "aten::to" in n.kind(): - first_input = next(n.inputs()) - if first_input.node().kind() == "prim::Constant": - ivalue = first_input.toIValue() - if isinstance(ivalue, torch.Tensor) and ivalue.dtype in [torch.bfloat16, torch.float16]: - # do not freeze models with compressed constants - skip_freeze = True - break + input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model(example_inputs, input_params, pt_module) + scripted = torch.jit.trace(pt_module, **input_parameters, strict=False) + if not skip_freeze: + for n in scripted.inlined_graph.nodes(): + # TODO: switch off freezing for all traced models + if "quantize" in n.kind(): + # do not freeze quantized models + skip_freeze = True + break + elif "aten::to" in n.kind(): + first_input = next(n.inputs()) + if first_input.node().kind() == "prim::Constant": + ivalue = first_input.toIValue() + if isinstance(ivalue, torch.Tensor) and ivalue.dtype in [torch.bfloat16, torch.float16]: + # do not freeze models with compressed constants + skip_freeze = True + break if not skip_freeze: preserved_attrs = 
self._get_preserved_attributes(scripted) f_model = torch.jit.freeze(scripted, preserved_attrs=preserved_attrs) @@ -331,36 +246,6 @@ def mark_node(self, node): node.set_friendly_name(name) return node - @staticmethod - def convert_quantized_tensor(qtensor: torch.Tensor, shared_memory: bool): - # need to represent as Constant(u8) -> Convert(f32) -> Subtract(zero_point) -> Multiply (scale) - qscheme = qtensor.qscheme() # torch.per_channel_affine (per_tensor) - if qscheme == torch.per_channel_affine: - int8_tensor = qtensor.int_repr() - scale = qtensor.q_per_channel_scales().numpy().astype(np.float32) # (weight.q_scale() for per_tensor) - zero_point = qtensor.q_per_channel_zero_points().numpy().astype(np.float32) # (weight.q_zero_point() for per_tensor) - axis = np.int32(qtensor.q_per_channel_axis()) - - new_shape = np.ones(len(int8_tensor.shape), dtype=np.int32) - new_shape[axis] = -1 - zero_point_bc = np.reshape(zero_point, new_shape) - scale_bc = np.reshape(scale, new_shape) - - int8_const = torch_tensor_to_ov_const(int8_tensor, shared_memory=shared_memory) - convert = ops.convert(int8_const, np.float32) - sub = ops.subtract(convert, zero_point_bc) - return ops.multiply(sub, scale_bc).outputs() - elif qscheme == torch.per_tensor_affine: - int8_tensor = qtensor.int_repr() - scale = np.float32(qtensor.q_scale()) - zero_point = np.float32(qtensor.q_zero_point()) - - int8_const = torch_tensor_to_ov_const(int8_tensor, shared_memory=shared_memory) - convert = ops.convert(int8_const, np.float32) - sub = ops.subtract(convert, zero_point) - return ops.multiply(sub, scale).outputs() - assert False, "Unsupported qscheme" - def try_decode_get_attr(self): pt_value = get_value_from_getattr(self.graph_element, self.pt_module) assert pt_value is not None, "Couldn't retrieve value from prim::GetAttr" @@ -368,7 +253,7 @@ def try_decode_get_attr(self): # We assume this is __torch__.torch.classes.quantized.Conv2dPackedParamsBase or __torch__.torch.classes.quantized.LinearPackedParamsBase # TODO: but can be anything. Figure a better way to distinguish weight, bias = pt_value.unpack() - res = self.convert_quantized_tensor(weight, self._shared_memory) + res = convert_quantized_tensor(weight, self._shared_memory) if isinstance(bias, torch.Tensor): res += ivalue_to_constant(bias) else: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py b/src/bindings/python/src/openvino/frontend/pytorch/utils.py index 3c658119bb1c2a..97d237fb0efda1 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py @@ -10,9 +10,10 @@ import ctypes from openvino.runtime import op, Type as OVType, Shape, Tensor +from openvino.runtime import opset11 as ops -def maybe_convert_max_int(value : int): +def maybe_convert_max_int(value: int): # FIXME: This is a convertion from 64-bit positive max integer value # to 32-bit positive max integer value. Find a better way to handle this. if value == torch.iinfo(torch.int64).max: @@ -20,10 +21,12 @@ def maybe_convert_max_int(value : int): else: return value + def make_constant(*args, **kwargs): return op.Constant(*args, **kwargs) -def fetch_attr(self_module, target : str): + +def fetch_attr(self_module, target: str): """ Fetch an attribute from the ``Module`` hierarchy of ``self.module``. 
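The decoder refactoring above reduces to a fixed rule: trace the module when 'example_input' is available, script it otherwise, then freeze unless skip_freeze is requested. A minimal stand-alone sketch of that flow, assuming an ordinary torch.nn.Module (illustrative only, not the decoder code itself):

    import torch

    def get_scripted_model(module: torch.nn.Module, example_input=None, skip_freeze=False):
        # Mirrors the new TorchScriptPythonDecoder behaviour: tracing is used only
        # when example inputs are supplied; otherwise the model is scripted.
        module.eval()
        with torch.no_grad():
            if example_input is not None:
                scripted = torch.jit.trace(module, example_input, strict=False)
            else:
                scripted = torch.jit.script(module)
            if not skip_freeze:
                scripted = torch.jit.freeze(scripted)
        return scripted
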
@@ -37,7 +40,8 @@ def fetch_attr(self_module, target : str): attr_itr = self_module for i, atom in enumerate(target_atoms): if not hasattr(attr_itr, atom): - raise RuntimeError(f"Node referenced nonexistent target {'.'.join(target_atoms[:i])}") + raise RuntimeError( + f"Node referenced nonexistent target {'.'.join(target_atoms[:i])}") attr_itr = getattr(attr_itr, atom) return attr_itr @@ -84,6 +88,7 @@ def ivalue_to_constant(ivalue, shared_memory=True): return torch_tensor_to_ov_const(ivalue, shared_memory=shared_memory).outputs() return None + def get_value_from_getattr(getattr_node, self_module): assert getattr_node.kind() == "prim::GetAttr", "Got node of kind not equal to prim::GetAttr" # GetAttr nodes can be nested @@ -98,10 +103,12 @@ def get_value_from_getattr(getattr_node, self_module): while len(stack) > 0: node = stack.pop() attr_name = node.s("name") - assert hasattr(module, attr_name), f"No attribute with name \"{attr_name}\" found in module." + assert hasattr( + module, attr_name), f"No attribute with name \"{attr_name}\" found in module." module = getattr(module, attr_name) return module + pt_to_ov_type_map = { "float": OVType.f32, "int": OVType.i32, @@ -131,3 +138,121 @@ def get_value_from_getattr(getattr_node, self_module): OVType.i32: ctypes.c_int, OVType.i64: ctypes.c_int64, } + + +wrapper_template = """ +import torch +from typing import * + +class ModelWrapper(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.model = model + + def forward(self, {input_sign}): + return self.model({example_input}) +""" + + +def process_dict_inputs(inputs, input_params, model): + ordered_inputs = [] + for input_name in input_params: + if input_name in inputs: + ordered_inputs.append(input_name) + + input_signature = list(input_params) + if ordered_inputs == input_signature[:len(ordered_inputs)]: + example_inputs = [inputs[input_name] for input_name in ordered_inputs] + if all([isinstance(inp, torch.Tensor) for inp in example_inputs]): + return {"example_inputs": [inputs[name] for name in ordered_inputs]}, ordered_inputs, model + return {"example_inputs": example_inputs}, ordered_inputs, model + + # PyTorch has some difficulties to trace models with named unordered parameters: + # torch < 2.0.0 supports only positional arguments for tracing + # pytorch == 2.0.0 supports input kwargs tracing, + # but does not support complex nested objects (e. g. tuple of tuples of tensors) + # We will use wrapper for making them positional as workaround. + + input_sign_str = [] + input_params_str = [] + + for input_name in ordered_inputs: + if str(input_params[input_name].annotation).startswith("typing.Union"): + filter_custom_args = [] + for arg in input_params[input_name].annotation.__args__: + str_arg = str(arg) + is_typing = str_arg.startswith("typing.") + is_torch = "torch." 
in str_arg + is_builten = str_arg in (str(int), str(float), str(type(None))) + if not (is_typing or is_torch or is_builten): + continue + filter_custom_args.append(arg) + input_params[input_name].annotation.__args__ = tuple( + filter_custom_args) + input_sign_str.append( + str(input_params[input_name]).replace("NoneType", "None")) + input_params_str.append(f"{input_name}={input_name}") + + wrapper_class = wrapper_template.format(input_sign=', '.join( + input_sign_str), example_input=', '.join(input_params_str)) + result = {} + try: + exec(wrapper_class, result) + + wrapped_model = result["ModelWrapper"](model) + wrapped_model.eval() + # if wrapping failed, it is better to return original model for avoid user confusion regarding error message + except Exception: + wrapped_model = model + + return {"example_inputs": [inputs[name] for name in ordered_inputs]}, ordered_inputs, wrapped_model + + +def prepare_example_inputs_and_model(inputs, input_params, model): + input_is_list = False + input_signature = list(input_params) + if isinstance(inputs, dict): + examples, ordered, wrapped = process_dict_inputs(inputs, input_params, model) + return examples, ordered, wrapped, input_is_list + if isinstance(inputs, list) and len(inputs) == 1 and isinstance(inputs[0], torch.Tensor): + if "typing.List" in str(input_params[input_signature[0]].annotation): + inputs = inputs[0].unsqueeze(0) + input_is_list = True + + if isinstance(inputs, torch.Tensor): + inputs = [inputs] + input_signature = input_signature[:len(inputs)] + return {"example_inputs": inputs}, input_signature, model, input_is_list + + +def convert_quantized_tensor(qtensor: torch.Tensor, shared_memory: bool): + # represents torch quantized tensor as + # Constant(u8) -> Convert(f32) -> Subtract(zero_point) -> Multiply(scale) + qscheme = qtensor.qscheme() + if qscheme == torch.per_channel_affine: + int8_tensor = qtensor.int_repr() + scale = qtensor.q_per_channel_scales().numpy().astype(np.float32) + zero_point = qtensor.q_per_channel_zero_points().numpy().astype(np.float32) + axis = np.int32(qtensor.q_per_channel_axis()) + + new_shape = np.ones(len(int8_tensor.shape), dtype=np.int32) + new_shape[axis] = -1 + zero_point_bc = np.reshape(zero_point, new_shape) + scale_bc = np.reshape(scale, new_shape) + + int8_const = torch_tensor_to_ov_const( + int8_tensor, shared_memory=shared_memory) + convert = ops.convert(int8_const, np.float32) + sub = ops.subtract(convert, zero_point_bc) + return ops.multiply(sub, scale_bc).outputs() + elif qscheme == torch.per_tensor_affine: + int8_tensor = qtensor.int_repr() + scale = np.float32(qtensor.q_scale()) + zero_point = np.float32(qtensor.q_zero_point()) + + int8_const = torch_tensor_to_ov_const( + int8_tensor, shared_memory=shared_memory) + convert = ops.convert(int8_const, np.float32) + sub = ops.subtract(convert, zero_point) + return ops.multiply(sub, scale).outputs() + assert False, "Unsupported qscheme" diff --git a/src/bindings/python/tests/test_torchvision_to_ov/test_preprocessor.py b/src/bindings/python/tests/test_torchvision_to_ov/test_preprocessor.py index 59f2b458ce7c08..a1cdc41f610e62 100644 --- a/src/bindings/python/tests/test_torchvision_to_ov/test_preprocessor.py +++ b/src/bindings/python/tests/test_torchvision_to_ov/test_preprocessor.py @@ -32,8 +32,7 @@ def forward(self, data): def _infer_pipelines(test_input, preprocess_pipeline, input_channels=3): torch_model = Convnet(input_channels) - example_input = Tensor(np.expand_dims(test_input, axis=0).astype(np.float32)) - ov_model = 
convert_model(torch_model, example_input=example_input) + ov_model = convert_model(torch_model) core = Core() ov_model = PreprocessConverter.from_torchvision( diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py index 9a863a12d70b27..6eab63bf682bd0 100644 --- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py +++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py @@ -841,7 +841,7 @@ def forward(self, x: torch.Tensor, z: Tuple[torch.Tensor, torch.Tensor]): add = ov.opset10.add(concat1, param0) ref_model = Model([concat2, add], [param0, param1, param2], "test") return net, ref_model, { - "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 10)), torch.ones((1, 5, 5)))}, + "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 9)), torch.ones((1, 5, 5)))}, "compress_to_fp16": False} @@ -867,7 +867,7 @@ def forward(self, z: Tuple[torch.Tensor, torch.Tensor], x: torch.Tensor): add = ov.opset10.add(concat1, param3) ref_model = Model([concat2, add], [param1, param2, param3], "test") return net, ref_model, { - "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 10)), torch.ones((1, 5, 3)))}, + "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 9)), torch.ones((1, 5, 3)))}, "compress_to_fp16": False} @@ -895,7 +895,7 @@ def forward(self, x: torch.Tensor, z: Tuple[torch.Tensor, torch.Tensor], y: torc mul = ov.opset10.multiply(concat2, param4) ref_model = Model([mul, add], [param3, param1, param2, param4], "test") return net, ref_model, { - "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 10)), torch.ones((1, 5, 10))), + "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 9)), torch.ones((1, 5, 10))), "y": torch.ones((1,))}, "compress_to_fp16": False} @@ -924,7 +924,7 @@ def forward(self, x: torch.Tensor, z: Tuple[torch.Tensor, torch.Tensor], y: torc mul = ov.opset10.multiply(concat2, param4) ref_model = Model([mul, add], [param0, param1, param2, param4], "test") return net, ref_model, { - "example_input": [torch.ones((1, 10)), (torch.zeros((1, 10)), torch.ones((1, 5, 10))), torch.ones((1,))], + "example_input": [torch.ones((1, 10)), (torch.zeros((1, 9)), torch.ones((1, 5, 10))), torch.ones((1,))], "compress_to_fp16": False} @@ -1268,9 +1268,4 @@ def test_precision_sensitive(self, create_model, ie_device, precision, ir_versio fw_res = fw_model(*torch_inp_tensors) ov_res = core.compile_model(ir_test)(example_inputs) - if precision == 'FP32': - custom_eps = 1e-4 - else: - custom_eps = 1e-3 - - npt.assert_allclose(ov_res[0], fw_res.numpy(), atol=custom_eps) + npt.assert_allclose(ov_res[0], fw_res.numpy(), atol=1e-3, rtol=1e-3) diff --git a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py index 302144d59c3e40..268f69d13f025c 100644 --- a/tests/layer_tests/ovc_python_api_tests/test_pytorch.py +++ b/tests/layer_tests/ovc_python_api_tests/test_pytorch.py @@ -843,7 +843,7 @@ def forward(self, x: torch.Tensor, z: Tuple[torch.Tensor, torch.Tensor]): add = ov.opset10.add(concat1, param0) ref_model = Model([concat2, add], [param0, param1, param2], "test") return net, ref_model, { - "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 10)), torch.ones((1, 5, 5)))}, + "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 9)), torch.ones((1, 5, 5)))}, "compress_to_fp16": False} @@ -869,7 +869,7 @@ def forward(self, z: 
Tuple[torch.Tensor, torch.Tensor], x: torch.Tensor): add = ov.opset10.add(concat1, param3) ref_model = Model([concat2, add], [param1, param2, param3], "test") return net, ref_model, { - "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 10)), torch.ones((1, 5, 3)))}, + "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 9)), torch.ones((1, 5, 3)))}, "compress_to_fp16": False} @@ -897,7 +897,7 @@ def forward(self, x: torch.Tensor, z: Tuple[torch.Tensor, torch.Tensor], y: torc mul = ov.opset10.multiply(concat2, param4) ref_model = Model([mul, add], [param3, param1, param2, param4], "test") return net, ref_model, { - "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 10)), torch.ones((1, 5, 10))), + "example_input": {"x": torch.ones((1, 10)), "z": (torch.zeros((1, 9)), torch.ones((1, 5, 10))), "y": torch.ones((1,))}, "compress_to_fp16": False} @@ -926,7 +926,7 @@ def forward(self, x: torch.Tensor, z: Tuple[torch.Tensor, torch.Tensor], y: torc mul = ov.opset10.multiply(concat2, param4) ref_model = Model([mul, add], [param0, param1, param2, param4], "test") return net, ref_model, { - "example_input": [torch.ones((1, 10)), (torch.zeros((1, 10)), torch.ones((1, 5, 10))), torch.ones((1,))], + "example_input": [torch.ones((1, 10)), (torch.zeros((1, 9)), torch.ones((1, 5, 10))), torch.ones((1,))], "compress_to_fp16": False} diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 283a90942b4212..0f5638ea8c8d38 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -77,18 +77,16 @@ def use_torch_compile_backend(): self.torch_compile_backend_test(model, torch_inputs, custom_eps) else: with torch.no_grad(): - model.eval() trace_model = kwargs.get('trace_model', False) freeze_model = kwargs.get('freeze_model', True) - model, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) - graph = model.inlined_graph - - if kind is not None and not isinstance(kind, (tuple, list)): - kind = [kind] - if kind is not None: - for op in kind: - assert self._check_kind_exist( - graph, op), f"Operation {op} type doesn't exist in provided graph" + smodel, converted_model = self.convert_directly_via_frontend(model, torch_inputs, trace_model, dynamic_shapes, ov_inputs, freeze_model) + + if kind is not None and not isinstance(kind, (tuple, list)): + kind = [kind] + if kind is not None: + for op in kind: + assert self._check_kind_exist( + smodel.inlined_graph, op), f"Operation {op} type doesn't exist in provided graph" # OV infer: core = Core() compiled = core.compile_model(converted_model, ie_device) @@ -99,7 +97,7 @@ def use_torch_compile_backend(): return # Framework infer: - fw_res = model(*deepcopy(torch_inputs)) + fw_res = smodel(*deepcopy(torch_inputs)) if not isinstance(fw_res, (tuple)): fw_res = (fw_res,) @@ -162,47 +160,36 @@ def use_torch_compile_backend(): def _prepare_input(self): raise RuntimeError("Please provide inputs generation function") - def convert_via_mo(self, model, example_input, trace_model, dynamic_shapes, ov_inputs): - import torch + def convert_via_mo(self, model, example_input, trace_model, dynamic_shapes, ov_inputs, freeze_model): from openvino.tools.ovc import convert_model - kwargs = {"example_input": example_input if len( - example_input) > 1 else example_input[0], "compress_to_fp16": False} - with 
torch.no_grad(): - if trace_model: - model = torch.jit.trace(model, example_input) - else: - model = torch.jit.script(model) - model = torch.jit.freeze(model) - print(model) - if not dynamic_shapes: - input_shapes = [inp.shape for inp in ov_inputs] - kwargs["input_shape"] = input_shapes - om = convert_model(model, **kwargs) + kwargs = {"example_input": example_input if len(example_input) > 1 else example_input[0]} + if trace_model: + decoder = TorchScriptPythonDecoder(model, example_input=example_input, skip_freeze=not freeze_model) + else: + decoder = TorchScriptPythonDecoder(model, skip_freeze=not freeze_model) + smodel = decoder.pt_module + print(smodel.inlined_graph) + if not dynamic_shapes: + input_shapes = [inp.shape for inp in ov_inputs] + kwargs["input"] = input_shapes + om = convert_model(decoder, **kwargs) self._resolve_input_shape_dtype(om, ov_inputs, dynamic_shapes) - return model, om + return smodel, om def convert_directly_via_frontend(self, model, example_input, trace_model, dynamic_shapes, ov_inputs, freeze_model): - import torch - fe_manager = FrontEndManager() fe = fe_manager.load_by_framework('pytorch') - model.eval() - with torch.no_grad(): - if trace_model: - model = torch.jit.trace(model, example_input) - else: - model = torch.jit.script(model) - if freeze_model: - _model = torch.jit.freeze(model) + if trace_model: + decoder = TorchScriptPythonDecoder(model, example_input=example_input, skip_freeze=not freeze_model) else: - _model = model - print(_model.inlined_graph) - decoder = TorchScriptPythonDecoder(_model) + decoder = TorchScriptPythonDecoder(model, skip_freeze=not freeze_model) + smodel = decoder.pt_module + print(smodel.inlined_graph) im = fe.load(decoder) om = fe.convert(im) self._resolve_input_shape_dtype(om, ov_inputs, dynamic_shapes) - return model, om + return smodel, om def _resolve_input_shape_dtype(self, om, ov_inputs, dynamic_shapes): params = list(om.inputs) diff --git a/tests/layer_tests/pytorch_tests/test_add.py b/tests/layer_tests/pytorch_tests/test_add.py index c13cfbcd363ed9..8c3026a9c2c16d 100644 --- a/tests/layer_tests/pytorch_tests/test_add.py +++ b/tests/layer_tests/pytorch_tests/test_add.py @@ -111,7 +111,7 @@ def test_add_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, self.rhs_type = rhs_type self.rhs_shape = rhs_shape self._test(*self.create_model(lhs_type, lhs_shape, rhs_type, rhs_shape), - ie_device, precision, ir_version) + ie_device, precision, ir_version, freeze_model=False, trace_model=True) class TestAddLists(PytorchLayerTest): diff --git a/tests/layer_tests/pytorch_tests/test_aliases.py b/tests/layer_tests/pytorch_tests/test_aliases.py index 1919aab5fbc7bb..78f323b4a2d670 100644 --- a/tests/layer_tests/pytorch_tests/test_aliases.py +++ b/tests/layer_tests/pytorch_tests/test_aliases.py @@ -28,11 +28,16 @@ def _prepare_input(self): @pytest.mark.nightly @pytest.mark.precommit def test_alias(self, ie_device, precision, ir_version): - self._test(aten_alias(), None, [ - "aten::slice", "aten::select", "aten::copy_"], ie_device, precision, ir_version) + self._test(aten_alias(), None, ["aten::slice", + "aten::select", + "aten::copy_"], + ie_device, precision, ir_version) @pytest.mark.nightly @pytest.mark.precommit def test_loop_alias(self, ie_device, precision, ir_version): - self._test(aten_loop_alias(), None, [ - "aten::slice", "aten::select", "aten::copy_", "prim::Loop"], ie_device, precision, ir_version) + self._test(aten_loop_alias(), None, ["aten::slice", + "aten::select", + "aten::copy_", + "prim::Loop"], + 
ie_device, precision, ir_version, freeze_model=False) diff --git a/tests/layer_tests/pytorch_tests/test_mul.py b/tests/layer_tests/pytorch_tests/test_mul.py index 02a17e8c38d7d1..8e958f095697f0 100644 --- a/tests/layer_tests/pytorch_tests/test_mul.py +++ b/tests/layer_tests/pytorch_tests/test_mul.py @@ -100,4 +100,4 @@ def test_mul_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, self.rhs_type = rhs_type self.rhs_shape = rhs_shape self._test(*self.create_model(lhs_type, lhs_shape, rhs_type, rhs_shape), - ie_device, precision, ir_version) + ie_device, precision, ir_version, freeze_model=False, trace_model=True) diff --git a/tools/mo/openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py b/tools/mo/openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py index 5b11f8c6998b66..214fbbc4ff77cc 100644 --- a/tools/mo/openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py +++ b/tools/mo/openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py @@ -32,7 +32,10 @@ def get_pytorch_decoder(model, input_shape, example_inputs, args): raise RuntimeError( "NNCF models produced by nncf<2.6 are not supported directly. Please upgrade nncf or export to ONNX first.") inputs = prepare_torch_inputs(example_inputs) - decoder = TorchScriptPythonDecoder(model, example_input=inputs, shared_memory=args.get("share_weights", True)) + if not isinstance(model, TorchScriptPythonDecoder): + decoder = TorchScriptPythonDecoder(model, example_input=inputs, shared_memory=args.get("share_weights", True)) + else: + decoder = model args['input_model'] = decoder args["framework"] = "pytorch" args["example_input"] = inputs diff --git a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py index 2703af43d8b192..8baf75354f92ee 100644 --- a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +++ b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py @@ -32,7 +32,10 @@ def get_pytorch_decoder(model, example_inputs, args): raise RuntimeError( "NNCF models produced by nncf<2.6 are not supported directly. 
Please upgrade nncf or export to ONNX first.") inputs = prepare_torch_inputs(example_inputs) - decoder = TorchScriptPythonDecoder(model, example_input=inputs, shared_memory=args.get("share_weights", True)) + if not isinstance(model, TorchScriptPythonDecoder): + decoder = TorchScriptPythonDecoder(model, example_input=inputs, shared_memory=args.get("share_weights", True)) + else: + decoder = model args['input_model'] = decoder args["example_input"] = inputs From 3c1b3846944fd32f5b4c8601cb243159d7dda121 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Tue, 12 Sep 2023 10:45:10 +0200 Subject: [PATCH 24/31] [PyOV] Expose missed properties (#19678) --- src/bindings/python/src/openvino/runtime/properties/__init__.py | 2 ++ .../python/src/pyopenvino/core/properties/properties.cpp | 2 ++ src/bindings/python/tests/test_runtime/test_properties.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/bindings/python/src/openvino/runtime/properties/__init__.py b/src/bindings/python/src/openvino/runtime/properties/__init__.py index 4f6fb5fa8eb5c5..fc3ac92f8c5e4a 100644 --- a/src/bindings/python/src/openvino/runtime/properties/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/__init__.py @@ -23,6 +23,8 @@ from openvino._pyopenvino.properties import optimal_batch_size from openvino._pyopenvino.properties import max_batch_size from openvino._pyopenvino.properties import range_for_async_infer_requests +from openvino._pyopenvino.properties import execution_devices +from openvino._pyopenvino.properties import loaded_from_cache # Submodules from openvino.runtime.properties import hint diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 0953f75c06e629..2e70b63b5851dd 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -40,6 +40,8 @@ void regmodule_properties(py::module m) { wrap_property_RO(m_properties, ov::optimal_batch_size, "optimal_batch_size"); wrap_property_RO(m_properties, ov::max_batch_size, "max_batch_size"); wrap_property_RO(m_properties, ov::range_for_async_infer_requests, "range_for_async_infer_requests"); + wrap_property_RO(m_properties, ov::execution_devices, "execution_devices"); + wrap_property_RO(m_properties, ov::loaded_from_cache, "loaded_from_cache"); // Submodule hint py::module m_hint = diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index 1623e64e421752..ae1b4c908e7ee5 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -153,6 +153,8 @@ def test_conflicting_enum(proxy_enums, expected_values): (properties.optimal_batch_size, "OPTIMAL_BATCH_SIZE"), (properties.max_batch_size, "MAX_BATCH_SIZE"), (properties.range_for_async_infer_requests, "RANGE_FOR_ASYNC_INFER_REQUESTS"), + (properties.execution_devices, "EXECUTION_DEVICES"), + (properties.loaded_from_cache, "LOADED_FROM_CACHE"), (properties.device.full_name, "FULL_DEVICE_NAME"), (properties.device.architecture, "DEVICE_ARCHITECTURE"), (properties.device.type, "DEVICE_TYPE"), From faa6b772472cabb08aff1b2abbce9c57ac7ac7e2 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Tue, 12 Sep 2023 11:20:18 +0200 Subject: [PATCH 25/31] [Snippets] LIR serialization: additional connections between LoopBegin and LoopEnd nodes (#19630) * [Snippets] LIR 
serialization improvements * Minor correction * Review comments --- .../snippets/op/serialization_node.hpp | 2 +- src/common/snippets/src/lowered/linear_ir.cpp | 22 +++++++++++++-- ...ialize_node.cpp => serialization_node.cpp} | 28 +++++++++---------- 3 files changed, 34 insertions(+), 18 deletions(-) rename src/common/snippets/src/op/{serialize_node.cpp => serialization_node.cpp} (65%) diff --git a/src/common/snippets/include/snippets/op/serialization_node.hpp b/src/common/snippets/include/snippets/op/serialization_node.hpp index fc447f15d6a8ea..2804f4a2817c36 100644 --- a/src/common/snippets/include/snippets/op/serialization_node.hpp +++ b/src/common/snippets/include/snippets/op/serialization_node.hpp @@ -22,7 +22,7 @@ class SerializationNode : public ov::op::Op { OPENVINO_OP("SerializationNode", "SnippetsOpset"); SerializationNode() = default; - SerializationNode(const Output &arg, const std::shared_ptr& expr); + SerializationNode(const ov::OutputVector& args, const std::shared_ptr& expr); void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector &new_args) const override; diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index 4f039d837fbb6d..cc5e5c2fce621e 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -70,11 +70,27 @@ void LinearIR::serialize(const std::string& xml, const std::string& bin) const { auto first_node = std::make_shared(element::f32, Shape{}); first_node->set_friendly_name("Start"); first_node->get_rt_info()["execTimeMcs"] = 0; - std::shared_ptr body_node = first_node; + std::shared_ptr serialization_node = first_node; + + // This map allows to get LoopBegin serialization node by original LoopBegin node + // It is used to draw an edge between LoopBegin and LoopEnd serialization nodes + std::map, std::shared_ptr> loops_map; for (const auto& expr : m_expressions) { - body_node = std::make_shared(body_node, expr); + const auto node = expr->get_node(); + if (auto loop_end = ov::as_type_ptr(node)) { + OPENVINO_ASSERT(loops_map.count(loop_end->get_loop_begin()), + "Serialization can't find LoopBegin that corresponds to LoopEnd with friendly name ", + loop_end->get_friendly_name()); + auto loop_begin_serialization_node = loops_map.at(loop_end->get_loop_begin()); + serialization_node = std::make_shared(ov::OutputVector{serialization_node, loop_begin_serialization_node}, expr); + } else { + serialization_node = std::make_shared(ov::OutputVector{serialization_node}, expr); + if (auto loop_begin = ov::as_type_ptr(node)) { + loops_map[loop_begin] = serialization_node; + } + } } - auto last_node = std::make_shared(body_node); + auto last_node = std::make_shared(serialization_node); last_node->set_friendly_name("End"); const auto tmp_model = std::make_shared(ResultVector {last_node}, ParameterVector {first_node}, diff --git a/src/common/snippets/src/op/serialize_node.cpp b/src/common/snippets/src/op/serialization_node.cpp similarity index 65% rename from src/common/snippets/src/op/serialize_node.cpp rename to src/common/snippets/src/op/serialization_node.cpp index 3672b2203a77a1..6c521cc856f214 100644 --- a/src/common/snippets/src/op/serialize_node.cpp +++ b/src/common/snippets/src/op/serialization_node.cpp @@ -9,8 +9,8 @@ namespace ov { namespace snippets { namespace op { -SerializationNode::SerializationNode(const Output &arg, const std::shared_ptr &expr) - : Op({arg}), m_expr(expr) { 
+SerializationNode::SerializationNode(const ov::OutputVector& args, const std::shared_ptr& expr) + : Op(args), m_expr(expr) { if (!m_expr || !m_expr->get_node()) OPENVINO_THROW("SerializationNode requires a valid expression with non-null node pointer"); const auto &node = expr->get_node(); @@ -28,22 +28,22 @@ void SerializationNode::validate_and_infer_types() { std::shared_ptr SerializationNode::clone_with_new_inputs(const OutputVector &new_args) const { check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), m_expr); + return std::make_shared(new_args, m_expr); } bool SerializationNode::visit_attributes(AttributeVisitor &visitor) { - std::vector> shapes; - const auto &node = m_expr->get_node(); - for (size_t i = 0; i < node->get_input_size(); i++) { - const auto &pshape = node->get_input_partial_shape(i); - if (pshape.begin() != pshape.end()) - shapes.emplace_back("in_shape_" + std::to_string(i), node->get_input_partial_shape(i)); + std::vector>> shapes; + for (size_t i = 0; i < m_expr->get_input_count(); i++) { + const auto &shape = m_expr->get_input_port_descriptor(i)->get_shape(); + if (!shape.empty()) + shapes.emplace_back("in_shape_" + std::to_string(i), shape); } - for (size_t i = 0; i < node->get_output_size(); i++) { - const auto &pshape = node->get_output_partial_shape(i); - if (pshape.begin() != pshape.end()) - shapes.emplace_back("out_shape_" + std::to_string(i), pshape); + for (size_t i = 0; i < m_expr->get_output_count(); i++) { + const auto &shape = m_expr->get_output_port_descriptor(i)->get_shape(); + if (!shape.empty()) + shapes.emplace_back("out_shape_" + std::to_string(i), shape); } + auto loop_ids = m_expr->get_loop_ids(); auto rinfo = m_expr->get_reg_info(); if (!rinfo.first.empty()) @@ -54,7 +54,7 @@ bool SerializationNode::visit_attributes(AttributeVisitor &visitor) { visitor.on_attribute(s.first, s.second); visitor.on_attribute("loop_ids", loop_ids); - node->visit_attributes(visitor); + m_expr->get_node()->visit_attributes(visitor); return true; } From 0675d9fd8b9c7e7eca198578c42fac08728759ac Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Tue, 12 Sep 2023 11:46:30 +0200 Subject: [PATCH 26/31] Refactor ComparisonLayerTest, ClampLayerTest (#19681) * Refactor ClampLayerTest * Refactor ComparisonLayerTest --- .../single_layer_tests/clamp.cpp | 33 ++--- .../single_layer_tests/comparison.cpp | 44 ++++--- .../shared/include/single_op_tests/clamp.hpp | 15 +++ .../include/single_op_tests/comparison.hpp | 15 +++ .../shared_test_classes/single_op/clamp.hpp | 30 +++++ .../single_op/comparison.hpp | 37 ++++++ .../src/single_op/clamp.cpp | 50 ++++++++ .../src/single_op/comparison.cpp | 113 ++++++++++++++++++ 8 files changed, 303 insertions(+), 34 deletions(-) create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/clamp.hpp create mode 100644 src/tests/functional/plugin/shared/include/single_op_tests/comparison.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/clamp.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/comparison.hpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/clamp.cpp create mode 100644 src/tests/functional/shared_test_classes/src/single_op/comparison.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/clamp.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/clamp.cpp index 0bf4d29c8155e0..15e66eb2978574 
100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/clamp.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/clamp.cpp @@ -4,17 +4,19 @@ #include -#include "single_layer_tests/clamp.hpp" +#include "single_op_tests/clamp.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; +namespace { +using ov::test::ClampLayerTest; -const std::vector> inShapes = { - {50}, - {10, 10}, - {1, 20, 20} +const std::vector> input_shapes_static = { + {{ 50 }}, + {{ 10, 10 }}, + {{ 1, 20, 20 }} }; + const std::vector> intervals = { {-20.1, -10.5}, {-10.0, 10.0}, @@ -27,26 +29,27 @@ const std::vector> intervals_unsigned = { {10.6, 20.6} }; -const std::vector netPrc = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I64, - InferenceEngine::Precision::I32 +const std::vector model_type = { + ov::element::f32, + ov::element::f16, + ov::element::i64, + ov::element::i32 }; const auto test_Clamp_signed = ::testing::Combine( - ::testing::ValuesIn(inShapes), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::ValuesIn(intervals), - ::testing::ValuesIn(netPrc), + ::testing::ValuesIn(model_type), ::testing::Values(ov::test::utils::DEVICE_CPU) ); const auto test_Clamp_unsigned = ::testing::Combine( - ::testing::ValuesIn(inShapes), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::ValuesIn(intervals_unsigned), - ::testing::Values(InferenceEngine::Precision::U64), + ::testing::Values(ov::element::u64), ::testing::Values(ov::test::utils::DEVICE_CPU) ); INSTANTIATE_TEST_SUITE_P(smoke_TestsClamp_signed, ClampLayerTest, test_Clamp_signed, ClampLayerTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_TestsClamp_unsigned, ClampLayerTest, test_Clamp_unsigned, ClampLayerTest::getTestCaseName); +} // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp index 572d595e39852b..0b68b33e2074a9 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/comparison.cpp @@ -3,15 +3,13 @@ // #include -#include "single_layer_tests/comparison.hpp" +#include "single_op_tests/comparison.hpp" #include "common_test_utils/test_constants.hpp" -using namespace LayerTestsDefinitions; -using namespace LayerTestsDefinitions::ComparisonParams; - namespace { +using ov::test::ComparisonLayerTest; -std::map, std::vector>> inputShapes = { +std::map> input_shapes_combinations = { {{1}, {{1}, {17}, {1, 1}, {2, 18}, {1, 1, 2}, {2, 2, 3}, {1, 1, 2, 3}}}, {{5}, {{1}, {1, 1}, {2, 5}, {1, 1, 1}, {2, 2, 5}}}, {{2, 200}, {{1}, {200}, {1, 200}, {2, 200}, {2, 2, 200}}}, @@ -20,11 +18,23 @@ std::map, std::vector>> inputShapes = { {{2, 1, 1, 3, 1}, {{1}, {1, 3, 4}, {2, 1, 3, 4}, {1, 1, 1, 1, 1}}}, }; -std::vector inputsPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::BOOL, +auto input_shapes_pair_vector = ov::test::utils::combineParams(input_shapes_combinations); + +auto converter = [] (const std::vector>& shapes) { + std::vector> result; + for (const auto& shape : shapes) { + 
result.push_back({shape.first, shape.second}); + } + return result; +}; + +auto input_shapes_static = converter(input_shapes_pair_vector); + +std::vector model_type = { + ov::element::f32, + ov::element::f16, + ov::element::i32, + ov::element::boolean, }; std::vector comparisonOpTypes = { @@ -44,19 +54,17 @@ std::vector secondInputTypes = { std::map additional_config = {}; const auto ComparisonTestParams = ::testing::Combine( - ::testing::ValuesIn(ov::test::utils::combineParams(inputShapes)), - ::testing::ValuesIn(inputsPrecisions), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_static)), ::testing::ValuesIn(comparisonOpTypes), ::testing::ValuesIn(secondInputTypes), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::ValuesIn(model_type), ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::Values(additional_config)); INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, ComparisonLayerTest, ComparisonTestParams, ComparisonLayerTest::getTestCaseName); -std::vector inputShapesIsOps = { +std::vector> input_shapes_is_ops_static = { {{1}, {1}}, {{1, 2}, {1}}, {{3, 1}, {1}}, @@ -79,12 +87,10 @@ std::vector comparisonOpTypesIs = { }; const auto ComparisonTestParamsIs = ::testing::Combine( - ::testing::ValuesIn(inputShapesIsOps), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(input_shapes_is_ops_static)), ::testing::ValuesIn(comparisonOpTypesIs), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(ov::element::f32), ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::Values(additional_config)); diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/clamp.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/clamp.hpp new file mode 100644 index 00000000000000..c3d848fea3d0ab --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/clamp.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_op/clamp.hpp" + +namespace ov { +namespace test { +TEST_P(ClampLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/single_op_tests/comparison.hpp b/src/tests/functional/plugin/shared/include/single_op_tests/comparison.hpp new file mode 100644 index 00000000000000..65e36f84c152e0 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/single_op_tests/comparison.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov { +namespace test { +TEST_P(ComparisonLayerTest, Inference) { + run(); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/clamp.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/clamp.hpp new file mode 100644 index 00000000000000..eadded62f9ff9c --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/clamp.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
+#include + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +using clampParamsTuple = std::tuple< + std::vector, // Input shape + std::pair, // Interval [min, max] + ov::element::Type, // Model precision + std::string>; // Device name + +class ClampLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); +protected: + void SetUp() override; +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/comparison.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/comparison.hpp new file mode 100644 index 00000000000000..0e29e65f22c122 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/comparison.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + + +#include + +#include "gtest/gtest.h" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/test_constants.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +namespace ov { +namespace test { + +typedef std::tuple< + std::vector, // Input shapes tuple + ngraph::helpers::ComparisonTypes, // Comparison op type + ngraph::helpers::InputLayerType, // Second input type + ov::element::Type, // Model type + std::string, // Device name + std::map // Additional network configuration +> ComparisonTestParams; + +class ComparisonLayerTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { + ngraph::helpers::ComparisonTypes comparison_op_type; +protected: + void SetUp() override; + void generate_inputs(const std::vector& targetInputStaticShapes) override; +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); +}; +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/clamp.cpp b/src/tests/functional/shared_test_classes/src/single_op/clamp.cpp new file mode 100644 index 00000000000000..307c913fd68a3d --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/clamp.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/clamp.hpp" + +namespace ov { +namespace test { +std::string ClampLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::vector shapes; + std::pair interval; + ov::element::Type model_type; + std::string target_device; + + std::tie(shapes, interval, model_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < shapes.size(); i++) { + result << ov::test::utils::partialShape2str({shapes[i].first}) << (i < shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < shapes.size(); j++) { + result << ov::test::utils::vec2str(shapes[j].second[i]) << (j < shapes.size() - 1lu ? 
"_" : ""); + } + result << "}_"; + } + result << "min=" << interval.first << "_"; + result << "max=" << interval.second << "_"; + result << "netPrc=" << model_type.get_type_name() << "_"; + result << "trgDev=" << target_device; + return result.str(); +} + +void ClampLayerTest::SetUp() { + std::vector shapes; + std::pair interval; + ov::element::Type model_type; + std::tie(shapes, interval, model_type, targetDevice) = this->GetParam(); + init_input_shapes(shapes); + + auto input = std::make_shared(model_type, inputDynamicShapes.front()); + auto clamp = std::make_shared(input, interval.first, interval.second); + auto result = std::make_shared(clamp); + function = std::make_shared(result, ngraph::ParameterVector{input}); +} +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/single_op/comparison.cpp b/src/tests/functional/shared_test_classes/src/single_op/comparison.cpp new file mode 100644 index 00000000000000..7dacd12bc9d74c --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/single_op/comparison.cpp @@ -0,0 +1,113 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_op/comparison.hpp" + +#include "ngraph_functions/builders.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +namespace ov { +namespace test { +using ngraph::helpers::ComparisonTypes; +using ngraph::helpers::InputLayerType; + +std::string ComparisonLayerTest::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector shapes; + ComparisonTypes comparison_op_type; + InputLayerType second_input_type; + ov::element::Type model_type; + std::string device_name; + std::map additional_config; + std::tie(shapes, + comparison_op_type, + second_input_type, + model_type, + device_name, + additional_config) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (size_t i = 0lu; i < shapes.size(); i++) { + result << ov::test::utils::partialShape2str({shapes[i].first}) << (i < shapes.size() - 1lu ? "_" : ""); + } + result << ")_TS="; + for (size_t i = 0lu; i < shapes.front().second.size(); i++) { + result << "{"; + for (size_t j = 0lu; j < shapes.size(); j++) { + result << ov::test::utils::vec2str(shapes[j].second[i]) << (j < shapes.size() - 1lu ? 
"_" : ""); + } + result << "}_"; + } + result << "comparisonOpType=" << comparison_op_type << "_"; + result << "secondInputType=" << second_input_type << "_"; + result << "in_type=" << model_type.get_type_name() << "_"; + result << "targetDevice=" << device_name; + return result.str(); +} + +void ComparisonLayerTest::SetUp() { + std::vector shapes; + InputLayerType second_input_type; + std::map additional_config; + ov::element::Type model_type; + std::tie(shapes, + comparison_op_type, + second_input_type, + model_type, + targetDevice, + additional_config) = this->GetParam(); + configuration.insert(additional_config.begin(), additional_config.end()); + init_input_shapes(shapes); + + ov::ParameterVector inputs {std::make_shared(model_type, inputDynamicShapes[0])}; + + std::shared_ptr second_input; + if (second_input_type == InputLayerType::PARAMETER) { + second_input = std::make_shared(model_type, inputDynamicShapes[1]); + inputs.push_back(std::dynamic_pointer_cast(second_input)); + } else { + ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(model_type, targetStaticShapes.front()[1]); + second_input = std::make_shared(tensor); + } + + auto comparisonNode = ngraph::builder::makeComparison(inputs[0], second_input, comparison_op_type); + function = std::make_shared(comparisonNode, inputs, "Comparison"); +} + +void ComparisonLayerTest::generate_inputs(const std::vector& target_input_static_shapes) { + if (comparison_op_type == ComparisonTypes::IS_FINITE || comparison_op_type == ComparisonTypes::IS_NAN) { + inputs.clear(); + auto params = function->get_parameters(); + OPENVINO_ASSERT(target_input_static_shapes.size() >= params.size()); + for (int i = 0; i < params.size(); i++) { + ov::Tensor tensor(params[i]->get_element_type(), target_input_static_shapes[i]); + auto data_ptr = static_cast(tensor.data()); + auto data_ptr_int = static_cast(tensor.data()); + auto range = tensor.get_size(); + auto start = -static_cast(range) / 2.f; + testing::internal::Random random(1); + for (size_t i = 0; i < range; i++) { + if (i % 7 == 0) { + data_ptr[i] = std::numeric_limits::infinity(); + } else if (i % 7 == 1) { + data_ptr[i] = -std::numeric_limits::infinity(); + } else if (i % 7 == 2) { + data_ptr_int[i] = 0x7F800000 + random.Generate(range); + } else if (i % 7 == 3) { + data_ptr[i] = std::numeric_limits::quiet_NaN(); + } else if (i % 7 == 5) { + data_ptr[i] = -std::numeric_limits::quiet_NaN(); + } else { + data_ptr[i] = start + static_cast(random.Generate(range)); + } + } + inputs.insert({params[i], tensor}); + } + } else { + SubgraphBaseTest::generate_inputs(target_input_static_shapes); + } +} + +} // namespace test +} // namespace ov From adf7a24ec0d69f2b32b4872b47c550f851eb692b Mon Sep 17 00:00:00 2001 From: Sergey Lyalin Date: Tue, 12 Sep 2023 14:31:54 +0400 Subject: [PATCH 27/31] [DOCS] OVC/convert_model Documentation (#19555) * Added OVC and ov.convert_model() description. * Minor corrections. * Small correction. * Include page to toctree. 
* WIP: Model Preparation * Forked OVC/ov.convert_model documentation sub-directory; reworked model_introduction.md * Reverted ovc-related changes in old MO_DG documentation * State explicitly that MO is considered legacy API * Reduced ovc description in model preparation part; added TF Hub exampe (via file) * Grammar check; removed obsolexte parts not relevant to ovc; better wording * Removed a duplicate of mo-to-ovc transition * Fixed links and some other errors found in documentation build * Resolved XYZ placeholder to the transition guide * Fixed technical issues with links * Up-to-date link to PTQ chapter (instead of obsolete POT) * Fixed strong text ending * Update docs/OV_Converter_UG/prepare_model/convert_model/MO_OVC_transition.md Co-authored-by: Anastasiia Pnevskaia * Update docs/OV_Converter_UG/prepare_model/convert_model/MO_OVC_transition.md Co-authored-by: Anastasiia Pnevskaia * Update docs/OV_Converter_UG/prepare_model/convert_model/MO_OVC_transition.md Co-authored-by: Anastasiia Pnevskaia * Renamed Legacy conversion guides * Fixed links and styles for inlined code * Fixed style for code references * Fixing technical syntax errors in docs * Another attempt to fix docs * Removed all unreferenced images * Better content for Additional Resources in model preporation introduction * MO to OVC transition guide. (#127) * Examples code correction. * Change format of example. * Conflict fix. * Remove wrong change. * Added input_shapes example. * batch example. * Examples format changed. * List item removed. * Remove list for all examples. * Corrected batch example. * Transform example. * Text corrections. * Text correction. * Example correction. * Small correction. * Small correction. * Small correction. * Small correction. * Text corrections. * Links corrected. * Text corrections (#128) * Text corrections. * Example corrected. * Update docs/install_guides/pypi-openvino-dev.md Co-authored-by: Sergey Lyalin --------- Co-authored-by: Sergey Lyalin * Many technical fixes, description of recursive flattening of list and tuples * Reorganized structure of Model Conversion toc tree. Removed fp16 dedicated page, merged to Conversion Parameters. * Update docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md Co-authored-by: Roman Kazantsev * Update docs/Documentation/model_introduction.md Co-authored-by: Maciej Smyk * Fixed example from tf hub. 
Removed input_shape references * Update docs/Documentation/model_introduction.md Co-authored-by: Tatiana Savina * Update docs/Documentation/model_introduction.md Co-authored-by: Tatiana Savina * Update docs/Documentation/model_introduction.md Co-authored-by: Tatiana Savina * Removed * Update docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_ONNX.md Co-authored-by: Tatiana Savina * Update docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_ONNX.md Co-authored-by: Tatiana Savina * Update docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_ONNX.md Co-authored-by: Tatiana Savina * Update docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_ONNX.md Co-authored-by: Tatiana Savina * Fixed links * Removed TODO for model flow * Apply suggestions from code review Co-authored-by: Tatiana Savina * Restored lost code-blocks that leaded to wrong rendering of the code snippets in some places * Apply suggestions from code review Co-authored-by: Tatiana Savina * Update docs/Documentation/model_introduction.md * Fixed links to notebooks * Apply suggestions from code review Co-authored-by: Tatiana Savina --------- Co-authored-by: Anastasiia Pnevskaia Co-authored-by: Karol Blaszczak Co-authored-by: Roman Kazantsev Co-authored-by: Maciej Smyk Co-authored-by: Tatiana Savina --- docs/Documentation/model_introduction.md | 225 ++++++- .../Deep_Learning_Model_Optimizer_DevGuide.md | 9 +- docs/MO_DG/prepare_model/FP16_Compression.md | 10 +- .../convert_model/supported_model_formats.md | 62 +- .../Deep_Learning_Model_Optimizer_DevGuide.md | 98 +++ .../convert_model/Convert_Model_From_ONNX.md | 59 ++ .../Convert_Model_From_Paddle.md | 201 ++++++ .../Convert_Model_From_PyTorch.md | 155 +++++ .../Convert_Model_From_TensorFlow.md | 331 +++++++++ .../Convert_Model_From_TensorFlow_Lite.md | 42 ++ .../convert_model/Converting_Model.md | 141 ++++ .../convert_model/MO_OVC_transition.md | 634 ++++++++++++++++++ .../convert_model/supported_model_formats.md | 33 + docs/get_started/get_started_demos.md | 166 ++--- docs/glossary.md | 129 ++-- .../installing-openvino-from-archive-linux.md | 203 +++--- docs/install_guides/pypi-openvino-dev.md | 26 +- docs/model_zoo.md | 4 +- 18 files changed, 2197 insertions(+), 331 deletions(-) create mode 100644 docs/OV_Converter_UG/Deep_Learning_Model_Optimizer_DevGuide.md create mode 100644 docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_ONNX.md create mode 100644 docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_Paddle.md create mode 100644 docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_PyTorch.md create mode 100644 docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md create mode 100644 docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow_Lite.md create mode 100644 docs/OV_Converter_UG/prepare_model/convert_model/Converting_Model.md create mode 100644 docs/OV_Converter_UG/prepare_model/convert_model/MO_OVC_transition.md create mode 100644 docs/OV_Converter_UG/prepare_model/convert_model/supported_model_formats.md diff --git a/docs/Documentation/model_introduction.md b/docs/Documentation/model_introduction.md index d0fd4535ce59c2..26038599b83362 100644 --- a/docs/Documentation/model_introduction.md +++ b/docs/Documentation/model_introduction.md @@ -3,64 +3,233 @@ @sphinxdirective .. meta:: - :description: Preparing models for OpenVINO Runtime. 
Learn about the methods + :description: Preparing models for OpenVINO Runtime. Learn about the methods used to read, convert and compile models from different frameworks. - .. toctree:: :maxdepth: 1 :hidden: Supported_Model_Formats - openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide + openvino_docs_OV_Converter_UG_Conversion_Options + openvino_docs_OV_Converter_UG_prepare_model_convert_model_Converting_Model + openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition Every deep learning workflow begins with obtaining a model. You can choose to prepare a custom one, use a ready-made solution and adjust it to your needs, or even download and run a pre-trained network from an online database, such as `TensorFlow Hub `__, `Hugging Face `__, or `Torchvision models `__. -Import a model using ``read_model()`` -################################################# +OpenVINO™ :doc:`supports several model formats ` and can convert them into its own representation, `openvino.Model `__ (`ov.Model `__), providing a conversion API. Converted models can be used for inference with one or multiple OpenVINO Hardware plugins. There are two ways to use the conversion API: using a Python program or calling the ``ovc`` command line tool. -Model files (not Python objects) from :doc:`ONNX, PaddlePaddle, TensorFlow and TensorFlow Lite ` (check :doc:`TensorFlow Frontend Capabilities and Limitations `) do not require a separate step for model conversion, that is ``mo.convert_model``. +.. note:: -The ``read_model()`` method reads a model from a file and produces `openvino.runtime.Model `__. If the file is in one of the supported original framework :doc:`file formats `, the method runs internal conversion to an OpenVINO model format. If the file is already in the :doc:`OpenVINO IR format `, it is read "as-is", without any conversion involved. + Prior OpenVINO 2023.1 release, model conversion API was exposed as ``openvino.tools.mo.convert_model`` function and ``mo`` command line tool. + Starting from 2023.1 release, a new simplified API was introduced: ``openvino.convert_model`` function and ``ovc`` command line tool as a replacement for ``openvino.tools.mo.convert_model`` + and ``mo`` correspondingly, which are considered to be legacy now. All new users are recommended to use these new methods instead of the old methods. Please note that the new API and old API do not + provide the same level of features, that means the new tools are not always backward compatible with the old ones. Please consult with :doc:`Model Conversion API Transition Guide `. -You can also convert a model from original framework to `openvino.runtime.Model `__ using ``convert_model()`` method. More details about ``convert_model()`` are provided in :doc:`model conversion guide ` . +Convert a Model in Python: ``convert_model`` +############################################ -``ov.Model`` can be saved to IR using the ``ov.save_model()`` method. The saved IR can be further optimized using :doc:`Neural Network Compression Framework (NNCF) ` that applies post-training quantization methods. +You can use Model conversion API in Python with the ``openvino.convert_model`` function. This function converts a model from its original framework representation, for example Pytorch or TensorFlow, to the object of type ``openvino.Model``. The resulting ``openvino.Model`` can be inferred in the same application (Python script or Jupyter Notebook) or saved into a file using``openvino.save_model`` for future use. 
Below, there are examples on how to use the ``openvino.convert_model`` with models from popular public repositories: -.. note:: +.. tab-set:: + + .. tab-item:: Torchvision + + .. code-block:: py + :force: + + import openvino as ov + import torch + from torchvision.models import resnet50 + + model = resnet50(pretrained=True) + + # prepare input_data + input_data = torch.rand(1, 3, 224, 224) + + ov_model = ov.convert_model(model, example_input=input_data) + + ###### Option 1: Save to OpenVINO IR: + + # save model to OpenVINO IR for later use + ov.save_model(ov_model, 'model.xml') + + ###### Option 2: Compile and infer with OpenVINO: + + # compile model + compiled_model = ov.compile_model(ov_model) + + # run the inference + result = compiled_model(input_data) + + .. tab-item:: Hugging Face Transformers + + .. code-block:: py + + from transformers import BertTokenizer, BertModel + + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained("bert-base-uncased") + text = "Replace me by any text you'd like." + encoded_input = tokenizer(text, return_tensors='pt') + + import openvino as ov + ov_model = ov.convert_model(model, example_input={**encoded_input}) + + ###### Option 1: Save to OpenVINO IR: + + # save model to OpenVINO IR for later use + ov.save_model(ov_model, 'model.xml') + + ###### Option 2: Compile and infer with OpenVINO: + + # compile model + compiled_model = ov.compile_model(ov_model) + + # prepare input_data using HF tokenizer or your own tokenizer + # encoded_input is reused here for simplicity + + # run inference + result = compiled_model({**encoded_input}) + + .. tab-item:: Keras Applications + + .. code-block:: py + + import tensorflow as tf + import openvino as ov + + tf_model = tf.keras.applications.ResNet50(weights="imagenet") + ov_model = ov.convert_model(tf_model) + + ###### Option 1: Save to OpenVINO IR: + + # save model to OpenVINO IR for later use + ov.save_model(ov_model, 'model.xml') + + ###### Option 2: Compile and infer with OpenVINO: + + # compile model + compiled_model = ov.compile_model(ov_model) + + # prepare input_data + import numpy as np + input_data = np.random.rand(1, 224, 224, 3) - ``convert_model()`` also allows you to perform input/output cut, add pre-processing or add custom Python conversion extensions. + # run inference + result = compiled_model(input_data) -Convert a model with Python using ``mo.convert_model()`` -########################################################### + .. tab-item:: TensorFlow Hub -Model conversion API, specifically, the ``mo.convert_model()`` method converts a model from original framework to ``ov.Model``. ``mo.convert_model()`` returns ``ov.Model`` object in memory so the ``read_model()`` method is not required. The resulting ``ov.Model`` can be inferred in the same training environment (python script or Jupiter Notebook). ``mo.convert_model()`` provides a convenient way to quickly switch from framework-based code to OpenVINO-based code in your inference application. + .. code-block:: py -In addition to model files, ``mo.convert_model()`` can take OpenVINO extension objects constructed directly in Python for easier conversion of operations that are not supported in OpenVINO. The ``mo.convert_model()`` method also has a set of parameters to :doc:`cut the model `, :doc:`set input shapes or layout `, :doc:`add preprocessing `, etc. 
+ import tensorflow as tf + import tensorflow_hub as hub + import openvino as ov -The figure below illustrates the typical workflow for deploying a trained deep learning model, where IR is a pair of files describing the model: + model = tf.keras.Sequential([ + hub.KerasLayer("https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/classification/5") + ]) -* ``.xml`` - Describes the network topology. -* ``.bin`` - Contains the weights and biases binary data. + # Check model page for information about input shape: https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/classification/5 + model.build([None, 224, 224, 3]) -.. image:: _static/images/model_conversion_diagram.svg + model.save('mobilenet_v1_100_224') # use a temporary directory + ov_model = ov.convert_model('mobilenet_v1_100_224') + + ###### Option 1: Save to OpenVINO IR: + + ov.save_model(ov_model, 'model.xml') + + ###### Option 2: Compile and infer with OpenVINO: + + compiled_model = ov.compile_model(ov_model) + + # prepare input_data + import numpy as np + input_data = np.random.rand(1, 224, 224, 3) + + # run inference + result = compiled_model(input_data) + + .. tab-item:: ONNX Model Hub + + .. code-block:: py + + import onnx + + model = onnx.hub.load("resnet50") + onnx.save(model, 'resnet50.onnx') # use a temporary file for model + + import openvino as ov + ov_model = ov.convert_model('resnet50.onnx') + + ###### Option 1: Save to OpenVINO IR: + + # save model to OpenVINO IR for later use + ov.save_model(ov_model, 'model.xml') + + ###### Option 2: Compile and infer with OpenVINO: + + # compile model + compiled_model = ov.compile_model(ov_model) + + # prepare input_data + import numpy as np + input_data = np.random.rand(1, 3, 224, 224) + + # run inference + result = compiled_model(input_data) + +In Option 1, where the ``openvino.save_model`` function is used, an OpenVINO model is serialized in the file system as two files with ``.xml`` and ``.bin`` extensions. This pair of files is called OpenVINO Intermediate Representation format (OpenVINO IR, or just IR) and useful for efficient model deployment. OpenVINO IR can be loaded into another application for inference using the ``openvino.Core.read_model`` function. For more details, refer to the :doc:`OpenVINO™ Runtime documentation `. + +Option 2, where ``openvino.compile_model`` is used, provides a convenient way to quickly switch from framework-based code to OpenVINO-based code in your existing Python inference application. In this case, the converted model is not saved to IR. Instead, the model is compiled and used for inference within the same application. + +Option 1 separates model conversion and model inference into two different applications. This approach is useful for deployment scenarios requiring fewer extra dependencies and faster model loading in the end inference application. + +For example, converting a PyTorch model to OpenVINO usually demands the ``torch`` Python module and Python. This process can take extra time and memory. But, after the converted model is saved as IR with ``openvino.save_model``, it can be loaded in a separate application without requiring the ``torch`` dependency and the time-consuming conversion. The inference application can be written in other languages supported by OpenVINO, for example, in C++, and Python installation is not necessary for it to run. + +Before saving the model to OpenVINO IR, consider applying :doc:`Post-training Optimization ` to enable more efficient inference and smaller model size. 
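To make Option 1 more tangible, the separate inference application can be as small as the sketch below. It assumes ``'model.xml'`` is the IR saved earlier with ``openvino.save_model`` and that the model takes a single ``1x3x224x224`` input; note that no framework packages such as ``torch`` are imported.

.. code-block:: py

   import numpy as np
   import openvino as ov

   core = ov.Core()

   # load the previously saved OpenVINO IR; the original framework is not needed here
   ov_model = core.read_model('model.xml')
   compiled_model = core.compile_model(ov_model, 'AUTO')

   # prepare input_data (the shape is an assumption made for this illustration)
   input_data = np.random.rand(1, 3, 224, 224).astype(np.float32)

   # run inference
   result = compiled_model(input_data)
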
+ +The figure below illustrates the typical workflow for deploying a trained deep-learning model. + +.. image:: ./_static/images/model_conversion_diagram.svg :alt: model conversion diagram +Convert a Model in CLI: ``ovc`` +############################### + +Another option for model conversion is to use ``ovc`` command-line tool, which stands for OpenVINO Model Converter. The tool combines both ``openvino.convert_model`` and ``openvino.save_model`` functionalities. It is convenient to use when the original model is ready for inference and is in one of the supported file formats: ONNX, TensorFlow, TensorFlow Lite, or PaddlePaddle. As a result, ``ovc`` produces an OpenVINO IR, consisting of ``.xml`` and ``.bin`` files, which needs to be read with the ``ov.read_model()`` method. You can compile and infer the ``ov.Model`` later with :doc:`OpenVINO™ Runtime ` -Convert a model using ``mo`` command-line tool -################################################# +.. note:: + PyTorch models cannot be converted with ``ovc``, use ``openvino.convert_model`` instead. -Another option to convert a model is to use ``mo`` command-line tool. ``mo`` is a cross-platform tool that facilitates the transition between training and deployment environments, performs static model analysis, and adjusts deep learning models for optimal execution on end-point target devices in the same measure, as the ``mo.convert_model()`` method. +The results of both ``ovc`` and ``openvino.convert_model``/``openvino.save_model`` conversion methods are the same. You can choose either of them based on your convenience. Note that there should not be any differences in the results of model conversion if the same set of parameters is used and the model is saved into OpenVINO IR. -``mo`` requires the use of a pre-trained deep learning model in one of the supported formats: TensorFlow, TensorFlow Lite, PaddlePaddle, or ONNX. ``mo`` converts the model to the OpenVINO Intermediate Representation format (IR), which needs to be read with the ``ov.read_model()`` method. Then, you can compile and infer the ``ov.Model`` later with :doc:`OpenVINO™ Runtime `. +Cases when Model Preparation is not Required +############################################ -The results of both ``mo`` and ``mo.convert_model()`` conversion methods described above are the same. You can choose one of them, depending on what is most convenient for you. Keep in mind that there should not be any differences in the results of model conversion if the same set of parameters is used. +If a model is represented as a single file from ONNX, PaddlePaddle, TensorFlow and TensorFlow Lite (check :doc:`TensorFlow Frontend Capabilities and Limitations `), it does not require a separate conversion and IR-saving step, that is ``openvino.convert_model`` and ``openvino.save_model``, or ``ovc``. -This section describes how to obtain and prepare your model for work with OpenVINO to get the best inference results: +OpenVINO provides C++ and Python APIs for reading such models by just calling the ``openvino.Core.read_model`` or ``openvino.Core.compile_model`` methods. These methods perform conversion of the model from the original representation. While this conversion may take extra time compared to using prepared OpenVINO IR, it is convenient when you need to read a model in the original format in C++, since ``openvino.convert_model`` is only available in Python. 
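For example, in Python a ready-made ONNX file can be compiled directly, with the conversion happening on the fly; the file name below is only a placeholder, and the C++ ``ov::Core::compile_model`` overload that accepts a file path works the same way.

.. code-block:: py

   import openvino as ov

   core = ov.Core()

   # the original-format model is read and converted implicitly while being compiled
   compiled_model = core.compile_model('model.onnx', 'AUTO')
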
However, for efficient model deployment with the OpenVINO Runtime, it is still recommended to prepare OpenVINO IR and then use it in your inference application. -* :doc:`See the supported formats and how to use them in your project `. -* :doc:`Convert different model formats to the ov.Model format `. +Additional Resources +#################### -@endsphinxdirective +The following articles describe in details how to obtain and prepare your model depending on the source model type: + +* :doc:`Convert different model formats to the ov.Model format `. +* :doc:`Review all available conversion parameters `. + +To achieve the best model inference performance and more compact OpenVINO IR representation follow: +* :doc:`Post-training optimization ` +* :doc:`Model inference in OpenVINO Runtime ` + +If you are using legacy conversion API (``mo`` or ``openvino.tools.mo.convert_model``), please refer to the following materials: + +* :doc:`Transition from legacy mo and ov.tools.mo.convert_model ` +* :doc:`Legacy Model Conversion API ` + +@endsphinxdirective diff --git a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md index fb82472b9b45ad..96d83591a4dedf 100644 --- a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md +++ b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md @@ -1,4 +1,4 @@ -# Convert a Model {#openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide} +# Legacy Conversion API {#openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide} @sphinxdirective @@ -14,12 +14,15 @@ openvino_docs_MO_DG_FP16_Compression openvino_docs_MO_DG_Python_API openvino_docs_MO_DG_prepare_model_Model_Optimizer_FAQ + Supported_Model_Formats_MO_DG .. meta:: - :description: Model conversion (MO) furthers the transition between training and - deployment environments, it adjusts deep learning models for + :description: Model conversion (MO) furthers the transition between training and + deployment environments, it adjusts deep learning models for optimal execution on target devices. +.. note:: + This part of the documentation describes a legacy approach to model conversion. Starting with OpenVINO 2023.1, a simpler alternative API for model conversion is available: ``openvino.convert_model`` and OpenVINO Model Converter ``ovc`` CLI tool. Refer to `Model preparation ` for more details. If you are still using `openvino.tools.mo.convert_model` or `mo` CLI tool, you can still refer to this documentation. However, consider checking the `transition guide ` to learn how to migrate from the legacy conversion API to the new one. Depending on the model topology, the new API can be a better option for you. To convert a model to OpenVINO model format (``ov.Model``), you can use the following command: diff --git a/docs/MO_DG/prepare_model/FP16_Compression.md b/docs/MO_DG/prepare_model/FP16_Compression.md index f560a6a035063d..05b2676c055dc5 100644 --- a/docs/MO_DG/prepare_model/FP16_Compression.md +++ b/docs/MO_DG/prepare_model/FP16_Compression.md @@ -3,7 +3,7 @@ @sphinxdirective By default, when IR is saved all relevant floating-point weights are compressed to ``FP16`` data type during model conversion. -It results in creating a "compressed ``FP16`` model", which occupies about half of +It results in creating a "compressed ``FP16`` model", which occupies about half of the original space in the file system. The compression may introduce a minor drop in accuracy, but it is negligible for most models. 
In case if accuracy drop is significant user can disable compression explicitly. @@ -29,20 +29,20 @@ To disable compression, use the ``compress_to_fp16=False`` option: mo --input_model INPUT_MODEL --compress_to_fp16=False -For details on how plugins handle compressed ``FP16`` models, see +For details on how plugins handle compressed ``FP16`` models, see :doc:`Working with devices `. .. note:: - ``FP16`` compression is sometimes used as the initial step for ``INT8`` quantization. - Refer to the :doc:`Post-training optimization ` guide for more + ``FP16`` compression is sometimes used as the initial step for ``INT8`` quantization. + Refer to the :doc:`Post-training optimization ` guide for more information about that. .. note:: Some large models (larger than a few GB) when compressed to ``FP16`` may consume an overly large amount of RAM on the loading - phase of the inference. If that is the case for your model, try to convert it without compression: + phase of the inference. If that is the case for your model, try to convert it without compression: ``convert_model(INPUT_MODEL, compress_to_fp16=False)`` or ``convert_model(INPUT_MODEL)`` diff --git a/docs/MO_DG/prepare_model/convert_model/supported_model_formats.md b/docs/MO_DG/prepare_model/convert_model/supported_model_formats.md index b57c73eac51324..068ba7fca16297 100644 --- a/docs/MO_DG/prepare_model/convert_model/supported_model_formats.md +++ b/docs/MO_DG/prepare_model/convert_model/supported_model_formats.md @@ -1,4 +1,4 @@ -# Supported Model Formats {#Supported_Model_Formats} +# Supported Model Formats {#Supported_Model_Formats_MO_DG} @sphinxdirective @@ -17,7 +17,7 @@ :description: Learn about supported model formats and the methods used to convert, read, and compile them in OpenVINO™. -**OpenVINO IR (Intermediate Representation)** - the proprietary and default format of OpenVINO, benefiting from the full extent of its features. All other supported model formats, as listed below, are converted to :doc:`OpenVINO IR ` to enable inference. Consider storing your model in this format to minimize first-inference latency, perform model optimization, and, in some cases, save space on your drive. +**OpenVINO IR (Intermediate Representation)** - the proprietary and default format of OpenVINO, benefiting from the full extent of its features. All other supported model formats, as listed below, are converted to :doc:`OpenVINO IR ` to enable inference. Consider storing your model in this format to minimize first-inference latency, perform model optimization, and, in some cases, save space on your drive. **PyTorch, TensorFlow, ONNX, and PaddlePaddle** - can be used with OpenVINO Runtime API directly, which means you do not need to save them as OpenVINO IR before including them in your application. @@ -62,9 +62,9 @@ Here are code examples of how to use these methods with different model formats: ov_model = convert_model(model) compiled_model = core.compile_model(ov_model, "AUTO") - For more details on conversion, refer to the - :doc:`guide ` - and an example `tutorial `__ + For more details on conversion, refer to the + :doc:`guide ` + and an example `tutorial `__ on this topic. .. tab-item:: TensorFlow @@ -104,10 +104,10 @@ Here are code examples of how to use these methods with different model formats: ov_model = convert_model("saved_model.pb") compiled_model = core.compile_model(ov_model, "AUTO") - For more details on conversion, refer to the - :doc:`guide ` - and an example `tutorial `__ - on this topic. 
+ For more details on conversion, refer to the + :doc:`guide ` + and an example `tutorial `__ + on this topic. * The ``read_model()`` and ``compile_model()`` methods: @@ -125,8 +125,8 @@ Here are code examples of how to use these methods with different model formats: ov_model = read_model("saved_model.pb") compiled_model = core.compile_model(ov_model, "AUTO") - For a guide on how to run inference, see how to - :doc:`Integrate OpenVINO™ with Your Application `. + For a guide on how to run inference, see how to + :doc:`Integrate OpenVINO™ with Your Application `. For TensorFlow format, see :doc:`TensorFlow Frontend Capabilities and Limitations `. .. tab-item:: C++ @@ -146,7 +146,7 @@ Here are code examples of how to use these methods with different model formats: ov::CompiledModel compiled_model = core.compile_model("saved_model.pb", "AUTO"); - For a guide on how to run inference, see how to + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application `. .. tab-item:: C @@ -167,7 +167,7 @@ Here are code examples of how to use these methods with different model formats: ov_compiled_model_t* compiled_model = NULL; ov_core_compile_model_from_file(core, "saved_model.pb", "AUTO", 0, &compiled_model); - For a guide on how to run inference, see how to + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application `. .. tab-item:: CLI @@ -206,9 +206,9 @@ Here are code examples of how to use these methods with different model formats: ov_model = convert_model(".tflite") compiled_model = core.compile_model(ov_model, "AUTO") - For more details on conversion, refer to the - :doc:`guide ` - and an example `tutorial `__ + For more details on conversion, refer to the + :doc:`guide ` + and an example `tutorial `__ on this topic. @@ -239,7 +239,7 @@ Here are code examples of how to use these methods with different model formats: compiled_model = core.compile_model(".tflite", "AUTO") - For a guide on how to run inference, see how to + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application `. @@ -258,7 +258,7 @@ Here are code examples of how to use these methods with different model formats: ov::CompiledModel compiled_model = core.compile_model(".tflite", "AUTO"); - For a guide on how to run inference, see how to + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application `. .. tab-item:: C @@ -277,7 +277,7 @@ Here are code examples of how to use these methods with different model formats: ov_compiled_model_t* compiled_model = NULL; ov_core_compile_model_from_file(core, ".tflite", "AUTO", 0, &compiled_model); - For a guide on how to run inference, see how to + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application `. .. tab-item:: CLI @@ -297,7 +297,7 @@ Here are code examples of how to use these methods with different model formats: mo --input_model .tflite - For details on the conversion, refer to the + For details on the conversion, refer to the :doc:`article `. .. tab-item:: ONNX @@ -324,9 +324,9 @@ Here are code examples of how to use these methods with different model formats: ov_model = convert_model(".onnx") compiled_model = core.compile_model(ov_model, "AUTO") - For more details on conversion, refer to the - :doc:`guide ` - and an example `tutorial `__ + For more details on conversion, refer to the + :doc:`guide ` + and an example `tutorial `__ on this topic. 
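For completeness, here is a hedged sketch of the full load-and-infer loop once a model has been converted; it assumes a single-input ONNX classification model named ``model.onnx`` with a ``[1, 3, 224, 224]`` input, which is only an illustration:

.. code-block:: py
   :force:

   import numpy as np
   import openvino as ov

   core = ov.Core()
   ov_model = ov.convert_model("model.onnx")
   compiled_model = core.compile_model(ov_model, "AUTO")

   # Dummy data matching the assumed input shape.
   data = np.random.rand(1, 3, 224, 224).astype(np.float32)
   results = compiled_model(data)
   print(results[compiled_model.output(0)].shape)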
@@ -445,9 +445,9 @@ Here are code examples of how to use these methods with different model formats: ov_model = convert_model(".pdmodel") compiled_model = core.compile_model(ov_model, "AUTO") - For more details on conversion, refer to the - :doc:`guide ` - and an example `tutorial `__ + For more details on conversion, refer to the + :doc:`guide ` + and an example `tutorial `__ on this topic. * The ``read_model()`` method: @@ -477,7 +477,7 @@ Here are code examples of how to use these methods with different model formats: compiled_model = core.compile_model(".pdmodel", "AUTO") - For a guide on how to run inference, see how to + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application `. .. tab-item:: C++ @@ -495,7 +495,7 @@ Here are code examples of how to use these methods with different model formats: ov::CompiledModel compiled_model = core.compile_model(".pdmodel", "AUTO"); - For a guide on how to run inference, see how to + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application `. .. tab-item:: C @@ -514,7 +514,7 @@ Here are code examples of how to use these methods with different model formats: ov_compiled_model_t* compiled_model = NULL; ov_core_compile_model_from_file(core, ".pdmodel", "AUTO", 0, &compiled_model); - For a guide on how to run inference, see how to + For a guide on how to run inference, see how to :doc:`Integrate OpenVINO™ with Your Application `. .. tab-item:: CLI @@ -538,8 +538,8 @@ Here are code examples of how to use these methods with different model formats: :doc:`article `. -**MXNet, Caffe, and Kaldi** are legacy formats that need to be converted explicitly to OpenVINO IR or ONNX before running inference. -As OpenVINO is currently proceeding **to deprecate these formats** and **remove their support entirely in the future**, +**MXNet, Caffe, and Kaldi** are legacy formats that need to be converted explicitly to OpenVINO IR or ONNX before running inference. +As OpenVINO is currently proceeding **to deprecate these formats** and **remove their support entirely in the future**, converting them to ONNX for use with OpenVINO should be considered the default path. .. note:: diff --git a/docs/OV_Converter_UG/Deep_Learning_Model_Optimizer_DevGuide.md b/docs/OV_Converter_UG/Deep_Learning_Model_Optimizer_DevGuide.md new file mode 100644 index 00000000000000..d0362bd904d6d3 --- /dev/null +++ b/docs/OV_Converter_UG/Deep_Learning_Model_Optimizer_DevGuide.md @@ -0,0 +1,98 @@ +# Conversion Parameters {#openvino_docs_OV_Converter_UG_Conversion_Options} + +@sphinxdirective + +.. _deep learning model optimizer: + +.. meta:: + :description: Model Conversion API provides several parameters to adjust model conversion. + +This document describes all available parameters for ``openvino.convert_model``, ``ovc``, and ``openvino.save_model`` without focusing on a particular framework model format. Use this information for your reference as a common description of the conversion API capabilities in general. Part of the options can be not relevant to some specific frameworks. Use :doc:`Supported Model Formats ` page for more dedicated framework-dependent tutorials. + +In most cases when it is required to convert a model the following simple syntax can be used: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: py
+         :force:
+
+         import openvino as ov
+
+         ov_model = ov.convert_model('path_to_your_model')
+         # or, when model is a Python model object
+         ov_model = ov.convert_model(model)
+
+         # Optionally adjust model by embedding pre-post processing here...
+
+         ov.save_model(ov_model, 'model.xml')
+
+   .. tab-item:: CLI
+      :sync: cli
+
+      .. code-block:: sh
+
+         ovc path_to_your_model
+
+Providing just a path to the model or model object as the ``openvino.convert_model`` argument is frequently enough to make a successful conversion. However, depending on the model topology and the original deep learning framework, additional parameters may be required, which are described below.
+
+- ``example_input`` parameter, available in Python ``openvino.convert_model`` only, is intended to trace the model to obtain its graph representation. This parameter is crucial for converting PyTorch models and may sometimes be required for TensorFlow models. For more details, refer to the :doc:`PyTorch Model Conversion ` or :doc:`TensorFlow Model Conversion `.
+
+- ``input`` parameter to set or override shapes for model inputs. It configures dynamic and static dimensions in model inputs depending on your inference requirements. For more information on this parameter, refer to the :doc:`Setting Input Shapes ` guide.
+
+- ``output`` parameter to select one or multiple outputs from the original model. This is useful when the model has outputs that are not required for inference in a deployment scenario. By specifying only necessary outputs, you can create a more compact model that infers faster.
+
+- ``compress_to_fp16`` parameter, provided by the ``ovc`` CLI tool and the ``openvino.save_model`` Python function, gives control over the compression of model weights to FP16 format when saving an OpenVINO model to IR. This option is enabled by default, which means all produced IRs are saved using the FP16 data type for weights, which saves up to 2x storage space for the model file and in most cases doesn't sacrifice model accuracy. In case it does affect accuracy, the compression can be disabled by setting this flag to ``False``:
+
+.. tab-set::
+
+   .. tab-item:: Python
+      :sync: py
+
+      .. code-block:: py
+         :force:
+
+         import openvino as ov
+
+         ov_model = ov.convert_model(original_model)
+         ov.save_model(ov_model, 'model.xml', compress_to_fp16=False)
+
+   .. tab-item:: CLI
+      :sync: cli
+
+      .. code-block:: sh
+
+         ovc path_to_your_model --compress_to_fp16=False
+
+For details on how plugins handle compressed ``FP16`` models, see
+:doc:`Working with devices `.
+
+.. note::
+
+   ``FP16`` compression is sometimes used as the initial step for ``INT8`` quantization.
+   Refer to the :doc:`Post-training optimization ` guide for more
+   information about that.
+
+- ``extension`` parameter which makes it possible to convert models containing operations that are not supported by OpenVINO out-of-the-box. It requires implementing an OpenVINO extension first; refer to the :doc:`Frontend Extensions ` guide.
+
+- ``share_weights`` parameter with default value ``True`` allows reusing memory with original weights. For models loaded in Python and then passed to ``openvino.convert_model``, it means that the OpenVINO model will share the same areas in program memory where the original weights are located. For models loaded from files by ``openvino.convert_model``, file memory mapping is used to avoid extra memory allocation.
When enabled, the original model cannot be destroyed (Python object cannot be deallocated and original model file cannot be deleted) for the whole lifetime of OpenVINO model. If it is not desired, set ``share_weights=False`` when calling ``openvino.convert_model``. + +.. note:: ``ovc`` doesn't have ``share_weights`` option and always uses sharing to reduce conversion time and consume less amount of memory during the conversion. + +- ``output_model`` parameter in ``ovc`` and ``openvino.save_model`` specifies name for output ``.xml`` file with the resulting OpenVINO IR. The accompanying ``.bin`` file name will be generated automatically by replacing ``.xml`` extension with ``.bin`` extension. The value of ``output_model`` must end with ``.xml`` extension. For ``ovc`` command line tool, ``output_model`` can also contain a name of a directory. In this case, the resulting OpenVINO IR files will be put into that directory with a base name of ``.xml`` and ``.bin`` files matching the original model base name passed to ``ovc`` as a parameter. For example, when calling ``ovc your_model.onnx --output_model directory_name``, files ``directory_name/your_model.xml`` and ``directory_name/your_model.bin`` will be created. If ``output_model`` is not used, then the current directory is used as a destination directory. + +.. note:: ``openvino.save_model`` doesn't support a directory for ``output_model`` parameter value because ``openvino.save_model`` gets OpenVINO model object represented in a memory and there is no original model file name available for output file name generation. For the same reason, ``output_model`` is a mandatory parameter for ``openvino.save_model``. + +- ``verbose`` parameter activates extra diagnostics printed to the standard output. Use for debugging purposes in case there is an issue with the conversion and to collect information for better bug reporting to OpenVINO team. + +.. note:: Weights sharing doesn't equally work for all the supported model formats. The value of this flag is considered as a hint for the conversion API, and actual sharing is used only if it is implemented and possible for a particular model representation. + +You can always run ``ovc -h`` or ``ovc --help`` to recall all the supported parameters for ``ovc``. + +Use ``ovc --version`` to check the version of OpenVINO package installed. + +@endsphinxdirective + + diff --git a/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_ONNX.md b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_ONNX.md new file mode 100644 index 00000000000000..37bfd58f87b01c --- /dev/null +++ b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_ONNX.md @@ -0,0 +1,59 @@ +# Converting an ONNX Model {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_ONNX} + +@sphinxdirective + +.. meta:: + :description: Learn how to convert a model from the + ONNX format to the OpenVINO Model. + +Introduction to ONNX +#################### + +`ONNX `__ is a representation format for deep learning models that enables AI developers to easily transfer models between different frameworks. + +.. note:: An ONNX model file can be loaded by ``openvino.Core.read_model`` or ``openvino.Core.compile_model`` methods by OpenVINO runtime API without the need to prepare an OpenVINO IR first. Refer to the :doc:`inference example ` for more details. Using ``openvino.convert_model`` is still recommended if the model load latency is important for the inference application. 
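Both paths mentioned in the note can be written in a few lines. The sketch below (file names are placeholders) contrasts the one-step loading of the ONNX file with the recommended convert-once-then-read-IR flow:

.. code-block:: py
   :force:

   import openvino as ov

   core = ov.Core()

   # One-step path: the ONNX file is converted on every load.
   compiled_from_onnx = core.compile_model("model.onnx", "AUTO")

   # Two-step path: convert and save the IR once, then load the IR in the application.
   ov.save_model(ov.convert_model("model.onnx"), "model.xml")
   compiled_from_ir = core.compile_model("model.xml", "AUTO")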
+ +Converting an ONNX Model +######################## + +This page provides instructions on model conversion from the ONNX format to the OpenVINO IR format. + +For model conversion, you need an ONNX model either directly downloaded from a public repository or converted from any framework that supports exporting to the ONNX format. + +To convert an ONNX model, run model conversion with the path to the input model ``.onnx`` file: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + + import openvino as ov + ov.convert_model('your_model_file.onnx') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc your_model_file.onnx + +External Data Files +################### + +ONNX models may consist of multiple files when the model size exceeds 2GB allowed by Protobuf. According to this `ONNX article `__, instead of a single file, the model is represented as one file with ``.onnx`` extension and multiple separate files with external data. These data files are located in the same directory as the main ``.onnx`` file or in another directory. + +OpenVINO model conversion API supports ONNX models with external data representation. In this case, you only need to pass the main file with ``.onnx`` extension as ``ovc`` or ``openvino.convert_model`` parameter. The other files will be found and loaded automatically during the model conversion. The resulting OpenVINO model, represented as an IR in the filesystem, will have the usual structure with a single ``.xml`` file and a single ``.bin`` file, where all the original model weights are copied and packed together. + +Supported ONNX Layers +##################### + +For the list of supported standard layers, refer to the :doc:`Supported Operations ` page. + +Additional Resources +#################### + +Check out more examples of model conversion in :doc:`interactive Python tutorials `. + +@endsphinxdirective diff --git a/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_Paddle.md b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_Paddle.md new file mode 100644 index 00000000000000..ad2aa8798738ff --- /dev/null +++ b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_Paddle.md @@ -0,0 +1,201 @@ +# Converting a PaddlePaddle Model {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_Paddle} + +@sphinxdirective + +.. meta:: + :description: Learn how to convert a model from the + PaddlePaddle format to the OpenVINO Model. + +This page provides general instructions on how to convert a model from the PaddlePaddle format to the OpenVINO IR format using OpenVINO model conversion API. The instructions are different depending on the PaddlePaddle model format. + +.. note:: PaddlePaddle model serialized in a file can be loaded by ``openvino.Core.read_model`` or ``openvino.Core.compile_model`` methods by OpenVINO runtime API without preparing OpenVINO IR first. Refer to the :doc:`inference example ` for more details. Using ``openvino.convert_model`` is still recommended if model load latency matters for the inference application. + +Converting PaddlePaddle Model Files +################################### + +PaddlePaddle inference model includes ``.pdmodel`` (storing model structure) and ``.pdiparams`` (storing model weight). For details on how to export a PaddlePaddle inference model, refer to the `Exporting PaddlePaddle Inference Model `__ Chinese guide. 
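As a hedged sketch of the export step (the model, paths, and exact ``paddle.jit.save`` arguments are illustrative and may differ between PaddlePaddle versions):

.. code-block:: py
   :force:

   import paddle

   model = paddle.vision.models.resnet50()
   x = paddle.static.InputSpec([1, 3, 224, 224], 'float32', 'x')

   # Writes inference/resnet50.pdmodel and inference/resnet50.pdiparams
   paddle.jit.save(model, 'inference/resnet50', input_spec=[x])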
+ +To convert a PaddlePaddle model, use the ``ovc`` or ``openvino.convert_model`` and specify the path to the input ``.pdmodel`` model file: + + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + + import openvino as ov + ov.convert_model('your_model_file.pdmodel') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc your_model_file.pdmodel + +**For example**, this command converts a yolo v3 PaddlePaddle model to OpenVINO IR model: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + + import openvino as ov + ov.convert_model('yolov3.pdmodel') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc yolov3.pdmodel + +Converting PaddlePaddle Python Model +#################################### + +Model conversion API supports passing PaddlePaddle models directly in Python without saving them to files in the user code. + +Following PaddlePaddle model object types are supported: + +* ``paddle.hapi.model.Model`` +* ``paddle.fluid.dygraph.layers.Layer`` +* ``paddle.fluid.executor.Executor`` + +Some PaddlePaddle models may require setting ``example_input`` or ``output`` for conversion as shown in the examples below: + +* Example of converting ``paddle.hapi.model.Model`` format model: + + .. code-block:: py + :force: + + import paddle + import openvino as ov + + # create a paddle.hapi.model.Model format model + resnet50 = paddle.vision.models.resnet50() + x = paddle.static.InputSpec([1,3,224,224], 'float32', 'x') + y = paddle.static.InputSpec([1,1000], 'float32', 'y') + + model = paddle.Model(resnet50, x, y) + + # convert to OpenVINO IR format + ov_model = ov.convert_model(model) + + ov.save_model(ov_model, "resnet50.xml") + +* Example of converting ``paddle.fluid.dygraph.layers.Layer`` format model: + + ``example_input`` is required while ``output`` is optional, which accept the following formats: + + ``list`` with tensor (``paddle.Tensor``) or InputSpec (``paddle.static.input.InputSpec``) + + .. code-block:: py + :force: + + import paddle + import openvino as ov + + # create a paddle.fluid.dygraph.layers.Layer format model + model = paddle.vision.models.resnet50() + x = paddle.rand([1,3,224,224]) + + # convert to OpenVINO IR format + ov_model = ov.convert_model(model, example_input=[x]) + +* Example of converting ``paddle.fluid.executor.Executor`` format model: + + ``example_input`` and ``output`` are required, which accept the following formats: + + ``list`` or ``tuple`` with variable(``paddle.static.data``) + + .. code-block:: py + :force: + + import paddle + import openvino as ov + + paddle.enable_static() + + # create a paddle.fluid.executor.Executor format model + x = paddle.static.data(name="x", shape=[1,3,224]) + y = paddle.static.data(name="y", shape=[1,3,224]) + relu = paddle.nn.ReLU() + sigmoid = paddle.nn.Sigmoid() + y = sigmoid(relu(x)) + + exe = paddle.static.Executor(paddle.CPUPlace()) + exe.run(paddle.static.default_startup_program()) + + # convert to OpenVINO IR format + ov_model = ov.convert_model(exe, example_input=[x], output=[y]) + +Supported PaddlePaddle Layers +############################# + +For the list of supported standard layers, refer to the :doc:`Supported Operations ` page. + +Officially Supported PaddlePaddle Models +######################################## + +The following PaddlePaddle models have been officially validated and confirmed to work (as of OpenVINO 2022.1): + +.. 
list-table:: + :widths: 20 25 55 + :header-rows: 1 + + * - Model Name + - Model Type + - Description + * - ppocr-det + - optical character recognition + - Models are exported from `PaddleOCR `_. Refer to `READ.md `_. + * - ppocr-rec + - optical character recognition + - Models are exported from `PaddleOCR `_. Refer to `READ.md `_. + * - ResNet-50 + - classification + - Models are exported from `PaddleClas `_. Refer to `getting_started_en.md `_. + * - MobileNet v2 + - classification + - Models are exported from `PaddleClas `_. Refer to `getting_started_en.md `_. + * - MobileNet v3 + - classification + - Models are exported from `PaddleClas `_. Refer to `getting_started_en.md `_. + * - BiSeNet v2 + - semantic segmentation + - Models are exported from `PaddleSeg `_. Refer to `model_export.md `_. + * - DeepLab v3 plus + - semantic segmentation + - Models are exported from `PaddleSeg `_. Refer to `model_export.md `_. + * - Fast-SCNN + - semantic segmentation + - Models are exported from `PaddleSeg `_. Refer to `model_export.md `_. + * - OCRNET + - semantic segmentation + - Models are exported from `PaddleSeg `_. Refer to `model_export.md `_. + * - Yolo v3 + - detection + - Models are exported from `PaddleDetection `_. Refer to `EXPORT_MODEL.md `_. + * - ppyolo + - detection + - Models are exported from `PaddleDetection `_. Refer to `EXPORT_MODEL.md `_. + * - MobileNetv3-SSD + - detection + - Models are exported from `PaddleDetection `_. Refer to `EXPORT_MODEL.md `_. + * - U-Net + - semantic segmentation + - Models are exported from `PaddleSeg `_. Refer to `model_export.md `_. + * - BERT + - language representation + - Models are exported from `PaddleNLP `_. Refer to `README.md `_. + +Additional Resources +#################### + +Check out more examples of model conversion in :doc:`interactive Python tutorials `. + +@endsphinxdirective diff --git a/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_PyTorch.md b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_PyTorch.md new file mode 100644 index 00000000000000..cc6126cffd6043 --- /dev/null +++ b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_PyTorch.md @@ -0,0 +1,155 @@ +# Converting a PyTorch Model {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_PyTorch} + +@sphinxdirective + +.. meta:: + :description: Learn how to convert a model from the + PyTorch format to the OpenVINO Model. + +This page provides instructions on how to convert a model from the PyTorch format to the OpenVINO Model using the ``openvino.convert_model`` function. + +.. note:: + + In the examples below the ``openvino.save_model`` function is not used because there are no PyTorch-specific details regarding the usage of this function. In all examples, the converted OpenVINO model can be saved to IR by calling ``ov.save_model(ov_model, 'model.xml')`` as usual. + +Here is the simplest example of PyTorch model conversion using a model from ``torchvision``: + +.. 
code-block:: py
+   :force:
+
+   import torchvision
+   import torch
+   import openvino as ov
+
+   model = torchvision.models.resnet50(pretrained=True)
+   ov_model = ov.convert_model(model)
+
+The ``openvino.convert_model`` function supports the following PyTorch model object types:
+
+* ``torch.nn.Module`` derived classes
+* ``torch.jit.ScriptModule``
+* ``torch.jit.ScriptFunction``
+
+When passing a ``torch.nn.Module`` derived class object as an input model, converting PyTorch models often requires the ``example_input`` parameter to be specified in the ``openvino.convert_model`` function call. Internally it triggers the model tracing during the model conversion process, using the capabilities of the ``torch.jit.trace`` function.
+
+The use of ``example_input`` can lead to a better quality of the resulting OpenVINO model in terms of correctness and performance compared to converting the same original model without specifying ``example_input``. While the necessity of ``example_input`` depends on the implementation details of a specific PyTorch model, it is recommended to always set the ``example_input`` parameter when it is available.
+
+The value for the ``example_input`` parameter can be easily derived from knowing the input tensor's element type and shape. While it may not be suitable for all cases, random numbers can frequently serve this purpose effectively:
+
+.. code-block:: py
+   :force:
+
+   import torchvision
+   import torch
+   import openvino as ov
+
+   model = torchvision.models.resnet50(pretrained=True)
+   ov_model = ov.convert_model(model, example_input=torch.rand(1, 3, 224, 224))
+
+In practice, the code to evaluate or test the PyTorch model is usually provided with the model itself and can be used to generate a proper ``example_input`` value. A modified example of using the ``resnet50`` model from ``torchvision`` is presented below. It demonstrates how to switch inference in the existing PyTorch application to OpenVINO and how to get a value for ``example_input``:
+
+.. code-block:: py
+   :force:
+
+   from torchvision.io import read_image
+   from torchvision.models import resnet50, ResNet50_Weights
+   import requests, PIL, io, torch
+
+   # Get a picture of a cat from the web:
+   img = PIL.Image.open(io.BytesIO(requests.get("https://placekitten.com/200/300").content))
+
+   # Torchvision model and input data preparation from https://pytorch.org/vision/stable/models.html
+
+   weights = ResNet50_Weights.DEFAULT
+   model = resnet50(weights=weights)
+   model.eval()
+   preprocess = weights.transforms()
+   batch = preprocess(img).unsqueeze(0)
+
+   # PyTorch model inference and post-processing
+
+   prediction = model(batch).squeeze(0).softmax(0)
+   class_id = prediction.argmax().item()
+   score = prediction[class_id].item()
+   category_name = weights.meta["categories"][class_id]
+   print(f"{category_name}: {100 * score:.1f}% (with PyTorch)")
+
+   # OpenVINO model preparation and inference with the same post-processing
+
+   import openvino as ov
+   compiled_model = ov.compile_model(ov.convert_model(model, example_input=batch))
+
+   prediction = torch.tensor(compiled_model(batch)[0]).squeeze(0).softmax(0)
+   class_id = prediction.argmax().item()
+   score = prediction[class_id].item()
+   category_name = weights.meta["categories"][class_id]
+   print(f"{category_name}: {100 * score:.1f}% (with OpenVINO)")
+
+Check out more examples in :doc:`interactive Python tutorials `.
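As an additional end-to-end sketch (the ``SmallNet`` module below is a made-up example, not part of the original article), a custom ``torch.nn.Module`` can be converted with ``example_input`` and then saved to IR in the usual way:

.. code-block:: py
   :force:

   import torch
   import openvino as ov

   class SmallNet(torch.nn.Module):
       def __init__(self):
           super().__init__()
           self.linear = torch.nn.Linear(8, 4)

       def forward(self, x):
           return torch.relu(self.linear(x))

   model = SmallNet().eval()
   example = torch.rand(1, 8)                       # drives tracing inside convert_model
   ov_model = ov.convert_model(model, example_input=example)
   ov.save_model(ov_model, "small_net.xml")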
+ +Supported Input Parameter Types +############################### + +If the model has a single input, the following input types are supported in ``example_input``: + +* ``openvino.runtime.Tensor`` +* ``torch.Tensor`` +* ``tuple`` or any nested combination of tuples + +If a model has multiple inputs, the input values are combined in a ``list``, a ``tuple``, or a ``dict``: + +* values in a ``list`` or ``tuple`` should be passed in the same order as the original model specifies, +* ``dict`` has keys from the names of the original model argument names. + +Enclosing in ``list``, ``tuple`` or ``dict`` can be used for a single input as well as for multiple inputs. + +If a model has a single input parameter and the type of this input is a ``tuple``, it should be always passed enclosed into an extra ``list``, ``tuple`` or ``dict`` as in the case of multiple inputs. It is required to eliminate ambiguity between ``model((a, b))`` and ``model(a, b)`` in this case. + +Non-tensor Data Types +##################### + +When a non-tensor data type, such as a ``tuple`` or ``dict``, appears in a model input or output, it is flattened. The flattening means that each element within the ``tuple`` will be represented as a separate input or output. The same is true for ``dict`` values, where the keys of the ``dict`` are used to form a model input/output name. The original non-tensor input or output is replaced by one or multiple new inputs or outputs resulting from this flattening process. This flattening procedure is applied recursively in the case of nested ``tuples`` and ``dicts`` until it reaches the assumption that the most nested data type is a tensor. + +For example, if the original model is called with ``example_input=(a, (b, c, (d, e)))``, where ``a``, ``b``, ...``e`` are tensors, it means that the original model has two inputs. The first is a tensor ``a``, and the second is a tuple ``(b, c, (d, e))``, containing two tensors ``b`` and ``c`` and a nested tuple ``(d, e)``. Then the resulting OpenVINO model will have signature ``(a, b, c, d, e)``, which means it will have five inputs, all of type tensor, instead of two in the original model. + +Flattening of a ``dict`` is supported for outputs only. If your model has an input of type ``dict``, you will need to decompose the ``dict`` to one or multiple tensor inputs by modifying the original model signature or making a wrapper model on top of the original model. This approach hides the dictionary from the model signature and allows it to be processed inside the model successfully. + +.. note:: + + An important consequence of flattening is that only ``tuple`` and ``dict`` with a fixed number of elements and key values are supported. The structure of such inputs should be fully described in the ``example_input`` parameter of ``convert_model``. The flattening on outputs should be reproduced with the given ``example_input`` and cannot be changed once the conversion is done. + +Check out more examples of model conversion with non-tensor data types in the following tutorials: + +* `Video Subtitle Generation using Whisper and OpenVINO™ `__ +* `Visual Question Answering and Image Captioning using BLIP and OpenVINO `__ + + +Exporting a PyTorch Model to ONNX Format +######################################## + +An alternative method of converting PyTorch models is exporting a PyTorch model to ONNX with ``torch.onnx.export`` first and then converting the resulting ``.onnx`` file to OpenVINO Model with ``openvino.convert_model``. 
It can be considered as a backup solution if a model cannot be converted directly from PyTorch to OpenVINO as described in the above chapters. Converting through ONNX can be more expensive in terms of code, conversion time, and allocated memory. + +1. Refer to the `Exporting PyTorch models to ONNX format `__ guide to learn how to export models from PyTorch to ONNX. +2. Follow :doc:`Convert the ONNX model ` chapter to produce OpenVINO model. + +Here is an illustration of using these two steps together: + +.. code-block:: py + :force: + + import torchvision + import torch + import openvino as ov + + model = torchvision.models.resnet50(pretrained=True) + # 1. Export to ONNX + torch.onnx.export(model, (torch.rand(1, 3, 224, 224), ), 'model.onnx') + # 2. Convert to OpenVINO + ov_model = ov.convert_model('model.onnx') + +.. note:: + + As of version 1.8.1, not all PyTorch operations can be exported to ONNX opset 9 which is used by default. + It is recommended to export models to opset 11 or higher when export to default opset 9 is not working. In that case, use ``opset_version`` option of the ``torch.onnx.export``. For more information about ONNX opset, refer to the `Operator Schemas `__ page. + +@endsphinxdirective diff --git a/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md new file mode 100644 index 00000000000000..d2c8f1418c0815 --- /dev/null +++ b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md @@ -0,0 +1,331 @@ +# Converting a TensorFlow Model {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_TensorFlow} + +@sphinxdirective + +.. meta:: + :description: Learn how to convert a model from a + TensorFlow format to the OpenVINO Model. + +This page provides general instructions on how to run model conversion from a TensorFlow format to the OpenVINO IR format. The instructions are different depending on whether your model was created with TensorFlow v1.X or TensorFlow v2.X. + +.. note:: TensorFlow models can be loaded by `openvino.Core.read_model` or `openvino.Core.compile_model` methods by OpenVINO runtime API without preparing OpenVINO IR first. Refer to the :doc:`inference example ` for more details. Using ``openvino.convert_model`` is still recommended if model load latency matters for the inference application. + +.. note:: Examples below that convert TensorFlow models from a file, do not require any version of TensorFlow to be installed on the system, except in cases when the `tensorflow` module is imported explicitly. + +Converting TensorFlow 2 Models +############################## + +TensorFlow 2.X officially supports two model formats: SavedModel and Keras H5 (or HDF5). +Below are the instructions on how to convert each of them. + +SavedModel Format ++++++++++++++++++ + +A model in the SavedModel format consists of a directory with a ``saved_model.pb`` file and two subfolders: ``variables`` and ``assets`` inside. +To convert a model, run conversion with the directory as the model argument: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + ov_model = ov.convert_model('path_to_saved_model_dir') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc path_to_saved_model_dir + +Keras H5 Format ++++++++++++++++ + +If you have a model in the HDF5 format, load the model using TensorFlow 2 and serialize it in the +SavedModel format. 
Here is an example of how to do it: + +.. code-block:: py + :force: + + import tensorflow as tf + model = tf.keras.models.load_model('model.h5') + tf.saved_model.save(model,'model') + +Converting a Keras H5 model with a custom layer to the SavedModel format requires special considerations. +For example, the model with a custom layer ``CustomLayer`` from ``custom_layer.py`` is converted as follows: + +.. code-block:: py + :force: + + import tensorflow as tf + from custom_layer import CustomLayer + model = tf.keras.models.load_model('model.h5', custom_objects={'CustomLayer': CustomLayer}) + tf.saved_model.save(model,'model') + +Then follow the above instructions for the SavedModel format. + +.. note:: + + Avoid using any workarounds or hacks to resave TensorFlow 2 models into TensorFlow 1 formats. + +Converting TensorFlow 1 Models +############################### + +Converting Frozen Model Format ++++++++++++++++++++++++++++++++ + +To convert a TensorFlow model, run model conversion with the path to the input model ``*.pb*`` file: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + + import openvino as ov + ov_model = ov.convert_model('your_model_file.pb') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc your_model_file.pb + + +Converting Non-Frozen Model Formats ++++++++++++++++++++++++++++++++++++ + +There are three ways to store non-frozen TensorFlow models. + +1. **SavedModel format**. In this case, a model consists of a special directory with a ``.pb`` file +and several subfolders: ``variables``, ``assets``, and ``assets.extra``. For more information about the SavedModel directory, refer to the `README `__ file in the TensorFlow repository. +To convert such TensorFlow model, run the conversion similarly to other model formats and pass a path to the directory as a model argument: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + + import openvino as ov + ov_model = ov.convert_model('path_to_saved_model_dir') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc path_to_saved_model_dir + +2. **Checkpoint**. In this case, a model consists of two files: ``inference_graph.pb`` (or ``inference_graph.pbtxt``) and ``checkpoint_file.ckpt``. +If you do not have an inference graph file, refer to the `Freezing Custom Models in Python <#Freezing-Custom-Models-in-Python>`__ section. +To convert the model with the inference graph in ``.pb`` format, provide paths to both files as an argument for ``ovc`` or ``openvino.convert_model``: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + + import openvino as ov + ov_model = ov.convert_model(['path_to_inference_graph.pb', 'path_to_checkpoint_file.ckpt']) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc path_to_inference_graph.pb path_to_checkpoint_file.ckpt + +To convert the model with the inference graph in the ``.pbtxt`` format, specify the path to ``.pbtxt`` file instead of the ``.pb`` file. The conversion API automatically detects the format of the provided file, there is no need to specify the model file format explicitly when calling ``ovc`` or ``openvino.convert_model`` in all examples in this document. + +3. **MetaGraph**. In this case, a model consists of three or four files stored in the same directory: ``model_name.meta``, ``model_name.index``, +``model_name.data-00000-of-00001`` (the numbers may vary), and ``checkpoint`` (optional). 
+To convert such a TensorFlow model, run the conversion providing a path to `.meta` file as an argument: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + + import openvino as ov + ov_model = ov.convert_model('path_to_meta_graph.meta') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc path_to_meta_graph.meta + + +Freezing Custom Models in Python +++++++++++++++++++++++++++++++++ + +When a model is defined in Python code, you must create an inference graph file. Graphs are usually built in a form +that allows model training. That means all trainable parameters are represented as variables in the graph. +To be able to use such a graph with the model conversion API, it should be frozen first before passing to the ``openvino.convert_model`` function: + +.. code-block:: py + :force: + + import tensorflow as tf + from tensorflow.python.framework import graph_io + frozen = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["name_of_the_output_node"]) + + import openvino as ov + ov_model = ov.convert_model(frozen) + +Where: + +* ``sess`` is the instance of the TensorFlow Session object where the network topology is defined. +* ``["name_of_the_output_node"]`` is the list of output node names in the graph; ``frozen`` graph will include only those nodes from the original ``sess.graph_def`` that are directly or indirectly used to compute given output nodes. The ``'name_of_the_output_node'`` is an example of a possible output node name. You should derive the names based on your own graph. + +Converting TensorFlow Models from Memory Using Python API +############################################################ + +Model conversion API supports passing TensorFlow/TensorFlow2 models directly from memory. + +* ``tf.keras.Model`` + + .. code-block:: py + :force: + + import openvino as ov + model = tf.keras.applications.ResNet50(weights="imagenet") + ov_model = ov.convert_model(model) + +* ``tf.keras.layers.Layer``. Requires saving model to TensorFlow ``saved_model`` file format and then loading to ``openvino.convert_model``. Saving to the file and then restoring is required due to a known bug in ``openvino.convert_model`` that ignores model signature. + + .. code-block:: py + :force: + + import tensorflow_hub as hub + import openvino as ov + + model = hub.KerasLayer("https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/classification/5") + model.build([None, 224, 224, 3]) + model.save('mobilenet_v1_100_224') # use a temporary directory + + ov_model = ov.convert_model('mobilenet_v1_100_224') + +* ``tf.Module``. Requires setting shapes in ``input`` parameter. + + .. code-block:: py + :force: + + import tensorflow as tf + import openvino as ov + + class MyModule(tf.Module): + def __init__(self, name=None): + super().__init__(name=name) + self.constant1 = tf.constant(5.0, name="var1") + self.constant2 = tf.constant(1.0, name="var2") + def __call__(self, x): + return self.constant1 * x + self.constant2 + + model = MyModule(name="simple_module") + ov_model = ov.convert_model(model, input=[-1]) + +.. note:: There is a known bug in ``openvino.convert_model`` on using ``tf.Variable`` nodes in the model graph. The results of the conversion of such models are unpredictable. It is recommended to save a model with ``tf.Variable`` into TensorFlow Saved Model format and load it with `openvino.convert_model`. + +* ``tf.compat.v1.Graph`` + + .. 
code-block:: py + :force: + + with tf.compat.v1.Session() as sess: + inp1 = tf.compat.v1.placeholder(tf.float32, [100], 'Input1') + inp2 = tf.compat.v1.placeholder(tf.float32, [100], 'Input2') + output = tf.nn.relu(inp1 + inp2, name='Relu') + tf.compat.v1.global_variables_initializer() + model = sess.graph + + import openvino as ov + ov_model = ov.convert_model(model) + +* ``tf.compat.v1.GraphDef`` + + .. code-block:: py + :force: + + with tf.compat.v1.Session() as sess: + inp1 = tf.compat.v1.placeholder(tf.float32, [100], 'Input1') + inp2 = tf.compat.v1.placeholder(tf.float32, [100], 'Input2') + output = tf.nn.relu(inp1 + inp2, name='Relu') + tf.compat.v1.global_variables_initializer() + model = sess.graph_def + + import openvino as ov + ov_model = ov.convert_model(model) + +* ``tf.function`` + + .. code-block:: py + :force: + + @tf.function( + input_signature=[tf.TensorSpec(shape=[1, 2, 3], dtype=tf.float32), + tf.TensorSpec(shape=[1, 2, 3], dtype=tf.float32)]) + def func(x, y): + return tf.nn.sigmoid(tf.nn.relu(x + y)) + + import openvino as ov + ov_model = ov.convert_model(func) + +* ``tf.compat.v1.session`` + + .. code-block:: py + :force: + + with tf.compat.v1.Session() as sess: + inp1 = tf.compat.v1.placeholder(tf.float32, [100], 'Input1') + inp2 = tf.compat.v1.placeholder(tf.float32, [100], 'Input2') + output = tf.nn.relu(inp1 + inp2, name='Relu') + tf.compat.v1.global_variables_initializer() + + import openvino as ov + ov_model = ov.convert_model(sess) + +* ``tf.train.checkpoint`` + + .. code-block:: py + :force: + + model = tf.keras.Model(...) + checkpoint = tf.train.Checkpoint(model) + save_path = checkpoint.save(save_directory) + # ... + checkpoint.restore(save_path) + + import openvino as ov + ov_model = ov.convert_model(checkpoint) + +Supported TensorFlow and TensorFlow 2 Keras Layers +################################################## + +For the list of supported standard layers, refer to the :doc:`Supported Operations ` page. + +Summary +####### + +In this document, you learned: + +* Basic information about how the model conversion API works with TensorFlow models. +* Which TensorFlow models are supported. +* How to freeze a TensorFlow model. + +@endsphinxdirective + + diff --git a/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow_Lite.md b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow_Lite.md new file mode 100644 index 00000000000000..e25795c95a4b1f --- /dev/null +++ b/docs/OV_Converter_UG/prepare_model/convert_model/Convert_Model_From_TensorFlow_Lite.md @@ -0,0 +1,42 @@ +# Converting a TensorFlow Lite Model {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_TensorFlow_Lite} + +@sphinxdirective + +.. meta:: + :description: Learn how to convert a model from a + TensorFlow Lite format to the OpenVINO Model. + + +To convert an ONNX model, run model conversion with the path to the ``.tflite`` model file: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + + import openvino as ov + ov.convert_model('your_model_file.tflite') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc your_model_file.tflite + +.. note:: TensorFlow Lite model file can be loaded by ``openvino.Core.read_model`` or ``openvino.Core.compile_model`` methods by OpenVINO runtime API without preparing OpenVINO IR first. Refer to the :doc:`inference example ` for more details. 
Using ``openvino.convert_model`` is still recommended if model load latency matters for the inference application. + +Supported TensorFlow Lite Layers +################################### + +For the list of supported standard layers, refer to the :doc:`Supported Operations ` page. + +Supported TensorFlow Lite Models +################################### + +More than eighty percent of public TensorFlow Lite models are supported from open sources `TensorFlow Hub `__ and `MediaPipe `__. +Unsupported models usually have custom TensorFlow Lite operations. + +@endsphinxdirective diff --git a/docs/OV_Converter_UG/prepare_model/convert_model/Converting_Model.md b/docs/OV_Converter_UG/prepare_model/convert_model/Converting_Model.md new file mode 100644 index 00000000000000..24fa33c17f4a94 --- /dev/null +++ b/docs/OV_Converter_UG/prepare_model/convert_model/Converting_Model.md @@ -0,0 +1,141 @@ +# Setting Input Shapes {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_Converting_Model} + +With model conversion API you can increase your model's efficiency by providing an additional shape definition using the ``input`` parameter. + +@sphinxdirective + +.. meta:: + :description: Learn how to increase the efficiency of a model by providing an additional shape definition with the ``input`` parameter of ``openvino.convert_model`` and ``ovc``. + +.. _when_to_specify_input_shapes: + +Specifying Shapes in the ``input`` Parameter +##################################################### + +``openvino.convert_model`` supports conversion of models with dynamic input shapes that contain undefined dimensions. +However, if the shape of data is not going to change from one inference request to another, +it is recommended to set up static shapes (when all dimensions are fully defined) for the inputs. +Doing it at this stage, instead of during inference in runtime, can be beneficial in terms of performance and memory consumption. +To set up static shapes, model conversion API provides the ``input`` parameter. +For more information on changing input shapes in runtime, refer to the :doc:`Changing input shapes ` guide. +To learn more about dynamic shapes in runtime, refer to the :doc:`Dynamic Shapes ` guide. + +The OpenVINO Runtime API may present certain limitations in inferring models with undefined dimensions on some hardware. See the :doc:`Features support matrix ` for reference. +In this case, the ``input`` parameter and the :doc:`reshape method ` can help to resolve undefined dimensions. + +For example, run model conversion for the TensorFlow MobileNet model with the single input +and specify the input shape of ``[2,300,300,3]``: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + ov_model = ov.convert_model("MobileNet.pb", input=[2, 300, 300, 3]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc MobileNet.pb --input [2,300,300,3] + +If a model has multiple inputs, the input shape should be specified in ``input`` parameter as a list. In ``ovc``, this is a command separate list, and in ``openvino.convert_model`` this is a Python list or tuple with number of elements matching the number of inputs in the model. Use input names from the original model to define the mapping between inputs and shapes specified. +The following example demonstrates the conversion of the ONNX OCR model with a pair of inputs ``data`` and ``seq_len`` +and specifies shapes ``[3,150,200,1]`` and ``[3]`` for them respectively: + +.. tab-set:: + + .. 
tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + ov_model = ov.convert_model("ocr.onnx", input=[("data", [3,150,200,1]), ("seq_len", [3])]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc ocr.onnx --input data[3,150,200,1],seq_len[3] + +If the order of inputs is defined in the input model and the order is known for the user, names could be omitted. In this case, it is important to specify shapes in the same order of input model inputs: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + ov_model = ov.convert_model("ocr.onnx", input=([3,150,200,1], [3])) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc ocr.onnx --input [3,150,200,1],[3] + +Whether the model has a specified order of inputs depends on the original framework. Usually, it is convenient to set shapes without specifying the names of the parameters in the case of PyTorch model conversion because a PyTorch model is considered as a callable that usually accepts positional parameters. On the other hand, names of inputs are convenient when converting models from model files, because naming of inputs is a good practice for many frameworks that serialize models to files. + +The ``input`` parameter allows overriding original input shapes if it is supported by the model topology. +Shapes with dynamic dimensions in the original model can be replaced with static shapes for the converted model, and vice versa. +The dynamic dimension can be marked in model conversion API parameter as ``-1`` or ``?`` when using ``ovc``. +For example, launch model conversion for the ONNX OCR model and specify dynamic batch dimension for inputs: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + ov_model = ov.convert_model("ocr.onnx", input=[("data", [-1, 150, 200, 1]), ("seq_len", [-1])]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc ocr.onnx --input "data[?,150,200,1],seq_len[?]" + +To optimize memory consumption for models with undefined dimensions in run-time, model conversion API provides the capability to define boundaries of dimensions. +The boundaries of undefined dimension can be specified with ellipsis in the command line or with ``openvino.Dimension`` class in Python. +For example, launch model conversion for the ONNX OCR model and specify a boundary for the batch dimension 1..3, which means that the input tensor will have batch dimension minimum 1 and maximum 3 in inference: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + batch_dim = ov.Dimension(1, 3) + ov_model = ov.convert_model("ocr.onnx", input=[("data", [batch_dim, 150, 200, 1]), ("seq_len", [batch_dim])]) + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc ocr.onnx --input data[1..3,150,200,1],seq_len[1..3] + +In practice, not every model is designed in a way that allows change of input shapes. An attempt to change the shape for such models may lead to an exception during model conversion, later in model inference, or even to wrong results of inference without explicit exception raised. A knowledge about model topology is required to set shapes appropriately. +For more information about shape follow the :doc:`inference troubleshooting ` +and :ref:`ways to relax shape inference flow ` guides. 
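To verify that the ``input`` parameter took effect, the converted model's inputs can be inspected before saving; here is a short sketch reusing the OCR example above:

.. code-block:: py
   :force:

   import openvino as ov

   batch_dim = ov.Dimension(1, 3)
   ov_model = ov.convert_model("ocr.onnx", input=[("data", [batch_dim, 150, 200, 1]), ("seq_len", [batch_dim])])

   for model_input in ov_model.inputs:
       # Expected to print the input names with partial shapes such as [1..3,150,200,1]
       print(model_input.any_name, model_input.partial_shape)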
+ +@endsphinxdirective diff --git a/docs/OV_Converter_UG/prepare_model/convert_model/MO_OVC_transition.md b/docs/OV_Converter_UG/prepare_model/convert_model/MO_OVC_transition.md new file mode 100644 index 00000000000000..e550d515b753ad --- /dev/null +++ b/docs/OV_Converter_UG/prepare_model/convert_model/MO_OVC_transition.md @@ -0,0 +1,634 @@ +# Transition from Legacy Conversion API {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition} + +@sphinxdirective + +.. meta:: + :description: Transition guide from MO / mo.convert_model() to OVC / ov.convert_model(). + +.. toctree:: + :maxdepth: 1 + :hidden: + + openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide + +In 2023.1 OpenVINO release a new OVC (OpenVINO Model Converter) tool has been introduced with the corresponding Python API: ``openvino.convert_model`` method. ``ovc`` and ``openvino.convert_model`` represent +a lightweight alternative of ``mo`` and ``openvino.tools.mo.convert_model`` which are considered legacy API now. In this article, all the differences between ``mo`` and ``ovc`` are summarized and the transition guide from the legacy API to the new API is provided. + +Parameters Comparison +##################### + +The comparison of parameters between ov.convert_model() / OVC and mo.convert_model() / MO. + +.. list-table:: + :widths: 20 25 55 + :header-rows: 1 + + * - mo.convert_model() / MO + - ov.convert_model() / OVC + - Differences description + * - input_model + - input_model + - Along with model object or path to input model ov.convert_model() accepts list of model parts, for example, the path to TensorFlow weights plus the path to TensorFlow checkpoint. OVC tool accepts an unnamed input model. + * - output_dir + - output_model + - output_model in OVC tool sets both output model name and output directory. + * - model_name + - output_model + - output_model in OVC tool sets both output model name and output directory. + * - input + - input + - ov.convert_model() accepts tuples for setting multiple parameters. OVC tool 'input' does not have type setting and freezing functionality. ov.convert_model() does not allow input cut. + * - output + - output + - ov.convert_model() does not allow output cut. + * - input_shape + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by ``input`` parameter. + * - example_input + - example_input + - No differences. + * - batch + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by model reshape functionality. See details below. + * - mean_values + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - scale_values + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - scale + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - reverse_input_channels + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - source_layout + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - target_layout + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - layout + - N/A + - Not available in ov.convert_model() / OVC. 
Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - compress_to_fp16 + - compress_to_fp16 + - OVC provides 'compress_to_fp16' for command line tool only, as compression is performed during saving a model to IR (Intermediate Representation). + * - extensions + - extension + - No differences. + * - transform + - N/A + - Not available in ov.convert_model() / OVC. Can be replaced by functionality from ``PrePostProcessor``. See details below. + * - transformations_config + - N/A + - Not available in ov.convert_model() / OVC. + * - static_shape + - N/A + - Not available in ov.convert_model() / OVC. + * - freeze_placeholder_with_value + - N/A + - Not available in ov.convert_model() / OVC. + * - use_legacy_frontend + - N/A + - Not available in ov.convert_model() / OVC. + * - use_legacy_frontend + - N/A + - Not available in ov.convert_model() / OVC. + * - silent + - verbose + - OVC / ov.convert_model provides 'verbose' parameter instead of 'silent' for printing of detailed conversion information if 'verbose' is set to True. + * - log_level + - N/A + - Not available in ov.convert_model() / OVC. + * - version + - version + - N/A + * - progress + - N/A + - Not available in ov.convert_model() / OVC. + * - stream_output + - N/A + - Not available in ov.convert_model() / OVC. + * - share_weights + - share_weights + - No differences. + * - framework + - N/A + - Not available in ov.convert_model() / OVC. + * - help / -h + - help / -h + - OVC provides help parameter only in command line tool. + * - example_output + - output + - OVC / ov.convert_model 'output' parameter includes capabilities of MO 'example_output' parameter. + * - input_model_is_text + - N/A + - Not available in ov.convert_model() / OVC. + * - input_checkpoint + - input_model + - All supported model formats can be passed to 'input_model'. + * - input_meta_graph + - input_model + - All supported model formats can be passed to 'input_model'. + * - saved_model_dir + - input_model + - All supported model formats can be passed to 'input_model'. + * - saved_model_tags + - N/A + - Not available in ov.convert_model() / OVC. + * - tensorflow_custom_operations_config_update + - N/A + - Not available in ov.convert_model() / OVC. + * - tensorflow_object_detection_api_pipeline_config + - N/A + - Not available in ov.convert_model() / OVC. + * - tensorboard_logdir + - N/A + - Not available in ov.convert_model() / OVC. + * - tensorflow_custom_layer_libraries + - N/A + - Not available in ov.convert_model() / OVC. + * - input_symbol + - N/A + - Not available in ov.convert_model() / OVC. + * - nd_prefix_name + - N/A + - Not available in ov.convert_model() / OVC. + * - pretrained_model_name + - N/A + - Not available in ov.convert_model() / OVC. + * - save_params_from_nd + - N/A + - Not available in ov.convert_model() / OVC. + * - legacy_mxnet_model + - N/A + - Not available in ov.convert_model() / OVC. + * - enable_ssd_gluoncv + - N/A + - Not available in ov.convert_model() / OVC. + * - input_proto + - N/A + - Not available in ov.convert_model() / OVC. + * - caffe_parser_path + - N/A + - Not available in ov.convert_model() / OVC. + * - k + - N/A + - Not available in ov.convert_model() / OVC. + * - disable_omitting_optional + - N/A + - Not available in ov.convert_model() / OVC. + * - enable_flattening_nested_params + - N/A + - Not available in ov.convert_model() / OVC. + * - counts + - N/A + - Not available in ov.convert_model() / OVC. + * - remove_output_softmax + - N/A + - Not available in ov.convert_model() / OVC. 
+ * - remove_memory + - N/A + - Not available in ov.convert_model() / OVC. + +Transition from Legacy API to New API +############################################################################ + +mo.convert_model() provides a wide range of preprocessing parameters. Most of these parameters have analogs in OVC or can be replaced with functionality from ``ov.PrePostProcessor`` class. +Here is the guide to transition from legacy model preprocessing to new API preprocessing. + + +``input_shape`` +################ + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, input_shape=[[1, 3, 100, 100],[1]]) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model, input=[[1, 3, 100, 100],[1]]) + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --input_shape [1,3,100,100],[1] --output_dir OUTPUT_DIR + + - .. code-block:: sh + :force: + + ovc MODEL_NAME --input [1,3,100,100],[1] --output_model OUTPUT_MODEL + +``batch`` +########## + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, batch=2) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + input_shape = ov_model.inputs[0].partial_shape + input_shape[0] = 2 # batch size + ov_model.reshape(input_shape) + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --batch 2 --output_dir OUTPUT_DIR + + - Not available in OVC tool. Please check Python API. + +``mean_values`` +################ + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, mean_values=[0.5, 0.5, 0.5]) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + prep.input(input_name).preprocess().mean([0.5, 0.5, 0.5]) + ov_model = prep.build() + + There is currently no heuristic for automatic detection of the channel to which mean, scale or reverse channels should be applied. ``Layout`` needs to be explicitly specified with "C" channel. For example "NHWC", "NCHW", "?C??". See also :doc:`Layout API overview `. + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --mean_values [0.5,0.5,0.5] --output_dir OUTPUT_DIR + + - Not available in OVC tool. Please check Python API. + +``scale_values`` +################# + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, scale_values=[255., 255., 255.]) + + - .. 
code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + prep.input(input_name).preprocess().scale([255., 255., 255.]) + ov_model = prep.build() + + There is currently no heuristic for automatic detection of the channel to which mean, scale or reverse channels should be applied. ``Layout`` needs to be explicitly specified with "C" channel. For example "NHWC", "NCHW", "?C??". See also :doc:`Layout API overview `. + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --scale_values [255,255,255] --output_dir OUTPUT_DIR + + - Not available in OVC tool. Please check Python API. + +``reverse_input_channels`` +########################### + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, reverse_input_channels=True) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + prep.input(input_name).preprocess().reverse_channels() + ov_model = prep.build() + + There is currently no heuristic for automatic detection of the channel to which mean, scale or reverse channels should be applied. ``Layout`` needs to be explicitly specified with "C" channel. For example "NHWC", "NCHW", "?C??". See also :doc:`Layout API overview `. + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --reverse_input_channels --output_dir OUTPUT_DIR + + - Not available in OVC tool. Please check Python API. + +``source_layout`` +################## + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + import openvino as ov + from openvino.tools import mo + + ov_model = mo.convert_model(model, source_layout={input_name: ov.Layout("NHWC")}) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).model().set_layout(ov.Layout("NHWC")) + ov_model = prep.build() + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --source_layout input_name(NHWC) --output_dir OUTPUT_DIR + + - Not available in OVC tool. Please check Python API. + +``target_layout`` +################## + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + import openvino as ov + from openvino.tools import mo + + ov_model = mo.convert_model(model, target_layout={input_name: ov.Layout("NHWC")}) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + ov_model = prep.build() + + .. tab-item:: CLI + :sync: cli + + .. 
list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --target_layout input_name(NHWC) --output_dir OUTPUT_DIR + + - Not available in OVC tool. Please check Python API. + +``layout`` +########### + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, layout={input_name: mo.LayoutMap("NCHW", "NHWC")}) + + - .. code-block:: py + :force: + + import openvino as ov + + ov_model = ov.convert_model(model) + + prep = ov.preprocess.PrePostProcessor(ov_model) + prep.input(input_name).model().set_layout(ov.Layout("NCHW")) + prep.input(input_name).tensor().set_layout(ov.Layout("NHWC")) + ov_model = prep.build() + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --layout "input_name(NCHW->NHWC)" --output_dir OUTPUT_DIR + + - Not available in OVC tool. Please check Python API. + +``transform`` +############## + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: py + :force: + + from openvino.tools import mo + + ov_model = mo.convert_model(model, transform=[('LowLatency2', {'use_const_initializer': False}), 'Pruning', ('MakeStateful', {'param_res_names': {'input_name': 'output_name'}})]) + + - .. code-block:: py + :force: + + import openvino as ov + from openvino._offline_transformations import apply_low_latency_transformation, apply_pruning_transformation, apply_make_stateful_transformation + + ov_model = ov.convert_model(model) + apply_low_latency_transformation(model, use_const_initializer=False) + apply_pruning_transformation(model) + apply_make_stateful_transformation(model, param_res_names={'input_name': 'output_name'}) + + .. tab-item:: CLI + :sync: cli + + .. list-table:: + :header-rows: 1 + + * - Legacy API + - New API + * - .. code-block:: sh + :force: + + mo --input_model MODEL_NAME --transform LowLatency2[use_const_initializer=False],Pruning,MakeStateful[param_res_names={'input_name':'output_name'}] --output_dir OUTPUT_DIR + + - Not available in OVC tool. Please check Python API. + +Supported Frameworks in MO vs OVC +################################# + +ov.convert_model() and OVC tool support conversion from PyTorch, TF, TF Lite, ONNX, PaddlePaddle. +The following frameworks are supported only in MO and mo.convert_model(): Caffe, MxNet, Kaldi. + +@endsphinxdirective + + diff --git a/docs/OV_Converter_UG/prepare_model/convert_model/supported_model_formats.md b/docs/OV_Converter_UG/prepare_model/convert_model/supported_model_formats.md new file mode 100644 index 00000000000000..75262747e3a9fc --- /dev/null +++ b/docs/OV_Converter_UG/prepare_model/convert_model/supported_model_formats.md @@ -0,0 +1,33 @@ +# Supported Model Formats {#Supported_Model_Formats} + +@sphinxdirective + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_PyTorch + openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_TensorFlow + openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_ONNX + openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_TensorFlow_Lite + openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_From_Paddle + + +**OpenVINO IR (Intermediate Representation)** - the proprietary format of OpenVINO™, benefiting from the full extent of its features. The result of running ``ovc`` CLI tool or ``openvino.save_model`` is OpenVINO IR. All other supported formats can be converted to the IR, refer to the following articles for details on conversion: + +* :doc:`How to convert Pytorch ` +* :doc:`How to convert ONNX ` +* :doc:`How to convert TensorFlow ` +* :doc:`How to convert TensorFlow Lite ` +* :doc:`How to convert PaddlePaddle ` + +To choose the best workflow for your application, read :doc:`Introduction to Model Preparation` + +Refer to the list of all supported conversion options in :doc:`Conversion Parameters ` + +Additional Resources +#################### + +* :doc:`Transition guide from the legacy to new conversion API ` + +@endsphinxdirective diff --git a/docs/get_started/get_started_demos.md b/docs/get_started/get_started_demos.md index 61e6e60c600c7b..0ec61b0f0c3e1f 100644 --- a/docs/get_started/get_started_demos.md +++ b/docs/get_started/get_started_demos.md @@ -3,11 +3,11 @@ @sphinxdirective .. meta:: - :description: Learn the details on the workflow of Intel® Distribution of OpenVINO™ + :description: Learn the details on the workflow of Intel® Distribution of OpenVINO™ toolkit, and how to run inference, using provided code samples. -The guide presents a basic workflow for building and running C++ code samples in OpenVINO. Note that these steps will not work with the Python samples. +The guide presents a basic workflow for building and running C++ code samples in OpenVINO. Note that these steps will not work with the Python samples. To get started, you must first install OpenVINO Runtime, install OpenVINO Development tools, and build the sample applications. See the :ref:`Prerequisites ` section for instructions. @@ -40,8 +40,8 @@ Make sure that you also `install OpenCV `. This guide uses the ``googlenet-v1`` model from the Caffe framework, therefore, when you get to Step 4 of the installation, run the following command to install OpenVINO with the Caffe requirements: @@ -76,11 +76,11 @@ You can use one of the following options to find a model suitable for OpenVINO: - Download public or Intel pre-trained models from :doc:`Open Model Zoo ` using :doc:`Model Downloader tool ` - Download from GitHub, Caffe Zoo, TensorFlow Zoo, etc. - Train your own model with machine learning tools - + This guide uses OpenVINO Model Downloader to get pre-trained models. You can use one of the following commands to find a model with this method: * List the models available in the downloader. - + .. code-block:: sh omz_info_dumper --print_all @@ -115,21 +115,21 @@ This guide used the following model to run the Image Classification Sample: :sync: windows .. code-block:: bat - + omz_downloader --name googlenet-v1 --output_dir %USERPROFILE%\Documents\models .. tab-item:: Linux :sync: linux .. code-block:: sh - + omz_downloader --name googlenet-v1 --output_dir ~/models - + .. tab-item:: macOS :sync: macos - + .. 
code-block:: sh - + omz_downloader --name googlenet-v1 --output_dir ~/models @@ -139,54 +139,54 @@ This guide used the following model to run the Image Classification Sample: .. tab-item:: Windows :sync: windows - + .. code-block:: bat - + ################|| Downloading models ||################ - + ========== Downloading C:\Users\username\Documents\models\public\googlenet-v1\googlenet-v1.prototxt ... 100%, 9 KB, ? KB/s, 0 seconds passed - + ========== Downloading C:\Users\username\Documents\models\public\googlenet-v1\googlenet-v1.caffemodel ... 100%, 4834 KB, 571 KB/s, 8 seconds passed - + ################|| Post-processing ||################ - + ========== Replacing text in C:\Users\username\Documents\models\public\googlenet-v1\googlenet-v1.prototxt .. tab-item:: Linux :sync: linux - + .. code-block:: sh - + ###############|| Downloading models ||############### - + ========= Downloading /home/username/models/public/googlenet-v1/googlenet-v1.prototxt - + ========= Downloading /home/username/models/public/googlenet-v1/googlenet-v1.caffemodel ... 100%, 4834 KB, 3157 KB/s, 1 seconds passed - + ###############|| Post processing ||############### - + ========= Replacing text in /home/username/models/public/googlenet-v1/googlenet-v1.prototxt ========= - + .. tab-item:: macOS :sync: macos - + .. code-block:: sh - + ###############|| Downloading models ||############### - + ========= Downloading /Users/username/models/public/googlenet-v1/googlenet-v1.prototxt ... 100%, 9 KB, 44058 KB/s, 0 seconds passed - + ========= Downloading /Users/username/models/public/googlenet-v1/googlenet-v1.caffemodel ... 100%, 4834 KB, 4877 KB/s, 0 seconds passed - + ###############|| Post processing ||############### - + ========= Replacing text in /Users/username/models/public/googlenet-v1/googlenet-v1.prototxt ========= - + .. _convert-models-to-intermediate-representation: Step 2: Convert the Model with ``mo`` @@ -210,26 +210,26 @@ Create an ```` directory to contain the model's Intermediate Representat .. tab-item:: Windows :sync: windows - + .. code-block:: bat - + mkdir %USERPROFILE%\Documents\ir .. tab-item:: Linux :sync: linux - + .. code-block:: sh - + mkdir ~/ir - + .. tab-item:: macOS :sync: macos - + .. code-block:: sh - + mkdir ~/ir -To save disk space for your IR file, you can apply :doc:`weights compression to FP16 `. To generate an IR with FP16 weights, run model conversion with the ``--compress_to_fp16`` option. +To save disk space for your IR files, OpenVINO stores weights in FP16 format by default. Generic model conversion script: @@ -246,23 +246,23 @@ The command with most placeholders filled in and FP16 precision: .. tab-item:: Windows :sync: windows - + .. code-block:: bat - + mo --input_model %USERPROFILE%\Documents\models\public\googlenet-v1\googlenet-v1.caffemodel --compress_to_fp16 --output_dir %USERPROFILE%\Documents\ir .. tab-item:: Linux :sync: linux - + .. code-block:: sh - + mo --input_model ~/models/public/googlenet-v1/googlenet-v1.caffemodel --compress_to_fp16 --output_dir ~/ir - + .. tab-item:: macOS :sync: macos - + .. code-block:: sh - + mo --input_model ~/models/public/googlenet-v1/googlenet-v1.caffemodel --compress_to_fp16 --output_dir ~/ir .. _download-media: @@ -290,75 +290,75 @@ To run the **Image Classification** code sample with an input image using the IR .. tab-item:: Windows :sync: windows - + .. code-block:: bat - + \setupvars.bat .. tab-item:: Linux :sync: linux - + .. code-block:: sh - + source /setupvars.sh - + .. tab-item:: macOS :sync: macos - + .. 
code-block:: sh - + source /setupvars.sh - + 2. Go to the code samples release directory created when you built the samples earlier: .. tab-set:: .. tab-item:: Windows :sync: windows - + .. code-block:: bat - + cd %USERPROFILE%\Documents\Intel\OpenVINO\openvino_samples_build\intel64\Release .. tab-item:: Linux :sync: linux - + .. code-block:: sh - + cd ~/openvino_cpp_samples_build/intel64/Release - + .. tab-item:: macOS :sync: macos - + .. code-block:: sh - + cd ~/openvino_cpp_samples_build/intel64/Release - + 3. Run the code sample executable, specifying the input media file, the IR for your model, and a target device for performing inference: .. tab-set:: .. tab-item:: Windows :sync: windows - + .. code-block:: bat - + classification_sample_async.exe -i -m -d .. tab-item:: Linux :sync: linux - + .. code-block:: sh - + classification_sample_async -i -m -d - + .. tab-item:: macOS :sync: macos - + .. code-block:: sh - + classification_sample_async -i -m -d - + Examples ++++++++ @@ -371,23 +371,23 @@ The following command shows how to run the Image Classification Code Sample usin .. tab-item:: Windows :sync: windows - + .. code-block:: bat - + .\classification_sample_async.exe -i %USERPROFILE%\Downloads\dog.bmp -m %USERPROFILE%\Documents\ir\googlenet-v1.xml -d CPU .. tab-item:: Linux :sync: linux - + .. code-block:: sh - + ./classification_sample_async -i ~/Downloads/dog.bmp -m ~/ir/googlenet-v1.xml -d CPU - + .. tab-item:: macOS :sync: macos - + .. code-block:: sh - + ./classification_sample_async -i ~/Downloads/dog.bmp -m ~/ir/googlenet-v1.xml -d CPU When the sample application is complete, you are given the label and confidence for the top 10 categories. The input image and sample output of the inference results is shown below: @@ -418,24 +418,24 @@ The following example shows how to run the same sample using GPU as the target d Running Inference on GPU ------------------------ -.. note:: - +.. note:: + Running inference on Intel® Processor Graphics (GPU) requires :doc:`additional hardware configuration steps `, as described earlier on this page. Running on GPU is not compatible with macOS. .. tab-set:: .. tab-item:: Windows :sync: windows - + .. code-block:: bat - + .\classification_sample_async.exe -i %USERPROFILE%\Downloads\dog.bmp -m %USERPROFILE%\Documents\ir\googlenet-v1.xml -d GPU .. tab-item:: Linux :sync: linux - + .. code-block:: sh - + ./classification_sample_async -i ~/Downloads/dog.bmp -m ~/ir/googlenet-v1.xml -d GPU diff --git a/docs/glossary.md b/docs/glossary.md index 48d65d57d49b24..5380db526c10f7 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -3,7 +3,7 @@ @sphinxdirective .. meta:: - :description: Check the list of acronyms, abbreviations and terms used in + :description: Check the list of acronyms, abbreviations and terms used in Intel® Distribution of OpenVINO™ toolkit. 
@@ -11,54 +11,55 @@ Acronyms and Abbreviations ################################################# ================== =========================================================================== - Abbreviation Description + Abbreviation Description ================== =========================================================================== - API Application Programming Interface - AVX Advanced Vector Extensions - clDNN Compute Library for Deep Neural Networks - CLI Command Line Interface - CNN Convolutional Neural Network - CPU Central Processing Unit - CV Computer Vision - DL Deep Learning - DLL Dynamic Link Library - DNN Deep Neural Networks - ELU Exponential Linear rectification Unit - FCN Fully Convolutional Network - FP Floating Point - GCC GNU Compiler Collection - GPU Graphics Processing Unit - HD High Definition - IR Intermediate Representation - JIT Just In Time - JTAG Joint Test Action Group - LPR License-Plate Recognition - LRN Local Response Normalization - mAP Mean Average Precision - Intel® OneDNN Intel® OneAPI Deep Neural Network Library - `mo` Command-line tool for model conversion, CLI for ``tools.mo.convert_model`` - MVN Mean Variance Normalization - NCDHW Number of images, Channels, Depth, Height, Width - NCHW Number of images, Channels, Height, Width - NHWC Number of images, Height, Width, Channels - NMS Non-Maximum Suppression - NN Neural Network - NST Neural Style Transfer - OD Object Detection - OS Operating System - PCI Peripheral Component Interconnect - PReLU Parametric Rectified Linear Unit - PSROI Position Sensitive Region Of Interest - RCNN, R-CNN Region-based Convolutional Neural Network - ReLU Rectified Linear Unit - ROI Region Of Interest - SDK Software Development Kit - SSD Single Shot multibox Detector - SSE Streaming SIMD Extensions - USB Universal Serial Bus - VGG Visual Geometry Group - VOC Visual Object Classes - WINAPI Windows Application Programming Interface + API Application Programming Interface + AVX Advanced Vector Extensions + clDNN Compute Library for Deep Neural Networks + CLI Command Line Interface + CNN Convolutional Neural Network + CPU Central Processing Unit + CV Computer Vision + DL Deep Learning + DLL Dynamic Link Library + DNN Deep Neural Networks + ELU Exponential Linear rectification Unit + FCN Fully Convolutional Network + FP Floating Point + GCC GNU Compiler Collection + GPU Graphics Processing Unit + HD High Definition + IR Intermediate Representation + JIT Just In Time + JTAG Joint Test Action Group + LPR License-Plate Recognition + LRN Local Response Normalization + mAP Mean Average Precision + Intel® OneDNN Intel® OneAPI Deep Neural Network Library + `mo` Command-line tool for model conversion, CLI for ``tools.mo.convert_model`` (legacy) + MVN Mean Variance Normalization + NCDHW Number of images, Channels, Depth, Height, Width + NCHW Number of images, Channels, Height, Width + NHWC Number of images, Height, Width, Channels + NMS Non-Maximum Suppression + NN Neural Network + NST Neural Style Transfer + OD Object Detection + OS Operating System + `ovc` OpenVINO Model Converter, command line tool for model conversion + PCI Peripheral Component Interconnect + PReLU Parametric Rectified Linear Unit + PSROI Position Sensitive Region Of Interest + RCNN, R-CNN Region-based Convolutional Neural Network + ReLU Rectified Linear Unit + ROI Region Of Interest + SDK Software Development Kit + SSD Single Shot multibox Detector + SSE Streaming SIMD Extensions + USB Universal Serial Bus + VGG Visual Geometry Group + VOC Visual Object Classes + 
WINAPI Windows Application Programming Interface ================== =========================================================================== @@ -68,46 +69,46 @@ Terms Glossary of terms used in OpenVINO™ -| *Batch* +| *Batch* | Number of images to analyze during one call of infer. Maximum batch size is a property of the model set before its compilation. In NHWC, NCHW, and NCDHW image data layout representations, the 'N' refers to the number of images in the batch. -| *Device Affinity* +| *Device Affinity* | A preferred hardware device to run inference (CPU, GPU, GNA, etc.). -| *Extensibility mechanism, Custom layers* +| *Extensibility mechanism, Custom layers* | The mechanism that provides you with capabilities to extend the OpenVINO™ Runtime and model conversion API so that they can work with models containing operations that are not yet supported. | *layer / operation* -| In OpenVINO, both terms are treated synonymously. To avoid confusion, "layer" is being pushed out and "operation" is the currently accepted term. +| In OpenVINO, both terms are treated synonymously. To avoid confusion, "layer" is being pushed out and "operation" is the currently accepted term. -| *Model conversion API* -| A component of OpenVINO Development Tools. The API is used to import, convert, and optimize models trained in popular frameworks to a format usable by other OpenVINO components. In ``openvino.tools.mo`` namespace, model conversion API is represented by a Python ``mo.convert_model()`` method and ``mo`` command-line tool. +| *Model conversion API* +| The Conversion API is used to import and convert models trained in popular frameworks to a format usable by other OpenVINO components. Model conversion API is represented by a Python ``openvino.convert_model()`` method and ``ovc`` command-line tool. -| *OpenVINO™ Core* -| OpenVINO™ Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, GNA, etc. +| *OpenVINO™ Core* +| OpenVINO™ Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, GNA, etc. -| *OpenVINO™ API* +| *OpenVINO™ API* | The basic default API for all supported devices, which allows you to load a model from Intermediate Representation or convert from ONNX, PaddlePaddle, TensorFlow, TensorFlow Lite file formats, set input and output formats and execute the model on various devices. -| *OpenVINO™ Runtime* +| *OpenVINO™ Runtime* | A C++ library with a set of classes that you can use in your application to infer input tensors and get the results. -| *ov::Model* +| *ov::Model* | A class of the Model that OpenVINO™ Runtime reads from IR or converts from ONNX, PaddlePaddle, TensorFlow, TensorFlow Lite formats. Consists of model structure, weights and biases. -| *ov::CompiledModel* +| *ov::CompiledModel* | An instance of the compiled model which allows the OpenVINO™ Runtime to request (several) infer requests and perform inference synchronously or asynchronously. -| *ov::InferRequest* +| *ov::InferRequest* | A class that represents the end point of inference on the model compiled by the device and represented by a compiled model. Inputs are set here, outputs should be requested from this interface as well. -| *ov::ProfilingInfo* +| *ov::ProfilingInfo* | Represents basic inference profiling information per operation. -| *ov::Layout* +| *ov::Layout* | Image data layout refers to the representation of images batch. Layout shows a sequence of 4D or 5D tensor data in memory. 
A typical NCHW format represents pixel in horizontal direction, rows by vertical dimension, planes by channel and images into batch. See also [Layout API Overview](./OV_Runtime_UG/layout_overview.md). -| *ov::element::Type* +| *ov::element::Type* | Represents data element type. For example, f32 is 32-bit floating point, f16 is 16-bit floating point. | *plugin / Inference Device / Inference Mode* diff --git a/docs/install_guides/installing-openvino-from-archive-linux.md b/docs/install_guides/installing-openvino-from-archive-linux.md index 3eca6f4acf21fe..d7cfdf8d224787 100644 --- a/docs/install_guides/installing-openvino-from-archive-linux.md +++ b/docs/install_guides/installing-openvino-from-archive-linux.md @@ -3,34 +3,33 @@ @sphinxdirective .. meta:: - :description: Learn how to install OpenVINO™ Runtime on the Linux operating + :description: Learn how to install OpenVINO™ Runtime on the Linux operating system, using an archive file. .. note:: - + Note that the Archive distribution: - + * offers both C/C++ and Python APIs - * additionally includes code samples + * additionally includes code samples * is dedicated to users of all major OSs: Windows, Linux, macOS * may offer different hardware support under different operating systems (see the drop-down below for more details). - - .. dropdown:: Inference Options - =================== ===== ===== ===== ===== - Operating System CPU GPU GNA NPU - =================== ===== ===== ===== ===== - Debian9 armhf V n/a n/a n/a - Debian9 arm64 V n/a n/a n/a - CentOS7 x86_64 V V n/a n/a - Ubuntu18 x86_64 V V V n/a - Ubuntu20 x86_64 V V V V - Ubuntu22 x86_64 V V V V - RHEL8 x86_64 V V V n/a - =================== ===== ===== ===== ===== + .. dropdown:: Inference Options + =================== ===== ===== ===== ===== + Operating System CPU GPU GNA NPU + =================== ===== ===== ===== ===== + Debian9 armhf V n/a n/a n/a + Debian9 arm64 V n/a n/a n/a + CentOS7 x86_64 V V n/a n/a + Ubuntu18 x86_64 V V V n/a + Ubuntu20 x86_64 V V V V + Ubuntu22 x86_64 V V V V + RHEL8 x86_64 V V V n/a + =================== ===== ===== ===== ===== .. tab-set:: @@ -40,58 +39,58 @@ | Full requirement listing is available in: | `System Requirements Page `__ - + .. tab-item:: Processor Notes :sync: processor-notes - + | To see if your processor includes the integrated graphics technology and supports iGPU inference, refer to: | `Product Specifications `__ - + .. tab-item:: Software :sync: software - + * `CMake 3.13 or higher, 64-bit `__ * `Python 3.7 - 3.11, 64-bit `__ * GCC: - + .. tab-set:: .. tab-item:: Ubuntu 20.04 :sync: ubuntu-20 - + * GCC 9.3.0 .. tab-item:: Ubuntu 18.04 :sync: ubuntu-18 - + * GCC 7.5.0 - + .. tab-item:: RHEL 8 :sync: rhel-8 - + * GCC 8.4.1 - + .. tab-item:: CentOS 7 :sync: centos-7 - + * GCC 8.3.1 Use the following instructions to install it: - + Install GCC 8.3.1 via devtoolset-8 - + .. code-block:: sh - + sudo yum update -y && sudo yum install -y centos-release-scl epel-release sudo yum install -y devtoolset-8 - + Enable devtoolset-8 and check current gcc version - + .. code-block:: sh - + source /opt/rh/devtoolset-8/enable gcc -v - - + + @@ -107,19 +106,19 @@ Step 1: Download and Install the OpenVINO Core Components 2. Create the ``/opt/intel`` folder for OpenVINO by using the following command. If the folder already exists, skip this step. .. code-block:: sh - + sudo mkdir /opt/intel - + .. note:: - + The ``/opt/intel`` path is the recommended folder path for administrators or root users. 
If you prefer to install OpenVINO in regular userspace, the recommended path is ``/home//intel``. You may use a different path if desired. 3. Browse to the current user's ``Downloads`` folder: - + .. code-block:: sh - + cd /Downloads - + 4. Download the `OpenVINO Runtime archive file for your system `_, extract the files, rename the extracted folder and move it to the desired path: .. tab-set:: @@ -131,70 +130,70 @@ Step 1: Download and Install the OpenVINO Core Components .. tab-item:: Ubuntu 22.04 :sync: ubuntu-22 - + .. code-block:: sh - + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0.1/linux/l_openvino_toolkit_ubuntu22_2023.0.1.11005.fa1c41994f3_x86_64.tgz --output openvino_2023.0.1.tgz tar -xf openvino_2023.0.1.tgz sudo mv l_openvino_toolkit_ubuntu22_2023.0.1.11005.fa1c41994f3_x86_64 /opt/intel/openvino_2023.0.1 - + .. tab-item:: Ubuntu 20.04 :sync: ubuntu-20 - + .. code-block:: sh - + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0.1/linux/l_openvino_toolkit_ubuntu20_2023.0.1.11005.fa1c41994f3_x86_64.tgz --output openvino_2023.0.1.tgz tar -xf openvino_2023.0.1.tgz sudo mv l_openvino_toolkit_ubuntu20_2023.0.1.11005.fa1c41994f3_x86_64 /opt/intel/openvino_2023.0.1 - + .. tab-item:: Ubuntu 18.04 :sync: ubuntu-18 - + .. code-block:: sh - + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0.1/linux/l_openvino_toolkit_ubuntu18_2023.0.1.11005.fa1c41994f3_x86_64.tgz --output openvino_2023.0.1.tgz tar -xf openvino_2023.0.1.tgz sudo mv l_openvino_toolkit_ubuntu18_2023.0.1.11005.fa1c41994f3_x86_64 /opt/intel/openvino_2023.0.1 - + .. tab-item:: RHEL 8 :sync: rhel-8 - + .. code-block:: sh - + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0.1/linux/l_openvino_toolkit_rhel8_2023.0.1.11005.fa1c41994f3_x86_64.tgz --output openvino_2023.0.1.tgz tar -xf openvino_2023.0.1.tgz sudo mv l_openvino_toolkit_rhel8_2023.0.1.11005.fa1c41994f3_x86_64 /opt/intel/openvino_2023.0.1 - + .. tab-item:: CentOS 7 :sync: centos-7 - + .. code-block:: sh - + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0.1/linux/l_openvino_toolkit_centos7_2023.0.1.11005.fa1c41994f3_x86_64.tgz --output openvino_2023.0.1.tgz tar -xf openvino_2023.0.1.tgz sudo mv l_openvino_toolkit_centos7_2023.0.1.11005.fa1c41994f3_x86_64 /opt/intel/openvino_2023.0.1 - + .. tab-item:: ARM 64-bit :sync: arm-64 - + .. code-block:: sh - + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0.1/linux/l_openvino_toolkit_debian9_2023.0.1.11005.fa1c41994f3_arm64.tgz -O openvino_2023.0.1.tgz tar -xf openvino_2023.0.1.tgz sudo mv l_openvino_toolkit_debian9_2023.0.1.11005.fa1c41994f3_arm64 /opt/intel/openvino_2023.0.1 - + .. tab-item:: ARM 32-bit :sync: arm-32 - + .. code-block:: sh - + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0.1/linux/l_openvino_toolkit_debian9_2023.0.1.11005.fa1c41994f3_armhf.tgz -O openvino_2023.0.1.tgz tar -xf openvino_2023.0.1.tgz sudo mv l_openvino_toolkit_debian9_2023.0.1.11005.fa1c41994f3_armhf /opt/intel/openvino_2023.0.1 - - + + 5. Install required system dependencies on Linux. To do this, OpenVINO provides a script in the extracted installation directory. Run the following command: - + .. code-block:: sh cd /opt/intel/openvino_2023.0.1 @@ -214,33 +213,33 @@ Step 1: Download and Install the OpenVINO Core Components python3 -m pip install -r ./python/requirements.txt 7. 
For simplicity, it is useful to create a symbolic link as below: - + .. code-block:: sh - + cd /opt/intel sudo ln -s openvino_2023.0.1 openvino_2023 - + .. note:: - If you have already installed a previous release of OpenVINO 2023, a symbolic link to the ``openvino_2023`` folder may already exist. + If you have already installed a previous release of OpenVINO 2023, a symbolic link to the ``openvino_2023`` folder may already exist. Unlink the previous link with ``sudo unlink openvino_2023``, and then re-run the command above. -Congratulations, you have finished the installation! For some use cases you may still -need to install additional components. Check the description below, as well as the +Congratulations, you have finished the installation! For some use cases you may still +need to install additional components. Check the description below, as well as the :doc:`list of additional configurations ` to see if your case needs any of them. -The ``/opt/intel/openvino_2023`` folder now contains the core components for OpenVINO. -If you used a different path in Step 2, for example, ``/home//intel/``, -OpenVINO is now in ``/home//intel/openvino_2023``. The path to the ``openvino_2023`` +The ``/opt/intel/openvino_2023`` folder now contains the core components for OpenVINO. +If you used a different path in Step 2, for example, ``/home//intel/``, +OpenVINO is now in ``/home//intel/openvino_2023``. The path to the ``openvino_2023`` directory is also referred as ```` throughout the OpenVINO documentation. Step 2: Configure the Environment ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -You must update several environment variables before you can compile and run OpenVINO applications. -Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. +You must update several environment variables before you can compile and run OpenVINO applications. +Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. If your is not ``/opt/intel/openvino_2023``, use the correct one instead. .. code-block:: sh @@ -250,12 +249,12 @@ If your is not ``/opt/intel/openvino_2023``, use the correct one i If you have more than one OpenVINO version installed on your system, you can easily switch versions by sourcing the `setupvars.sh` of your choice. -.. note:: - - The above command must be re-run every time you start a new terminal session. - To set up Linux to automatically run the command every time a new terminal is opened, - open ``~/.bashrc`` in your favorite editor and add ``source /opt/intel/openvino_2023/setupvars.sh`` after the last line. - Next time when you open a terminal, you will see ``[setupvars.sh] OpenVINO™ environment initialized``. +.. note:: + + The above command must be re-run every time you start a new terminal session. + To set up Linux to automatically run the command every time a new terminal is opened, + open ``~/.bashrc`` in your favorite editor and add ``source /opt/intel/openvino_2023/setupvars.sh`` after the last line. + Next time when you open a terminal, you will see ``[setupvars.sh] OpenVINO™ environment initialized``. Changing ``.bashrc`` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. The environment variables are set. @@ -266,57 +265,57 @@ The environment variables are set. What's Next? 
############################################################ -Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! +Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. .. tab-set:: .. tab-item:: Get started with Python :sync: get-started-py - + Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. - + .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 - + Visit the :doc:`Tutorials ` page for more Jupyter Notebooks to get you started with OpenVINO, such as: - + * `OpenVINO Python API Tutorial `__ * `Basic image classification program with Hello Image Classification `__ * `Convert a PyTorch model and use it for image background removal `__ - - + + .. tab-item:: Get started with C++ :sync: get-started-cpp - - Try the :doc:`C++ Quick Start Example ` for step-by-step instructions + + Try the :doc:`C++ Quick Start Example ` for step-by-step instructions on building and running a basic image classification C++ application. - + .. image:: https://user-images.githubusercontent.com/36741649/127170593-86976dc3-e5e4-40be-b0a6-206379cd7df5.jpg :width: 400 - + Visit the :doc:`Samples ` page for other C++ example applications to get you started with OpenVINO, such as: - + * `Basic object detection with the Hello Reshape SSD C++ sample `__ * `Automatic speech recognition C++ sample `__ - - - + + + Uninstalling the Intel® Distribution of OpenVINO™ Toolkit ########################################################### If you have installed OpenVINO Runtime from archive files, you can uninstall it by deleting the archive files and the extracted folders. -Uninstallation removes all Intel® Distribution of OpenVINO™ Toolkit component files but does not affect user files in the installation directory. +Uninstallation removes all Intel® Distribution of OpenVINO™ Toolkit component files but does not affect user files in the installation directory. If you have created the symbolic link, remove the link first: - + .. code-block:: sh sudo rm /opt/intel/openvino_2023 - + To delete the files: - + .. code-block:: sh rm -r && rm @@ -330,7 +329,7 @@ Additional Resources ########################################################### * :doc:`Troubleshooting Guide for OpenVINO Installation & Configuration ` -* Converting models for use with OpenVINO™: :doc:`Convert a Model ` +* Converting models for use with OpenVINO™: :doc:`Convert a Model ` * Writing your own OpenVINO™ applications: :doc:`OpenVINO™ Runtime User Guide ` * Sample applications: :doc:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :doc:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` diff --git a/docs/install_guides/pypi-openvino-dev.md b/docs/install_guides/pypi-openvino-dev.md index df7568f9a179bf..25e1aeac76ea02 100644 --- a/docs/install_guides/pypi-openvino-dev.md +++ b/docs/install_guides/pypi-openvino-dev.md @@ -1,4 +1,4 @@ -# OpenVINO™ Development Tools +# OpenVINO™ Development Tools > **NOTE**: This version is pre-release software and has not undergone full release validation or qualification. No support is offered on pre-release software and APIs/behavior are subject to change. 
It should NOT be incorporated into any production software/solution and instead should be used only for early testing and integration while awaiting a final release version of this software. @@ -31,11 +31,11 @@ pip install openvino-dev ### Installation in a New Environment If you do not have an environment with the source deep learning framework for the input model or you encounter any compatibility issues between OpenVINO and your version of deep learning framework, -you may install OpenVINO Development Tools with validated versions of frameworks into a new environment. +you may install OpenVINO Development Tools with validated versions of frameworks into a new environment. #### Step 1. Set Up Python Virtual Environment -Use a virtual environment to avoid dependency conflicts. +Use a virtual environment to avoid dependency conflicts. To create a virtual environment, use the following commands: @@ -75,7 +75,7 @@ Use the following command: ```sh pip install openvino-dev[extras] ``` - where `extras` is the source deep learning framework for the input model and is one or more of the following values separated with "," : + where `extras` is the source deep learning framework for the input model and is one or more of the following values separated with "," : | Extras Value | DL Framework | | :-------------------------------| :------------------------------------------------------------------------------- | @@ -113,34 +113,34 @@ For example, to install and configure the components for working with TensorFlow ## What's in the Package? -> **NOTE**: The openvino-dev package installs [OpenVINO™ Runtime](https://pypi.org/project/openvino) as a dependency, which is the engine that runs the deep learning model and includes a set of libraries for an easy inference integration into your applications. +> **NOTE**: The openvino-dev package installs [OpenVINO™ Runtime](https://pypi.org/project/openvino) as a dependency, which is the engine that runs the deep learning model and includes a set of libraries for an easy inference integration into your applications. **In addition, the openvino-dev package installs the following components by default:** -| Component | Console Script | Description | +| Component | Console Script | Description | |------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Model conversion API](https://docs.openvino.ai/nightly/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | `mo` |**Model conversion API** imports, converts, and optimizes models that were trained in popular frameworks to a format usable by OpenVINO components.
Supported frameworks include Caffe\*, TensorFlow\*, MXNet\*, PaddlePaddle\*, and ONNX\*. | +| [Legacy Model conversion API](https://docs.openvino.ai/nightly/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | `mo` |**Model conversion API** imports, converts, and optimizes models that were trained in popular frameworks to a format usable by OpenVINO components.
Supported frameworks include Caffe\*, TensorFlow\*, MXNet\*, PaddlePaddle\*, and ONNX\*. | | [Benchmark Tool](https://docs.openvino.ai/nightly/openvino_inference_engine_tools_benchmark_tool_README.html)| `benchmark_app` | **Benchmark Application** allows you to estimate deep learning inference performance on supported devices for synchronous and asynchronous modes. | | [Accuracy Checker](https://docs.openvino.ai/nightly/omz_tools_accuracy_checker.html) and
[Annotation Converter](https://docs.openvino.ai/nightly/omz_tools_accuracy_checker_annotation_converters.html) | `accuracy_check`
`convert_annotation` |**Accuracy Checker** is a deep learning accuracy validation tool that allows you to collect accuracy metrics against popular datasets. The main advantages of the tool are the flexibility of configuration and a set of supported datasets, preprocessing, postprocessing, and metrics.
**Annotation Converter** is a utility that prepares datasets for evaluation with Accuracy Checker. | | [Post-Training Optimization Tool](https://docs.openvino.ai/nightly/pot_introduction.html)| `pot` |**Post-Training Optimization Tool** allows you to optimize trained models with advanced capabilities, such as quantization and low-precision optimizations, without the need to retrain or fine-tune models. | -| [Model Downloader and other Open Model Zoo tools](https://docs.openvino.ai/nightly/omz_tools_downloader.html)| `omz_downloader`
`omz_converter`
`omz_quantizer`
`omz_info_dumper`| **Model Downloader** is a tool for getting access to the collection of high-quality and extremely fast pre-trained deep learning [public](@ref omz_models_group_public) and [Intel](@ref omz_models_group_intel)-trained models. These free pre-trained models can be used to speed up the development and production deployment process without training your own models. The tool downloads model files from online sources and, if necessary, patches them to make them more usable with model conversion API. A number of additional tools are also provided to automate the process of working with downloaded models:
**Model Converter** is a tool for converting Open Model Zoo models that are stored in an original deep learning framework format into the OpenVINO Intermediate Representation (IR) using model conversion API.
**Model Quantizer** is a tool for automatic quantization of full-precision models in the IR format into low-precision versions using the Post-Training Optimization Tool.
**Model Information Dumper** is a helper utility for dumping information about the models to a stable, machine-readable format. | +| [Model Downloader and other Open Model Zoo tools](https://docs.openvino.ai/nightly/omz_tools_downloader.html)| `omz_downloader`
`omz_converter`
`omz_quantizer`
`omz_info_dumper`| **Model Downloader** is a tool for getting access to the collection of high-quality and extremely fast pre-trained deep learning [public](@ref omz_models_group_public) and [Intel](@ref omz_models_group_intel)-trained models. These free pre-trained models can be used to speed up the development and production deployment process without training your own models. The tool downloads model files from online sources and, if necessary, patches them to make them more usable with model conversion API. A number of additional tools are also provided to automate the process of working with downloaded models:
**Model Converter** is a tool for converting Open Model Zoo models that are stored in an original deep learning framework format into the OpenVINO Intermediate Representation (IR) using model conversion API.
**Model Quantizer** is a tool for automatic quantization of full-precision models in the IR format into low-precision versions using the Post-Training Optimization Tool.
**Model Information Dumper** is a helper utility for dumping information about the models to a stable, machine-readable format. | ## Troubleshooting -For general troubleshooting steps and issues, see [Troubleshooting Guide for OpenVINO Installation](https://docs.openvino.ai/2023.1/openvino_docs_get_started_guide_troubleshooting.html). The following sections also provide explanations to several error messages. +For general troubleshooting steps and issues, see [Troubleshooting Guide for OpenVINO Installation](https://docs.openvino.ai/2023.1/openvino_docs_get_started_guide_troubleshooting.html). The following sections also provide explanations to several error messages. ### Errors with Installing via PIP for Users in China Users in China might encounter errors while downloading sources via PIP during OpenVINO™ installation. To resolve the issues, try the following solution: - -* Add the download source using the ``-i`` parameter with the Python ``pip`` command. For example: + +* Add the download source using the ``-i`` parameter with the Python ``pip`` command. For example: ``` sh pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/ ``` Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``. You can also run the following command to install openvino-dev with specific frameworks. For example: - + ``` pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/ ``` @@ -154,7 +154,7 @@ pip install openvino-dev[tensorflow2,mxnet,caffe] zsh: no matches found: openvino-dev[tensorflow2,mxnet,caffe] ``` -By default zsh interprets square brackets as an expression for pattern matching. To resolve this issue, you need to escape the command with quotes: +By default zsh interprets square brackets as an expression for pattern matching. To resolve this issue, you need to escape the command with quotes: ```sh pip install 'openvino-dev[tensorflow2,mxnet,caffe]' diff --git a/docs/model_zoo.md b/docs/model_zoo.md index a7f95024b08b01..560b67304a771f 100644 --- a/docs/model_zoo.md +++ b/docs/model_zoo.md @@ -7,7 +7,7 @@ .. toctree:: :maxdepth: 1 :hidden: - + omz_models_group_intel omz_models_group_public @@ -29,7 +29,7 @@ Open Model Zoo for OpenVINO™ toolkit delivers a wide variety of free, pre-trained deep learning models and demo applications that provide full application templates to help you implement deep learning in Python, C++, or OpenCV Graph API (G-API). Models and demos are available in the `Open Model Zoo GitHub repo `__ and licensed under Apache License Version 2.0. -Browse through over 200 neural network models, both :doc:`public ` and from :doc:`Intel `, and pick the right one for your solution. Types include object detection, classification, image segmentation, handwriting recognition, text to speech, pose estimation, and others. The Intel models have already been converted to work with OpenVINO™ toolkit, while public models can easily be converted using the :doc:`Model Optimizer ` utility. +Browse through over 200 neural network models, both :doc:`public ` and from :doc:`Intel `, and pick the right one for your solution. Types include object detection, classification, image segmentation, handwriting recognition, text to speech, pose estimation, and others. The Intel models have already been converted to work with OpenVINO™ toolkit, while public models can easily be converted using the :doc:`OpenVINO Model Conversion API ` utility. 
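+
+As a minimal illustration (assuming a public model has already been downloaded to a local ``model.onnx`` file, for example with ``omz_downloader``; the file names are placeholders only), conversion to OpenVINO IR can look as follows:
+
+.. code-block:: py
+   :force:
+
+   import openvino as ov
+
+   # Convert the downloaded public model and save it as OpenVINO IR
+   ov_model = ov.convert_model("model.onnx")
+   ov.save_model(ov_model, "model.xml")
+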
Get started with simple :doc:`step-by-step procedures ` to learn how to build and run demo applications or discover the :doc:`full set of demos ` and adapt them for implementing specific deep learning scenarios in your applications. From 4af1fd087c88284b60827408d065f1b356dfe432 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 12 Sep 2023 13:10:23 +0200 Subject: [PATCH 28/31] [core] Migrate the Assign operator to new API (#19664) * Migrate the Assign operator to new API * Use memcpy instead of tensor copy_to --- src/core/include/openvino/op/assign.hpp | 6 +- src/core/src/op/assign.cpp | 80 ++++++++++++------------- 2 files changed, 42 insertions(+), 44 deletions(-) diff --git a/src/core/include/openvino/op/assign.hpp b/src/core/include/openvino/op/assign.hpp index fb703e8c3f604d..e40372591ea9b0 100644 --- a/src/core/include/openvino/op/assign.hpp +++ b/src/core/include/openvino/op/assign.hpp @@ -63,11 +63,9 @@ class OPENVINO_API Assign : public util::AssignBase { OPENVINO_ASSERT(m_variable, "Variable is not initialized. Variable_id is unavailable"); return m_variable->get_info().variable_id; } - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, - const HostTensorVector& inputs, + bool evaluate(TensorVector& outputs, + const TensorVector& inputs, const EvaluationContext& evaluation_context) const override; - OPENVINO_SUPPRESS_DEPRECATED_END bool has_evaluate() const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; }; diff --git a/src/core/src/op/assign.cpp b/src/core/src/op/assign.cpp index 4dad71f6db9e01..020fc132f0599e 100644 --- a/src/core/src/op/assign.cpp +++ b/src/core/src/op/assign.cpp @@ -2,26 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/assign.hpp" - -#include +#include "openvino/op/assign.hpp" +#include "assign_shape_inference.hpp" #include "itt.hpp" -#include "ngraph/op/read_value.hpp" -#include "ngraph/op/util/variable.hpp" -#include "ngraph/op/util/variable_context.hpp" -#include "ngraph/ops.hpp" - -using namespace std; -using namespace ngraph; - -op::v3::Assign::Assign(const Output& new_value, const std::string& variable_id) +#include "openvino/op/read_value.hpp" +#include "openvino/op/util/variable.hpp" +#include "openvino/op/util/variable_context.hpp" + +namespace ov { +namespace op { +namespace v3 { +Assign::Assign(const Output& new_value, const std::string& variable_id) : AssignBase({new_value}), m_variable_id(variable_id) { constructor_validate_and_infer_types(); } -void op::v3::Assign::validate_and_infer_types() { +void Assign::validate_and_infer_types() { OV_OP_SCOPE(v3_Assign_validate_and_infer_types); auto value = input_value(0); auto arg_t = get_input_element_type(0); @@ -33,93 +31,95 @@ void op::v3::Assign::validate_and_infer_types() { } auto nodes = topological_sort(start_nodes); for (const auto& node : nodes) { - if (auto read_value = ov::as_type_ptr(node)) { + if (auto read_value = ov::as_type_ptr(node)) { if (read_value->get_variable_id() == m_variable_id) m_variable = read_value->get_variable(); } } NODE_VALIDATION_CHECK(this, m_variable != nullptr, "Can't find variable with id = ", m_variable_id); } - std::vector input_shapes = {input_shape}; + + const auto input_shapes = std::vector{input_shape}; const auto output_shapes = shape_infer(this, input_shapes); set_output_type(0, arg_t, output_shapes[0]); } -shared_ptr op::v3::Assign::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Assign::clone_with_new_inputs(const 
OutputVector& new_args) const { OV_OP_SCOPE(v3_Assign_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), m_variable_id); + return std::make_shared(new_args.at(0), m_variable_id); } -bool op::v3::Assign::visit_attributes(AttributeVisitor& visitor) { +bool Assign::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v3_Assign_visit_attributes); visitor.on_attribute("variable_id", m_variable_id); return true; } +} // namespace v3 -op::v6::Assign::Assign(const Output& new_value, const std::shared_ptr& variable) +namespace v6 { +Assign::Assign(const Output& new_value, const std::shared_ptr& variable) : AssignBase({new_value}) { m_variable = variable; constructor_validate_and_infer_types(); } -void op::v6::Assign::validate_and_infer_types() { +void Assign::validate_and_infer_types() { OV_OP_SCOPE(v6_Assign_validate_and_infer_types); m_variable->update({get_input_partial_shape(0), get_input_element_type(0), m_variable->get_info().variable_id}); set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); } -shared_ptr op::v6::Assign::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Assign::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v6_Assign_clone_with_new_inputs); check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), m_variable); + return std::make_shared(new_args.at(0), m_variable); } -bool op::v6::Assign::visit_attributes(AttributeVisitor& visitor) { +bool Assign::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v6_Assign_visit_attributes); visitor.on_attribute("variable_id", m_variable); return true; } -OPENVINO_SUPPRESS_DEPRECATED_START -bool op::v6::Assign::evaluate(const HostTensorVector& outputs, - const HostTensorVector& inputs, - const EvaluationContext& evaluation_context) const { +bool Assign::evaluate(TensorVector& outputs, + const TensorVector& inputs, + const EvaluationContext& evaluation_context) const { OV_OP_SCOPE(v6_Assign_evaluate); const auto& found_context = evaluation_context.find("VariableContext"); NODE_VALIDATION_CHECK(this, found_context != evaluation_context.end(), "VariableContext not found."); - auto& variable_context = const_cast(found_context->second.as()); + auto& variable_context = const_cast(found_context->second.as()); const auto& variable_values = variable_context.get_variable_values(); // automatically allocate memory if not provided by user if (variable_values.find(m_variable) == variable_values.end()) { - auto host_tensor = - std::make_shared(m_variable->get_info().data_type, m_variable->get_info().data_shape); - variable_context.set_variable_value(m_variable, make_shared(host_tensor)); + auto tensor = Tensor(m_variable->get_info().data_type, m_variable->get_info().data_shape.to_shape()); + variable_context.set_variable_value(m_variable, std::make_shared(tensor)); } const auto var_value = variable_values.find(m_variable)->second; var_value->set_reset(false); - const auto& buffer = var_value->get_value(); - buffer->set_unary(inputs[0]); - outputs[0]->set_unary(inputs[0]); + auto buffer = var_value->get_state(); + buffer.set_shape(inputs[0].get_shape()); + outputs[0].set_shape(inputs[0].get_shape()); - void* input = inputs[0]->get_data_ptr(); - outputs[0]->write(input, outputs[0]->get_size_in_bytes()); - buffer->write(input, buffer->get_size_in_bytes()); + std::memcpy(outputs[0].data(), inputs[0].data(), inputs[0].get_byte_size()); + std::memcpy(buffer.data(), inputs[0].data(), 
inputs[0].get_byte_size()); return true; } -OPENVINO_SUPPRESS_DEPRECATED_END -bool op::v6::Assign::has_evaluate() const { +bool Assign::has_evaluate() const { OV_OP_SCOPE(v1_Assign_has_evaluate); return true; } -bool op::v6::Assign::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool Assign::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { return false; } +} // namespace v6 +} // namespace op +} // namespace ov From f3d4665f7b93fc03cc8e3e7c2563ccda377fbf8e Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 12 Sep 2023 13:15:04 +0200 Subject: [PATCH 29/31] Api 2.0/migrate shape inference test to new api (#19665) * Migrate static shape inference test to new API * Use new API in CPU custom shape inference tests * Rename range shape inference test file --- ...adaptive_avg_pool_shape_inference_test.cpp | 17 +-- ...adaptive_max_pool_shape_inference_test.cpp | 17 +-- .../assign_shape_inference.cpp | 2 +- .../shape_inference_test/augru_cell_test.cpp | 4 +- .../augru_sequence_test.cpp | 8 +- .../batch_to_space_shape_inference_test.cpp | 27 ++-- .../bec_shape_inference_test.cpp | 16 +-- .../bel_shape_inference_test.cpp | 18 ++- ...inary_convolution_shape_inference_test.cpp | 8 +- .../binary_elementwise_arithmetic.cpp | 30 ++-- .../broadcast_shape_inference.cpp | 124 +++++++---------- .../bucketize_shape_inference_test.cpp | 8 +- .../concat_shape_inference_test.cpp | 2 +- ...volution_backprop_shape_inference_test.cpp | 9 +- .../convolution_shape_inference_test.cpp | 8 +- ...y_decoder_seq_len_shape_inference_test.cpp | 10 +- ...tc_greedy_decoder_shape_inference_test.cpp | 8 +- .../ctc_loss_shape_inference_test.cpp | 4 +- .../custom_shape_infer/adaptive_avg_pool.cpp | 6 +- .../custom_shape_infer/adaptive_max_pool.cpp | 5 +- .../custom_shape_infer/custom_shape_infer.cpp | 30 ++-- .../custom_shape_infer/custom_shape_infer.hpp | 15 +- .../custom_shape_infer/gather.cpp | 3 +- .../custom_shape_infer/one_hot.cpp | 27 ++-- .../custom_shape_infer/prior_box.cpp | 13 +- .../prior_box_clustered.cpp | 7 +- .../custom_shape_infer/reshape.cpp | 11 +- .../custom_shape_infer/squeeze.cpp | 12 +- .../custom_shape_infer/strided_slice.cpp | 16 +-- .../custom_shape_infer/transpose.cpp | 7 +- .../custom_shape_infer/unsqueeze.cpp | 11 +- ...mable_convolution_shape_inference_test.cpp | 10 +- ...ble_psroi_pooling_shape_inference_test.cpp | 11 +- .../depth_to_space_shape_inference_test.cpp | 4 +- .../detection_output_shape_inference_test.cpp | 16 +-- .../einsum_shape_infernce_test.cpp | 17 ++- .../shape_inference_test/elementwises.cpp | 20 ++- .../embedding_segments_sum_test.cpp | 25 ++-- ...ngbag_offsets_sum_shape_inference_test.cpp | 16 +-- ...ingbag_packed_sum_shape_inference_test.cpp | 11 +- ..._detection_output_shape_inference_test.cpp | 12 +- ...generate_proposal_shape_inference_test.cpp | 10 +- ...or_grid_generator_shape_inference_test.cpp | 10 +- ...feature_extractor_shape_inference_test.cpp | 10 +- ...etectron_topkrois_shape_inference_test.cpp | 12 +- ...act_image_patches_shape_inference_test.cpp | 8 +- .../eye_shape_inference_test.cpp | 60 ++++---- .../fft_base_shape_inference_test.cpp | 130 ++++++++---------- .../gather_elements_shape_inference_test.cpp | 8 +- .../gather_nd_shape_inference_test.cpp | 5 +- .../gather_shape_inference_test.cpp | 17 ++- .../gather_tree_shape_inference_test.cpp | 4 +- .../grid_sample_shape_inference_test.cpp | 4 +- ...volution_backprop_shape_inference_test.cpp | 15 +- ...group_convolution_shape_inference_test.cpp | 6 +- 
.../gru_cell_shape_inference_test.cpp | 10 +- .../gru_sequence_shape_inference_test.cpp | 10 +- .../interpolate_shape_inference_test.cpp | 67 ++++----- .../logical_not_shape_inference_test.cpp | 7 +- .../lstm_cell_shape_inference_test.cpp | 9 +- .../lstm_seq_shape_inference_test.cpp | 18 +-- .../make_shape_inference.cpp | 38 ++--- .../matmul_shape_inference.cpp | 8 +- .../one_hot_shape_inference_test.cpp | 52 +++---- .../pad_shape_inference_test.cpp | 34 ++--- ...ior_box_clustered_shape_inference_test.cpp | 29 +--- .../prior_box_shape_inference_test.cpp | 29 +--- .../proposal_shape_inference_test.cpp | 10 +- .../psroi_pooling_shape_inference_test.cpp | 10 +- .../range_shape_inference_test.cpp | 67 +++++++++ .../unit/shape_inference_test/range_test.cpp | 37 ----- .../read_value_shape_inference.cpp | 4 +- .../reduce_shape_inference_test.cpp | 19 ++- .../region_yolo_shape_inference_test.cpp | 8 +- .../reorg_yolo_shape_inference_test.cpp | 10 +- .../reverse_sequence_shape_inference_test.cpp | 12 +- .../reverse_shape_inference_test.cpp | 23 ++-- .../rnn_cell_shape_inference_test.cpp | 8 +- .../rnn_seq_shape_inference_test.cpp | 8 +- .../roi_align_shape_inference_test.cpp | 14 +- .../roi_pooling_shape_inference_test.cpp | 8 +- .../roll_shape_inference_test.cpp | 29 ++-- ...r_elements_update_shape_inference_test.cpp | 17 +-- .../scatter_nd_shape_inference_test.cpp | 6 +- .../scatter_update_shape_inference_test.cpp | 35 +++-- .../select_shape_inference_test.cpp | 26 ++-- .../shape_inference_test/shape_node_tests.cpp | 32 ++--- .../shuffle_channels_shape_inference_test.cpp | 4 +- .../slice_shape_inference_test.cpp | 34 ++--- .../space_to_batch_shape_inference_test.cpp | 27 ++-- .../space_to_depth_shape_inference_test.cpp | 4 +- .../split_shape_inference_tests.cpp | 12 +- .../squeeze_shape_inference_test.cpp | 17 +-- .../strided_slice_shape_inference_test.cpp | 99 +++++++++++-- .../tile_shape_inference_test.cpp | 28 ++-- .../topk_shape_inference_test.cpp | 31 ++--- .../transpose_shape_infernece_test.cpp | 17 +-- .../unsqueeze_shape_inference_test.cpp | 14 +- .../tests/unit/shape_inference_test/utils.cpp | 28 ++++ .../tests/unit/shape_inference_test/utils.hpp | 111 ++------------- .../variadic_split_shape_inference_tests.cpp | 24 ++-- 101 files changed, 955 insertions(+), 1113 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/range_shape_inference_test.cpp delete mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/range_test.cpp create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.cpp diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/adaptive_avg_pool_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/adaptive_avg_pool_shape_inference_test.cpp index 0059829fe39406..885146b8e028b1 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/adaptive_avg_pool_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/adaptive_avg_pool_shape_inference_test.cpp @@ -21,12 +21,11 @@ class AdaptiveAvgPoolV8StaticShapeInferenceTest : public OpStaticShapeInferenceT TEST_F(AdaptiveAvgPoolV8StaticShapeInferenceTest, default_ctor) { int32_t spatial_dims[] = {10, 20}; - const std::map const_data{ - {1, std::make_shared(element::i32, ov::Shape{2}, spatial_dims)}}; + const std::unordered_map const_data{{1, {element::i32, ov::Shape{2}, spatial_dims}}}; op = make_op(); input_shapes = ShapeVector{{1, 3, 1, 2}, {2}}; - shape_inference(op.get(), input_shapes, 
output_shapes, const_data); + output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({1, 3, 10, 20})); @@ -39,7 +38,7 @@ TEST_F(AdaptiveAvgPoolV8StaticShapeInferenceTest, out_spatial_dims_as_constant) op = make_op(data, out_shape); input_shapes = ShapeVector{{1, 3, 10}, {1}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({1, 3, 17})); @@ -52,11 +51,10 @@ TEST_F(AdaptiveAvgPoolV8StaticShapeInferenceTest, out_spatial_dims_in_const_map) op = make_op(data, out_shape); int32_t spatial_dims[] = {9, 8, 7}; - const std::map const_data{ - {1, std::make_shared(element::i32, ov::Shape{3}, spatial_dims)}}; + const std::unordered_map const_data{{1, {element::i32, ov::Shape{3}, spatial_dims}}}; input_shapes = ShapeVector{{1, 3, 10, 2, 4}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({1, 3, 9, 8, 7})); @@ -69,11 +67,10 @@ TEST_F(AdaptiveAvgPoolV8StaticShapeInferenceTest, out_spatial_dims_in_const_map_ op = make_op(data, out_shape); int32_t spatial_dims[] = {9, 8}; - const std::map const_data{ - {1, std::make_shared(element::i32, ov::Shape{2}, spatial_dims)}}; + const std::unordered_map const_data{{1, {element::i32, ov::Shape{2}, spatial_dims}}}; input_shapes = ShapeVector{{1, 3, 10, 2, 4}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_data), ov::NodeValidationFailure, HasSubstr("Number of spatial dimensions is not compatible with input data rank")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/adaptive_max_pool_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/adaptive_max_pool_shape_inference_test.cpp index a9619b747b0465..6de1a9ce5ce4c8 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/adaptive_max_pool_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/adaptive_max_pool_shape_inference_test.cpp @@ -21,12 +21,11 @@ class AdaptiveMaxPoolV8StaticShapeInferenceTest : public OpStaticShapeInferenceT TEST_F(AdaptiveMaxPoolV8StaticShapeInferenceTest, default_ctor) { int32_t spatial_dims[] = {10, 20}; - const std::map const_data{ - {1, std::make_shared(element::i32, ov::Shape{2}, spatial_dims)}}; + const std::unordered_map const_data{{1, {element::i32, ov::Shape{2}, spatial_dims}}}; op = make_op(); input_shapes = ShapeVector{{1, 3, 1, 2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 2); EXPECT_THAT(output_shapes, Each(StaticShape({1, 3, 10, 20}))); @@ -39,7 +38,7 @@ TEST_F(AdaptiveMaxPoolV8StaticShapeInferenceTest, out_spatial_dims_as_constant) op = make_op(data, out_shape); input_shapes = ShapeVector{{1, 3, 10}, {1}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 2); EXPECT_THAT(output_shapes, Each(StaticShape({1, 3, 17}))); @@ -52,11 +51,10 @@ TEST_F(AdaptiveMaxPoolV8StaticShapeInferenceTest, out_spatial_dims_in_const_map) op = make_op(data, 
out_shape); int32_t spatial_dims[] = {9, 8, 7}; - const std::map const_data{ - {1, std::make_shared(element::i32, ov::Shape{3}, spatial_dims)}}; + const std::unordered_map const_data{{1, {element::i32, ov::Shape{3}, spatial_dims}}}; input_shapes = ShapeVector{{1, 3, 10, 2, 4}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 2); EXPECT_THAT(output_shapes, Each(StaticShape({1, 3, 9, 8, 7}))); @@ -69,11 +67,10 @@ TEST_F(AdaptiveMaxPoolV8StaticShapeInferenceTest, out_spatial_dims_in_const_map_ op = make_op(data, out_shape); int32_t spatial_dims[] = {9, 8}; - const std::map const_data{ - {1, std::make_shared(element::i32, ov::Shape{2}, spatial_dims)}}; + const std::unordered_map const_data{{1, {element::i32, ov::Shape{2}, spatial_dims}}}; input_shapes = ShapeVector{{1, 3, 10, 2, 4}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_data), ov::NodeValidationFailure, HasSubstr("Number of spatial dimensions is not compatible with input data rank")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/assign_shape_inference.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/assign_shape_inference.cpp index 9500ca8138f5cd..ea2e1819a2d3d3 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/assign_shape_inference.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/assign_shape_inference.cpp @@ -34,7 +34,7 @@ void assignTest() { // Test StaticShape std::vector static_input_shapes = {StaticShape{1, 2, 64, 64}}, static_output_shapes = {StaticShape{}}; - shape_inference(assign.get(), static_input_shapes, static_output_shapes); + static_output_shapes = shape_inference(assign.get(), static_input_shapes); ASSERT_EQ(static_input_shapes[0], (StaticShape{1, 2, 64, 64})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/augru_cell_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/augru_cell_test.cpp index 311e43dc634bbf..c392e549e16e4d 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/augru_cell_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/augru_cell_test.cpp @@ -34,7 +34,7 @@ TEST(StaticShapeInferenceTest, AUGRUCellTest_all_inputs_static_rank) { std::vector static_output_shapes{StaticShape{}, StaticShape{}}; - shape_inference(augru.get(), static_input_shapes, static_output_shapes); + static_output_shapes = shape_inference(augru.get(), static_input_shapes); EXPECT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size})); } @@ -62,6 +62,6 @@ TEST(StaticShapeInferenceTest, AUGRUCellTest_all_inputs_dynamic_rank) { std::vector static_output_shapes{StaticShape{}, StaticShape{}}; - shape_inference(augru.get(), static_input_shapes, static_output_shapes); + static_output_shapes = shape_inference(augru.get(), static_input_shapes); EXPECT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/augru_sequence_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/augru_sequence_test.cpp index 55cb4958110d27..5dfe469e7b6f60 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/augru_sequence_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/augru_sequence_test.cpp @@ -38,9 +38,7 @@ TEST(StaticShapeInferenceTest, 
AGRUSequenceTest_FORWARD_all_static_rank) { StaticShape{num_directions, gates_count * hidden_size}, // B StaticShape{batch_size, seq_len, 1}}; // A - std::vector static_output_shapes{StaticShape{}, StaticShape{}}; - - shape_inference(augru_sequence.get(), static_input_shapes, static_output_shapes); + const auto static_output_shapes = shape_inference(augru_sequence.get(), static_input_shapes); EXPECT_EQ(static_output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(static_output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } @@ -73,9 +71,7 @@ TEST(StaticShapeInferenceTest, AGRUSequenceTest_FORWARD_all_inputs_dynamic_rank) StaticShape{num_directions, gates_count * hidden_size}, // B StaticShape{batch_size, seq_len, 1}}; // A - std::vector static_output_shapes{StaticShape{}, StaticShape{}}; - - shape_inference(augru_sequence.get(), static_input_shapes, static_output_shapes); + const auto static_output_shapes = shape_inference(augru_sequence.get(), static_input_shapes); EXPECT_EQ(static_output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(static_output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/batch_to_space_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/batch_to_space_shape_inference_test.cpp index a79f3fd98a41d6..cf80cd9ca957de 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/batch_to_space_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/batch_to_space_shape_inference_test.cpp @@ -35,13 +35,12 @@ TEST_F(BatchToSpaceV1StaticShapeInferenceTest, default_ctor) { int32_t crops_begin_val[] = {0, 2, 0, 0, 0}; int32_t crops_end_val[] = {0, 2, 1, 0, 0}; - const auto constant_data = - std::map{{1, std::make_shared(element::i32, Shape{5}, block_val)}, - {2, std::make_shared(element::i32, Shape{5}, crops_begin_val)}, - {3, std::make_shared(element::i32, Shape{5}, crops_end_val)}}; + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{5}, block_val}}, + {2, {element::i32, Shape{5}, crops_begin_val}}, + {3, {element::i32, Shape{5}, crops_end_val}}}; input_shapes = {{960, 6, 13, 128, 16}, {5}, {5}, {5}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{960 / (6 * 5 * 16), 6 * 6 - 2 - 2, 13 * 5 - 1, 128, 16 * 16})); } @@ -53,14 +52,13 @@ TEST_F(BatchToSpaceV1StaticShapeInferenceTest, blocks_crops_in_constant_map) { int32_t crops_begin_val[] = {0, 2, 0, 0, 0}; int32_t crops_end_val[] = {0, 2, 1, 0, 0}; - const auto constant_data = - std::map{{1, std::make_shared(element::i32, Shape{5}, block_val)}, - {2, std::make_shared(element::i32, Shape{5}, crops_begin_val)}, - {3, std::make_shared(element::i32, Shape{5}, crops_end_val)}}; + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{5}, block_val}}, + {2, {element::i32, Shape{5}, crops_begin_val}}, + {3, {element::i32, Shape{5}, crops_end_val}}}; input_shapes = {{960, 6, 13, 128, 16}, {5}, {5}, {5}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes[0], (StaticShape{960 / (6 * 5 * 16), 6 * 6 - 2 - 2, 13 * 5 - 1, 128, 16 * 16})); } @@ -72,7 +70,7 @@ 
TEST_F(BatchToSpaceV1StaticShapeInferenceTest, blocs_crops_as_constants) { op = make_op(data, block_shape, crops_begin, crops_end); input_shapes = {{100, 7, 13, 3}, {4}, {4}, {4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{100 / (10 * 5), 7 * 10 - 3 - 3, 13 * 5 - 1, 3})); } @@ -83,11 +81,10 @@ TEST_F(BatchToSpaceV1StaticShapeInferenceTest, missing_tensor_data) { int32_t block_val[] = {1, 6, 5, 1, 16}; int32_t crops_end_val[] = {0, 2, 1, 0, 0}; - const auto constant_data = - std::map{{1, std::make_shared(element::i32, Shape{5}, block_val)}, - {3, std::make_shared(element::i32, Shape{5}, crops_end_val)}}; + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{5}, block_val}}, + {3, {element::i32, Shape{5}, crops_end_val}}}; input_shapes = {{960, 6, 13, 128, 16}, {5}, {5}, {5}}; - EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, constant_data), NodeValidationFailure); + EXPECT_THROW(shape_inference(op.get(), input_shapes, constant_data), NodeValidationFailure); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/bec_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/bec_shape_inference_test.cpp index d6ced8c162c59d..496c06bb2ebf1d 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/bec_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/bec_shape_inference_test.cpp @@ -2,8 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include "common_test_utils/test_assertions.hpp" -#include "gmock/gmock.h" #include "openvino/op/parameter.hpp" #include "utils.hpp" @@ -27,8 +28,7 @@ TYPED_TEST_P(BECStaticShapeInferenceTest, broadcast_none) { const auto op = this->make_op(a, b, op::AutoBroadcastType::NONE); this->input_shapes = {StaticShape{3, 4, 7, 5}, StaticShape{3, 4, 7, 5}}; - - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({3, 4, 7, 5})); } @@ -40,7 +40,7 @@ TYPED_TEST_P(BECStaticShapeInferenceTest, broadcast_none_incompatible_shapes) { this->input_shapes = {StaticShape{3, 4, 6, 5}, StaticShape{3, 1, 6, 1}}; - OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Argument shapes are inconsistent.")) } @@ -52,7 +52,7 @@ TYPED_TEST_P(BECStaticShapeInferenceTest, broadcast_numpy_equal_rank) { this->input_shapes = {StaticShape{3, 1, 1, 5}, StaticShape{3, 1, 6, 1}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({3, 1, 6, 5})); } @@ -64,7 +64,7 @@ TYPED_TEST_P(BECStaticShapeInferenceTest, broadcast_numpy_a_rank_higher) { this->input_shapes = {StaticShape{6, 5, 1, 8}, StaticShape{5, 6, 1}}, - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({6, 5, 6, 8})); } @@ -76,7 +76,7 @@ TYPED_TEST_P(BECStaticShapeInferenceTest, broadcast_numpy_b_rank_higher) { this->input_shapes = {StaticShape{5, 6, 1}, StaticShape{6, 5, 1, 8}}, - shape_inference(op.get(), this->input_shapes, this->output_shapes); + 
this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({6, 5, 6, 8})); } @@ -88,7 +88,7 @@ TYPED_TEST_P(BECStaticShapeInferenceTest, broadcast_numpy_incompatible_shapes) { this->input_shapes = {StaticShape{3, 4, 6, 6}, StaticShape{2, 4, 6, 6}}; - OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Argument shapes are inconsistent.")) } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/bel_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/bel_shape_inference_test.cpp index a2fde95bb65358..2f0760b90efebc 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/bel_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/bel_shape_inference_test.cpp @@ -2,8 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include "common_test_utils/test_assertions.hpp" -#include "gmock/gmock.h" #include "openvino/op/parameter.hpp" #include "utils.hpp" @@ -30,7 +31,7 @@ TYPED_TEST_P(BELStaticShapeInferenceTest, broadcast_none) { this->input_shapes = {StaticShape{3, 4, 7, 5}, StaticShape{3, 4, 7, 5}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({3, 4, 7, 5})); } @@ -42,7 +43,7 @@ TYPED_TEST_P(BELStaticShapeInferenceTest, broadcast_none_incompatible_shapes) { this->input_shapes = {StaticShape{3, 4, 6, 5}, StaticShape{3, 1, 6, 1}}; - OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Argument shapes are inconsistent.")) } @@ -53,8 +54,7 @@ TYPED_TEST_P(BELStaticShapeInferenceTest, broadcast_numpy_equal_rank) { const auto op = this->make_op(a, b); this->input_shapes = {StaticShape{3, 1, 1, 5}, StaticShape{3, 1, 6, 1}}; - - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({3, 1, 6, 5})); } @@ -65,8 +65,7 @@ TYPED_TEST_P(BELStaticShapeInferenceTest, broadcast_numpy_a_rank_higher) { const auto op = this->make_op(a, b); this->input_shapes = {StaticShape{6, 5, 1, 8}, StaticShape{5, 6, 1}}, - - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({6, 5, 6, 8})); } @@ -77,8 +76,7 @@ TYPED_TEST_P(BELStaticShapeInferenceTest, broadcast_numpy_b_rank_higher) { const auto op = this->make_op(a, b); this->input_shapes = {StaticShape{5, 6, 1}, StaticShape{6, 5, 1, 8}}, - - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({6, 5, 6, 8})); } @@ -90,7 +88,7 @@ TYPED_TEST_P(BELStaticShapeInferenceTest, broadcast_numpy_incompatible_shapes) { this->input_shapes = {StaticShape{3, 4, 6, 6}, StaticShape{2, 4, 6, 6}}; - OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Argument shapes are inconsistent.")) } diff --git 
a/src/plugins/intel_cpu/tests/unit/shape_inference_test/binary_convolution_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/binary_convolution_shape_inference_test.cpp index 219e20257c1cd6..13ca00304ea7d9 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/binary_convolution_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/binary_convolution_shape_inference_test.cpp @@ -74,7 +74,7 @@ TEST_F(BinaryConvolutionV1StaticShapeInferenceTest, auto_pads_same_lower_inputs_ op = make_op(data, filters, strides, pads_begin, pads_end, dilations, mode, pad_value, auto_pad); input_shapes = ShapeVector{{3, 6, 5, 5}, {7, 6, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 7, 5, 5})); @@ -93,7 +93,7 @@ TEST_F(BinaryConvolutionV1StaticShapeInferenceTest, auto_pad_same_lower_inputs_s op = make_op(data, filters, strides, pads_begin, pads_end, dilations, mode, pad_value, auto_pad); input_shapes = ShapeVector{{3, 6, 5, 5}, {7, 6, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 7, 5, 5})); @@ -113,7 +113,7 @@ TEST_F(BinaryConvolutionV1StaticShapeInferenceTest, data_and_filters_num_channel input_shapes = ShapeVector{{3, 5, 5, 5}, {7, 6, 3, 3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Data batch channel count (5) does not match filter")); } @@ -132,7 +132,7 @@ TEST_F(BinaryConvolutionV1StaticShapeInferenceTest, data_rank_not_4) { input_shapes = ShapeVector{{3, 6, 5}, {7, 6, 3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Expected 4D for the input. 
Got:")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/binary_elementwise_arithmetic.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/binary_elementwise_arithmetic.cpp index 6a174e392fd67d..e48c6542b231b8 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/binary_elementwise_arithmetic.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/binary_elementwise_arithmetic.cpp @@ -25,9 +25,8 @@ TYPED_TEST_P(StaticShapeInferenceTest_BEA, shape_inference_autob_numpy_equal_ran auto node = std::make_shared(A, B); - std::vector static_input_shapes = {StaticShape{3, 1, 1, 5}, StaticShape{3, 1, 6, 1}}, - static_output_shapes = {StaticShape{}}; - shape_inference(node.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{3, 1, 1, 5}, StaticShape{3, 1, 6, 1}}; + const auto static_output_shapes = shape_inference(node.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({3, 1, 6, 5})); } @@ -38,9 +37,8 @@ TYPED_TEST_P(StaticShapeInferenceTest_BEA, shape_inference_autob_numpy_a_rank_hi auto node = std::make_shared(A, B); - std::vector static_input_shapes = {StaticShape{3, 4, 1, 5}, StaticShape{4, 6, 1}}, - static_output_shapes = {StaticShape{}}; - shape_inference(node.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{3, 4, 1, 5}, StaticShape{4, 6, 1}}; + const auto static_output_shapes = shape_inference(node.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({3, 4, 6, 5})); } @@ -51,9 +49,8 @@ TYPED_TEST_P(StaticShapeInferenceTest_BEA, shape_inference_autob_numpy_b_rank_hi auto node = std::make_shared(A, B); - std::vector static_input_shapes = {StaticShape{4, 6, 1}, StaticShape{3, 4, 1, 5}}, - static_output_shapes = {StaticShape{}}; - shape_inference(node.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{4, 6, 1}, StaticShape{3, 4, 1, 5}}; + const auto static_output_shapes = shape_inference(node.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({3, 4, 6, 5})); } @@ -64,10 +61,9 @@ TYPED_TEST_P(StaticShapeInferenceTest_BEA, shape_inference_autob_numpy_incompati auto node = std::make_shared(A, B); - std::vector static_input_shapes = {StaticShape{3, 4, 6, 5}, StaticShape{2, 4, 6, 5}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{3, 4, 6, 5}, StaticShape{2, 4, 6, 5}}; - ASSERT_THROW(shape_inference(node.get(), static_input_shapes, static_output_shapes), NodeValidationFailure); + ASSERT_THROW(shape_inference(node.get(), static_input_shapes), NodeValidationFailure); } TYPED_TEST_P(StaticShapeInferenceTest_BEA, shape_inference_aubtob_none) { @@ -76,9 +72,8 @@ TYPED_TEST_P(StaticShapeInferenceTest_BEA, shape_inference_aubtob_none) { auto node = std::make_shared(A, B, op::AutoBroadcastType::NONE); - std::vector static_input_shapes = {StaticShape{3, 4, 6, 5}, StaticShape{3, 4, 6, 5}}, - static_output_shapes = {StaticShape{}}; - shape_inference(node.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{3, 4, 6, 5}, StaticShape{3, 4, 6, 5}}; + const auto static_output_shapes = shape_inference(node.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({3, 4, 6, 5})); } @@ -89,10 +84,9 @@ TYPED_TEST_P(StaticShapeInferenceTest_BEA, shape_inference_aubtob_none_incompati auto node = std::make_shared(A, B, op::AutoBroadcastType::NONE); - 
std::vector static_input_shapes = {StaticShape{3, 4, 6, 5}, StaticShape{3, 1, 6, 1}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{3, 4, 6, 5}, StaticShape{3, 1, 6, 1}}; - ASSERT_THROW(shape_inference(node.get(), static_input_shapes, static_output_shapes), NodeValidationFailure); + ASSERT_THROW(shape_inference(node.get(), static_input_shapes), NodeValidationFailure); } REGISTER_TYPED_TEST_SUITE_P(StaticShapeInferenceTest_BEA, diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/broadcast_shape_inference.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/broadcast_shape_inference.cpp index 263062e4eced41..6aa0879bbffa7e 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/broadcast_shape_inference.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/broadcast_shape_inference.cpp @@ -15,18 +15,16 @@ TEST(StaticShapeInferenceTest, BroadcastBidirectionalTest) { auto broadcast_v3 = std::make_shared(input, target_shape, op::BroadcastType::BIDIRECTIONAL); int32_t target_shape_val[] = {1, 16, 50, 1}; - std::map> constant_data; - constant_data[1] = - std::make_shared(ngraph::element::Type_t::i32, ov::Shape{4}, target_shape_val); + std::unordered_map constant_data{{1, {element::Type_t::i32, ov::Shape{4}, target_shape_val}}}; + + std::vector static_input_shapes = {StaticShape{16, 1, 8}, StaticShape{4}}; + const auto static_output_shapes = shape_inference(broadcast_v3.get(), static_input_shapes, constant_data); - std::vector static_input_shapes = {StaticShape{16, 1, 8}, StaticShape{4}}, - static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 16, 50, 8})); static_input_shapes = {StaticShape{16, 1, 1}, StaticShape{4}}; - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, {}), NodeValidationFailure); + + EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes), NodeValidationFailure); } TEST(StaticShapeInferenceTest, BroadcastBidirectionalConstantTest) { @@ -34,9 +32,9 @@ TEST(StaticShapeInferenceTest, BroadcastBidirectionalConstantTest) { auto target_shape = std::make_shared(element::i32, ov::Shape{3}, std::vector{16, 1, 40}); auto broadcast_v3 = std::make_shared(input, target_shape, op::BroadcastType::BIDIRECTIONAL); - std::vector static_input_shapes = {StaticShape{1, 16, 50, 1}, StaticShape{3}}, - static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, {}); + std::vector static_input_shapes = {StaticShape{1, 16, 50, 1}, StaticShape{3}}; + + const auto static_output_shapes = shape_inference(broadcast_v3.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 16, 50, 40})); } @@ -47,18 +45,16 @@ TEST(StaticShapeInferenceTest, BroadcastPDPDTest) { std::make_shared(input, target_shape, op::BroadcastModeSpec(op::BroadcastType::PDPD, 1)); int32_t target_shape_val[] = {2, 3, 6}; - std::map> constant_data; - constant_data[1] = - std::make_shared(ngraph::element::Type_t::i32, ov::Shape{3}, target_shape_val); + std::unordered_map constant_data{{1, {element::Type_t::i32, ov::Shape{3}, target_shape_val}}}; + + std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}; - std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}, - static_output_shapes = {StaticShape{}}; - 
shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(broadcast_v3.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({2, 3, 6})); static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}; - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, {}), NodeValidationFailure); + + EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes), NodeValidationFailure); } TEST(StaticShapeInferenceTest, BroadcastPDPDConstantTest) { @@ -67,9 +63,8 @@ TEST(StaticShapeInferenceTest, BroadcastPDPDConstantTest) { auto broadcast_v3 = std::make_shared(input, target_shape, op::BroadcastModeSpec(op::BroadcastType::PDPD, 1)); - std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}, - static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, {}); + std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}; + const auto static_output_shapes = shape_inference(broadcast_v3.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({2, 3, 6})); } @@ -79,18 +74,16 @@ TEST(StaticShapeInferenceTest, BroadcastNumpyTest) { auto broadcast_v3 = std::make_shared(input, target_shape, op::BroadcastType::NUMPY); int32_t target_shape_val[] = {1, 16, 50, 50}; - std::map> constant_data; - constant_data[1] = - std::make_shared(ngraph::element::Type_t::i32, ov::Shape{4}, target_shape_val); + std::unordered_map constant_data{{1, {element::Type_t::i32, ov::Shape{4}, target_shape_val}}}; + + std::vector static_input_shapes = {StaticShape{16, 1, 1}, StaticShape{4}}; - std::vector static_input_shapes = {StaticShape{16, 1, 1}, StaticShape{4}}, - static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(broadcast_v3.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 16, 50, 50})); static_input_shapes = {StaticShape{16, 1, 1}, StaticShape{4}}; - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, {}), NodeValidationFailure); + + EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes), NodeValidationFailure); } TEST(StaticShapeInferenceTest, BroadcastNumpyConstantTest) { @@ -99,9 +92,9 @@ TEST(StaticShapeInferenceTest, BroadcastNumpyConstantTest) { std::make_shared(element::i32, ov::Shape{4}, std::vector{1, 16, 50, 50}); auto broadcast_v3 = std::make_shared(input, target_shape, op::BroadcastType::NUMPY); - std::vector static_input_shapes = {StaticShape{16, 1, 1}, StaticShape{4}}, - static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, {}); + std::vector static_input_shapes = {StaticShape{16, 1, 1}, StaticShape{4}}; + + const auto static_output_shapes = shape_inference(broadcast_v3.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 16, 50, 50})); } @@ -114,21 +107,16 @@ TEST(StaticShapeInferenceTest, BroadcastExplicitTest) { int32_t target_shape_val[] = {1, 16, 50, 50}; int32_t axes_mapping_val[] = {1}; - std::map> constant_data; - constant_data[1] = - std::make_shared(ngraph::element::Type_t::i32, ov::Shape{4}, target_shape_val); - constant_data[2] = - 
std::make_shared(ngraph::element::Type_t::i32, ov::Shape{1}, axes_mapping_val); + std::unordered_map constant_data{{1, {element::Type_t::i32, ov::Shape{4}, target_shape_val}}, + {2, {element::Type_t::i32, ov::Shape{1}, axes_mapping_val}}}; std::vector static_input_shapes = {StaticShape{16}, StaticShape{4}, StaticShape{1}}; - std::vector static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(broadcast_v3.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 16, 50, 50})); constant_data.erase(1); - EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, constant_data), - NodeValidationFailure); - EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, {}), NodeValidationFailure); + EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes, constant_data), NodeValidationFailure); + EXPECT_THROW(shape_inference(broadcast_v3.get(), static_input_shapes), NodeValidationFailure); } TEST(StaticShapeInferenceTest, BroadcastExplicitConstantTest) { @@ -140,8 +128,7 @@ TEST(StaticShapeInferenceTest, BroadcastExplicitConstantTest) { std::make_shared(input, target_shape, axes_mapping, op::BroadcastType::EXPLICIT); std::vector static_input_shapes = {StaticShape{16}, StaticShape{4}, StaticShape{1}}; - std::vector static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v3.get(), static_input_shapes, static_output_shapes, {}); + const auto static_output_shapes = shape_inference(broadcast_v3.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 16, 50, 50})); } @@ -154,18 +141,16 @@ TEST(StaticShapeInferenceTest, BroadcastV1PDPDTest) { std::make_shared(input, target_shape, op::AutoBroadcastSpec(op::AutoBroadcastType::PDPD, 1)); int32_t target_shape_val[] = {2, 3, 6}; - std::map> constant_data; - constant_data[1] = - std::make_shared(ngraph::element::Type_t::i32, ov::Shape{3}, target_shape_val); + std::unordered_map constant_data{{1, {element::Type_t::i32, ov::Shape{3}, target_shape_val}}}; - std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}, - static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v1.get(), static_input_shapes, static_output_shapes, constant_data); + std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}; + + const auto static_output_shapes = shape_inference(broadcast_v1.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({2, 3, 6})); static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}; - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(broadcast_v1.get(), static_input_shapes, static_output_shapes, {}), NodeValidationFailure); + + EXPECT_THROW(shape_inference(broadcast_v1.get(), static_input_shapes), NodeValidationFailure); } TEST(StaticShapeInferenceTest, BroadcastV1NumpyTest) { @@ -174,18 +159,16 @@ TEST(StaticShapeInferenceTest, BroadcastV1NumpyTest) { auto broadcast_v1 = std::make_shared(input, target_shape); int32_t target_shape_val[] = {2, 3, 6}; - std::map> constant_data; - constant_data[1] = - std::make_shared(ngraph::element::Type_t::i32, ov::Shape{3}, target_shape_val); + std::unordered_map constant_data{{1, {element::Type_t::i32, ov::Shape{3}, target_shape_val}}}; - std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}, - static_output_shapes = {StaticShape{}}; - 
shape_inference(broadcast_v1.get(), static_input_shapes, static_output_shapes, constant_data); + std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}; + + const auto static_output_shapes = shape_inference(broadcast_v1.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({2, 3, 6})); static_input_shapes = {StaticShape{3, 1}, StaticShape{3}}; - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(broadcast_v1.get(), static_input_shapes, static_output_shapes, {}), NodeValidationFailure); + + EXPECT_THROW(shape_inference(broadcast_v1.get(), static_input_shapes), NodeValidationFailure); } TEST(StaticShapeInferenceTest, BroadcastV1ExplicitTest) { @@ -196,18 +179,15 @@ TEST(StaticShapeInferenceTest, BroadcastV1ExplicitTest) { int32_t target_shape_val[] = {2, 3, 1}; int32_t axes_mapping_val[] = {1, 2}; - std::map> constant_data; - constant_data[1] = - std::make_shared(ngraph::element::Type_t::i32, ov::Shape{3}, target_shape_val); - constant_data[2] = - std::make_shared(ngraph::element::Type_t::i32, ov::Shape{2}, axes_mapping_val); - - std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; - shape_inference(broadcast_v1.get(), static_input_shapes, static_output_shapes, constant_data); + std::unordered_map constant_data{{1, {element::Type_t::i32, ov::Shape{3}, target_shape_val}}, + {2, {element::Type_t::i32, ov::Shape{2}, axes_mapping_val}}}; + + std::vector static_input_shapes = {StaticShape{3, 1}, StaticShape{3}, StaticShape{2}}; + + const auto static_output_shapes = shape_inference(broadcast_v1.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({2, 3, 1})); static_input_shapes = {StaticShape{3, 1}, StaticShape{3}, StaticShape{2}}; - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(broadcast_v1.get(), static_input_shapes, static_output_shapes, {}), NodeValidationFailure); + + EXPECT_THROW(shape_inference(broadcast_v1.get(), static_input_shapes), NodeValidationFailure); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/bucketize_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/bucketize_shape_inference_test.cpp index 02a17e2c99cb23..472d8d09835b63 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/bucketize_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/bucketize_shape_inference_test.cpp @@ -22,7 +22,7 @@ TEST_F(BucketizeV3StaticShapeInferenceTest, default_ctor) { op->set_with_right_bound(false); input_shapes = ShapeVector{{3, 2, 7, 89}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({3, 2, 7, 89})); @@ -34,7 +34,7 @@ TEST_F(BucketizeV3StaticShapeInferenceTest, dynamic_rank_inputs) { op = make_op(data, buckets, element::i32); input_shapes = ShapeVector{{10, 12, 1}, {5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10, 12, 1})); @@ -46,7 +46,7 @@ TEST_F(BucketizeV3StaticShapeInferenceTest, static_rank_inputs) { op = make_op(data, buckets); input_shapes = ShapeVector{{100, 11}, {1}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), 
input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({100, 11})); @@ -58,7 +58,7 @@ TEST_F(BucketizeV3StaticShapeInferenceTest, bucket_incorrect_rank) { op = make_op(data, buckets, element::i32); input_shapes = ShapeVector{{100, 11}, {2, 1}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Buckets input must be a 1D tensor")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/concat_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/concat_shape_inference_test.cpp index 444a1f7dec6bd3..026a4aee4977ac 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/concat_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/concat_shape_inference_test.cpp @@ -67,7 +67,7 @@ INSTANTIATE_TEST_SUITE_P( /** \brief Check shape_infer for concat op on static shapes. */ TEST_P(ConcatStaticShapeInferenceTest, concat_static) { - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes.front(), exp_shape); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/convolution_backprop_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/convolution_backprop_shape_inference_test.cpp index 53b6fa0382300d..a3919cd258102f 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/convolution_backprop_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/convolution_backprop_shape_inference_test.cpp @@ -122,7 +122,7 @@ TEST_F(ConvolutionBackpropDataV1StaticShapeInferenceTest, 2d_inputs_dynamic_rank op = make_op(data, filters, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{3, 6, 5, 5}, {6, 1, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 1, 7, 7})); @@ -142,7 +142,7 @@ TEST_F(ConvolutionBackpropDataV1StaticShapeInferenceTest, 3d_auto_pad_same_lower op = make_op(data, filters, out_spatial, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{3, 6, 5, 5, 5}, {6, 2, 3, 3, 3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 2, 2, 1, 3})); @@ -161,11 +161,10 @@ TEST_F(ConvolutionBackpropDataV1StaticShapeInferenceTest, 3d_auto_pad_same_upper op = make_op(data, filters, out_spatial, strides, pads_begin, pads_end, dilations, auto_pad); int32_t spatial_dims[] = {2, 6, 1}; - const auto const_map = - std::map{{2, std::make_shared(element::i32, Shape{3}, spatial_dims)}}; + const auto const_map = std::unordered_map{{2, {element::i32, Shape{3}, spatial_dims}}}; input_shapes = ShapeVector{{3, 5, 5, 5, 5}, {5, 7, 3, 3, 3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_map); + output_shapes = shape_inference(op.get(), input_shapes, const_map); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 7, 2, 6, 1})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/convolution_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/convolution_shape_inference_test.cpp index 
04eecc6067bcd9..2621d3a3b7a501 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/convolution_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/convolution_shape_inference_test.cpp @@ -71,7 +71,7 @@ TEST_F(ConvolutionV1StaticShapeInferenceTest, 2d_auto_pads_same_lower_inputs_dyn op = make_op(data, filters, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{3, 6, 5, 5}, {7, 6, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 7, 5, 5})); @@ -90,7 +90,7 @@ TEST_F(ConvolutionV1StaticShapeInferenceTest, 3d_auto_pad_same_lower_inputs_stat op = make_op(data, filters, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{3, 6, 5, 5, 5}, {7, 6, 3, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 7, 5, 5, 5})); @@ -110,7 +110,7 @@ TEST_F(ConvolutionV1StaticShapeInferenceTest, data_and_filters_num_channels_not_ input_shapes = ShapeVector{{3, 5, 5, 5, 5}, {7, 6, 3, 3, 3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Data batch channel count (5) does not match filter")); } @@ -129,7 +129,7 @@ TEST_F(ConvolutionV1StaticShapeInferenceTest, data_rank_not_compatible_with_filt input_shapes = ShapeVector{{3, 6, 5, 5, 5}, {7, 6, 3, 3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Data batch and filters rank do not match")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_greedy_decoder_seq_len_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_greedy_decoder_seq_len_shape_inference_test.cpp index 4e143270e481b3..853aef2f37877b 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_greedy_decoder_seq_len_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_greedy_decoder_seq_len_shape_inference_test.cpp @@ -26,7 +26,7 @@ TEST_F(CTCGreedyDecoderSeqLenV6StaticShapeInferenceTest, basic) { input_shapes = {StaticShape{4, 100, 1200}, StaticShape{4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({4, 100})); EXPECT_EQ(output_shapes[1], StaticShape({4})); } @@ -36,13 +36,13 @@ TEST_F(CTCGreedyDecoderSeqLenV6StaticShapeInferenceTest, default_ctor) { // Two inputs input_shapes = {StaticShape{4, 100, 1200}, StaticShape{4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({4, 100})); EXPECT_EQ(output_shapes[1], StaticShape({4})); // Three inputs (the last one is optional) input_shapes = {StaticShape{4, 100, 1200}, StaticShape{4}, {}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({4, 100})); EXPECT_EQ(output_shapes[1], StaticShape({4})); } @@ -54,7 +54,7 @@ TEST_F(CTCGreedyDecoderSeqLenV6StaticShapeInferenceTest, incompatible_batch) { 
input_shapes = {StaticShape{4, 100, 1200}, StaticShape{6}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The first dimensions of input tensors must match")) } @@ -66,7 +66,7 @@ TEST_F(CTCGreedyDecoderSeqLenV6StaticShapeInferenceTest, incompatible_seq_len_ra input_shapes = {StaticShape{4, 100, 1200}, StaticShape{4, 1}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The rank of sequence len tensor must be equal to 1")) } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_greedy_decoder_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_greedy_decoder_shape_inference_test.cpp index 77e79873277a6a..e98876f7b02538 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_greedy_decoder_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_greedy_decoder_shape_inference_test.cpp @@ -26,7 +26,7 @@ TEST_F(CTCGreedyDecoderV0StaticShapeInferenceTest, basic) { input_shapes = {StaticShape{100, 3, 1200}, StaticShape{100, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({3, 100, 1, 1})); } @@ -35,7 +35,7 @@ TEST_F(CTCGreedyDecoderV0StaticShapeInferenceTest, decoder_default_ctor) { input_shapes = {StaticShape{100, 3, 1200}, StaticShape{100, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({3, 100, 1, 1})); } @@ -46,7 +46,7 @@ TEST_F(CTCGreedyDecoderV0StaticShapeInferenceTest, incompatible_batch) { input_shapes = {StaticShape{10, 3, 1200}, StaticShape{100, 3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The first dimensions of input tensors must match")) } @@ -58,7 +58,7 @@ TEST_F(CTCGreedyDecoderV0StaticShapeInferenceTest, incompatible_t_dim) { input_shapes = {StaticShape{100, 3, 1200}, StaticShape{100, 5}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The second dimensions of input tensors must match")) } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_loss_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_loss_shape_inference_test.cpp index b69e7ede9901de..71e479e9e57b67 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_loss_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/ctc_loss_shape_inference_test.cpp @@ -29,7 +29,7 @@ TEST_F(CTCLossV4StaticShapeInferenceTest, correct_input_shapes) { auto op = make_op(logits, logit_length, labels, label_length, blank_index); input_shapes = ShapeVector{{10, 120, 28}, {10}, {10, 120}, {10}, {}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10})); @@ -39,7 +39,7 @@ TEST_F(CTCLossV4StaticShapeInferenceTest, default_ctor) { auto op = make_op(); input_shapes = ShapeVector{{12, 120, 28}, {12}, {12, 120}, 
{12}, {}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({12})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/adaptive_avg_pool.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/adaptive_avg_pool.cpp index 44c5eed6790dff..cf6976dac42bd3 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/adaptive_avg_pool.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/adaptive_avg_pool.cpp @@ -57,9 +57,8 @@ TEST_P(AdaptiveAvgPoolV8CpuShapeInferenceTest , shape_inference_with_const_map) const auto axes_node = std::make_shared(element::i32, PartialShape::dynamic()); const auto op = make_op(arg, axes_node); - const auto axes_const = std::make_shared(element::i32, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = ov::Tensor(element::i32, ov::Shape{axes.size()}, axes.data()); + const std::unordered_map constant_data = {{1, axes_tensor}}; unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, constant_data); } @@ -74,4 +73,3 @@ INSTANTIATE_TEST_SUITE_P( } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/adaptive_max_pool.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/adaptive_max_pool.cpp index af07648d0e4e39..4cf9d13a18aab0 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/adaptive_max_pool.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/adaptive_max_pool.cpp @@ -61,8 +61,8 @@ TEST_P(AdaptiveMaxPoolV8CpuShapeInferenceTest , shape_inference_with_const_map) const auto op = make_op(arg, axes_node); const auto axes_const = std::make_shared(element::i32, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = ov::Tensor(element::i32, ov::Shape{axes.size()}, axes.data()); + const std::unordered_map constant_data = {{1, axes_tensor}}; unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, constant_data); } @@ -77,4 +77,3 @@ INSTANTIATE_TEST_SUITE_P( } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/custom_shape_infer.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/custom_shape_infer.cpp index 3cf0753ae34c75..f4a35907253017 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/custom_shape_infer.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/custom_shape_infer.cpp @@ -1,29 +1,31 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include +#include + +#include "custom_shape_infer.hpp" +#include "ie_ngraph_utils.hpp" +#include "openvino/cc/factory.h" #include "openvino/core/partial_shape.hpp" #include "openvino/core/type.hpp" #include "openvino/op/ops.hpp" #include "openvino/op/parameter.hpp" -#include "shape_inference/custom/reshape.hpp" -#include "shape_inference/custom/gather.hpp" -#include "shape_inference/custom/transpose.hpp" 
+#include "shape_inference/custom/adaptive_pooling.hpp" #include "shape_inference/custom/color_convert.hpp" #include "shape_inference/custom/eltwise.hpp" -#include "shape_inference/custom/adaptive_pooling.hpp" #include "shape_inference/custom/fullyconnected.hpp" +#include "shape_inference/custom/gather.hpp" #include "shape_inference/custom/matmul.hpp" #include "shape_inference/custom/ngram.hpp" #include "shape_inference/custom/one_hot.hpp" #include "shape_inference/custom/priorbox.hpp" #include "shape_inference/custom/priorbox_clustered.hpp" +#include "shape_inference/custom/reshape.hpp" #include "shape_inference/custom/shapeof.hpp" #include "shape_inference/custom/strided_slice.hpp" -#include "ie_ngraph_utils.hpp" -#include "custom_shape_infer.hpp" +#include "shape_inference/custom/transpose.hpp" #include "shape_inference/shape_inference_status.hpp" -#include + namespace ov { namespace intel_cpu { namespace unit_test { @@ -84,9 +86,9 @@ void compare_result(const std::vector& ref, const std::vector& input_shapes, - std::vector& output_shapes, - const std::map& constant_data) { + const std::vector& input_shapes, + std::vector& output_shapes, + const std::unordered_map& constant_data) { static std::shared_ptr cusFactory = std::make_shared(); auto shapeInferFactory = cusFactory->create(op->shared_from_this()); ASSERT_TRUE(shapeInferFactory != nullptr); @@ -114,9 +116,9 @@ void cpu_test_shape_infer(ov::Node* op, const void* data = nullptr; ov::element::Type elementType; if (tensorIter != constant_data.end()) { - const auto tensor = tensorIter->second; - data = tensor->get_data_ptr(); - elementType = tensor->get_element_type(); + const auto& tensor = tensorIter->second; + data = tensor.data(); + elementType = tensor.get_element_type(); } else { const auto input_op = op->input_value(port).get_node_shared_ptr(); const auto const_op = ov::as_type_ptr(input_op); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/custom_shape_infer.hpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/custom_shape_infer.hpp index a3e2d149c3319c..7edc933e0eb52a 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/custom_shape_infer.hpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/custom_shape_infer.hpp @@ -2,21 +2,22 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cpu_types.h" -#include -#include -#include #include +#include "common_test_utils/common_utils.hpp" +#include "cpu_types.h" +#include "shape_inference/shape_inference_cpu.hpp" +#include "shape_inference/static_shape.hpp" + #pragma once namespace ov { namespace intel_cpu { namespace unit_test { void cpu_test_shape_infer(ov::Node* op, - const std::vector& input_shapes, - std::vector& output_shapes, - const std::map& constant_data = {}); + const std::vector& input_shapes, + std::vector& output_shapes, + const std::unordered_map& constant_data = {}); using ShapeVector = std::vector; diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/gather.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/gather.cpp index e462cada49134d..1ebb1693a8144c 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/gather.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/gather.cpp @@ -68,7 +68,7 @@ TYPED_TEST_P(CpuShapeInferenceGatherTest, axis_in_const_map) { std::tie(this->axis_val, this->input_shapes, this->exp_shape) = 
params; auto op = this->make_gather(this->input_shapes); - auto axis_tensor = std::make_shared(element::i32, ov::Shape{1}, &this->axis_val); + auto axis_tensor = ov::Tensor(element::i32, ov::Shape{1}, &this->axis_val); this->output_shapes = {this->exp_shape}; unit_test::cpu_test_shape_infer(op.get(), this->input_shapes, this->output_shapes, {{2, axis_tensor}}); @@ -83,4 +83,3 @@ INSTANTIATE_TYPED_TEST_SUITE_P(CpuShapeInfer, CpuShapeInferenceGatherTest, Gathe } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/one_hot.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/one_hot.cpp index 6f5efcf329a89f..57e007c7d9a863 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/one_hot.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/one_hot.cpp @@ -72,15 +72,10 @@ TEST_P(OneHotCpuShapeInferenceTest , shape_inference_with_const_map) { int64_t axis = -1; const auto op = make_op(arg, depth, on, off, axis); - const auto depth_const = std::make_shared(element::i64, ov::Shape{}, std::vector{m_depth}); - const auto on_const = std::make_shared(element::i32, ov::Shape{}, std::vector{m_on}); - const auto off_const = std::make_shared(element::i32, ov::Shape{}, std::vector{m_off}); - const auto depth_tensor = std::make_shared(depth_const); - const auto on_tensor = std::make_shared(on_const); - const auto off_tensor = std::make_shared(off_const); - const std::map& constant_data = {{1, depth_tensor}, - {2, on_tensor}, - {3, off_tensor}}; + const auto depth_tensor = ov::Tensor(element::i64, ov::Shape{}, &m_depth); + const auto on_tensor = ov::Tensor(element::i32, ov::Shape{}, &m_on); + const auto off_tensor = ov::Tensor(element::i32, ov::Shape{}, &m_off); + const std::unordered_map constant_data = {{1, depth_tensor}, {2, on_tensor}, {3, off_tensor}}; unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, constant_data); } @@ -101,15 +96,10 @@ TEST_P(OneHotCpuShapeInferenceThrowExceptionTest, wrong_pattern) { int64_t axis = -1; const auto op = make_op(arg, depth, on, off, axis); - const auto depth_const = std::make_shared(element::i64, ov::Shape{}, std::vector{m_depth}); - const auto on_const = std::make_shared(element::i32, ov::Shape{}, std::vector{m_on}); - const auto off_const = std::make_shared(element::i32, ov::Shape{}, std::vector{m_off}); - const auto depth_tensor = std::make_shared(depth_const); - const auto on_tensor = std::make_shared(on_const); - const auto off_tensor = std::make_shared(off_const); - const std::map& constant_data = {{1, depth_tensor}, - {2, on_tensor}, - {3, off_tensor}}; + const auto depth_tensor = ov::Tensor(element::i64, ov::Shape{}, &m_depth); + const auto on_tensor = ov::Tensor(element::i32, ov::Shape{}, &m_on); + const auto off_tensor = ov::Tensor(element::i32, ov::Shape{}, &m_off); + const std::unordered_map constant_data = {{1, depth_tensor}, {2, on_tensor}, {3, off_tensor}}; // TODO , implementation should throw exception ASSERT_THROW(unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, constant_data), @@ -126,4 +116,3 @@ INSTANTIATE_TEST_SUITE_P( } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/prior_box.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/prior_box.cpp index 80726f1b54e249..5fb1b6f29d5530 100644 --- 
a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/prior_box.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/prior_box.cpp @@ -4,10 +4,11 @@ #include +#include + #include "common_test_utils/test_assertions.hpp" #include "custom_shape_infer.hpp" -#include -#include +#include "openvino/opsets/opset8.hpp" namespace ov { namespace intel_cpu { namespace unit_test { @@ -180,12 +181,8 @@ TEST_P(PriorBoxV0CpuShapeInferenceTest , shape_inference_with_const_map) { const auto image_shape = std::make_shared(element::i32, PartialShape::dynamic()); auto op = make_op(layer_shape, image_shape, attrs); - const auto layer_const = std::make_shared(element::i32, ov::Shape{2}, data[0]); - const auto image_const = std::make_shared(element::i32, ov::Shape{2}, data[1]); - const std::map const_data { - {0, std::make_shared(layer_const)}, - {1, std::make_shared(image_const)}, - }; + const std::unordered_map const_data{{0, {element::i32, ov::Shape{2}, data[0].data()}}, + {1, {element::i32, ov::Shape{2}, data[1].data()}}}; unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, const_data); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/prior_box_clustered.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/prior_box_clustered.cpp index e5b2e53e2db20c..08ac749f6d83b5 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/prior_box_clustered.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/prior_box_clustered.cpp @@ -144,12 +144,10 @@ TEST_P(PriorBoxClusteredV0CpuShapeInferenceTest , shape_inference_with_const_map const auto layer_shape = std::make_shared(element::i32, PartialShape::dynamic()); const auto image_shape = std::make_shared(element::i32, PartialShape::dynamic()); auto op = make_op(layer_shape, image_shape, attrs); - const auto layer_const = std::make_shared(element::i32, ov::Shape{2}, data[0]); - std::map const_data{{0, std::make_shared(layer_const)}}; + std::unordered_map const_data{{0, {element::i32, ov::Shape{2}, data[0].data()}}}; if (input_shapes.size() == 2) { - const auto image_const = std::make_shared(element::i32, ov::Shape{2}, data[1]); - const_data.insert({1, std::make_shared(image_const)}); + const_data.insert({1, {element::i32, ov::Shape{2}, data[1].data()}}); } unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, const_data); } @@ -171,4 +169,3 @@ INSTANTIATE_TEST_SUITE_P( } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/reshape.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/reshape.cpp index ed3f87b2e8ca9a..98687a6b0d1434 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/reshape.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/reshape.cpp @@ -63,9 +63,8 @@ TEST_P(ReshapeCpuShapeInferenceTest , shape_inference_with_const_map) { const auto axes_node = std::make_shared(element::i64, PartialShape::dynamic()); const auto op = make_op(arg, axes_node, specalZero); - const auto axes_const = std::make_shared(element::i64, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const 
std::unordered_map constant_data = {{1, axes_tensor}}; output_shapes.push_back(exp_shape); unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, constant_data); @@ -92,9 +91,8 @@ TEST_P(ReshapeCpuShapeInferenceThrowExceptionTest, wrong_pattern) { const auto axes_node = std::make_shared(element::i64, PartialShape::dynamic()); const auto op = make_op(arg, axes_node, specalZero); - const auto axes_const = std::make_shared(element::i64, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const std::unordered_map constant_data = {{1, axes_tensor}}; std::ostringstream os; os << "[cpu]reshape: the shape of input data "; os << "("; @@ -134,4 +132,3 @@ INSTANTIATE_TEST_SUITE_P( } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/squeeze.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/squeeze.cpp index 6f7dd89a2ef6fb..7de0e1a67edf0f 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/squeeze.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/squeeze.cpp @@ -57,9 +57,9 @@ TEST_P(SqueezeCpuShapeInferenceTest , shape_inference_with_const_map) { const auto axes_node = std::make_shared(element::i64, PartialShape::dynamic()); const auto op = make_op(arg, axes_node); - const auto axes_const = std::make_shared(element::i64, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = axes.empty() ? 
ov::Tensor(element::i64, ov::Shape{axes.size()}) + : ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const std::unordered_map constant_data = {{1, axes_tensor}}; unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, constant_data); } @@ -92,9 +92,8 @@ TEST_P(SqueezeCpuShapeInferenceThrowExceptionTest, wrong_pattern) { const auto axes_node = std::make_shared(element::i64, PartialShape::dynamic()); const auto op = make_op(arg, axes_node); - const auto axes_const = std::make_shared(element::i64, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const std::unordered_map constant_data = {{1, axes_tensor}}; std::ostringstream os; os << "[cpu]squeeze: the shape of input data "; os << "("; @@ -135,4 +134,3 @@ INSTANTIATE_TEST_SUITE_P( } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/strided_slice.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/strided_slice.cpp index b899d6d76a64b1..9a9fe23f00afbe 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/strided_slice.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/strided_slice.cpp @@ -78,15 +78,10 @@ TEST_P(StridedSliceCpuShapeInferenceTest , shape_inference_in_const_map) { const auto stride = std::make_shared(element::i32, input_shapes[3].get_shape()); const auto op = make_op(arg, begin, end, stride, begin_mask, end_mask); - const auto begin_const = std::make_shared(element::i32, input_shapes[1].get_shape(), data[BEGIN]); - const auto end_const = std::make_shared(element::i32, input_shapes[2].get_shape(), data[END]); - const auto stride_const = std::make_shared(element::i32, input_shapes[3].get_shape(), data[STRIDE]); - const auto begin_tensor = std::make_shared(begin_const); - const auto end_tensor = std::make_shared(end_const); - const auto stride_tensor = std::make_shared(stride_const); - const std::map& constant_data = {{1, begin_tensor}, - {2, end_tensor}, - {3, stride_tensor}}; + const auto begin_tensor = ov::Tensor(element::i32, input_shapes[1].get_shape(), data[BEGIN].data()); + const auto end_tensor = ov::Tensor(element::i32, input_shapes[2].get_shape(), data[END].data()); + const auto stride_tensor = ov::Tensor(element::i32, input_shapes[3].get_shape(), data[STRIDE].data()); + const std::unordered_map constant_data = {{1, begin_tensor}, {2, end_tensor}, {3, stride_tensor}}; // implementation depends on some output information of the op op->set_output_type(0, element::i32, {-1, -1, -1}); unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, constant_data); @@ -95,7 +90,7 @@ TEST_P(StridedSliceCpuShapeInferenceTest , shape_inference_in_const_map) { INSTANTIATE_TEST_SUITE_P( CpuShapeInfer, StridedSliceCpuShapeInferenceTest, - Values(make_tuple(unit_test::ShapeVector{{3, 4, 5}, {3}, {3}, {3}}, std::vector>{{100}, {-100}, {-1}}, + Values(make_tuple(unit_test::ShapeVector{{3, 4, 5}, {3}, {3}, {3}}, std::vector>{{100, 100, 100}, {-100, -100, -100}, {-1, -1, -1}}, std::vector(4, 0), std::vector(4, 0), StaticShape({3, 4, 5})), make_tuple(unit_test::ShapeVector{{3, 2, 3}, {3}, {3}, {3}}, std::vector>{{1, 0, 0}, {2, 1, 3}, {1, 1, 1}}, std::vector(4, 0), std::vector(4, 0), StaticShape({1, 1, 3})), @@ -133,4 +128,3 @@ 
TEST(CpuShapeInfer, StridedSliceDefault_stride) { } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/transpose.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/transpose.cpp index c31d9f7917fa38..e0ee0db80898fa 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/transpose.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/transpose.cpp @@ -79,9 +79,9 @@ TEST_P(TransposeCpuShapeInferenceThrowExceptionTest, shape_inference_in_const_ma const auto order = std::make_shared(element::i64, PartialShape::dynamic()); auto op = make_op(arg, order); - const auto axes = std::make_shared(element::i64, ov::Shape{transpose_order.size()}, transpose_order); - const auto const_tensor = std::make_shared(axes); - const std::map const_map = {{1, const_tensor}}; + const auto const_tensor = transpose_order.empty() ? ov::Tensor(element::i64, ov::Shape{transpose_order.size()}) + : ov::Tensor(element::i64, ov::Shape{transpose_order.size()}, transpose_order.data()); + const std::unordered_map const_map = {{1, const_tensor}}; OV_EXPECT_THROW(unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, const_map), ov::Exception, @@ -99,4 +99,3 @@ INSTANTIATE_TEST_SUITE_P( } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/unsqueeze.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/unsqueeze.cpp index c8cbf74874932e..ee484b101d7941 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/unsqueeze.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/unsqueeze.cpp @@ -57,9 +57,8 @@ TEST_P(UnsqueezeCpuShapeInferenceTest , shape_inference_with_const_map) { const auto axes_node = std::make_shared(element::i64, PartialShape::dynamic()); op = std::make_shared(arg, axes_node); - const auto axes_const = std::make_shared(element::i64, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const std::unordered_map constant_data = {{1, axes_tensor}}; output_shapes.push_back(exp_shape); unit_test::cpu_test_shape_infer(op.get(), input_shapes, output_shapes, constant_data); } @@ -95,9 +94,8 @@ TEST_P(UnsqueezeCpuShapeInferenceThrowExceptionTest, wrong_pattern) { const auto axes_node = std::make_shared(element::i64, PartialShape::dynamic()); const auto op = make_op(arg, axes_node); - const auto axes_const = std::make_shared(element::i64, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const std::unordered_map constant_data = {{1, axes_tensor}}; std::ostringstream os; os << "[cpu]unsqueeze: the shape of input data "; os << "("; @@ -135,4 +133,3 @@ INSTANTIATE_TEST_SUITE_P( } // namespace unit_test } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/deformable_convolution_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/deformable_convolution_shape_inference_test.cpp index 
ff7125b3150aa7..d897c45f024bd7 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/deformable_convolution_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/deformable_convolution_shape_inference_test.cpp @@ -55,7 +55,7 @@ TEST_F(DeformableConvolutionV8StaticShapeInferenceTest, pads_same_lower_inputs_d op = make_op(data, offsets, filters, strides, pads_begin, pads_end, dilations, auto_pad, 4, 2); input_shapes = ShapeVector{{1, 4, 5, 5}, {1, 36, 5, 5}, {4, 1, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1, 4, 5, 5})); @@ -76,7 +76,7 @@ TEST_F(DeformableConvolutionV8StaticShapeInferenceTest, pads_same_lower_inputs_d op = make_op(data, offsets, filters, masks, strides, pads_begin, pads_end, dilations, auto_pad, 4, 2); input_shapes = ShapeVector{{1, 4, 5, 5}, {1, 36, 5, 5}, {4, 1, 3, 3}, {1, 18, 5, 5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1, 4, 5, 5})); @@ -96,7 +96,7 @@ TEST_F(DeformableConvolutionV8StaticShapeInferenceTest, pads_same_uper_inputs_st op = make_op(data, offsets, filters, strides, pads_begin, pads_end, dilations, auto_pad, 4, 2); input_shapes = ShapeVector{{1, 4, 5, 5}, {1, 36, 5, 5}, {4, 1, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1, 4, 5, 5})); @@ -117,7 +117,7 @@ TEST_F(DeformableConvolutionV8StaticShapeInferenceTest, pads_same_upper_inputs_s op = make_op(data, offsets, filters, masks, strides, pads_begin, pads_end, dilations, auto_pad, 4, 2); input_shapes = ShapeVector{{1, 4, 5, 5}, {1, 36, 5, 5}, {4, 1, 3, 3}, {1, 18, 5, 5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1, 4, 5, 5})); @@ -140,7 +140,7 @@ TEST_F(DeformableConvolutionV8StaticShapeInferenceTest, mask_channel_dimension_n input_shapes = ShapeVector{{1, 4, 5, 5}, {1, 36, 5, 5}, {4, 1, 3, 3}, {1, 17, 5, 5}}; OV_EXPECT_THROW( - shape_inference(op.get(), input_shapes, output_shapes), + shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr( "The channels dimension of mask input is not compatible with filters and 'deformable group' attribute")); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/deformable_psroi_pooling_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/deformable_psroi_pooling_shape_inference_test.cpp index f136e7cff9f208..5b50f597f4aac4 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/deformable_psroi_pooling_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/deformable_psroi_pooling_shape_inference_test.cpp @@ -1,10 +1,11 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include + #include #include "common_test_utils/test_assertions.hpp" -#include "gmock/gmock.h" #include "openvino/opsets/opset10.hpp" #include "utils.hpp" @@ -36,13 +37,13 @@ TEST_F(DeformablePSROIPoolingV1StaticShapeInferenceTest, default_ctor) { // 2 inputs { input_shapes = {StaticShape{2, 4, 8, 6}, 
StaticShape{rois_dim, 5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } // 3 inputs { input_shapes = {StaticShape{2, 4, 8, 6}, StaticShape{rois_dim, 5}, StaticShape{rois_dim, 20, group_size, group_size}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } } @@ -62,7 +63,7 @@ TEST_F(DeformablePSROIPoolingV1StaticShapeInferenceTest, no_offsets_input) { StaticShape expected_output{rois_dim, output_dim, group_size, group_size}; input_shapes = {StaticShape{2, 4, 8, 6}, StaticShape{rois_dim, 5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } @@ -82,6 +83,6 @@ TEST_F(DeformablePSROIPoolingV1StaticShapeInferenceTest, offsets_input) { StaticShape expected_output{rois_dim, output_dim, group_size, group_size}; input_shapes = {StaticShape{2, 4, 8, 6}, StaticShape{rois_dim, 5}, StaticShape{rois_dim, 20, group_size, group_size}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/depth_to_space_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/depth_to_space_shape_inference_test.cpp index d7fb9d9f4e676a..30a0621a9564a5 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/depth_to_space_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/depth_to_space_shape_inference_test.cpp @@ -24,7 +24,7 @@ TEST_F(DepthToSpaceV0StaticShapeInferenceTest, default_ctor) { const auto op = make_op(); op->set_block_size(2); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{1, 2, 2 * 3, 2 * 1080, 2 * 1616})); @@ -34,7 +34,7 @@ TEST_F(DepthToSpaceV0StaticShapeInferenceTest, block_first) { const auto data = std::make_shared(element::f32, PartialShape::dynamic(4)); const auto op = make_op(data, op_type::DepthToSpaceMode::BLOCKS_FIRST, 2); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{1, 2, 2 * 3, 2 * 1080, 2 * 1616})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/detection_output_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/detection_output_shape_inference_test.cpp index 146a4ec1f2ef37..23db7df4dba312 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/detection_output_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/detection_output_shape_inference_test.cpp @@ -48,7 +48,7 @@ TEST(StaticShapeInferenceTest, detection_output_v0_top_k) { StaticShape{4, 10}, StaticShape{4, 20}}; std::vector output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes[0], StaticShape({1, 1, 56, 7})); } @@ -74,7 +74,7 @@ TEST(StaticShapeInferenceTest, detection_output_v0_no_share_location) { StaticShape{4, 10}, StaticShape{4, 40}}; std::vector 
output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes[0], StaticShape({1, 1, 40, 7})); } @@ -98,7 +98,7 @@ TEST(StaticShapeInferenceTest, detection_output_v0_basic) { StaticShape{4, 10}, StaticShape{4, 20}}; std::vector output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes[0], (StaticShape{1, 1, 800, 7})); } @@ -118,7 +118,7 @@ TEST(StaticShapeInferenceTest, detection_output_v0_default_ctor) { StaticShape{4, 20}}; std::vector output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{1, 1, 800, 7})); } @@ -142,7 +142,7 @@ TEST(StaticShapeInferenceTest, detection_output_v8_top_k) { StaticShape{4, 10}, StaticShape{4, 20}}; std::vector output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes[0], StaticShape({1, 1, 56, 7})); } @@ -167,7 +167,7 @@ TEST(StaticShapeInferenceTest, detection_output_v8_no_share_location) { StaticShape{4, 10}, StaticShape{4, 40}}; std::vector output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes[0], StaticShape({1, 1, 40, 7})); } @@ -190,7 +190,7 @@ TEST(StaticShapeInferenceTest, detection_output_v8_basic) { StaticShape{4, 10}, StaticShape{4, 20}}; std::vector output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes[0], (StaticShape{1, 1, 800, 7})); } @@ -209,6 +209,6 @@ TEST(StaticShapeInferenceTest, detection_output_v8_default_ctor) { StaticShape{4, 20}}; std::vector output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{1, 1, 800, 7})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/einsum_shape_infernce_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/einsum_shape_infernce_test.cpp index 6fce6f5cd15db1..6113fdf8e13f72 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/einsum_shape_infernce_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/einsum_shape_infernce_test.cpp @@ -1,12 +1,14 @@ // Copyright (C) 2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include #include "openvino/op/einsum.hpp" #include "utils.hpp" using namespace ov; using namespace ov::intel_cpu; +using testing::ElementsAre; class EinsumStaticShapeInferenceTest : public OpStaticShapeInferenceTest {}; @@ -14,33 +16,38 @@ TEST_F(EinsumStaticShapeInferenceTest, dot_product) { auto inputs = OutputVector(2, std::make_shared(element::f32, ov::PartialShape::dynamic())); auto op = make_op(inputs, "i,i->"); - check_static_shape(op.get(), {StaticShape{3}, StaticShape{3}}, {StaticShape{}}); + output_shapes = shape_inference(op.get(), ShapeVector{{3}, {3}}); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{})); } TEST_F(EinsumStaticShapeInferenceTest, matmul) { auto inputs = OutputVector(2, std::make_shared(element::f32, ov::PartialShape::dynamic())); auto op = 
make_op(inputs, "ab,bc->ac"); - check_static_shape(op.get(), {StaticShape{2, 3}, StaticShape{3, 4}}, {StaticShape{2, 4}}); + output_shapes = shape_inference(op.get(), ShapeVector{{2, 3}, {3, 4}}); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{2, 4})); } TEST_F(EinsumStaticShapeInferenceTest, trace) { auto I1 = std::make_shared(element::f32, ov::PartialShape::dynamic()); auto op = make_op(OutputVector{I1}, "kii->k"); - check_static_shape(op.get(), {StaticShape{2, 3, 3}}, {StaticShape{2}}); + output_shapes = shape_inference(op.get(), ShapeVector{{2, 3, 3}}); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{2})); } TEST_F(EinsumStaticShapeInferenceTest, transpose) { auto I1 = std::make_shared(element::f32, ov::PartialShape::dynamic()); auto op = make_op(OutputVector{I1}, "ijk->kij"); - check_static_shape(op.get(), {StaticShape{1, 2, 3}}, {StaticShape{3, 1, 2}}); + output_shapes = shape_inference(op.get(), ShapeVector{{1, 2, 3}}); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{3, 1, 2})); } TEST_F(EinsumStaticShapeInferenceTest, multi_matmul) { auto inputs = OutputVector(3, std::make_shared(element::i32, ov::PartialShape::dynamic())); auto op = make_op(inputs, "ab,bcd,bc->ca"); - check_static_shape(op.get(), {StaticShape{2, 5}, StaticShape{5, 3, 6}, StaticShape{5, 3}}, {StaticShape{3, 2}}); + output_shapes = shape_inference(op.get(), ShapeVector{{2, 5}, {5, 3, 6}, {5, 3}}); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{3, 2})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/elementwises.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/elementwises.cpp index b77b1330ea4d31..e1ca0de7d211a8 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/elementwises.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/elementwises.cpp @@ -13,9 +13,8 @@ TEST(StaticShapeInferenceTest, UnaryEltwiseTest) { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); auto node = std::make_shared(data); - std::vector static_input_shapes = {StaticShape{3, 6, 5, 5}}, - static_output_shapes = {StaticShape{}}; - shape_inference(node.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{3, 6, 5, 5}}; + const auto static_output_shapes = shape_inference(node.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({3, 6, 5, 5})); } @@ -29,15 +28,12 @@ TEST(StaticShapeInferenceTest, FakeQuantizeTest) { auto node = std::make_shared(data, il, ih, ol, oh, 256); - std::vector static_input_shapes = { - StaticShape{3, 6, 3, 5}, - StaticShape{1, 3, 1}, - StaticShape{1}, - StaticShape{5}, - StaticShape{1, 1, 1, 1} - }, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{3, 6, 3, 5}, + StaticShape{1, 3, 1}, + StaticShape{1}, + StaticShape{5}, + StaticShape{1, 1, 1, 1}}; - shape_inference(node.get(), static_input_shapes, static_output_shapes); + const auto static_output_shapes = shape_inference(node.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({3, 6, 3, 5})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/embedding_segments_sum_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/embedding_segments_sum_test.cpp index 76eef8cdaaaa51..14c67c657958ff 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/embedding_segments_sum_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/embedding_segments_sum_test.cpp @@ -26,10 +26,9 @@ 
TEST_F(EmbeddingSegmentsSumV3StaticShapeInferenceTest, default_ctor) { input_shapes = {StaticShape{5, 2, 6}, StaticShape{4}, StaticShape{4}, StaticShape{}, StaticShape{}, StaticShape{4}}; int64_t num_segments = 4; - const auto const_map = - std::map{{3, std::make_shared(element::i64, Shape{}, &num_segments)}}; + const auto const_map = std::unordered_map{{3, {element::i64, Shape{}, &num_segments}}}; - shape_inference(op.get(), input_shapes, output_shapes, const_map); + output_shapes = shape_inference(op.get(), input_shapes, const_map); EXPECT_EQ(output_shapes[0], (StaticShape{4, 2, 6})); } @@ -43,7 +42,7 @@ TEST_F(EmbeddingSegmentsSumV3StaticShapeInferenceTest, constant_input) { auto op = make_op(emb_table, indices, segment_ids, num_segments, default_index, per_sample_weights); input_shapes = {StaticShape{5, 2, 6}, StaticShape{4}, StaticShape{4}, StaticShape{}, StaticShape{}, StaticShape{4}}, - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{3, 2, 6})); } @@ -59,10 +58,9 @@ TEST_F(EmbeddingSegmentsSumV3StaticShapeInferenceTest, constant_map) { input_shapes = {StaticShape{5, 2, 6}, StaticShape{4}, StaticShape{4}, StaticShape{}, StaticShape{}, StaticShape{4}}; int64_t num_segm_val = 3; - const auto const_map = - std::map{{3, std::make_shared(element::i64, Shape{}, &num_segm_val)}}; + const auto const_map = std::unordered_map{{3, {element::i64, Shape{}, &num_segm_val}}}; - shape_inference(op.get(), input_shapes, output_shapes, const_map); + output_shapes = shape_inference(op.get(), input_shapes, const_map); EXPECT_EQ(output_shapes[0], (StaticShape{3, 2, 6})); } @@ -76,12 +74,11 @@ TEST_F(EmbeddingSegmentsSumV3StaticShapeInferenceTest, basic) { auto op = make_op(emb_table, indices, segment_ids, num_segments, default_index, per_sample_weights); - check_static_shape( - op.get(), - {StaticShape{5, 2}, StaticShape{4}, StaticShape{4}, StaticShape{}, StaticShape{}, StaticShape{4}}, - {StaticShape{3, 2}}); + output_shapes = shape_inference(op.get(), ShapeVector{{5, 2}, {4}, {4}, {}, {}, {4}}); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{3, 2})); - check_static_shape(op.get(), - {StaticShape{5, 2}, StaticShape{4}, StaticShape{4}, 8, StaticShape{}, StaticShape{4}}, - {StaticShape{8, 2}}); + int64_t num_segm_val = 8; + const auto const_map = std::unordered_map{{3, {element::i64, Shape{}, &num_segm_val}}}; + output_shapes = shape_inference(op.get(), ShapeVector{{5, 2}, {4}, {4}, {}, {}, {4}}, const_map); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{8, 2})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/embeddingbag_offsets_sum_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/embeddingbag_offsets_sum_shape_inference_test.cpp index 76698a8a33f375..dac5ff8307f344 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/embeddingbag_offsets_sum_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/embeddingbag_offsets_sum_shape_inference_test.cpp @@ -1,12 +1,12 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include + #include #include "common_test_utils/test_assertions.hpp" #include "embeddingbag_offsets_shape_inference.hpp" - -#include "gmock/gmock.h" #include "openvino/opsets/opset10.hpp" #include "utils.hpp" @@ -31,19 +31,19 @@ TEST_F(EmbeddingBagOffsetsSumV3StaticShapeInferenceTest, default_ctor) { // 3 inputs { input_shapes = {StaticShape{3, 
4, 5, 6}, StaticShape{2}, StaticShape{batch}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } // 4 inputs { input_shapes = {StaticShape{3, 4, 5, 6}, StaticShape{2}, StaticShape{batch}, StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } // 5 inputs { input_shapes = {StaticShape{3, 4, 5, 6}, StaticShape{2}, StaticShape{batch}, StaticShape{}, StaticShape{2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } } @@ -58,7 +58,7 @@ TEST_F(EmbeddingBagOffsetsSumV3StaticShapeInferenceTest, basic_3in) { auto expected_output = StaticShape{3, 2, 6}; input_shapes = {StaticShape{5, 2, 6}, StaticShape{4}, StaticShape{3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } @@ -73,7 +73,7 @@ TEST_F(EmbeddingBagOffsetsSumV3StaticShapeInferenceTest, basic_4in) { auto expected_output = StaticShape{3, 2, 6}; input_shapes = {StaticShape{5, 2, 6}, StaticShape{4}, StaticShape{3}, StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } @@ -89,6 +89,6 @@ TEST_F(EmbeddingBagOffsetsSumV3StaticShapeInferenceTest, basic_5in) { auto expected_output = StaticShape{3, 2, 6}; input_shapes = {StaticShape{5, 2, 6}, StaticShape{4}, StaticShape{3}, StaticShape{}, StaticShape{4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/embeddingbag_packed_sum_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/embeddingbag_packed_sum_shape_inference_test.cpp index b79b6b92f5ea15..9b90704c42ac6c 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/embeddingbag_packed_sum_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/embeddingbag_packed_sum_shape_inference_test.cpp @@ -1,11 +1,12 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include + #include #include "common_test_utils/test_assertions.hpp" #include "embeddingbag_packed_shape_inference.hpp" -#include "gmock/gmock.h" #include "openvino/opsets/opset10.hpp" #include "utils.hpp" @@ -30,13 +31,13 @@ TEST_F(EmbeddingBagPackedSumV3StaticShapeInferenceTest, default_ctor) { // 2 inputs { input_shapes = {StaticShape{3, 4, 5, 6}, StaticShape{batch, 2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } // 3 inputs { input_shapes = {StaticShape{3, 4, 5, 6}, StaticShape{batch, 2}, StaticShape{batch, 2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], expected_output); } } @@ -49,7 +50,7 @@ TEST_F(EmbeddingBagPackedSumV3StaticShapeInferenceTest, basic_2in) { auto op = make_op(emb_table, indices); input_shapes = {StaticShape{5, 2, 6}, StaticShape{3, 4}}; - shape_inference(op.get(), input_shapes, 
output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{3, 2, 6})); } @@ -61,6 +62,6 @@ TEST_F(EmbeddingBagPackedSumV3StaticShapeInferenceTest, basic_3in) { auto op = make_op(emb_table, indices, per_sample_weights); input_shapes = {StaticShape{5, 2, 6}, StaticShape{3, 4}, StaticShape{3, 4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{3, 2, 6})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_detection_output_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_detection_output_shape_inference_test.cpp index 8238885da10888..75af42b9596937 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_detection_output_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_detection_output_shape_inference_test.cpp @@ -30,7 +30,7 @@ TEST_F(ExperimentalDetectronDetectionOutputV6StaticShapeInferenceTest, default_c op->set_attrs({.05f, .5f, 4.1352f, 12, 20, 7, false, {10.0f, 10.0f, 5.0f, 5.0f}}); input_shapes = ShapeVector{{10, 4}, {10, 48}, {10, 12}, {1, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes, ShapeVector({{7, 4}, {7}, {7}})); } @@ -43,7 +43,7 @@ TEST_F(ExperimentalDetectronDetectionOutputV6StaticShapeInferenceTest, inputs_dy op = make_op(rois, deltas, scores, im_info, make_attrs()); input_shapes = ShapeVector{{10, 4}, {10, 40}, {10, 10}, {1, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes, ShapeVector({{5, 4}, {5}, {5}})); } @@ -56,7 +56,7 @@ TEST_F(ExperimentalDetectronDetectionOutputV6StaticShapeInferenceTest, inputs_st op = make_op(rois, deltas, scores, im_info, make_attrs()); input_shapes = ShapeVector{{10, 4}, {10, 40}, {10, 10}, {1, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes, ShapeVector({{5, 4}, {5}, {5}})); } @@ -69,7 +69,7 @@ TEST_F(ExperimentalDetectronDetectionOutputV6StaticShapeInferenceTest, im_info_b op = make_op(rois, deltas, scores, im_info, make_attrs()); input_shapes = ShapeVector{{10, 4}, {10, 40}, {10, 10}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Input image info shape must be compatible with [1,3]")); } @@ -82,7 +82,7 @@ TEST_F(ExperimentalDetectronDetectionOutputV6StaticShapeInferenceTest, deltas_no op = make_op(rois, deltas, scores, im_info, make_attrs()); input_shapes = ShapeVector{{10, 4}, {10, 40, 1}, {10, 10}, {1, 3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Input deltas rank must be equal to 2")); } @@ -96,7 +96,7 @@ TEST_F(ExperimentalDetectronDetectionOutputV6StaticShapeInferenceTest, rois_1st_ input_shapes = ShapeVector{{9, 4}, {10, 40}, {10, 10}, {1, 3}}; OV_EXPECT_THROW( - shape_inference(op.get(), input_shapes, output_shapes), + shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The first dimension of inputs 'input_rois', 
'input_deltas', 'input_scores' must be the compatible")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_generate_proposal_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_generate_proposal_shape_inference_test.cpp index 288ca8b5867fbe..60921e07627927 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_generate_proposal_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_generate_proposal_shape_inference_test.cpp @@ -29,7 +29,7 @@ TEST_F(ExperimentalDetectronGenerateProposalsSingleImageV6StaticShapeInferenceTe op->set_attrs({0.0f, 0.0f, 100, 0}); input_shapes = ShapeVector{{3}, {12, 4}, {3, 12, 15}, {5, 12, 15}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes, ShapeVector({{100, 4}, {100}})); } @@ -42,7 +42,7 @@ TEST_F(ExperimentalDetectronGenerateProposalsSingleImageV6StaticShapeInferenceTe op = make_op(im_info, anchors, deltas, scores, make_attrs(100)); input_shapes = ShapeVector{{3}, {12, 4}, {3, 12, 15}, {5, 12, 15}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes, ShapeVector({{100, 4}, {100}})); } @@ -55,7 +55,7 @@ TEST_F(ExperimentalDetectronGenerateProposalsSingleImageV6StaticShapeInferenceTe op = make_op(im_info, anchors, deltas, scores, make_attrs(1000)); input_shapes = ShapeVector{{3}, {12, 4}, {3, 120, 15}, {5, 120, 15}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes, ShapeVector({{1000, 4}, {1000}})); } @@ -68,7 +68,7 @@ TEST_F(ExperimentalDetectronGenerateProposalsSingleImageV6StaticShapeInferenceTe op = make_op(im_info, anchors, deltas, scores, make_attrs(40)); input_shapes = ShapeVector{{4}, {12, 4}, {3, 120, 15}, {5, 120, 15}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The 'input_im_info' shape is expected to be a compatible with [3]")); } @@ -81,7 +81,7 @@ TEST_F(ExperimentalDetectronGenerateProposalsSingleImageV6StaticShapeInferenceTe op = make_op(im_info, anchors, deltas, scores, make_attrs(40)); input_shapes = ShapeVector{{3}, {12, 4}, {3, 120, 15, 1}, {5, 120, 15}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The 'input_deltas' input is expected to be a 3D")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_prior_grid_generator_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_prior_grid_generator_shape_inference_test.cpp index cb1a39f1fba7cb..7a4bd8b00cff80 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_prior_grid_generator_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_prior_grid_generator_shape_inference_test.cpp @@ -30,7 +30,7 @@ TEST_F(ExperimentalDetectronPriorGridGeneratorV6StaticShapeInferenceTest, defaul op->set_attrs({true, 0, 0, 5.0f, 5.0f}); input_shapes = ShapeVector{{3, 4}, {1, 5, 7, 2}, {1, 5, 50, 50}}; - shape_inference(op.get(), input_shapes, 
output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes, ShapeVector({{42, 4}})); } @@ -42,7 +42,7 @@ TEST_F(ExperimentalDetectronPriorGridGeneratorV6StaticShapeInferenceTest, inputs op = make_op(priors, feat_map, im_data, make_attrs(false)); input_shapes = ShapeVector{{10, 4}, {1, 2, 4, 5}, {1, 2, 100, 100}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({4, 5, 10, 4})); @@ -55,7 +55,7 @@ TEST_F(ExperimentalDetectronPriorGridGeneratorV6StaticShapeInferenceTest, inputs op = make_op(priors, feat_map, im_data, make_attrs(true)); input_shapes = ShapeVector{{10, 4}, {1, 2, 4, 5}, {1, 2, 100, 100}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({200, 4})); @@ -68,7 +68,7 @@ TEST_F(ExperimentalDetectronPriorGridGeneratorV6StaticShapeInferenceTest, feat_m op = make_op(priors, feat_map, im_data, make_attrs(true)); input_shapes = ShapeVector{{10, 4}, {1, 2, 4, 5, 1}, {1, 2, 100, 100}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Feature_map rank must be equal to 4")); } @@ -80,7 +80,7 @@ TEST_F(ExperimentalDetectronPriorGridGeneratorV6StaticShapeInferenceTest, priors op = make_op(priors, feat_map, im_data, make_attrs(true)); input_shapes = ShapeVector{{10, 5}, {1, 2, 4, 5}, {1, 2, 100, 100}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The last dimension of the 'priors' input must be equal to 4")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_roi_feature_extractor_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_roi_feature_extractor_shape_inference_test.cpp index 51dd87f1b1a433..26ecd6c4d4783a 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_roi_feature_extractor_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_roi_feature_extractor_shape_inference_test.cpp @@ -28,7 +28,7 @@ TEST_F(ExperimentalDetectronROIFeatureExtractorV6StaticShapeInferenceTest, defau op->set_attrs(make_attrs(16)); input_shapes = ShapeVector{{1000, 4}, {1, 5, 8, 8}, {1, 5, 16, 16}, {1, 5, 64, 64}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_THAT(output_shapes, ElementsAre(StaticShape{1000, 5, 16, 16}, StaticShape{1000, 4})); } @@ -40,7 +40,7 @@ TEST_F(ExperimentalDetectronROIFeatureExtractorV6StaticShapeInferenceTest, input op = make_op(OutputVector{rois, layer_0, layer_1}, make_attrs(100)); input_shapes = ShapeVector{{25, 4}, {1, 2, 100, 100}, {1, 2, 20, 300}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_THAT(output_shapes, ElementsAre(StaticShape{25, 2, 100, 100}, StaticShape{25, 4})); } @@ -54,7 +54,7 @@ TEST_F(ExperimentalDetectronROIFeatureExtractorV6StaticShapeInferenceTest, input op = make_op(OutputVector{rois, layer_0, layer_1, layer_2, layer_3}, make_attrs(15)); input_shapes = 
ShapeVector{{25, 4}, {1, 2, 100, 100}, {1, 2, 20, 300}, {1, 2, 30, 30}, {1, 2, 200, 50}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_THAT(output_shapes, ElementsAre(StaticShape{25, 2, 15, 15}, StaticShape{25, 4})); } @@ -67,7 +67,7 @@ TEST_F(ExperimentalDetectronROIFeatureExtractorV6StaticShapeInferenceTest, rois_ op = make_op(OutputVector{rois, layer_0, layer_1, layer_2}, make_attrs(15)); input_shapes = ShapeVector{{25, 4, 1}, {1, 2, 20, 300}, {1, 2, 30, 30}, {1, 2, 200, 50}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Input rois rank must be equal to 2")); } @@ -80,7 +80,7 @@ TEST_F(ExperimentalDetectronROIFeatureExtractorV6StaticShapeInferenceTest, layer op = make_op(OutputVector{rois, layer_0, layer_1, layer_2}, make_attrs(15)); input_shapes = ShapeVector{{25, 4}, {1, 2, 20, 300}, {1, 2, 30, 30}, {1, 3, 200, 50}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The number of channels must be the same for all layers of the pyramid")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_topkrois_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_topkrois_shape_inference_test.cpp index b2bfc2a34373ac..f797a2fcc675c6 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_topkrois_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/experimental_detectron_topkrois_shape_inference_test.cpp @@ -25,7 +25,7 @@ TEST_F(ExperimentalDetectronTopKROIsV6StaticShapeInferenceTest, default_ctor) { op->set_max_rois(100); input_shapes = ShapeVector{{12, 4}, {12}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({100, 4})); @@ -37,7 +37,7 @@ TEST_F(ExperimentalDetectronTopKROIsV6StaticShapeInferenceTest, inputs_dynamic_r op = make_op(input_rois, rois_probs, 5); input_shapes = ShapeVector{{10, 4}, {10}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({5, 4})); @@ -49,7 +49,7 @@ TEST_F(ExperimentalDetectronTopKROIsV6StaticShapeInferenceTest, inputs_static_ra op = make_op(input_rois, rois_probs, 15); input_shapes = ShapeVector{{100, 4}, {100}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({15, 4})); @@ -62,7 +62,7 @@ TEST_F(ExperimentalDetectronTopKROIsV6StaticShapeInferenceTest, input_rois_not_2 input_shapes = ShapeVector{{10, 4, 10}, {10}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The 'input_rois' input is expected to be a 2D.")); } @@ -74,7 +74,7 @@ TEST_F(ExperimentalDetectronTopKROIsV6StaticShapeInferenceTest, rois_prob_not_1d input_shapes = ShapeVector{{10, 4}, {10, 2}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + 
OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The 'rois_probs' input is expected to be a 1D.")); } @@ -86,7 +86,7 @@ TEST_F(ExperimentalDetectronTopKROIsV6StaticShapeInferenceTest, input_rois_secon input_shapes = ShapeVector{{10, 5}, {10}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The second dimension of 'input_rois' should be 4.")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/extract_image_patches_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/extract_image_patches_shape_inference_test.cpp index 11d5a24d8bbd00..b2c3704337cd24 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/extract_image_patches_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/extract_image_patches_shape_inference_test.cpp @@ -25,7 +25,7 @@ TEST_F(StaticShapeExtractImagePatchesV3Test, default_ctor_no_args) { op->set_auto_pad(pad_type); input_shapes = ShapeVector{{10, 8, 12, 6}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10, 72, 2, 1})); @@ -36,7 +36,7 @@ TEST_F(StaticShapeExtractImagePatchesV3Test, data_input_is_dynamic_rank) { op = make_op(data, ov::Shape{3, 3}, ov::Strides{5, 5}, ov::Shape{2, 2}, op::PadType::VALID); input_shapes = ShapeVector{{2, 2, 23, 24}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 18, 4, 4})); @@ -47,7 +47,7 @@ TEST_F(StaticShapeExtractImagePatchesV3Test, data_input_is_static_rank) { op = make_op(data, ov::Shape{3, 3}, ov::Strides{5, 5}, ov::Shape{1, 1}, op::PadType::SAME_UPPER); input_shapes = ShapeVector{{2, 2, 43, 34}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 18, 9, 7})); @@ -57,7 +57,7 @@ TEST_F(StaticShapeExtractImagePatchesV3Test, data_shape_not_compatible_rank_4) { const auto data = std::make_shared(element::f32, ov::PartialShape::dynamic(4)); op = make_op(data, ov::Shape{3, 3}, ov::Strides{5, 5}, ov::Shape{1, 1}, op::PadType::SAME_UPPER); - OV_EXPECT_THROW(shape_inference(op.get(), ShapeVector{{2, 20, 12, 24, 1}}, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), ShapeVector{{2, 20, 12, 24, 1}}), NodeValidationFailure, HasSubstr("input tensor must be 4D tensor")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/eye_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/eye_shape_inference_test.cpp index d2dcd363403fdc..d848d3c04d1582 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/eye_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/eye_shape_inference_test.cpp @@ -2,10 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include #include "common_test_utils/test_assertions.hpp" -#include "gmock/gmock.h" #include "openvino/opsets/opset10.hpp" #include "utils.hpp" @@ -30,7 +31,7 @@ TEST_F(EyeV9StaticShapeInferenceTest, parameters_as_constant) { const auto op = make_op(rows, cols, diag, batch, element::f64); input_shapes = 
ShapeVector{rows->get_shape(), cols->get_shape(), diag->get_shape(), batch->get_shape()}; - shape_inference(op.get(), input_shapes, output_shapes, {}); + const auto output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 5, 4})); @@ -46,13 +47,12 @@ TEST_F(EyeV9StaticShapeInferenceTest, parameters_in_const_data_map) { int32_t rows = 3, cols = 8; auto batch = std::array{2, 4, 1}; - const auto const_data = - std::map{{0, std::make_shared(element::i32, Shape{}, &rows)}, - {1, std::make_shared(element::i32, Shape{1}, &cols)}, - {3, std::make_shared(element::i32, Shape{3}, batch.data())}}; + const auto const_data = std::unordered_map{{0, {element::i32, Shape{}, &rows}}, + {1, {element::i32, Shape{1}, &cols}}, + {3, {element::i32, Shape{3}, batch.data()}}}; input_shapes = ShapeVector{{}, {1}, {1}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 4, 1, 3, 8})); @@ -69,13 +69,13 @@ TEST_F(EyeV9StaticShapeInferenceTest, assert_on_negative_rows) { int64_t rows = -3, cols = 8; auto batch = std::array{2, 4, 1}; const auto const_data = - std::map{{0, std::make_shared(element::i64, Shape{}, &rows)}, - {1, std::make_shared(element::i64, Shape{1}, &cols)}, - {3, std::make_shared(element::i32, Shape{3}, batch.data())}}; + std::unordered_map{{0, {element::i32, Shape{}, &rows}}, + {1, {element::i32, Shape{1}, &cols}}, + {3, {element::i32, Shape{batch.size()}, batch.data()}}}; input_shapes = ShapeVector{{}, {1}, {1}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_data), AssertFailure, HasSubstr("Value -3 not in range [0:")); } @@ -91,13 +91,13 @@ TEST_F(EyeV9StaticShapeInferenceTest, assert_on_negative_columns) { int64_t rows = 3, cols = -8; auto batch = std::array{2, 4, 1}; const auto const_data = - std::map{{0, std::make_shared(element::i64, Shape{}, &rows)}, - {1, std::make_shared(element::i64, Shape{1}, &cols)}, - {3, std::make_shared(element::i32, Shape{3}, batch.data())}}; + std::unordered_map{{0, {element::i32, Shape{}, &rows}}, + {1, {element::i32, Shape{1}, &cols}}, + {3, {element::i32, Shape{batch.size()}, batch.data()}}}; input_shapes = ShapeVector{{}, {1}, {1}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_data), AssertFailure, HasSubstr("Value -8 not in range [0:")); } @@ -113,14 +113,14 @@ TEST_F(EyeV9StaticShapeInferenceTest, assert_on_rows_not_1D) { int64_t cols = 8; auto rows = std::array{2, 1}; auto batch = std::array{2, 4, 1}; - const auto const_data = std::map{ - {0, std::make_shared(element::i64, Shape{rows.size()}, rows.data())}, - {1, std::make_shared(element::i64, Shape{1}, &cols)}, - {3, std::make_shared(element::i32, Shape{batch.size()}, batch.data())}}; + const auto const_data = + std::unordered_map{{0, {element::i32, Shape{rows.size()}, &rows}}, + {1, {element::i32, Shape{1}, &cols}}, + {3, {element::i32, Shape{batch.size()}, batch.data()}}}; input_shapes = ShapeVector{{}, {1}, {1}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_data), NodeValidationFailure, HasSubstr("'num_rows' value 
must be a scalar or 1D tensor. Got:")); } @@ -136,14 +136,14 @@ TEST_F(EyeV9StaticShapeInferenceTest, assert_on_columns_not_1D) { int64_t rows = 8; auto cols = std::array{2, 1}; auto batch = std::array{2, 4, 1}; - const auto const_data = std::map{ - {0, std::make_shared(element::i64, Shape{}, &rows)}, - {1, std::make_shared(element::i64, Shape{cols.size()}, cols.data())}, - {3, std::make_shared(element::i32, Shape{batch.size()}, batch.data())}}; + const auto const_data = + std::unordered_map{{0, {element::i32, Shape{}, &rows}}, + {1, {element::i32, Shape{cols.size()}, &cols}}, + {3, {element::i32, Shape{batch.size()}, batch.data()}}}; input_shapes = ShapeVector{{1}, {}, {1}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_data), NodeValidationFailure, HasSubstr("'num_columns' value must be a scalar or 1D tensor. Got:")); } @@ -158,14 +158,14 @@ TEST_F(EyeV9StaticShapeInferenceTest, assert_on_batch_shape_not_match_shape_in_c int64_t rows = 8, cols = 5; auto batch = std::array{2, 4, 1}; - const auto const_data = std::map{ - {0, std::make_shared(element::i64, Shape{}, &rows)}, - {1, std::make_shared(element::i64, Shape{}, &cols)}, - {3, std::make_shared(element::i32, Shape{batch.size()}, batch.data())}}; + const auto const_data = + std::unordered_map{{0, {element::i32, Shape{}, &rows}}, + {1, {element::i32, Shape{}, &cols}}, + {3, {element::i32, Shape{batch.size()}, batch.data()}}}; input_shapes = ShapeVector{{}, {}, {}, {2}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_data), NodeValidationFailure, HasSubstr("Check 'static_cast(batch_shape[0].get_length()) == " "static_cast(batch_as_shape->rank().get_length())'")); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/fft_base_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/fft_base_shape_inference_test.cpp index d597461d36d636..d333988672d304 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/fft_base_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/fft_base_shape_inference_test.cpp @@ -49,108 +49,96 @@ static std::shared_ptr build_idft_signal() { TEST(StaticShapeInferenceTest, DFTTest) { auto DFT = build_dft(); - std::map> constant_data; int32_t axes_val[] = {1, 2}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}}; - std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}}; - shape_inference(DFT.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(DFT.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 320, 320, 2})); } TEST(StaticShapeInferenceTest, DFTSignalTest) { auto DFT = build_dft_signal(); - std::map> constant_data; + int32_t axes_val[] = {1, 2}; int32_t signal_val[] = {512, 100}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); - constant_data[2] = - std::make_shared(ngraph::element::Type_t::i32, Shape{2}, signal_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}, + {2, 
{element::i32, ov::Shape{2}, signal_val}}}; - std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}; - shape_inference(DFT.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(DFT.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 512, 100, 2})); } TEST(StaticShapeInferenceTest, DFTConstantTest) { auto DFT = build_dft_constant(); - std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}; - shape_inference(DFT.get(), static_input_shapes, static_output_shapes); + const auto static_output_shapes = shape_inference(DFT.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 512, 100, 2})); } TEST(StaticShapeInferenceTest, DFTSignalMissingConstDataTest) { auto DFT = build_dft_signal(); - std::map> constant_data; + int32_t axes_val[] = {1, 2}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}}; - std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(DFT.get(), static_input_shapes, static_output_shapes, constant_data), - NodeValidationFailure); + std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}; + + EXPECT_THROW(shape_inference(DFT.get(), static_input_shapes, constant_data), NodeValidationFailure); } TEST(StaticShapeInferenceTest, IDFTTest) { auto IDFT = build_idft(); - std::map> constant_data; int32_t axes_val[] = {1, 2}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}}; - std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}}; - shape_inference(IDFT.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(IDFT.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 320, 320, 2})); } TEST(StaticShapeInferenceTest, IDFTSignalTest) { auto IDFT = build_idft_signal(); - std::map> constant_data; + int32_t axes_val[] = {1, 2}; int32_t signal_val[] = {512, 100}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); - constant_data[2] = - std::make_shared(ngraph::element::Type_t::i32, Shape{2}, signal_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}, + {2, {element::i32, ov::Shape{2}, signal_val}}}; - std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}; - shape_inference(IDFT.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(IDFT.get(), 
static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 512, 100, 2})); } TEST(StaticShapeInferenceTest, IDFTSignalMissingConstDataTest) { auto IDFT = build_idft_signal(); - std::map> constant_data; + int32_t axes_val[] = {1, 2}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}}; std::vector static_input_shapes = {StaticShape{1, 320, 320, 2}, StaticShape{2}, StaticShape{2}}, static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(IDFT.get(), static_input_shapes, static_output_shapes, constant_data), - NodeValidationFailure); + EXPECT_THROW(shape_inference(IDFT.get(), static_input_shapes, constant_data), NodeValidationFailure); } TEST(StaticShapeInferenceTest, RDFT) { auto input_shape = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); auto axes = std::make_shared(element::i32, PartialShape::dynamic()); auto RDFT = std::make_shared(input_shape, axes); - std::map> constant_data; + int32_t axes_val[] = {2, 3}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}}; - std::vector static_input_shapes = {StaticShape{1, 120, 64, 64}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 120, 64, 64}, StaticShape{2}}; - shape_inference(RDFT.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(RDFT.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 120, 64, 33, 2})); } @@ -159,17 +147,15 @@ TEST(StaticShapeInferenceTest, RDFTWithSignalSizes) { auto axes = std::make_shared(element::i32, PartialShape::dynamic()); auto signal = std::make_shared(element::i32, PartialShape::dynamic()); auto RDFT = std::make_shared(input_shape, axes, signal); - std::map> constant_data; + int32_t axes_val[] = {2, 3}; int32_t signal_val[] = {40, 30}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); - constant_data[2] = - std::make_shared(ngraph::element::Type_t::i32, Shape{2}, signal_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}, + {2, {element::i32, ov::Shape{2}, signal_val}}}; - std::vector static_input_shapes = {StaticShape{1, 120, 64, 64}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 120, 64, 64}, StaticShape{2}, StaticShape{2}}; - shape_inference(RDFT.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(RDFT.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 120, 40, 16, 2})); } @@ -179,10 +165,9 @@ TEST(StaticShapeInferenceTest, RDFTWithConstAxesAndSignalSizes) { auto signal = std::make_shared(element::i32, Shape{2}, std::vector{64, 64}); auto RDFT = std::make_shared(input_shape, axes, signal); - std::vector static_input_shapes = {StaticShape{1, 120, 64, 64}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 120, 64, 64}, StaticShape{2}, StaticShape{2}}; - shape_inference(RDFT.get(), static_input_shapes, static_output_shapes); + const auto static_output_shapes = shape_inference(RDFT.get(), 
static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 120, 64, 33, 2})); } @@ -191,28 +176,25 @@ TEST(StaticShapeInferenceTest, RDFTMissingSignalTensor) { auto axes = std::make_shared(element::i32, PartialShape::dynamic()); auto signal = std::make_shared(element::i32, PartialShape::dynamic()); auto RDFT = std::make_shared(input_shape, axes, signal); - std::map> constant_data; + int32_t axes_val[] = {2, 3}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}}; - std::vector static_input_shapes = {StaticShape{1, 120, 64, 64}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(RDFT.get(), static_input_shapes, static_output_shapes, constant_data), - NodeValidationFailure); + std::vector static_input_shapes = {StaticShape{1, 120, 64, 64}, StaticShape{2}, StaticShape{2}}; + EXPECT_THROW(shape_inference(RDFT.get(), static_input_shapes, constant_data), NodeValidationFailure); } TEST(StaticShapeInferenceTest, IRDFT) { auto input_shape = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1, -1}); auto axes = std::make_shared(element::i32, PartialShape::dynamic()); auto IRDFT = std::make_shared(input_shape, axes); - std::map> constant_data; + int32_t axes_val[] = {2, 3}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}}; - std::vector static_input_shapes = {StaticShape{1, 120, 64, 33, 2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 120, 64, 33, 2}, StaticShape{2}}; - shape_inference(IRDFT.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(IRDFT.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 120, 64, 64})); } @@ -221,17 +203,15 @@ TEST(StaticShapeInferenceTest, IRDFTWithSignalSizes) { auto axes = std::make_shared(element::i32, PartialShape::dynamic()); auto signal = std::make_shared(element::i32, PartialShape::dynamic()); auto IRDFT = std::make_shared(input_shape, axes, signal); - std::map> constant_data; + int32_t axes_val[] = {2, 3}; int32_t signal_val[] = {64, 64}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); - constant_data[2] = - std::make_shared(ngraph::element::Type_t::i32, Shape{2}, signal_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}, + {2, {element::i32, ov::Shape{2}, signal_val}}}; - std::vector static_input_shapes = {StaticShape{1, 120, 64, 33, 2}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{1, 120, 64, 33, 2}, StaticShape{2}, StaticShape{2}}; - shape_inference(IRDFT.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(IRDFT.get(), static_input_shapes, constant_data); ASSERT_EQ(static_output_shapes[0], StaticShape({1, 120, 64, 64})); } @@ -240,12 +220,10 @@ TEST(StaticShapeInferenceTest, IRDFTMissingSignalSizesTensor) { auto axes = std::make_shared(element::i32, PartialShape::dynamic()); auto signal = std::make_shared(element::i32, PartialShape::dynamic()); auto IRDFT = std::make_shared(input_shape, axes, signal); - std::map> constant_data; + int32_t 
axes_val[] = {2, 3}; - constant_data[1] = std::make_shared(ngraph::element::Type_t::i32, Shape{2}, axes_val); + auto constant_data = std::unordered_map{{1, {element::i32, ov::Shape{2}, axes_val}}}; - std::vector static_input_shapes = {StaticShape{1, 120, 64, 33, 2}, StaticShape{2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; - EXPECT_THROW(shape_inference(IRDFT.get(), static_input_shapes, static_output_shapes, constant_data), - NodeValidationFailure); + std::vector static_input_shapes = {StaticShape{1, 120, 64, 33, 2}, StaticShape{2}, StaticShape{2}}; + EXPECT_THROW(shape_inference(IRDFT.get(), static_input_shapes, constant_data), NodeValidationFailure); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_elements_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_elements_shape_inference_test.cpp index 8d5f6ce0eef5e1..c1fc73c1bea49d 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_elements_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_elements_shape_inference_test.cpp @@ -23,7 +23,7 @@ TEST_F(GatherElementsStaticShapeInferenceTest, GatherElements_basic) { input_shapes = {StaticShape{300, 3, 10, 2}, StaticShape{300, 3, 10, 33333}}; output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{300, 3, 10, 33333})); } @@ -35,7 +35,7 @@ TEST_F(GatherElementsStaticShapeInferenceTest, GatherElements_incompatible_rank) op = make_op(data, indices, axis); input_shapes = {StaticShape{1, 2, 3, 4, 5}, StaticShape{1, 2, 3, 4}}; output_shapes = {StaticShape{}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), ov::NodeValidationFailure, HasSubstr("rank must be equal")); } @@ -48,7 +48,7 @@ TEST_F(GatherElementsStaticShapeInferenceTest, GatherElements_incompatible_dims) op = make_op(data, indices, axis); input_shapes = {StaticShape{300, 4, 10, 2}, StaticShape{300, 5, 10, 33333}}; output_shapes = {StaticShape{}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), ov::NodeValidationFailure, HasSubstr("are not consistent")); } @@ -60,6 +60,6 @@ TEST_F(GatherElementsStaticShapeInferenceTest, GatherElements_default_constructo input_shapes = {StaticShape{300, 3, 10, 2}, StaticShape{300, 3, 10, 33333}}; output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{300, 3, 10, 33333})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_nd_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_nd_shape_inference_test.cpp index f3cf457802b7cd..0f017e18de2491 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_nd_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_nd_shape_inference_test.cpp @@ -34,8 +34,7 @@ template void run_gather_nd_test(const GatherNDTestParams& test_params) { auto op = make_gather_nd(test_params.batch_dims); - ShapeVector output_shapes(1); - shape_inference(op.get(), test_params.input_shapes, output_shapes); + auto output_shapes = shape_inference(op.get(), test_params.input_shapes); EXPECT_EQ(output_shapes[0], 
test_params.exp_shape) << "Failed for input shapes: " << ov::util::vector_to_string(test_params.input_shapes) @@ -105,7 +104,7 @@ TYPED_TEST_P(StaticShapeInferenceGatherNDTest, gather_nd_common_default_ctor) { ShapeVector input_shapes{{8, 3, 11, 12}, {8, 5, 2}}; ShapeVector output_shapes(1); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{8, 5, 12})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_shape_inference_test.cpp index 40eb4bf2d3dfc6..9c866155950610 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_shape_inference_test.cpp @@ -4,12 +4,11 @@ #include -#include -#include -#include -#include -#include - +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/util/common_util.hpp" +#include "shape_inference/shape_inference.hpp" #include "utils.hpp" using namespace ov; @@ -61,7 +60,7 @@ TYPED_TEST_P(StaticShapeInferenceGatherTest, axis_const) { auto op = this->make_gather(this->input_shapes, &this->axis_val); - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), this->exp_shape) << "Failed for axis: " << this->axis_val @@ -74,9 +73,9 @@ TYPED_TEST_P(StaticShapeInferenceGatherTest, axis_in_const_map) { std::tie(this->axis_val, this->input_shapes, this->exp_shape) = params; auto op = this->make_gather(this->input_shapes); - auto axis_tensor = std::make_shared(element::i32, Shape{1}, &this->axis_val); + auto axis_tensor = ov::Tensor(element::i32, Shape{1}, &this->axis_val); - shape_inference(op.get(), this->input_shapes, this->output_shapes, {{2, axis_tensor}}); + this->output_shapes = shape_inference(op.get(), this->input_shapes, {{2, axis_tensor}}); ASSERT_EQ(this->output_shapes.front(), this->exp_shape) << "Failed for axis: " << this->axis_val diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_tree_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_tree_shape_inference_test.cpp index e6ef234693b012..d460ffa1217b92 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_tree_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gather_tree_shape_inference_test.cpp @@ -23,7 +23,7 @@ TEST_F(GatherTreeStaticShapeInferenceTest, gather_tree) { input_shapes = {StaticShape{1, 2, 3}, StaticShape{1, 2, 3}, StaticShape{2}, StaticShape{}}; output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{1, 2, 3})); } @@ -31,6 +31,6 @@ TEST_F(GatherTreeStaticShapeInferenceTest, gather_tree_default_ctor) { op = make_op(); input_shapes = {StaticShape{2, 4, 3}, StaticShape{2, 4, 3}, StaticShape{4}, StaticShape{}}; output_shapes = {StaticShape{}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{2, 4, 3})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/grid_sample_shape_inference_test.cpp 
b/src/plugins/intel_cpu/tests/unit/shape_inference_test/grid_sample_shape_inference_test.cpp index b673b069651a11..66a28bd1cb3e72 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/grid_sample_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/grid_sample_shape_inference_test.cpp @@ -23,7 +23,7 @@ TEST_F(GridSampleStaticShapeInferenceTest, GridSample) { output_shapes = {StaticShape{}}; exp_shape = StaticShape{2, 3, 6, 7}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], exp_shape); } @@ -34,6 +34,6 @@ TEST_F(GridSampleStaticShapeInferenceTest, GridSample_default_constructor) { output_shapes = {StaticShape{}}; exp_shape = StaticShape{2, 3, 6, 7}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], exp_shape); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/group_convolution_backprop_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/group_convolution_backprop_shape_inference_test.cpp index 06a32822842f4d..a11ae5e3881ea7 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/group_convolution_backprop_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/group_convolution_backprop_shape_inference_test.cpp @@ -53,8 +53,7 @@ TEST_F(GroupConvolutionBackpropDataStaticShapeInferenceTest, default_ctor) { op->set_auto_pad(op::PadType::EXPLICIT); int32_t spatial_shape[] = {5, 10, 15}; - const auto const_data = - std::map{{2, std::make_shared(element::i32, Shape{3}, spatial_shape)}}; + const auto const_data = std::unordered_map{{2, {element::i32, Shape{3}, spatial_shape}}}; input_shapes = ShapeVector{{1, 6, 10, 12, 2}, {3, 2, 2, 5, 5, 5}, {3}}; auto shape_infer = make_shape_inference(op); @@ -77,8 +76,7 @@ TEST_F(GroupConvolutionBackpropDataStaticShapeInferenceTest, default_ctor_more_i op->set_auto_pad(op::PadType::EXPLICIT); int32_t spatial_shape[] = {5, 10, 15}; - const auto const_data = - std::map{{2, std::make_shared(element::i32, Shape{3}, spatial_shape)}}; + const auto const_data = std::unordered_map{{2, {element::i32, Shape{3}, spatial_shape}}}; // More than three inputs can be provided, but not used input_shapes = ShapeVector{{1, 6, 10, 12, 2}, {3, 2, 2, 5, 5, 5}, {3}, {0}}; @@ -105,7 +103,7 @@ TEST_F(GroupConvolutionBackpropDataStaticShapeInferenceTest, 2d_inputs_dynamic_r op = make_op(data, filters, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{1, 2, 5, 5}, {2, 1, 2, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1, 4, 7, 7})); @@ -125,7 +123,7 @@ TEST_F(GroupConvolutionBackpropDataStaticShapeInferenceTest, 3d_auto_pad_same_lo op = make_op(data, filters, out_spatial, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{3, 6, 5, 5, 5}, {1, 6, 6, 3, 3, 3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 6, 2, 1, 3})); @@ -144,11 +142,10 @@ TEST_F(GroupConvolutionBackpropDataStaticShapeInferenceTest, 3d_auto_pad_same_up op = make_op(data, filters, out_spatial, strides, pads_begin, pads_end, dilations, 
auto_pad); int32_t spatial_dims[] = {2, 6, 1}; - const auto const_data = - std::map{{2, std::make_shared(element::i32, Shape{3}, spatial_dims)}}; + const auto const_data = std::unordered_map{{2, {element::i32, Shape{3}, spatial_dims}}}; input_shapes = ShapeVector{{3, 5, 5, 5, 5}, {1, 5, 1, 3, 3, 3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 1, 2, 6, 1})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/group_convolution_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/group_convolution_shape_inference_test.cpp index c0dda6e598ae50..9b104e9aa7ad0c 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/group_convolution_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/group_convolution_shape_inference_test.cpp @@ -90,7 +90,7 @@ TEST_F(GroupConvolutionV1StaticShapeInferenceTest, 1d_explicit_pads_inputs_stati op = make_op(data, filters, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{1, 12, 20}, {12, 1, 1, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1, 12, 18})); @@ -109,7 +109,7 @@ TEST_F(GroupConvolutionV1StaticShapeInferenceTest, 2d_auto_pads_same_lower_input op = make_op(data, filters, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{1, 4, 5, 5}, {2, 1, 2, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1, 2, 5, 5})); @@ -128,7 +128,7 @@ TEST_F(GroupConvolutionV1StaticShapeInferenceTest, 3d_auto_pad_same_lower_inputs op = make_op(data, filters, strides, pads_begin, pads_end, dilations, auto_pad); input_shapes = ShapeVector{{3, 6, 5, 5, 5}, {1, 6, 6, 3, 3, 3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({3, 6, 5, 5, 5})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_shape_inference_test.cpp index 229178fb0f1048..1910cdeb948434 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_cell_shape_inference_test.cpp @@ -30,7 +30,7 @@ TEST_F(GRUCellV3StaticShapeInferenceTest, default_ctor) { StaticShape{gates_count * hidden_size, hidden_size}, // R StaticShape{gates_count * hidden_size}}; // B - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } @@ -54,7 +54,7 @@ TEST_F(GRUCellV3StaticShapeInferenceTest, default_bias) { StaticShape{gates_count * hidden_size, hidden_size}, // R StaticShape{gates_count * hidden_size}}; // B - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } @@ -79,7 +79,7 @@ 
TEST_F(GRUCellV3StaticShapeInferenceTest, with_bias) { output_shapes = {StaticShape{}, StaticShape{}}; - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } @@ -115,7 +115,7 @@ TEST_F(GRUCellV3StaticShapeInferenceTest, linear_before) { output_shapes = {StaticShape{}, StaticShape{}}; - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } @@ -139,6 +139,6 @@ TEST_F(GRUCellV3StaticShapeInferenceTest, dynamic_rank_inputs) { StaticShape{gates_count * hidden_size, hidden_size}, // R StaticShape{gates_count * hidden_size}}; // B - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_shape_inference_test.cpp index c42f1c8c65934c..3be0fe38665a54 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/gru_sequence_shape_inference_test.cpp @@ -33,7 +33,7 @@ TEST_F(GRUSequenceV5StaticShapeInferenceTest, default_ctor) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(gru_sequence.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru_sequence.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } @@ -65,7 +65,7 @@ TEST_F(GRUSequenceV5StaticShapeInferenceTest, FORWARD) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(gru_sequence.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru_sequence.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } @@ -108,7 +108,7 @@ TEST_F(GRUSequenceV5StaticShapeInferenceTest, FORWARD_linear_before) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, (gates_count + 1) * hidden_size}}; // B - shape_inference(gru_sequence.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru_sequence.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } @@ -140,7 +140,7 @@ TEST_F(GRUSequenceV5StaticShapeInferenceTest, REVERSE) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(gru_sequence.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru_sequence.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, 
hidden_size})); } @@ -172,7 +172,7 @@ TEST_F(GRUSequenceV5StaticShapeInferenceTest, BIDIRECTIONAL) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(gru_sequence.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru_sequence.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/interpolate_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/interpolate_shape_inference_test.cpp index c25b9245638f7e..68fe1766dafb7e 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/interpolate_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/interpolate_shape_inference_test.cpp @@ -30,11 +30,10 @@ TEST_F(InterpolateV0StaticShapeInferenceTest, default_ctor_no_attributes) { op->set_attrs(attrs); int32_t out_shape_v[] = {10, 20, 30}; - const auto const_data = - std::map{{1, std::make_shared(element::i32, Shape{3}, out_shape_v)}}; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{3}, out_shape_v}}}; input_shapes = ShapeVector{{5, 2, 128, 128, 128, 64}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10, 2, 20, 128, 128, 30})); @@ -48,7 +47,7 @@ TEST_F(InterpolateV0StaticShapeInferenceTest, out_shape_as_constant) { op = make_op(img, out_shape, attrs); input_shapes = ShapeVector{{5, 2, 128, 128, 128}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({5, 100, 128, 100, 128})); @@ -62,11 +61,10 @@ TEST_F(InterpolateV0StaticShapeInferenceTest, all_inputs_dynamic_rank_use_scales op = make_op(img, out_shape, attrs); int32_t out_shape_v[] = {10, 20, 30}; - const auto const_data = - std::map{{1, std::make_shared(element::i32, Shape{3}, out_shape_v)}}; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{3}, out_shape_v}}}; input_shapes = ShapeVector{{5, 2, 128, 128, 128, 64}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({5, 2, 10, 128, 20, 30})); @@ -80,11 +78,10 @@ TEST_F(InterpolateV0StaticShapeInferenceTest, all_inputs_static_rank_use_sizes) op = make_op(img, out_shape, attrs); int32_t out_shape_v[] = {10, 20, 30}; - const auto const_data = - std::map{{1, std::make_shared(element::i32, Shape{3}, out_shape_v)}}; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{3}, out_shape_v}}}; input_shapes = ShapeVector{{5, 2, 128, 128, 128, 64}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10, 20, 30, 128, 128, 64})); @@ -111,12 +108,11 @@ TEST_F(InterpolateV4StaticShapeInferenceTest, default_ctor_no_attributes) { float scales_v[] = 
{1.5f, 3.0f, 0.2f}; int32_t axes_v[] = {2, 0, 5}; - const auto const_data = - std::map{{2, std::make_shared(element::f32, Shape{3}, scales_v)}, - {3, std::make_shared(element::i32, Shape{3}, axes_v)}}; + const auto const_data = std::unordered_map{{2, {element::f32, Shape{3}, scales_v}}, + {3, {element::i32, Shape{3}, axes_v}}}; input_shapes = ShapeVector{{5, 2, 128, 128, 128, 64}, {3}, {3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({15, 2, 192, 128, 128, 12})); @@ -132,7 +128,7 @@ TEST_F(InterpolateV4StaticShapeInferenceTest, scales_as_constant) { op = make_op(img, sizes, scales, axes, attrs); input_shapes = ShapeVector{{5, 2, 128, 128, 128}, {1}, {2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({5, 4, 128, 89, 128})); @@ -146,7 +142,7 @@ TEST_F(InterpolateV4StaticShapeInferenceTest, sizes_as_constant) { op = make_op(img, sizes, scales, axes, attrs); input_shapes = ShapeVector{{5, 2, 128, 128, 128}, {2}, {1}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({5, 5, 128, 10, 128})); @@ -164,12 +160,11 @@ TEST_F(InterpolateV4StaticShapeInferenceTest, all_inputs_dynamic_rank_use_scales float scales_v[] = {1.5f, 3.0f, 0.2f}; int32_t axes_v[] = {2, 0, 5}; - const auto const_data = - std::map{{2, std::make_shared(element::f32, Shape{3}, scales_v)}, - {3, std::make_shared(element::i32, Shape{3}, axes_v)}}; + const auto const_data = std::unordered_map{{2, {element::f32, Shape{3}, scales_v}}, + {3, {element::i32, Shape{3}, axes_v}}}; input_shapes = ShapeVector{{5, 2, 128, 128, 128, 64}, {3}, {3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({18, 3, 193, 129, 129, 12})); @@ -186,12 +181,11 @@ TEST_F(InterpolateV4StaticShapeInferenceTest, all_inputs_static_rank_use_sizes) int32_t sizes_v[] = {10, 50, 60}; int32_t axes_v[] = {1, 0, 3}; - const auto const_data = - std::map{{1, std::make_shared(element::i32, Shape{3}, sizes_v)}, - {3, std::make_shared(element::i32, Shape{3}, axes_v)}}; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{3}, sizes_v}}, + {3, {element::i32, Shape{3}, axes_v}}}; input_shapes = ShapeVector{{5, 2, 128, 128, 128, 64}, {3}, {3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({50, 10, 128, 60, 128, 64})); @@ -218,12 +212,11 @@ TEST_F(InterpolateV11StaticShapeInferenceTest, default_ctor_no_attributes) { float scales_v[] = {1.5f, 3.0f, 0.2f}; int32_t axes_v[] = {2, 0, 5}; - const auto const_data = - std::map{{1, std::make_shared(element::f32, Shape{3}, scales_v)}, - {2, std::make_shared(element::i32, Shape{3}, axes_v)}}; + const auto const_data = std::unordered_map{{1, {element::f32, Shape{3}, scales_v}}, + {2, {element::i32, Shape{3}, axes_v}}}; input_shapes = ShapeVector{{5, 2, 
128, 128, 128, 64}, {3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({15, 2, 192, 128, 128, 12})); @@ -238,7 +231,7 @@ TEST_F(InterpolateV11StaticShapeInferenceTest, scales_as_constant) { op = make_op(img, scales, axes, attrs); input_shapes = ShapeVector{{5, 2, 128, 128, 128}, {2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({5, 4, 128, 89, 128})); @@ -251,7 +244,7 @@ TEST_F(InterpolateV11StaticShapeInferenceTest, sizes_as_constant) { op = make_op(img, sizes, axes, attrs); input_shapes = ShapeVector{{5, 2, 128, 128, 128}, {2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({5, 5, 128, 10, 128})); @@ -267,12 +260,11 @@ TEST_F(InterpolateV11StaticShapeInferenceTest, all_inputs_dynamic_rank_use_scale float scales_v[] = {1.5f, 3.0f, 0.2f}; int32_t axes_v[] = {2, 0, 5}; - const auto const_data = - std::map{{1, std::make_shared(element::f32, Shape{3}, scales_v)}, - {2, std::make_shared(element::i32, Shape{3}, axes_v)}}; + const auto const_data = std::unordered_map{{1, {element::f32, Shape{3}, scales_v}}, + {2, {element::i32, Shape{3}, axes_v}}}; input_shapes = ShapeVector{{5, 2, 128, 128, 128, 64}, {3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({15, 2, 192, 128, 128, 12})); @@ -288,12 +280,11 @@ TEST_F(InterpolateV11StaticShapeInferenceTest, all_inputs_static_rank_use_sizes) int32_t sizes_v[] = {10, 50, 60}; int32_t axes_v[] = {1, 0, 3}; - const auto const_data = - std::map{{1, std::make_shared(element::i32, Shape{3}, sizes_v)}, - {2, std::make_shared(element::i32, Shape{3}, axes_v)}}; + const auto const_data = std::unordered_map{{1, {element::i32, Shape{3}, sizes_v}}, + {2, {element::i32, Shape{3}, axes_v}}}; input_shapes = ShapeVector{{5, 2, 128, 128, 128, 64}, {3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({50, 10, 128, 60, 128, 64})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/logical_not_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/logical_not_shape_inference_test.cpp index f51a6a8cc7f400..2e7945731b7a54 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/logical_not_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/logical_not_shape_inference_test.cpp @@ -2,8 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include "common_test_utils/test_assertions.hpp" -#include "gmock/gmock.h" #include "openvino/op/logical_not.hpp" #include "openvino/op/parameter.hpp" #include "utils.hpp" @@ -25,7 +26,7 @@ TEST_F(LogicalNotStaticShapeInferenceTest, static_rank) { this->input_shapes = {StaticShape{3, 4, 7, 5}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes); + 
this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({3, 4, 7, 5})); } @@ -36,7 +37,7 @@ TEST_F(LogicalNotStaticShapeInferenceTest, dynamic_rank) { this->input_shapes = {StaticShape{3, 1, 5, 2}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); ASSERT_EQ(this->output_shapes.front(), StaticShape({3, 1, 5, 2})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference_test.cpp index 23d7d31515ad29..9042a26859f143 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_cell_shape_inference_test.cpp @@ -30,7 +30,7 @@ TEST_F(LSTMCellV4StaticShapeInferenceTest, default_ctor) { StaticShape{gates_count * hidden_size, input_size}, StaticShape{gates_count * hidden_size, hidden_size}, StaticShape{gates_count * hidden_size}}, - shape_inference(lstm_cell.get(), input_shapes, output_shapes); + output_shapes = shape_inference(lstm_cell.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, hidden_size})); } @@ -55,7 +55,7 @@ TEST_F(LSTMCellV4StaticShapeInferenceTest, basic_shape_infer) { StaticShape{gates_count * hidden_size, input_size}, StaticShape{gates_count * hidden_size, hidden_size}, StaticShape{gates_count * hidden_size}}, - shape_inference(lstm_cell.get(), input_shapes, output_shapes); + output_shapes = shape_inference(lstm_cell.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, hidden_size})); } @@ -81,9 +81,8 @@ TEST(StaticShapeInferenceTest, LSTMCellV0Test) { StaticShape{gates_count * hidden_size, input_size}, StaticShape{gates_count * hidden_size, hidden_size}, StaticShape{gates_count * hidden_size}, - StaticShape{3 * hidden_size}}, - static_output_shapes = {StaticShape{}, StaticShape{}}; - shape_inference(lstm_cell.get(), static_input_shapes, static_output_shapes); + StaticShape{3 * hidden_size}}; + const auto static_output_shapes = shape_inference(lstm_cell.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size})); ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, hidden_size})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp index bf6ef9e3676125..8b4692d843f1b4 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/lstm_seq_shape_inference_test.cpp @@ -35,7 +35,7 @@ TEST_F(LSTMSequenceV0StaticShapeInferenceTest, default_ctor) { StaticShape{num_directions, gates_count * hidden_size}, // B StaticShape{num_directions, (gates_count - 1) * hidden_size}}; // P - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); @@ -71,7 +71,7 @@ TEST_F(LSTMSequenceV0StaticShapeInferenceTest, FORWARD_without_P) { StaticShape{num_directions, 
gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); @@ -109,7 +109,7 @@ TEST_F(LSTMSequenceV0StaticShapeInferenceTest, FORWARD_with_P) { StaticShape{num_directions, gates_count * hidden_size}, // B StaticShape{num_directions, (gates_count - 1) * hidden_size}}; // P - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); @@ -145,7 +145,7 @@ TEST_F(LSTMSequenceV0StaticShapeInferenceTest, REVERSE) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); @@ -180,7 +180,7 @@ TEST_F(LSTMSequenceV0StaticShapeInferenceTest, BIDIRECTIONAL) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); @@ -212,7 +212,7 @@ TEST_F(LSTMSequenceV5StaticShapeInferenceTest, default_ctor) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); @@ -248,7 +248,7 @@ TEST_F(LSTMSequenceV5StaticShapeInferenceTest, FORWARD) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); @@ -284,7 +284,7 @@ TEST_F(LSTMSequenceV5StaticShapeInferenceTest, REVERSE) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); @@ -319,7 +319,7 @@ TEST_F(LSTMSequenceV5StaticShapeInferenceTest, BIDIRECTIONAL) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R 
StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 3); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/make_shape_inference.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/make_shape_inference.cpp index c6f9e315b63c69..65840a85967283 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/make_shape_inference.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/make_shape_inference.cpp @@ -3,36 +3,38 @@ // #include -#include -#include -#include -#include -#include -#include "ngraph_functions/builders.hpp" -#include #include -#include +#include + +#include "openvino/core/coordinate_diff.hpp" +#include "openvino/op/ops.hpp" +#include "openvino/op/parameter.hpp" +#include "ov_ops/type_relaxed.hpp" +#include "shape_inference/shape_inference.hpp" using namespace ov; using namespace ov::intel_cpu; +using ov::op::v0::MatMul; +using ov::op::v0::Parameter; +using ov::op::v0::Result; TEST(StaticShapeInferenceTest, MakeShapeInference) { - auto inp1_f32 = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); - auto inp2_f32 = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); + auto inp1_f32 = std::make_shared(element::f32, PartialShape::dynamic(4)); + auto inp2_f32 = std::make_shared(element::f32, PartialShape::dynamic(4)); - auto inp1 = std::make_shared(element::i8, PartialShape{-1, -1, -1, -1}); - auto inp2 = std::make_shared(element::i8, PartialShape{-1, -1, -1, -1}); + auto inp1 = std::make_shared(element::i8, PartialShape::dynamic(4)); + auto inp2 = std::make_shared(element::i8, PartialShape::dynamic(4)); - auto matMulRelaxed = std::make_shared>( - *as_type_ptr(ngraph::builder::makeMatMul(inp1_f32, inp2_f32, false, false)), - element::f32); + auto matMulRelaxed = std::make_shared>( + *as_type_ptr(std::make_shared(inp1_f32, inp2_f32, false, false)), + element::f32); auto matMul = matMulRelaxed->clone_with_new_inputs({inp1, inp2}); - ngraph::ResultVector results; - results.push_back(std::make_shared(matMul->output(0))); + ov::ResultVector results; + results.push_back(std::make_shared(matMul->output(0))); - auto function = std::make_shared(results, ngraph::ParameterVector{inp1, inp2}, "testFunction"); + auto model = std::make_shared(results, ov::ParameterVector{inp1, inp2}, "testFunction"); std::atomic_flag wrongPrcFlag; wrongPrcFlag.clear(); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/matmul_shape_inference.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/matmul_shape_inference.cpp index 99ca54e4d11b33..c3b73d3e9738dc 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/matmul_shape_inference.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/matmul_shape_inference.cpp @@ -81,7 +81,7 @@ TEST_P(MatMulTest, no_input_transpose) { std::vector static_input_shapes = {a_shape, b_shape}, static_output_shapes = {StaticShape{}}; - shape_inference(matmul.get(), static_input_shapes, static_output_shapes); + static_output_shapes = shape_inference(matmul.get(), static_input_shapes); ASSERT_EQ(static_output_shapes.front(), exp_shape); } @@ -91,7 +91,7 @@ TEST_P(MatMulTest, transpose_input_a) { const auto a_transpose = make_transpose_input(a_shape); 
std::vector static_input_shapes = {a_transpose, b_shape}, static_output_shapes = {StaticShape{}}; - shape_inference(matmul.get(), static_input_shapes, static_output_shapes); + static_output_shapes = shape_inference(matmul.get(), static_input_shapes); ASSERT_EQ(static_output_shapes.front(), exp_shape); } @@ -101,7 +101,7 @@ TEST_P(MatMulTest, transpose_input_b) { const auto b_transpose = make_transpose_input(b_shape); std::vector static_input_shapes = {a_shape, b_transpose}, static_output_shapes = {StaticShape{}}; - shape_inference(matmul.get(), static_input_shapes, static_output_shapes); + static_output_shapes = shape_inference(matmul.get(), static_input_shapes); ASSERT_EQ(static_output_shapes.front(), exp_shape); } @@ -113,6 +113,6 @@ TEST_P(MatMulTest, transpose_inputs_a_b) { std::vector static_input_shapes = {a_transpose, b_transpose}, static_output_shapes = {StaticShape{}}; - shape_inference(matmul.get(), static_input_shapes, static_output_shapes); + static_output_shapes = shape_inference(matmul.get(), static_input_shapes); ASSERT_EQ(static_output_shapes.front(), exp_shape); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/one_hot_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/one_hot_shape_inference_test.cpp index 1c8ab310b5bc9f..487fd0890dc191 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/one_hot_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/one_hot_shape_inference_test.cpp @@ -20,9 +20,8 @@ TEST(StaticShapeInferenceTest, OneHotTestConstantInput) { int64_t axis = -1; auto ont_hot = std::make_shared(indices, depth, on_value, off_value, axis); // Test StaticShape - std::vector static_input_shapes = {StaticShape{3}, StaticShape{}, StaticShape{}, StaticShape{}}, - static_output_shapes = {StaticShape{}}; - shape_inference(ont_hot.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{3}, StaticShape{}, StaticShape{}, StaticShape{}}; + const auto static_output_shapes = shape_inference(ont_hot.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], (StaticShape{3, 2})); } @@ -38,17 +37,12 @@ TEST(StaticShapeInferenceTest, OneHotTestConstantMap) { int32_t on_value[] = {1}; int32_t off_value[] = {0}; - std::map> constant_data; - constant_data[1] = - std::make_shared(element::Type_t::i64, Shape{}, depth_value); - constant_data[2] = - std::make_shared(element::Type_t::i32, Shape{}, on_value); - constant_data[3] = - std::make_shared(element::Type_t::i32, Shape{}, off_value); - - std::vector static_input_shapes = {StaticShape{3}, StaticShape{}, StaticShape{}, StaticShape{}}, - static_output_shapes = {StaticShape{}}; - shape_inference(ont_hot.get(), static_input_shapes, static_output_shapes, constant_data); + const auto constant_data = std::unordered_map{{1, {element::i64, ov::Shape{}, depth_value}}, + {2, {element::i32, ov::Shape{}, on_value}}, + {1, {element::i32, ov::Shape{}, off_value}}}; + + std::vector static_input_shapes = {StaticShape{3}, StaticShape{}, StaticShape{}, StaticShape{}}; + const auto static_output_shapes = shape_inference(ont_hot.get(), static_input_shapes, constant_data); EXPECT_EQ(static_output_shapes[0], (StaticShape{3, 2})); } @@ -60,18 +54,13 @@ TEST(StaticShapeInferenceTest, OneHotTestConstantMapDefaultCtor) { int32_t on_value[] = {1}; int32_t off_value[] = {0}; - std::map> constant_data; - constant_data[1] = - std::make_shared(element::Type_t::i64, Shape{}, depth_value); - constant_data[2] = - 
std::make_shared(element::Type_t::i32, Shape{}, on_value); - constant_data[3] = - std::make_shared(element::Type_t::i32, Shape{}, off_value); + const auto constant_data = std::unordered_map{{1, {element::i64, ov::Shape{}, depth_value}}, + {2, {element::i32, ov::Shape{}, on_value}}, + {1, {element::i32, ov::Shape{}, off_value}}}; - std::vector static_input_shapes = {StaticShape{3}, StaticShape{}, StaticShape{}, StaticShape{}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{3}, StaticShape{}, StaticShape{}, StaticShape{}}; - shape_inference(ont_hot.get(), static_input_shapes, static_output_shapes, constant_data); + const auto static_output_shapes = shape_inference(ont_hot.get(), static_input_shapes, constant_data); EXPECT_EQ(static_output_shapes[0], (StaticShape{3, 2})); } @@ -88,18 +77,13 @@ TEST(StaticShapeInferenceTest, OneHotTestConstantMapNegativeDepth) { int32_t on_value[] = {1}; int32_t off_value[] = {0}; - std::map> constant_data; - constant_data[1] = - std::make_shared(element::Type_t::i64, Shape{}, depth_value); - constant_data[2] = - std::make_shared(element::Type_t::i32, Shape{}, on_value); - constant_data[3] = - std::make_shared(element::Type_t::i32, Shape{}, off_value); + const auto constant_data = std::unordered_map{{1, {element::i64, ov::Shape{}, depth_value}}, + {2, {element::i32, ov::Shape{}, on_value}}, + {1, {element::i32, ov::Shape{}, off_value}}}; - std::vector static_input_shapes = {StaticShape{3}, StaticShape{}, StaticShape{}, StaticShape{}}, - static_output_shapes = {StaticShape{}}; + std::vector static_input_shapes = {StaticShape{3}, StaticShape{}, StaticShape{}, StaticShape{}}; - OV_EXPECT_THROW(shape_inference(ont_hot.get(), static_input_shapes, static_output_shapes, constant_data), + OV_EXPECT_THROW(shape_inference(ont_hot.get(), static_input_shapes, constant_data), ov::NodeValidationFailure, HasSubstr("can't be negative")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/pad_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/pad_shape_inference_test.cpp index 223ab734db84ff..8887cc5f49c43f 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/pad_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/pad_shape_inference_test.cpp @@ -30,12 +30,11 @@ TYPED_TEST_P(PadStaticShapeInference, default_ctor) { int64_t pads_begin[] = {3, 2, 1, 1}; int32_t pads_end[] = {0, 1, 2, 3}; - const auto const_data = - std::map{{1, std::make_shared(element::i64, Shape{4}, pads_begin)}, - {2, std::make_shared(element::i32, Shape{4}, pads_end)}}; + const auto const_data = std::unordered_map{{1, {element::i64, Shape{4}, pads_begin}}, + {2, {element::i32, Shape{4}, pads_end}}}; this->input_shapes = ShapeVector{{3, 6, 5, 5}, {4}, {4}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes, const_data); + this->output_shapes = shape_inference(op.get(), this->input_shapes, const_data); EXPECT_EQ(this->output_shapes.size(), 1); EXPECT_EQ(this->output_shapes.front(), StaticShape({6, 9, 8, 9})); @@ -51,7 +50,7 @@ TYPED_TEST_P(PadStaticShapeInference, pads_begin_end_value_as_constants) { const auto op = this->make_op(data, pads_begin, pads_end, pad_val, op::PadMode::CONSTANT); this->input_shapes = ShapeVector{{3, 6, 5, 5}, {4}, {4}, {}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(op.get(), this->input_shapes); EXPECT_EQ(this->output_shapes.size(), 1); 
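// --- Illustrative aside (not part of the patch) -----------------------------------------
// In the OneHot tests above, depth/on/off values travel through the constant-data map, but
// only the depth (input port 1) influences the inferred shape: the output is the indices
// shape with one extra dimension of size `depth` inserted at `axis`. A standalone sketch of
// that rule; the helper below is illustrative, not the real shape_infer implementation:
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::size_t> one_hot_shape(std::vector<std::size_t> indices_shape,
                                       std::size_t depth,
                                       std::int64_t axis) {
    const auto out_rank = static_cast<std::int64_t>(indices_shape.size()) + 1;
    if (axis < 0)
        axis += out_rank;  // e.g. axis == -1 appends the new dimension at the end
    assert(axis >= 0 && axis < out_rank);
    indices_shape.insert(indices_shape.begin() + axis, depth);
    return indices_shape;
}

int main() {
    // Mirrors the expectation above: indices {3}, depth 2, axis -1  ->  {3, 2}.
    assert(one_hot_shape({3}, 2, -1) == (std::vector<std::size_t>{3, 2}));
    return 0;
}
// -----------------------------------------------------------------------------------------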
EXPECT_EQ(this->output_shapes.front(), StaticShape({6, 9, 8, 8})); @@ -65,14 +64,13 @@ TYPED_TEST_P(PadStaticShapeInference, pads_begin_end_in_constant_map) { uint64_t pads_begin_data[] = {0, 2, 2, 0}; uint32_t pads_end_data[] = {0, 1, 2, 0}; - const auto const_data = - std::map{{1, std::make_shared(element::u64, Shape{4}, pads_begin_data)}, - {2, std::make_shared(element::u32, Shape{4}, pads_end_data)}}; + const auto const_data = std::unordered_map{{1, {element::u64, Shape{4}, pads_begin_data}}, + {2, {element::u32, Shape{4}, pads_end_data}}}; const auto op = this->make_op(data, pads_begin, pads_end, op::PadMode::REFLECT); this->input_shapes = ShapeVector{{3, 6, 5, 1}, {4}, {4}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes, const_data); + this->output_shapes = shape_inference(op.get(), this->input_shapes, const_data); EXPECT_EQ(this->output_shapes.front(), StaticShape({3, 9, 9, 1})); } @@ -84,13 +82,12 @@ TYPED_TEST_P(PadStaticShapeInference, pads_begin_got_negative_value) { int8_t pads_begin_data[] = {0, -2, -2, 0}; - const auto const_data = - std::map{{1, std::make_shared(element::i8, Shape{4}, pads_begin_data)}}; + const auto const_data = std::unordered_map{{1, {element::i8, Shape{4}, pads_begin_data}}}; const auto op = this->make_op(data, pads_begin, pads_end, op::PadMode::REFLECT); this->input_shapes = ShapeVector{{3, SIZE_MAX, 5, 2}, {4}, {4}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes, const_data); + this->output_shapes = shape_inference(op.get(), this->input_shapes, const_data); EXPECT_EQ(this->output_shapes.front(), StaticShape({3, SIZE_MAX, 3, 2})); } @@ -103,12 +100,11 @@ TYPED_TEST_P(PadStaticShapeInference, pads_end_got_negative_value) { int8_t pads_end_data[] = {0, -3, -2, 0}; - const auto const_data = - std::map{{2, std::make_shared(element::i8, Shape{4}, pads_end_data)}}; + const auto const_data = std::unordered_map{{2, {element::i8, Shape{4}, pads_end_data}}}; this->input_shapes = ShapeVector{{3, 6, 5, SIZE_MAX}, {4}, {4}}; - shape_inference(op.get(), this->input_shapes, this->output_shapes, const_data); + this->output_shapes = shape_inference(op.get(), this->input_shapes, const_data); EXPECT_EQ(this->output_shapes.front(), StaticShape({4, 4, 5, SIZE_MAX})); } @@ -119,11 +115,11 @@ TYPED_TEST_P(PadStaticShapeInference, pads_begin_is_empty) { const auto pads_end = Constant::create(element::i64, Shape{4}, {0, 0, 0, 0}); const auto op = this->make_op(data, pads_begin, pads_end, op::PadMode::REFLECT); - const auto const_data = std::map{{1, std::make_shared(element::u64, Shape{0})}}; + const auto const_data = std::unordered_map{{1, {element::u64, Shape{0}}}}; this->input_shapes = ShapeVector{{3, 6, 5, 2}, {0}, {4}}; - OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes, this->output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes, const_data), NodeValidationFailure, HasSubstr("length of pads_begin mismatches with rank of input")); } @@ -134,11 +130,11 @@ TYPED_TEST_P(PadStaticShapeInference, pads_end_is_empty) { const auto pads_end = std::make_shared(element::i8, PartialShape::dynamic()); const auto op = this->make_op(data, pads_begin, pads_end, op::PadMode::REFLECT); - const auto const_data = std::map{{2, std::make_shared(element::i8, Shape{0})}}; + const auto const_data = std::unordered_map{{2, {element::i8, Shape{0}}}}; this->input_shapes = ShapeVector{{3, 6, 5, 2}, {4}, {0}}; - OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes, this->output_shapes, const_data), + 
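// --- Illustrative aside (not part of the patch) -----------------------------------------
// The Pad expectations above all follow one rule: every output dimension is the input
// dimension plus the corresponding pads_begin and pads_end entries, and the pads are
// signed, so negative values crop. A standalone sketch of that arithmetic (illustrative
// helper, not the plugin code):
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::int64_t> pad_output_shape(const std::vector<std::int64_t>& in,
                                           const std::vector<std::int64_t>& pads_begin,
                                           const std::vector<std::int64_t>& pads_end) {
    assert(in.size() == pads_begin.size() && in.size() == pads_end.size());
    std::vector<std::int64_t> out(in.size());
    for (std::size_t i = 0; i < in.size(); ++i)
        out[i] = in[i] + pads_begin[i] + pads_end[i];  // negative pads shrink the dimension
    return out;
}

int main() {
    // {3,6,5,5} with begin {3,2,1,1} / end {0,1,2,3} -> {6,9,8,9}  (default_ctor case above)
    assert(pad_output_shape({3, 6, 5, 5}, {3, 2, 1, 1}, {0, 1, 2, 3}) ==
           (std::vector<std::int64_t>{6, 9, 8, 9}));
    // {3,6,5,1} with begin {0,2,2,0} / end {0,1,2,0} -> {3,9,9,1}  (constant-map case above)
    assert(pad_output_shape({3, 6, 5, 1}, {0, 2, 2, 0}, {0, 1, 2, 0}) ==
           (std::vector<std::int64_t>{3, 9, 9, 1}));
    return 0;
}
// -----------------------------------------------------------------------------------------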
OV_EXPECT_THROW(shape_inference(op.get(), this->input_shapes, const_data), NodeValidationFailure, HasSubstr("length of pads_end mismatches with rank of input")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/prior_box_clustered_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/prior_box_clustered_shape_inference_test.cpp index a099d4ca5f8800..a7da58358ee259 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/prior_box_clustered_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/prior_box_clustered_shape_inference_test.cpp @@ -31,10 +31,7 @@ TEST_F(PriorBoxClusteredV0StaticShapeInferenceTest, default_ctor_no_args) { int32_t out_size[] = {2, 5}; input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i32, ov::Shape{2}, out_size)}}); + output_shapes = shape_inference(op.get(), input_shapes, {{0, {element::i32, ov::Shape{2}, out_size}}}); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 80})); @@ -49,10 +46,7 @@ TEST_F(PriorBoxClusteredV0StaticShapeInferenceTest, all_inputs_dynamic_rank) { int32_t output_size[] = {2, 5}; input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i32, ov::Shape{2}, output_size)}}); + output_shapes = shape_inference(op.get(), input_shapes, {{0, {element::i32, ov::Shape{2}, output_size}}}); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2, 4 * 2 * 5 * 2})); @@ -67,10 +61,7 @@ TEST_F(PriorBoxClusteredV0StaticShapeInferenceTest, all_inputs_static_rank) { int32_t output_size[] = {5, 2}; input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i32, ov::Shape{2}, output_size)}}); + output_shapes = shape_inference(op.get(), input_shapes, {{0, {element::i32, ov::Shape{2}, output_size}}}); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2, 4 * 5 * 2 * 2})); @@ -83,7 +74,7 @@ TEST_F(PriorBoxClusteredV0StaticShapeInferenceTest, out_size_constant) { op = make_op(out_size, img_size, attrs); input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2, 4 * 4 * 6 * 2})); @@ -96,7 +87,7 @@ TEST_F(PriorBoxClusteredV0StaticShapeInferenceTest, all_inputs_constants) { op = make_op(out_size, img_size, attrs); input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2, 4 * 12 * 16 * 2})); @@ -111,10 +102,7 @@ TEST_F(PriorBoxClusteredV0StaticShapeInferenceTest, invalid_number_of_elements_i int64_t output_size[] = {5, 2, 1}; input_shapes = ShapeVector{{2}, {2}}; - OV_EXPECT_THROW(shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i64, ov::Shape{3}, output_size)}}), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, {{0, {element::i64, ov::Shape{3}, output_size}}}), NodeValidationFailure, HasSubstr("Output size must have two elements")); } @@ -128,10 +116,7 @@ TEST_F(PriorBoxClusteredV0StaticShapeInferenceTest, invalid_input_ranks) { int64_t output_size[] = {5, 2, 1}; input_shapes = 
ShapeVector{{2, 1}, {2}}; - OV_EXPECT_THROW(shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i64, ov::Shape{3}, output_size)}}), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, {{0, {element::i64, ov::Shape{3}, output_size}}}), NodeValidationFailure, HasSubstr("output size input rank 2 must match image shape input rank 1")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/prior_box_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/prior_box_shape_inference_test.cpp index 5fec63e33a57c0..263582507de76c 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/prior_box_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/prior_box_shape_inference_test.cpp @@ -32,10 +32,7 @@ TEST_F(PriorBoxV8StaticShapeInferenceTest, default_ctor_no_args) { int32_t out_size[] = {2, 5}; input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i32, ov::Shape{2}, out_size)}}); + output_shapes = shape_inference(op.get(), input_shapes, {{0, {element::i32, ov::Shape{2}, out_size}}}); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 200})); @@ -50,10 +47,7 @@ TEST_F(PriorBoxV8StaticShapeInferenceTest, all_inputs_dynamic_rank) { int32_t output_size[] = {2, 5}; input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i32, ov::Shape{2}, output_size)}}); + output_shapes = shape_inference(op.get(), input_shapes, {{0, {element::i32, ov::Shape{2}, output_size}}}); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2, 200})); @@ -68,10 +62,7 @@ TEST_F(PriorBoxV8StaticShapeInferenceTest, all_inputs_static_rank) { int32_t output_size[] = {5, 2}; input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i32, ov::Shape{2}, output_size)}}); + output_shapes = shape_inference(op.get(), input_shapes, {{0, {element::i32, ov::Shape{2}, output_size}}}); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2, 200})); @@ -84,7 +75,7 @@ TEST_F(PriorBoxV8StaticShapeInferenceTest, out_size_constant) { op = make_op(out_size, img_size, attrs); input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2, 480})); @@ -97,7 +88,7 @@ TEST_F(PriorBoxV8StaticShapeInferenceTest, all_inputs_constants) { op = make_op(out_size, img_size, attrs); input_shapes = ShapeVector{{2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2, 3840})); @@ -112,10 +103,7 @@ TEST_F(PriorBoxV8StaticShapeInferenceTest, invalid_number_of_elements_in_out_siz int64_t output_size[] = {5, 2, 1}; input_shapes = ShapeVector{{2}, {2}}; - OV_EXPECT_THROW(shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i64, ov::Shape{3}, output_size)}}), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, {{0, {element::i64, ov::Shape{3}, output_size}}}), NodeValidationFailure, HasSubstr("Output size must have two elements")); } @@ -129,10 +117,7 @@ 
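// --- Illustrative aside (not part of the patch) -----------------------------------------
// The PriorBoxClustered and PriorBox expectations above share the same form: the result is
// always a {2, 4 * H * W * priors_per_point} tensor, where H x W comes from the output-size
// input and priors_per_point is derived from the attributes (for the clustered variant, the
// number of width/height pairs). The attribute values themselves are set up earlier in the
// fixtures and are not visible in these hunks, so the count below is assumed. A sketch:
#include <array>
#include <cassert>
#include <cstddef>

std::array<std::size_t, 2> prior_box_shape(std::size_t grid_h,
                                           std::size_t grid_w,
                                           std::size_t priors_per_point) {
    return {2, 4 * grid_h * grid_w * priors_per_point};
}

int main() {
    // Output size {2, 5} with two clustered width/height pairs -> {2, 80}, as asserted above.
    const auto shape = prior_box_shape(2, 5, 2);
    assert(shape[0] == 2 && shape[1] == 80);
    return 0;
}
// -----------------------------------------------------------------------------------------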
TEST_F(PriorBoxV8StaticShapeInferenceTest, invalid_input_ranks) { int64_t output_size[] = {5, 2, 1}; input_shapes = ShapeVector{{2, 1}, {2}}; - OV_EXPECT_THROW(shape_inference(op.get(), - input_shapes, - output_shapes, - {{0, std::make_shared(element::i64, ov::Shape{3}, output_size)}}), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, {{0, {element::i64, ov::Shape{3}, output_size}}}), NodeValidationFailure, HasSubstr("output size input rank 2 must match image shape input rank 1")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/proposal_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/proposal_shape_inference_test.cpp index 4c9237fd2703fb..7ef8f9802b5efc 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/proposal_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/proposal_shape_inference_test.cpp @@ -40,7 +40,7 @@ TYPED_TEST_P(ProposalTest, default_ctor) { this->op->set_attrs(this->make_attrs(10)); this->input_shapes = ShapeVector{{2, 3, 10, 10}, {2, 6, 10, 10}, {3}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(this->op.get(), this->input_shapes); EXPECT_EQ(this->output_shapes.size(), this->exp_out_size()); EXPECT_EQ(this->output_shapes.front(), StaticShape({20, 5})); @@ -54,7 +54,7 @@ TYPED_TEST_P(ProposalTest, all_inputs_dynamic_rank) { this->op = this->make_op(class_probs, class_bbox_deltas, image_shape, this->make_attrs(4)); this->input_shapes = ShapeVector{{2, 3, 10, 10}, {2, 6, 10, 10}, {3}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(this->op.get(), this->input_shapes); EXPECT_EQ(this->output_shapes.size(), this->exp_out_size()); EXPECT_EQ(this->output_shapes[0], StaticShape({8, 5})); @@ -68,7 +68,7 @@ TYPED_TEST_P(ProposalTest, all_inputs_static_rank) { this->op = this->make_op(class_probs, class_bbox_deltas, image_shape, this->make_attrs(5)); this->input_shapes = ShapeVector{{3, 4, 10, 10}, {3, 8, 10, 10}, {4}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(this->op.get(), this->input_shapes); EXPECT_EQ(this->output_shapes.size(), this->exp_out_size()); EXPECT_EQ(this->output_shapes[0], StaticShape({15, 5})); @@ -82,7 +82,7 @@ TYPED_TEST_P(ProposalTest, batch_size_not_compatible) { this->op = this->make_op(class_probs, class_bbox_deltas, image_shape, this->make_attrs(5)); this->input_shapes = ShapeVector{{3, 4, 10, 10}, {4, 8, 10, 10}, {3}}; - OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Batch size inconsistent between class_probs")); } @@ -95,7 +95,7 @@ TYPED_TEST_P(ProposalTest, image_shape_input_not_compatible_shape) { this->op = this->make_op(class_probs, class_bbox_deltas, image_shape, this->make_attrs(5)); this->input_shapes = ShapeVector{{3, 4, 10, 10}, {3, 8, 10, 10}, {5}}; - OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Image_shape must be 1-D tensor and has got 3 or 4 elements")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/psroi_pooling_shape_inference_test.cpp 
b/src/plugins/intel_cpu/tests/unit/shape_inference_test/psroi_pooling_shape_inference_test.cpp index b5b0b6ca2940ed..d32c173c911d14 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/psroi_pooling_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/psroi_pooling_shape_inference_test.cpp @@ -33,7 +33,7 @@ TEST_F(PSROIPoolingV0StaticShapeInferenceTest, default_ctor_avg_mode) { input_shapes = ShapeVector{{1, 45, 10, 10}, {3, 5}}; auto shape_infer = make_shape_inference(op); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({3, 5, 3, 3})); @@ -50,7 +50,7 @@ TEST_F(PSROIPoolingV0StaticShapeInferenceTest, default_ctor_bilinear_mode) { input_shapes = ShapeVector{{1, 75, 10, 10}, {2, 5}}; auto shape_infer = make_shape_inference(op); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 5, 8, 8})); @@ -63,7 +63,7 @@ TEST_F(PSROIPoolingV0StaticShapeInferenceTest, inputs_dynamic_rank) { op = make_op(feat, rois, 4, group, scale, 0, 0, "average"); input_shapes = ShapeVector{{2, 36, 100, 100}, {10, 5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10, 4, 3, 3})); @@ -76,7 +76,7 @@ TEST_F(PSROIPoolingV0StaticShapeInferenceTest, inputs_static_rank) { op = make_op(feat, rois, 2, 1, scale, bins_x, bins_y, "bilinear"); input_shapes = ShapeVector{{2, 24, 20, 100}, {1, 5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({1, 2, 1, 1})); @@ -90,7 +90,7 @@ TEST_F(PSROIPoolingV0StaticShapeInferenceTest, invalid_rois_batch_size) { input_shapes = ShapeVector{{2, 24, 20, 100}, {1, 6}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The second dimension of ROIs input should contain batch id and box coordinates. 
This " "dimension is expected to be equal to 5")); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/range_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/range_shape_inference_test.cpp new file mode 100644 index 00000000000000..a7debe18e3e0ae --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/range_shape_inference_test.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "range_shape_inference.hpp" +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; +using std::make_shared; +using testing::ElementsAre; + +TEST(StaticShapeInferenceTest, Rangev4_i32) { + auto start = make_shared(element::i32, ov::PartialShape{}); + auto stop = make_shared(element::i32, ov::PartialShape{}); + auto step = make_shared(element::i32, ov::PartialShape{}); + auto range = make_shared(start, stop, step, element::i32); + + int32_t start_v = 2, stop_v = 0, step_v = -2; + auto const_data = std::unordered_map{{0, {element::i32, Shape{}, &start_v}}, + {1, {element::i32, Shape{}, &stop_v}}, + {2, {element::i32, Shape{}, &step_v}}}; + + auto output_shapes = shape_inference(range.get(), ShapeVector{{}, {}, {}}, const_data); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{1})); + + step_v = -1; + output_shapes = shape_inference(range.get(), ShapeVector{{}, {}, {}}, const_data); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{2})); + + start_v = -19, stop_v = 19, step_v = 1; + output_shapes = shape_inference(range.get(), ShapeVector{{}, {}, {}}, const_data); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{38})); + + step_v = 3; + output_shapes = shape_inference(range.get(), ShapeVector{{}, {}, {}}, const_data); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{13})); + + start_v = 20, stop_v = -19, step_v = 1; + output_shapes = shape_inference(range.get(), ShapeVector{{}, {}, {}}, const_data); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{0})); +} + +TEST(StaticShapeInferenceTest, Rangev4_f32) { + auto start = make_shared(element::f32, ov::PartialShape{}); + auto stop = make_shared(element::f32, ov::PartialShape{}); + auto step = make_shared(element::f32, ov::PartialShape{}); + auto range = make_shared(start, stop, step, element::f32); + + float start_v = 0.f, stop_v = 1.f, step_v = .25f; + auto const_data = std::unordered_map{{0, {element::f32, Shape{}, &start_v}}, + {1, {element::f32, Shape{}, &stop_v}}, + {2, {element::f32, Shape{}, &step_v}}}; + + auto output_shapes = shape_inference(range.get(), ShapeVector{{}, {}, {}}, const_data); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{4})); + + start_v = -1.f; + output_shapes = shape_inference(range.get(), ShapeVector{{}, {}, {}}, const_data); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{8})); + + stop_v = .875f; + output_shapes = shape_inference(range.get(), ShapeVector{{}, {}, {}}, const_data); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{8})); +} diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/range_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/range_test.cpp deleted file mode 100644 index 81898749ee054d..00000000000000 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/range_test.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "utils.hpp" - -using namespace ov; -using namespace ov::intel_cpu; -using 
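// --- Illustrative aside (not part of the patch) -----------------------------------------
// The new range_shape_inference_test.cpp above feeds Range-v4 its start/stop/step through
// the constant-data map; every expected 1-D shape is simply the element count
// ceil((stop - start) / step), clamped at zero when the interval is empty. A standalone
// sketch of that count (illustrative helper, not the operator's implementation):
#include <cassert>
#include <cmath>
#include <cstddef>

std::size_t range_size(double start, double stop, double step) {
    const double count = std::ceil((stop - start) / step);
    return count < 0.0 ? 0u : static_cast<std::size_t>(count);
}

int main() {
    assert(range_size(2, 0, -2) == 1);           // {1}
    assert(range_size(-19, 19, 3) == 13);        // {13}
    assert(range_size(20, -19, 1) == 0);         // {0}, empty range
    assert(range_size(-1.0, 0.875, 0.25) == 8);  // {8}, f32 case
    return 0;
}
// -----------------------------------------------------------------------------------------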
namespace std; - -TEST(StaticShapeInferenceTest, Rangev4_i32) { - auto start = make_shared(element::i32, ov::PartialShape{}); - auto stop = make_shared(element::i32, ov::PartialShape{}); - auto step = make_shared(element::i32, ov::PartialShape{}); - - auto range = make_shared(start, stop, step, element::i32); - - check_static_shape(range.get(), {2, 0, -2}, {StaticShape{1}}); - check_static_shape(range.get(), {2, 0, -1}, {StaticShape{2}}); - check_static_shape(range.get(), {-19, 19, 1}, {StaticShape{38}}); - check_static_shape(range.get(), {-19, 19, 3}, {StaticShape{13}}); - check_static_shape(range.get(), {20, -19, 1}, {StaticShape{0}}); -} - -TEST(StaticShapeInferenceTest, Rangev4_f32) { - auto start = make_shared(element::f32, ov::PartialShape{}); - auto stop = make_shared(element::f32, ov::PartialShape{}); - auto step = make_shared(element::f32, ov::PartialShape{}); - - auto range = make_shared(start, stop, step, element::f32); - - check_static_shape(range.get(), {0., 1., 0.25}, {StaticShape{4}}); - check_static_shape(range.get(), {-1., 1., 0.25}, {StaticShape{8}}); - check_static_shape(range.get(), {-1., 0.875, 0.25}, {StaticShape{8}}); -} diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/read_value_shape_inference.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/read_value_shape_inference.cpp index 43426ca1f2b6fd..cc627d55533b0e 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/read_value_shape_inference.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/read_value_shape_inference.cpp @@ -30,8 +30,8 @@ void readValueTest() { auto readValue = constructGraph(); // Test StaticShape - std::vector static_input_shapes = {StaticShape{1, 2, 64, 64}}, static_output_shapes = {StaticShape{}}; - shape_inference(readValue.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{1, 2, 64, 64}}; + const auto static_output_shapes = shape_inference(readValue.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], (StaticShape{1, 2, 64, 64})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reduce_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reduce_shape_inference_test.cpp index 94f0c852b7367d..45f587c5c23aab 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reduce_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reduce_shape_inference_test.cpp @@ -28,9 +28,8 @@ TYPED_TEST_P(ReduceStaticShapeInferenceTest, default_ctor) { this->input_shapes = ShapeVector{{1, 6, 7, 8, 4}, {3}}; int32_t axes_val[] = {0, 1, 3}; - const std::map>& constant_data = { - {1, std::make_shared(element::i32, Shape{3}, axes_val)}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes, constant_data); + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{3}, axes_val}}}; + this->output_shapes = shape_inference(this->op.get(), this->input_shapes, constant_data); EXPECT_EQ(this->output_shapes.size(), 1); EXPECT_EQ(this->output_shapes.front(), StaticShape({1, 1, 7, 1, 4})); @@ -43,7 +42,7 @@ TYPED_TEST_P(ReduceStaticShapeInferenceTest, axes_constant) { this->op = this->make_op(data, axes, false); this->input_shapes = {StaticShape{3, 6, 5, 8}, StaticShape{2}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(this->op.get(), this->input_shapes); EXPECT_EQ(this->output_shapes.size(), 1); 
EXPECT_EQ(this->output_shapes.front(), StaticShape({3, 5})); @@ -57,9 +56,8 @@ TYPED_TEST_P(ReduceStaticShapeInferenceTest, axes_param) { this->input_shapes = {StaticShape{3, 6, 5, 8}, StaticShape{2}}; int32_t axes_val[] = {1, 3}; - const std::map>& constant_data = { - {1, std::make_shared(element::i32, Shape{2}, axes_val)}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes, constant_data); + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{2}, axes_val}}}; + this->output_shapes = shape_inference(this->op.get(), this->input_shapes, constant_data); EXPECT_EQ(this->output_shapes.size(), 1); EXPECT_EQ(this->output_shapes.front(), StaticShape({3, 5})); @@ -72,7 +70,7 @@ TYPED_TEST_P(ReduceStaticShapeInferenceTest, axes_constant_keep_dims) { this->op = this->make_op(data, axes, true); this->input_shapes = {StaticShape{3, 6, 5, 8}, StaticShape{2}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(this->op.get(), this->input_shapes); EXPECT_EQ(this->output_shapes.size(), 1); EXPECT_EQ(this->output_shapes.front(), StaticShape({3, 1, 5, 1})); @@ -86,9 +84,8 @@ TYPED_TEST_P(ReduceStaticShapeInferenceTest, axes_param_keep_dims) { this->input_shapes = {StaticShape{3, 6, 5, 8}, StaticShape{2}}; int32_t axes_val[] = {1, 3}; - const std::map>& constant_data = { - {1, std::make_shared(element::i32, Shape{2}, axes_val)}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes, constant_data); + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{2}, axes_val}}}; + this->output_shapes = shape_inference(this->op.get(), this->input_shapes, constant_data); EXPECT_EQ(this->output_shapes.size(), 1); EXPECT_EQ(this->output_shapes.front(), StaticShape({3, 1, 5, 1})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/region_yolo_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/region_yolo_shape_inference_test.cpp index d33ff2b1c14232..e42aabcb3a3d9f 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/region_yolo_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/region_yolo_shape_inference_test.cpp @@ -22,7 +22,7 @@ TEST_F(StaticShapeRegionYoloTest, default_ctor_do_soft_max_no_args) { op->set_end_axis(3); input_shapes = ShapeVector{{10, 8, 12, 6}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10, 8, 72})); @@ -33,7 +33,7 @@ TEST_F(StaticShapeRegionYoloTest, data_input_is_dynamic_rank) { op = make_op(data, 0, 0, 0, true, std::vector(), 1, 3); input_shapes = ShapeVector{{2, 2, 3, 4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 24})); @@ -44,7 +44,7 @@ TEST_F(StaticShapeRegionYoloTest, data_input_is_static_rank) { op = make_op(data, 5, 4, 20, false, std::vector{0, 1}, 1, 3); input_shapes = ShapeVector{{2, 5, 6, 7}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 20, 6, 7})); @@ -54,7 +54,7 @@ TEST_F(StaticShapeRegionYoloTest, data_shape_not_compatible_rank_4) { const auto data = std::make_shared(element::f32, 
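// --- Illustrative aside (not part of the patch) -----------------------------------------
// The Reduce expectations above differ only in keep_dims: reduced axes either collapse to 1
// (keep_dims == true) or disappear from the output (keep_dims == false). A standalone
// sketch, assuming the axes are already non-negative and in range:
#include <cassert>
#include <cstddef>
#include <set>
#include <vector>

std::vector<std::size_t> reduce_shape(const std::vector<std::size_t>& in,
                                      const std::set<std::size_t>& axes,
                                      bool keep_dims) {
    std::vector<std::size_t> out;
    for (std::size_t i = 0; i < in.size(); ++i) {
        if (axes.count(i) == 0)
            out.push_back(in[i]);  // untouched dimension
        else if (keep_dims)
            out.push_back(1);      // reduced but kept as 1
    }
    return out;
}

int main() {
    // {3,6,5,8} reduced over axes {1,3}: {3,5} without keep_dims, {3,1,5,1} with it.
    assert(reduce_shape({3, 6, 5, 8}, {1, 3}, false) == (std::vector<std::size_t>{3, 5}));
    assert(reduce_shape({3, 6, 5, 8}, {1, 3}, true) == (std::vector<std::size_t>{3, 1, 5, 1}));
    return 0;
}
// -----------------------------------------------------------------------------------------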
PartialShape::dynamic()); op = make_op(data, 5, 4, 20, false, std::vector{0, 1}, 1, 3); - OV_EXPECT_THROW(shape_inference(op.get(), ShapeVector{{2, 20, 12, 24, 1}}, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), ShapeVector({{2, 20, 12, 24, 1}})), NodeValidationFailure, HasSubstr("Input must be a tensor of rank 4, but got")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reorg_yolo_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reorg_yolo_shape_inference_test.cpp index 8c6947fa54c4f8..91651234c80509 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reorg_yolo_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reorg_yolo_shape_inference_test.cpp @@ -20,7 +20,7 @@ TEST_F(StaticShapeReorgYoloTest, default_ctor_no_args) { op->set_strides(3); input_shapes = ShapeVector{{2, 9, 12, 6}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 81, 4, 2})); @@ -31,7 +31,7 @@ TEST_F(StaticShapeReorgYoloTest, data_input_is_dynamic_rank) { op = make_op(data, 2); input_shapes = ShapeVector{{2, 12, 12, 24}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 48, 6, 12})); @@ -42,7 +42,7 @@ TEST_F(StaticShapeReorgYoloTest, data_input_is_static_rank) { op = make_op(data, 2); input_shapes = ShapeVector{{2, 20, 12, 24}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 80, 6, 12})); @@ -52,7 +52,7 @@ TEST_F(StaticShapeReorgYoloTest, data_shape_not_compatible_rank_4) { const auto data = std::make_shared(element::f32, PartialShape::dynamic()); op = make_op(data, 2); - OV_EXPECT_THROW(shape_inference(op.get(), ShapeVector{{2, 20, 12, 24, 1}}, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), ShapeVector({{2, 20, 12, 24, 1}})), NodeValidationFailure, HasSubstr("[N, C, H, W] input shape is required")); } @@ -61,7 +61,7 @@ TEST_F(StaticShapeReorgYoloTest, h_dim_not_div_by_stride) { const auto data = std::make_shared(element::f32, PartialShape::dynamic()); op = make_op(data, 2); - OV_EXPECT_THROW(shape_inference(op.get(), ShapeVector{{2, 20, 11, 24}}, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), ShapeVector{{2, 20, 11, 24}}), NodeValidationFailure, HasSubstr("H and W should be divisible by stride")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reverse_sequence_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reverse_sequence_shape_inference_test.cpp index ca367910fe7169..2ad1978785c427 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reverse_sequence_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reverse_sequence_shape_inference_test.cpp @@ -27,7 +27,7 @@ TEST_F(ReverseSequenceV0StaticShapeInferenceTest, default_batch_seq_axes) { auto op = make_op(data, seq_lengths); input_shapes = ShapeVector{{4, 3, 2}, {4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({4, 3, 2})); } @@ -36,7 +36,7 @@ 
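// --- Illustrative aside (not part of the patch) -----------------------------------------
// The ReorgYolo expectations above follow the stride-s space-to-depth rule: channels grow
// by s*s while the spatial dimensions shrink by s, and H and W must be divisible by the
// stride (hence the h_dim_not_div_by_stride failure). A standalone sketch of that rule:
#include <array>
#include <cassert>
#include <cstddef>
#include <stdexcept>

std::array<std::size_t, 4> reorg_yolo_shape(std::array<std::size_t, 4> nchw, std::size_t stride) {
    if (nchw[2] % stride != 0 || nchw[3] % stride != 0)
        throw std::invalid_argument("H and W should be divisible by stride");
    return {nchw[0], nchw[1] * stride * stride, nchw[2] / stride, nchw[3] / stride};
}

int main() {
    // {2,9,12,6} with stride 3 -> {2,81,4,2}; {2,20,12,24} with stride 2 -> {2,80,6,12}.
    assert((reorg_yolo_shape({2, 9, 12, 6}, 3) == std::array<std::size_t, 4>{2, 81, 4, 2}));
    assert((reorg_yolo_shape({2, 20, 12, 24}, 2) == std::array<std::size_t, 4>{2, 80, 6, 12}));
    return 0;
}
// -----------------------------------------------------------------------------------------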
TEST_F(ReverseSequenceV0StaticShapeInferenceTest, set_batch_seq_axes) { auto op = make_op(data, seq_lengths, -1, 1); input_shapes = ShapeVector{{4, 3, 2}, {2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({4, 3, 2})); } @@ -45,14 +45,14 @@ TEST_F(ReverseSequenceV0StaticShapeInferenceTest, invalid_input_shapes_count) { auto op = make_op(data, seq_lengths); input_shapes = ShapeVector{{1, 2, 4}}; - EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), NodeValidationFailure); + EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure); } TEST_F(ReverseSequenceV0StaticShapeInferenceTest, invalid_data_shape_rank) { auto op = make_op(data, seq_lengths); input_shapes = ShapeVector{{4}, {4}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Data input rank should be equal or greater than 2. Got: ")); } @@ -61,7 +61,7 @@ TEST_F(ReverseSequenceV0StaticShapeInferenceTest, invalid_sequence_shape_rank) { auto op = make_op(data, seq_lengths); input_shapes = ShapeVector{{4, 5, 6}, {2, 2}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Sequence lengths rank must be equal to 1. Got: ")); } @@ -70,7 +70,7 @@ TEST_F(ReverseSequenceV0StaticShapeInferenceTest, default_ctor) { auto op = make_op(); input_shapes = ShapeVector{{11, 2, 3}, {11}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({11, 2, 3})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reverse_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reverse_shape_inference_test.cpp index e5f6313a038471..34b010cf353e8b 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/reverse_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/reverse_shape_inference_test.cpp @@ -28,7 +28,7 @@ TEST_F(ReverseV1StaticShapeInferenceTest, axes_index_as_constant) { auto op = make_op(data, Constant::create(element::i16, Shape{4}, {-1000, 1, 2, 2}), Reverse::Mode::INDEX); input_shapes = ShapeVector{{4, 3, 2, 4}, {4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({4, 3, 2, 4})); } @@ -38,9 +38,8 @@ TEST_F(ReverseV1StaticShapeInferenceTest, axes_index_in_constant_data) { input_shapes = ShapeVector{{4, 3, 2, 4}, {4}}; int8_t axes_val[] = {-1, 2, 1}; - auto const_data = - std::map{{1, std::make_shared(element::i8, Shape{3}, axes_val)}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + auto const_data = std::unordered_map{{1, {element::i8, Shape{3}, axes_val}}}; + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes[0], StaticShape({4, 3, 2, 4})); } @@ -51,7 +50,7 @@ TEST_F(ReverseV1StaticShapeInferenceTest, axes_mask_as_constant) { input_shapes = ShapeVector{{4, 3, 2, 4}, {4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({4, 3, 2, 4})); } @@ -62,9 +61,8 @@ TEST_F(ReverseV1StaticShapeInferenceTest, 
axes_mask_in_constant_data) { input_shapes = ShapeVector{{4, 3, 2, 4}, {4}}; bool axes_val[] = {true, true, false, false}; - auto const_data = - std::map{{1, std::make_shared(element::boolean, Shape{4}, axes_val)}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + auto const_data = std::unordered_map{{1, {element::boolean, Shape{4}, axes_val}}}; + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes[0], StaticShape({4, 3, 2, 4})); } @@ -73,7 +71,7 @@ TEST_F(ReverseV1StaticShapeInferenceTest, invalid_axes_mask_length) { auto op = make_op(data, Constant::create(element::boolean, Shape{3}, {false, false, true}), Reverse::Mode::MASK); input_shapes = ShapeVector{{1, 2, 4, 3}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The number of elements in the reversed_axes tensor (3) must match the input data tensor " "rank (4) in 'mask' mode")); @@ -83,7 +81,7 @@ TEST_F(ReverseV1StaticShapeInferenceTest, axes_index_out_of_data_rank) { auto op = make_op(data, Constant::create(element::u8, Shape{3}, {0, 20, 3}), Reverse::Mode::INDEX); input_shapes = ShapeVector{{1, 2, 4, 3}, {3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Some of the provided axes (AxisSet{0, 3, 20}) are out of bounds (input rank: 4)")); } @@ -95,9 +93,8 @@ TEST_F(ReverseV1StaticShapeInferenceTest, default_ctor) { input_shapes = ShapeVector{{11, 2, 3}, {3}}; int64_t axes_val[] = {-1, 2, 0}; - auto const_data = - std::map{{1, std::make_shared(element::i64, Shape{3}, axes_val)}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + auto const_data = std::unordered_map{{1, {element::i64, Shape{3}, axes_val}}}; + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes[0], StaticShape({11, 2, 3})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_cell_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_cell_shape_inference_test.cpp index 8427492edf67a0..2bbd022f966638 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_cell_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_cell_shape_inference_test.cpp @@ -32,7 +32,7 @@ TEST_F(RNNCellV0StaticShapeInferenceTest, default_ctor) { StaticShape{gates_count * hidden_size}}; // B std::vector output_shapes; - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } @@ -57,7 +57,7 @@ TEST_F(RNNCellV0StaticShapeInferenceTest, default_bias) { StaticShape{gates_count * hidden_size}}; // B std::vector output_shapes; - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } @@ -80,7 +80,7 @@ TEST_F(RNNCellV0StaticShapeInferenceTest, with_bias) { StaticShape{gates_count * hidden_size, hidden_size}, // R StaticShape{gates_count * hidden_size}}; // B - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } @@ -105,6 +105,6 
@@ TEST_F(RNNCellV0StaticShapeInferenceTest, dynamic_rank_inputs) { StaticShape{gates_count * hidden_size}}; // B std::vector output_shapes; - shape_inference(gru.get(), input_shapes, output_shapes); + output_shapes = shape_inference(gru.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, hidden_size})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_seq_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_seq_shape_inference_test.cpp index ef409775ab6766..cd29aa4905ca6d 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_seq_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/rnn_seq_shape_inference_test.cpp @@ -33,7 +33,7 @@ TEST_F(RNNSequenceV5StaticShapeInferenceTest, default_ctor) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } @@ -64,7 +64,7 @@ TEST_F(RNNSequenceV5StaticShapeInferenceTest, FORWARD) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } @@ -95,7 +95,7 @@ TEST_F(RNNSequenceV5StaticShapeInferenceTest, REVERSE) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } @@ -126,7 +126,7 @@ TEST_F(RNNSequenceV5StaticShapeInferenceTest, BIDIRECTIONAL) { StaticShape{num_directions, gates_count * hidden_size, hidden_size}, // R StaticShape{num_directions, gates_count * hidden_size}}; // B - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({batch_size, num_directions, seq_len, hidden_size})); EXPECT_EQ(output_shapes[1], StaticShape({batch_size, num_directions, hidden_size})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_align_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_align_shape_inference_test.cpp index 2e8eca5e286cd0..696c5ebe454df4 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_align_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_align_shape_inference_test.cpp @@ -27,7 +27,7 @@ TYPED_TEST_P(StaticShapeROIAlignTest, default_ctor_no_args) { this->op->set_pooled_w(2); this->input_shapes = ShapeVector{{2, 3, 5, 5}, {7, 4}, {7}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(this->op.get(), this->input_shapes); 
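// --- Illustrative aside (not part of the patch) -----------------------------------------
// The RNNCell and RNNSequence expectations above reduce to two formulas: a cell produces
// {batch, hidden_size}, while a sequence produces Y = {batch, num_directions, seq_len,
// hidden_size} plus Ho = {batch, num_directions, hidden_size}. A standalone sketch; the
// concrete sizes in main() are example values, not the fixture's:
#include <cassert>
#include <cstddef>
#include <vector>

struct RnnSeqShapes {
    std::vector<std::size_t> y;   // full output sequence
    std::vector<std::size_t> ho;  // last hidden state
};

RnnSeqShapes rnn_sequence_shapes(std::size_t batch,
                                 std::size_t num_directions,
                                 std::size_t seq_len,
                                 std::size_t hidden) {
    return {{batch, num_directions, seq_len, hidden}, {batch, num_directions, hidden}};
}

int main() {
    const auto shapes = rnn_sequence_shapes(2, 1, 10, 128);
    assert(shapes.y == (std::vector<std::size_t>{2, 1, 10, 128}));
    assert(shapes.ho == (std::vector<std::size_t>{2, 1, 128}));
    return 0;
}
// -----------------------------------------------------------------------------------------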
EXPECT_EQ(this->output_shapes.size(), 1); EXPECT_EQ(this->output_shapes[0], (StaticShape{7, 3, 2, 2})); @@ -41,7 +41,7 @@ TYPED_TEST_P(StaticShapeROIAlignTest, all_inputs_dynamic_rank) { this->op = this->make_op(data, rois, batch_indices, 2, 2, 2, 1.0f, TypeParam::PoolingMode::AVG); this->input_shapes = ShapeVector{{2, 3, 5, 5}, {10, 4}, {10}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(this->op.get(), this->input_shapes); EXPECT_EQ(this->output_shapes.size(), 1); EXPECT_EQ(this->output_shapes[0], (StaticShape{10, 3, 2, 2})); @@ -55,7 +55,7 @@ TYPED_TEST_P(StaticShapeROIAlignTest, all_inputs_static_rank) { this->op = this->make_op(data, rois, batch_indices, 2, 2, 2, 1.0f, TypeParam::PoolingMode::AVG); this->input_shapes = ShapeVector{{2, 8, 5, 5}, {10, 4}, {10}}; - shape_inference(this->op.get(), this->input_shapes, this->output_shapes); + this->output_shapes = shape_inference(this->op.get(), this->input_shapes); EXPECT_EQ(this->output_shapes.size(), 1); EXPECT_EQ(this->output_shapes[0], (StaticShape{10, 8, 2, 2})); @@ -69,7 +69,7 @@ TYPED_TEST_P(StaticShapeROIAlignTest, incompatible_input_rank) { this->op = this->make_op(data, rois, batch_indices, 2, 2, 2, 1.0f, TypeParam::PoolingMode::AVG); this->input_shapes = ShapeVector{{2, 8, 5}, {10, 3}, {10}}; - OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Expected a 4D tensor for the input data")); } @@ -82,7 +82,7 @@ TYPED_TEST_P(StaticShapeROIAlignTest, incompatible_rois_rank) { this->op = this->make_op(data, rois, batch_indices, 2, 2, 2, 1.0f, TypeParam::PoolingMode::AVG); this->input_shapes = ShapeVector{{2, 8, 5, 5}, {10, 3, 1}, {10}}; - OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Expected a 2D tensor for the ROIs input")); } @@ -94,7 +94,7 @@ TYPED_TEST_P(StaticShapeROIAlignTest, incompatible_batch_indicies_rank) { this->op = this->make_op(data, rois, batch_indices, 2, 2, 2, 1.0f, TypeParam::PoolingMode::AVG); this->input_shapes = ShapeVector{{2, 8, 5, 5}, {10, 3}, {}}; - OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("Expected a 1D tensor for the batch indices input.")); } @@ -107,7 +107,7 @@ TYPED_TEST_P(StaticShapeROIAlignTest, invalid_rois_2nd_dim) { this->op = this->make_op(data, rois, batch_indices, 2, 2, 2, 1.0f, TypeParam::PoolingMode::AVG); this->input_shapes = ShapeVector{{2, 8, 5, 5}, {10, 3}, {10}}; - OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes, this->output_shapes), + OV_EXPECT_THROW(shape_inference(this->op.get(), this->input_shapes), NodeValidationFailure, HasSubstr("op dimension is expected to be equal to 4")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp index 0999e278c95ae6..1bae78a4f4a30e 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp @@ -27,7 +27,7 @@ 
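// --- Illustrative aside (not part of the patch) -----------------------------------------
// Every ROIAlign expectation above is {num_rois, channels, pooled_h, pooled_w}: the ROI
// count comes from the rois/batch-indices inputs, the channel count from the data input,
// and pooled_h/pooled_w are attributes. The failure cases check the input ranks and the
// ROI row width. A standalone sketch of those checks (illustrative helper only):
#include <array>
#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <vector>

// data: {N, C, H, W}; rois: {num_rois, 4}; batch_indices: {num_rois}
std::array<std::size_t, 4> roi_align_shape(const std::vector<std::size_t>& data,
                                           const std::vector<std::size_t>& rois,
                                           const std::vector<std::size_t>& batch_indices,
                                           std::size_t pooled_h,
                                           std::size_t pooled_w) {
    if (data.size() != 4)
        throw std::invalid_argument("Expected a 4D tensor for the input data");
    if (rois.size() != 2 || rois[1] != 4)
        throw std::invalid_argument("Expected a 2D {num_rois, 4} tensor for the ROIs input");
    if (batch_indices.size() != 1)
        throw std::invalid_argument("Expected a 1D tensor for the batch indices input");
    return {rois[0], data[1], pooled_h, pooled_w};
}

int main() {
    // Data {2,3,5,5}, ROIs {7,4}, batch indices {7}, pooled 2x2 -> {7,3,2,2}, as asserted above.
    const auto out = roi_align_shape({2, 3, 5, 5}, {7, 4}, {7}, 2, 2);
    assert((out == std::array<std::size_t, 4>{7, 3, 2, 2}));
    return 0;
}
// -----------------------------------------------------------------------------------------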
TEST_F(ROIPoolingV0StaticShapeInferenceTest, default_ctor) { input_shapes = ShapeVector{{1, 5, 10, 10}, {2, 5}}; auto shape_infer = make_shape_inference(op); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({2, 5, 3, 3})); @@ -40,7 +40,7 @@ TEST_F(ROIPoolingV0StaticShapeInferenceTest, inputs_dynamic_rank) { op = make_op(feat, rois, ov::Shape{5, 5}, 0.9f); input_shapes = ShapeVector{{2, 3, 100, 100}, {10, 5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10, 3, 5, 5})); @@ -53,7 +53,7 @@ TEST_F(ROIPoolingV0StaticShapeInferenceTest, inputs_static_rank) { op = make_op(feat, rois, ov::Shape{7, 5}, 1.9f, "max"); input_shapes = ShapeVector{{2, 3, 20, 100}, {10, 5}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes.front(), StaticShape({10, 3, 7, 5})); @@ -67,7 +67,7 @@ TEST_F(ROIPoolingV0StaticShapeInferenceTest, invalid_rois_batch_size) { input_shapes = ShapeVector{{2, 3, 20, 100}, {10, 6}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("The second dimension of ROIs input should contain batch id and box coordinates. This " "dimension is expected to be equal to 5")); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/roll_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roll_shape_inference_test.cpp index a507324c18c40b..91370a2ae1d589 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/roll_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roll_shape_inference_test.cpp @@ -1,10 +1,11 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include + #include #include "common_test_utils/test_assertions.hpp" -#include "gmock/gmock.h" #include "openvino/opsets/opset10.hpp" #include "utils.hpp" @@ -29,7 +30,7 @@ TEST_F(RollV7StaticShapeInferenceTest, axes_as_constant) { input_shapes = {StaticShape{3, 5}, StaticShape{2}, StaticShape{2}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], input_shapes[0]); } @@ -41,12 +42,12 @@ TEST_F(RollV7StaticShapeInferenceTest, axes_in_const_map) { const auto op = make_op(arg, shift, axes); auto axes_val = std::array{0, 1, -1}; - const auto constant_data = std::map{ - {2, std::make_shared(element::i32, Shape{axes_val.size()}, axes_val.data())}}; + const auto constant_data = + std::unordered_map{{2, {element::i32, Shape{axes_val.size()}, axes_val.data()}}}; input_shapes = {StaticShape{3, 3, 3}, StaticShape{3}, StaticShape{3}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes[0], input_shapes[0]); } @@ -58,12 +59,12 @@ TEST_F(RollV7StaticShapeInferenceTest, axes_over_arg_rank) { const auto op = make_op(arg, shift, axes); auto axes_val = std::array{0, 3, -1}; - const auto constant_data = std::map{ - {2, std::make_shared(element::i32, Shape{axes_val.size()}, axes_val.data())}}; + 
const auto constant_data = + std::unordered_map{{2, {element::i32, Shape{axes_val.size()}, axes_val.data()}}}; input_shapes = {StaticShape{3, 3, 3}, StaticShape{3}, StaticShape{3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, constant_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, constant_data), NodeValidationFailure, HasSubstr("Parameter axis 3 out of the tensor rank range")); } @@ -76,12 +77,12 @@ TEST_F(RollV7StaticShapeInferenceTest, axes_has_negative_after_normalization) { const auto op = make_op(arg, shift, axes); auto axes_val = std::array{-4, 2, -1}; - const auto constant_data = std::map{ - {2, std::make_shared(element::i64, Shape{axes_val.size()}, axes_val.data())}}; + const auto constant_data = + std::unordered_map{{2, {element::i64, Shape{axes_val.size()}, axes_val.data()}}}; input_shapes = {StaticShape{3, 3, 3}, StaticShape{3}, StaticShape{3}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, constant_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, constant_data), NodeValidationFailure, HasSubstr(" Parameter axis -4 out of the tensor rank range")); } @@ -90,11 +91,11 @@ TEST_F(RollV7StaticShapeInferenceTest, default_ctor) { const auto op = make_op(); auto axes_val = std::array{-4, 2, -1, 1}; - const auto constant_data = std::map{ - {2, std::make_shared(element::i64, Shape{axes_val.size()}, axes_val.data())}}; + const auto constant_data = + std::unordered_map{{2, {element::i64, Shape{axes_val.size()}, axes_val.data()}}}; input_shapes = {StaticShape{3, 2, 5, 1}, StaticShape{}, StaticShape{4}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes[0], input_shapes[0]); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_elements_update_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_elements_update_shape_inference_test.cpp index 4ea2cf3fef8eb8..599d0a6b61ca1c 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_elements_update_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_elements_update_shape_inference_test.cpp @@ -25,11 +25,10 @@ TEST_F(ScatterElementsUpdateV3StaticShapeInferenceTest, default_ctor) { const auto op = make_op(); int32_t axis = 1; - const auto const_data = - std::map{{3, std::make_shared(element::i32, Shape{1}, &axis)}}; + const auto const_data = std::unordered_map{{3, {element::i32, Shape{1}, &axis}}}; input_shapes = ShapeVector{{1000, 256, 10, 13}, {25, 125, 3, 1}, {25, 125, 3, 1}, {1}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1000, 256, 10, 13})); @@ -44,7 +43,7 @@ TEST_F(ScatterElementsUpdateV3StaticShapeInferenceTest, correct_inputs_axis_as_c const auto op = make_op(d, i, u, a); input_shapes = ShapeVector{{2, 5, 10, 15}, {2, 1, 10, 15}, {2, 1, 10, 15}, {}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({2, 5, 10, 15})); @@ -59,11 +58,10 @@ TEST_F(ScatterElementsUpdateV3StaticShapeInferenceTest, params_are_dynamic_rank_ const auto op = make_op(d, i, u, a); uint32_t axis = 2; - const auto 
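// --- Illustrative aside (not part of the patch) -----------------------------------------
// Roll never changes the data shape; what its shape inference effectively checks is that
// every axis, after adding the rank to negative values once, lands in [0, rank). A
// standalone sketch of that normalization, mirroring the two failing Roll cases above
// (illustrative helper and message, not the library's validation code):
#include <cstdint>
#include <iostream>
#include <stdexcept>

std::int64_t normalize_axis(std::int64_t axis, std::int64_t rank) {
    const std::int64_t normalized = axis < 0 ? axis + rank : axis;
    if (normalized < 0 || normalized >= rank)
        throw std::out_of_range("Parameter axis out of the tensor rank range");
    return normalized;
}

int main() {
    normalize_axis(-1, 3);  // fine: becomes 2
    try {
        normalize_axis(3, 3);   // out of range for a rank-3 tensor, as in axes_over_arg_rank
    } catch (const std::out_of_range& e) {
        std::cout << e.what() << '\n';
    }
    try {
        normalize_axis(-4, 3);  // still negative after normalization, as in the test above
    } catch (const std::out_of_range& e) {
        std::cout << e.what() << '\n';
    }
    return 0;
}
// -----------------------------------------------------------------------------------------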
const_data = - std::map{{3, std::make_shared(element::u32, Shape{}, &axis)}}; + const auto const_data = std::unordered_map{{3, {element::u32, Shape{}, &axis}}}; input_shapes = ShapeVector{{5000, 256, 10, 15}, {30, 25, 3, 3}, {30, 25, 3, 3}, {}}; - shape_inference(op.get(), input_shapes, output_shapes, const_data); + const auto output_shapes = shape_inference(op.get(), input_shapes, const_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({5000, 256, 10, 15})); @@ -78,11 +76,10 @@ TEST_F(ScatterElementsUpdateV3StaticShapeInferenceTest, incorrect_axis_value) { const auto op = make_op(d, i, u, a); uint32_t axis = 4; - const auto const_data = - std::map{{3, std::make_shared(element::u32, Shape{}, &axis)}}; + const auto const_data = std::unordered_map{{3, {element::u32, Shape{}, &axis}}}; input_shapes = ShapeVector{{5000, 256, 10, 15}, {30, 25, 3, 3}, {30, 25, 3, 3}, {}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_data), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_data), AssertFailure, HasSubstr("Parameter axis 4 out of the tensor rank range [-4, 3]")); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_nd_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_nd_shape_inference_test.cpp index 3b10cb4a956bc9..2bb4ddb7a731cb 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_nd_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_nd_shape_inference_test.cpp @@ -21,7 +21,7 @@ TEST_F(ScatterNDUpdateV3StaticShapeInferenceTest, default_ctor) { const auto op = make_op(); input_shapes = ShapeVector{{1000, 256, 10, 13}, {25, 125, 3}, {25, 125, 13}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1000, 256, 10, 13})); @@ -35,7 +35,7 @@ TEST_F(ScatterNDUpdateV3StaticShapeInferenceTest, correct_inputs) { const auto op = make_op(d, i, u); input_shapes = ShapeVector{{1000, 256, 10, 15}, {25, 125, 3}, {25, 125, 15}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({1000, 256, 10, 15})); @@ -49,7 +49,7 @@ TEST_F(ScatterNDUpdateV3StaticShapeInferenceTest, params_are_dynamic_rank) { const auto op = make_op(d, i, u); input_shapes = ShapeVector{{5000, 256, 10, 15}, {30, 25, 3}, {30, 25, 15}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], StaticShape({5000, 256, 10, 15})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_update_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_update_shape_inference_test.cpp index d88c8a20f46bb9..0174dadffa3d04 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_update_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/scatter_update_shape_inference_test.cpp @@ -18,14 +18,14 @@ TEST(StaticShapeInferenceTest, ScatterUpdate_3D_axis_1) { auto scatter_update = std::make_shared(data_param, indices_param, updates_param, axis_param); int32_t axis_val[] = {1}; - std::map> constant_data; - constant_data[3] = 
std::make_shared(element::Type_t::i32, Shape{1}, axis_val); + std::unordered_map constant_data; + constant_data[3] = ov::Tensor(element::Type_t::i32, Shape{1}, axis_val); std::vector input_shapes = {StaticShape{2, 3, 4}, StaticShape{2, 1}, StaticShape{2, 2, 1, 4}, StaticShape{1}}, output_shapes = {StaticShape{}}; - shape_inference(scatter_update.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(scatter_update.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes[0], StaticShape({2, 3, 4})); } @@ -38,14 +38,14 @@ TEST(StaticShapeInferenceTest, ScatterUpdate_4D_axis_2) { auto scatter_update = std::make_shared(data_param, indices_param, updates_param, axis_param); int32_t axis_val[] = {2}; - std::map> constant_data; - constant_data[3] = std::make_shared(element::Type_t::i32, Shape{1}, axis_val); + std::unordered_map constant_data; + constant_data[3] = ov::Tensor(element::Type_t::i32, Shape{1}, axis_val); std::vector input_shapes = {StaticShape{1000, 256, 10, 15}, StaticShape{125, 20}, StaticShape{1000, 125, 20, 10, 15}, StaticShape{1}}, output_shapes = {StaticShape{}}; - shape_inference(scatter_update.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(scatter_update.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes[0], StaticShape({1000, 256, 10, 15})); } @@ -58,14 +58,14 @@ TEST(StaticShapeInferenceTest, ScatterUpdate_4D_incompatible_axis) { auto scatter_update = std::make_shared(data_param, indices_param, updates_param, axis_param); int32_t axis_val[] = {1}; - std::map> constant_data; - constant_data[3] = std::make_shared(element::Type_t::i32, Shape{1}, axis_val); + std::unordered_map constant_data; + constant_data[3] = ov::Tensor(element::Type_t::i32, Shape{1}, axis_val); std::vector input_shapes = {StaticShape{1000, 256, 10, 15}, StaticShape{125, 20}, StaticShape{1000, 125, 20, 10, 15}, StaticShape{1}}, output_shapes = {StaticShape{}}; - shape_inference(scatter_update.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(scatter_update.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes[0], StaticShape({1000, 256, 10, 15})); } @@ -80,9 +80,8 @@ TEST(StaticShapeInferenceTest, ScatterUpdate_axis_as_const) { std::vector input_shapes = {StaticShape{1000, 256, 10, 15}, StaticShape{125, 20}, StaticShape{1000, 125, 20, 10, 15}, - StaticShape{1}}, - output_shapes = {StaticShape{}}; - shape_inference(scatter_update.get(), input_shapes, output_shapes); + StaticShape{1}}; + const auto output_shapes = shape_inference(scatter_update.get(), input_shapes); EXPECT_EQ(output_shapes[0], StaticShape({1000, 256, 10, 15})); } @@ -95,14 +94,14 @@ TEST(StaticShapeInferenceTest, ScatterUpdate_dynamic_rank) { auto scatter_update = std::make_shared(data_param, indices_param, updates_param, axis_param); int32_t axis_val[] = {1}; - std::map> constant_data; - constant_data[3] = std::make_shared(element::Type_t::i32, Shape{1}, axis_val); + std::unordered_map constant_data; + constant_data[3] = ov::Tensor(element::Type_t::i32, Shape{1}, axis_val); std::vector input_shapes = {StaticShape{1000, 256, 10, 15}, StaticShape{125, 20}, StaticShape{1000, 125, 20, 10, 15}, StaticShape{1}}, output_shapes = {StaticShape{}}; - shape_inference(scatter_update.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(scatter_update.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes[0], StaticShape({1000, 256, 10, 15})); } @@ -115,8 +114,8 @@ 
TEST(StaticShapeInferenceTest, ScatterUpdate_params_dynamic_rank_incorrect_updat auto scatter_update = std::make_shared(data_param, indices_param, updates_param, axis_param); int32_t axis_val[] = {1}; - std::map> constant_data; - constant_data[3] = std::make_shared(element::Type_t::i32, Shape{1}, axis_val); + std::unordered_map constant_data; + constant_data[3] = ov::Tensor(element::Type_t::i32, Shape{1}, axis_val); // Incorrect rank of the third input shape std::vector input_shapes = {StaticShape{1000, 256, 10, 15}, @@ -126,6 +125,6 @@ TEST(StaticShapeInferenceTest, ScatterUpdate_params_dynamic_rank_incorrect_updat output_shapes = {StaticShape{}}; // ScatterUpdate shape_inference is implemented by usage of entryFirstPassthrough, no additional checks - shape_inference(scatter_update.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(scatter_update.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes[0], StaticShape({1000, 256, 10, 15})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/select_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/select_shape_inference_test.cpp index 8242f81777d091..e70a62c6d9e9cd 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/select_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/select_shape_inference_test.cpp @@ -15,35 +15,32 @@ TEST(StaticShapeInferenceTest, SelectTestBCastModeNUMPY) { auto pfalse = std::make_shared(element::f32, PartialShape::dynamic()); auto select = std::make_shared(cond, ptrue, pfalse, op::AutoBroadcastType::NUMPY); { - std::vector static_input_shapes = {StaticShape{}, StaticShape{4}, StaticShape{2, 4}}, - static_output_shapes = {StaticShape{}}; - shape_inference(select.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{}, StaticShape{4}, StaticShape{2, 4}}; + const auto static_output_shapes = shape_inference(select.get(), static_input_shapes); EXPECT_EQ(static_output_shapes[0], StaticShape({2, 4})); } { - std::vector static_input_shapes = {StaticShape{}, StaticShape{2, 4}, StaticShape{2, 4}}, - static_output_shapes = {StaticShape{}}; - shape_inference(select.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{}, StaticShape{2, 4}, StaticShape{2, 4}}; + const auto static_output_shapes = shape_inference(select.get(), static_input_shapes); EXPECT_EQ(static_output_shapes[0], StaticShape({2, 4})); } { - std::vector static_input_shapes = {StaticShape{4}, StaticShape{2, 4}, StaticShape{4}}, - static_output_shapes = {StaticShape{}}; - shape_inference(select.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{4}, StaticShape{2, 4}, StaticShape{4}}; + const auto static_output_shapes = shape_inference(select.get(), static_input_shapes); EXPECT_EQ(static_output_shapes[0], StaticShape({2, 4})); } } + TEST(StaticShapeInferenceTest, SelectTestBCastModePDPD) { auto cond = std::make_shared(element::boolean, PartialShape::dynamic()); auto ptrue = std::make_shared(element::f32, PartialShape::dynamic()); auto pfalse = std::make_shared(element::f32, PartialShape::dynamic()); auto select = std::make_shared(cond, ptrue, pfalse, op::AutoBroadcastSpec{op::AutoBroadcastType::PDPD, 1}); - std::vector static_input_shapes = {StaticShape{4}, StaticShape{2, 4}, StaticShape{4}}, - static_output_shapes = {StaticShape{}}; - shape_inference(select.get(), static_input_shapes, 
static_output_shapes); + std::vector static_input_shapes = {StaticShape{4}, StaticShape{2, 4}, StaticShape{4}}; + const auto static_output_shapes = shape_inference(select.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({2, 4})); } @@ -53,8 +50,7 @@ TEST(StaticShapeInferenceTest, SelectTestBCastModeNone) { auto pfalse = std::make_shared(element::f32, PartialShape::dynamic()); auto select = std::make_shared(cond, ptrue, pfalse, op::AutoBroadcastType::NONE); - std::vector static_input_shapes = {StaticShape{6, 4}, StaticShape{6, 4}, StaticShape{6, 4}}, - static_output_shapes = {StaticShape{}}; - shape_inference(select.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{6, 4}, StaticShape{6, 4}, StaticShape{6, 4}}; + const auto static_output_shapes = shape_inference(select.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({6, 4})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/shape_node_tests.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/shape_node_tests.cpp index 7f2bc62fd1fd11..51379da0339e1a 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/shape_node_tests.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/shape_node_tests.cpp @@ -13,12 +13,10 @@ TEST(StaticShapeInferenceTest, ReshapeTest) { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); auto pattern = std::make_shared(element::i32, Shape{2}, std::vector{0, -1}); - auto reduce = - std::make_shared(data, pattern, true); + auto reduce = std::make_shared(data, pattern, true); - std::vector static_input_shapes = {StaticShape{3, 6, 5, 5}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; - shape_inference(reduce.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{3, 6, 5, 5}, StaticShape{2}}; + const auto static_output_shapes = shape_inference(reduce.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({3, 150})); } @@ -27,12 +25,10 @@ TEST(StaticShapeInferenceTest, ReshapeEmptyTest) { auto data = std::make_shared(element::f32, PartialShape{-1, 2, 2}); auto pattern = std::make_shared(element::i32, Shape{2}, std::vector{0, 4}); - auto reduce = - std::make_shared(data, pattern, false); + auto reduce = std::make_shared(data, pattern, false); - std::vector static_input_shapes = {StaticShape{0, 2, 2}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; - shape_inference(reduce.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{0, 2, 2}, StaticShape{2}}; + const auto static_output_shapes = shape_inference(reduce.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({0, 4})); } @@ -40,12 +36,10 @@ TEST(StaticShapeInferenceTest, ReshapeEmptyTest) { TEST(StaticShapeInferenceTest, ShapeOf5DTest) { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); - auto shapeof = - std::make_shared(data); + auto shapeof = std::make_shared(data); - std::vector static_input_shapes = {StaticShape{2, 3, 4, 5, 6}}, - static_output_shapes = {StaticShape{}}; - shape_inference(shapeof.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{2, 3, 4, 5, 6}}; + const auto static_output_shapes = shape_inference(shapeof.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({5})); } @@ -53,12 +47,10 @@ TEST(StaticShapeInferenceTest, ShapeOf5DTest) { 
TEST(StaticShapeInferenceTest, ShapeOf0DTest) { auto data = std::make_shared(element::f32, PartialShape{}); - auto shapeof = - std::make_shared(data); + auto shapeof = std::make_shared(data); - std::vector static_input_shapes = {StaticShape{}}, - static_output_shapes = {StaticShape{}}; - shape_inference(shapeof.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{}}; + const auto static_output_shapes = shape_inference(shapeof.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/shuffle_channels_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/shuffle_channels_shape_inference_test.cpp index f9b9b2fdd151bd..41088ff695652d 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/shuffle_channels_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/shuffle_channels_shape_inference_test.cpp @@ -25,7 +25,7 @@ TEST_F(ShuffleChannelsV0StaticShapeInferenceTest, default_ctor) { op->set_group(2); input_shapes = {StaticShape{5, 4, 9}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], input_shapes[0]); @@ -36,7 +36,7 @@ TEST_F(ShuffleChannelsV0StaticShapeInferenceTest, correct_shape_infer) { op = make_op(data, -1, 3); input_shapes = {StaticShape{5, 4, 9}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], input_shapes[0]); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/slice_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/slice_shape_inference_test.cpp index 988ad1b8ec5ea6..3aed5f4d512909 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/slice_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/slice_shape_inference_test.cpp @@ -34,7 +34,7 @@ TEST_F(SliceStaticShapeInferenceTest, reverse_steps_start_stop_outside_dimension input_shapes.push_back({3, 4, 5, max_d, max_d}); input_shapes.resize(4, start->get_shape()); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), num_of_outputs); EXPECT_EQ(output_shapes.front(), StaticShape({3, 2, 5, max_d, 3})); @@ -53,19 +53,19 @@ TEST_F(SliceStaticShapeInferenceTest, reverse_step_on_signle_axis_but_start_stop auto stop_buff = std::vector{2}; auto steps_buff = std::vector{-2}; - const auto start_tensor = std::make_shared(et, Shape{1}, static_cast(start_buff.data())); - const auto stop_tensor = std::make_shared(et, Shape{1}, static_cast(stop_buff.data())); - const auto steps_tensor = std::make_shared(et, Shape{1}, static_cast(steps_buff.data())); + const auto start_tensor = ov::Tensor(element::i64, Shape{1}, static_cast(start_buff.data())); + const auto stop_tensor = ov::Tensor(element::i64, Shape{1}, static_cast(stop_buff.data())); + const auto steps_tensor = ov::Tensor(element::i64, Shape{1}, static_cast(steps_buff.data())); const auto op = make_op(data, start, stop, steps, axes); input_shapes = ShapeVector{{3, 4, 10}, {1}, {1}, {1}, axes->get_shape()}; - const std::map>& constant_data = {{1, start_tensor}, - {2, stop_tensor}, - {3, steps_tensor}}; + const std::unordered_map& constant_data = {{1, start_tensor}, + {2, 
stop_tensor}, + {3, steps_tensor}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes.size(), num_of_outputs); EXPECT_EQ(output_shapes.front(), StaticShape({3, 4, 4})); @@ -86,22 +86,22 @@ TEST_F(SliceStaticShapeInferenceTest, forward_step_all_data_in_const_map) { const auto common_shape = Shape{start_buff.size()}; - const auto start_tensor = std::make_shared(et, common_shape, static_cast(start_buff.data())); - const auto stop_tensor = std::make_shared(et, common_shape, static_cast(stop_buff.data())); - const auto steps_tensor = std::make_shared(et, common_shape, static_cast(steps_buff.data())); - const auto axes_tensor = std::make_shared(et, common_shape, static_cast(axes_buff.data())); + const auto start_tensor = ov::Tensor(element::i64, common_shape, static_cast(start_buff.data())); + const auto stop_tensor = ov::Tensor(element::i64, common_shape, static_cast(stop_buff.data())); + const auto steps_tensor = ov::Tensor(element::i64, common_shape, static_cast(steps_buff.data())); + const auto axes_tensor = ov::Tensor(element::i64, common_shape, static_cast(axes_buff.data())); const auto op = make_op(data, start, stop, steps); input_shapes.push_back({10, 10, 8, max_d, max_d, max_d, 10}); input_shapes.resize(5, common_shape); - const std::map>& constant_data = {{1, start_tensor}, - {2, stop_tensor}, - {3, steps_tensor}, - {4, axes_tensor}}; + const std::unordered_map& constant_data = {{1, start_tensor}, + {2, stop_tensor}, + {3, steps_tensor}, + {4, axes_tensor}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes.size(), num_of_outputs); EXPECT_EQ(output_shapes.front(), StaticShape({10, 3, 0, 4, max_d, max_d, 3})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/space_to_batch_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/space_to_batch_shape_inference_test.cpp index fd6969e0622983..7fad4a0c4672d7 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/space_to_batch_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/space_to_batch_shape_inference_test.cpp @@ -35,13 +35,12 @@ TEST_F(SpaceToBatchV1StaticShapeInferenceTest, default_ctor) { int32_t pads_begin_val[] = {0, 2, 0, 0, 0}; int32_t pads_end_val[] = {0, 2, 1, 0, 0}; - const auto constant_data = - std::map{{1, std::make_shared(element::i32, Shape{5}, block_val)}, - {2, std::make_shared(element::i32, Shape{5}, pads_begin_val)}, - {3, std::make_shared(element::i32, Shape{5}, pads_end_val)}}; + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{5}, block_val}}, + {2, {element::i32, Shape{5}, pads_begin_val}}, + {3, {element::i32, Shape{5}, pads_end_val}}}; input_shapes = {{2, 32, 64, 128, 256}, {5}, {5}, {5}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2 * 6 * 5 * 16, (32 + 2 + 2) / 6, (64 + 1) / 5, 128, 256 / 16})); } @@ -55,7 +54,7 @@ TEST_F(SpaceToBatchV1StaticShapeInferenceTest, blocks_pads_as_constants) { const auto op = make_op(data, block_shape, pads_begin, pads_end); input_shapes = {{2, 100, 1024, 3}, {4}, {4}, {4}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = 
shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes[0], (StaticShape{2 * 12 * 100 * 2, (100 + 3 + 5) / 12, (1024 + 38 + 38) / 100, (3 + 1) / 2})); @@ -68,13 +67,12 @@ TEST_F(SpaceToBatchV1StaticShapeInferenceTest, blocks_pads_in_constant_map) { int32_t pads_begin_val[] = {0, 2, 0, 0, 0}; int32_t pads_end_val[] = {0, 2, 1, 0, 0}; - const auto constant_data = - std::map{{1, std::make_shared(element::i32, Shape{5}, block_val)}, - {2, std::make_shared(element::i32, Shape{5}, pads_begin_val)}, - {3, std::make_shared(element::i32, Shape{5}, pads_end_val)}}; + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{5}, block_val}}, + {2, {element::i32, Shape{5}, pads_begin_val}}, + {3, {element::i32, Shape{5}, pads_end_val}}}; input_shapes = {{2, 32, 64, 128, 256}, {5}, {5}, {5}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{2 * 6 * 5 * 16, (32 + 2 + 2) / 6, (64 + 1) / 5, 128, 256 / 16})); } @@ -83,17 +81,16 @@ TEST_F(SpaceToBatchV1StaticShapeInferenceTest, throw_no_data_const_map) { const auto op = make_space_to_batch_dynamic(); input_shapes = {{2, 32, 64, 128, 256}, {5}, {5}, {5}}; - EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), NodeValidationFailure); + EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure); } TEST_F(SpaceToBatchV1StaticShapeInferenceTest, exception_missing_pads_data_in_const_map) { const auto op = make_space_to_batch_dynamic(); int32_t block_val[] = {1, 6, 5, 1, 16}; - const auto constant_data = - std::map{{1, std::make_shared(element::i32, Shape{5}, block_val)}}; + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{5}, block_val}}}; input_shapes = {{2, 32, 64, 128, 256}, {5}, {5}, {5}}; - EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), NodeValidationFailure); + EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/space_to_depth_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/space_to_depth_shape_inference_test.cpp index da8851751ee92c..b65397e0c2a8a0 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/space_to_depth_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/space_to_depth_shape_inference_test.cpp @@ -24,7 +24,7 @@ TEST_F(SpaceToDepthV0StaticShapeInferenceTest, default_ctor) { op->set_block_size(2); input_shapes = {StaticShape{1, 12, 4, 1080, 1616}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{1, 12 * 8, 4 / 2, 1080 / 2, 1616 / 2})); @@ -35,7 +35,7 @@ TEST_F(SpaceToDepthV0StaticShapeInferenceTest, depth_first_block_2) { const auto op = make_op(data, op_type::SpaceToDepthMode::DEPTH_FIRST, 2); input_shapes = {StaticShape{1, 12, 4, 1080, 1616}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 1); EXPECT_EQ(output_shapes[0], (StaticShape{1, 12 * 8, 4 / 2, 1080 / 2, 1616 / 2})); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/split_shape_inference_tests.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/split_shape_inference_tests.cpp 
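The recurring change across these test hunks is a single API migration: the legacy helper that filled an `output_shapes` out-parameter and took constant inputs as a `std::map` of `HostTensor` pointers is replaced by a helper that returns the inferred `ShapeVector` and takes constants as `std::unordered_map<size_t, ov::Tensor>`. The sketch below is an illustrative aside, not part of the patch: it assumes the `ov::intel_cpu::shape_inference` test helper and the `ShapeVector` alias from the reworked `utils.hpp`, and the op, shapes, and port index are chosen purely for the example.

#include <unordered_map>

#include "openvino/op/parameter.hpp"
#include "openvino/op/split.hpp"
#include "utils.hpp"  // test-local helper: ov::intel_cpu::shape_inference, ShapeVector, StaticShape

using namespace ov;
using namespace ov::intel_cpu;

void example_new_style_usage() {
    const auto data = std::make_shared<op::v0::Parameter>(element::f32, PartialShape::dynamic());
    const auto axis = std::make_shared<op::v0::Parameter>(element::i64, PartialShape::dynamic());
    const auto split_op = std::make_shared<op::v1::Split>(data, axis, 2);

    // Constant data for input port 1 is now passed as a plain ov::Tensor keyed by port index.
    int64_t axis_value = 1;
    const auto const_data =
        std::unordered_map<size_t, Tensor>{{1, Tensor(element::i64, Shape{}, &axis_value)}};

    const ShapeVector input_shapes = {StaticShape{4, 6}, StaticShape{}};

    // Old style (removed by this series):
    //   ShapeVector output_shapes = {StaticShape{}};
    //   shape_inference(split_op.get(), input_shapes, output_shapes, const_data);
    // New style: the helper returns the inferred shapes by value.
    const auto output_shapes = shape_inference(split_op.get(), input_shapes, const_data);
    // Expected result here: two outputs, each StaticShape{4, 3}.
}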
index acbe99c6095b55..ccec0099e9a0f3 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/split_shape_inference_tests.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/split_shape_inference_tests.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "gmock/gmock.h" +#include + #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/split.hpp" @@ -58,7 +59,7 @@ TEST_P(SplitStaticShapeInferenceTest, shape_inference_empty_const_map) { const auto axis_node = std::make_shared(element::i64, Shape{}, axis); op = make_op(arg, axis_node, num_of_splits); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), num_of_splits); EXPECT_THAT(output_shapes, Each(exp_shape)); @@ -68,11 +69,10 @@ TEST_P(SplitStaticShapeInferenceTest, shape_inference_with_const_map) { const auto axis_node = std::make_shared(element::i64, Shape{}); op = make_op(arg, axis_node, num_of_splits); - const auto axis_const = std::make_shared(element::i64, ov::Shape{}, axis); - const auto axis_tensor = std::make_shared(axis_const); - const std::map>& constant_data = {{1, axis_tensor}}; + const auto axis_tensor = ov::Tensor(element::i64, ov::Shape{}, &axis); + const auto constant_data = std::unordered_map{{1, axis_tensor}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); ASSERT_EQ(output_shapes.front(), exp_shape); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/squeeze_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/squeeze_shape_inference_test.cpp index 72662af0cc887d..bd42a19aa70445 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/squeeze_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/squeeze_shape_inference_test.cpp @@ -2,8 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include "common_test_utils/test_assertions.hpp" -#include "gmock/gmock.h" #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/squeeze.hpp" @@ -28,7 +29,7 @@ TEST_F(SqueezeStaticShapeInferenceAssertTest, no_axes) { input_shapes = ShapeVector{{5, 6}, axes->get_shape()}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Check 'constant != nullptr'")); } @@ -40,7 +41,7 @@ TEST_F(SqueezeStaticShapeInferenceAssertTest, parameter_static_shape_axes_no_dat input_shapes = ShapeVector{arg->get_shape(), axes->get_shape()}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Check 'constant != nullptr'")); } @@ -100,7 +101,7 @@ TEST_P(SqueezeStaticShapeInferenceTest, shape_inference_empty_const_map) { const auto axes_node = std::make_shared(element::i64, Shape{axes.size()}, axes); const auto op = make_op(arg, axes_node); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes.front(), exp_shape); } @@ -109,11 +110,11 @@ TEST_P(SqueezeStaticShapeInferenceTest, shape_inference_with_const_map) { const auto axes_node = std::make_shared(element::i64, Shape{1}); const auto op = make_op(arg, axes_node); - const auto axes_const = 
std::make_shared(element::i64, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const std::map>& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = axes.empty() ? ov::Tensor(element::i64, ov::Shape{axes.size()}) + : ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const auto constant_data = std::unordered_map{{1, axes_tensor}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); ASSERT_EQ(output_shapes.front(), exp_shape); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/strided_slice_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/strided_slice_shape_inference_test.cpp index edcff1d0a85e23..6dda651db4d4f6 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/strided_slice_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/strided_slice_shape_inference_test.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" @@ -28,12 +29,12 @@ TEST_F(StridedSliceStaticShapeInferenceTest, reverse_stride_begin_end_clip_to_di const auto op = make_op(data, begin, end, stride, mask, mask); - check_static_shape(op.get(), - {StaticShape{3, 4, 5}, StaticShape{3}, StaticShape{3}, StaticShape{3}}, - {StaticShape{3, 4, 5}}); + input_shapes = ShapeVector{{3, 4, 5}, {3}, {3}, {3}}; + output_shapes = shape_inference(op.get(), input_shapes); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{3, 4, 5})); } -TEST_F(StridedSliceStaticShapeInferenceTest, use_begin_end) { +TEST_F(StridedSliceStaticShapeInferenceTest, use_begin_end_variant_1) { const auto mask = std::vector(4, 0); const auto data = std::make_shared(element::f32, ov::PartialShape::dynamic()); @@ -43,11 +44,60 @@ TEST_F(StridedSliceStaticShapeInferenceTest, use_begin_end) { const auto op = make_op(data, begin, end, stride, mask, mask); - check_static_shape(op.get(), {StaticShape{3, 2, 3}, {1, 0, 0}, {2, 1, 3}, {1, 1, 1}}, {StaticShape{1, 1, 3}}); + int64_t begin_v[] = {1, 0, 0}; + int64_t end_v[] = {2, 1, 3}; + int64_t stride_v[] = {1, 1, 1}; + const auto const_data = std::unordered_map{{1, {element::i64, ov::Shape{3}, begin_v}}, + {2, {element::i64, ov::Shape{3}, end_v}}, + {3, {element::i64, ov::Shape{3}, stride_v}}}; + input_shapes = ShapeVector{{3, 2, 3}, {3}, {3}, {3}}; + output_shapes = shape_inference(op.get(), input_shapes, const_data); - check_static_shape(op.get(), {StaticShape{3, 2, 3}, {1, 0, 0}, {2, 2, 3}, {1, 1, 1}}, {StaticShape{1, 2, 3}}); + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{1, 1, 3})); +} + +TEST_F(StridedSliceStaticShapeInferenceTest, use_begin_end_variant_2) { + const auto mask = std::vector(4, 0); + + const auto data = std::make_shared(element::f32, ov::PartialShape::dynamic()); + const auto begin = std::make_shared(element::i64, Shape{3}); + const auto end = std::make_shared(element::i64, Shape{3}); + const auto stride = std::make_shared(element::i64, Shape{3}); + + const auto op = make_op(data, begin, end, stride, mask, mask); + + int64_t begin_v[] = {1, 0, 0}; + int64_t end_v[] = {2, 2, 3}; + int64_t stride_v[] = {1, 1, 1}; + const auto const_data = std::unordered_map{{1, {element::i64, ov::Shape{3}, begin_v}}, + {2, {element::i64, ov::Shape{3}, end_v}}, + {3, {element::i64, ov::Shape{3}, stride_v}}}; + input_shapes = 
ShapeVector{{3, 2, 3}, {3}, {3}, {3}}; + output_shapes = shape_inference(op.get(), input_shapes, const_data); + + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{1, 2, 3})); +} - check_static_shape(op.get(), {StaticShape{3, 2, 3}, {2, 0, 0}, {3, 2, 3}, {1, 1, 2}}, {StaticShape{1, 2, 2}}); +TEST_F(StridedSliceStaticShapeInferenceTest, use_begin_end_variant_3) { + const auto mask = std::vector(4, 0); + + const auto data = std::make_shared(element::f32, ov::PartialShape::dynamic()); + const auto begin = std::make_shared(element::i64, Shape{3}); + const auto end = std::make_shared(element::i64, Shape{3}); + const auto stride = std::make_shared(element::i64, Shape{3}); + + const auto op = make_op(data, begin, end, stride, mask, mask); + + int64_t begin_v[] = {2, 0, 0}; + int64_t end_v[] = {3, 2, 3}; + int64_t stride_v[] = {1, 1, 2}; + const auto const_data = std::unordered_map{{1, {element::i64, ov::Shape{3}, begin_v}}, + {2, {element::i64, ov::Shape{3}, end_v}}, + {3, {element::i64, ov::Shape{3}, stride_v}}}; + input_shapes = ShapeVector{{3, 2, 3}, {3}, {3}, {3}}; + output_shapes = shape_inference(op.get(), input_shapes, const_data); + + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{1, 2, 2})); } TEST_F(StridedSliceStaticShapeInferenceTest, ignore_begin_end) { @@ -61,7 +111,16 @@ TEST_F(StridedSliceStaticShapeInferenceTest, ignore_begin_end) { const auto op = make_op(data, begin, end, stride, begin_mask, end_mask); - check_static_shape(op.get(), {StaticShape{3, 2, 3}, {1, 0, 0}, {0, 0, 0}, {1, 1, 1}}, {StaticShape{2, 2, 3}}); + int64_t begin_v[] = {1, 0, 0}; + int64_t end_v[] = {0, 0, 0}; + int64_t stride_v[] = {1, 1, 1}; + const auto const_data = std::unordered_map{{1, {element::i64, ov::Shape{3}, begin_v}}, + {2, {element::i64, ov::Shape{3}, end_v}}, + {3, {element::i64, ov::Shape{3}, stride_v}}}; + input_shapes = ShapeVector{{3, 2, 3}, {3}, {3}, {3}}; + output_shapes = shape_inference(op.get(), input_shapes, const_data); + + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{2, 2, 3})); } TEST_F(StridedSliceStaticShapeInferenceTest, ignore_begin_end_stride_by_two_last_dim) { @@ -75,7 +134,16 @@ TEST_F(StridedSliceStaticShapeInferenceTest, ignore_begin_end_stride_by_two_last auto op = make_op(data, begin, end, stride, begin_mask, end_mask); - check_static_shape(op.get(), {StaticShape{3, 2, 3}, {0, 1, 0}, {2, 0, 0}, {1, 1, 2}}, {StaticShape{2, 1, 2}}); + int64_t begin_v[] = {0, 1, 0}; + int64_t end_v[] = {2, 0, 0}; + int64_t stride_v[] = {1, 1, 2}; + const auto const_data = std::unordered_map{{1, {element::i64, ov::Shape{3}, begin_v}}, + {2, {element::i64, ov::Shape{3}, end_v}}, + {3, {element::i64, ov::Shape{3}, stride_v}}}; + input_shapes = ShapeVector{{3, 2, 3}, {3}, {3}, {3}}; + output_shapes = shape_inference(op.get(), input_shapes, const_data); + + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{2, 1, 2})); } TEST_F(StridedSliceStaticShapeInferenceTest, use_reverse_stride_on_last_dimension) { @@ -88,7 +156,16 @@ TEST_F(StridedSliceStaticShapeInferenceTest, use_reverse_stride_on_last_dimensio const auto op = make_op(data, begin, end, stride, mask, mask); - check_static_shape(op.get(), {StaticShape{3, 2, 3}, {0, 0, 0}, {1, 0, 0}, {1, 1, -1}}, {StaticShape{1, 2, 3}}); + int64_t begin_v[] = {0, 0, 0}; + int64_t end_v[] = {1, 0, 0}; + int64_t stride_v[] = {1, 1, -1}; + const auto const_data = std::unordered_map{{1, {element::i64, ov::Shape{3}, begin_v}}, + {2, {element::i64, ov::Shape{3}, end_v}}, + {3, {element::i64, ov::Shape{3}, stride_v}}}; + input_shapes = ShapeVector{{3, 2, 
3}, {3}, {3}, {3}}; + output_shapes = shape_inference(op.get(), input_shapes, const_data); + + EXPECT_THAT(output_shapes, ElementsAre(StaticShape{1, 2, 3})); } TEST_F(StridedSliceStaticShapeInferenceTest, default_stride) { @@ -101,7 +178,7 @@ TEST_F(StridedSliceStaticShapeInferenceTest, default_stride) { input_shapes = ShapeVector{{3, 2, 3}, {3}, {3}}; - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes.front(), StaticShape({1, 2, 2})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/tile_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/tile_shape_inference_test.cpp index 1b793c9133bfa0..8b232bb7deb04f 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/tile_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/tile_shape_inference_test.cpp @@ -14,15 +14,13 @@ TEST(StaticShapeInferenceTest, TileTest) { auto param1 = std::make_shared(element::i64, ov::Shape{3}, std::vector{3, 4, 1}); auto tile = std::make_shared(param0, param1); // Test Static Shape - std::vector static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{3}}, - static_output_shapes = {StaticShape{}}; - shape_inference(tile.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{3}}; + const auto static_output_shapes = shape_inference(tile.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({18, 32, 10})); // Test Wrong Static Shape - std::vector wrong_static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{}}, - wrong_static_output_shapes = {StaticShape{}}; + std::vector wrong_static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{}}; - ASSERT_THROW(shape_inference(tile.get(), wrong_static_input_shapes, wrong_static_output_shapes), ov::AssertFailure); + ASSERT_THROW(shape_inference(tile.get(), wrong_static_input_shapes), ov::AssertFailure); } TEST(StaticShapeInferenceTest, TileFewRepeatsTest) { @@ -30,9 +28,8 @@ TEST(StaticShapeInferenceTest, TileFewRepeatsTest) { auto param1 = ov::op::v0::Constant::create(element::i64, Shape{2}, {4, 1}); auto tile = std::make_shared(param0, param1); // Test Static Shape - std::vector static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{2}}, - static_output_shapes = {StaticShape{}}; - shape_inference(tile.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{2}}; + const auto static_output_shapes = shape_inference(tile.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({6, 32, 10})); } @@ -41,9 +38,8 @@ TEST(StaticShapeInferenceTest, TileSmallDataRankTest) { auto param1 = ov::op::v0::Constant::create(element::i64, Shape{3}, {3, 4, 1}); auto tile = std::make_shared(param0, param1); // Test Static Shape - std::vector static_input_shapes = {StaticShape{8, 10}, StaticShape{3}}, - static_output_shapes = {StaticShape{}}; - shape_inference(tile.get(), static_input_shapes, static_output_shapes); + std::vector static_input_shapes = {StaticShape{8, 10}, StaticShape{3}}; + const auto static_output_shapes = shape_inference(tile.get(), static_input_shapes); ASSERT_EQ(static_output_shapes[0], StaticShape({3, 32, 10})); } @@ -53,12 +49,11 @@ TEST(StaticShapeInferenceTest, TileSmallDataRankTestRepeatsInConstMap) { auto tile = std::make_shared(param0, param1); int32_t repeats[] = {3, 4, 1}; - const std::map>& constant_data = { - {1, 
std::make_shared(element::i32, Shape{3}, repeats)}}; + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{3}, repeats}}}; // Test Static Shape ShapeVector input_shapes = {StaticShape{8, 10}, StaticShape{3}}, output_shapes = {StaticShape{}}; - shape_inference(tile.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(tile.get(), input_shapes, constant_data); ASSERT_EQ(output_shapes.front(), StaticShape({3, 32, 10})); } @@ -86,8 +81,7 @@ TEST(StaticShapeInferenceTest, TileNewApiInputsStaticRank) { auto tile = std::make_shared(param0, param1); int32_t repeats[] = {3, 4, 1, 2}; - const std::map>& constant_data = { - {1, std::make_shared(element::i32, Shape{4}, repeats)}}; + const auto constant_data = std::unordered_map{{1, {element::i32, Shape{4}, repeats}}}; auto dims = std::vector{{8, 10}, {4}}; auto in_shapes = std::vector(dims.begin(), dims.end()); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/topk_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/topk_shape_inference_test.cpp index 7ef0658cb28093..9f1ed780153cfa 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/topk_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/topk_shape_inference_test.cpp @@ -2,8 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include "common_test_utils/test_assertions.hpp" -#include "gmock/gmock.h" #include "openvino/opsets/opset10.hpp" #include "topk_shape_inference.hpp" #include "utils.hpp" @@ -46,10 +47,9 @@ TEST_F(TopKV1AssertStaticShapeInferenceTest, k_is_negative) { output_shapes = ShapeVector(2); int64_t k = -2; - const auto const_map = - std::map{{1, std::make_shared(element::i64, Shape{}, &k)}}; + const auto const_map = std::unordered_map{{1, {element::i64, Shape{}, &k}}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_map), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_map), ov::AssertFailure, HasSubstr("The value of 'K' must be greater or equal to zero. 
(got " + std::to_string(k) + ")")); } @@ -63,7 +63,7 @@ TEST_P(TopKV1Test, no_constant_map) { const auto op = make_op(data, k_node, axis, "max", "value"); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 2); EXPECT_THAT(output_shapes, Each(exp_shape)); @@ -75,7 +75,7 @@ TEST_P(TopKV1Test, k_as_param_no_const_map) { const auto op = make_op(data, k_node, axis, "min", "value"); - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Static shape inference lacks constant data on port 1")); } @@ -84,12 +84,11 @@ TEST_P(TopKV1Test, k_as_param_in_const_map) { const auto data = std::make_shared(element::f32, PartialShape::dynamic()); const auto k_node = std::make_shared(element::i64, PartialShape::dynamic()); - const auto const_map = - std::map{{1, std::make_shared(element::i64, Shape{}, &k)}}; + const auto const_map = std::unordered_map{{1, {element::i64, Shape{}, &k}}}; const auto op = make_op(data, k_node, axis, "min", "value"); - shape_inference(op.get(), input_shapes, output_shapes, const_map); + output_shapes = shape_inference(op.get(), input_shapes, const_map); EXPECT_EQ(output_shapes.size(), 2); EXPECT_THAT(output_shapes, Each(exp_shape)); @@ -109,10 +108,9 @@ TEST_F(TopKV3AssertStaticShapeInferenceTest, k_is_negative) { output_shapes = ShapeVector(2); int64_t k = -2; - const auto const_map = - std::map{{1, std::make_shared(element::i64, Shape{}, &k)}}; + const auto const_map = std::unordered_map{{1, {element::i64, Shape{}, &k}}}; - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes, const_map), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, const_map), ov::AssertFailure, HasSubstr("The value of 'K' must be greater or equal to zero. 
(got " + std::to_string(k) + ")")); } @@ -126,7 +124,7 @@ TEST_P(TopKV3Test, k_as_constant) { const auto op = make_op(data, k_node, axis, "min", "value"); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), 2); EXPECT_THAT(output_shapes, Each(exp_shape)); @@ -138,7 +136,7 @@ TEST_P(TopKV3Test, k_as_param_no_const_map) { const auto op = make_op(data, k_node, axis, "min", "value"); - OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes), NodeValidationFailure, HasSubstr("Static shape inference lacks constant data on port 1")); } @@ -147,12 +145,11 @@ TEST_P(TopKV3Test, k_as_param_in_const_map) { const auto data = std::make_shared(element::f32, PartialShape::dynamic()); const auto k_node = std::make_shared(element::i64, PartialShape::dynamic()); - const auto const_map = - std::map{{1, std::make_shared(element::i64, Shape{}, &k)}}; + const auto const_map = std::unordered_map{{1, {element::i64, Shape{}, &k}}}; const auto op = make_op(data, k_node, axis, "max", "value"); - shape_inference(op.get(), input_shapes, output_shapes, const_map); + output_shapes = shape_inference(op.get(), input_shapes, const_map); EXPECT_EQ(output_shapes.size(), 2); EXPECT_THAT(output_shapes, Each(exp_shape)); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/transpose_shape_infernece_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/transpose_shape_infernece_test.cpp index b905d641d01c93..4461ab3e7b80ec 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/transpose_shape_infernece_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/transpose_shape_infernece_test.cpp @@ -68,9 +68,7 @@ INSTANTIATE_TEST_SUITE_P( /** \brief Check shape_infer for transpose on static shapes. 
*/ TEST_P(StaticShapeInferenceTest, transpose_static) { - auto output_shapes = std::vector{StaticShape{}}; - - shape_inference(transpose.get(), {input_shape, transpose_order}, output_shapes); + auto output_shapes = shape_inference(transpose.get(), {input_shape, transpose_order}); ASSERT_EQ(output_shapes[op::v1::Transpose::ARG_T], exp_shape); } @@ -81,9 +79,7 @@ TEST(StaticShapeInferenceTest, transpose_input_shape_dim_dynamic) { const auto order = std::vector{1, 2, 0}; const auto transpose = make_transpose(input_shape, order); - auto output_shapes = std::vector{StaticShape{}}; - - shape_inference(transpose.get(), {StaticShape{2, 6, 3}, order}, output_shapes); + auto output_shapes = shape_inference(transpose.get(), {StaticShape{2, 6, 3}, order}); ASSERT_EQ(output_shapes[op::v1::Transpose::ARG_T], StaticShape({6, 3, 2})); } @@ -95,13 +91,12 @@ TEST(StaticShapeInferenceTest, transpose_order_in_constant_map) { const auto transpose = std::make_shared(input, order); - const auto axes_order = std::vector{1, 2, 0, 3}; - const auto axes = std::make_shared(element::i64, ov::Shape{axes_order.size()}, axes_order); - const auto const_tensor = std::make_shared(axes); - const std::map> const_map = {{1, const_tensor}}; + auto axes_order = std::vector{1, 2, 0, 3}; + const auto const_tensor = ov::Tensor(element::i64, ov::Shape{axes_order.size()}, axes_order.data()); + const std::unordered_map const_map = {{1, const_tensor}}; auto output_shapes = std::vector{StaticShape{}}; - shape_inference(transpose.get(), {StaticShape({2, 4, 6, 8}), StaticShape()}, output_shapes, const_map); + output_shapes = shape_inference(transpose.get(), {StaticShape({2, 4, 6, 8}), StaticShape()}, const_map); ASSERT_EQ(output_shapes[op::v1::Transpose::ARG_T], StaticShape({4, 6, 2, 8})); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/unsqueeze_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/unsqueeze_shape_inference_test.cpp index 83ec4002797c2e..fbb086f93be8af 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/unsqueeze_shape_inference_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/unsqueeze_shape_inference_test.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "gmock/gmock.h" +#include + #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/unsqueeze.hpp" @@ -28,7 +29,7 @@ TEST_F(UnsqueezeStaticShapeInferenceAssertTest, no_axes) { input_shapes = ShapeVector{{5, 6}, axes->get_shape()}; try { - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); FAIL() << "Axes nullptr not detected"; } catch (const NodeValidationFailure& error) { EXPECT_THAT(error.what(), HasSubstr("Check 'constant != nullptr'")); @@ -111,7 +112,7 @@ TEST_P(UnsqueezeStaticShapeInferenceTest, shape_inference_empty_const_map) { const auto axes_node = std::make_shared(element::i64, Shape{axes.size()}, axes); op = std::make_shared(arg, axes_node); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); ASSERT_EQ(output_shapes.front(), exp_shape); } @@ -120,11 +121,10 @@ TEST_P(UnsqueezeStaticShapeInferenceTest, shape_inference_with_const_map) { const auto axes_node = std::make_shared(element::i64, Shape{1}); op = std::make_shared(arg, axes_node); - const auto axes_const = std::make_shared(element::i64, ov::Shape{axes.size()}, axes); - const auto axes_tensor = std::make_shared(axes_const); - const 
std::map>& constant_data = {{1, axes_tensor}}; + const auto axes_tensor = ov::Tensor(element::i64, ov::Shape{axes.size()}, axes.data()); + const auto constant_data = std::unordered_map{{1, axes_tensor}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); ASSERT_EQ(output_shapes.front(), exp_shape); } diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.cpp new file mode 100644 index 00000000000000..ccc4b1898e593c --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils.hpp" + +namespace ov { +namespace intel_cpu { +std::vector make_static_shape_refs(const ShapeVector& shapes) { + std::vector out; + out.reserve(shapes.size()); + for (auto& s : shapes) { + out.emplace_back(s); + } + return out; +} + +ShapeVector shape_inference(ov::Node* op, + const ShapeVector& input_shapes, + const std::unordered_map& constant_data) { + const auto in_shapes = intel_cpu::make_static_shape_refs(input_shapes); + const auto shape_infer = intel_cpu::make_shape_inference(op->shared_from_this()); + auto result = shape_infer->infer(in_shapes, make_tensor_accessor(constant_data)); + OPENVINO_ASSERT(result, "There are no output shapes in shape inference result"); + return *result; +} +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.hpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.hpp index a7cf2cbd62c98a..0cbe8685af77b0 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.hpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/utils.hpp @@ -10,114 +10,16 @@ #include "shape_inference/shape_inference.hpp" #include "shape_inference/static_shape.hpp" -using ShapeVector = std::vector; namespace ov { namespace intel_cpu { -namespace { -std::vector make_static_shape_refs(const ShapeVector& shapes) { - std::vector out; - out.reserve(shapes.size()); - for (auto& s : shapes) { - out.emplace_back(s); - } - return out; -} -} // namespace - -template -void shape_inference(ov::Node* op, - const std::vector& input_shapes, - std::vector& output_shapes, - const std::map& constant_data = {}) { - const auto in_shapes = make_static_shape_refs(input_shapes); - const auto shape_infer = make_shape_inference(op->shared_from_this()); - auto result = shape_infer->infer(in_shapes, ov::make_tensor_accessor(constant_data)); - OPENVINO_ASSERT(result, "There are no output shapes in shape inference result"); - output_shapes = std::move(*result); -} - -template > -ShapeVector shape_inference(ov::Node* op, const ShapeVector& input_shapes, const T& constant_data = T{}) { - const auto in_shapes = intel_cpu::make_static_shape_refs(input_shapes); - const auto shape_infer = intel_cpu::make_shape_inference(op->shared_from_this()); - auto result = shape_infer->infer(in_shapes, make_tensor_accessor(constant_data)); - OPENVINO_ASSERT(result, "There are no output shapes in shape inference result"); - return *result; -} -} // namespace intel_cpu -} // namespace ov - -struct TestTensor { - std::shared_ptr tensor; - ov::intel_cpu::StaticShape static_shape; - - template - TestTensor(std::initializer_list values) : TestTensor(ov::intel_cpu::StaticShape({values.size()}), values) {} - - template - TestTensor(T scalar) : 
TestTensor(ov::intel_cpu::StaticShape({}), {scalar}) {} - - TestTensor(ov::intel_cpu::StaticShape shape) : static_shape(shape) {} - - template - TestTensor(ov::intel_cpu::StaticShape shape, std::initializer_list values) { - static_shape = shape; - - ov::Shape s; - for (auto dim : shape) - s.push_back(dim); - if (values.size() > 0) { - tensor = std::make_shared(ov::element::from(), s); - T* ptr = tensor->get_data_ptr(); - int i = 0; - for (auto& v : values) - ptr[i++] = v; - } - } -}; - -// TestTensor can be constructed from initializer_list/int64_t/Shape/Shape+initializer_list -// so each element of inputs can be: -// {1,2,3,4} tensor of shape [4] and values (1,2,3,4) -// 2 tensor of scalar with value 2 -// Shape{2,2} tensor of shape [2,2] and value unknown -// {Shape{2,2}, {1,2,3,4}} tensor of shape [2,2] and values (1,2,3,4) -inline void check_static_shape(ov::Node* op, - std::initializer_list inputs, - std::initializer_list expect_shapes) { - std::vector output_shapes; - std::vector input_shapes; - std::map> constData; - - int index = 0; - std::for_each(inputs.begin(), inputs.end(), [&](TestTensor t) { - input_shapes.push_back(t.static_shape); - if (t.tensor) - constData[index] = t.tensor; - index++; - }); - - output_shapes.resize(expect_shapes.size(), ov::intel_cpu::StaticShape{}); - - shape_inference(op, input_shapes, output_shapes, constData); +using ShapeVector = std::vector; - EXPECT_EQ(output_shapes.size(), expect_shapes.size()); - int id = 0; - for (auto& shape : expect_shapes) { - EXPECT_EQ(output_shapes[id], shape); - id++; - } -} +std::vector make_static_shape_refs(const ShapeVector& shapes); -inline void check_output_shape(ov::Node* op, std::initializer_list expect_shapes) { - int id = 0; - EXPECT_EQ(op->outputs().size(), expect_shapes.size()); - for (auto& shape : expect_shapes) { - EXPECT_EQ(op->get_output_partial_shape(id), shape); - id++; - } -} +ShapeVector shape_inference(ov::Node* op, + const ShapeVector& input_shapes, + const std::unordered_map& constant_data = {}); template class OpStaticShapeInferenceTest : public testing::Test { @@ -133,3 +35,6 @@ class OpStaticShapeInferenceTest : public testing::Test { return std::make_shared(std::forward(args)...); } }; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/variadic_split_shape_inference_tests.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/variadic_split_shape_inference_tests.cpp index a8840bc3838c09..d8c5b95f5d3c62 100644 --- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/variadic_split_shape_inference_tests.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/variadic_split_shape_inference_tests.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "gmock/gmock.h" +#include + #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/variadic_split.hpp" @@ -74,7 +75,7 @@ TEST_P(VariadicSplitStaticShapeInferenceTest, shape_inference_empty_const_map) { std::make_shared(element::i64, Shape{split_lengths.size()}, split_lengths); op = make_op(data, axis_node, split_len_node); - shape_inference(op.get(), input_shapes, output_shapes); + output_shapes = shape_inference(op.get(), input_shapes); EXPECT_EQ(output_shapes.size(), split_lengths.size()); EXPECT_EQ(output_shapes, exp_shapes); @@ -86,11 +87,10 @@ TEST_P(VariadicSplitStaticShapeInferenceTest, shape_inference_axis_in_const_map) std::make_shared(element::i64, Shape{split_lengths.size()}, split_lengths); op = 
make_op(data, axis_node, split_len_node); - const auto axis_const = std::make_shared(element::i64, ov::Shape{}, axis); - const auto axis_tensor = std::make_shared(axis_const); - const std::map>& constant_data = {{1, axis_tensor}}; + const auto axis_tensor = ov::Tensor(element::i64, ov::Shape{}, &axis); + const auto constant_data = std::unordered_map{{1, axis_tensor}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes.size(), split_lengths.size()); EXPECT_EQ(output_shapes, exp_shapes); @@ -101,16 +101,12 @@ TEST_P(VariadicSplitStaticShapeInferenceTest, shape_inference_all_const_in_map) const auto split_len_node = std::make_shared(element::i64, ov::PartialShape::dynamic()); op = make_op(data, axis_node, split_len_node); - const auto axis_const = std::make_shared(element::i64, Shape{}, axis); - const auto axis_tensor = std::make_shared(axis_const); - const auto split_len_const = - std::make_shared(element::i64, Shape{split_lengths.size()}, split_lengths); - const auto split_len_tensor = std::make_shared(split_len_const); + const auto axis_tensor = ov::Tensor(element::i64, Shape{}, &axis); + const auto split_len_tensor = ov::Tensor(element::i64, Shape{split_lengths.size()}, split_lengths.data()); - const std::map>& constant_data = {{2, split_len_tensor}, - {1, axis_tensor}}; + const auto constant_data = std::unordered_map{{2, split_len_tensor}, {1, axis_tensor}}; - shape_inference(op.get(), input_shapes, output_shapes, constant_data); + output_shapes = shape_inference(op.get(), input_shapes, constant_data); EXPECT_EQ(output_shapes.size(), split_lengths.size()); EXPECT_EQ(output_shapes, exp_shapes); From 693c6d7a11d75c35b37398f8eb9e04a7e22acd10 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 12 Sep 2023 13:26:45 +0200 Subject: [PATCH 30/31] Migrate the Abs operator to new API (#19763) --- src/core/include/openvino/op/abs.hpp | 4 +- .../include/openvino/reference/abs.hpp | 31 +++++-- .../include/openvino/reference/reduce_l1.hpp | 13 +-- src/core/src/op/abs.cpp | 90 +++++++++---------- 4 files changed, 66 insertions(+), 72 deletions(-) diff --git a/src/core/include/openvino/op/abs.hpp b/src/core/include/openvino/op/abs.hpp index cb9cf281dd8a6a..4c255d3e625fb3 100644 --- a/src/core/include/openvino/op/abs.hpp +++ b/src/core/include/openvino/op/abs.hpp @@ -30,9 +30,7 @@ class OPENVINO_API Abs : public util::UnaryElementwiseArithmetic { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v0 diff --git a/src/core/reference/include/openvino/reference/abs.hpp b/src/core/reference/include/openvino/reference/abs.hpp index d4163f3e8840df..c58e70c5b4de02 100644 --- a/src/core/reference/include/openvino/reference/abs.hpp +++ b/src/core/reference/include/openvino/reference/abs.hpp @@ -7,19 +7,32 @@ #include #include +#include "openvino/reference/utils/type_util.hpp" + namespace ov { namespace reference { -template ::value, bool>::type = true> -void abs(const T* arg, T* out, size_t count) { - std::copy(arg, arg + count, out); +namespace func { +template ::value>::type* = nullptr> +constexpr T abs(const T num) { + return num; +} + +template ::value || 
ov::is_floating_point()>::type* = nullptr> +T abs(const T num) { + return std::abs(num); } +} // namespace func -template ::value, bool>::type = true> -void abs(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - // TODO: generic "abs" doesn't work here for some reason. - out[i] = (arg[i] < T(0) ? T(-arg[i]) : arg[i]); - } +/** + * @brief Reference implementation of Abs operator. + * + * @param in Input pointer to data. + * @param out Output pointer to results. + * @param count Number of elements in input buffer. + */ +template +void abs(const T* in, T* out, const size_t count) { + std::transform(in, std::next(in, count), out, &func::abs); } } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/reduce_l1.hpp b/src/core/reference/include/openvino/reference/reduce_l1.hpp index aede07b693452f..a6c41f20a0f16f 100644 --- a/src/core/reference/include/openvino/reference/reduce_l1.hpp +++ b/src/core/reference/include/openvino/reference/reduce_l1.hpp @@ -7,6 +7,7 @@ #include #include +#include "openvino/reference/abs.hpp" #include "openvino/reference/sum.hpp" #include "openvino/reference/utils/type_util.hpp" #include "shape_util.hpp" @@ -14,18 +15,6 @@ namespace ov { namespace reference { -namespace func { -template ::value>::type* = nullptr> -constexpr T abs(const T num) { - return num; -} - -template ::value || ov::is_floating_point()>::type* = nullptr> -T abs(const T num) { - return std::abs(num); -} -} // namespace func - /** * @brief Reference implementation of ReduceL1 operator. * diff --git a/src/core/src/op/abs.cpp b/src/core/src/op/abs.cpp index cb4c093fbf3830..43e034e3dc0560 100644 --- a/src/core/src/op/abs.cpp +++ b/src/core/src/op/abs.cpp @@ -2,73 +2,67 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/abs.hpp" +#include "openvino/op/abs.hpp" +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/multiply.hpp" -#include "ngraph/op/sign.hpp" -#include "ngraph/runtime/host_tensor.hpp" #include "openvino/reference/abs.hpp" -ov::op::v0::Abs::Abs(const Output& arg) : UnaryElementwiseArithmetic(arg) { +namespace ov { +namespace op { +namespace abs { +struct Evaluate : ov::element::NoAction { + using ov::element::NoAction::visit; + + template + static result_type visit(const Tensor& in, Tensor& out, const size_t count) { + using T = typename element_type_traits::value_type; + reference::abs(in.data(), out.data(), count); + return true; + } +}; +} // namespace abs + +namespace v0 { +Abs::Abs(const Output& arg) : UnaryElementwiseArithmetic(arg) { constructor_validate_and_infer_types(); } -std::shared_ptr ov::op::v0::Abs::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Abs::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_Abs_clone_with_new_inputs); check_new_args_count(this, new_args); return std::make_shared(new_args.at(0)); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace absop { -namespace { -template -inline bool evaluate(const ngraph::HostTensorPtr& arg0, const ngraph::HostTensorPtr& out, const size_t count) { - using T = typename ov::element_type_traits::value_type; - ov::reference::abs((arg0->get_data_ptr()), (out->get_data_ptr()), count); - return true; -} - -bool evaluate_abs(const ngraph::HostTensorPtr& arg0, const ngraph::HostTensorPtr& out, const size_t count) { - bool rc = true; - out->set_unary(arg0); +bool Abs::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v0_Abs_evaluate); - switch 
(arg0->get_element_type()) { - NGRAPH_TYPE_CASE(evaluate_abs, i32, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_abs, i64, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_abs, u32, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_abs, u64, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_abs, f16, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_abs, f32, arg0, out, count); - NGRAPH_TYPE_CASE(evaluate_abs, bf16, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace absop + OPENVINO_ASSERT(inputs.size() == 1); + OPENVINO_ASSERT(outputs.size() == 1); + outputs[0].set_shape(inputs[0].get_shape()); -bool ov::op::v0::Abs::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v0_Abs_evaluate); - return absop::evaluate_abs(inputs[0], outputs[0], shape_size(inputs[0]->get_shape())); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(inputs[0].get_shape())); } -bool ov::op::v0::Abs::has_evaluate() const { +bool Abs::has_evaluate() const { OV_OP_SCOPE(v0_Abs_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: - case ngraph::element::bf16: + case element::bf16: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v0 +} // namespace op +} // namespace ov From e3f1ff7f2a711001eb9c002b5d618c96382b4a21 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 12 Sep 2023 13:27:51 +0200 Subject: [PATCH 31/31] Migrate mod op evaluate (#19687) --- src/core/dev_api/shape_util.hpp | 45 ++++++ .../include/openvino/reference/mod.hpp | 36 ++++- src/core/src/op/mod.cpp | 128 ++++++++---------- src/core/src/shape_util.cpp | 8 ++ src/plugins/template/backend/ops/mod.cpp | 12 +- 5 files changed, 141 insertions(+), 88 deletions(-) diff --git a/src/core/dev_api/shape_util.hpp b/src/core/dev_api/shape_util.hpp index dc86d0ec0b1a4b..fb935ed51275d9 100644 --- a/src/core/dev_api/shape_util.hpp +++ b/src/core/dev_api/shape_util.hpp @@ -27,10 +27,55 @@ OPENVINO_API Shape make_dynamic_shape(); OPENVINO_DEPRECATED("This function is deprecated and will be removed soon.") OPENVINO_API bool is_dynamic_shape(const Shape& s); +/** + * @brief Creates reduced shape from input by removing dimensions. + * + * @param input Input shape for reduce calculation. + * @param axes Reduction axes. + * @return Reduced shape. + */ OPENVINO_API Shape reduce(const Shape& input, const AxisSet& axes); + +/** + * @brief Creates reduced shape from input removing or replacing dimension. + * + * The reduction type depends on `keep_dims` flags. If it's set to true then reduced dimension will be replaced by `1`, + * otherwise removed. + * + * @param input Input shape for reduce calculation. + * @param axes Reduction axes. + * @param keep_dims Flag to keep reduced dimension. + * @return Reduced shape. + */ OPENVINO_API Shape reduce(const Shape& input, const AxisSet& axes, const bool keep_dims); + +/** + * @brief Creates reduced vector from input by removing elements. + * + * @param input Input vector for reduce calculation. + * @param axes Reduction axes. 
+ * @return Reduced vector + */ OPENVINO_API std::vector reduce(const std::vector& input, const AxisSet& axes); +/** + * @brief Creates reduced shape from input by replacing reduced dimension with `1`. + * + * @param input Input shape for reduce calculation. + * @param axes Reduction axes. + * @return Reduced shape. + */ OPENVINO_API Shape reduce_keep_dims(const Shape& input, const AxisSet& axes); + +/** + * @brief Get the broadcast shape as merge second shape into first according to broadcast specification. + * + * @param first First input shape. + * @param second Second input shape. + * @param broadcast_spec Broadcast specification. + * + * @return Result shape from inputs with applied broadcast specification. + */ +Shape get_broadcast_shape(const Shape& first, const Shape& second, const op::AutoBroadcastSpec& broadcast_spec); } // namespace util } // namespace ov diff --git a/src/core/reference/include/openvino/reference/mod.hpp b/src/core/reference/include/openvino/reference/mod.hpp index 76af9cbe95a306..81ae69e32ebfb2 100644 --- a/src/core/reference/include/openvino/reference/mod.hpp +++ b/src/core/reference/include/openvino/reference/mod.hpp @@ -8,19 +8,41 @@ #include #include "openvino/reference/autobroadcast_binop.hpp" +#include "openvino/reference/utils/type_util.hpp" namespace ov { namespace reference { -template -void mod(const T* arg0, - const T* arg1, - T* out, +namespace func { +template ::value>::type* = nullptr> +constexpr T mod(const T x, const T y) { + return x % y; +} + +template ()>::type* = nullptr> +T mod(const T x, const T y) { + return x - (std::trunc(x / y) * y); +} +} // namespace func + +/** + * @brief Reference implementation of binary elementwise Mod operator. + * + * @param arg0 Iterator to input 0 data. + * @param arg1 Iterator to input 1 data. + * @param out Iterator to output data. + * @param arg_shape0 Input 0 shape. + * @param arg_shape1 Input 1 shape. + * @param broadcast_spec Broadcast specification mode. 
+ */ +template +void mod(InputIt arg0, + InputIt arg1, + OutputIt out, const Shape& arg_shape0, const Shape& arg_shape1, const op::AutoBroadcastSpec& broadcast_spec) { - autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, [](T x, T y) -> T { - return static_cast(x - std::truncf(static_cast(x / y)) * y); - }); + using T = typename std::iterator_traits::value_type; + autobroadcast_binop(arg0, arg1, out, arg_shape0, arg_shape1, broadcast_spec, &func::mod); } } // namespace reference } // namespace ov diff --git a/src/core/src/op/mod.cpp b/src/core/src/op/mod.cpp index 8e605815b88846..00c245514917e1 100644 --- a/src/core/src/op/mod.cpp +++ b/src/core/src/op/mod.cpp @@ -2,101 +2,79 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/mod.hpp" +#include "openvino/op/mod.hpp" +#include "element_visitor.hpp" #include "itt.hpp" #include "openvino/reference/mod.hpp" +#include "shape_util.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace mod { +struct Evaluate : ov::element::NoAction { + using ov::element::NoAction::visit; -// ------------------------------ v1 ------------------------------------------- + template + static result_type visit(const Tensor& in0, + const Tensor& in1, + Tensor& out, + const AutoBroadcastSpec& broadcast_spec) { + using T = typename element_type_traits::value_type; + reference::mod(in0.data(), + in1.data(), + out.data(), + in0.get_shape(), + in1.get_shape(), + broadcast_spec); + return true; + } +}; +} // namespace mod -op::v1::Mod::Mod(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) +namespace v1 { +v1::Mod::Mod(const Output& arg0, const Output& arg1, const AutoBroadcastSpec& auto_broadcast) : BinaryElementwiseArithmetic(arg0, arg1, auto_broadcast) { constructor_validate_and_infer_types(); } -shared_ptr op::v1::Mod::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr Mod::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_Mod_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), this->get_autob()); -} - -namespace mod_op { -namespace { -template -bool evaluate(const ov::Tensor& arg0, - const ov::Tensor& arg1, - const ov::Tensor& out, - const op::AutoBroadcastSpec& broadcast_spec) { - ov::reference::mod(arg0.data(), - arg1.data(), - out.data(), - arg0.get_shape(), - arg1.get_shape(), - broadcast_spec); - return true; + return std::make_shared(new_args.at(0), new_args.at(1), this->get_autob()); } -bool evaluate_mod(const ov::Tensor& arg0, - const ov::Tensor& arg1, - const ov::Tensor& out, - const op::AutoBroadcastSpec& broadcast_spec) { - bool rc = true; - switch (arg0.get_element_type()) { - case ov::element::Type_t::i8: { - rc = evaluate(arg0, arg1, out, broadcast_spec); - } break; - case ov::element::Type_t::i16: { - rc = evaluate(arg0, arg1, out, broadcast_spec); - } break; - case ov::element::Type_t::i32: { - rc = evaluate(arg0, arg1, out, broadcast_spec); - } break; - case ov::element::Type_t::i64: { - rc = evaluate(arg0, arg1, out, broadcast_spec); - } break; - case ov::element::Type_t::u8: { - rc = evaluate(arg0, arg1, out, broadcast_spec); - } break; - case ov::element::Type_t::u16: { - rc = evaluate(arg0, arg1, out, broadcast_spec); - } break; - case ov::element::Type_t::u32: { - rc = evaluate(arg0, arg1, out, broadcast_spec); - } break; - case ov::element::Type_t::u64: { - rc = evaluate(arg0, arg1, out, broadcast_spec); - } break; - default: 
- rc = false; - break; - } - return rc; -} -} // namespace -} // namespace mod_op - -bool op::v1::Mod::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { +bool Mod::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const { OV_OP_SCOPE(v1_Mod_evaluate); - return mod_op::evaluate_mod(inputs[0], inputs[1], outputs[0], get_autob()); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 2); + + outputs[0].set_shape(ov::util::get_broadcast_shape(inputs[0].get_shape(), inputs[1].get_shape(), get_autob())); + using namespace ov::element; + return IfTypeOf::apply(inputs[0].get_element_type(), + inputs[0], + inputs[1], + outputs[0], + get_autob()); } -bool op::v1::Mod::has_evaluate() const { +bool Mod::has_evaluate() const { OV_OP_SCOPE(v1_Mod_has_evaluate); + switch (get_input_element_type(0)) { - case ngraph::element::i8: - case ngraph::element::i16: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u8: - case ngraph::element::u16: - case ngraph::element::u32: - case ngraph::element::u64: + case element::i8: + case element::i16: + case element::i32: + case element::i64: + case element::u8: + case element::u16: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } +} // namespace v1 +} // namespace op +} // namespace ov diff --git a/src/core/src/shape_util.cpp b/src/core/src/shape_util.cpp index c0fc6219ec24d6..411adb7ba5b6a5 100644 --- a/src/core/src/shape_util.cpp +++ b/src/core/src/shape_util.cpp @@ -6,6 +6,7 @@ #include +#include "openvino/core/partial_shape.hpp" #include "shape_util.hpp" using namespace ngraph; @@ -128,5 +129,12 @@ std::vector reduce(const std::vector& input, const AxisSet& axes Shape reduce_keep_dims(const Shape& input, const AxisSet& axes) { return ov::replace_container(input, axes); } + +Shape get_broadcast_shape(const Shape& first, const Shape& second, const op::AutoBroadcastSpec& broadcast_spec) { + auto out_shape = PartialShape(first); + OPENVINO_ASSERT(PartialShape::broadcast_merge_into(out_shape, second, broadcast_spec), + "Argument shapes are inconsistent"); + return out_shape.to_shape(); +} } // namespace util } // namespace ov diff --git a/src/plugins/template/backend/ops/mod.cpp b/src/plugins/template/backend/ops/mod.cpp index e84ef8ffe01d5f..a8e4ac1cbc44f7 100644 --- a/src/plugins/template/backend/ops/mod.cpp +++ b/src/plugins/template/backend/ops/mod.cpp @@ -11,12 +11,12 @@ bool evaluate(const std::shared_ptr& op, const ngraph::HostTensorVector& outputs, const ngraph::HostTensorVector& inputs) { using T = typename ngraph::element_type_traits::value_type; - ov::reference::mod(inputs[0]->get_data_ptr(), - inputs[1]->get_data_ptr(), - outputs[0]->get_data_ptr(), - inputs[0]->get_shape(), - inputs[1]->get_shape(), - op->get_autob()); + ov::reference::mod(inputs[0]->get_data_ptr(), + inputs[1]->get_data_ptr(), + outputs[0]->get_data_ptr(), + inputs[0]->get_shape(), + inputs[1]->get_shape(), + op->get_autob()); return true; }
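
Note (editor, not part of the commits above): patches 30 and 31 share one migration pattern, replacing the deprecated HostTensor-based evaluate with the plain ov::Tensor overload plus element::IfTypeOf dispatch onto the reference functors. The following is a minimal usage sketch of that new entry point, written against the Abs overload added in PATCH 30. The header paths, constructors, and the evaluate(TensorVector&, const TensorVector&) signature are assumptions based on these diffs, not something the patch series itself adds.

    // Minimal sketch: call the Tensor-based Abs::evaluate directly (assumes OpenVINO headers below).
    #include <iostream>
    #include <memory>

    #include "openvino/core/shape.hpp"
    #include "openvino/op/abs.hpp"
    #include "openvino/op/parameter.hpp"
    #include "openvino/runtime/tensor.hpp"

    int main() {
        // Build a standalone Abs node over an f32 parameter of shape [4].
        const auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{4});
        const auto abs = std::make_shared<ov::op::v0::Abs>(param);

        // Inputs and outputs are plain ov::Tensor objects; no HostTensor wrappers are involved.
        ov::Tensor in(ov::element::f32, ov::Shape{4});
        float* in_data = in.data<float>();
        in_data[0] = -1.5f;
        in_data[1] = 2.0f;
        in_data[2] = -0.0f;
        in_data[3] = -7.0f;

        const ov::TensorVector inputs{in};
        ov::TensorVector outputs{ov::Tensor(ov::element::f32, ov::Shape{4})};

        // evaluate() checks arity, sets the output shape, then dispatches per element type.
        if (abs->has_evaluate() && abs->evaluate(outputs, inputs)) {
            const float* out_data = outputs[0].data<float>();
            for (size_t i = 0; i < outputs[0].get_size(); ++i)
                std::cout << out_data[i] << ' ';  // expected: 1.5 2 0 7
            std::cout << '\n';
        }
        return 0;
    }

The migrated Mod::evaluate in PATCH 31 follows the same shape, except that it takes two inputs and derives the output shape from ov::util::get_broadcast_shape before dispatching to reference::mod.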