diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml
index 95edce67c2652d..a04e2004eaf470 100644
--- a/.github/workflows/job_python_unit_tests.yml
+++ b/.github/workflows/job_python_unit_tests.yml
@@ -68,7 +68,7 @@ jobs:

       - name: Install OpenVINO dependencies (Linux)
         if: runner.os == 'Linux'
-        run: $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -y
+        run: $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -y -c=gpu

       - name: Fetch setup_python action
         uses: actions/checkout@v4
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 024b90c947b32f..fb17cc6d54aeaa 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -621,7 +621,7 @@ jobs:
  Overall_Status:
    name: ci/gha_overall_status
    needs: [Smart_CI, Build, Debian_Packages, Samples, Conformance, ONNX_Runtime, CXX_Unit_Tests, Python_Unit_Tests,
-           CPU_Functional_Tests, TensorFlow_Hub_Models_Tests, PyTorch_Models_Tests, NVIDIA_Plugin, ONNX_Models]
+           CPU_Functional_Tests, TensorFlow_Hub_Models_Tests, PyTorch_Models_Tests, NVIDIA_Plugin]
    if: ${{ always() }}
    runs-on: ubuntu-latest
    steps:
diff --git a/src/bindings/python/src/openvino/runtime/opset13/__init__.py b/src/bindings/python/src/openvino/runtime/opset13/__init__.py
index 032d55ce841cd2..7f330fbc87766d 100644
--- a/src/bindings/python/src/openvino/runtime/opset13/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/opset13/__init__.py
@@ -57,7 +57,7 @@
 from openvino.runtime.opset1.ops import exp
 from openvino.runtime.opset9.ops import eye
 from openvino.runtime.opset13.ops import fake_convert
-from openvino.runtime.opset1.ops import fake_quantize
+from openvino.runtime.opset13.ops import fake_quantize
 from openvino.runtime.opset1.ops import floor
 from openvino.runtime.opset1.ops import floor_mod
 from openvino.runtime.opset8.ops import gather
diff --git a/src/bindings/python/src/openvino/runtime/opset13/ops.py b/src/bindings/python/src/openvino/runtime/opset13/ops.py
index 302e0e8b5df870..0c7ce39c5f3572 100644
--- a/src/bindings/python/src/openvino/runtime/opset13/ops.py
+++ b/src/bindings/python/src/openvino/runtime/opset13/ops.py
@@ -15,7 +15,7 @@
 from openvino.runtime.op import Constant, Result
 from openvino.runtime.opset1 import convert_like
 from openvino.runtime.opset_utils import _get_node_factory
-from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op
+from openvino.runtime.utils.decorators import apply_affix_on, binary_op, nameable_op, unary_op
 from openvino.runtime.utils.types import (
     NumericData,
     NodeInput,
@@ -347,3 +347,59 @@ def result(data: Union[Node, Output, NumericData], name: Optional[str] = None) -> Node:
     if isinstance(data, Node):
         return Result(data.output(0))
     return Result(data)
+
+
+@nameable_op
+@apply_affix_on("data", "input_low", "input_high", "output_low", "output_high")
+def fake_quantize(
+    data: NodeInput,
+    input_low: NodeInput,
+    input_high: NodeInput,
+    output_low: NodeInput,
+    output_high: NodeInput,
+    levels: int,
+    auto_broadcast: str = "NUMPY",
+    name: Optional[str] = None,
+    *,
+    prefix: Optional[str] = None,
+    suffix: Optional[str] = None,
+) -> Node:
+    r"""Perform an element-wise linear quantization on input data.
+
+    :param data: The node with data tensor.
+    :param input_low: The node with the minimum for input values.
+    :param input_high: The node with the maximum for input values.
+    :param output_low: The node with the minimum quantized value.
+    :param output_high: The node with the maximum quantized value.
+    :param levels: The number of quantization levels. Integer value.
+    :param auto_broadcast: The type of broadcasting that specifies the rules used
+                           for auto-broadcasting of input tensors.
+    :param name: Optional name of the new node.
+    :param prefix: Optional keyword-only string applied before the original names of
+                   all auto-generated input nodes (for example: inputs passed as numpy arrays).
+    :param suffix: Optional keyword-only string applied after the original names of
+                   all auto-generated input nodes (for example: inputs passed as numpy arrays).
+    :return: New node with quantized value.
+
+    Input floating point values are quantized into a discrete set of floating point values.
+
+    .. code-block:: python
+
+        if x <= input_low:
+            output = output_low
+        elif x > input_high:
+            output = output_high
+        else:
+            output = fake_quantize(x)
+
+    Fake quantize uses the following logic:
+
+    \f[ output =
+        \dfrac{round \left( \dfrac{data - input\_low}{input\_high - input\_low} \cdot (levels-1) \right)}
+        {levels-1} \cdot (output\_high - output\_low) + output\_low \f]
+    """
+    return _get_node_factory_opset13().create(
+        "FakeQuantize",
+        as_nodes(data, input_low, input_high, output_low, output_high),
+        {"levels": levels, "auto_broadcast": auto_broadcast.upper()},
+    )
diff --git a/src/bindings/python/src/openvino/runtime/utils/decorators.py b/src/bindings/python/src/openvino/runtime/utils/decorators.py
index 4cef82ac03d1e7..d29a31938526b2 100644
--- a/src/bindings/python/src/openvino/runtime/utils/decorators.py
+++ b/src/bindings/python/src/openvino/runtime/utils/decorators.py
@@ -3,7 +3,8 @@
 # SPDX-License-Identifier: Apache-2.0

 from functools import wraps
-from typing import Any, Callable
+from inspect import getfullargspec
+from typing import Any, Callable, List

 from openvino.runtime import Node, Output
 from openvino.runtime.utils.types import NodeInput, as_node, as_nodes
@@ -27,6 +28,33 @@ def wrapper(*args: Any, **kwargs: Any) -> Node:
     return wrapper


+def _apply_affix(node: Node, prefix: str = "", suffix: str = "") -> Node:
+    node.friendly_name = prefix + node.friendly_name + suffix
+    return node
+
+
+def apply_affix_on(*node_names: Any) -> Callable:
+    """Add a prefix and/or a suffix to the friendly names of the operator arguments listed by name."""
+
+    def decorator(func: Callable) -> Callable:
+        @wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> Node:
+            arg_names = getfullargspec(func).args
+            arg_mapping = dict(zip(arg_names, args))
+            for node_name in node_names:
+                # Apply affixes only to auto-generated nodes: create the node here, then rename it.
+                # Any Node instance supplied by the user keeps its name as-is.
+                if node_name in arg_mapping and not isinstance(arg_mapping[node_name], Node):
+                    arg_mapping[node_name] = _apply_affix(as_node(arg_mapping[node_name]),
+                                                          prefix=kwargs.get("prefix") or "",  # None => no prefix
+                                                          suffix=kwargs.get("suffix") or "",  # None => no suffix
+                                                          )
+            results = func(**arg_mapping, **kwargs)
+            return results
+        return wrapper
+    return decorator
+
+
 def unary_op(node_factory_function: Callable) -> Callable:
     """Convert the first input value to a Constant Node if a numeric value is detected."""
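For context (an annotation, not part of the patch): a minimal sketch of how `apply_affix_on` behaves once this change is in place. The `fq_`/`_in` affixes and the exact auto-generated `Constant_*` names are illustrative only.

```python
import numpy as np
import openvino.runtime.opset13 as ov

# Every input below is a plain numpy value, so fake_quantize converts each one
# to a Constant node via as_node(), and apply_affix_on renames exactly those
# auto-generated nodes.
node = ov.fake_quantize(
    np.ones((1, 2, 3, 4), dtype=np.float32),  # data
    np.float32(0.0),                          # input_low
    np.float32(23.0),                         # input_high
    np.float32(2.0),                          # output_low
    np.float32(16.0),                         # output_high
    levels=4,
    prefix="fq_",
    suffix="_in",
)

for node_input in node.inputs():
    producer = node_input.get_source_output().get_node()
    # e.g. "fq_Constant_7_in"; the middle part is whatever friendly name
    # the runtime generated for the Constant.
    assert producer.friendly_name.startswith("fq_")
    assert producer.friendly_name.endswith("_in")
```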
diff --git a/src/bindings/python/tests/test_graph/test_affix_ops.py b/src/bindings/python/tests/test_graph/test_affix_ops.py
new file mode 100644
index 00000000000000..717ce1d11cc0f0
--- /dev/null
+++ b/src/bindings/python/tests/test_graph/test_affix_ops.py
@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pytest
+import re
+
+import openvino.runtime.opset13 as ov
+from openvino import Type
+
+
+@pytest.mark.parametrize("prefix_string", [
+    "ABC",
+    "custom_prefix_",
+])
+@pytest.mark.parametrize("suffix_string", [
+    "XYZ",
+    "_custom_suffix",
+])
+def test_fake_quantize_affix_fails(prefix_string, suffix_string):
+    levels = np.int32(4)
+    data_shape = [1, 2, 3, 4]
+    bound_shape = []
+
+    data_name = "data"
+    parameter_data = ov.parameter(data_shape, name=data_name, dtype=np.float32)
+
+    input_low_name = "input_low"
+    parameter_input_low = ov.parameter(bound_shape, name=input_low_name, dtype=np.float32)
+
+    input_high_name = "input_high"
+    parameter_input_high = ov.parameter(bound_shape, name=input_high_name, dtype=np.float32)
+
+    output_low_name = "output_low"
+    parameter_output_low = ov.parameter(bound_shape, name=output_low_name, dtype=np.float32)
+
+    output_high_name = "output_high"
+    parameter_output_high = ov.parameter(bound_shape, name=output_high_name, dtype=np.float32)
+
+    model = ov.fake_quantize(
+        parameter_data,
+        parameter_input_low,
+        parameter_input_high,
+        parameter_output_low,
+        parameter_output_high,
+        levels,
+        prefix=prefix_string,
+        suffix=suffix_string,
+    )
+
+    # Check if node was created correctly
+    assert model.get_type_name() == "FakeQuantize"
+    assert model.get_output_size() == 1
+    assert list(model.get_output_shape(0)) == [1, 2, 3, 4]
+
+    assert data_name == parameter_data.friendly_name
+    assert input_low_name == parameter_input_low.friendly_name
+    assert input_high_name == parameter_input_high.friendly_name
+    assert output_low_name == parameter_output_low.friendly_name
+    assert output_high_name == parameter_output_high.friendly_name
+
+
+@pytest.mark.parametrize("prefix_string", [
+    "",
+    "ABC",
+    "custom_prefix_",
+])
+@pytest.mark.parametrize("suffix_string", [
+    "",
+    "XYZ",
+    "_custom_suffix",
+])
+def test_fake_quantize_affix(prefix_string, suffix_string):
+    levels = np.int32(4)
+    data_shape = [1, 2, 3, 4]
+    bound_shape = [1]
+
+    a_arr = np.ones(data_shape, dtype=np.float32)
+    b_arr = np.array(bound_shape, dtype=np.float32)
+    c_arr = np.array(bound_shape, dtype=np.float32)
+    d_arr = np.array(bound_shape, dtype=np.float32)
+    e_arr = np.array(bound_shape, dtype=np.float32)
+
+    model = ov.fake_quantize(
+        a_arr,
+        b_arr,
+        c_arr,
+        d_arr,
+        e_arr,
+        levels,
+        prefix=prefix_string,
+        suffix=suffix_string,
+    )
+
+    # Check if node was created correctly
+    assert model.get_type_name() == "FakeQuantize"
+    assert model.get_output_size() == 1
+    assert list(model.get_output_shape(0)) == [1, 2, 3, 4]
+    # Check that the FakeQuantize node itself was not renamed:
+    if prefix_string != "":
+        assert prefix_string not in model.friendly_name
+    if suffix_string != "":
+        assert suffix_string not in model.friendly_name
+    # Check that the auto-generated input nodes were renamed:
+    for node_input in model.inputs():
+        generated_node = node_input.get_source_output().get_node()
+        assert prefix_string in generated_node.friendly_name
+        assert suffix_string in generated_node.friendly_name
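Condensed to its core (again an annotation with illustrative affixes), the guarantee that `test_fake_quantize_affix_fails` pins down: inputs that are already `Node` instances bypass the decorator's `as_node()` branch and are never renamed.

```python
import numpy as np
import openvino.runtime.opset13 as ov

# User-created Parameters are Node instances, so requesting affixes
# leaves their friendly names untouched.
data = ov.parameter([1, 2, 3, 4], name="data", dtype=np.float32)
in_low = ov.parameter([], name="input_low", dtype=np.float32)
in_high = ov.parameter([], name="input_high", dtype=np.float32)
out_low = ov.parameter([], name="output_low", dtype=np.float32)
out_high = ov.parameter([], name="output_high", dtype=np.float32)

node = ov.fake_quantize(data, in_low, in_high, out_low, out_high,
                        levels=4, prefix="fq_", suffix="_in")

assert data.friendly_name == "data"          # unchanged
assert in_low.friendly_name == "input_low"   # unchanged
```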
diff --git a/src/core/dev_api/validation_util.hpp b/src/core/dev_api/validation_util.hpp
index 57430f5cef35ca..c2e8729bccfa76 100644
--- a/src/core/dev_api/validation_util.hpp
+++ b/src/core/dev_api/validation_util.hpp
@@ -56,12 +56,12 @@ OPENVINO_API std::shared_ptr<op::v0::Constant> get_constant_from_source(const Output<Node>& source);
 /// \brief Make scalar tensor which stores maximum value of ov::element::Type.
 /// \param et Element type to get its maximum.
 /// \return Tensor with maximum value.
-Tensor make_tensor_of_max_value(const element::Type_t et);
+OPENVINO_API Tensor make_tensor_of_max_value(const element::Type_t et);

 /// \brief Make scalar tensor which stores minimum value of ov::element::Type.
 /// \param et Element type to get its minimum.
 /// \return Tensor with minimum value.
-Tensor make_tensor_of_min_value(const element::Type_t et);
+OPENVINO_API Tensor make_tensor_of_min_value(const element::Type_t et);

 /// @brief Get the tensors shapes as ov::PartialShape.
 ///
diff --git a/src/core/include/ngraph/evaluator.hpp b/src/core/include/ngraph/evaluator.hpp
index 486b3135ad3d5c..b81028e20fbe04 100644
--- a/src/core/include/ngraph/evaluator.hpp
+++ b/src/core/include/ngraph/evaluator.hpp
@@ -114,7 +114,7 @@ class NGRAPH_API_DEPRECATED Evaluator {
     /// \brief Ensure value has been analyzed
     class ValueInst : public Inst {
     public:
-        ValueInst(const Output<Node>& value) : Inst(value.get_node()), m_index(value.get_index()) {}
+        ValueInst(const ov::Output<Node>& value) : Inst(value.get_node()), m_index(value.get_index()) {}

         ValueInst(const RawNodeOutput& value) : Inst(value.node), m_index(value.index) {}

@@ -162,7 +162,7 @@ class NGRAPH_API_DEPRECATED Evaluator {

 public:
     /// \brief Determine information about value
-    V evaluate(const Output<Node>& value) {
+    V evaluate(const ov::Output<Node>& value) {
         InstStack inst_stack;
         inst_stack.push(InstPtr(new ValueInst(value)));
         while (!inst_stack.empty()) {
diff --git a/src/core/include/ngraph/graph_util.hpp b/src/core/include/ngraph/graph_util.hpp
index 7b91dd34cffe2c..31c4d5859a83e0 100644
--- a/src/core/include/ngraph/graph_util.hpp
+++ b/src/core/include/ngraph/graph_util.hpp
@@ -124,7 +124,7 @@ bool is_post_dominated(Node* X, Node* Y);

 NGRAPH_API_DEPRECATED
 NGRAPH_API
-bool is_equal_to_const_value(const std::string& const_value, const Output<Node>& reduce_constant);
+bool is_equal_to_const_value(const std::string& const_value, const ov::Output<Node>& reduce_constant);

 // input nodes are cloned and returned
 // NodeMap input may contain default node mapping i.e.
pre-cloned nodes @@ -156,15 +156,17 @@ void insert_new_node_between(const std::shared_ptr& src_node, NGRAPH_API_DEPRECATED NGRAPH_API -std::shared_ptr make_zero(const element::Type& element_type, const Shape& shape); +std::shared_ptr make_zero(const ov::element::Type& element_type, const ov::Shape& shape); NGRAPH_API_DEPRECATED NGRAPH_API -std::shared_ptr make_constant_from_string(std::string val, const element::Type& element_type, const Shape& shape); +std::shared_ptr make_constant_from_string(std::string val, + const ov::element::Type& element_type, + const ov::Shape& shape); NGRAPH_API_DEPRECATED NGRAPH_API -bool is_zero(const Output& reduce_constant); +bool is_zero(const ov::Output& reduce_constant); NGRAPH_API_DEPRECATED NGRAPH_API @@ -182,7 +184,7 @@ NodeVector extract_subgraph(const NodeVector& results, const NodeVector& args); NGRAPH_API_DEPRECATED NGRAPH_API -bool is_one(const Output& reduce_constant); +bool is_one(const ov::Output& reduce_constant); // Returns true if `node` is live in the graph i.e. a result op // transitively uses this `node` @@ -213,12 +215,12 @@ void plot_graph(std::shared_ptr f, /// of `src`. NGRAPH_API_DEPRECATED NGRAPH_API -std::vector> get_inputs_from(Node& src, Node& dst); +std::vector> get_inputs_from(Node& src, Node& dst); /// \return A vector containing a handle for each output of src that is connected to an input /// of `dst`. NGRAPH_API_DEPRECATED NGRAPH_API -std::vector> get_outputs_to(Node& src, Node& dst); +std::vector> get_outputs_to(Node& src, Node& dst); /// Checks the func for graph cycles starting from results going backwards, then from parameters /// going forward. diff --git a/src/core/include/ngraph/ngraph.hpp b/src/core/include/ngraph/ngraph.hpp index fa6b9465279794..9acaf23c5755e1 100644 --- a/src/core/include/ngraph/ngraph.hpp +++ b/src/core/include/ngraph/ngraph.hpp @@ -59,7 +59,6 @@ #include "ngraph/shape.hpp" #include "ngraph/specialize_function.hpp" #include "ngraph/type/element_type.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/core/descriptor/input.hpp" #include "openvino/core/descriptor/output.hpp" #include "openvino/core/descriptor/tensor.hpp" diff --git a/src/core/include/ngraph/node.hpp b/src/core/include/ngraph/node.hpp index 58b2fd06df6109..5d08235e10a4fc 100644 --- a/src/core/include/ngraph/node.hpp +++ b/src/core/include/ngraph/node.hpp @@ -31,8 +31,6 @@ #include "ngraph/check.hpp" #include "ngraph/coordinate_diff.hpp" #include "ngraph/deprecated.hpp" -#include "ngraph/node_input.hpp" -#include "ngraph/node_output.hpp" #include "ngraph/op/util/attr_types.hpp" #include "ngraph/output_vector.hpp" #include "ngraph/strides.hpp" @@ -90,7 +88,7 @@ using NodeTypeInfo = Node::type_info_t; // Like an Output but with a Node* instead of a shared_ptr using ov::RawNodeOutput; -using RawNodeOutputMap = std::map>; +using RawNodeOutputMap = std::map>; using ov::check_new_args_count; diff --git a/src/core/include/ngraph/node_input.hpp b/src/core/include/ngraph/node_input.hpp deleted file mode 100644 index bac74bc0d312be..00000000000000 --- a/src/core/include/ngraph/node_input.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#if !defined(IN_OV_COMPONENT) && !defined(NGRAPH_LEGACY_HEADER_INCLUDED) -# define NGRAPH_LEGACY_HEADER_INCLUDED -# ifdef _MSC_VER -# pragma message( \ - "The nGraph API is deprecated and will be removed in the 2024.0 release. 
For instructions on transitioning to the new API, please refer to https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html") -# else -# warning("The nGraph API is deprecated and will be removed in the 2024.0 release. For instructions on transitioning to the new API, please refer to https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html") -# endif -#endif - -#include -#include - -#include "ngraph/partial_shape.hpp" -#include "ngraph/shape.hpp" -#include "ngraph/type/element_type.hpp" -#include "openvino/core/descriptor/tensor.hpp" -#include "openvino/core/node_input.hpp" - -namespace ngraph { -using ov::Input; -using ov::Node; -} // namespace ngraph diff --git a/src/core/include/ngraph/node_output.hpp b/src/core/include/ngraph/node_output.hpp deleted file mode 100644 index f8f1da44b2eff9..00000000000000 --- a/src/core/include/ngraph/node_output.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#if !defined(IN_OV_COMPONENT) && !defined(NGRAPH_LEGACY_HEADER_INCLUDED) -# define NGRAPH_LEGACY_HEADER_INCLUDED -# ifdef _MSC_VER -# pragma message( \ - "The nGraph API is deprecated and will be removed in the 2024.0 release. For instructions on transitioning to the new API, please refer to https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html") -# else -# warning("The nGraph API is deprecated and will be removed in the 2024.0 release. For instructions on transitioning to the new API, please refer to https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html") -# endif -#endif - -#include -#include -#include - -#include "ngraph/partial_shape.hpp" -#include "ngraph/shape.hpp" -#include "ngraph/type/element_type.hpp" -#include "openvino/core/descriptor/tensor.hpp" -#include "openvino/core/node_output.hpp" - -namespace ngraph { -using ov::Node; -using ov::Output; -} // namespace ngraph diff --git a/src/core/include/ngraph/specialize_function.hpp b/src/core/include/ngraph/specialize_function.hpp index b428bb8e45f47f..96a92683e79a1d 100644 --- a/src/core/include/ngraph/specialize_function.hpp +++ b/src/core/include/ngraph/specialize_function.hpp @@ -102,7 +102,7 @@ namespace ngraph { NGRAPH_API_DEPRECATED NGRAPH_API std::shared_ptr specialize_function(std::shared_ptr f, - const std::vector& parameter_element_types, - const std::vector& parameter_shapes, + const std::vector& parameter_element_types, + const std::vector& parameter_shapes, const std::vector& parameter_values); } // namespace ngraph diff --git a/src/core/include/ngraph/util.hpp b/src/core/include/ngraph/util.hpp index f08b70233a0c2b..372347e68669d4 100644 --- a/src/core/include/ngraph/util.hpp +++ b/src/core/include/ngraph/util.hpp @@ -206,15 +206,15 @@ AxisVector get_default_order(size_t rank); NGRAPH_API NGRAPH_API_DEPRECATED -AxisVector get_default_order(const Rank& rank); +AxisVector get_default_order(const ov::Rank& rank); NGRAPH_API NGRAPH_API_DEPRECATED -AxisVector get_default_order(const Shape& shape); +AxisVector get_default_order(const ov::Shape& shape); NGRAPH_API NGRAPH_API_DEPRECATED -AxisVector get_default_order(const PartialShape& shape); +AxisVector get_default_order(const ov::PartialShape& shape); /// \brief Function to query parsed version information of the version of ngraph which /// contains this function. 
Version information strictly follows Semantic Versioning diff --git a/src/core/include/ngraph/validation_util.hpp b/src/core/include/ngraph/validation_util.hpp index 09d485791b99b7..fc6f2245927e5f 100644 --- a/src/core/include/ngraph/validation_util.hpp +++ b/src/core/include/ngraph/validation_util.hpp @@ -31,26 +31,28 @@ using ov::op::v0::Constant; NGRAPH_API_DEPRECATED NGRAPH_API -Strides conv_default_strides(const Node* node, const PartialShape& data_batch_shape, const PartialShape& filters_shape); +Strides conv_default_strides(const Node* node, + const ov::PartialShape& data_batch_shape, + const ov::PartialShape& filters_shape); NGRAPH_API_DEPRECATED NGRAPH_API CoordinateDiff conv_default_padding(const Node* node, - const PartialShape& data_batch_shape, - const PartialShape& filters_shape); + const ov::PartialShape& data_batch_shape, + const ov::PartialShape& filters_shape); NGRAPH_API_DEPRECATED NGRAPH_API -PartialShape infer_windowed_reduction_output_shape(const Node* node, - const PartialShape& data_shape, - const Strides& data_dilation, - const CoordinateDiff& data_padding_below, - const CoordinateDiff& data_padding_above, - const PartialShape& window_shape, - const Strides& window_strides, - const Strides& window_dilation, - bool is_window_all_in_padding_allowed, - bool ceil_mode = false); +ov::PartialShape infer_windowed_reduction_output_shape(const Node* node, + const ov::PartialShape& data_shape, + const Strides& data_dilation, + const CoordinateDiff& data_padding_below, + const CoordinateDiff& data_padding_above, + const ov::PartialShape& window_shape, + const Strides& window_strides, + const Strides& window_dilation, + bool is_window_all_in_padding_allowed, + bool ceil_mode = false); NGRAPH_API_DEPRECATED void validate_conv_params_spatial_dimensions(const Node* node, @@ -63,59 +65,61 @@ void validate_conv_params_spatial_dimensions(const Node* node, NGRAPH_API_DEPRECATED NGRAPH_API -PartialShape infer_batched_pooling_forward(const Node* node, - const PartialShape& data_batch_shape, - const CoordinateDiff& data_padding_below, - const CoordinateDiff& data_padding_above, - const PartialShape& window_shape, - const Strides& window_strides, - bool is_window_all_in_padding_allowed, - bool ceil_mode = false, - const Strides& window_dilation = Strides{}); +ov::PartialShape infer_batched_pooling_forward(const Node* node, + const ov::PartialShape& data_batch_shape, + const CoordinateDiff& data_padding_below, + const CoordinateDiff& data_padding_above, + const ov::PartialShape& window_shape, + const Strides& window_strides, + bool is_window_all_in_padding_allowed, + bool ceil_mode = false, + const Strides& window_dilation = Strides{}); NGRAPH_API_DEPRECATED NGRAPH_API -std::tuple infer_batch_norm_forward(const Node* node, - element::Type input_element_type, - element::Type gamma_element_type, - element::Type beta_element_type, - element::Type mean_element_type, - element::Type variance_element_type, - const PartialShape& input_shape, - const PartialShape& gamma_shape, - const PartialShape& beta_shape, - const PartialShape& mean_shape, - const PartialShape& variance_shape); +std::tuple infer_batch_norm_forward( + const Node* node, + element::Type input_element_type, + element::Type gamma_element_type, + element::Type beta_element_type, + element::Type mean_element_type, + element::Type variance_element_type, + const ov::PartialShape& input_shape, + const ov::PartialShape& gamma_shape, + const ov::PartialShape& beta_shape, + const ov::PartialShape& mean_shape, + const ov::PartialShape& 
variance_shape); NGRAPH_API_DEPRECATED NGRAPH_API -std::tuple infer_batch_norm_forward(const Node* node, - element::Type input_element_type, - element::Type gamma_element_type, - element::Type beta_element_type, - const PartialShape& input_shape, - const PartialShape& gamma_shape, - const PartialShape& beta_shape); +std::tuple infer_batch_norm_forward( + const Node* node, + element::Type input_element_type, + element::Type gamma_element_type, + element::Type beta_element_type, + const ov::PartialShape& input_shape, + const ov::PartialShape& gamma_shape, + const ov::PartialShape& beta_shape); NGRAPH_API_DEPRECATED NGRAPH_API -PartialShape infer_slice_shape(const Node* node, - const PartialShape& input_shape, - const std::vector& begin, - const std::vector& end, - const std::vector& strides, - const AxisSet& begin_mask, - const AxisSet& end_mask, - const AxisSet& new_axis_mask, - const AxisSet& shrink_axis_mask, - const AxisSet& ellipsis_mask); +ov::PartialShape infer_slice_shape(const Node* node, + const ov::PartialShape& input_shape, + const std::vector& begin, + const std::vector& end, + const std::vector& strides, + const AxisSet& begin_mask, + const AxisSet& end_mask, + const AxisSet& new_axis_mask, + const AxisSet& shrink_axis_mask, + const AxisSet& ellipsis_mask); /// \brief Try to compute the maximum value of value /// \return (true, max_value) if can be determined, or (false, numeric_limits::max()) /// if not. /// \deprecated Use evaluate_upper_bound instead NGRAPH_API_DEPRECATED -NGRAPH_API std::pair maximum_value(const Output& value); +NGRAPH_API std::pair maximum_value(const ov::Output& value); /// \brief Returns a Constant storing scalar value equal to std::numeric_limits::max() NGRAPH_API_DEPRECATED diff --git a/src/core/src/bound_evaluate.cpp b/src/core/src/bound_evaluate.cpp index 8705943ea5d632..a574c0bf72c738 100644 --- a/src/core/src/bound_evaluate.cpp +++ b/src/core/src/bound_evaluate.cpp @@ -4,7 +4,6 @@ #include "bound_evaluate.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/core/dimension_tracker.hpp" #include "openvino/core/rt_info.hpp" #include "openvino/core/shape_util.hpp" @@ -172,14 +171,11 @@ bool default_bound_evaluator(const ov::Node* node, return node->evaluate(output_values, inputs); } -ov::Tensor equality_mask(const ov::Tensor& tensor, const std::shared_ptr& constant) { - auto mask_out = ov::TensorVector{{element::boolean, tensor.get_shape()}}; - - auto c_tensor = ov::Tensor(constant->get_element_type(), constant->get_shape()); - memcpy(c_tensor.data(), constant->get_data_ptr(), c_tensor.get_byte_size()); - - const auto& param = std::make_shared(tensor.get_element_type(), tensor.get_shape()); - op::v1::Equal(param, constant).evaluate(mask_out, ov::TensorVector{tensor, c_tensor}); +ov::Tensor equality_mask(const ov::Tensor& lhs, const ov::Tensor& rhs) { + auto mask_out = ov::TensorVector{{element::boolean, lhs.get_shape()}}; + const auto l_param = std::make_shared(lhs.get_element_type(), lhs.get_shape()); + const auto r_param = std::make_shared(rhs.get_element_type(), rhs.get_shape()); + op::v1::Equal(l_param, r_param).evaluate(mask_out, ov::TensorVector{lhs, rhs}); return mask_out.front(); } @@ -429,19 +425,17 @@ bool ov::interval_bound_evaluator(const Node* node, } unsqueezed_output_variants.push_back(vector_of_unsqueezed_output_variants); } - OPENVINO_SUPPRESS_DEPRECATED_START - auto input_0_maximum_value = ngraph::get_constant_max_of_type(low_0.get_element_type()); - auto input_1_maximum_value = 
ngraph::get_constant_max_of_type(low_1.get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_END - if (input_0_maximum_value == nullptr || input_1_maximum_value == nullptr) + const auto input_0_maximum_value = ov::util::make_tensor_of_max_value(low_0.get_element_type()); + const auto input_1_maximum_value = ov::util::make_tensor_of_max_value(low_1.get_element_type()); + if (!input_0_maximum_value || !input_1_maximum_value) return false; - auto input_0_low_dyn_mask = equality_mask(low_0, input_0_maximum_value); - auto input_0_up_dyn_mask = equality_mask(up_0, input_0_maximum_value); - auto input_1_low_dyn_mask = equality_mask(low_1, input_1_maximum_value); - auto input_1_up_dyn_mask = equality_mask(up_1, input_1_maximum_value); - auto final_input_dyn_mask = or_tensor(or_tensor(input_0_low_dyn_mask, input_0_up_dyn_mask), - or_tensor(input_1_low_dyn_mask, input_1_up_dyn_mask)); + const auto input_0_low_dyn_mask = equality_mask(low_0, input_0_maximum_value); + const auto input_0_up_dyn_mask = equality_mask(up_0, input_0_maximum_value); + const auto input_1_low_dyn_mask = equality_mask(low_1, input_1_maximum_value); + const auto input_1_up_dyn_mask = equality_mask(up_1, input_1_maximum_value); + const auto final_input_dyn_mask = or_tensor(or_tensor(input_0_low_dyn_mask, input_0_up_dyn_mask), + or_tensor(input_1_low_dyn_mask, input_1_up_dyn_mask)); bool fully_defined = true; for (size_t i = 0; i < num_of_outputs; ++i) { @@ -479,7 +473,7 @@ bool ov::interval_bound_evaluator(const Node* node, if (!lower_output_values[i]) { fully_defined = false; } else { - // Can not set to get_constant_min_of_type(lower_output_values[i]->get_element_type()) yet + // Can not set to make_tensor_of_min_value(lower_output_values[i]->get_element_type()) yet const auto then = Tensor{lower_out[0].get_element_type(), Shape{}}; const auto then_data = static_cast(then.data()); std::memset(then_data, 0, then.get_byte_size()); @@ -514,9 +508,9 @@ bool ov::tensor_is_non_negative(const Tensor& bound) { bool ov::tensor_has_max_value(const Tensor& bound) { const auto bound_constant = std::make_shared(bound.get_element_type(), bound.get_shape(), bound.data()); - OPENVINO_SUPPRESS_DEPRECATED_START - auto max_constant = ngraph::get_constant_max_of_type(bound.get_element_type()); - OPENVINO_SUPPRESS_DEPRECATED_END + + const auto max_values = ov::util::make_tensor_of_max_value(bound.get_element_type()); + const auto max_constant = std::make_shared(max_values); OutputVector equal(1); bool folded = std::make_shared(bound_constant, max_constant) diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index 2de09440992736..9cb4d84b68ea93 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -405,7 +405,7 @@ std::vector> clone_nodes(const std::vectorinputs()) { - Output output = input.get_source_output(); + ov::Output output = input.get_source_output(); cloned_args.push_back(output.for_node(node_map.at(output.get_node()))); } std::vector> cloned_dependencies; @@ -491,7 +491,7 @@ std::list> clone_nodes(const std::vector& reduce_constant) { +bool is_equal_to_const_value(const std::string& const_value, const ov::Output& reduce_constant) { if (auto rc = ov::as_type_ptr(reduce_constant.get_node_shared_ptr())) { return (rc->get_all_data_elements_bitwise_identical() && rc->convert_value_to_string(0) == const_value); } else { @@ -524,13 +524,13 @@ std::pair, std::shared_ptr(src_node->get_output_element_type(0), src_node->get_output_shape(0)); // Fix input / output among src, dst and par - std::vector> 
dst_inputs = get_inputs_from(*src_node, *dst_node); + std::vector> dst_inputs = get_inputs_from(*src_node, *dst_node); OPENVINO_ASSERT(dst_inputs.size() == 1, "insert_result_parameter_split encountered more than " "one input between the source and destination nodes"); auto& dst_input = dst_inputs[0]; - std::vector> src_outputs = get_outputs_to(*src_node, *dst_node); + std::vector> src_outputs = get_outputs_to(*src_node, *dst_node); OPENVINO_ASSERT(src_outputs.size() == 1, "insert_result_parameter_split encountered more than " "one output between the source and destination nodes"); @@ -593,13 +593,13 @@ void insert_new_node_between(const std::shared_ptr& src_node, const std::shared_ptr& dst_node, const std::shared_ptr& new_node) { // Fix input / output - std::vector> dst_inputs = get_inputs_from(*src_node, *dst_node); + std::vector> dst_inputs = get_inputs_from(*src_node, *dst_node); OPENVINO_ASSERT(dst_inputs.size() == 1, "insert_new_node_between encountered more than one " "input between the source and destination nodes"); auto& dst_input = dst_inputs[0]; - std::vector> src_outputs = get_outputs_to(*src_node, *dst_node); + std::vector> src_outputs = get_outputs_to(*src_node, *dst_node); OPENVINO_ASSERT(src_outputs.size() == 1, "insert_new_node_between encountered more than one " "output between the source and destination nodes"); @@ -626,12 +626,12 @@ std::shared_ptr make_constant_from_string(std::string val, return std::make_shared(element_type, shape, cvals); } -bool is_zero(const Output& reduce_constant) { +bool is_zero(const ov::Output& reduce_constant) { auto result_bool = is_equal_to_const_value("0", reduce_constant); return result_bool; } -bool is_one(const Output& reduce_constant) { +bool is_one(const ov::Output& reduce_constant) { auto result_bool = is_equal_to_const_value("1", reduce_constant); return result_bool; } @@ -729,7 +729,7 @@ void plot_graph(std::shared_ptr f, } std::vector> get_inputs_from(Node& src, Node& dst) { - std::vector> result; + std::vector> result; for (auto& input : dst.inputs()) { if (input.get_source_output().get_node() == &src) { @@ -741,7 +741,7 @@ std::vector> get_inputs_from(Node& src, Node& dst) { } std::vector> get_outputs_to(Node& src, Node& dst) { - std::vector> result; + std::vector> result; for (auto& output : src.outputs()) { bool targets_dst = false; diff --git a/src/core/src/specialize_function.cpp b/src/core/src/specialize_function.cpp index 08401fde6fc389..58df4818bbc99a 100644 --- a/src/core/src/specialize_function.cpp +++ b/src/core/src/specialize_function.cpp @@ -14,8 +14,8 @@ NGRAPH_SUPPRESS_DEPRECATED_START; using ov::op::v0::Constant; std::shared_ptr ngraph::specialize_function(std::shared_ptr f, - const std::vector& parameter_element_types, - const std::vector& parameter_shapes, + const std::vector& parameter_element_types, + const std::vector& parameter_shapes, const std::vector& parameter_values) { diff --git a/src/core/src/util.cpp b/src/core/src/util.cpp index cf94286b04116d..7e14a09cf67a2d 100644 --- a/src/core/src/util.cpp +++ b/src/core/src/util.cpp @@ -197,7 +197,7 @@ AxisVector get_default_order(const Shape& shape) { return get_default_order(shape.size()); } -AxisVector get_default_order(const PartialShape& shape) { +AxisVector get_default_order(const ov::PartialShape& shape) { return get_default_order(shape.rank()); } @@ -207,7 +207,7 @@ AxisVector get_default_order(size_t rank) { return default_order; } -AxisVector get_default_order(const Rank& rank) { +AxisVector get_default_order(const ov::Rank& rank) { 
OPENVINO_ASSERT(rank.is_static(), "Can not calculate default order for dynamic rank"); AxisVector default_order(rank.get_length()); diff --git a/src/core/src/validation_util.cpp b/src/core/src/validation_util.cpp index 23256da00e3c5b..cb8e43ffea74a0 100644 --- a/src/core/src/validation_util.cpp +++ b/src/core/src/validation_util.cpp @@ -30,8 +30,8 @@ using ov::op::v0::Negative; } // namespace op Strides conv_default_strides(const Node* /* node */, - const PartialShape& data_batch_shape, - const PartialShape& filters_shape) { + const ov::PartialShape& data_batch_shape, + const ov::PartialShape& filters_shape) { size_t rank; if (data_batch_shape.rank().is_static() && data_batch_shape.rank().get_length() >= 2) { @@ -46,8 +46,8 @@ Strides conv_default_strides(const Node* /* node */, } CoordinateDiff conv_default_padding(const Node* /* node */, - const PartialShape& data_batch_shape, - const PartialShape& filters_shape) { + const ov::PartialShape& data_batch_shape, + const ov::PartialShape& filters_shape) { size_t rank; if (data_batch_shape.rank().is_static() && data_batch_shape.rank().get_length() >= 2) { @@ -68,17 +68,17 @@ CoordinateDiff conv_default_padding(const Node* /* node */, // TODO(amprocte): The messages here would be a bit friendlier if we didn't say "after // padding/after dilation" for cases where there is actually no padding/dilation. // -PartialShape infer_windowed_reduction_output_shape(const Node* node, - const PartialShape& data_shape, - const Strides& data_dilation, - const CoordinateDiff& data_padding_below, - const CoordinateDiff& data_padding_above, - const PartialShape& window_shape, - const Strides& window_strides, - const Strides& window_dilation, - bool is_window_all_in_padding_allowed, - bool ceil_mode) { - PartialShape data_shape_merged{PartialShape::dynamic()}; +ov::PartialShape infer_windowed_reduction_output_shape(const Node* node, + const ov::PartialShape& data_shape, + const Strides& data_dilation, + const CoordinateDiff& data_padding_below, + const CoordinateDiff& data_padding_above, + const ov::PartialShape& window_shape, + const Strides& window_strides, + const Strides& window_dilation, + bool is_window_all_in_padding_allowed, + bool ceil_mode) { + ov::PartialShape data_shape_merged{ov::PartialShape::dynamic()}; NODE_VALIDATION_CHECK( node, @@ -103,7 +103,7 @@ PartialShape infer_windowed_reduction_output_shape(const Node* node, window_dilation, ") do not match."); - PartialShape output_shape = PartialShape::dynamic(data_shape_merged.rank()); + ov::PartialShape output_shape = ov::PartialShape::dynamic(data_shape_merged.rank()); if (output_shape.rank().is_static()) { for (int64_t i = 0; i < output_shape.rank().get_length(); i++) { NODE_VALIDATION_CHECK(node, @@ -233,15 +233,15 @@ void validate_conv_params_spatial_dimensions(const Node* node, // // Infers the output batch shape and element type for batched pooling fprop. 
// -PartialShape infer_batched_pooling_forward(const Node* node, - const PartialShape& data_batch_shape, - const CoordinateDiff& data_padding_below, - const CoordinateDiff& data_padding_above, - const PartialShape& window_shape, - const Strides& window_strides, - bool is_window_all_in_padding_allowed, - bool ceil_mode, - const Strides& window_dilation) { +ov::PartialShape infer_batched_pooling_forward(const Node* node, + const ov::PartialShape& data_batch_shape, + const CoordinateDiff& data_padding_below, + const CoordinateDiff& data_padding_above, + const ov::PartialShape& window_shape, + const Strides& window_strides, + bool is_window_all_in_padding_allowed, + bool ceil_mode, + const Strides& window_dilation) { NODE_VALIDATION_CHECK(node, data_batch_shape.rank().is_dynamic() || (data_batch_shape.rank().get_length() >= 3 && data_batch_shape.rank().get_length() <= 5), @@ -251,7 +251,7 @@ PartialShape infer_batched_pooling_forward(const Node* node, data_batch_shape, ")."); - PartialShape data_spatial_shape{PartialShape::dynamic()}; + ov::PartialShape data_spatial_shape{ov::PartialShape::dynamic()}; NODE_VALIDATION_CHECK(node, data_spatial_shape.merge_rank(data_batch_shape.rank() - 2) && @@ -275,7 +275,7 @@ PartialShape infer_batched_pooling_forward(const Node* node, Dimension batch_size{Dimension::dynamic()}; Dimension channel_count{Dimension::dynamic()}; - PartialShape data_output_spatial_shape{PartialShape::dynamic(data_spatial_shape.rank())}; + ov::PartialShape data_output_spatial_shape{ov::PartialShape::dynamic(data_spatial_shape.rank())}; if (data_batch_shape.rank().is_static()) { batch_size = data_batch_shape[0]; @@ -312,7 +312,7 @@ PartialShape infer_batched_pooling_forward(const Node* node, ceil_mode); } - PartialShape data_batch_output_shape{PartialShape::dynamic(data_output_spatial_shape.rank() + 2)}; + ov::PartialShape data_batch_output_shape{ov::PartialShape::dynamic(data_output_spatial_shape.rank() + 2)}; data_batch_output_shape[0] = batch_size; data_batch_output_shape[1] = channel_count; @@ -325,14 +325,14 @@ PartialShape infer_batched_pooling_forward(const Node* node, struct ChannelShapedInputSpec { element::Type m_element_type; - PartialShape m_shape; + ov::PartialShape m_shape; std::string m_input_name; }; -static std::tuple infer_batch_norm_forward_helper( +static std::tuple infer_batch_norm_forward_helper( const Node* node, element::Type input_element_type, - const PartialShape& input_shape, + const ov::PartialShape& input_shape, const std::vector& channel_shaped_inputs) { // Built up a slash-separated string naming all the channel-shaped inputs, for use in error // messages. @@ -364,7 +364,7 @@ static std::tuple infer_batch_norm_fo // Extract channel dimension from input shape. Dimension channel_dim{Dimension::dynamic()}; - Rank input_rank = input_shape.rank(); + ov::Rank input_rank = input_shape.rank(); if (input_rank.is_static()) { NODE_VALIDATION_CHECK(node, input_rank.get_length() >= 2, @@ -377,11 +377,11 @@ static std::tuple infer_batch_norm_fo // Infer gamma/beta/mu/sigma shape, which must be consistent with a vector of size // "channel_dim". 
- PartialShape channel_shape{PartialShape::dynamic()}; + ov::PartialShape channel_shape{ov::PartialShape::dynamic()}; for (const auto& inp : channel_shaped_inputs) { NODE_VALIDATION_CHECK(node, - PartialShape::merge_into(channel_shape, inp.m_shape), + ov::PartialShape::merge_into(channel_shape, inp.m_shape), "Shapes for ", channel_input_names, " do not match."); @@ -411,26 +411,27 @@ static std::tuple infer_batch_norm_fo // Batch result shape is same as the input shape, except we may possibly have inferred more // information from the channel count via gamma/beta/etc. - PartialShape batch_result_shape{input_shape}; + ov::PartialShape batch_result_shape{input_shape}; if (batch_result_shape.rank().is_static()) { batch_result_shape[1] = channel_dim; } - return std::make_tuple(et_result, batch_result_shape, PartialShape{channel_dim}); + return std::make_tuple(et_result, batch_result_shape, ov::PartialShape{channel_dim}); } -std::tuple infer_batch_norm_forward(const Node* node, - element::Type input_element_type, - element::Type gamma_element_type, - element::Type beta_element_type, - element::Type mean_element_type, - element::Type variance_element_type, - const PartialShape& input_shape, - const PartialShape& gamma_shape, - const PartialShape& beta_shape, - const PartialShape& mean_shape, - const PartialShape& variance_shape) { +std::tuple infer_batch_norm_forward( + const Node* node, + element::Type input_element_type, + element::Type gamma_element_type, + element::Type beta_element_type, + element::Type mean_element_type, + element::Type variance_element_type, + const ov::PartialShape& input_shape, + const ov::PartialShape& gamma_shape, + const ov::PartialShape& beta_shape, + const ov::PartialShape& mean_shape, + const ov::PartialShape& variance_shape) { return infer_batch_norm_forward_helper(node, input_element_type, input_shape, @@ -440,13 +441,14 @@ std::tuple infer_batch_norm_forward(c {variance_element_type, variance_shape, "variance"}}); } -std::tuple infer_batch_norm_forward(const Node* node, - element::Type input_element_type, - element::Type gamma_element_type, - element::Type beta_element_type, - const PartialShape& input_shape, - const PartialShape& gamma_shape, - const PartialShape& beta_shape) { +std::tuple infer_batch_norm_forward( + const Node* node, + element::Type input_element_type, + element::Type gamma_element_type, + element::Type beta_element_type, + const ov::PartialShape& input_shape, + const ov::PartialShape& gamma_shape, + const ov::PartialShape& beta_shape) { return infer_batch_norm_forward_helper( node, input_element_type, @@ -454,16 +456,16 @@ std::tuple infer_batch_norm_forward(c {{gamma_element_type, gamma_shape, "gamma"}, {beta_element_type, beta_shape, "beta"}}); } -PartialShape infer_slice_shape(const Node* node, - const PartialShape& input_shape, - const std::vector& begin, - const std::vector& end, - const std::vector& strides, - const AxisSet& begin_mask, - const AxisSet& end_mask, - const AxisSet& new_axis_mask, - const AxisSet& shrink_axis_mask, - const AxisSet& ellipsis_mask) { +ov::PartialShape infer_slice_shape(const Node* node, + const ov::PartialShape& input_shape, + const std::vector& begin, + const std::vector& end, + const std::vector& strides, + const AxisSet& begin_mask, + const AxisSet& end_mask, + const AxisSet& new_axis_mask, + const AxisSet& shrink_axis_mask, + const AxisSet& ellipsis_mask) { if (begin.size() && end.size()) { NODE_VALIDATION_CHECK(node, begin.size() == end.size(), @@ -483,7 +485,7 @@ PartialShape infer_slice_shape(const 
Node* node, NODE_VALIDATION_CHECK(node, ellipsis_mask.size() <= 1, "At most one ellipsis is allowed."); if (input_shape.rank().is_dynamic()) { - return PartialShape::dynamic(); + return ov::PartialShape::dynamic(); } NODE_VALIDATION_CHECK(node, @@ -788,7 +790,7 @@ std::vector exec_nop(Node* node, std::vector& inputs) { } } // namespace -std::pair maximum_value(const Output& value) { +std::pair maximum_value(const ov::Output& value) { static ngraph::Evaluator::op_handler_map handlers = { {ov::op::v0::Concat::get_type_info_static(), exec_concat}, {ov::op::v0::Constant::get_type_info_static(), exec_constant}, @@ -906,7 +908,7 @@ int64_t ov::normalize_axis(const std::string& node_description, return ov::util::normalize_axis(node_description, axis, tensor_rank, axis_range_min, axis_range_max); } -bool ov::evaluate_as_partial_shape(const Output& output, PartialShape& pshape) { +bool ov::evaluate_as_partial_shape(const ov::Output& output, ov::PartialShape& pshape) { return ov::util::evaluate_as_partial_shape(output, pshape); } @@ -914,7 +916,7 @@ bool ov::default_label_evaluator(const Node* node, TensorLabelVector& output_lab return ov::util::default_label_evaluator(node, output_labels); } -std::shared_ptr ov::get_constant_from_source(const Output& source) { +std::shared_ptr ov::get_constant_from_source(const ov::Output& source) { return ov::util::get_constant_from_source(source); } @@ -939,7 +941,7 @@ int64_t ov::util::clip(const int64_t& value, const int64_t& min, const int64_t& return std::min(std::max(value, min), max); }; -std::shared_ptr ov::util::constantfold_subgraph(const Output& subgraph_sink) { +std::shared_ptr ov::util::constantfold_subgraph(const ov::Output& subgraph_sink) { if (const auto& c = ov::as_type_ptr(subgraph_sink.get_node_shared_ptr())) return c; @@ -976,7 +978,7 @@ namespace ov { namespace util { using ov::op::v0::Constant; -std::shared_ptr get_constant_from_source(const Output& source) { +std::shared_ptr get_constant_from_source(const ov::Output& source) { if (const auto& c = ov::as_type_ptr(source.get_node_shared_ptr())) { return c; } else if (has_and_set_equal_bounds(source)) { @@ -1070,8 +1072,8 @@ Tensor make_tensor_of_min_value(const element::Type_t et) { } } -std::vector get_tensors_partial_shapes(const TensorVector& tensors) { - std::vector shapes; +std::vector get_tensors_partial_shapes(const TensorVector& tensors) { + std::vector shapes; shapes.reserve(tensors.size()); for (const auto& t : tensors) { shapes.emplace_back(t.get_shape()); @@ -1079,8 +1081,8 @@ std::vector get_tensors_partial_shapes(const TensorVector& tensors return shapes; } -std::vector get_node_input_partial_shapes(const Node& node) { - std::vector shapes; +std::vector get_node_input_partial_shapes(const Node& node) { + std::vector shapes; shapes.reserve(node.get_input_size()); for (size_t i = 0; i < node.get_input_size(); ++i) { shapes.push_back(node.get_input_partial_shape(i)); @@ -1094,7 +1096,7 @@ bool is_rank_compatible_any_of(const Rank& r, std::initializer_list others }); } -bool evaluate_as_partial_shape(const Output& output, PartialShape& pshape) { +bool evaluate_as_partial_shape(const ov::Output& output, ov::PartialShape& pshape) { Tensor lb, ub; std::tie(lb, ub) = evaluate_both_bounds(output); bool shape_defined = false; @@ -1121,7 +1123,7 @@ bool evaluate_as_partial_shape(const Output& output, PartialShape& pshape) if (!labels.empty() && labels[i]) DimensionTracker::set_label(resulting_pshape[i], labels[i]); } - pshape = PartialShape(resulting_pshape); + pshape = 
ov::PartialShape(resulting_pshape); shape_defined = true; } return shape_defined; diff --git a/src/core/tests/eval.cpp b/src/core/tests/eval.cpp index b92adc29b1ac99..8704a6a9251acf 100644 --- a/src/core/tests/eval.cpp +++ b/src/core/tests/eval.cpp @@ -14,7 +14,6 @@ #include "common_test_utils/type_prop.hpp" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "ngraph/validation_util.hpp" #include "openvino/core/except.hpp" #include "openvino/core/model.hpp" #include "openvino/core/shape.hpp" @@ -93,53 +92,6 @@ std::vector read_vector(const ov::Tensor& tv) { ASSERT_FLOAT_EQ(expected[i], result[i]) << "at index: " << i; \ } -TEST(eval, max_eval_parameter) { - auto p = make_shared(element::i64, Shape{}); - - OPENVINO_SUPPRESS_DEPRECATED_START - auto result = ngraph::maximum_value(p); - OPENVINO_SUPPRESS_DEPRECATED_END - EXPECT_FALSE(result.first); - EXPECT_EQ(result.second, numeric_limits::max()); -} - -TEST(eval, max_eval_constant) { - auto c = ov::op::v0::Constant::create(element::i64, Shape{}, {27}); - OPENVINO_SUPPRESS_DEPRECATED_START - auto result = ngraph::maximum_value(c); - OPENVINO_SUPPRESS_DEPRECATED_END - ASSERT_TRUE(result.first); - EXPECT_EQ(result.second, 27); -} - -TEST(eval, max_eval_minimum_constant) { - auto c = ov::op::v0::Constant::create(element::i64, Shape{}, {27}); - auto p = make_shared(element::i64, Shape{}); - auto m = make_shared(c, p); - OPENVINO_SUPPRESS_DEPRECATED_START - auto result = ngraph::maximum_value(m); - OPENVINO_SUPPRESS_DEPRECATED_END - ASSERT_TRUE(result.first); - EXPECT_EQ(result.second, 27); -} - -TEST(eval, max_eval_reduce_min) { - auto concat = make_shared( - make_shared(OutputVector{make_shared(element::i64, Shape{4}), - make_shared(element::i64, Shape{4}, 37)}, - 0), - element::i32); - auto reduce = make_shared( - make_shared(concat, make_shared(element::i32, Shape{1}, 0)), - element::i64); - auto squeezes = make_shared( - make_shared(reduce, make_shared(element::i32, Shape{1}, 0)), - make_shared(element::i64, Shape{1}, 0)); - OPENVINO_SUPPRESS_DEPRECATED_START - EXPECT_EQ(ngraph::maximum_value(squeezes).second, 37); - OPENVINO_SUPPRESS_DEPRECATED_END -} - TEST(eval, evaluate_shape_of) { auto p = make_shared(element::f32, PartialShape{-1, -1}); auto so = make_shared(p); diff --git a/src/core/tests/partial_shape.cpp b/src/core/tests/partial_shape.cpp index 0bd1fad1342bb1..3ab9fe9c01f32a 100644 --- a/src/core/tests/partial_shape.cpp +++ b/src/core/tests/partial_shape.cpp @@ -7,7 +7,6 @@ #include #include "common_test_utils/test_tools.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/core/coordinate_diff.hpp" #include "openvino/core/descriptor/tensor.hpp" #include "openvino/op/parameter.hpp" @@ -758,517 +757,6 @@ TEST(partial_shape, copy_with_back_inserter_iterator) { ASSERT_EQ(s_copy, s); } -TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_dynamic_ok) { - auto node = std::make_shared(element::f32, Shape{}); - PartialShape data_shape{PartialShape::dynamic()}; - Strides data_dilation{1, 1, 1, 1}; - CoordinateDiff data_padding_below{0, 0, 0, 0}; - CoordinateDiff data_padding_above{0, 0, 0, 0}; - PartialShape window_shape{PartialShape::dynamic()}; - Strides window_strides{1, 1, 1, 1}; - Strides window_dilation{1, 1, 1, 1}; - bool is_window_all_in_padding_allowed = true; - OPENVINO_SUPPRESS_DEPRECATED_START - PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(), - data_shape, - data_dilation, - data_padding_below, - data_padding_above, - window_shape, - window_strides, - 
window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(result_shape.same_scheme(
-        PartialShape{Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic()}));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_dynamic_zero_data_dilation) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 0, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{PartialShape::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_dynamic_zero_window_dilation) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{PartialShape::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 0, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_dynamic_zero_window_strides) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{PartialShape::dynamic()};
-    Strides window_strides{1, 1, 1, 0};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_dynamic_ok) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{PartialShape::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(result_shape.same_scheme(PartialShape::dynamic(4)));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_dynamic_zero_data_post_padding) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, -1, 0, 0};
-    CoordinateDiff data_padding_above{0, -1, 0, 0};
-    PartialShape window_shape{PartialShape::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_dynamic_neg_padding_ok) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), 4, 3, Dimension::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, -1, 0, 0};
-    CoordinateDiff data_padding_above{0, -2, 0, 0};
-    PartialShape window_shape{PartialShape::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(result_shape.same_scheme(PartialShape::dynamic(4)));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_static_dynamic_ok) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(result_shape.same_scheme(PartialShape::dynamic(4)));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_static_dynamic_window_dim_zero) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 0, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_static_dynamic_window_dilated_dim_zero) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 0, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 3, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_static_dynamic_window_all_in_padding_ok) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 3, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(result_shape.same_scheme(PartialShape::dynamic(4)));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_static_dynamic_window_all_in_padding_not_ok) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 3, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = false;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_dynamic_rank_static_dynamic_dilated_window_not_all_in_padding) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{PartialShape::dynamic()};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 3, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 2, 1};
-    bool is_window_all_in_padding_allowed = false;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(result_shape.same_scheme(PartialShape::dynamic(4)));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_static_dynamic_ok) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), Dimension::dynamic(), 6, 4};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(
-        result_shape.same_scheme(PartialShape{Dimension::dynamic(), Dimension::dynamic(), 4, Dimension::dynamic()}));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_static_dynamic_with_padding_ok) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), Dimension::dynamic(), 6, 4};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 2, 0};
-    CoordinateDiff data_padding_above{0, 0, -1, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(
-        result_shape.same_scheme(PartialShape{Dimension::dynamic(), Dimension::dynamic(), 5, Dimension::dynamic()}));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_static_dynamic_with_padding_and_stride_ok) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), Dimension::dynamic(), 6, 4};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 2, 0};
-    CoordinateDiff data_padding_above{0, 0, -1, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 3, Dimension::dynamic()};
-    Strides window_strides{1, 1, 2, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(
-        result_shape.same_scheme(PartialShape{Dimension::dynamic(), Dimension::dynamic(), 3, Dimension::dynamic()}));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_static_dynamic_window_too_big) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), Dimension::dynamic(), 6, 4};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 0, 0};
-    CoordinateDiff data_padding_above{0, 0, 0, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 7, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_static_dynamic_window_not_too_big_padding) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), Dimension::dynamic(), 6, 4};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 5, 0};
-    CoordinateDiff data_padding_above{0, 0, -3, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 7, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 1, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-            data_shape,
-            data_dilation,
-            data_padding_below,
-            data_padding_above,
-            window_shape,
-            window_strides,
-            window_dilation,
-            is_window_all_in_padding_allowed);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-
-    ASSERT_TRUE(
-        result_shape.same_scheme(PartialShape{Dimension::dynamic(), Dimension::dynamic(), 2, Dimension::dynamic()}));
-}
-
-TEST(partial_shape, infer_windowed_reduction_rank_static_dynamic_rank_static_dynamic_window_dilated_too_big) {
-    auto node = std::make_shared<ov::op::v0::Parameter>(element::f32, Shape{});
-    PartialShape data_shape{Dimension::dynamic(), Dimension::dynamic(), 6, 4};
-    Strides data_dilation{1, 1, 1, 1};
-    CoordinateDiff data_padding_below{0, 0, 5, 0};
-    CoordinateDiff data_padding_above{0, 0, -3, 0};
-    PartialShape window_shape{Dimension::dynamic(), 2, 7, Dimension::dynamic()};
-    Strides window_strides{1, 1, 1, 1};
-    Strides window_dilation{1, 1, 2, 1};
-    bool is_window_all_in_padding_allowed = true;
-
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    ASSERT_THROW(
-        {
-            PartialShape result_shape = ngraph::infer_windowed_reduction_output_shape(node.get(),
-                    data_shape,
-                    data_dilation,
-                    data_padding_below,
-                    data_padding_above,
-                    window_shape,
-                    window_strides,
-                    window_dilation,
-                    is_window_all_in_padding_allowed);
-        },
-        NodeValidationFailure);
-    OPENVINO_SUPPRESS_DEPRECATED_END
-}
-
 TEST(partial_shape, const_subscribe_operator) {
     const auto shape = ov::PartialShape{-1, {2, 10}, 5, 6, 7};
diff --git a/src/core/tests/specialize_function.cpp b/src/core/tests/specialize_function.cpp
index 9cd4b3bbc27376..1a632b0963b25a 100644
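Annotation: the tests deleted above all pin down one piece of arithmetic. The data extent is dilated and padded, the window extent is dilated, and the output extent is the number of window placements; zero dilations/strides and oversized windows are the throwing branches. A minimal standalone sketch of that arithmetic (the function name is illustrative, not the OpenVINO API):

    #include <cassert>
    #include <stdexcept>

    // Illustrative restatement of the windowed-reduction shape rule the deleted
    // tests exercised; not the OpenVINO implementation.
    long windowed_reduction_dim(long data, long data_dilation, long pad_below, long pad_above,
                                long window, long window_stride, long window_dilation) {
        if (data_dilation < 1 || window_stride < 1 || window_dilation < 1 || window < 1)
            throw std::invalid_argument("dilations and strides must be positive, window non-empty");
        const long dilated_data = (data - 1) * data_dilation + 1 + pad_below + pad_above;
        const long dilated_window = (window - 1) * window_dilation + 1;
        if (dilated_window > dilated_data)
            throw std::invalid_argument("window after dilation exceeds padded data");
        return (dilated_data - dilated_window) / window_stride + 1;
    }

    int main() {
        assert(windowed_reduction_dim(6, 1, 0, 0, 3, 1, 1) == 4);   // plain case above
        assert(windowed_reduction_dim(6, 1, 2, -1, 3, 1, 1) == 5);  // pads {2, -1}
        assert(windowed_reduction_dim(6, 1, 2, -1, 3, 2, 1) == 3);  // same, stride 2
        assert(windowed_reduction_dim(6, 1, 5, -3, 7, 1, 1) == 2);  // pads {5, -3}, window 7
        return 0;
    }

The specialize_function hunks that follow are, by contrast, a mechanical ov:: namespace cleanup.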
--- a/src/core/tests/specialize_function.cpp
+++ b/src/core/tests/specialize_function.cpp
@@ -32,7 +32,7 @@ TEST(specialize_function, et_shape_static) {
 
     auto g = specialize_function(f,
                                  {element::f32, element::i32},
-                                 {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}},
+                                 {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}},
                                  param_vals);
 
     ASSERT_EQ(g->get_output_shape(0), (Shape{1, 2, 3}));
@@ -53,7 +53,7 @@ TEST(specialize_function, et_dynamic_shape_static) {
 
     auto g = specialize_function(f,
                                  {element::f32, element::i32},
-                                 {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}},
+                                 {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}},
                                  param_vals);
 
     ASSERT_EQ(g->get_output_shape(0), (Shape{1, 2, 3}));
@@ -62,8 +62,8 @@
 
 // Test specialization of rank-dynamic shapes.
 TEST(specialize_function, et_static_shape_rank_dynamic) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape::dynamic());
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape::dynamic());
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape::dynamic());
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape::dynamic());
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -74,7 +74,7 @@
 
     auto g = specialize_function(f,
                                  {element::f32, element::i32},
-                                 {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}},
+                                 {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}},
                                  param_vals);
 
     ASSERT_EQ(g->get_output_shape(0), (Shape{1, 2, 3}));
@@ -83,8 +83,8 @@
 
 // Test specialization of rank-static dynamic shapes.
 TEST(specialize_function, et_static_shape_rank_static_dynamic) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape::dynamic(3));
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape::dynamic(3));
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape::dynamic(3));
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape::dynamic(3));
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -95,7 +95,7 @@
 
     auto g = specialize_function(f,
                                  {element::f32, element::i32},
-                                 {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}},
+                                 {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}},
                                  param_vals);
 
     ASSERT_EQ(g->get_output_shape(0), (Shape{1, 2, 3}));
@@ -104,8 +104,8 @@
 
 // Test specialization of values to a shape-dynamic parameters.
 TEST(specialize_function, et_static_shape_rank_static_dynamic_subst_val) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape::dynamic(3));
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape::dynamic(3));
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape::dynamic(3));
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape::dynamic(3));
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -118,7 +118,7 @@
 
     auto g = specialize_function(f,
                                  {element::f32, element::i32},
-                                 {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}},
+                                 {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}},
                                  param_vals);
 
     ASSERT_EQ(g->get_output_shape(0), (Shape{1, 2, 3}));
@@ -140,8 +140,8 @@
 //
 // (The input shapes we provide at specialization time are inconsistent.)
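Annotation: "inconsistent" here is concrete. The test that follows converts an i32 input and adds it to an f32 input, then specializes the two parameters to ranks 3 and 4; elementwise Add with NumPy-style right-aligned broadcasting cannot unify those shapes, so validation fails. A hypothetical checker mirroring that rule (not the OpenVINO implementation):

    #include <cassert>
    #include <vector>

    // Right-aligned NumPy broadcasting compatibility, as used by elementwise ops
    // such as Add. Illustrative sketch only.
    bool numpy_broadcastable(const std::vector<long>& a, const std::vector<long>& b) {
        auto ia = a.rbegin();
        auto ib = b.rbegin();
        for (; ia != a.rend() && ib != b.rend(); ++ia, ++ib)
            if (*ia != *ib && *ia != 1 && *ib != 1)
                return false;  // conflicting extents, neither is 1
        return true;
    }

    int main() {
        assert(numpy_broadcastable({1, 2, 3}, {1, 2, 3}));
        // The pair used by the failing specialization below: trailing 3 vs 4 conflict.
        assert(!numpy_broadcastable({1, 2, 3}, {1, 2, 3, 4}));
        return 0;
    }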
 TEST(specialize_function, et_static_shape_rank_dynamic_validation_fails) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape::dynamic());
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape::dynamic());
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape::dynamic());
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape::dynamic());
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -154,7 +154,7 @@
         {
             specialize_function(f,
                                 {element::f32, element::i32},
-                                {PartialShape{1, 2, 3}, PartialShape{1, 2, 3, 4}},
+                                {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3, 4}},
                                 param_vals);
         },
         NodeValidationFailure);
@@ -178,7 +178,7 @@
         {
             specialize_function(f,
                                 {element::u32, element::i32},
-                                {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}},
+                                {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}},
                                 param_vals);
         },
         NodeValidationFailure);
@@ -191,8 +191,8 @@
 // specialize_shape's pre-checks, which use OPENVINO_ASSERT, rather than inside validation as we
 // reconstruct the graph.)
 TEST(specialize_function, et_static_shape_rank_static_dynamic_rank_mismatch) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape::dynamic(3));
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape::dynamic(3));
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape::dynamic(3));
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape::dynamic(3));
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -205,7 +205,7 @@
         {
             specialize_function(f,
                                 {element::f32, element::i32},
-                                {PartialShape{1, 2, 3}, PartialShape{1, 2, 3, 4}},
+                                {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3, 4}},
                                 param_vals);
         },
         CheckFailure);
@@ -218,8 +218,8 @@
 // specialize_shape's pre-checks, which use OPENVINO_ASSERT, rather than inside validation as we
 // reconstruct the graph.)
 TEST(specialize_function, et_static_shape_rank_static_dynamic_dim_mismatch) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{1, 2, 3});
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape{1, ov::Dimension::dynamic(), 3});
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape{1, 2, 3});
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape{1, ov::Dimension::dynamic(), 3});
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -232,7 +232,7 @@
         {
             specialize_function(f,
                                 {element::f32, element::i32},
-                                {PartialShape{1, 2, 3}, PartialShape{1, 9, 4}},
+                                {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 9, 4}},
                                 param_vals);
         },
         CheckFailure);
@@ -240,8 +240,8 @@
 
 // Test for failure when we supply the wrong number of replacement element types.
 TEST(specialize_function, et_count_wrong) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{1, 2, 3});
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape{1, 2, 3});
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape{1, 2, 3});
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape{1, 2, 3});
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -254,7 +254,7 @@
         {
             specialize_function(f,
                                 {element::f32, element::i32, element::u32},
-                                {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}},
+                                {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}},
                                 param_vals);
         },
         CheckFailure);
@@ -262,8 +262,8 @@
 
 // Test for failure when we supply the wrong number of replacement shapes.
 TEST(specialize_function, shape_count_wrong) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{1, 2, 3});
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape{1, 2, 3});
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape{1, 2, 3});
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape{1, 2, 3});
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -276,7 +276,7 @@
         {
             specialize_function(f,
                                 {element::f32, element::i32},
-                                {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}, PartialShape{4, 5, 6}},
+                                {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}, ov::PartialShape{4, 5, 6}},
                                 param_vals);
         },
         CheckFailure);
@@ -284,8 +284,8 @@
 
 // Test for failure when we supply the wrong number of replacement parameter values.
 TEST(specialize_function, value_count_wrong) {
-    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{1, 2, 3});
-    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, PartialShape{1, 2, 3});
+    auto p0 = std::make_shared<op::v0::Parameter>(element::f32, ov::PartialShape{1, 2, 3});
+    auto p1 = std::make_shared<op::v0::Parameter>(element::i32, ov::PartialShape{1, 2, 3});
 
     auto k = std::make_shared<op::v0::Convert>(p1, element::f32);
     auto a = std::make_shared<op::v1::Add>(p0, k);
@@ -298,7 +298,7 @@
         {
             specialize_function(f,
                                 {element::f32, element::i32},
-                                {PartialShape{1, 2, 3}, PartialShape{1, 2, 3}},
+                                {ov::PartialShape{1, 2, 3}, ov::PartialShape{1, 2, 3}},
                                 param_vals);
         },
         CheckFailure);
diff --git a/src/frontends/onnx/frontend/src/core/graph.hpp b/src/frontends/onnx/frontend/src/core/graph.hpp
index f11f0936f5dadb..058887ccb0912e 100644
--- a/src/frontends/onnx/frontend/src/core/graph.hpp
+++ b/src/frontends/onnx/frontend/src/core/graph.hpp
@@ -50,7 +50,7 @@ class Graph : public std::enable_shared_from_this<Graph> {
         return m_parameters;
     }
     virtual bool is_ov_node_in_cache(const std::string& name) const;
-    virtual Output<ov::Node> get_ov_node_from_cache(const std::string& name);
+    virtual ov::Output<ov::Node> get_ov_node_from_cache(const std::string& name);
     OPENVINO_SUPPRESS_DEPRECATED_START
     OutputVector make_ov_nodes(const Node& onnx_node);
     OPENVINO_SUPPRESS_DEPRECATED_END
@@ -109,7 +109,7 @@ class Subgraph : public Graph {
     /// \brief Return nodes which are on the edge the subgraph and the parent graph.
     /// \return Vector of edge nodes from parent scope.
-    const std::vector<Output<ov::Node>> get_inputs_from_parent() const;
+    const std::vector<ov::Output<ov::Node>> get_inputs_from_parent() const;
 
     std::shared_ptr<Function> convert() override;
@@ -122,7 +122,7 @@ class Subgraph : public Graph {
     Subgraph& operator=(Subgraph&&) = default;
 
     bool is_ov_node_in_cache(const std::string& name) const override;
-    Output<ov::Node> get_ov_node_from_cache(const std::string& name) override;
+    ov::Output<ov::Node> get_ov_node_from_cache(const std::string& name) override;
     void infer_inputs_from_parent();
 
 private:
diff --git a/src/frontends/onnx/frontend/src/core/value_info.hpp b/src/frontends/onnx/frontend/src/core/value_info.hpp
index 5004064c425fd3..f369d0d70caad7 100644
--- a/src/frontends/onnx/frontend/src/core/value_info.hpp
+++ b/src/frontends/onnx/frontend/src/core/value_info.hpp
@@ -41,7 +41,7 @@ class ValueInfo {
     const std::string& get_name() const {
         return m_value_info_proto->name();
     }
-    const PartialShape& get_shape() const {
+    const ov::PartialShape& get_shape() const {
         return m_partial_shape;
     }
     const element::Type& get_element_type() const {
@@ -75,7 +75,7 @@ class ValueInfo {
 private:
     const ONNX_NAMESPACE::ValueInfoProto* m_value_info_proto;
 
-    PartialShape m_partial_shape = PartialShape::dynamic();
+    ov::PartialShape m_partial_shape = ov::PartialShape::dynamic();
 };
 
 inline std::ostream& operator<<(std::ostream& outs, const ValueInfo& info) {
diff --git a/src/frontends/onnx/frontend/src/op/aten.cpp b/src/frontends/onnx/frontend/src/op/aten.cpp
index 380718b2745674..9b7128b8a22139 100644
--- a/src/frontends/onnx/frontend/src/op/aten.cpp
+++ b/src/frontends/onnx/frontend/src/op/aten.cpp
@@ -48,7 +48,7 @@ OutputVector aten(const Node& node) {
         inputs.size() == 4 && ov::op::util::is_null(inputs[2]) && !ov::op::util::is_null(inputs[3]);
     const bool is_offsets_three_inputs = inputs.size() == 3 && !ov::op::util::is_null(inputs[2]);
 
-    Output<ov::Node> embedding_bag;
+    ov::Output<ov::Node> embedding_bag;
     if (is_packed_two_inputs) {
         embedding_bag = std::make_shared<v3::EmbeddingBagPackedSum>(inputs[0], inputs[1]);
     } else if (is_packed_three_inputs) {
diff --git a/src/frontends/onnx/frontend/src/op/batch_norm.cpp b/src/frontends/onnx/frontend/src/op/batch_norm.cpp
index e187128e8bc1f8..5c82bc9768766c 100644
--- a/src/frontends/onnx/frontend/src/op/batch_norm.cpp
+++ b/src/frontends/onnx/frontend/src/op/batch_norm.cpp
@@ -24,8 +24,8 @@ OutputVector batch_norm(const Node& node) {
     auto x = inputs.at(0);
     auto scale = inputs.at(1);
     auto bias = inputs.at(2);
-    Output<ov::Node> mean;
-    Output<ov::Node> var;
+    ov::Output<ov::Node> mean;
+    ov::Output<ov::Node> var;
 
     double epsilon{node.get_attribute_value<double>("epsilon", 1e-5)};
 
diff --git a/src/frontends/onnx/frontend/src/op/bitshift.cpp b/src/frontends/onnx/frontend/src/op/bitshift.cpp
index 317ad25ee57c33..bc5160a88ad956 100644
--- a/src/frontends/onnx/frontend/src/op/bitshift.cpp
+++ b/src/frontends/onnx/frontend/src/op/bitshift.cpp
@@ -18,8 +18,8 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 OutputVector bitshift(const Node& node) {
-    const Output<ov::Node> input_x = node.get_ng_inputs().at(0);
-    const Output<ov::Node> input_y = node.get_ng_inputs().at(1);
+    const ov::Output<ov::Node> input_x = node.get_ng_inputs().at(0);
+    const ov::Output<ov::Node> input_y = node.get_ng_inputs().at(1);
 
     std::string direction = node.get_attribute_value<std::string>("direction", "");
 
diff --git a/src/frontends/onnx/frontend/src/op/clip.cpp b/src/frontends/onnx/frontend/src/op/clip.cpp
index 06b634797c46c1..cbef8d9391c414 100644
--- a/src/frontends/onnx/frontend/src/op/clip.cpp
+++ b/src/frontends/onnx/frontend/src/op/clip.cpp
@@ -6,11 +6,12 @@
 
 #include <limits>
 
-#include "ngraph/validation_util.hpp"
+#include "default_opset.hpp"
 #include "onnx_import/core/null_node.hpp"
 #include "openvino/op/clamp.hpp"
 #include "openvino/op/maximum.hpp"
 #include "openvino/op/minimum.hpp"
+#include "validation_util.hpp"
 
 using namespace ov::op;
 
@@ -32,21 +33,58 @@ OutputVector clip(const Node& node) {
 }  // namespace set_1
 
 namespace set_11 {
+namespace {
+std::shared_ptr<ov::Node> get_constant_lowest_of_type(ov::element::Type_t t) {
+#define OPENVINO_TYPE_TO_LOWEST_CONST(t)                                              \
+    case t:                                                                           \
+        return ov::op::v0::Constant::create(                                          \
+            t,                                                                        \
+            {},                                                                       \
+            {std::numeric_limits<ov::element_type_traits<t>::value_type>::lowest()}); \
+        break
+
+    switch (t) {
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::boolean);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::bf16);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::f16);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::f32);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::f64);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::i8);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::i16);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::i32);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::i64);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::u1);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::u8);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::u16);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::u32);
+        OPENVINO_TYPE_TO_LOWEST_CONST(ov::element::u64);
+
+    case ov::element::undefined:
+    case ov::element::dynamic:
+    default:
+        return nullptr;
+    }
+}
+
+std::shared_ptr<ov::Node> get_constant_max_of_type(ov::element::Type_t t) {
+    auto tensor = ov::util::make_tensor_of_max_value(t);
+    return tensor ? std::make_shared<ov::op::v0::Constant>(tensor) : nullptr;
+}
+}  // namespace
+
 OutputVector clip(const Node& node) {
     const OutputVector inputs{node.get_ng_inputs()};
-    const Output<ov::Node> data = inputs.at(0);
+    const ov::Output<ov::Node> data = inputs.at(0);
     const element::Type data_type = data.get_element_type();
-    Output<ov::Node> min;
-    Output<ov::Node> max;
+    ov::Output<ov::Node> min;
+    ov::Output<ov::Node> max;
 
     // If second input is provided, assign to min input, otherwise set lowest
     // numeric limit of data type as min input.
     if (inputs.size() > 1 && !ov::op::util::is_null(inputs.at(1))) {
         min = inputs.at(1);
     } else {
-        OPENVINO_SUPPRESS_DEPRECATED_START
-        min = ngraph::get_constant_lowest_of_type(data_type);
-        OPENVINO_SUPPRESS_DEPRECATED_END
+        min = get_constant_lowest_of_type(data_type);
     }
 
     // If third input is provided, assign to max input, otherwise set maximum
@@ -54,9 +92,7 @@ OutputVector clip(const Node& node) {
     if (inputs.size() == 3 && !ov::op::util::is_null(inputs.at(2))) {
         max = inputs.at(2);
     } else {
-        OPENVINO_SUPPRESS_DEPRECATED_START
-        max = ngraph::get_constant_max_of_type(data_type);
-        OPENVINO_SUPPRESS_DEPRECATED_END
+        max = get_constant_max_of_type(data_type);
     }
 
     const auto max_of_min_and_data = std::make_shared<v1::Maximum>(min, data);
diff --git a/src/frontends/onnx/frontend/src/op/com.microsoft/attention.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/attention.cpp
index 4b2f326c784499..194bd3edd36d09 100644
--- a/src/frontends/onnx/frontend/src/op/com.microsoft/attention.cpp
+++ b/src/frontends/onnx/frontend/src/op/com.microsoft/attention.cpp
@@ -265,7 +265,7 @@ NodeVector split_to_QKV(const std::shared_ptr<ov::Node>& node,
 // Handling both mask_index variants (so (batch_size) and (2 * batch_size)) is tricky since we don't
 // know its dimensions upfront. So we compute both variants and use Select operator to select
 // the right one in the runtime (unless it gets constantfolded before).
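Annotation, before the attention signature changes below: the Clip rewrite above replaces the deprecated ngraph helpers with local ones, keeping the same defaulting rule: an absent min/max input falls back to the numeric limits of the data type, and the result is Maximum(min, data) followed by Minimum(max, ...). A scalar sketch of that rule (illustrative, not the importer's code):

    #include <algorithm>
    #include <cassert>
    #include <limits>
    #include <optional>

    // Scalar analogue of the set_11 Clip defaulting above. C++17.
    template <typename T>
    T clip_like(T x, std::optional<T> lo, std::optional<T> hi) {
        const T min_v = lo.value_or(std::numeric_limits<T>::lowest());
        const T max_v = hi.value_or(std::numeric_limits<T>::max());
        return std::min(std::max(x, min_v), max_v);  // Maximum(min, x), then Minimum(max, ...)
    }

    int main() {
        assert(clip_like<float>(5.f, std::nullopt, std::optional<float>(1.f)) == 1.f);
        assert(clip_like<int>(-7, std::optional<int>(0), std::nullopt) == 0);
        assert(clip_like<float>(0.5f, std::nullopt, std::nullopt) == 0.5f);  // no-op clip
        return 0;
    }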
-std::shared_ptr<ov::Node> attention_mask_from_indices(const Output<ov::Node>& mask_index,
+std::shared_ptr<ov::Node> attention_mask_from_indices(const ov::Output<ov::Node>& mask_index,
                                                       const element::Type_t& type,
                                                       const std::shared_ptr<ov::Node>& batch_size,
                                                       const std::shared_ptr<ov::Node>& all_seq_len) {
@@ -371,7 +371,7 @@ NodeTuple unidirectional_mask(const element::Type_t& type,
 //
 // Shape (batch_size, 1, max_sequence_length, max_sequence_length) is not supported in onnxruntime:
 // https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_helper.h#L78
-std::shared_ptr<ov::Node> raw_mask(const Output<ov::Node>& mask_index,
+std::shared_ptr<ov::Node> raw_mask(const ov::Output<ov::Node>& mask_index,
                                    ov::Dimension::value_type mask_rank,
                                    const element::Type_t& type) {
     std::shared_ptr<ov::Node> mask = std::make_shared<v0::Convert>(mask_index, type);
diff --git a/src/frontends/onnx/frontend/src/op/com.microsoft/fusedgemm.cpp b/src/frontends/onnx/frontend/src/op/com.microsoft/fusedgemm.cpp
index 6f6039e5496f4c..bf4c8a3dac56cf 100644
--- a/src/frontends/onnx/frontend/src/op/com.microsoft/fusedgemm.cpp
+++ b/src/frontends/onnx/frontend/src/op/com.microsoft/fusedgemm.cpp
@@ -27,9 +27,9 @@ OutputVector fusedgemm(const Node& node) {
     FRONT_END_GENERAL_CHECK(num_inputs == 2 || num_inputs == 3,
                             "FusedGemm takes 2/3 inputs. Provided " + std::to_string(num_inputs));
 
-    Output<ov::Node> input_a = inputs.at(0);
-    Output<ov::Node> input_b = inputs.at(1);
-    Output<ov::Node> input_c;
+    ov::Output<ov::Node> input_a = inputs.at(0);
+    ov::Output<ov::Node> input_b = inputs.at(1);
+    ov::Output<ov::Node> input_c;
 
     if (num_inputs == 3 && !ov::op::util::is_null(inputs[2])) {
         input_c = inputs.at(2);
diff --git a/src/frontends/onnx/frontend/src/op/constant_fill.cpp b/src/frontends/onnx/frontend/src/op/constant_fill.cpp
index e4641c29be1cf3..66faec76b33b84 100644
--- a/src/frontends/onnx/frontend/src/op/constant_fill.cpp
+++ b/src/frontends/onnx/frontend/src/op/constant_fill.cpp
@@ -20,7 +20,7 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 OutputVector constant_fill(const Node& node) {
-    Output<ov::Node> target_shape;
+    ov::Output<ov::Node> target_shape;
     const auto dtype = node.get_attribute_value<int64_t>("dtype", static_cast<int64_t>(TensorProto_DataType_FLOAT));
     const auto ng_type = onnx_to_ov_data_type(static_cast<TensorProto_DataType>(dtype));
     const auto const_val_to_fill = node.get_attribute_as_constant<float>("value", 0.f, ng_type);
diff --git a/src/frontends/onnx/frontend/src/op/constant_of_shape.cpp b/src/frontends/onnx/frontend/src/op/constant_of_shape.cpp
index e9d628ce628db7..a4a3af5e0863fd 100644
--- a/src/frontends/onnx/frontend/src/op/constant_of_shape.cpp
+++ b/src/frontends/onnx/frontend/src/op/constant_of_shape.cpp
@@ -20,7 +20,7 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 OutputVector constant_of_shape(const onnx_import::Node& node) {
-    Output<ov::Node> constant_value;
+    ov::Output<ov::Node> constant_value;
     if (node.has_attribute("value")) {
         auto value_tensor = node.get_attribute_value<Tensor>("value");
         constant_value = value_tensor.get_ov_constant();
diff --git a/src/frontends/onnx/frontend/src/op/conv.cpp b/src/frontends/onnx/frontend/src/op/conv.cpp
index 35c4228dca6c74..a9b65707ff7ba0 100644
--- a/src/frontends/onnx/frontend/src/op/conv.cpp
+++ b/src/frontends/onnx/frontend/src/op/conv.cpp
@@ -23,14 +23,17 @@ namespace op {
 namespace set_1 {
 namespace detail {
 
-std::shared_ptr<ov::Node> add_bias(const Output<ov::Node>& ng_conv, const Output<ov::Node>& bias) {
+std::shared_ptr<ov::Node> add_bias(const ov::Output<ov::Node>& ng_conv, const ov::Output<ov::Node>& bias) {
     const auto conv_shape = std::make_shared<v3::ShapeOf>(ng_conv);
     const auto conv_rank = std::make_shared<v3::ShapeOf>(conv_shape);
 
     return {std::make_shared<v1::Add>(ng_conv,
                                      reshape::reshape_channel_shaped_node_to_nchw(bias, conv_rank))};
 }
 
-OutputVector conv(const Node& node, Output<ov::Node> data, Output<ov::Node> filters, Output<ov::Node> bias) {
+OutputVector conv(const Node& node,
+                  ov::Output<ov::Node> data,
+                  ov::Output<ov::Node> filters,
+                  ov::Output<ov::Node> bias) {
     // in the current implementation we assume that the data input rank is static
     // and only the 'batch' dimension can be dynamic
     const auto groups = node.get_attribute_value<int64_t>("group", 1);
diff --git a/src/frontends/onnx/frontend/src/op/conv.hpp b/src/frontends/onnx/frontend/src/op/conv.hpp
index 85e75b5cf61202..c4ba2aa249bc13 100644
--- a/src/frontends/onnx/frontend/src/op/conv.hpp
+++ b/src/frontends/onnx/frontend/src/op/conv.hpp
@@ -15,7 +15,7 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 namespace detail {
-OutputVector conv(const Node& node, Output<ov::Node> data, Output<ov::Node> filters, Output<ov::Node> bias);
+OutputVector conv(const Node& node, ov::Output<ov::Node> data, ov::Output<ov::Node> filters, ov::Output<ov::Node> bias);
 }
 /// \brief Performs ONNX Conv operation.
 ///
diff --git a/src/frontends/onnx/frontend/src/op/conv_transpose.cpp b/src/frontends/onnx/frontend/src/op/conv_transpose.cpp
index 565696be7fb86d..b20b21c8dfc854 100644
--- a/src/frontends/onnx/frontend/src/op/conv_transpose.cpp
+++ b/src/frontends/onnx/frontend/src/op/conv_transpose.cpp
@@ -26,15 +26,15 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 namespace {
-Output<ov::Node> make_group_conv_backprop(const Output<ov::Node>& data,
-                                          const Output<ov::Node>& filters,
-                                          const Strides& strides,
-                                          const Strides& dilations,
-                                          const CoordinateDiff& pads_begin,
-                                          const CoordinateDiff& pads_end,
-                                          const ov::op::PadType& auto_pad_type,
-                                          const std::vector<std::int64_t>& output_shape,
-                                          const std::vector<std::int64_t>& output_padding) {
+ov::Output<ov::Node> make_group_conv_backprop(const ov::Output<ov::Node>& data,
+                                              const ov::Output<ov::Node>& filters,
+                                              const Strides& strides,
+                                              const Strides& dilations,
+                                              const CoordinateDiff& pads_begin,
+                                              const CoordinateDiff& pads_end,
+                                              const ov::op::PadType& auto_pad_type,
+                                              const std::vector<std::int64_t>& output_shape,
+                                              const std::vector<std::int64_t>& output_padding) {
     if (output_shape.empty()) {
         return std::make_shared<v1::GroupConvolutionBackpropData>(
             data,
@@ -57,15 +57,15 @@ Output<ov::Node> make_group_conv_backprop(const Output<ov::Node>& data,
     }
 }
 
-Output<ov::Node> make_conv_backprop(const Output<ov::Node>& data,
-                                    const Output<ov::Node>& filters,
-                                    const Strides& strides,
-                                    const Strides& dilations,
-                                    const CoordinateDiff& pads_begin,
-                                    const CoordinateDiff& pads_end,
-                                    const ov::op::PadType& auto_pad_type,
-                                    const std::vector<std::int64_t>& output_shape,
-                                    const std::vector<std::int64_t>& output_padding) {
+ov::Output<ov::Node> make_conv_backprop(const ov::Output<ov::Node>& data,
+                                        const ov::Output<ov::Node>& filters,
+                                        const Strides& strides,
+                                        const Strides& dilations,
+                                        const CoordinateDiff& pads_begin,
+                                        const CoordinateDiff& pads_end,
+                                        const ov::op::PadType& auto_pad_type,
+                                        const std::vector<std::int64_t>& output_shape,
+                                        const std::vector<std::int64_t>& output_padding) {
     if (output_shape.empty()) {
         return std::make_shared<v1::ConvolutionBackpropData>(
             data,
@@ -90,7 +90,7 @@ Output<ov::Node> make_conv_backprop(const Output<ov::Node>& data,
     }
 }
 
-Output<ov::Node> get_prepared_bias(const Output<ov::Node>& bias, const Output<ov::Node>& conv) {
+ov::Output<ov::Node> get_prepared_bias(const ov::Output<ov::Node>& bias, const ov::Output<ov::Node>& conv) {
     // Prepare bias shape [1, C, 1, 1]
     const auto& conv_pshape = conv.get_partial_shape();
     std::shared_ptr<ov::Node> bias_shape_node;
@@ -176,7 +176,7 @@ OutputVector conv_transpose(const Node& node) {
     CHECK_VALID_NODE(node, groups >= 0, "Incorrect value of 'group' attribute: ", groups);
 
-    Output<ov::Node> conv_node;
+    ov::Output<ov::Node> conv_node;
 
     if (groups > 1) {
         filters = convpool::get_reshaped_filters(filters, groups);
diff --git a/src/frontends/onnx/frontend/src/op/cum_sum.cpp b/src/frontends/onnx/frontend/src/op/cum_sum.cpp
index f3bcf29fde38dc..91e793bfa42416 100644
--- a/src/frontends/onnx/frontend/src/op/cum_sum.cpp
+++ b/src/frontends/onnx/frontend/src/op/cum_sum.cpp
@@ -20,7 +20,7 @@ OutputVector cum_sum(const Node& node) {
     auto data = inputs.at(0);
     bool exclusive = node.get_attribute_value<std::int64_t>("exclusive", 0);
     bool reverse = node.get_attribute_value<std::int64_t>("reverse", 0);
-    Output<ov::Node> axis;
+    ov::Output<ov::Node> axis;
 
     if (inputs.size() > 1) {
         // optional input, 0-D or 1-D tensor
diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp
index 5e234a39b1a5d0..80e802cde774ec 100644
--- a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp
+++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp
@@ -8,7 +8,7 @@
 #include <memory>
 
 #include "onnx_import/core/null_node.hpp"
-#include "openvino/core/validation_util.hpp"
 #include "openvino/frontend/exception.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/convert.hpp"
@@ -16,6 +15,7 @@
 #include "openvino/op/reshape.hpp"
 #include "openvino/op/subtract.hpp"
 #include "utils/common.hpp"
+#include "validation_util.hpp"
 
 using namespace ov::op;
 
@@ -64,7 +64,7 @@ OutputVector dequantize_linear(const Node& node) {
 
 namespace set_13 {
 namespace detail {
-void validate_scale(const Output<ov::Node> scale, const Output<ov::Node> x, const int64_t axis) {
+void validate_scale(const ov::Output<ov::Node> scale, const ov::Output<ov::Node> x, const int64_t axis) {
     const auto& scale_shape = scale.get_partial_shape();
     FRONT_END_GENERAL_CHECK(scale_shape.rank().get_length() == 0 || scale_shape.rank().get_length() == 1,
                             "Dequantization scale needs to be a scalar or a vector.");
@@ -84,7 +84,7 @@ void validate_scale(const Output<ov::Node> scale, const Output<ov::Node> x, con
     }
 }
 
-void validate_zero_point(const Output<ov::Node> zero_point, const Output<ov::Node> x, const int64_t axis) {
+void validate_zero_point(const ov::Output<ov::Node> zero_point, const ov::Output<ov::Node> x, const int64_t axis) {
     const auto& zero_point_shape = zero_point.get_partial_shape();
     FRONT_END_GENERAL_CHECK(zero_point_shape.rank().get_length() == 0 || zero_point_shape.rank().get_length() == 1,
                             "Zero point needs to be a scalar or a vector.");
@@ -104,9 +104,9 @@ void validate_zero_point(const Output<ov::Node> zero_point, const Output<ov::No
 
-std::shared_ptr<ov::Node> reshape_input(const Output<ov::Node>& input,
+std::shared_ptr<ov::Node> reshape_input(const ov::Output<ov::Node>& input,
                                         const int64_t axis,
-                                        const PartialShape& x_shape) {
+                                        const ov::PartialShape& x_shape) {
     // these reshapes make sure that dequantization happens over the specified axis
     auto input_rank = input.get_partial_shape().rank();
 
@@ -136,8 +136,8 @@ std::shared_ptr<ov::Node> reshape_input(const Output<ov::Node>& input,
     return std::make_shared<v1::Reshape>(input, target_shape, true);
 }
 
-OutputVector dequantize_linear(const Output<ov::Node>& x,
-                               const Output<ov::Node>& scale,
+OutputVector dequantize_linear(const ov::Output<ov::Node>& x,
+                               const ov::Output<ov::Node>& scale,
                                const std::shared_ptr<ov::Node>& zero_point,
                                int64_t axis,
                                const Node& node) {
@@ -145,9 +145,7 @@ OutputVector dequantize_linear(const Output<ov::Node>& x,
     FRONT_END_GENERAL_CHECK(x_shape.rank().is_static(), "Rank of the input data tensor has to be known (static).");
 
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    axis = ov::normalize_axis(node.get_description(), axis, x_shape.rank());
-    OPENVINO_SUPPRESS_DEPRECATED_END
+    axis = ov::util::normalize_axis(node.get_description(), axis, x_shape.rank());
 
     validate_scale(scale, x, axis);
     const auto scale_reshaped = reshape_input(scale, axis, x_shape);
diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.hpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.hpp
index 7bb121d7e2df29..e52dd75589c8b2 100644
--- a/src/frontends/onnx/frontend/src/op/dequantize_linear.hpp
+++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.hpp
@@ -21,8 +21,8 @@ OutputVector dequantize_linear(const Node& node);
 
 namespace set_13 {
 namespace detail {
-OutputVector dequantize_linear(const Output<ov::Node>& x,
-                               const Output<ov::Node>& scale,
+OutputVector dequantize_linear(const ov::Output<ov::Node>& x,
+                               const ov::Output<ov::Node>& scale,
                                const std::shared_ptr<ov::Node>& zero_point,
                                int64_t axis,
                                const Node& node);
diff --git a/src/frontends/onnx/frontend/src/op/dynamic_quantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dynamic_quantize_linear.cpp
index fabe0c784d14b5..2786d5f2303a1e 100644
--- a/src/frontends/onnx/frontend/src/op/dynamic_quantize_linear.cpp
+++ b/src/frontends/onnx/frontend/src/op/dynamic_quantize_linear.cpp
@@ -60,10 +60,10 @@ std::shared_ptr<ov::Node> find_max_value(const ov::Output<ov::Node>& input) {
     return std::make_shared<v1::Maximum>(zero_node_u8, input_max);
 }
 
-std::shared_ptr<ov::Node> quantize_linear(Output<ov::Node> x,
-                                          Output<ov::Node> x_span,
-                                          Output<ov::Node> quant_range_span,
-                                          Output<ov::Node> y_zero_point) {
+std::shared_ptr<ov::Node> quantize_linear(ov::Output<ov::Node> x,
+                                          ov::Output<ov::Node> x_span,
+                                          ov::Output<ov::Node> quant_range_span,
+                                          ov::Output<ov::Node> y_zero_point) {
     const auto& x_scaled =
         std::make_shared<v1::Divide>(std::make_shared<v1::Multiply>(x, quant_range_span), x_span);
     const auto& x_rounded = std::make_shared<v5::Round>(x_scaled, ov::op::v5::Round::RoundMode::HALF_TO_EVEN);
diff --git a/src/frontends/onnx/frontend/src/op/expand.cpp b/src/frontends/onnx/frontend/src/op/expand.cpp
index cb5c1d462dede6..ccddf49c3447aa 100644
--- a/src/frontends/onnx/frontend/src/op/expand.cpp
+++ b/src/frontends/onnx/frontend/src/op/expand.cpp
@@ -16,8 +16,8 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 OutputVector expand(const Node& node) {
-    const Output<ov::Node> data{node.get_ng_inputs().at(0)};
-    const Output<ov::Node> shape{node.get_ng_inputs().at(1)};
+    const ov::Output<ov::Node> data{node.get_ng_inputs().at(0)};
+    const ov::Output<ov::Node> shape{node.get_ng_inputs().at(1)};
 
     if (common::is_failsafe_node(shape.get_node_shared_ptr())) {
         // in case the "shape" input is connected to a failsafe node created in place of an invalid initializer
diff --git a/src/frontends/onnx/frontend/src/op/eye_like.cpp b/src/frontends/onnx/frontend/src/op/eye_like.cpp
index b617bda7f1d47a..9712cd46d79883 100644
--- a/src/frontends/onnx/frontend/src/op/eye_like.cpp
+++ b/src/frontends/onnx/frontend/src/op/eye_like.cpp
@@ -21,7 +21,7 @@ namespace detail {
 namespace {
 
 /// \brief Split a shape returned by a ShapeOf operation into two outputs: width and height.
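Annotation: the dequantize_linear change above is mostly namespace churn, but reshape_input is worth restating. For a per-axis 1-D scale or zero point, it builds a target shape of all ones with the channel count at the (already normalized) axis so that y = (x - zero_point) * scale broadcasts along that axis. A sketch of that target shape, using a hypothetical helper name (not the frontend's code); the eye_like helper below then continues the same Output qualification:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Broadcast shape for a per-axis quantization parameter: ones everywhere
    // except the normalized axis. Illustrative sketch.
    std::vector<std::int64_t> per_axis_broadcast_shape(std::int64_t rank,
                                                       std::int64_t axis,
                                                       std::int64_t channels) {
        std::vector<std::int64_t> shape(static_cast<size_t>(rank), 1);
        shape[static_cast<size_t>(axis)] = channels;
        return shape;
    }

    int main() {
        // x: [N, C, H, W], scale: [C], axis = 1  ->  reshape scale to [1, C, 1, 1].
        const auto s = per_axis_broadcast_shape(4, 1, 16);
        assert((s == std::vector<std::int64_t>{1, 16, 1, 1}));
        return 0;
    }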
-OutputVector get_shape_width_and_height(const Output<ov::Node>& shape) {
+OutputVector get_shape_width_and_height(const ov::Output<ov::Node>& shape) {
     const auto axis = v0::Constant::create(ov::element::i64, {1}, {0});
     const auto height = std::make_shared<v8::Gather>(shape, v0::Constant::create(ov::element::i64, {1}, {0}), axis);
     const auto width = std::make_shared<v8::Gather>(shape, v0::Constant::create(ov::element::i64, {1}, {1}), axis);
@@ -38,7 +38,7 @@ OutputVector eye_like(const Node& node) {
     const auto& input_rank = input.get_partial_shape().rank();
     CHECK_VALID_NODE(node,
-                     input_rank.compatible(Rank(2)),
+                     input_rank.compatible(ov::Rank(2)),
                      "The provided shape rank: ",
                      input_rank.get_length(),
                      " is unsupported, only 2D shapes are supported");
diff --git a/src/frontends/onnx/frontend/src/op/flatten.cpp b/src/frontends/onnx/frontend/src/op/flatten.cpp
index cbdc74697540b1..c5b683709a40a3 100644
--- a/src/frontends/onnx/frontend/src/op/flatten.cpp
+++ b/src/frontends/onnx/frontend/src/op/flatten.cpp
@@ -5,8 +5,8 @@
 #include "op/flatten.hpp"
 
 #include "exceptions.hpp"
-#include "openvino/core/validation_util.hpp"
 #include "ov_models/ov_builders/reshape.hpp"
+#include "validation_util.hpp"
 
 using namespace ov::op;
 
@@ -24,9 +24,8 @@ OutputVector flatten(const Node& node) {
     if (data_rank.is_static()) {
         const std::int64_t data_rank_value = data_rank.get_length();
         // Accepted range is [-r, r] where r = rank(input).
-        OPENVINO_SUPPRESS_DEPRECATED_START
-        axis = ov::normalize_axis(node.get_description(), axis, data_rank_value, -data_rank_value, data_rank_value);
-        OPENVINO_SUPPRESS_DEPRECATED_END
+        axis =
+            ov::util::normalize_axis(node.get_description(), axis, data_rank_value, -data_rank_value, data_rank_value);
     }
     return {ov::op::util::flatten(data, static_cast<int>(axis))};
 }
diff --git a/src/frontends/onnx/frontend/src/op/gemm.cpp b/src/frontends/onnx/frontend/src/op/gemm.cpp
index a2988cc013aa2a..ee58da9c36fea2 100644
--- a/src/frontends/onnx/frontend/src/op/gemm.cpp
+++ b/src/frontends/onnx/frontend/src/op/gemm.cpp
@@ -19,9 +19,9 @@ namespace op {
 namespace set_1 {
 OutputVector gemm(const Node& node) {
     OutputVector inputs{node.get_ng_inputs()};
-    Output<ov::Node> input_a = inputs.at(0);
-    Output<ov::Node> input_b = inputs.at(1);
-    Output<ov::Node> input_c;
+    ov::Output<ov::Node> input_a = inputs.at(0);
+    ov::Output<ov::Node> input_b = inputs.at(1);
+    ov::Output<ov::Node> input_c;
 
     if (inputs.size() == 3) {
         input_c = inputs.at(2);
@@ -63,9 +63,9 @@ OutputVector gemm(const Node& node) {
 namespace set_6 {
 OutputVector gemm(const Node& node) {
     OutputVector inputs{node.get_ng_inputs()};
-    Output<ov::Node> input_a = inputs.at(0);
-    Output<ov::Node> input_b = inputs.at(1);
-    Output<ov::Node> input_c;
+    ov::Output<ov::Node> input_a = inputs.at(0);
+    ov::Output<ov::Node> input_b = inputs.at(1);
+    ov::Output<ov::Node> input_c;
 
     if (inputs.size() == 3) {
         input_c = inputs.at(2);
diff --git a/src/frontends/onnx/frontend/src/op/hardmax.cpp b/src/frontends/onnx/frontend/src/op/hardmax.cpp
index 3bb60bfee46e9d..876098fb935984 100644
--- a/src/frontends/onnx/frontend/src/op/hardmax.cpp
+++ b/src/frontends/onnx/frontend/src/op/hardmax.cpp
@@ -5,7 +5,6 @@
 #include "op/hardmax.hpp"
 
 #include "exceptions.hpp"
-#include "openvino/core/validation_util.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/convert.hpp"
 #include "openvino/op/gather.hpp"
@@ -16,6 +15,7 @@
 #include "ov_models/ov_builders/reshape.hpp"
 #include "utils/common.hpp"
 #include "utils/reshape.hpp"
+#include "validation_util.hpp"
 
 using namespace ov::op;
 
@@ -30,18 +30,16 @@ OutputVector hardmax(const Node& node) {
     auto axis = node.get_attribute_value<std::int64_t>("axis", 1);
 
     if (input_shape.rank().is_static()) {
-        OPENVINO_SUPPRESS_DEPRECATED_START
-        axis = ov::normalize_axis(node.get_description(), axis, input_shape.rank());
-        OPENVINO_SUPPRESS_DEPRECATED_END
+        axis = ov::util::normalize_axis(node.get_description(), axis, input_shape.rank());
     }
 
     // reshape to 2D - "batch size" x "input feature dimensions" (NxD)
     const auto coerced_tensor = ov::op::util::flatten(input, static_cast<int>(axis));
 
     const auto coerced_tensor_shape = std::make_shared<v3::ShapeOf>(coerced_tensor);
-    Output<ov::Node> row_size = std::make_shared<v8::Gather>(coerced_tensor_shape,
-                                                             ov::op::v0::Constant::create(element::i64, {1}, {1}),
-                                                             ov::op::v0::Constant::create(element::i64, {}, {0}));
+    ov::Output<ov::Node> row_size = std::make_shared<v8::Gather>(coerced_tensor_shape,
+                                                                 ov::op::v0::Constant::create(element::i64, {1}, {1}),
+                                                                 ov::op::v0::Constant::create(element::i64, {}, {0}));
     row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size);
 
     const auto indices_axis = 1;
@@ -68,14 +66,13 @@ OutputVector hardmax(const Node& node) {
     const auto& input_shape = input.get_partial_shape();
 
     auto axis = node.get_attribute_value<std::int64_t>("axis", -1);
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    axis = ov::normalize_axis(node.get_description(), axis, input_shape.rank());
-    OPENVINO_SUPPRESS_DEPRECATED_END
+    axis = ov::util::normalize_axis(node.get_description(), axis, input_shape.rank());
 
     const auto input_runtime_shape = std::make_shared<v3::ShapeOf>(input);
-    Output<ov::Node> row_size = std::make_shared<v8::Gather>(input_runtime_shape,
-                                                             ov::op::v0::Constant::create(element::i64, {1}, {axis}),
-                                                             ov::op::v0::Constant::create(element::i64, {}, {0}));
+    ov::Output<ov::Node> row_size =
+        std::make_shared<v8::Gather>(input_runtime_shape,
+                                     ov::op::v0::Constant::create(element::i64, {1}, {axis}),
+                                     ov::op::v0::Constant::create(element::i64, {}, {0}));
     row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size);
 
     const auto topk = std::make_shared<v11::TopK>(input,
diff --git a/src/frontends/onnx/frontend/src/op/instance_norm.cpp b/src/frontends/onnx/frontend/src/op/instance_norm.cpp
index 4fc3fd1fad1e60..7df746ee95a4a3 100644
--- a/src/frontends/onnx/frontend/src/op/instance_norm.cpp
+++ b/src/frontends/onnx/frontend/src/op/instance_norm.cpp
@@ -20,12 +20,12 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 OutputVector instance_norm(const Node& node) {
-    Output<ov::Node> data(node.get_ng_inputs().at(0));
-    Output<ov::Node> scale(node.get_ng_inputs().at(1));
-    Output<ov::Node> bias(node.get_ng_inputs().at(2));
-    const PartialShape& data_pshape = data.get_partial_shape();
-    const PartialShape& scale_pshape = scale.get_partial_shape();
-    const PartialShape& bias_pshape = bias.get_partial_shape();
+    ov::Output<ov::Node> data(node.get_ng_inputs().at(0));
+    ov::Output<ov::Node> scale(node.get_ng_inputs().at(1));
+    ov::Output<ov::Node> bias(node.get_ng_inputs().at(2));
+    const ov::PartialShape& data_pshape = data.get_partial_shape();
+    const ov::PartialShape& scale_pshape = scale.get_partial_shape();
+    const ov::PartialShape& bias_pshape = bias.get_partial_shape();
     const float epsilon{node.get_attribute_value<float>("epsilon", 1e-5f)};
 
     element::Type result_et;
diff --git a/src/frontends/onnx/frontend/src/op/log_softmax.cpp b/src/frontends/onnx/frontend/src/op/log_softmax.cpp
index eb3c11f1719d3c..7b3734eb08b4fe 100644
--- a/src/frontends/onnx/frontend/src/op/log_softmax.cpp
+++ b/src/frontends/onnx/frontend/src/op/log_softmax.cpp
@@ -12,6 +12,7 @@
 #include "openvino/op/reshape.hpp"
 #include "openvino/op/shape_of.hpp"
 #include "ov_models/ov_builders/reshape.hpp"
+#include "validation_util.hpp"
 
 using namespace ov::op;
 
@@ -19,7 +20,7 @@ OPENVINO_SUPPRESS_DEPRECATED_START
 namespace ngraph {
 namespace onnx_import {
 namespace {
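Annotation: the recurring swap above (flatten, hardmax, dequantize_linear, and log_softmax just below) replaces the deprecated ov::normalize_axis with ov::util::normalize_axis; the semantics are unchanged. An axis in [-rank, rank) is accepted and negatives count from the back; flatten is the one caller that widens the accepted range to [-r, r] inclusive via the extra bounds arguments. A sketch of the default rule (illustrative, not the ov::util implementation):

    #include <cassert>
    #include <cstdint>
    #include <stdexcept>

    // Default axis normalization: accept axis in [-rank, rank), map negatives
    // to axis + rank. Illustrative sketch only.
    std::int64_t normalize_axis_like(std::int64_t axis, std::int64_t rank) {
        if (axis < -rank || axis >= rank)
            throw std::out_of_range("axis out of [-rank, rank)");
        return axis < 0 ? axis + rank : axis;
    }

    int main() {
        assert(normalize_axis_like(-1, 4) == 3);  // hardmax set_13 default axis
        assert(normalize_axis_like(1, 4) == 1);   // hardmax set_1 default axis
        return 0;
    }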
-std::shared_ptr<ov::Node> onnx_logsoftmax(const Output<ov::Node> data, const int64_t axis) {
+std::shared_ptr<ov::Node> onnx_logsoftmax(const ov::Output<ov::Node> data, const int64_t axis) {
     const auto coerced_data = ov::op::util::flatten(data, static_cast<int>(axis));
     const auto result = std::make_shared<v5::LogSoftmax>(coerced_data, 1);
     const auto data_shape = std::make_shared<v3::ShapeOf>(data);
@@ -43,16 +44,12 @@ OutputVector log_softmax(const Node& node, const int64_t DEFAULT_AXIS) {
     }
     case 1: {
         // checks if the axis belongs to the allowed values set (-1 and 0 for 1D)
-        OPENVINO_SUPPRESS_DEPRECATED_START
-        ov::normalize_axis(node.get_description(), axis, data_rank);
-        OPENVINO_SUPPRESS_DEPRECATED_END
+        ov::util::normalize_axis(node.get_description(), axis, data_rank);
         result = std::make_shared<v5::LogSoftmax>(data, 0);
         break;
     }
     default: {
-        OPENVINO_SUPPRESS_DEPRECATED_START
-        const auto normalized_axis = ov::normalize_axis(node.get_description(), axis, data_rank);
-        OPENVINO_SUPPRESS_DEPRECATED_END
+        const auto normalized_axis = ov::util::normalize_axis(node.get_description(), axis, data_rank);
 
         result = onnx_logsoftmax(data, normalized_axis);
         break;
diff --git a/src/frontends/onnx/frontend/src/op/loop.cpp b/src/frontends/onnx/frontend/src/op/loop.cpp
index 7cc7c16a3ee9a4..1da495e36d7e87 100644
--- a/src/frontends/onnx/frontend/src/op/loop.cpp
+++ b/src/frontends/onnx/frontend/src/op/loop.cpp
@@ -56,7 +56,7 @@ OutputVector loop(const Node& node) {
     }
 
     // optional inputs
-    Output<ov::Node> trip_count;
+    ov::Output<ov::Node> trip_count;
     // trip count skipped or has value max(int64_t) means infinitive loop
     if (ov::op::util::is_null(ng_inputs.at(0)) ||
         (ov::op::util::is_constant(ng_inputs.at(0).get_node_shared_ptr()) &&
@@ -68,7 +68,7 @@ OutputVector loop(const Node& node) {
         trip_count = ng_inputs.at(0);
     }
 
-    Output<ov::Node> termination_cond;  // true means that first interation should be run
+    ov::Output<ov::Node> termination_cond;  // true means that first interation should be run
     if (ov::op::util::is_null(ng_inputs.at(1).get_node_shared_ptr()))  // termination condition skipped
     {
         termination_cond = v0::Constant::create(ov::element::boolean, {1}, {true});
diff --git a/src/frontends/onnx/frontend/src/op/lp_norm.cpp b/src/frontends/onnx/frontend/src/op/lp_norm.cpp
index 6417a38721b117..2f5978731ce7d4 100644
--- a/src/frontends/onnx/frontend/src/op/lp_norm.cpp
+++ b/src/frontends/onnx/frontend/src/op/lp_norm.cpp
@@ -9,6 +9,7 @@
 #include "openvino/op/constant.hpp"
 #include "openvino/op/divide.hpp"
 #include "ov_models/ov_builders/norm.hpp"
+#include "validation_util.hpp"
 
 using namespace ov::op;
 
@@ -18,16 +19,14 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 OutputVector lp_norm(const Node& node) {
-    const Output<ov::Node> data{node.get_ng_inputs().at(0)};
+    const ov::Output<ov::Node> data{node.get_ng_inputs().at(0)};
     const auto data_shape = data.get_partial_shape();
     const auto data_rank = data_shape.rank();
 
     const std::int64_t p_norm{node.get_attribute_value<std::int64_t>("p", 2)};
     const std::int64_t axis{node.get_attribute_value<std::int64_t>("axis", -1)};
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    const size_t normalize_axis = ov::normalize_axis(node.get_description(), axis, data_rank);
-    OPENVINO_SUPPRESS_DEPRECATED_END
+    const size_t normalize_axis = ov::util::normalize_axis(node.get_description(), axis, data_rank);
 
     CHECK_VALID_NODE(node,
                      p_norm == 1 || p_norm == 2,
diff --git a/src/frontends/onnx/frontend/src/op/lp_pool.cpp b/src/frontends/onnx/frontend/src/op/lp_pool.cpp
index 3d70eba4f4960f..baeaa11c20b780 100644
--- a/src/frontends/onnx/frontend/src/op/lp_pool.cpp
+++ b/src/frontends/onnx/frontend/src/op/lp_pool.cpp
@@ -21,7 +21,7 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 OutputVector global_lp_pool(const Node& node) {
-    const Output<ov::Node> data{node.get_ng_inputs().at(0)};
+    const ov::Output<ov::Node> data{node.get_ng_inputs().at(0)};
     const std::size_t channel_axis{1};
 
     const auto data_shape = data.get_partial_shape();
diff --git a/src/frontends/onnx/frontend/src/op/lstm.cpp b/src/frontends/onnx/frontend/src/op/lstm.cpp
index b18ea86c5797cb..a949a0fea90331 100644
--- a/src/frontends/onnx/frontend/src/op/lstm.cpp
+++ b/src/frontends/onnx/frontend/src/op/lstm.cpp
@@ -160,10 +160,10 @@ struct LSTMNgInputMap {
         }
     }
 
-    Output<ov::Node>& at(const LSTMInput& key) {
+    ov::Output<ov::Node>& at(const LSTMInput& key) {
         return m_input_map.at(key);
     }
-    std::map<LSTMInput, Output<ov::Node>> m_input_map;
+    std::map<LSTMInput, ov::Output<ov::Node>> m_input_map;
 };
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ATTRIBUTES PARSING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/src/frontends/onnx/frontend/src/op/matmul.hpp b/src/frontends/onnx/frontend/src/op/matmul.hpp
index 96a9dad986ad7d..2f33be58bf6380 100644
--- a/src/frontends/onnx/frontend/src/op/matmul.hpp
+++ b/src/frontends/onnx/frontend/src/op/matmul.hpp
@@ -14,7 +14,7 @@ namespace ngraph {
 namespace onnx_import {
 namespace op {
 namespace detail {
-inline OutputVector matmul(const Output<ov::Node>& a, const Output<ov::Node>& b) {
+inline OutputVector matmul(const ov::Output<ov::Node>& a, const ov::Output<ov::Node>& b) {
     return {std::make_shared<ov::op::v0::MatMul>(a, b)};
 }
 }  // namespace detail
diff --git a/src/frontends/onnx/frontend/src/op/matmul_integer.cpp b/src/frontends/onnx/frontend/src/op/matmul_integer.cpp
index a467b4cc82b452..9181ff6baf0d8d 100644
--- a/src/frontends/onnx/frontend/src/op/matmul_integer.cpp
+++ b/src/frontends/onnx/frontend/src/op/matmul_integer.cpp
@@ -33,7 +33,7 @@ OutputVector matmul_integer(const Node& node) {
 
     const auto& A_zero_point_rank = A_zero_point.get_partial_shape().rank();
 
-    Output<ov::Node> shifted_A;
+    ov::Output<ov::Node> shifted_A;
     if (A_zero_point_rank.is_static() && A_zero_point_rank.get_length() == 1) {
         const auto& one_node = v0::Constant::create(ov::element::i32, {1}, {1});
         const auto& reshaped_A_zero_point = std::make_shared<v0::Unsqueeze>(converted_A_zero_point, one_node);
diff --git a/src/frontends/onnx/frontend/src/op/mean_variance_normalization.cpp b/src/frontends/onnx/frontend/src/op/mean_variance_normalization.cpp
index a8992a65d8d4a2..679ee91250dcf4 100644
--- a/src/frontends/onnx/frontend/src/op/mean_variance_normalization.cpp
+++ b/src/frontends/onnx/frontend/src/op/mean_variance_normalization.cpp
@@ -4,9 +4,9 @@
 
 #include "op/mean_variance_normalization.hpp"
 
-#include "openvino/core/validation_util.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/mvn.hpp"
+#include "validation_util.hpp"
 
 using namespace ov::op;
 
@@ -29,10 +29,8 @@ namespace set_9 {
 OutputVector mean_variance_normalization(const Node& node) {
     auto data = node.get_ng_inputs().at(0);
     auto axes = node.get_attribute_value<std::vector<std::int64_t>>("axes", {0, 2, 3});
-    OPENVINO_SUPPRESS_DEPRECATED_START
     const std::vector<size_t> normalized_axes =
-        ov::normalize_axes(node.get_description(), axes, data.get_partial_shape().rank());
-    OPENVINO_SUPPRESS_DEPRECATED_END
+        ov::util::normalize_axes(node.get_description(), axes, data.get_partial_shape().rank());
     auto const_axes = v0::Constant::create(element::i64, Shape{normalized_axes.size()}, normalized_axes);
     return {std::make_shared<v6::MVN>(data, const_axes, true, 1e-09f, ov::op::MVNEpsMode::OUTSIDE_SQRT)};
 }
diff --git a/src/frontends/onnx/frontend/src/op/mod.cpp b/src/frontends/onnx/frontend/src/op/mod.cpp
index 2d167252ef54b3..fe8cff2f2e3537 100644
--- a/src/frontends/onnx/frontend/src/op/mod.cpp
+++ b/src/frontends/onnx/frontend/src/op/mod.cpp
@@ -17,8 +17,8 @@ namespace onnx_import {
 namespace op {
 namespace set_1 {
 OutputVector mod(const Node& node) {
-    Output<ov::Node> dividend{node.get_ng_inputs().at(0)};
-    Output<ov::Node> divisor{node.get_ng_inputs().at(1)};
+    ov::Output<ov::Node> dividend{node.get_ng_inputs().at(0)};
+    ov::Output<ov::Node> divisor{node.get_ng_inputs().at(1)};
 
     std::int64_t fmod = node.get_attribute_value<std::int64_t>("fmod", 0);
     OutputVector output;
diff --git a/src/frontends/onnx/frontend/src/op/non_max_suppression.cpp b/src/frontends/onnx/frontend/src/op/non_max_suppression.cpp
index ee18f5981f66ca..96dc378423b1d9 100644
--- a/src/frontends/onnx/frontend/src/op/non_max_suppression.cpp
+++ b/src/frontends/onnx/frontend/src/op/non_max_suppression.cpp
@@ -22,24 +22,24 @@ OutputVector non_max_suppression(const Node& node) {
     // a reference implementation is added
     const auto ng_inputs = node.get_ng_inputs();
-    const Output<ov::Node> boxes = ng_inputs.at(0);
-    const Output<ov::Node> scores = ng_inputs.at(1);
+    const ov::Output<ov::Node> boxes = ng_inputs.at(0);
+    const ov::Output<ov::Node> scores = ng_inputs.at(1);
 
-    Output<ov::Node> max_output_boxes_per_class;
+    ov::Output<ov::Node> max_output_boxes_per_class;
     if (ng_inputs.size() > 2 && !is_null(ng_inputs.at(2))) {
         max_output_boxes_per_class = ngraph::onnx_import::reshape::interpret_as_scalar(ng_inputs.at(2));
     } else {
         max_output_boxes_per_class = v0::Constant::create(element::i64, Shape{}, {0});
     }
 
-    Output<ov::Node> iou_threshold;
+    ov::Output<ov::Node> iou_threshold;
     if (ng_inputs.size() > 3 && !is_null(ng_inputs.at(3))) {
         iou_threshold = ngraph::onnx_import::reshape::interpret_as_scalar(ng_inputs.at(3));
     } else {
         iou_threshold = v0::Constant::create(element::f32, Shape{}, {.0f});
     }
 
-    Output<ov::Node> score_threshold;
+    ov::Output<ov::Node> score_threshold;
     if (ng_inputs.size() > 4 && !is_null(ng_inputs.at(4))) {
         score_threshold = ngraph::onnx_import::reshape::interpret_as_scalar(ng_inputs.at(4));
     } else {
diff --git a/src/frontends/onnx/frontend/src/op/org.openvinotoolkit/generate_proposals.cpp b/src/frontends/onnx/frontend/src/op/org.openvinotoolkit/generate_proposals.cpp
index 9cae371d68bdfe..0aa62afcfecf5b 100644
--- a/src/frontends/onnx/frontend/src/op/org.openvinotoolkit/generate_proposals.cpp
+++ b/src/frontends/onnx/frontend/src/op/org.openvinotoolkit/generate_proposals.cpp
@@ -27,7 +27,7 @@ void validate_generate_proposals_inputs(const OutputVector& inputs) {
     const auto& anchors_shape = inputs[3].get_partial_shape();
     const auto anchors_rank = anchors_shape.rank();
-    OPENVINO_ASSERT(anchors_rank == Rank(2), "GenerateProposals input anchors rank should be 2, is ", anchors_rank);
+    OPENVINO_ASSERT(anchors_rank == ov::Rank(2), "GenerateProposals input anchors rank should be 2, is ", anchors_rank);
     OPENVINO_ASSERT(anchors_shape[1].compatible(4),
                     "GenerateProposals input anchors shape should be {A, 4}, is ",
                     anchors_shape);
diff --git a/src/frontends/onnx/frontend/src/op/org.openvinotoolkit/swish.cpp b/src/frontends/onnx/frontend/src/op/org.openvinotoolkit/swish.cpp
index da94cd72810b20..2e28bb8e22faca 100644
--- a/src/frontends/onnx/frontend/src/op/org.openvinotoolkit/swish.cpp
+++ b/src/frontends/onnx/frontend/src/op/org.openvinotoolkit/swish.cpp
@@ -18,7 +18,7 @@ namespace set_1 {
 OutputVector swish(const Node& node) {
     OutputVector ng_inputs{node.get_ng_inputs()};
 
-    Output<ov::Node> beta;
+    ov::Output<ov::Node> beta;
     if (ng_inputs.size() > 1) {
         beta = ngraph::onnx_import::reshape::interpret_as_scalar(ng_inputs.at(1));
     } else {
diff --git a/src/frontends/onnx/frontend/src/op/pad.cpp b/src/frontends/onnx/frontend/src/op/pad.cpp
index 476fc9b1642245..f9256818bea240 100644
--- a/src/frontends/onnx/frontend/src/op/pad.cpp
+++ b/src/frontends/onnx/frontend/src/op/pad.cpp
@@ -66,9 +66,9 @@ OutputVector pad(const Node& node) {
     const auto inputs = node.get_ng_inputs();
     const auto& data = inputs[0];
     const auto& pads = inputs[1];
-    Output<ov::Node> values;
-    Output<ov::Node> padding_begin;
-    Output<ov::Node> padding_end;
+    ov::Output<ov::Node> values;
+    ov::Output<ov::Node> padding_begin;
+    ov::Output<ov::Node> padding_end;
 
     if (inputs.size() == 3 && !ov::op::util::is_null(inputs[2])) {
         values = reshape::interpret_as_scalar(inputs[2]);
diff --git a/src/frontends/onnx/frontend/src/op/qlinear_conv.cpp b/src/frontends/onnx/frontend/src/op/qlinear_conv.cpp
index 7f7f16d5b2ad34..6e26505d4c3299 100644
--- a/src/frontends/onnx/frontend/src/op/qlinear_conv.cpp
+++ b/src/frontends/onnx/frontend/src/op/qlinear_conv.cpp
@@ -33,7 +33,7 @@ OutputVector qlinear_conv(const Node& node) {
     auto w_zero_point = inputs.at(5);
     auto y_scale = inputs.at(6);
     auto y_zero_point = inputs.at(7);
-    Output<ov::Node> B = inputs.size() > 8 ? inputs.at(8) : std::make_shared<NullNode>()->output(0);
+    ov::Output<ov::Node> B = inputs.size() > 8 ? inputs.at(8) : std::make_shared<NullNode>()->output(0);
 
     x = set_13::detail::dequantize_linear(x,
                                           x_scale,
diff --git a/src/frontends/onnx/frontend/src/op/quantize_linear.cpp b/src/frontends/onnx/frontend/src/op/quantize_linear.cpp
index 19cb20ad3294f1..5fdd3deb725cb0 100644
--- a/src/frontends/onnx/frontend/src/op/quantize_linear.cpp
+++ b/src/frontends/onnx/frontend/src/op/quantize_linear.cpp
@@ -5,7 +5,6 @@
 #include "op/quantize_linear.hpp"
 
 #include "exceptions.hpp"
-#include "openvino/core/validation_util.hpp"
 #include "openvino/frontend/exception.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/convert.hpp"
@@ -14,6 +13,7 @@
 #include "openvino/op/subtract.hpp"
 #include "ov_models/ov_builders/reshape.hpp"
 #include "utils/reshape.hpp"
+#include "validation_util.hpp"
 
 using namespace ov::op;
 
@@ -23,7 +23,7 @@ namespace onnx_import {
 namespace op {
 namespace detail {
 namespace {
-Output<ov::Node> get_zero_point(const OutputVector& inputs) {
+ov::Output<ov::Node> get_zero_point(const OutputVector& inputs) {
     if (inputs.size() > 2) {
         return inputs.at(2);
     } else {
@@ -31,7 +31,7 @@ Output<ov::Node> get_zero_point(const OutputVector& inputs) {
     }
 }
 
-void validate_zero_point_type(const Node& onnx_node, const Output<ov::Node>& y_zero_point) {
+void validate_zero_point_type(const Node& onnx_node, const ov::Output<ov::Node>& y_zero_point) {
     const auto& y_zero_point_et = y_zero_point.get_element_type();
     CHECK_VALID_NODE(
         onnx_node,
@@ -41,7 +41,7 @@ void validate_zero_point_type(const Node& onnx_node, const Output<ov::Node>& y_
         "integer type.");
 }
 
-Output<ov::Node> validate_scale(const Node& onnx_node, const Output<ov::Node>& y_scale) {
+ov::Output<ov::Node> validate_scale(const Node& onnx_node, const ov::Output<ov::Node>& y_scale) {
     const auto& y_scale_et = y_scale.get_element_type();
     CHECK_VALID_NODE(onnx_node, y_scale_et.is_static(), "\"y_scale\" input data type must be static.");
     if (y_scale_et != element::f32) {
@@ -50,7 +50,7 @@ Output<ov::Node> validate_scale(const Node& onnx_node, const Output<ov::Node>&
     return y_scale;
 }
 
-Output<ov::Node> validate_data(const Node& onnx_node, const Output<ov::Node>& data) {
+ov::Output<ov::Node> validate_data(const Node& onnx_node, const ov::Output<ov::Node>& data) {
     const auto& data_et = data.get_element_type();
     CHECK_VALID_NODE(onnx_node, data_et.is_static(), "\"x\" input data type must be static.");
 
@@ -93,8 +93,8 @@ std::tuple<std::shared_ptr<ov::Node>, std::shared_ptr<ov::Node>> get_output_ban
 }
 
 std::tuple<std::shared_ptr<ov::Node>, std::shared_ptr<ov::Node>> get_input_bands(
-    const Output<ov::Node>& y_scale,
-    const Output<ov::Node>& y_zero_point,
+    const ov::Output<ov::Node>& y_scale,
+    const ov::Output<ov::Node>& y_zero_point,
     const std::shared_ptr<ov::Node>& output_low,
    const std::shared_ptr<ov::Node>& output_high,
    const element::Type& data_type) {
@@ -103,24 +103,20 @@ std::tuple<std::shared_ptr<ov::Node>, std::shared_ptr<ov::Node>> get_input_band
     const auto& zero_point = std::make_shared<v0::Convert>(y_zero_point, data_type);
 
     input_low = std::make_shared<v1::Multiply>(y_scale, std::make_shared<v1::Subtract>(output_low, zero_point));
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    if (auto constant = ov::get_constant_from_source(input_low)) {
-        OPENVINO_SUPPRESS_DEPRECATED_END
+    if (auto constant = ov::util::get_constant_from_source(input_low)) {
         input_low = constant;
     }
     input_high = std::make_shared<v1::Multiply>(y_scale, std::make_shared<v1::Subtract>(output_high, zero_point));
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    if (auto constant = ov::get_constant_from_source(input_high)) {
-        OPENVINO_SUPPRESS_DEPRECATED_END
+    if (auto constant = ov::util::get_constant_from_source(input_high)) {
        input_high = constant;
     }
 
     return std::make_tuple(input_low, input_high);
 }
 }  // namespace
 
-std::shared_ptr<ov::Node> make_fake_quantize(const Output<ov::Node>& y_scale,
-                                             const Output<ov::Node>& y_zero_point,
-                                             const Output<ov::Node>& data) {
+std::shared_ptr<ov::Node> make_fake_quantize(const ov::Output<ov::Node>& y_scale,
+                                             const ov::Output<ov::Node>& y_zero_point,
+                                             const ov::Output<ov::Node>& data) {
     const element::Type& destination_type = y_zero_point.get_element_type();
     const element::Type& data_type = data.get_element_type();
 
@@ -158,9 +154,9 @@ OutputVector quantize_linear(const Node& node) {
 
 namespace set_13 {
 namespace {
-OutputVector quantize_linear(Output<ov::Node> x,
-                             Output<ov::Node> y_scale,
-                             Output<ov::Node> y_zero_point,
+OutputVector quantize_linear(ov::Output<ov::Node> x,
+                             ov::Output<ov::Node> y_scale,
+                             ov::Output<ov::Node> y_zero_point,
                              int64_t axis,
                              Node node) {
     namespace detail = ngraph::onnx_import::op::detail;
@@ -171,9 +167,7 @@ OutputVector quantize_linear(Output<ov::Node> x,
 
     const auto& x_shape = x.get_partial_shape();
 
-    OPENVINO_SUPPRESS_DEPRECATED_START
-    axis = normalize_axis(node.get_description(), axis, x_shape.rank());
-    OPENVINO_SUPPRESS_DEPRECATED_END
+    axis = ov::util::normalize_axis(node.get_description(), axis, x_shape.rank());
 
     const auto& y_scale_shape = y_scale.get_partial_shape();
     const auto& y_zero_point_shape = y_zero_point.get_partial_shape();
diff --git a/src/frontends/onnx/frontend/src/op/quantize_linear.hpp b/src/frontends/onnx/frontend/src/op/quantize_linear.hpp
index 7ad9df7406869f..59243a177760d0 100644
--- a/src/frontends/onnx/frontend/src/op/quantize_linear.hpp
+++ b/src/frontends/onnx/frontend/src/op/quantize_linear.hpp
@@ -14,9 +14,9 @@ namespace ngraph {
 namespace onnx_import {
 namespace op {
 namespace detail {
-std::shared_ptr<ov::Node> make_fake_quantize(const Output<ov::Node>& y_scale,
-                                             const Output<ov::Node>& y_zero_point,
-                                             const Output<ov::Node>& data);
+std::shared_ptr<ov::Node> make_fake_quantize(const ov::Output<ov::Node>& y_scale,
+                                             const ov::Output<ov::Node>& y_zero_point,
+                                             const ov::Output<ov::Node>& data);
 }
 namespace set_1 {
 OutputVector quantize_linear(const Node& node);
diff --git a/src/frontends/onnx/frontend/src/op/range.cpp b/src/frontends/onnx/frontend/src/op/range.cpp
index eb44a54d7530dd..af46b639ca1858 100644
--- a/src/frontends/onnx/frontend/src/op/range.cpp
+++ b/src/frontends/onnx/frontend/src/op/range.cpp
@@ -19,9 +19,9 @@ OutputVector range(const Node& node) {
     const auto inputs = node.get_ng_inputs();
     CHECK_VALID_NODE(node, inputs.size() >= 3, "Minimum 3 inputs are required.
Got: ", inputs.size()); - Output start{inputs[0]}; - Output stop{inputs[1]}; - Output step{inputs[2]}; + ov::Output start{inputs[0]}; + ov::Output stop{inputs[1]}; + ov::Output step{inputs[2]}; auto axes = std::make_shared(ov::element::i64, ov::Shape{}, std::vector{0}); diff --git a/src/frontends/onnx/frontend/src/op/reduce.cpp b/src/frontends/onnx/frontend/src/op/reduce.cpp index c8231ef5d8b3e1..2b8af72625bbcf 100644 --- a/src/frontends/onnx/frontend/src/op/reduce.cpp +++ b/src/frontends/onnx/frontend/src/op/reduce.cpp @@ -91,7 +91,7 @@ std::shared_ptr get_reduction_axes_from_attr(const Node& node) { template std::shared_ptr make_ng_reduction_op(const Node& node, - const Output& ng_input, + const ov::Output& ng_input, bool axes_as_attr = true) { const std::int64_t keepdims = node.get_attribute_value("keepdims", 1); @@ -112,13 +112,13 @@ OutputVector reduce_sum(const Node& node) { namespace set_1 { OutputVector reduce_log_sum(const Node& node) { - const Output sum_node = make_ng_reduction_op(node, node.get_ng_inputs().at(0)); + const ov::Output sum_node = make_ng_reduction_op(node, node.get_ng_inputs().at(0)); return {std::make_shared(sum_node)}; } OutputVector reduce_log_sum_exp(const Node& node) { const auto exp_node = std::make_shared(node.get_ng_inputs().at(0)); - const Output sum_node = make_ng_reduction_op(node, exp_node); + const ov::Output sum_node = make_ng_reduction_op(node, exp_node); return {std::make_shared(sum_node)}; } @@ -151,7 +151,7 @@ OutputVector reduce_sum(const Node& node) { } OutputVector reduce_sum_square(const Node& node) { - const auto input = Output{node.get_ng_inputs().at(0)}; + const auto input = ov::Output{node.get_ng_inputs().at(0)}; const auto square_node = std::make_shared(input, input); return {make_ng_reduction_op(node, square_node)}; } diff --git a/src/frontends/onnx/frontend/src/op/reshape.cpp b/src/frontends/onnx/frontend/src/op/reshape.cpp index f66a2a3b41891b..58c19c576cec08 100644 --- a/src/frontends/onnx/frontend/src/op/reshape.cpp +++ b/src/frontends/onnx/frontend/src/op/reshape.cpp @@ -19,7 +19,7 @@ OutputVector reshape(const Node& node) { OutputVector ng_inputs{node.get_ng_inputs()}; const auto data = ng_inputs.at(0); - Output pattern; + ov::Output pattern; bool special_zero = true; // Since opset 5 the target shape is provided as input if (ng_inputs.size() == 2) { diff --git a/src/frontends/onnx/frontend/src/op/reverse_sequence.cpp b/src/frontends/onnx/frontend/src/op/reverse_sequence.cpp index 15747bb7f8d9ed..5b03e079016abb 100644 --- a/src/frontends/onnx/frontend/src/op/reverse_sequence.cpp +++ b/src/frontends/onnx/frontend/src/op/reverse_sequence.cpp @@ -5,10 +5,10 @@ #include "op/reverse_sequence.hpp" #include "onnx_import/core/node.hpp" -#include "openvino/core/validation_util.hpp" #include "openvino/frontend/exception.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/reverse_sequence.hpp" +#include "validation_util.hpp" using namespace ov::op; @@ -26,13 +26,9 @@ OutputVector reverse_sequence(const Node& node) { const auto data_rank = data.get_partial_shape().rank(); const auto batch_axis = node.get_attribute_value("batch_axis", 1); - OPENVINO_SUPPRESS_DEPRECATED_START - const auto normalized_batch_axis = ov::normalize_axis(node.get_description(), batch_axis, data_rank); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto normalized_batch_axis = ov::util::normalize_axis(node.get_description(), batch_axis, data_rank); const auto time_axis = node.get_attribute_value("time_axis", 0); - OPENVINO_SUPPRESS_DEPRECATED_START - const auto 
normalized_time_axis = ov::normalize_axis(node.get_description(), time_axis, data_rank); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto normalized_time_axis = ov::util::normalize_axis(node.get_description(), time_axis, data_rank); FRONT_END_GENERAL_CHECK(normalized_batch_axis == 0 || normalized_batch_axis == 1, "Allowed values of the 'batch_axis' attribute for ReverseSequence " diff --git a/src/frontends/onnx/frontend/src/op/slice.cpp b/src/frontends/onnx/frontend/src/op/slice.cpp index 89d7494d29bd5b..c0e9e0216504df 100644 --- a/src/frontends/onnx/frontend/src/op/slice.cpp +++ b/src/frontends/onnx/frontend/src/op/slice.cpp @@ -27,7 +27,7 @@ OutputVector slice(const Node& node) { const bool axes_input_provided = inputs.size() >= 4 && !is_null(inputs.at(3)); const bool steps_input_provided = inputs.size() == 5 && !is_null(inputs.at(4)); - Output steps; + ov::Output steps; if (steps_input_provided) { steps = inputs.at(4); } else { @@ -48,7 +48,7 @@ OutputVector slice(const Node& node) { namespace set_1 { OutputVector slice(const Node& node) { - Output data = node.get_ng_inputs().at(0); + ov::Output data = node.get_ng_inputs().at(0); const auto starts_atr = node.get_attribute_value>("starts"); const auto ends = node.get_attribute_as_constant>("ends"); diff --git a/src/frontends/onnx/frontend/src/op/softmax.cpp b/src/frontends/onnx/frontend/src/op/softmax.cpp index 6650ce6cee67a0..ff11f78b034edd 100644 --- a/src/frontends/onnx/frontend/src/op/softmax.cpp +++ b/src/frontends/onnx/frontend/src/op/softmax.cpp @@ -7,7 +7,6 @@ #include #include "default_opset.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/frontend/exception.hpp" #include "ov_models/ov_builders/reshape.hpp" @@ -15,7 +14,7 @@ OPENVINO_SUPPRESS_DEPRECATED_START namespace ngraph { namespace onnx_import { namespace { -std::shared_ptr onnx_softmax(const Output data, const int64_t axis) { +std::shared_ptr onnx_softmax(const ov::Output data, const int64_t axis) { const auto coerced_data = ov::op::util::flatten(data, static_cast(axis)); const auto result = std::make_shared(coerced_data, 1); const auto data_shape = std::make_shared(data); diff --git a/src/frontends/onnx/frontend/src/op/topk.cpp b/src/frontends/onnx/frontend/src/op/topk.cpp index b19eb8f53ccd33..907333fa0e94de 100644 --- a/src/frontends/onnx/frontend/src/op/topk.cpp +++ b/src/frontends/onnx/frontend/src/op/topk.cpp @@ -11,14 +11,13 @@ #include "ngraph/node.hpp" #include "ngraph/shape.hpp" #include "ngraph/type/element_type.hpp" -#include "ngraph/validation_util.hpp" #include "openvino/frontend/exception.hpp" #include "utils/reshape.hpp" OPENVINO_SUPPRESS_DEPRECATED_START namespace { /// \return Return the second input to the TopK node reshaped to a scalar. 
-ngraph::Output get_k(const ngraph::onnx_import::Node& node) { +ov::Output get_k(const ngraph::onnx_import::Node& node) { auto k_node = node.get_ng_inputs().at(1); FRONT_END_GENERAL_CHECK(shape_size(k_node.get_shape()) == 1, "ONNX TopK operator: 'K' parameter must contain a single positive value.", diff --git a/src/frontends/onnx/frontend/src/op/transpose.cpp b/src/frontends/onnx/frontend/src/op/transpose.cpp index 5c3eb212ca289d..c8bcf1bf103e91 100644 --- a/src/frontends/onnx/frontend/src/op/transpose.cpp +++ b/src/frontends/onnx/frontend/src/op/transpose.cpp @@ -16,7 +16,7 @@ namespace onnx_import { namespace op { namespace set_1 { OutputVector transpose(const Node& node) { - Output data = node.get_ng_inputs().at(0); + ov::Output data = node.get_ng_inputs().at(0); auto permute_axes = node.get_attribute_value>("perm", {}); diff --git a/src/frontends/onnx/frontend/src/op/unsqueeze.cpp b/src/frontends/onnx/frontend/src/op/unsqueeze.cpp index 773e7225e7a506..a3a7dff618e53a 100644 --- a/src/frontends/onnx/frontend/src/op/unsqueeze.cpp +++ b/src/frontends/onnx/frontend/src/op/unsqueeze.cpp @@ -9,7 +9,6 @@ #include "default_opset.hpp" #include "exceptions.hpp" #include "ngraph/shape.hpp" -#include "ngraph/validation_util.hpp" OPENVINO_SUPPRESS_DEPRECATED_START namespace ngraph { diff --git a/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.hpp b/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.hpp index e4547e2366297e..08d4da34147353 100644 --- a/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.hpp +++ b/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.hpp @@ -35,7 +35,7 @@ class ArgMinMaxFactory { std::shared_ptr make_topk_subgraph(ov::op::v11::TopK::Mode mode) const; const std::int64_t m_keep_dims; - Output m_input_node; + ov::Output m_input_node; std::int64_t m_axis; std::int64_t m_select_last_index; }; diff --git a/src/frontends/onnx/frontend/src/utils/common.cpp b/src/frontends/onnx/frontend/src/utils/common.cpp index 206f0b0325127f..2767a5a1cfdf66 100644 --- a/src/frontends/onnx/frontend/src/utils/common.cpp +++ b/src/frontends/onnx/frontend/src/utils/common.cpp @@ -60,7 +60,7 @@ const ov::element::Type& get_ov_element_type(int64_t onnx_type) { OPENVINO_THROW("unsupported element type"); } -std::shared_ptr get_monotonic_range_along_node_rank(const Output& value, +std::shared_ptr get_monotonic_range_along_node_rank(const ov::Output& value, int64_t start_value, int64_t step) { if (value.get_partial_shape().rank().is_static()) { @@ -99,8 +99,8 @@ void validate_scalar_input(const char* input_name, template OutputVector handle_opset6_binary_op(const Node& node) { - const Output lhs_node = node.get_ng_inputs().at(0); - Output rhs_node = node.get_ng_inputs().at(1); + const ov::Output lhs_node = node.get_ng_inputs().at(0); + ov::Output rhs_node = node.get_ng_inputs().at(1); const bool broadcast = node.get_attribute_value("broadcast", 0); if (broadcast) { if (node.has_attribute("axis")) { @@ -149,11 +149,11 @@ bool is_failsafe_node(const std::shared_ptr& node) { const std::string OPTIMIZED_OUT_NODE = "OPTIMIZED_OUT_NODE"; -void mark_as_optimized_out(Output& node_output) { +void mark_as_optimized_out(ov::Output& node_output) { node_output.get_rt_info()[OPTIMIZED_OUT_NODE] = true; } -bool is_optimized_out(const Output& node_output) { +bool is_optimized_out(const ov::Output& node_output) { const auto& rt_info = node_output.get_rt_info(); return rt_info.find(OPTIMIZED_OUT_NODE) != rt_info.end(); } diff --git a/src/frontends/onnx/frontend/src/utils/common.hpp 
b/src/frontends/onnx/frontend/src/utils/common.hpp index 1209937a748b52..15edf2e8eb6f7e 100644 --- a/src/frontends/onnx/frontend/src/utils/common.hpp +++ b/src/frontends/onnx/frontend/src/utils/common.hpp @@ -60,7 +60,7 @@ std::vector get_monotonic_range(T end_value, T start_value = T{0}, T step = T /// \param[in] step The step value for the sequence. /// /// \return The node which represents monotonic sequence. -std::shared_ptr get_monotonic_range_along_node_rank(const Output& value, +std::shared_ptr get_monotonic_range_along_node_rank(const ov::Output& value, int64_t start_value = 0, int64_t step = 1); @@ -149,10 +149,10 @@ bool is_failsafe_node(const std::shared_ptr& node); /// \brief Marks an output of a node as "optimized out" meaning that during the import of an ONNX operation /// no OV nodes have been created and the ONNX operator returns its inputs as its outputs. /// This information is later used to add extra names to the tensors associated with such outputs. -void mark_as_optimized_out(Output& node_output); +void mark_as_optimized_out(ov::Output& node_output); /// \brief Checks if a given output was marked as optimized out by the function above. -bool is_optimized_out(const Output& node_output); +bool is_optimized_out(const ov::Output& node_output); /// \brief Collect unsupported operators after convert_partially and all exceptions from translation process. std::string collect_translation_exceptions(const std::shared_ptr& partially_converted); diff --git a/src/frontends/onnx/frontend/src/utils/conv_factory.cpp b/src/frontends/onnx/frontend/src/utils/conv_factory.cpp index da42025c29bbd4..730cf1fe8f0d3d 100644 --- a/src/frontends/onnx/frontend/src/utils/conv_factory.cpp +++ b/src/frontends/onnx/frontend/src/utils/conv_factory.cpp @@ -16,8 +16,8 @@ namespace ngraph { namespace onnx_import { namespace conv_factory { -std::shared_ptr make_ng_convolution(const Output& data, - const Output& filters, +std::shared_ptr make_ng_convolution(const ov::Output& data, + const ov::Output& filters, const ov::Strides& strides, const ov::Strides& dilations, const ov::CoordinateDiff& padding_below, diff --git a/src/frontends/onnx/frontend/src/utils/conv_factory.hpp b/src/frontends/onnx/frontend/src/utils/conv_factory.hpp index c73b2e2832447f..849d90f7988bb1 100644 --- a/src/frontends/onnx/frontend/src/utils/conv_factory.hpp +++ b/src/frontends/onnx/frontend/src/utils/conv_factory.hpp @@ -11,8 +11,8 @@ namespace ngraph { namespace onnx_import { namespace conv_factory { -std::shared_ptr make_ng_convolution(const Output& data, - const Output& filters, +std::shared_ptr make_ng_convolution(const ov::Output& data, + const ov::Output& filters, const ov::Strides& strides, const ov::Strides& dilations, const ov::CoordinateDiff& padding_below, diff --git a/src/frontends/onnx/frontend/src/utils/convpool.cpp b/src/frontends/onnx/frontend/src/utils/convpool.cpp index d78d403c098bd7..cac377b42d891e 100644 --- a/src/frontends/onnx/frontend/src/utils/convpool.cpp +++ b/src/frontends/onnx/frontend/src/utils/convpool.cpp @@ -7,7 +7,6 @@ #include #include "exceptions.hpp" -#include "openvino/core/validation_util.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/divide.hpp" diff --git a/src/frontends/onnx/frontend/src/utils/convpool.hpp b/src/frontends/onnx/frontend/src/utils/convpool.hpp index e275271918ff2a..078a3975de4d7b 100644 --- a/src/frontends/onnx/frontend/src/utils/convpool.hpp +++ b/src/frontends/onnx/frontend/src/utils/convpool.hpp @@ -105,7 +105,7 @@
ov::op::PadType get_auto_pad(const Node& node); /// \param[in] groups Number of groups /// /// \return Reshaped filters input. -Output get_reshaped_filters(const Output& filters, int64_t groups); +ov::Output get_reshaped_filters(const ov::Output& filters, int64_t groups); } // namespace convpool } // namespace onnx_import diff --git a/src/frontends/onnx/frontend/src/utils/pooling_factory.cpp b/src/frontends/onnx/frontend/src/utils/pooling_factory.cpp index cec8a8f1f30599..adfaa488190aab 100644 --- a/src/frontends/onnx/frontend/src/utils/pooling_factory.cpp +++ b/src/frontends/onnx/frontend/src/utils/pooling_factory.cpp @@ -20,7 +20,7 @@ namespace onnx_import { namespace pooling { namespace { -std::shared_ptr transposition_axis_order(const Rank& input_rank) { +std::shared_ptr transposition_axis_order(const ov::Rank& input_rank) { FRONT_END_GENERAL_CHECK(input_rank.is_static(), "Generating column-major MaxPool results is supported only for inputs with static rank."); diff --git a/src/frontends/onnx/frontend/src/utils/recurrent.cpp b/src/frontends/onnx/frontend/src/utils/recurrent.cpp index 35344d8f64adb7..c01316f0a59e8d 100644 --- a/src/frontends/onnx/frontend/src/utils/recurrent.cpp +++ b/src/frontends/onnx/frontend/src/utils/recurrent.cpp @@ -87,10 +87,10 @@ OpInputMap::OpInputMap(const onnx_import::Node& node, std::size_t gates_count) { OpInputMap::OpInputMap(container_type&& map) : m_map(std::move(map)) {} -Output& OpInputMap::at(const OpInput& key) { +ov::Output& OpInputMap::at(const OpInput& key) { return m_map.at(key); } -const Output& OpInputMap::at(const OpInput& key) const { +const ov::Output& OpInputMap::at(const OpInput& key) const { return m_map.at(key); } diff --git a/src/frontends/onnx/frontend/src/utils/recurrent.hpp b/src/frontends/onnx/frontend/src/utils/recurrent.hpp index a5cab40cf266ac..f57399bf22f6dc 100644 --- a/src/frontends/onnx/frontend/src/utils/recurrent.hpp +++ b/src/frontends/onnx/frontend/src/utils/recurrent.hpp @@ -39,7 +39,7 @@ enum class OpInput { /// \brief This structure aggregates operator's inputs in a key-value map. /// struct OpInputMap { - using container_type = std::map>; + using container_type = std::map>; OPENVINO_SUPPRESS_DEPRECATED_START explicit OpInputMap(const onnx_import::Node& node, std::size_t gates_count); @@ -47,8 +47,8 @@ struct OpInputMap { OpInputMap(container_type&& map); virtual ~OpInputMap() = default; - Output& at(const OpInput& key); - const Output& at(const OpInput& key) const; + ov::Output& at(const OpInput& key); + const ov::Output& at(const OpInput& key) const; container_type m_map; }; diff --git a/src/frontends/onnx/frontend/src/utils/reshape.cpp b/src/frontends/onnx/frontend/src/utils/reshape.cpp index 67e7781d692030..0fc904c4e78c7e 100644 --- a/src/frontends/onnx/frontend/src/utils/reshape.cpp +++ b/src/frontends/onnx/frontend/src/utils/reshape.cpp @@ -70,7 +70,7 @@ std::vector infer_dimensions(const std::string& node_name, return inferred_dims; } -Output interpret_as_scalar(const Output& node) { +ov::Output interpret_as_scalar(const ov::Output& node) { Shape node_shape = node.get_shape(); // If node is already a scalar, return original @@ -91,8 +91,8 @@ Output interpret_as_scalar(const Output& node) { return ov::op::util::reshape(node, Shape{}); } -Output reshape_channel_shaped_node_to_nchw(const Output& node, - const Output& expected_rank) { +ov::Output reshape_channel_shaped_node_to_nchw(const ov::Output& node, + const ov::Output& expected_rank) { // Prepare tail shape (rank = conv.rank - 2): [1, 1, 1, 1, ...
] const auto one_const = v0::Constant::create(element::i64, Shape{1}, {1}); const auto two_const = v0::Constant::create(element::i64, Shape{1}, {2}); diff --git a/src/frontends/onnx/frontend/src/utils/reshape.hpp b/src/frontends/onnx/frontend/src/utils/reshape.hpp index 57d76d08823f29..198696310cb72c 100644 --- a/src/frontends/onnx/frontend/src/utils/reshape.hpp +++ b/src/frontends/onnx/frontend/src/utils/reshape.hpp @@ -43,7 +43,7 @@ std::vector infer_dimensions(const std::string& node_name, /// /// \return Original node or a node representing a reshape of the original. /// -Output interpret_as_scalar(const Output& node); +ov::Output interpret_as_scalar(const ov::Output& node); /// \brief Reshape node from shape {C} to {1, C, 1, 1,...} /// @@ -57,8 +57,8 @@ Output interpret_as_scalar(const Output& node); /// /// \return Original node or a node representing a reshape of the original. /// -Output reshape_channel_shaped_node_to_nchw(const Output& node, - const Output& expected_rank); +ov::Output reshape_channel_shaped_node_to_nchw(const ov::Output& node, + const ov::Output& expected_rank); } // namespace reshape } // namespace onnx_import diff --git a/src/frontends/onnx/frontend/src/utils/variadic.hpp b/src/frontends/onnx/frontend/src/utils/variadic.hpp index 50e8a5ebadcff5..427e3095fffb41 100644 --- a/src/frontends/onnx/frontend/src/utils/variadic.hpp +++ b/src/frontends/onnx/frontend/src/utils/variadic.hpp @@ -29,7 +29,8 @@ inline OutputVector make_ng_variadic_op( const OutputVector ng_inputs{node.get_ng_inputs()}; // Templated binary operation - Creates Add, Minimum, Maximum, etc. - const auto binary_operation = [&auto_broadcast](const Output& arg0, const Output& arg1) { + const auto binary_operation = [&auto_broadcast](const ov::Output& arg0, + const ov::Output& arg1) { return std::make_shared(arg0, arg1, auto_broadcast); }; diff --git a/src/frontends/tensorflow_common/src/helper_transforms/block_lstm_replacer.cpp b/src/frontends/tensorflow_common/src/helper_transforms/block_lstm_replacer.cpp index d2931eef4fc986..5cba1bc5fbf7f6 100644 --- a/src/frontends/tensorflow_common/src/helper_transforms/block_lstm_replacer.cpp +++ b/src/frontends/tensorflow_common/src/helper_transforms/block_lstm_replacer.cpp @@ -8,7 +8,19 @@ #include #include "helper_ops/block_lstm.hpp" -#include "openvino/opsets/opset9.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/gather_nd.hpp" +#include "openvino/op/lstm_sequence.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/strided_slice.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "openvino/op/variadic_split.hpp" #include "openvino/pass/pattern/matcher.hpp" #include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" @@ -17,8 +29,8 @@ using namespace std; using namespace ov::pass; +using namespace ov::op; using namespace ov::pass::pattern; -using namespace ov::opset9; using namespace ov::frontend::tensorflow; namespace { @@ -44,8 +56,8 @@ pass::BlockLSTMReplacer::BlockLSTMReplacer() { // Pattern 1: BlockLSTM with last state cell output (BlockLSTM -> Concat -> GatherND) // used in DeepSpeech model auto block_lstm_1 = pattern::wrap_type(can_have_outputs({1, 6})); - auto states_cell_1 = pattern::wrap_type({pattern::any_input(), block_lstm_1}); - auto pattern1 = 
pattern::wrap_type({states_cell_1, pattern::any_input()}); + auto states_cell_1 = pattern::wrap_type({pattern::any_input(), block_lstm_1}); + auto pattern1 = pattern::wrap_type({states_cell_1, pattern::any_input()}); // Pattern 2: BlockLSTM with just one output, concatenated hidden states (BlockLSTM) auto pattern2 = pattern::wrap_type(can_have_outputs({6})); @@ -64,7 +76,7 @@ pass::BlockLSTMReplacer::BlockLSTMReplacer() { ov::NodeVector rt_info_from; if (is_pattern1) { block_lstm_node = std::dynamic_pointer_cast(pattern_map.at(block_lstm_1)); - auto concat_node = std::dynamic_pointer_cast(pattern_map.at(states_cell_1)); + auto concat_node = std::dynamic_pointer_cast(pattern_map.at(states_cell_1)); if (!concat_node || concat_node->get_axis() != 0) { // timestep is the first dimension return false; @@ -107,95 +119,95 @@ pass::BlockLSTMReplacer::BlockLSTMReplacer() { auto bias = block_lstm_node->input_value(8); // retrieve input_size - auto x_shape = rg.make(x, element::i64); - auto ss_start = rg.make(element::i64, Shape{1}, 2); - auto ss_stop = rg.make(element::i64, Shape{1}, 3); - auto ss_step = rg.make(element::i64, Shape{1}, 1); - auto input_size = rg.make(x_shape, - ss_start, - ss_stop, - ss_step, - std::vector{0}, - std::vector{0}); + auto x_shape = rg.make(x, element::i64); + auto ss_start = rg.make(element::i64, Shape{1}, 2); + auto ss_stop = rg.make(element::i64, Shape{1}, 3); + auto ss_step = rg.make(element::i64, Shape{1}, 1); + auto input_size = rg.make(x_shape, + ss_start, + ss_stop, + ss_step, + std::vector{0}, + std::vector{0}); // retrieve the batch size // now x is in a format [time_len, batch_size, input_size] - auto ss_start2 = rg.make(element::i64, Shape{1}, 1); - auto ss_stop2 = rg.make(element::i64, Shape{1}, 2); - auto batch_size = rg.make(x_shape, - ss_start2, - ss_stop2, - ss_step, - std::vector{0}, - std::vector{0}); + auto ss_start2 = rg.make(element::i64, Shape{1}, 1); + auto ss_stop2 = rg.make(element::i64, Shape{1}, 2); + auto batch_size = rg.make(x_shape, + ss_start2, + ss_stop2, + ss_step, + std::vector{0}, + std::vector{0}); auto hidden_size_const = - rg.make(element::i64, Shape{1}, std::vector{hidden_size.get_length()}); + rg.make(element::i64, Shape{1}, std::vector{hidden_size.get_length()}); // adjust weights and bias // 1. reshape weights and bias to highlight channel dimension - auto new_weight_shape = rg.make(element::i64, Shape{3}, std::vector{0, 4, -1}); - auto weight_reshape = rg.make(weights, new_weight_shape, true); - auto new_bias_shape = rg.make(element::i64, Shape{2}, std::vector{4, -1}); - auto bias_reshape = rg.make(bias, new_bias_shape, true); + auto new_weight_shape = rg.make(element::i64, Shape{3}, std::vector{0, 4, -1}); + auto weight_reshape = rg.make(weights, new_weight_shape, true); + auto new_bias_shape = rg.make(element::i64, Shape{2}, std::vector{4, -1}); + auto bias_reshape = rg.make(bias, new_bias_shape, true); // 2. 
reorder gates icfo --> fico for both weights and biases - auto reorder_const = rg.make(element::i64, Shape{4}, std::vector{2, 0, 1, 3}); - auto weights_axis = rg.make(element::i64, Shape{}, 1); - auto weights_reorder = rg.make(weight_reshape, reorder_const, weights_axis); - auto bias_axis = rg.make(element::i64, Shape{}, 0); - auto bias_reorder = rg.make(bias_reshape, reorder_const, bias_axis); + auto reorder_const = rg.make(element::i64, Shape{4}, std::vector{2, 0, 1, 3}); + auto weights_axis = rg.make(element::i64, Shape{}, 1); + auto weights_reorder = rg.make(weight_reshape, reorder_const, weights_axis); + auto bias_axis = rg.make(element::i64, Shape{}, 0); + auto bias_reorder = rg.make(bias_reshape, reorder_const, bias_axis); // 3. shift_const.value should be added to the first 1 / 4th part of the biases(f - gate : 0) - auto shift_const = rg.make(element::f32, Shape{}, block_lstm_node->get_forget_bias()); - auto bias_split_lens = rg.make(element::i64, Shape{2}, std::vector{1, 3}); - auto bias_split = rg.make(bias_reorder, bias_axis, bias_split_lens); - auto bias_first_shift = rg.make(bias_split->output(0), shift_const); - auto bias_shift = rg.make(OutputVector{bias_first_shift, bias_split->output(1)}, 0); + auto shift_const = rg.make(element::f32, Shape{}, block_lstm_node->get_forget_bias()); + auto bias_split_lens = rg.make(element::i64, Shape{2}, std::vector{1, 3}); + auto bias_split = rg.make(bias_reorder, bias_axis, bias_split_lens); + auto bias_first_shift = rg.make(bias_split->output(0), shift_const); + auto bias_shift = rg.make(OutputVector{bias_first_shift, bias_split->output(1)}, 0); // 4. return to the original shapes - auto new_weight_shape2 = rg.make(element::i64, Shape{2}, std::vector{0, -1}); - auto weight_reshape2 = rg.make(weights_reorder, new_weight_shape2, true); + auto new_weight_shape2 = rg.make(element::i64, Shape{2}, std::vector{0, -1}); + auto weight_reshape2 = rg.make(weights_reorder, new_weight_shape2, true); // 5. normalize weights and bias - auto transpose_order = rg.make(element::i64, Shape{2}, std::vector{1, 0}); - auto new_bias_shape2 = rg.make(element::i64, Shape{1}, std::vector{-1}); - auto weights_normalize = rg.make(weight_reshape2, transpose_order); - auto bias_normalized = rg.make(bias_shift, new_bias_shape2, true); + auto transpose_order = rg.make(element::i64, Shape{2}, std::vector{1, 0}); + auto new_bias_shape2 = rg.make(element::i64, Shape{1}, std::vector{-1}); + auto weights_normalize = rg.make(weight_reshape2, transpose_order); + auto bias_normalized = rg.make(bias_shift, new_bias_shape2, true); // 6. split weights into W and R inputs - auto WR_split_axis = rg.make(element::i64, Shape{}, 1); - auto WR_split_lens = rg.make(OutputVector{input_size, hidden_size_const}, 0); - auto WR_split = rg.make(weights_normalize, WR_split_axis, WR_split_lens); + auto WR_split_axis = rg.make(element::i64, Shape{}, 1); + auto WR_split_lens = rg.make(OutputVector{input_size, hidden_size_const}, 0); + auto WR_split = rg.make(weights_normalize, WR_split_axis, WR_split_lens); // 7. 
unsqueeze weights and bias to have a dimension for a number of directions - auto num_direct_axis = rg.make(element::i64, Shape{1}, std::vector{0}); - auto W = rg.make(WR_split->output(0), num_direct_axis); - auto R = rg.make(WR_split->output(1), num_direct_axis); - auto B = rg.make(bias_normalized, num_direct_axis); + auto num_direct_axis = rg.make(element::i64, Shape{1}, std::vector{0}); + auto W = rg.make(WR_split->output(0), num_direct_axis); + auto R = rg.make(WR_split->output(1), num_direct_axis); + auto B = rg.make(bias_normalized, num_direct_axis); // normalize initial hidden and cell states - auto unsqueeze_axis = rg.make(element::i64, Shape{1}, std::vector{1}); - auto init_hidden_state = rg.make(h_prev, unsqueeze_axis); - auto init_cell_state = rg.make(cs_prev, unsqueeze_axis); + auto unsqueeze_axis = rg.make(element::i64, Shape{1}, std::vector{1}); + auto init_hidden_state = rg.make(h_prev, unsqueeze_axis); + auto init_cell_state = rg.make(cs_prev, unsqueeze_axis); // prepare sequence length input for LSTMSequence - auto seq_len_max_adjusted = rg.make(seq_len_max, batch_size); + auto seq_len_max_adjusted = rg.make(seq_len_max, batch_size); // prepare input data since LSTMSequence accept it in a format [batch_size, time_len, input_size] - auto x_order = rg.make(element::i64, Shape{3}, std::vector{1, 0, 2}); - auto x_adjusted = rg.make(x, x_order); + auto x_order = rg.make(element::i64, Shape{3}, std::vector{1, 0, 2}); + auto x_adjusted = rg.make(x, x_order); // create LSTMSequence node and reconnect inputs and normalized weights and bias - auto lstm_sequence = rg.make(x_adjusted, - init_hidden_state, - init_cell_state, - seq_len_max_adjusted, - W, - R, - B, - hidden_size.get_length(), - LSTMSequence::direction::FORWARD); + auto lstm_sequence = rg.make(x_adjusted, + init_hidden_state, + init_cell_state, + seq_len_max_adjusted, + W, + R, + B, + hidden_size.get_length(), + v5::LSTMSequence::direction::FORWARD); if (block_lstm_node->output(1).get_target_inputs().size() > 0) { // adjust output with the last state cell and connect to the main graph // squeeze extra dimension - num_directions - auto squeeze_axis = rg.make(element::i64, Shape{1}, std::vector{1}); - auto squeeze_last_state_cell = rg.make(lstm_sequence->output(2), squeeze_axis); + auto squeeze_axis = rg.make(element::i64, Shape{1}, std::vector{1}); + auto squeeze_last_state_cell = rg.make(lstm_sequence->output(2), squeeze_axis); // preserve names of the node and the output tensor squeeze_last_state_cell->set_friendly_name(last_state_c_node->get_friendly_name()); @@ -207,11 +219,13 @@ pass::BlockLSTMReplacer::BlockLSTMReplacer() { // adjust output of concatenated of hidden states from LSTMSequence // to have it in a format [time_len, batch_size, hidden_size] // 1. squeeze extra dimension - num_directions - auto squeeze_axis = rg.make(element::i64, Shape{1}, std::vector{1}); - auto squeeze_output_hidden_states = rg.make(lstm_sequence->output(0), squeeze_axis); + auto squeeze_axis = rg.make(element::i64, Shape{1}, std::vector{1}); + auto squeeze_output_hidden_states = rg.make(lstm_sequence->output(0), squeeze_axis); // 2. 
transpose the output to rotate batch and time dimensions - auto output_hidden_states_order = rg.make(element::i64, Shape{3}, std::vector{1, 0, 2}); - auto output_hidden_states = rg.make(squeeze_output_hidden_states, output_hidden_states_order); + auto output_hidden_states_order = + rg.make(element::i64, Shape{3}, std::vector{1, 0, 2}); + auto output_hidden_states = + rg.make(squeeze_output_hidden_states, output_hidden_states_order); // preserve names of the node and the output tensor output_hidden_states->set_friendly_name(block_lstm_node->get_friendly_name() + ":6"); diff --git a/src/frontends/tensorflow_common/src/helper_transforms/const_to_result_remover.cpp b/src/frontends/tensorflow_common/src/helper_transforms/const_to_result_remover.cpp index d16152ca492246..2e32490e565b9a 100644 --- a/src/frontends/tensorflow_common/src/helper_transforms/const_to_result_remover.cpp +++ b/src/frontends/tensorflow_common/src/helper_transforms/const_to_result_remover.cpp @@ -5,10 +5,10 @@ #include "helper_transforms/const_to_result_remover.hpp" #include "helper_ops/unsupported_constant.hpp" -#include "openvino/opsets/opset10.hpp" +#include "openvino/op/constant.hpp" using namespace std; -using namespace ov::opset10; +using namespace ov::op; namespace ov { namespace frontend { @@ -23,7 +23,7 @@ bool ConstToResultRemover::run_on_model(const std::shared_ptr& m) { // also, find isolated Constant->Result sub-graphs to remove for (const auto& result : m->get_results()) { auto unsupported_const = as_type_ptr(result->get_input_node_shared_ptr(0)); - auto const_node = as_type_ptr(result->get_input_node_shared_ptr(0)); + auto const_node = as_type_ptr(result->get_input_node_shared_ptr(0)); if (unsupported_const || const_node) { results_to_remove.push_back(result); } diff --git a/src/frontends/tensorflow_common/src/helper_transforms/embedding_segments_feature_fusing.cpp b/src/frontends/tensorflow_common/src/helper_transforms/embedding_segments_feature_fusing.cpp index 91fa1d4be4d3fa..bbe02e5bd39352 100644 --- a/src/frontends/tensorflow_common/src/helper_transforms/embedding_segments_feature_fusing.cpp +++ b/src/frontends/tensorflow_common/src/helper_transforms/embedding_segments_feature_fusing.cpp @@ -9,7 +9,22 @@ #include "helper_ops/sparse_fill_empty_rows.hpp" #include "helper_ops/sparse_segment_ops.hpp" -#include "openvino/opsets/opset10.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/embedding_segments_sum.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/greater_eq.hpp" +#include "openvino/op/non_zero.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/split.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/strided_slice.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/op/tile.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/unique.hpp" +#include "openvino/op/unsqueeze.hpp" #include "openvino/pass/pattern/matcher.hpp" #include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" @@ -18,7 +33,7 @@ using namespace std; using namespace ov::pass; -using namespace ov::opset10; +using namespace ov::op; ov::frontend::tensorflow::pass::EmbeddingSegmentSingleFeatureFusion::EmbeddingSegmentSingleFeatureFusion() { // The transformation looks for pattern (sub-graph) that performs extraction of embedding vectors from the @@ -30,19 +45,19 @@ ov::frontend::tensorflow::pass::EmbeddingSegmentSingleFeatureFusion::EmbeddingSe auto 
dense_shape_pattern = ov::pass::pattern::any_input(); auto default_value_pattern = ov::pass::pattern::any_input(); - auto greaterequal0_const = make_shared(element::i64, Shape{}, vector{0}); - auto greaterequal0 = std::make_shared(input_values_pattern, greaterequal0_const); - auto where0 = make_shared(make_shared(greaterequal0), - make_shared(element::i64, Shape{2}, vector{1, 0})); + auto greaterequal0_const = make_shared(element::i64, Shape{}, vector{0}); + auto greaterequal0 = std::make_shared(input_values_pattern, greaterequal0_const); + auto where0 = make_shared(make_shared(greaterequal0), + make_shared(element::i64, Shape{2}, vector{1, 0})); - auto reshape0_shape = make_shared(element::i32, Shape{1}, vector{-1}); - auto reshape0 = make_shared(where0, reshape0_shape, false); - auto gather0_1 = make_shared(input_indices_pattern, - reshape0, - make_shared(element::i32, Shape{}, vector{0})); - auto gather0_2 = make_shared(input_values_pattern, - reshape0, - make_shared(element::i32, Shape{}, vector{0})); + auto reshape0_shape = make_shared(element::i32, Shape{1}, vector{-1}); + auto reshape0 = make_shared(where0, reshape0_shape, false); + auto gather0_1 = make_shared(input_indices_pattern, + reshape0, + make_shared(element::i32, Shape{}, vector{0})); + auto gather0_2 = make_shared(input_values_pattern, + reshape0, + make_shared(element::i32, Shape{}, vector{0})); // SparseFillEmptyRows outputs segment ids along with indices for each segment. Indices correspond to vectors from // embedding table if some segment ids are not specified, SparseFillEmptyRows generate default indice for this @@ -52,75 +67,78 @@ ov::frontend::tensorflow::pass::EmbeddingSegmentSingleFeatureFusion::EmbeddingSe dense_shape_pattern->output(0), default_value_pattern->output(0)); - auto strided_slice = make_shared(sparse_fill_empty_rows->output(0), - make_shared(element::i32, Shape{2}, vector{0, 0}), - make_shared(element::i32, Shape{2}, vector{0, 1}), - make_shared(element::i32, Shape{2}, vector{1, 1}), - std::vector{1}, - std::vector{1}); - auto cast = make_shared(strided_slice, ov::element::i64); + auto strided_slice = + make_shared(sparse_fill_empty_rows->output(0), + make_shared(element::i32, Shape{2}, vector{0, 0}), + make_shared(element::i32, Shape{2}, vector{0, 1}), + make_shared(element::i32, Shape{2}, vector{1, 1}), + std::vector{1}, + std::vector{1}); + auto cast = make_shared(strided_slice, ov::element::i64); - auto unique = make_shared(sparse_fill_empty_rows->output(1), false, ov::element::i32); - auto gather = make_shared(embedding_table_pattern, - unique->output(0), - make_shared(element::i64, Shape{}, vector{0})); + auto unique = make_shared(sparse_fill_empty_rows->output(1), false, ov::element::i32); + auto gather = make_shared(embedding_table_pattern, + unique->output(0), + make_shared(element::i64, Shape{}, vector{0})); // SparseSegmentSum sums-up extracted embedding vectors by indices for each segment auto sparse_segment_op = make_shared(gather->output(0), unique->output(2), cast->output(0)); - auto shape = make_shared(sparse_segment_op, ov::element::i32); - auto strided_slice_for_shape_begin = make_shared(element::i32, Shape{1}, vector{1}); - auto strided_slice_for_shape_end = make_shared(element::i32, Shape{1}, vector{2}); - auto strided_slice_for_shape_step = make_shared(element::i32, Shape{1}, vector{1}); - auto strided_slice_for_shape = make_shared(shape, - strided_slice_for_shape_begin, - strided_slice_for_shape_end, - strided_slice_for_shape_step, - std::vector{0}, - std::vector{0}, - 
std::vector{}, - std::vector{1}); - auto pack = make_shared( - OutputVector{make_shared(make_shared(element::i32, Shape{}, 1), - make_shared(element::i64, Shape{}, 0)), - make_shared(strided_slice_for_shape, make_shared(element::i64, Shape{}, 0))}, + auto shape = make_shared(sparse_segment_op, ov::element::i32); + auto strided_slice_for_shape_begin = make_shared(element::i32, Shape{1}, vector{1}); + auto strided_slice_for_shape_end = make_shared(element::i32, Shape{1}, vector{2}); + auto strided_slice_for_shape_step = make_shared(element::i32, Shape{1}, vector{1}); + auto strided_slice_for_shape = make_shared(shape, + strided_slice_for_shape_begin, + strided_slice_for_shape_end, + strided_slice_for_shape_step, + std::vector{0}, + std::vector{0}, + std::vector{}, + std::vector{1}); + auto pack = make_shared( + OutputVector{ + make_shared(make_shared(element::i32, Shape{}, 1), + make_shared(element::i64, Shape{}, 0)), + make_shared(strided_slice_for_shape, make_shared(element::i64, Shape{}, 0))}, 0); - auto reshape = make_shared(sparse_fill_empty_rows->output(2), - make_shared(element::i32, Shape{2}, vector{-1, 1}), - false); - auto tile = make_shared(reshape, pack); + auto reshape = make_shared(sparse_fill_empty_rows->output(2), + make_shared(element::i32, Shape{2}, vector{-1, 1}), + false); + auto tile = make_shared(reshape, pack); - auto zero_int_const = make_shared(element::i32, Shape{1}, 0); - auto one_int_const = make_shared(element::i32, Shape{1}, 1); - Output shape_of = make_shared(sparse_segment_op, element::i32); - shape_of = make_shared(OutputVector{one_int_const, shape_of}, 0); + auto zero_int_const = make_shared(element::i32, Shape{1}, 0); + auto one_int_const = make_shared(element::i32, Shape{1}, 1); + Output shape_of = make_shared(sparse_segment_op, element::i32); + shape_of = make_shared(OutputVector{one_int_const, shape_of}, 0); Output zeros_like = - make_shared(make_shared(ov::element::f32, Shape{1}, std::vector{0}), shape_of); - zeros_like = make_shared(zeros_like, zero_int_const); + make_shared(make_shared(ov::element::f32, Shape{1}, std::vector{0}), + shape_of); + zeros_like = make_shared(zeros_like, zero_int_const); // compute number of dimensions to unsqueeze the condition auto cond_rank = compute_subgraph_scalar_rank(tile, element::i32); auto x_rank = compute_subgraph_scalar_rank(zeros_like, element::i32); - auto num_new_axes = make_shared(x_rank, cond_rank); + auto num_new_axes = make_shared(x_rank, cond_rank); // generate a new shape for the condition - auto const_one = make_shared(element::i32, Shape{1}, 1); - auto new_subshape = make_shared(const_one, num_new_axes); - auto cond_shape = make_shared(tile, element::i32); + auto const_one = make_shared(element::i32, Shape{1}, 1); + auto new_subshape = make_shared(const_one, num_new_axes); + auto cond_shape = make_shared(tile, element::i32); // use extra dimensions in the begin to avoid concatenation of empty tensors that is not supported by Concat // remove this workaround once 100671 is resolved - auto const_1 = make_shared(element::i32, Shape{1}, 1); - auto new_cond_shape = make_shared(OutputVector{const_1, cond_shape, new_subshape}, 0); + auto const_1 = make_shared(element::i32, Shape{1}, 1); + auto new_cond_shape = make_shared(OutputVector{const_1, cond_shape, new_subshape}, 0); // prepare the condition to have the same rank as operands `x` and `y` - auto prep_cond = make_shared(tile, new_cond_shape, false)->output(0); + auto prep_cond = make_shared(tile, new_cond_shape, false)->output(0); // squeeze prep_cond by 
one extra dimension specially added - auto const_0 = make_shared(element::i32, Shape{1}, 0); - prep_cond = make_shared(prep_cond, const_0); + auto const_0 = make_shared(element::i32, Shape{1}, 0); + prep_cond = make_shared(prep_cond, const_0); - auto select_pattern = make_shared(pattern_map.at(select_pattern).get_node_shared_ptr()); + auto select = as_type_ptr(pattern_map.at(select_pattern).get_node_shared_ptr()); // prepare input of indices for EmbeddingSegment operation - auto cast_indices = make_shared(input_values, ov::element::i32); + auto cast_indices = make_shared(input_values, ov::element::i32); // prepare input of segment indices for EmbeddingSegment operation - auto split_for_indices_axis = make_shared(ov::element::i64, ov::Shape{}, 1); - auto split_for_indices = make_shared(input_indices, split_for_indices_axis, 2); - auto squeeze_for_indices = make_shared(split_for_indices->output(0), - make_shared(ov::element::i64, ov::Shape{1}, 1)); - auto cast_segment_ids = make_shared(squeeze_for_indices, ov::element::i32); + auto split_for_indices_axis = make_shared(ov::element::i64, ov::Shape{}, 1); + auto split_for_indices = make_shared(input_indices, split_for_indices_axis, 2); + auto squeeze_for_indices = + make_shared(split_for_indices->output(0), + make_shared(ov::element::i64, ov::Shape{1}, 1)); + auto cast_segment_ids = make_shared(squeeze_for_indices, ov::element::i32); // prepare input of a number of segments for EmbeddingSegment operation - auto split_for_dense_shape_axis = make_shared(ov::element::i64, ov::Shape{}, 0); - auto split_for_dense_shape = make_shared(dense_shape, split_for_dense_shape_axis, 2); - auto squeeze_to_scalar_axis = make_shared(ov::element::i64, ov::Shape{1}, 0); - auto squeeze_to_scalar = make_shared(split_for_dense_shape, squeeze_to_scalar_axis); - auto cast_num_segments = make_shared(squeeze_to_scalar, ov::element::i32); + auto split_for_dense_shape_axis = make_shared(ov::element::i64, ov::Shape{}, 0); + auto split_for_dense_shape = make_shared(dense_shape, split_for_dense_shape_axis, 2); + auto squeeze_to_scalar_axis = make_shared(ov::element::i64, ov::Shape{1}, 0); + auto squeeze_to_scalar = make_shared(split_for_dense_shape, squeeze_to_scalar_axis); + auto cast_num_segments = make_shared(squeeze_to_scalar, ov::element::i32); // prepare the default value for EmbeddingSegment operation - auto cast_default_value = make_shared(default_value, ov::element::i32); + auto cast_default_value = make_shared(default_value, ov::element::i32); // TODO : remove Cast nodes once we start to support EmbeddingSegmentSum(new version) with segment_ids, // indices, and num_segments of different integer type. 
// Because the real cases show that it is possible to have it in TensorFlow - auto embedding_segments_op = make_shared(embedding_table, - cast_indices, - cast_segment_ids, - cast_num_segments, - cast_default_value); + auto embedding_segments_op = make_shared(embedding_table, + cast_indices, + cast_segment_ids, + cast_num_segments, + cast_default_value); embedding_segments_op->set_friendly_name(select->get_friendly_name()); ov::copy_runtime_info(select, embedding_segments_op); ov::replace_node(select, embedding_segments_op); diff --git a/src/frontends/tensorflow_common/src/helper_transforms/gru_block_cell_replacer.cpp b/src/frontends/tensorflow_common/src/helper_transforms/gru_block_cell_replacer.cpp index 5bb65a1dae9bcc..d689eeba5804e3 100644 --- a/src/frontends/tensorflow_common/src/helper_transforms/gru_block_cell_replacer.cpp +++ b/src/frontends/tensorflow_common/src/helper_transforms/gru_block_cell_replacer.cpp @@ -8,7 +8,13 @@ #include #include "helper_ops/gru_block_cell.hpp" -#include "openvino/opsets/opset9.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gru_cell.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/split.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/variadic_split.hpp" #include "openvino/pass/pattern/matcher.hpp" #include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" @@ -17,7 +23,7 @@ using namespace std; using namespace ov::pass; -using namespace ov::opset9; +using namespace ov::op; using namespace ov::frontend::tensorflow; pass::GRUBlockCellReplacer::GRUBlockCellReplacer() { @@ -54,41 +60,45 @@ pass::GRUBlockCellReplacer::GRUBlockCellReplacer() { auto b_c = gru_block_cell_node->input_value(5); // retrive input_size and hidden_size - auto x_shape = rg.make(x, element::i64); - auto ss_start = rg.make(element::i64, Shape{1}, 1); - auto ss_end = rg.make(element::i64, Shape{1}, 2); - auto ss_step = rg.make(element::i64, Shape{1}, 1); - auto input_size = rg.make(x_shape, ss_start, ss_end, ss_step); - auto h_prev_shape = rg.make(h_prev, element::i64); - auto hidden_size = rg.make(h_prev_shape, ss_start, ss_end, ss_step); + auto x_shape = rg.make(x, element::i64); + auto ss_start = rg.make(element::i64, Shape{1}, 1); + auto ss_end = rg.make(element::i64, Shape{1}, 2); + auto ss_step = rg.make(element::i64, Shape{1}, 1); + auto input_size = rg.make(x_shape, ss_start, ss_end, ss_step); + auto h_prev_shape = rg.make(h_prev, element::i64); + auto hidden_size = rg.make(h_prev_shape, ss_start, ss_end, ss_step); // prepare weights input // TensorFlow provides weights in a format w_ru and w_c, where // z or u - update, r - reset, c or h - hidden (connection) // OpenVINO GRUCell accepts weights in a format w_zrh (or w_urс) // 1. split w_ru into w_r and w_u - auto split_w_ru = rg.make(w_ru, rg.make(element::i64, Shape{}, 1), 2); + auto split_w_ru = rg.make(w_ru, rg.make(element::i64, Shape{}, 1), 2); // 2. 
concatenate different parts of weights into w_zrh (or w_urс) - auto w_urc = rg.make(OutputVector{split_w_ru->output(1), split_w_ru->output(0), w_c}, 1); + auto w_urc = rg.make(OutputVector{split_w_ru->output(1), split_w_ru->output(0), w_c}, 1); // prepare bias in the same way - auto split_b_ru = rg.make(b_ru, rg.make(element::i64, Shape{}, 0), 2); - auto b_urc = rg.make(OutputVector{split_b_ru->output(1), split_b_ru->output(0), b_c}, 0); + auto split_b_ru = rg.make(b_ru, rg.make(element::i64, Shape{}, 0), 2); + auto b_urc = rg.make(OutputVector{split_b_ru->output(1), split_b_ru->output(0), b_c}, 0); // transpose weights // the current shape - [input_size + hidden_size, 3 * hidden_size] // we need the shape [3 * hidden_size, input_size + hidden_size] // in order to split WR into W and R - auto transpose_order = rg.make(element::i64, Shape{2}, std::vector{1, 0}); - auto w_urc_transpose = rg.make(w_urc, transpose_order); + auto transpose_order = rg.make(element::i64, Shape{2}, std::vector{1, 0}); + auto w_urc_transpose = rg.make(w_urc, transpose_order); // split combined weights WR into W and R - auto split_axis = rg.make(element::i64, Shape{}, 1); - auto split_nums = rg.make(OutputVector{input_size, hidden_size}, 0); - auto split_WR = rg.make(w_urc_transpose, split_axis, split_nums); - - auto gru_cell = - rg.make(x, h_prev, split_WR->output(0), split_WR->output(1), b_urc, m_hidden_size.get_length()); + auto split_axis = rg.make(element::i64, Shape{}, 1); + auto split_nums = rg.make(OutputVector{input_size, hidden_size}, 0); + auto split_WR = rg.make(w_urc_transpose, split_axis, split_nums); + + auto gru_cell = rg.make(x, + h_prev, + split_WR->output(0), + split_WR->output(1), + b_urc, + m_hidden_size.get_length()); // preserve names of the node and the output tensor gru_cell->set_friendly_name(m.get_match_root()->get_friendly_name() + ":3"); diff --git a/src/frontends/tensorflow_common/src/helper_transforms/saved_model_unused_remover.cpp b/src/frontends/tensorflow_common/src/helper_transforms/saved_model_unused_remover.cpp index 681c1712e48a2a..db5501147b635a 100644 --- a/src/frontends/tensorflow_common/src/helper_transforms/saved_model_unused_remover.cpp +++ b/src/frontends/tensorflow_common/src/helper_transforms/saved_model_unused_remover.cpp @@ -4,10 +4,10 @@ #include "helper_transforms/saved_model_unused_remover.hpp" -#include "openvino/opsets/opset8.hpp" +#include "openvino/op/parameter.hpp" using namespace std; -using namespace ov::opset8; +using namespace ov::op; namespace ov { namespace frontend { @@ -33,7 +33,7 @@ bool SavedModelUnusedRemover::run_on_model(const std::shared_ptr& m) continue; } - auto param = as_type_ptr(result->get_input_node_shared_ptr(0)); + auto param = as_type_ptr(result->get_input_node_shared_ptr(0)); if (param) { isUsed = false; for (size_t i = 0; i < param->get_output_size(); ++i) { diff --git a/src/inference/src/cnn_network_ngraph_impl.cpp b/src/inference/src/cnn_network_ngraph_impl.cpp index fc665a97cc62a7..c9e3911cd1ca1d 100644 --- a/src/inference/src/cnn_network_ngraph_impl.cpp +++ b/src/inference/src/cnn_network_ngraph_impl.cpp @@ -37,7 +37,7 @@ using details::CNNNetworkNGraphImpl; using InferenceEngine::details::CNNNetworkNGraphImpl; using ngraph::Function; -void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph::Node>& output, +void CNNNetworkNGraphImpl::createDataForResult(const ::ov::Output<::ngraph::Node>& output, const std::string& outName, DataPtr& ptr) { const auto isCompatible = [](int64_t size, const Layout& l) -> bool { 
@@ -297,7 +297,7 @@ StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, return DescriptionBuffer(NOT_FOUND, resp) << "Cannot add output! Layer " << layerName << " wasn't found!"; } -void CNNNetworkNGraphImpl::addOutput(const ::ngraph::Output<::ngraph::Node>& output) { +void CNNNetworkNGraphImpl::addOutput(const ::ov::Output<::ngraph::Node>& output) { auto dataName = ov::op::util::create_ie_output_name(output); DataPtr data; if (_data.count(dataName)) @@ -339,7 +339,7 @@ void CNNNetworkNGraphImpl::reshape() { reshape({}); } -StatusCode CNNNetworkNGraphImpl::reshape(const std::map& inputShapes, +StatusCode CNNNetworkNGraphImpl::reshape(const std::map& inputShapes, ResponseDesc* responseDesc) noexcept { try { if (inputShapes.empty()) @@ -364,7 +364,7 @@ StatusCode CNNNetworkNGraphImpl::reshape(const std::map originalInputShapes; + std::map originalInputShapes; for (const auto& param : params) { originalInputShapes[param->get_friendly_name()] = param->get_output_partial_shape(0); } @@ -397,9 +397,9 @@ StatusCode CNNNetworkNGraphImpl::reshape(const std::map& inputShapes, ResponseDesc* responseDesc) noexcept { - std::map shapes; + std::map shapes; for (const auto& shape : inputShapes) - shapes[shape.first] = ngraph::PartialShape(shape.second); + shapes[shape.first] = ov::PartialShape(shape.second); return reshape(shapes, responseDesc); } @@ -456,7 +456,7 @@ void collect_dynamism_signature(const std::shared_ptr& ov_model, } // namespace #endif -void CNNNetworkNGraphImpl::reshape(const std::map& inputShapes) { +void CNNNetworkNGraphImpl::reshape(const std::map& inputShapes) { OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "CNNNetworkNGraphImpl::reshape"); auto params = _ngraph_function->get_parameters(); diff --git a/src/inference/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp b/src/inference/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp index cf4dd6a59c28aa..e388fc868fbcfe 100644 --- a/src/inference/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp +++ b/src/inference/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp @@ -290,7 +290,7 @@ void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, } const auto output = findOutputByNodeName(name); isDynamic = output && output->get_output_partial_shape(0).is_dynamic(); - ngraph::PartialShape blobPartialShape(blob->getTensorDesc().getDims()); + ov::PartialShape blobPartialShape(blob->getTensorDesc().getDims()); if (output && output->get_output_partial_shape(0).compatible(blobPartialShape)) { dims = blob->getTensorDesc().getDims(); } else { diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/rope.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/rope.cpp index a9bc9292238f30..fb38c4e3f5bc7e 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/rope.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/rope.cpp @@ -12,7 +12,7 @@ ov::intel_cpu::RoPENode::RoPENode(const OutputVector& args, const Config& cfg) : } std::shared_ptr ov::intel_cpu::RoPENode::clone_with_new_inputs( - const ngraph::OutputVector& new_args) const { + const ov::OutputVector& new_args) const { INTERNAL_OP_SCOPE(RoPENode_with_new_inputs); check_new_args_count(this, new_args); return std::make_shared(new_args, m_config); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/rope.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/rope.hpp index 47c460d1cf269b..f5a919f3bde11a 100644 --- 
a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/rope.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/rope.hpp
@@ -85,7 +85,7 @@ class RoPENode : public ov::op::Op {

     void validate_and_infer_types() override;

-    std::shared_ptr clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
+    std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override;

     const Config& get_config() const {
         return m_config;
diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt
index b0c66a435d6470..e48c985ad753bf 100644
--- a/src/plugins/intel_gpu/CMakeLists.txt
+++ b/src/plugins/intel_gpu/CMakeLists.txt
@@ -47,10 +47,6 @@ add_subdirectory(src/graph)
 file(GLOB_RECURSE PLUGIN_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plugin/*.cpp
                                  ${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/*.hpp)

-if(ENABLE_PROXY)
-    set(PLUGIN_DEFAULT_CONFIG "PROXY_CONFIGURATION_ALIAS:GPU;PROXY_CONFIGURATION_PRIORITY:0;PROXY_CONFIGURATION_INTERNAL_NAME:OCL_GPU")
-endif()
-
 ov_add_plugin(NAME ${TARGET_NAME}
               DEVICE_NAME "GPU"
               SOURCES ${PLUGIN_SOURCES}
diff --git a/src/plugins/intel_gpu/README.md b/src/plugins/intel_gpu/README.md
index 0cecca2eb7edbe..cf220164645416 100644
--- a/src/plugins/intel_gpu/README.md
+++ b/src/plugins/intel_gpu/README.md
@@ -31,6 +31,27 @@ GPU Plugin contains the following components:
 * [OpenCL Runtime issues troubleshooting](./docs/gpu_plugin_driver_troubleshooting.md)
 * [GPU plugin unit test](./docs/gpu_plugin_unit_test.md)

+## Documentation on dynamic shape
+These documents explain the internal implementation of dynamic shape support in the GPU Plugin. For general usage of dynamic shapes and the limitations of the GPU plugin, refer to [GPU Device — OpenVINO™ documentation - Version(2023.1)](https://docs.openvino.ai/2023.1/openvino_docs_OV_UG_supported_plugins_GPU.html#dynamic-shapes).
+
+* [Overall flow for dynamic shape execution](./docs/dynamic_shape/overall_flow.md)
+* Implementation details
+  * [Preprocessing: Shape inference / update weight / realloc memory](./docs/dynamic_shape/preprocessing.md)
+  * [Dynamic impl of kernels](./docs/dynamic_shape/dynamic_impl.md)
+  * [In-memory kernel cache](./docs/dynamic_shape/in_memory_cache.md)
+  * [Async kernel compilation](./docs/dynamic_shape/async_compilation.md)
+
+* Optimization features
+  * [Memory preallocation](./docs/dynamic_shape/memory_preallocation.md)
+
+
 ## Attached licenses

 GPU plugin uses 3rd-party components licensed under following licenses:
diff --git a/src/plugins/intel_gpu/docs/dynamic_shape/async_compilation.PNG b/src/plugins/intel_gpu/docs/dynamic_shape/async_compilation.PNG
new file mode 100644
index 00000000000000..4a60387d8d3198
--- /dev/null
+++ b/src/plugins/intel_gpu/docs/dynamic_shape/async_compilation.PNG
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0776d3eaa699a638d55015941f806745191b4878fc62e5bdcd2004c72a2237c1
+size 43345
diff --git a/src/plugins/intel_gpu/docs/dynamic_shape/dynamic_impl.md b/src/plugins/intel_gpu/docs/dynamic_shape/dynamic_impl.md
new file mode 100644
index 00000000000000..177b99fe9c8117
--- /dev/null
+++ b/src/plugins/intel_gpu/docs/dynamic_shape/dynamic_impl.md
@@ -0,0 +1,90 @@
+# Dynamic primitive impls
+
+* Unlike a static impl, a dynamic impl has no shape information before execution. The input shape depends on the input layer's output shape or on an execution result value. To handle this dynamic case, a shape-agnostic kernel is used.
+
+* Kernels that support dynamic shapes must declare EnableDynamicShapesSupport() as a supported key.
+
+```cpp
+ParamsKey CumSumKernelRef::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableDynamicShapesSupport();
+    return k;
+}
+```
+
+* Just as with static shapes, the dynamic impl for each primitive should be registered in 'implementation_map' with impl_types::ocl for GPU, together with the supported types and formats.
+
+```cpp
+implementation_map::add(impl_types::ocl,
+                        shape_types::dynamic_shape,
+                        typed_primitive_impl_ocl::create,
+                        types,
+                        dyn_formats);
+
+implementation_map::add(impl_types::ocl,
+                        shape_types::static_shape,
+                        typed_primitive_impl_ocl::create,
+                        keys);
+```
+
+## Shape agnostic kernel
+
+A shape-agnostic kernel is an implementation that works for variable shapes instead of a predefined shape. It declares variables holding the input and output dimension information inside the kernel, and these are replaced with the required values at each execution.
+
+#### Update shape information
+The input and output shape information of the kernel must be updated for every execution.
+
+* The __Shape information__ to be used inside the kernel is stored as **memory::ptr _shape_info_memory** in __class primitive_inst__.
+
+* This shape information is determined in **update_shape_info** during **update_impl()**.
+
+* In **update_shape_info**, all shapes and padding values of dynamic inputs and all shapes and padding values of dynamic outputs are stored in order.
+
+#### OPTIONAL_SHAPE_INFO_ARG
+OPTIONAL_SHAPE_INFO_ARG and OPTIONAL_SHAPE_INFO_TENSOR are defined for use in shape-agnostic kernels:
+
+> openvino/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp
+```cpp
+if (params.is_shape_agnostic) {
+    jit.AddConstant(MakeJitConstant("IS_DYNAMIC", 1));
+    jit.AddConstant(MakeJitConstant("OPTIONAL_SHAPE_INFO_ARG", "__global const int* shape_info,"));
+    jit.AddConstant(MakeJitConstant("OPTIONAL_SHAPE_INFO_TENSOR", "shape_info,"));
+} else {
+    jit.AddConstant(MakeJitConstant("OPTIONAL_SHAPE_INFO_ARG", ""));
+    jit.AddConstant(MakeJitConstant("OPTIONAL_SHAPE_INFO_TENSOR", ""));
+}
+```
+
+#### Update dispatch data
+**update_dispatch_data_func** must be defined for every shape-agnostic kernel impl and is called on every execution. It updates the global work-group size, the local work-group size, and skip_execution; additional kernel data can also be updated here per execution if necessary.
+
+```cpp
+kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
+    const auto& prim_params = static_cast(params);
+    auto dispatchData = SetDefault(prim_params);
+    OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
+    kd.kernels[0].params.workGroups.global = dispatchData.gws;
+    kd.kernels[0].params.workGroups.local = dispatchData.lws;
+    kd.kernels[0].skip_execution = KernelData::SkipKernelExecution(prim_params);
+};
+```
+
+#### Example to use shape_info[]
+
+* Example of **shape_info[]** in a kernel
+```cpp
+KERNEL(shape_of_ref)(
+    OPTIONAL_SHAPE_INFO_ARG
+    __global OUTPUT_TYPE* output
+    )
+{
+    const unsigned int i = (uint)get_global_id(2);
+
+#if IS_DYNAMIC
+    output[i] = TO_OUTPUT_TYPE(shape_info[i]); // shape_info[] is directly used
+#else
+    size_t shapes[] = INPUT_DIMS_INIT;
+    output[i] = TO_OUTPUT_TYPE(shapes[i]);
+#endif
+}
+```
\ No newline at end of file
diff --git a/src/plugins/intel_gpu/docs/dynamic_shape/in_memory_cache.md b/src/plugins/intel_gpu/docs/dynamic_shape/in_memory_cache.md
new file mode 100644
index 00000000000000..79847c6648bb38
--- /dev/null
+++ b/src/plugins/intel_gpu/docs/dynamic_shape/in_memory_cache.md
@@ -0,0 +1,22 @@
+# In-memory cache
+
+## Motivation
+
+When primitive_impls are created during dynamic-shape model execution, building a separate primitive_impl for the same primitive with the same type and input/output shapes duplicates work, including a fresh OpenCL build of the same kernel source. This is inefficient and degrades performance, because exactly the same OpenCL kernel source is built multiple times at runtime for the same layout and primitive type. To resolve this issue, the ImplementationCache was introduced.
+
+
+## Properties
+
+* ImplementationCache only handles primitive_impls that are created in primitive_inst::update_impl() and primitive_inst::update_weights() for dynamic-shape models. In the static-shape case, kernels_cache handles kernel deduplication.
+* ImplementationCache inherits LRUCacheThreadSafe, a thread-safe version of LRUCache that keeps frequently used items cached and thereby increases the cache hit rate. ImplementationCache therefore speeds up dynamic execution for the most frequently used primitive_impls.
+* Since cldnn::program creates the ImplementationCache as a unique_ptr in its constructor, the cache's lifecycle is tied to cldnn::program.
+* ImplementationCache supports multi-stream execution, so the cldnn::network of each stream manages primitive_impls in the same cache.
+* The ImplementationCache capacity is set to 10000 entries by default, but this may change with future optimizations.
+
+
+## Usage
+
+ImplementationCache handles the primitive_impl cache in primitive_inst::update_impl() and primitive_inst::update_weights() for dynamic-shape models.
+
+* In primitive_inst::update_impl(), the cache is looked up with a key that is the hash of the kernel_impl_param updated by the current primitive_inst. If no entry is found in the ImplementationCache, a new primitive_impl is created and saved in the cache.
+* In primitive_inst::update_weights(), if no primitive_impl is found whose hash key matches the weights_reorder_kernel_params of the primitive_inst, a new primitive_impl for the weight reorder is likewise created and put in the cache (see the sketch below).
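+
+The following is a minimal, self-contained sketch of the lookup-or-create pattern with an LRU bound that this section describes. It is an illustration only: the class and method names below are stand-ins, not the actual `ImplementationCache`/`LRUCacheThreadSafe` API.
+
+```cpp
+#include <cstddef>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+// Minimal thread-safe LRU cache illustrating the lookup-or-create pattern.
+// This is a sketch, not the actual ImplementationCache implementation.
+template <typename Key, typename Value>
+class LruCache {
+public:
+    explicit LruCache(size_t capacity) : m_capacity(capacity) {}
+
+    // Returns the cached value, or nullptr on a cache miss.
+    std::shared_ptr<Value> get(const Key& key) {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        auto it = m_map.find(key);
+        if (it == m_map.end())
+            return nullptr;
+        // Mark the entry as most recently used.
+        m_order.splice(m_order.begin(), m_order, it->second.second);
+        return it->second.first;
+    }
+
+    void add(const Key& key, std::shared_ptr<Value> value) {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        auto it = m_map.find(key);
+        if (it != m_map.end()) {  // update an existing entry in place
+            it->second.first = std::move(value);
+            m_order.splice(m_order.begin(), m_order, it->second.second);
+            return;
+        }
+        if (m_map.size() >= m_capacity) {  // evict the least recently used entry
+            m_map.erase(m_order.back());
+            m_order.pop_back();
+        }
+        m_order.push_front(key);
+        m_map.emplace(key, std::make_pair(std::move(value), m_order.begin()));
+    }
+
+private:
+    size_t m_capacity;
+    std::mutex m_mutex;
+    std::list<Key> m_order;  // most recently used at the front
+    std::unordered_map<Key, std::pair<std::shared_ptr<Value>, typename std::list<Key>::iterator>> m_map;
+};
+```
+
+With a cache like this, the flow in update_impl() amounts to a get() keyed by the kernel_impl_param hash, followed by building the impl and an add() on a miss.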
diff --git a/src/plugins/intel_gpu/docs/dynamic_shape/memory_preallocation.md b/src/plugins/intel_gpu/docs/dynamic_shape/memory_preallocation.md
new file mode 100644
index 00000000000000..e1f7580e282c25
--- /dev/null
+++ b/src/plugins/intel_gpu/docs/dynamic_shape/memory_preallocation.md
@@ -0,0 +1,92 @@
+# Memory preallocation
+## Description
+The `ShapePredictor` class is responsible for gathering information about shape changes of primitives and attempting to predict the shape size for the next iteration during dynamic model execution.
+
+The motivation for implementing such a prediction mechanism is the significant time cost of memory allocation on the GPU for all supported buffer types (USM and OpenCL buffers). During dynamic model execution, the output shape of a `primitive_inst` can change in any way, and in the case of an increase a new buffer has to be allocated. The issue becomes especially critical when the shape size increases monotonically (as occurs with large language models).
+
+A `ShapePredictor` object is unique per `InferRequest`, and it stores up to the 3 most recent shapes for each primitive. It requires 3 recorded shapes for a prediction; otherwise, it performs no shape modification and returns the shape unchanged. Once it has 3 or more shapes on record, it starts to predict the next shape. If the record shows that the shape size increases monotonically with a fixed step size, `ShapePredictor` returns a size large enough for the next 10 execution iterations. If the shape size changes unpredictably, `ShapePredictor` returns the shape increased by 10 percent.
+
+## Operation modes
+
+The main function of `ShapePredictor` is `predict_preallocation_shape()`:
+```cpp
+std::pair<bool, ov::Shape> predict_preallocation_shape(const std::string& id,
+                                                       const ov::Shape& current_shape,
+                                                       size_t dt_bitwidth,
+                                                       bool can_reuse_buffer);
+```
+
+Parameters description:
+* `id` refers to the `cldnn::primitive`'s unique name related to the `current_shape`
+* `current_shape` describes the actual shape
+* `dt_bitwidth` describes the buffer's data type size in bits
+* `can_reuse_buffer` allows recording `current_shape` in the history without applying preallocation if the current buffer is big enough
+
+Return value: `predict_preallocation_shape` returns a pair of `bool` and `ov::Shape`, where the `bool` value indicates whether the shape was successfully predicted and can be preallocated, and the second value is the predicted `ov::Shape` itself (an empty shape is returned if prediction is not possible).
+
+`ShapePredictor` can operate in two modes:
+
+* By default, it tries to predict the shape for the next `_next_iters_preallocation_count` (10 by default) iterations. There are two requirements for successful shape prediction in this mode: the per-iteration buffer size should be less than `_max_per_iter_size` (16 KB by default), and the difference between the shapes' dimensions should be less than `_max_per_dim_diff`. These restrictions prevent unexpectedly large memory preallocations, which lead to inefficient memory usage.
+
+* The second operation mode is percentage preallocation. It can be configured with the `ov::intel_gpu::buffers_preallocation_ratio` internal property (1.1 by default) and unconditionally increases the buffer size by the `_buffers_preallocation_ratio` value if it is not possible to predict the shape for multiple iterations ahead. A simplified model of both modes is sketched below.
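+
+To make the prediction rules concrete, below is a simplified, self-contained model of the logic described above. It is illustrative only: the real `ShapePredictor` also applies the `_max_per_iter_size`/`_max_per_dim_diff` checks and reasons about buffer sizes rather than raw dimensions, and the function and type names here are not the actual API.
+
+```cpp
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+using Shape = std::vector<size_t>;
+
+// Simplified model of the prediction logic: given the three most recent
+// shapes of a primitive (oldest first), either preallocate for the next
+// 10 iterations (monotonic fixed-step growth) or grow the shape by ~10%.
+std::pair<bool, Shape> predict_next_shape(const std::array<Shape, 3>& history) {
+    const Shape &s0 = history[0], &s1 = history[1], &s2 = history[2];
+    if (s2.empty() || s0.size() != s1.size() || s1.size() != s2.size())
+        return {false, {}};  // no usable history: rank changed or shape is empty
+
+    bool fixed_step = true;
+    for (size_t i = 0; i < s2.size(); ++i) {
+        int64_t d1 = int64_t(s1[i]) - int64_t(s0[i]);
+        int64_t d2 = int64_t(s2[i]) - int64_t(s1[i]);
+        if (d1 < 0 || d1 != d2) {
+            fixed_step = false;  // not a monotonic fixed-step sequence
+            break;
+        }
+    }
+
+    Shape predicted = s2;
+    if (fixed_step) {
+        // Preallocate enough for the next 10 iterations in one allocation.
+        for (size_t i = 0; i < predicted.size(); ++i)
+            predicted[i] += 10 * (s2[i] - s1[i]);
+    } else {
+        // Unpredictable growth: scale the innermost dimension by the
+        // preallocation ratio (1.1) as a stand-in for a 10% larger buffer;
+        // the +1 rounds up so that the buffer always grows.
+        predicted.back() = static_cast<size_t>(predicted.back() * 1.1 + 1);
+    }
+    return {true, predicted};
+}
+```
+
+For example, with a history of `{1, 10}`, `{1, 20}`, `{1, 30}`, the fixed step of 10 is detected and `{1, 130}` is returned, which covers the next 10 iterations.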
+
+`ShapePredictor` also provides a helper function, `can_preallocate()`, which checks whether the desired buffer size can be preallocated:
+```cpp
+ShapePredictor::can_preallocate(size_t desired_buffer_size)
+```
+The following flowchart describes the shape prediction algorithm:
+
+![ShapePredictor flowchart](shape_predictor_flowchart.png)
+
+## `ShapePredictor` usage
+* Inputs/outputs preallocation (src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp):
+```cpp
+ov::Shape predict_shape(const std::string& name,
+                        const ov::Shape current_shape,
+                        ov::element::Type element_type,
+                        cldnn::ShapePredictor& shape_predictor) {
+    // Request prediction for `current_shape` and `element_type` data type
+    auto prealloc_info = shape_predictor.predict_preallocation_shape(name, current_shape, element_type.bitwidth(), false);
+    const auto& preallocation_shape = prealloc_info.second;
+    // Check if shape was successfully predicted and there is enough free memory for preallocation
+    auto can_preallocate_buffer = prealloc_info.first &&
+                                  shape_predictor.can_preallocate(cldnn::ceil_div(ov::shape_size(preallocation_shape) * element_type.bitwidth(), 8));
+    if (can_preallocate_buffer) {
+        return preallocation_shape;
+    }
+
+    return current_shape;
+}
+```
+* Nodes' output buffer preallocation (src/plugins/intel_gpu/src/graph/primitive_inst.cpp):
+```cpp
+auto current_shape = actual_layout.get_shape();
+auto& sp = *get_network().get_shape_predictor();
+auto dt_size = ov::element::Type(actual_layout.data_type).bitwidth();
+// Request prediction for `current_shape` and `actual_layout.data_type` data type
+auto prealloc_info = sp.predict_preallocation_shape(id(), current_shape, dt_size, can_reuse_buffer);
+// Check if shape was successfully predicted and there is enough free memory for preallocation
+if (prealloc_info.first && sp.can_preallocate(ov::shape_size(prealloc_info.second) * dt_size)) {
+    auto new_layout = actual_layout;
+    new_layout.set_partial_shape(prealloc_info.second);
+    // Update `updated_params` output layout which will be used for memory allocation
+    updated_params.output_layouts[0] = new_layout;
+}
+```
+
+## `ShapePredictor` debug capabilities
+You can use the `OV_GPU_MemPreallocationOptions` environment variable (when `DEBUG_CAPS` is enabled; see details in src/plugins/intel_gpu/docs/gpu_debug_utils.md) to change the buffer preallocation behaviour. This property expects 4 space-separated values in the following order: number of iterations for preallocation (int), max size of a single iteration in bytes (int), max allowed per-dim diff (int), and unconditional buffers preallocation ratio (float). For example, to disable memory preallocation entirely, use `OV_GPU_MemPreallocationOptions='0 0 0 1.0'`
diff --git a/src/plugins/intel_gpu/docs/dynamic_shape/overall_flow.md b/src/plugins/intel_gpu/docs/dynamic_shape/overall_flow.md
new file mode 100644
index 00000000000000..fc429e5735f354
--- /dev/null
+++ b/src/plugins/intel_gpu/docs/dynamic_shape/overall_flow.md
@@ -0,0 +1,47 @@
+# Overall Flow of Dynamic-Shaped Model Execution
+
+```mermaid
+flowchart TD
+    A["primitive_inst::execute()"] --> B{"is dynamic?"}
+    B --> |No | H["Execute impl"]
+    B --> |Yes | C["runtime in_place_concat"]
+    C --> D["update_shape()"]
+    D --> E{"shape changed from<br>previous inference?"}
+    E --> |No | H["Execute impl"]
+    E --> |Yes| G{"Valid fusion?"}
+    G --> |No | I{"Create unfused subgraph"}
+    I --> II["Execute subgraph"]
+    G --> |Yes | J["update_impl()"]
+    J --> JJ{"Impl changed?"}
+    JJ --> |No | L["Set arguments"]
+    L --> H
+    JJ --> |Yes | KK{"preferred weight format<br>changed?"}
+    KK --> |Yes | M["update_weights()"]
+    KK --> |No | O{"Is current memory enough<br>for the new shape?"}
+    M --> O
+    O --> |No |P["reallocate output memory"]
+    O --> |Yes | L
+    P --> L
+```
+
+Figure 1 presents the basic flow of a primitive execution when its program_node has a dynamic shape. A brief explanation of each step follows; the more detailed explanations are found in the implementation details section.
+
+* [update_shape](https://github.com/openvinotoolkit/openvino/blob/539b5a83ba7fcbbd348e4dc308e4a0f2dee8343c/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L798)
+    * This checks whether the input shape of the primitive has changed from the previous inference. If it has changed, it performs shape inference for the primitive.
+    * If the byte size of the new output shape is zero, the execution of this primitive is skipped for the current inference.
+    * Note that shape inference for some primitives is performed during other primitives' inference-time optimization stages (e.g., in do_runtime_in_place_concat). In such cases, the update_shape_done_by_other flag is set to true. More detailed descriptions of these optimization stages will be provided in the near future.
+* [Unfusion](https://github.com/openvinotoolkit/openvino/blob/539b5a83ba7fcbbd348e4dc308e4a0f2dee8343c/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L811C9-L811C9)
+    * If the primitive has fused operations but the kernel does not support fusion for the current output shape, then it performs unfusion, i.e., creating a subgraph that decomposes the current primitive and the fused primitives.
+* If either the input or output shapes are changed, the following processes are performed:
+    * [update_impl](https://github.com/openvinotoolkit/openvino/blob/539b5a83ba7fcbbd348e4dc308e4a0f2dee8343c/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L845C22-L845C22)
+
+        * Checks whether the expected primitive_impl can be obtained from the in-memory cache. If not, it checks whether a dynamic_impl (i.e., shape-agnostic impl) is available for the primitive. If a dynamic_impl is available, it is selected and used. If not, a new static_impl is built for the primitive and added to the in-memory cache.
+        * When a dynamic_impl is selected and the primitive is critical (e.g., fully_connected, gemm, convolution, deconvolution), a build task for the static kernel is enqueued to the async compilation context.
+    * [update_weights](https://github.com/openvinotoolkit/openvino/blob/539b5a83ba7fcbbd348e4dc308e4a0f2dee8343c/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L846C44-L846C44)
+
+        * If the impl has changed and the expected weight format has changed, the weight data is reordered to the corresponding format.
+    * [realloc_if_needed](https://github.com/openvinotoolkit/openvino/blob/539b5a83ba7fcbbd348e4dc308e4a0f2dee8343c/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L849C11-L849C1)
+
+        * If the current output memory is smaller than the memory required for the new shape, new memory is allocated.
+* If any kernel arguments have changed (e.g., memory address or work-group size), set_argument() is performed.
+* Finally, the selected impl is executed as normal (a condensed sketch of this flow follows below).
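+
+As a recap, the following hedged pseudocode condenses the flow above into one routine. The method names mirror the bullets and the flowchart, but the types and calls are illustrative stand-ins, not the real `primitive_inst` interface:
+
+```cpp
+// Pseudocode sketch of the dynamic-shape execution path (illustrative only;
+// the real logic lives in primitive_inst::execute() and its helpers).
+void execute_dynamic(PrimitiveInst& inst) {
+    if (!inst.shape_changed_from_previous_inference()) {
+        inst.execute_impl();                     // nothing changed: run as-is
+        return;
+    }
+    inst.update_shape();                         // run shape inference
+    if (!inst.fusion_valid_for_new_shape()) {
+        inst.execute_unfused_subgraph();         // decompose primitive + fused ops
+        return;
+    }
+    bool impl_changed = inst.update_impl();      // cache lookup / shape-agnostic impl
+    if (impl_changed && inst.preferred_weight_format_changed())
+        inst.update_weights();                   // reorder weights to the expected format
+    if (!inst.current_memory_fits_new_shape())
+        inst.realloc_if_needed();                // allocate (possibly preallocated) memory
+    inst.set_arguments();                        // refresh kernel arguments if changed
+    inst.execute_impl();
+}
+```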
\ No newline at end of file diff --git a/src/plugins/intel_gpu/docs/dynamic_shape/shape_predictor_flowchart.png b/src/plugins/intel_gpu/docs/dynamic_shape/shape_predictor_flowchart.png new file mode 100644 index 00000000000000..2d067d9b3beed2 --- /dev/null +++ b/src/plugins/intel_gpu/docs/dynamic_shape/shape_predictor_flowchart.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc11cdc4a2b2084a47c6b07c79a325b0c14a8513b3ee63a5efef8efc3872305f +size 22448 diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index bc015b6ad5830c..3080853d437b3d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -226,13 +226,6 @@ struct network { std::shared_ptr get_shape_predictor() { return _shape_predictor; } void set_shape_predictor(std::shared_ptr shape_predictor) { _shape_predictor = shape_predictor; } - std::unordered_map> get_kv_cache_mem_deps() { - return _kv_cache_mem_deps; - } - void add_kv_cache_mem_deps(primitive_id kv_cache, primitive_id read_value) { - _kv_cache_mem_deps[kv_cache].push_back(read_value); - } - #ifdef GPU_DEBUG_CONFIG int64_t get_current_iteration_num() { return iteration; } #endif @@ -274,9 +267,6 @@ struct network { std::shared_ptr _shape_predictor; - // Record corresponding read_values for kv_cache so that we can release those read_value's output memories when they are no longer needed - std::unordered_map> _kv_cache_mem_deps; - void build_exec_order(); void allocate_primitive_instance(program_node const& node); void transfer_memory_to_device(std::shared_ptr instance, program_node const& node); diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/gemm.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/gemm.hpp new file mode 100644 index 00000000000000..3391cf89976c2b --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/op/gemm.hpp @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/op.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { + +class Gemm : public ov::op::v0::MatMul { +public: + OPENVINO_OP("Gemm", "gpu_opset"); + + Gemm() = default; + + Gemm(const ov::Output& A, + const ov::Output& B, + const std::vector& order_a, + const std::vector& order_b, + const std::vector& order_c, + const ov::element::Type output_type = ov::element::undefined); + + bool visit_attributes(ov::AttributeVisitor &visitor) override; + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + std::vector get_input0_order() const { return m_order_a; } + std::vector get_input1_order() const { return m_order_b; } + std::vector get_output_order() const { return m_order_c; } + ov::element::Type get_output_type() const { return m_output_type; } + +protected: + std::vector m_order_a; + std::vector m_order_b; + std::vector m_order_c; + ov::element::Type m_output_type; +}; + +std::vector shape_infer(const Gemm* op, + std::vector input_shapes, + const std::vector& order_a, + const std::vector& order_b, + const std::vector& order_c); + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp 
b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 479f300eecd67b..75bf7f51c19000 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -275,3 +275,4 @@ REGISTER_FACTORY(internal, RMS); REGISTER_FACTORY(internal, GatherCompressed); REGISTER_FACTORY(internal, KVCache); REGISTER_FACTORY(internal, ReadValue); +REGISTER_FACTORY(internal, Gemm); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/gemm.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/gemm.hpp index cef11bbb0b11b5..4517ba78629440 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/gemm.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/gemm.hpp @@ -25,6 +25,12 @@ namespace cldnn { struct gemm : public primitive_base { CLDNN_DECLARE_PRIMITIVE(gemm) + typedef enum { + X_LAST = 0, + Y_LAST, + OTHER, + } TransposeType; + gemm() : primitive_base("", {}) {} /// @brief Constructs gemm layer. @@ -38,16 +44,16 @@ struct gemm : public primitive_base { gemm(const primitive_id& id, const std::vector& inputs, const data_types data_type, - const bool transpose_input0 = false, - const bool transpose_input1 = false, + const bool transpose_input0, + const bool transpose_input1, const float alpha = 1.0f, const float beta = 0.0f, const size_t input_rank = 4, const size_t weight_rank = 4, const padding& output_padding = padding()) : primitive_base(id, inputs, {output_padding}, {optional_data_type{ data_type }}), - transpose_input0(transpose_input0), - transpose_input1(transpose_input1), + transpose_input0(transpose_input0 ? 1 : 0), + transpose_input1(transpose_input1 ? 1 : 0), alpha(alpha), beta(beta), input_rank(input_rank), @@ -55,25 +61,97 @@ struct gemm : public primitive_base { if (inputs.size() != 2 && inputs.size() != 3) { throw std::invalid_argument("Invalid inputs count - gemm expects either two or three inputs"); } + + auto get_transposed_order = [] (size_t rank, bool transposed) { + std::vector order(rank); + std::iota(order.begin(), order.end(), 0); + if (transposed) + std::swap(order[rank - 1], order[rank - 2]); + return order; + }; + + input0_order = get_transposed_order(input_rank, transpose_input0); + input1_order = get_transposed_order(weight_rank, transpose_input1); + output_order = {}; + } + + /// @brief Constructs gemm layer. 
+ /// @brief Primitive id containing first matrix + /// @brief Primitive id containing second matrix + /// @brief Transposed order of first input matrix + /// @brief Transposed order of second input matrix + /// @brief Transposed order of output matrix + /// @brief Variable containing ALPHA parameter + /// @brief Variable containing BETA parameter + gemm(const primitive_id& id, + const std::vector& inputs, + const data_types data_type, + const std::vector& input0_order = {0, 1, 2, 3}, + const std::vector& input1_order = {0, 1, 2, 3}, + const std::vector& output_order = {}, + const float alpha = 1.0f, + const float beta = 0.0f, + const padding& output_padding = padding()) + : primitive_base(id, inputs, {output_padding}, {optional_data_type{ data_type }}), + input0_order(input0_order), + input1_order(input1_order), + output_order(output_order), + alpha(alpha), + beta(beta), + input_rank(input0_order.size()), + weight_rank(input1_order.size()) { + if (inputs.size() != 2 && inputs.size() != 3) { + throw std::invalid_argument("Invalid inputs count - gemm expects either two or three inputs"); + } + + auto get_transpose_mode = [](const std::vector& order_idx) { + int64_t rank = order_idx.size() - 1; + + if (rank == order_idx[rank]) { + // normal + return TransposeType::X_LAST; + } else if (rank == order_idx[rank - 1]) { + // the second last dim is moved to the last + return TransposeType::Y_LAST; + } else { + // other + return TransposeType::OTHER; + } + }; + + transpose_input0 = get_transpose_mode(input0_order); + transpose_input1 = get_transpose_mode(input1_order); } /// @brief Flag for transposing first input matrix - bool transpose_input0 = false; + uint32_t transpose_input0 = 0; /// @brief Flag for transposing second input matrix - bool transpose_input1 = false; + uint32_t transpose_input1 = 0; + /// @brief order of input 0 + std::vector input0_order; + /// @brief order of input 1 + std::vector input1_order; + /// @brief order of output + std::vector output_order; /// @brief Variable containing ALPHA parameter float alpha = 1.0f; /// @brief Variable containing BETA parameter float beta = 1.0f; /// @brief First matrix rank size_t input_rank = 4; - /// @brief Second matrix rank + /// @brief Second matrix rank size_t weight_rank = 4; size_t hash() const override { size_t seed = primitive::hash(); seed = hash_combine(seed, transpose_input0); seed = hash_combine(seed, transpose_input1); + for (auto order : input0_order) + seed = hash_combine(seed, order); + for (auto order : input1_order) + seed = hash_combine(seed, order); + for (auto order : output_order) + seed = hash_combine(seed, order); seed = hash_combine(seed, alpha); seed = hash_combine(seed, beta); return seed; @@ -97,6 +175,9 @@ struct gemm : public primitive_base { primitive_base::save(ob); ob << transpose_input0; ob << transpose_input1; + ob << input0_order; + ob << input1_order; + ob << output_order; ob << alpha; ob << beta; ob << input_rank; @@ -107,6 +188,9 @@ struct gemm : public primitive_base { primitive_base::load(ib); ib >> transpose_input0; ib >> transpose_input1; + ib >> input0_order; + ib >> input1_order; + ib >> output_order; ib >> alpha; ib >> beta; ib >> input_rank; diff --git a/src/plugins/intel_gpu/src/graph/gemm.cpp b/src/plugins/intel_gpu/src/graph/gemm.cpp index 2b8bdafed4a8e1..99e8fd7d674a29 100644 --- a/src/plugins/intel_gpu/src/graph/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/gemm.cpp @@ -8,7 +8,7 @@ #include #include -#include "matmul_shape_inference.hpp" +#include "intel_gpu/op/gemm.hpp" namespace cldnn { 
GPU_DEFINE_PRIMITIVE_TYPE_ID(gemm) @@ -22,35 +22,40 @@ layout gemm_inst::calc_output_layout(gemm_node const& node, kernel_impl_params c auto input0_shape = input0_layout.get_shape(); auto input1_shape = input1_layout.get_shape(); - bool transpose_input0 = prim->transpose_input0; - bool transpose_input1 = prim->transpose_input1; + auto input0_order = prim->input0_order; + auto input1_order = prim->input1_order; bool reordered = prim->input_rank > 4 || prim->weight_rank > 4; size_t output_rank = std::max(prim->input_rank, prim->weight_rank); size_t input_rank = reordered ? output_rank : prim->input_rank; size_t weight_rank = reordered ? output_rank : prim->weight_rank; - auto update_input_shape = [&output_rank](const ov::Shape& input_shape, size_t rank, bool transpose, bool first_input) { - auto input_shape_update = ov::Shape(input_shape.begin(), input_shape.begin() + std::min(rank, input_shape.size())); + auto update_input_shape = [&output_rank](const ov::Shape& input_shape, size_t rank, std::vector input_order, bool first_input) { + auto input_shape_update = ov::Shape(); + auto _input_shape_update = ov::Shape(input_shape.begin(), input_shape.begin() + std::min(rank, input_shape.size())); + if (_input_shape_update.size() == input_order.size() && input_order.size() > 1) { + for (auto idx : input_order) { + input_shape_update.push_back(_input_shape_update[idx]); + } + } else { + input_shape_update = _input_shape_update; + } if (input_shape_update.size() == 1) { first_input ? input_shape_update.insert(input_shape_update.begin(), 1) : input_shape_update.insert(input_shape_update.end(), 1); - if (transpose) { - std::swap(input_shape_update[0], input_shape_update[1]); - } output_rank = std::max(output_rank, rank + 1); } input_shape_update.insert(input_shape_update.begin(), output_rank - input_shape_update.size(), 1); return input_shape_update; }; - auto input0_shape_update = update_input_shape(input0_shape, input_rank, transpose_input0, true); - auto input1_shape_update = update_input_shape(input1_shape, weight_rank, transpose_input1, false); + auto input0_shape_update = update_input_shape(input0_shape, input_rank, input0_order, true); + auto input1_shape_update = update_input_shape(input1_shape, weight_rank, input1_order, false); ov::Shape bias_shape(output_rank); if (prim->input_size() == 3) { bias_shape = impl_param.get_input_layout(2).get_shape(); - bias_shape = update_input_shape(bias_shape, weight_rank, transpose_input1, false); + bias_shape = update_input_shape(bias_shape, weight_rank, input1_order, false); } auto output_shape = input0_shape_update; @@ -58,8 +63,8 @@ layout gemm_inst::calc_output_layout(gemm_node const& node, kernel_impl_params c output_shape[i] = std::max(std::max(input0_shape_update[i], input1_shape_update[i]), bias_shape[i]); } - size_t M = !transpose_input0 ? *(input0_shape_update.end() - 2) : input0_shape_update.back(); - size_t N = !transpose_input1 ? 
input1_shape_update.back() : *(input1_shape_update.end() - 2); + size_t M = *(input0_shape_update.end() - 2); + size_t N = input1_shape_update.back(); output_shape[output_rank - 2] = M; output_shape[output_rank - 1] = N; @@ -97,16 +102,17 @@ std::vector gemm_inst::calc_output_layouts(gemm_node const& node, const output_type = impl_param.get_fused_output_layout().data_type; } - ov::op::v0::MatMul op; - op.set_transpose_a(prim->transpose_input0); - op.set_transpose_b(prim->transpose_input1); + ov::intel_gpu::op::Gemm op; + op.set_transpose_a(false); + op.set_transpose_b(false); std::vector input_shapes = { input0_layout.get(), input1_layout.get() }; - std::vector output_shapes = ov::op::v0::shape_infer(&op, input_shapes); + std::vector output_shapes = ov::intel_gpu::op::shape_infer(&op, input_shapes, + prim->input0_order, prim->input1_order, prim->output_order); cldnn::format output_format = input0_layout.format; if (node.get_preferred_output_fmt() != format::any) @@ -186,8 +192,14 @@ layout gemm_inst::transform_output_layout(const std::shared_ptr prim auto input0_pshape = input_layouts[0].get_partial_shape(); auto input1_pshape = input_layouts[1].get_partial_shape(); - auto M = !primitive->transpose_input0 ? input0_pshape[input0_pshape.size() - 2] : input0_pshape[input0_pshape.size() - 1]; - auto N = !primitive->transpose_input1 ? input1_pshape[input1_pshape.size() - 1] : input1_pshape[input1_pshape.size() - 2]; + auto input0_order = primitive->input0_order; + auto input1_order = primitive->input1_order; + + auto m_idx = ((input0_order.size() > 1) ? input0_order[input0_order.size() - 2] : input0_order[0]) + + input0_pshape.size() - input0_order.size(); + auto n_idx = input1_order[input1_order.size() - 1] + input1_pshape.size() - input1_order.size(); + auto M = input0_pshape[m_idx]; + auto N = input1_pshape[n_idx]; auto output_pshape = input_layouts[0].get_partial_shape(); for (size_t i = 0; i != input_layouts.size(); ++i) { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp index 9989316c7f3a91..b0ea6ddaebebdf 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp @@ -47,6 +47,9 @@ struct gemm_impl : typed_primitive_impl_ocl { params.beta = primitive->beta; params.transpose_input0 = primitive->transpose_input0; params.transpose_input1 = primitive->transpose_input1; + params.input0_order = primitive->input0_order; + params.input1_order = primitive->input1_order; + params.output_order = primitive->output_order; bool is_quantized = true; for (auto& input : impl_param.input_layouts) diff --git a/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h b/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h index 49914e0f08cf7f..34c4ccf555008b 100644 --- a/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/kv_cache_inst.h @@ -80,8 +80,6 @@ class typed_primitive_inst : public typed_primitive_inst_base #include @@ -57,94 +55,4 @@ std::string kv_cache_inst::to_string(const kv_cache_node& node) { return primitive_description.str(); } -void kv_cache_inst::post_realloc_optimization(const layout& allocated_layout) { - auto desc = _node->as().get_primitive(); - auto& variable = get_network().get_variable(desc->variable_info.variable_id); - auto present_layout = _impl_params->output_layouts[0]; - const auto& sequence_axis = desc->concat_axis; - auto sequence_axis_legacy = - 
kv_cache_inst::get_sequence_axis_legacy(sequence_axis, present_layout.get_partial_shape().size()); - GPU_DEBUG_TRACE_DETAIL << id() << " is kv_cache => set the variable with newly allocated output memory" - << std::endl; - bool axis_is_outer_most = true; - for (int64_t dim = 0; dim < sequence_axis; ++dim) { - if (present_layout.get_shape()[dim] > 1) { - axis_is_outer_most = false; - break; - } - } - if (present_layout.data_padding.get_dynamic_pad_dims().sizes()[sequence_axis_legacy] == 1) { - // Apply padding of variable to make it be optimized in the next iteration - auto max_pad = kv_cache_inst::get_max_pad( - present_layout, - allocated_layout.get_buffer_size().count(), - sequence_axis_legacy, - "present_layout"); - if (max_pad > 0) { - kv_cache_inst::update_pad(present_layout, max_pad, sequence_axis_legacy); - if (!axis_is_outer_most) { - GPU_DEBUG_TRACE_DETAIL << id() << ": Update impl with new output padding" << std::endl; - set_shape_change(); - _impl_params->output_layouts[0] = present_layout; - update_impl(); - } - GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable " << variable.get_name() - << "'s memory with allocated kv cache output: " << present_layout.to_short_string() - << " is_set = " << variable.is_set() << std::endl; - variable.set_memory(_outputs[0], present_layout); - _impl_params->_can_be_optimized = true; - // No need to copy, still it can be optimized - GPU_DEBUG_TRACE_DETAIL << id() << ": Set can_be_optimized = true " << std::endl; - { - // Garbage collection of kv cache meories : - // Once the corresponding kv cache's execution is done, the input mems are no - // longer needed and can be released. - GPU_DEBUG_TRACE_DETAIL << ": Check releasable kv cache memories" << std::endl; - std::vector mem_deps_eol; - for (auto kms : _network.get_kv_cache_mem_deps()) { - const auto kv_cache_id = kms.first; - auto queue_type = get_network().get_stream().get_queue_type(); - if (queue_type == QueueTypes::in_order || - (_network.has_event(kv_cache_id) && _network.get_primitive_event(kv_cache_id)->is_set())) { - for (auto mem_deps : kms.second) { - mem_deps_eol.push_back(mem_deps); - } - } - } - for (auto mem_dep : mem_deps_eol) { - auto mem_dep_inst = _network.get_primitive(mem_dep); - GPU_DEBUG_TRACE_DETAIL << "Release output memory of " << mem_dep_inst->id() << ": " - << ((mem_dep_inst->_outputs.size() > 0 && mem_dep_inst->_outputs[0]) ? 
mem_dep_inst->_outputs[0]->buffer_ptr() : " 0x0") - << std::endl; - - mem_dep_inst->_outputs[0] = nullptr; - } - } - { - // Add mem_deps for current kv_cache op for future release - GPU_DEBUG_TRACE_DETAIL << "Record kv cache mem deps for future garbage collection " << id() << ": " << std::endl; - if (_deps[0].first->get_node().is_type() && _deps[0].first->can_be_optimized()) { - _network.add_kv_cache_mem_deps(id(), _deps[0].first->id()); - GPU_DEBUG_TRACE_DETAIL << id() << " can clear " << _deps[0].first->id() << "'s mem" << std::endl; - if (_deps[0].first->_deps[0].first->get_node().is_type() && - _deps[0].first->_deps[0].first->can_be_optimized()) { - _network.add_kv_cache_mem_deps(id(), _deps[0].first->_deps[0].first->id()); - GPU_DEBUG_TRACE_DETAIL << id() << " can clear " << _deps[0].first->_deps[0].first->id() << "'s mem" << std::endl; - } - } - } - } else { - GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable " << variable.get_name() - << "'s layout with allocated kv cache output: " << present_layout.to_short_string() - << " (is_set = " << variable.is_set() << ") " << std::endl; - variable.set_layout(present_layout); - } - } else { - GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable " << variable.get_name() - << "'s layout with allocated kv cache output: " << present_layout.to_short_string() - << " (is_set = " << variable.is_set() << ") " << std::endl; - variable.set_layout(present_layout); - } -} - } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 2cd2a51d5caf05..5429aa30d40e2d 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -911,7 +911,30 @@ static bool is_node_for_onednn(fully_connected_node const& node) { } static bool is_node_for_onednn(gemm_node const& node) { - return layout_optimizer::are_data_types_suitable_for_onednn((program_node&)node); + if (!layout_optimizer::are_data_types_suitable_for_onednn((program_node&)node)) + return false; + + auto gemm_prim = node.get_primitive(); + + for (size_t idx = 0; idx < gemm_prim->output_order.size(); idx++) { + if (idx != static_cast(gemm_prim->output_order[idx])) + return false; + } + + if (gemm_prim->transpose_input0 > 1 || gemm_prim->transpose_input0 > 1) + return false; + + for (size_t idx = 0; idx < (gemm_prim->input0_order.size() - 2); idx++) { + if (idx != static_cast(gemm_prim->input0_order[idx])) + return false; + } + + for (size_t idx = 0; idx < (gemm_prim->input1_order.size() - 2); idx++) { + if (idx != static_cast(gemm_prim->input1_order[idx])) + return false; + } + + return true; } // This function is needed to avoid performance regressions for the convolutions with byxf layout @@ -1673,6 +1696,11 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format impl_candidate = impl_types::ocl; } + if (node.is_type()) { + if (!is_node_for_onednn(node.as())) + impl_candidate = impl_types::ocl; + } + preferred_impl = impl_candidate; } else if (node.is_type()) { preferred_impl = impl_types::ocl; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 73db30b0e45955..80b077e740c99b 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -443,7 +443,7 @@ void network::reset_execution(bool wait) { get_stream().wait_for_events(events); } } - _kv_cache_mem_deps.clear(); + // Move events to temporarily map to deallocate them at 
the end of network::execute() call for better overlapping with
     // kernels execution, since it may take significant time for high amount of events
     _old_events = std::move(_events);
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index fbcb48856b9f44..0ab59a351f50ff 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -597,8 +597,57 @@ event::ptr primitive_inst::realloc_if_needed() {
     }
     // Set variable memory same as output memory
     if (_node->is_type()) {
-        dynamic_cast(this)->post_realloc_optimization(updated_params.output_layouts[0]);
+        auto desc = _node->as().get_primitive();
+        auto& variable = get_network().get_variable(desc->variable_info.variable_id);
+        auto present_layout = _impl_params->output_layouts[0];
+        const auto& sequence_axis = desc->concat_axis;
+        auto sequence_axis_legacy =
+            kv_cache_inst::get_sequence_axis_legacy(sequence_axis, present_layout.get_partial_shape().size());
+        GPU_DEBUG_TRACE_DETAIL << id() << " is kv_cache => set the variable with newly allocated output memory"
+                               << std::endl;
+        bool axis_is_outer_most = true;
+        for (int64_t dim = 0; dim < sequence_axis; ++dim) {
+            if (present_layout.get_shape()[dim] > 1) {
+                axis_is_outer_most = false;
+                break;
+            }
+        }
+        if (present_layout.data_padding.get_dynamic_pad_dims().sizes()[sequence_axis_legacy] == 1) {
+            // Apply padding of variable to make it be optimized in the next iteration
+            auto max_pad = kv_cache_inst::get_max_pad(present_layout,
+                                                      updated_params.output_layouts[0].get_buffer_size().count(),
+                                                      sequence_axis_legacy,
+                                                      "present_layout");
+            if (max_pad > 0) {
+                kv_cache_inst::update_pad(present_layout, max_pad, sequence_axis_legacy);
+                if (!axis_is_outer_most) {
+                    GPU_DEBUG_TRACE_DETAIL << id() << ": Update impl with new output padding" << std::endl;
+                    set_shape_change();
+                    _impl_params->output_layouts[0] = present_layout;
+                    update_impl();
+                }
+                GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable " << variable.get_name()
+                                       << "'s memory with allocated kv cache output: "
+                                       << present_layout.to_short_string() << " is_set = " << variable.is_set()
+                                       << std::endl;
+                variable.set_memory(_outputs[0], present_layout);
+                _impl_params->_can_be_optimized = true;
+                // No need to copy, still it can be optimized
+                GPU_DEBUG_TRACE_DETAIL << id() << ": Set can_be_optimized = true " << std::endl;
+            } else {
+                GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable " << variable.get_name()
+                                       << "'s layout with allocated kv cache output: " << present_layout.to_short_string()
+                                       << " (is_set = " << variable.is_set() << ") " << std::endl;
+                variable.set_layout(present_layout);
+            }
+        } else {
+            GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable " << variable.get_name()
+                                   << "'s layout with allocated kv cache output: " << present_layout.to_short_string()
+                                   << " (is_set = " << variable.is_set() << ") " << std::endl;
+            variable.set_layout(present_layout);
+        }
     }
+    _mem_allocated = true;

     // intermediate memory allocation is required for primitives consisting of multiple kernels in dynamic case
     {
@@ -753,6 +802,13 @@ bool primitive_inst::update_impl() {
         if (!can_be_optimized()) {
             auto impl = _node->type()->choose_impl(*_node, updated_params_no_dyn_pad);
+            // In the case of gemm, if the current dynamic impl is not gemm_ref but the newly chosen impl is gemm_ref,
+            // the newly chosen impl is not added to the impl cache, for better performance.
+ if (_node->is_type() && + _dynamic_impl->get_kernel_name().find("gemm_ref") == std::string::npos && + impl->get_kernel_name().find("gemm_ref") != std::string::npos) { + return; + } if (impl->get_kernels_source().size() > 0) { auto kernels = _program->get_kernels_cache().compile(updated_params_no_dyn_pad, impl->get_kernels_source()); impl->set_kernels(kernels); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_ref.cl index ab431ca975cbf1..fa1ac644fb7ab4 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_ref.cl @@ -26,11 +26,7 @@ inline uint FUNC(get_input0_index_nt)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, ui } inline uint FUNC(get_input0_index)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z, uint y, uint x) { -#if !TRANSPOSE_INPUT0 - return FUNC_CALL(get_input0_index_nt)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); -#else - return FUNC_CALL(get_input0_index_nt)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, x, y); -#endif + return FUNC_CALL(get_input0_index_nt)(OPTIONAL_SHAPE_INFO_TENSOR INPUT0_DIMS_ORDER); } inline uint FUNC(get_input1_index_nt)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z, uint y, uint x) { @@ -50,11 +46,7 @@ inline uint FUNC(get_input1_index_nt)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, ui } inline uint FUNC(get_input1_index)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z, uint y, uint x) { -#if !TRANSPOSE_INPUT1 - return FUNC_CALL(get_input1_index_nt)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); -#else - return FUNC_CALL(get_input1_index_nt)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, x, y); -#endif + return FUNC_CALL(get_input1_index_nt)(OPTIONAL_SHAPE_INFO_TENSOR INPUT1_DIMS_ORDER); } #ifdef INPUT2_TYPE @@ -100,11 +92,7 @@ KERNEL(gemm_ref)( bidx /= OUTPUT_SIZE_Z; const uint w = bidx % OUTPUT_SIZE_W; -#if !TRANSPOSE_INPUT0 - const uint K = INPUT0_SIZE_X; -#else - const uint K = INPUT0_SIZE_Y; -#endif + const uint K = CAT(INPUT0_SIZE_, MATMUL_AXIS); ACCUMULATOR_TYPE acc = ACCUMULATOR_VAL_ZERO; @@ -129,7 +117,7 @@ KERNEL(gemm_ref)( } #endif - const uint dst_index = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x); + const uint dst_index = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR TR_B, TR_F, TR_W, TR_Z, TR_Y, TR_X); ACTIVATION_TYPE dequantized = TO_ACTIVATION_TYPE(acc); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl index 0fc29d9ace1fb6..85371273899795 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gemm_tiled_opt.cl @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "include/fetch_utils.cl" #include "include/batch_headers/fetch_data.cl" #include "include/batch_headers/sub_group_block_read.cl" #include "include/batch_headers/sub_group_block_write.cl" @@ -27,20 +28,28 @@ #define BLOCK_WRITE_C(ptr, offset, data) BLOCK_WRITEN(OUTPUT_TYPE, 1, ptr, offset, data) #endif // TILE_N > SIMD_WIDTH -inline uint FUNC(get_input0_batch_offset)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z) { +inline uint FUNC(get_input0_index_nt)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z, uint y, uint x) { #if INPUT0_SIMPLE - return GET_DATA_INDEX_6D_SAFE(INPUT0, b, f, w, z, 0, 0); -#else // INPUT0_SIMPLE + return 
GET_DATA_INDEX_6D_SAFE(INPUT0, b, f, w, z, y, x); +#else # error gemm_tiled_opt.cl : Unsupported input 0 format -#endif // INPUT0_SIMPLE +#endif } -inline uint FUNC(get_input1_batch_offset)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z) { +inline uint FUNC(get_input0_index)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z, uint y, uint x) { + return FUNC_CALL(get_input0_index_nt)(OPTIONAL_SHAPE_INFO_TENSOR INPUT0_DIMS_ORDER); +} + +inline uint FUNC(get_input1_index_nt)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z, uint y, uint x) { #if INPUT1_SIMPLE - return GET_DATA_INDEX_6D_SAFE(INPUT1, b, f, w, z, 0, 0); -#else // INPUT1_SIMPLE + return GET_DATA_INDEX_6D_SAFE(INPUT1, b, f, w, z, y, x); +#else # error gemm_tiled_opt.cl : Unsupported input 1 format -#endif // INPUT1_SIMPLE +#endif +} + +inline uint FUNC(get_input1_index)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z, uint y, uint x) { + return FUNC_CALL(get_input1_index_nt)(OPTIONAL_SHAPE_INFO_TENSOR INPUT1_DIMS_ORDER); } #ifdef INPUT2_TYPE @@ -53,13 +62,7 @@ inline uint FUNC(get_input2_batch_offset)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f } #endif // INPUT2_TYPE -inline uint FUNC(get_output_batch_offset)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z) { -#if OUTPUT_SIMPLE - return GET_DATA_INDEX_6D(OUTPUT, b, f, w, z, 0, 0); -#else // OUTPUT_SIMPLE -# error gemm_tiled_opt.cl : Unsupported output format -#endif // OUTPUT_SIMPLE -} +#define VLOAD CAT(vload, SIMD_WIDTH) // Optimized gemm kernel for fp16/fp32 inputs REQD_SUB_GROUP_SIZE(SIMD_WIDTH) @@ -99,48 +102,78 @@ KERNEL(gemm_tiled_opt)( uint y = tile_m_offset; const uint tile_m_iterations = TILE_M_NOT_DIVISIBLE ? (tile_m_num == (tile_m_size - 1) ? TILE_M_LEFTOVER : TILE_M) : TILE_M; - const uint z = batch_number % OUTPUT_SIZE_Z; - batch_number /= OUTPUT_SIZE_Z; - const uint w = batch_number % OUTPUT_SIZE_W; - batch_number /= OUTPUT_SIZE_W; - const uint f = batch_number % OUTPUT_FEATURE_NUM; - batch_number /= OUTPUT_FEATURE_NUM; - const uint b = batch_number % OUTPUT_BATCH_NUM; + const uint z = batch_number % TR_OUTPUT_SIZE_Z; + batch_number /= TR_OUTPUT_SIZE_Z; + const uint w = batch_number % TR_OUTPUT_SIZE_W; + batch_number /= TR_OUTPUT_SIZE_W; + const uint f = batch_number % TR_OUTPUT_FEATURE_NUM; + batch_number /= TR_OUTPUT_FEATURE_NUM; + const uint b = batch_number % TR_OUTPUT_BATCH_NUM; // Batch offsets - const uint batch_offset_input0 = FUNC_CALL(get_input0_batch_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z); - const uint batch_offset_input1 = FUNC_CALL(get_input1_batch_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z); + const uint batch_offset_input0 = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, 0); + const uint batch_offset_input1 = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, 0, tile_n_offset); #ifdef INPUT2_TYPE const uint batch_offset_input2 = FUNC_CALL(get_input2_batch_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z); #endif // INPUT2_TYPE - const uint batch_offset_output = FUNC_CALL(get_output_batch_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z); + uint write_id = 0; + const uint batch_offset_output = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR TR_B, TR_F, TR_W, TR_Z, TR_Y, TR_X); + write_id = 1; + const uint batch_offset_output_diff = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR TR_B, TR_F, TR_W, TR_Z, TR_Y, TR_X) - batch_offset_output; // Start pointers offsets -#if !TRANSPOSE_INPUT0 - const __global INPUT0_TYPE* a_ptr = input0 + batch_offset_input0 + tile_m_offset * 
K_PADDED_IN0; -#else // !TRANSPOSE_INPUT0 - const __global INPUT0_TYPE* a_ptr = input0 + batch_offset_input0 + tile_m_offset; -#endif // !TRANSPOSE_INPUT0 -#if !TRANSPOSE_INPUT1 - const __global INPUT1_TYPE* b_ptr = input1 + batch_offset_input1 + tile_n_offset; -#else // !TRANSPOSE_INPUT1 - const __global INPUT1_TYPE* b_ptr = input1 + batch_offset_input1 + tile_n_offset * K; -#endif // !TRANSPOSE_INPUT1 +#if TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST + const __global INPUT0_TYPE* a_ptr = input0 + batch_offset_input0; + #if HAS_DYNAMIC_K_PADDING + const uint input0_offset = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y+1), 0) - batch_offset_input0; + const uint input0_offset1 = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, (TILE_K)) - batch_offset_input0; + #else + const uint input0_offset = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, 1, 0); + const uint input0_offset1 = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, 0, (TILE_K)); + #endif +#elif TRANSPOSE_INPUT0 == TRANSPOSE_Y_LAST + const __global INPUT0_TYPE* a_ptr = input0 + batch_offset_input0; + #if HAS_DYNAMIC_K_PADDING + const uint input0_offset = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, 1) - batch_offset_input0; + const uint input0_offset1 = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, (TILE_K)) - batch_offset_input0; + #else + const uint input0_offset = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, 0, 1); + const uint input0_offset1 = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, 0, (TILE_K)); + #endif +#endif // TRANSPOSE_INPUT0 +#if TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST + const __global INPUT1_TYPE* b_ptr = input1 + batch_offset_input1; + #if HAS_DYNAMIC_K_PADDING + const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, 1, tile_n_offset) - batch_offset_input1; + #else + const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, 1, 0); + #endif +#elif TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST + const __global INPUT1_TYPE* b_ptr = input1 + batch_offset_input1; + #if HAS_DYNAMIC_K_PADDING + const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, 0, (tile_n_offset + 1)) - batch_offset_input1; + const uint input1_offset1 = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (TILE_K), tile_n_offset) - batch_offset_input1; + #else + const uint input1_offset = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, 0, 1); + const uint input1_offset1 = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR 0, 0, 0, 0, (TILE_K), 0); + #endif + const uint input1_fetch_size = ((N - tile_n_offset) < TILE_K) ? 
(N - tile_n_offset) : TILE_K; +#endif // TRANSPOSE_INPUT1 #ifdef INPUT2_TYPE const __global INPUT2_TYPE* c_ptr = input2 + batch_offset_input2 + tile_m_offset * N + tile_n_offset; #endif // INPUT2_TYPE - __global OUTPUT_TYPE* d_ptr = output + batch_offset_output + tile_m_offset * N + tile_n_offset; + __global OUTPUT_TYPE* d_ptr = output + batch_offset_output; const uint b_raw_global_id = tile_n_offset + sglid; -#if TRANSPOSE_INPUT0 +#if TRANSPOSE_INPUT0 != TRANSPOSE_X_LAST MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile; -#endif // TRANSPOSE_INPUT0 -#if !TRANSPOSE_INPUT1 +#endif // TRANSPOSE_INPUT0 != TRANSPOSE_X_LAST +#if TRANSPOSE_INPUT1 != TRANSPOSE_Y_LAST B_FLOATN b_tile[TILE_K]; -#else // !TRANSPOSE_INPUT1 +#else // TRANSPOSE_INPUT1 != TRANSPOSE_Y_LAST MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile; -#endif // !TRANSPOSE_INPUT1 +#endif // TRANSPOSE_INPUT1 != TRANSPOSE_Y_LAST B_FLOATN c_tile[TILE_M]; unroll_for (uint i = 0; i < TILE_M; i++) { @@ -153,76 +186,65 @@ KERNEL(gemm_tiled_opt)( // Loading B tile unroll_for (uint b_load_id = 0; b_load_id < TILE_K; b_load_id++) { #if IS_DYNAMIC +#if TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST #if HAS_DYNAMIC_N_PADDING - // In case of dynamic padding we can't guarantee memory access alignment for - // block reads (4 bytes), so use scattered read b_tile[b_load_id] = b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]; #else b_tile[b_load_id] = TILE_N_NOT_DIVISIBLE ? (b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]) : BLOCK_READ_B(b_ptr, 0); #endif + b_ptr += input1_offset; +#elif TRANSPOSE_INPUT1 == TRANSPOSE_OTHER // TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST + if (b_raw_global_id > N - 1) { + b_tile[b_load_id] = 0; + } else { + uint b_idx = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (b_load_id + k * TILE_K), x); + b_tile[b_load_id] = input1[b_idx]; + } +#endif // TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST #else // IS_DYNAMIC +#if TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST #if TILE_N_NOT_DIVISIBLE b_tile[b_load_id] = b_raw_global_id > N - 1 ? 
0 : b_ptr[sglid]; #else // TILE_N_NOT_DIVISIBLE b_tile[b_load_id] = BLOCK_READ_B(b_ptr, 0); #endif // TILE_N_NOT_DIVISIBLE + b_ptr += input1_offset; +#elif TRANSPOSE_INPUT1 == TRANSPOSE_OTHER // TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST + if (b_raw_global_id > N - 1) { + b_tile[b_load_id] = 0; + } else { + uint b_idx = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (b_load_id + k * TILE_K), x); + b_tile[b_load_id] = input1[b_idx]; + } +#endif // TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST #endif // IS_DYNAMIC -#if !TRANSPOSE_INPUT1 - b_ptr += N_PADDED; -#else // !TRANSPOSE_INPUT1 - b_ptr += K; -#endif // !TRANSPOSE_INPUT1 } // Loading B tile end - -#if TRANSPOSE_INPUT1 - b_ptr -= K * SIMD_WIDTH - SIMD_WIDTH; - - // B tile shuffling for NT, TT cases - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col0 = BLOCK_SHUFFLE(b_tile, 0); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col1 = BLOCK_SHUFFLE(b_tile, 1); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col2 = BLOCK_SHUFFLE(b_tile, 2); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col3 = BLOCK_SHUFFLE(b_tile, 3); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col4 = BLOCK_SHUFFLE(b_tile, 4); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col5 = BLOCK_SHUFFLE(b_tile, 5); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col6 = BLOCK_SHUFFLE(b_tile, 6); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col7 = BLOCK_SHUFFLE(b_tile, 7); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col8 = BLOCK_SHUFFLE(b_tile, 8); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col9 = BLOCK_SHUFFLE(b_tile, 9); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col10 = BLOCK_SHUFFLE(b_tile, 10); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col11 = BLOCK_SHUFFLE(b_tile, 11); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col12 = BLOCK_SHUFFLE(b_tile, 12); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col13 = BLOCK_SHUFFLE(b_tile, 13); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col14 = BLOCK_SHUFFLE(b_tile, 14); - MAKE_VECTOR_TYPE(INPUT1_TYPE, SIMD_WIDTH) b_tile_col15 = BLOCK_SHUFFLE(b_tile, 15); - - b_tile.s0 = b_tile_col0[sglid]; b_tile.s1 = b_tile_col1[sglid]; - b_tile.s2 = b_tile_col2[sglid]; b_tile.s3 = b_tile_col3[sglid]; - b_tile.s4 = b_tile_col4[sglid]; b_tile.s5 = b_tile_col5[sglid]; - b_tile.s6 = b_tile_col6[sglid]; b_tile.s7 = b_tile_col7[sglid]; - b_tile.s8 = b_tile_col8[sglid]; b_tile.s9 = b_tile_col9[sglid]; - b_tile.sa = b_tile_col10[sglid]; b_tile.sb = b_tile_col11[sglid]; - b_tile.sc = b_tile_col12[sglid]; b_tile.sd = b_tile_col13[sglid]; - b_tile.se = b_tile_col14[sglid]; b_tile.sf = b_tile_col15[sglid]; -#endif // TRANSPOSE_INPUT1 +#if TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST + b_ptr = b_ptr + (input1_offset * sglid); + b_tile = (N > b_raw_global_id) ? VLOAD(0, b_ptr) : 0; + b_ptr = b_ptr + input1_offset1 - (input1_offset * sglid); +#endif // TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST // Loading A tile and tile C calculation unroll_for (uint dot_id = 0; dot_id < tile_m_iterations; dot_id++) { -#if !TRANSPOSE_INPUT0 +#if TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST #if IS_DYNAMIC #if HAS_DYNAMIC_K_PADDING // In case of dynamic padding we can't guarantee memory access alignment for // block reads (4 bytes), so use scattered read - A_FLOATN a_read = a_ptr[dot_id * K_PADDED_IN0 + sglid]; + uint a_idx = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (k * TILE_K + sglid)); + A_FLOATN a_read = input0[a_idx]; #else - A_FLOATN a_read = TILE_K_NOT_DIVISIBLE ? 
a_ptr[dot_id * K_PADDED_IN0 + sglid] : BLOCK_READ_A(a_ptr, dot_id * K); + A_FLOATN a_read = TILE_K_NOT_DIVISIBLE ? a_ptr[sglid] : BLOCK_READ_A(a_ptr, 0); #endif #else // IS_DYNAMIC #if TILE_K_NOT_DIVISIBLE - A_FLOATN a_read = a_ptr[dot_id * K + sglid]; + A_FLOATN a_read = a_ptr[sglid]; #else // TILE_K_NOT_DIVISIBLE - A_FLOATN a_read = BLOCK_READ_A(a_ptr, dot_id * K); + A_FLOATN a_read = BLOCK_READ_A(a_ptr, 0); #endif // TILE_K_NOT_DIVISIBLE #endif // IS_DYNAMIC + a_ptr += input0_offset; unroll_for (uint subtile_k_id = 0; subtile_k_id < TILE_K / SIMD_WIDTH; subtile_k_id++) { unroll_for (uint simd_local_id = 0; simd_local_id < SIMD_WIDTH; simd_local_id++) { @@ -234,42 +256,20 @@ KERNEL(gemm_tiled_opt)( #endif // TILE_K > SIMD_WIDTH } } -#else // !TRANSPOSE_INPUT0 - a_tile[dot_id] = BLOCK_READ_A(a_ptr, dot_id * M); -#endif // !TRANSPOSE_INPUT0 +#elif TRANSPOSE_INPUT0 == TRANSPOSE_OTHER // TRANSPOSE_INPUT0 + uint a_idx = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (k * TILE_K + sglid)); + a_tile[dot_id] = input0[a_idx]; +#endif // TRANSPOSE_INPUT0 } // Loading A tile and tile C calculation end -#if !TRANSPOSE_INPUT0 - a_ptr += TILE_K; -#else // !TRANSPOSE_INPUT0 - a_ptr += TILE_K * M; - - // A tile shuffling for TN, TT cases - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col0 = BLOCK_SHUFFLE(a_tile, 0); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col1 = BLOCK_SHUFFLE(a_tile, 1); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col2 = BLOCK_SHUFFLE(a_tile, 2); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col3 = BLOCK_SHUFFLE(a_tile, 3); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col4 = BLOCK_SHUFFLE(a_tile, 4); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col5 = BLOCK_SHUFFLE(a_tile, 5); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col6 = BLOCK_SHUFFLE(a_tile, 6); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col7 = BLOCK_SHUFFLE(a_tile, 7); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col8 = BLOCK_SHUFFLE(a_tile, 8); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col9 = BLOCK_SHUFFLE(a_tile, 9); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col10 = BLOCK_SHUFFLE(a_tile, 10); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col11 = BLOCK_SHUFFLE(a_tile, 11); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col12 = BLOCK_SHUFFLE(a_tile, 12); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col13 = BLOCK_SHUFFLE(a_tile, 13); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col14 = BLOCK_SHUFFLE(a_tile, 14); - MAKE_VECTOR_TYPE(INPUT0_TYPE, SIMD_WIDTH) a_tile_col15 = BLOCK_SHUFFLE(a_tile, 15); - - a_tile.s0 = a_tile_col0[sglid]; a_tile.s1 = a_tile_col1[sglid]; - a_tile.s2 = a_tile_col2[sglid]; a_tile.s3 = a_tile_col3[sglid]; - a_tile.s4 = a_tile_col4[sglid]; a_tile.s5 = a_tile_col5[sglid]; - a_tile.s6 = a_tile_col6[sglid]; a_tile.s7 = a_tile_col7[sglid]; - a_tile.s8 = a_tile_col8[sglid]; a_tile.s9 = a_tile_col9[sglid]; - a_tile.sa = a_tile_col10[sglid]; a_tile.sb = a_tile_col11[sglid]; - a_tile.sc = a_tile_col12[sglid]; a_tile.sd = a_tile_col13[sglid]; - a_tile.se = a_tile_col14[sglid]; a_tile.sf = a_tile_col15[sglid]; +#if TRANSPOSE_INPUT0 == TRANSPOSE_X_LAST + a_ptr = a_ptr + input0_offset1 - (input0_offset * tile_m_iterations); +#else // TRANSPOSE_INPUT0 + #if TRANSPOSE_INPUT0 == TRANSPOSE_Y_LAST + a_ptr = a_ptr + (input0_offset * sglid); + a_tile = VLOAD(0, a_ptr); + a_ptr = a_ptr + input0_offset1 - (input0_offset * sglid); + #endif // Tile C calculation for TN, TT cases unroll_for (uint 
dot_id = 0; dot_id < tile_m_iterations; dot_id++) { @@ -285,17 +285,31 @@ KERNEL(gemm_tiled_opt)( if (TILE_K_NOT_DIVISIBLE) { // Loading leftovers of the matrix B unroll_for (uint b_load_id = 0; b_load_id < TILE_K_LEFTOVER; b_load_id++) { -#if HAS_DYNAMIC_N_PADDING - b_tile[b_load_id] = b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]; -#else - b_tile[b_load_id] = TILE_N_NOT_DIVISIBLE ? (b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]) : BLOCK_READ_B(b_ptr, 0); -#endif - b_ptr += N_PADDED; + #if TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST + #if HAS_DYNAMIC_N_PADDING + b_tile[b_load_id] = b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]; + #else + b_tile[b_load_id] = TILE_N_NOT_DIVISIBLE ? (b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]) : BLOCK_READ_B(b_ptr, 0); + #endif + b_ptr += input1_offset; + #elif TRANSPOSE_INPUT1 == TRANSPOSE_OTHER // TRANSPOSE_INPUT1 == 0 + if (b_raw_global_id > N - 1) { + b_tile[b_load_id] = 0; + } else { + uint b_idx = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (b_load_id + K_FULL_ITERATIONS * TILE_K), x); + b_tile[b_load_id] = input1[b_idx]; + } + #endif } // Loading leftovers of the matrix B end + #if TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST + b_ptr = b_ptr + (input1_offset * sglid); + b_tile = (N > b_raw_global_id) ? VLOAD(0, b_ptr) : 0; + #endif // TRANSPOSE_INPUT1 // Loading leftovers of the matrix A and tile C calculation unroll_for (uint dot_id = 0; dot_id < tile_m_iterations; dot_id++) { - INPUT0_TYPE a_read = a_ptr[dot_id * K_PADDED_IN0 + sglid]; + uint a_idx = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (K_FULL_ITERATIONS * TILE_K + sglid)); + INPUT0_TYPE a_read = input0[a_idx]; unroll_for (uint simd_id = 0; simd_id < TILE_K_LEFTOVER; simd_id++) { c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read, simd_id)), b_tile[simd_id], c_tile[dot_id]); @@ -306,17 +320,31 @@ KERNEL(gemm_tiled_opt)( #if TILE_K_NOT_DIVISIBLE // Loading leftovers of the matrix B unroll_for (uint b_load_id = 0; b_load_id < TILE_K_LEFTOVER; b_load_id++) { -#if TILE_N_NOT_DIVISIBLE - b_tile[b_load_id] = b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]; -#else // TILE_N_NOT_DIVISIBLE - b_tile[b_load_id] = BLOCK_READ_B(b_ptr, 0); -#endif // TILE_N_NOT_DIVISIBLE - b_ptr += N; + #if TRANSPOSE_INPUT1 == TRANSPOSE_X_LAST + #if TILE_N_NOT_DIVISIBLE + b_tile[b_load_id] = b_raw_global_id > N - 1 ? 0 : b_ptr[sglid]; + #else // TILE_N_NOT_DIVISIBLE + b_tile[b_load_id] = BLOCK_READ_B(b_ptr, 0); + #endif // TILE_N_NOT_DIVISIBLE + b_ptr += input1_offset; + #elif TRANSPOSE_INPUT1 == TRANSPOSE_OTHER // TRANSPOSE_INPUT1 == 0 + if (b_raw_global_id > N - 1) { + b_tile[b_load_id] = 0; + } else { + uint b_idx = FUNC_CALL(get_input1_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (b_load_id + K_FULL_ITERATIONS * TILE_K), x); + b_tile[b_load_id] = input1[b_idx]; + } + #endif } // Loading leftovers of the matrix B end + #if TRANSPOSE_INPUT1 == TRANSPOSE_Y_LAST + b_ptr = b_ptr + (input1_offset * sglid); + b_tile = (N > b_raw_global_id) ? 
VLOAD(0, b_ptr) : 0; + #endif // TRANSPOSE_INPUT1 // Loading leftovers of the matrix A and tile C calculation unroll_for (uint dot_id = 0; dot_id < tile_m_iterations; dot_id++) { - INPUT0_TYPE a_read = a_ptr[dot_id * K + sglid]; + uint a_idx = FUNC_CALL(get_input0_index)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, (y + dot_id), (K_FULL_ITERATIONS * TILE_K + sglid)); + INPUT0_TYPE a_read = input0[a_idx]; unroll_for (uint simd_id = 0; simd_id < TILE_K_LEFTOVER; simd_id++) { c_tile[dot_id] = mad((INPUT0_TYPE)(sub_group_broadcast(a_read, simd_id)), b_tile[simd_id], c_tile[dot_id]); @@ -354,9 +382,9 @@ KERNEL(gemm_tiled_opt)( FUSED_OPS_SCALAR; #endif // FUSED_OPS_CAN_USE_PRELOAD OUTPUT_TYPE res = FUSED_OPS_RESULT_SCALAR; - d_ptr[sglid] = res; + *d_ptr = res; #else // HAS_FUSED_OPS - d_ptr[sglid] = dequantized; + *d_ptr = dequantized; #endif // HAS_FUSED_OPS } #else // IS_DYNAMIC @@ -375,9 +403,9 @@ KERNEL(gemm_tiled_opt)( FUSED_OPS_SCALAR; #endif // FUSED_OPS_CAN_USE_PRELOAD OUTPUT_TYPE res = FUSED_OPS_RESULT_SCALAR; - d_ptr[sglid] = res; + *d_ptr = res; #else // HAS_FUSED_OPS - d_ptr[sglid] = dequantized; + *d_ptr = dequantized; #endif // HAS_FUSED_OPS } @@ -404,7 +432,7 @@ KERNEL(gemm_tiled_opt)( #endif // TILE_N_NOT_DIVISIBLE || B_VEC_SIZE == 1 #endif // IS_DYNAMIC - d_ptr += N; + d_ptr += batch_offset_output_diff; #ifdef INPUT2_TYPE c_ptr += N; #endif // INPUT2_TYPE @@ -415,3 +443,4 @@ #undef BLOCK_READ_A #undef BLOCK_READ_B #undef BLOCK_WRITE_C +#undef VLOAD diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_base.cpp index 35d0c002e8cfd6..2e3333a517112e 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_base.cpp @@ -7,6 +7,168 @@ #include "kernel_selector_utils.h" namespace kernel_selector { + +size_t GemmKernelBase::GetOuputSize(const std::vector<int64_t>& output_order, const kernel_selector::DataTensor &output, + char target_dim) const { + OPENVINO_ASSERT(target_dim == 'X' || target_dim == 'Y'); + + auto output_dims_order = GetDimsOrder(output_order); + int dim_idx = (target_dim == 'X') ? 10 : 8; + switch (output_dims_order[dim_idx]) { + case 'b': + return output.Batch().v; + case 'f': + return output.Feature().v; + case 'w': + return output.W().v; + case 'z': + return output.Z().v; + case 'y': + return output.Y().v; + case 'x': + return output.X().v; + default: + OPENVINO_THROW("Unsupported dimension: ", output_dims_order[dim_idx]); + } +} + +std::vector<int64_t> GemmKernelBase::ConvTo8dims(const std::vector<int64_t>& order_idx) const { + std::vector<int64_t> dims_order; + if (order_idx.size() == 2) { + dims_order = {0, 1, 2, 3, 4, 5}; + dims_order.push_back(order_idx[0] + 6); + dims_order.push_back(order_idx[1] + 6); + } else if (order_idx.size() == 3) { + dims_order.push_back(0); + dims_order.push_back(order_idx[0] == 0 ? 1 : order_idx[0] + 5); + dims_order.push_back(2); + dims_order.push_back(3); + dims_order.push_back(4); + dims_order.push_back(5); + dims_order.push_back(order_idx[1] == 0 ? 1 : order_idx[1] + 5); + dims_order.push_back(order_idx[2] == 0 ? 1 : order_idx[2] + 5); + } else if (order_idx.size() == 4) { + dims_order.push_back(order_idx[0] < 2 ? order_idx[0] : order_idx[0] + 4); + dims_order.push_back(order_idx[1] < 2 ?
order_idx[1] : order_idx[1] + 4); + dims_order.push_back(2); + dims_order.push_back(3); + dims_order.push_back(4); + dims_order.push_back(5); + dims_order.push_back(order_idx[2] < 2 ? order_idx[2] : order_idx[2] + 4); + dims_order.push_back(order_idx[3] < 2 ? order_idx[3] : order_idx[3] + 4); + } else if (order_idx.size() == 5) { + dims_order.push_back(order_idx[0] < 2 ? order_idx[0] : order_idx[0] + 3); + dims_order.push_back(order_idx[1] < 2 ? order_idx[1] : order_idx[1] + 3); + dims_order.push_back(2); + dims_order.push_back(3); + dims_order.push_back(4); + dims_order.push_back(order_idx[2] < 2 ? order_idx[2] : order_idx[2] + 3); + dims_order.push_back(order_idx[3] < 2 ? order_idx[3] : order_idx[3] + 3); + dims_order.push_back(order_idx[4] < 2 ? order_idx[4] : order_idx[4] + 3); + } else if (order_idx.size() == 6) { + dims_order.push_back(order_idx[0] < 2 ? order_idx[0] : order_idx[0] + 2); + dims_order.push_back(order_idx[1] < 2 ? order_idx[1] : order_idx[1] + 2); + dims_order.push_back(2); + dims_order.push_back(3); + dims_order.push_back(order_idx[2] < 2 ? order_idx[2] : order_idx[2] + 2); + dims_order.push_back(order_idx[3] < 2 ? order_idx[3] : order_idx[3] + 2); + dims_order.push_back(order_idx[4] < 2 ? order_idx[4] : order_idx[4] + 2); + dims_order.push_back(order_idx[5] < 2 ? order_idx[5] : order_idx[5] + 2); + } else if (order_idx.size() == 7) { + dims_order.push_back(order_idx[0] < 2 ? order_idx[0] : order_idx[0] + 1); + dims_order.push_back(order_idx[1] < 2 ? order_idx[1] : order_idx[1] + 1); + dims_order.push_back(2); + dims_order.push_back(order_idx[2] < 2 ? order_idx[2] : order_idx[2] + 1); + dims_order.push_back(order_idx[3] < 2 ? order_idx[3] : order_idx[3] + 1); + dims_order.push_back(order_idx[4] < 2 ? order_idx[4] : order_idx[4] + 1); + dims_order.push_back(order_idx[5] < 2 ? order_idx[5] : order_idx[5] + 1); + dims_order.push_back(order_idx[6] < 2 ? 
order_idx[6] : order_idx[6] + 1); + } else { + dims_order = {0, 1, 2, 3, 4, 5, 6, 7}; + } + return dims_order; +} + +std::vector<std::string> GemmKernelBase::GetTransposedDims(const std::vector<int64_t>& order_idx, bool is_tiled_opt) const { + auto converted_dims = ConvTo8dims(order_idx); + std::vector<std::string> dim_ids; + for (auto dim : converted_dims) { + switch (dim) { + case 0: + dim_ids.push_back("b"); + break; + case 1: + dim_ids.push_back("f"); + break; + case 2: + dim_ids.push_back("u"); + break; + case 3: + dim_ids.push_back("v"); + break; + case 4: + dim_ids.push_back("w"); + break; + case 5: + dim_ids.push_back("z"); + break; + case 6: + if (is_tiled_opt) { + dim_ids.push_back("(y+write_id)"); + } else { + dim_ids.push_back("y"); + } + break; + case 7: + dim_ids.push_back("x"); + break; + default: + break; + } + } + return dim_ids; +} + +std::string GemmKernelBase::GetDimsOrder(const std::vector<int64_t>& order_idx) const { + auto get_order_idx = [](std::vector<int64_t> order_idx, int64_t dim_idx) { + int loc = 0; + for (auto idx : order_idx) { + if (idx == dim_idx) + break; + loc += 1; + } + return loc; + }; + + std::string dims_order = ""; + if (order_idx.size() == 2) { + const std::vector<std::string> dims2 = {"y", "x"}; + dims_order = "b,f,w,z," + + dims2[get_order_idx(order_idx, 0)] + "," + dims2[get_order_idx(order_idx, 1)]; + } else if (order_idx.size() == 3) { + const std::vector<std::string> dims3 = {"f", "y", "x"}; + dims_order = "b," + dims3[get_order_idx(order_idx, 0)] + ",w,z," + + dims3[get_order_idx(order_idx, 1)] + "," + dims3[get_order_idx(order_idx, 2)]; + } else if (order_idx.size() == 4) { + const std::vector<std::string> dims4 = {"b", "f", "y", "x"}; + dims_order = dims4[get_order_idx(order_idx, 0)] + "," + dims4[get_order_idx(order_idx, 1)] + ",w,z," + + dims4[get_order_idx(order_idx, 2)] + "," + dims4[get_order_idx(order_idx, 3)]; + } else if (order_idx.size() == 5) { + const std::vector<std::string> dims5 = {"b", "f", "z", "y", "x"}; + dims_order = dims5[get_order_idx(order_idx, 0)] + "," + dims5[get_order_idx(order_idx, 1)] + ",w," + + dims5[get_order_idx(order_idx, 2)] + "," + dims5[get_order_idx(order_idx, 3)] + "," + + dims5[get_order_idx(order_idx, 4)]; + } else if (order_idx.size() == 6) { + const std::vector<std::string> dims6 = {"b", "f", "w", "z", "y", "x"}; + dims_order = dims6[get_order_idx(order_idx, 0)] + "," + dims6[get_order_idx(order_idx, 1)] + "," + + dims6[get_order_idx(order_idx, 2)] + "," + dims6[get_order_idx(order_idx, 3)] + "," + + dims6[get_order_idx(order_idx, 4)] + "," + dims6[get_order_idx(order_idx, 5)]; + } else { + dims_order = "b,f,w,z,y,x"; + } + return dims_order; +} + JitConstants GemmKernelBase::GetJitConstants(const gemm_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); @@ -18,6 +180,15 @@ JitConstants GemmKernelBase::GetJitConstants(const gemm_params& params) const { MakeJitConstant("QUANTIZATION_TERM", params.quantization != QuantizationType::NONE), }); + jit.AddConstants({ + MakeJitConstant("TRANSPOSE_X_LAST", 0), + MakeJitConstant("TRANSPOSE_Y_LAST", 1), + MakeJitConstant("TRANSPOSE_OTHER", 2), + MakeJitConstant("INPUT0_DIMS_ORDER", GetDimsOrder(params.input0_order)), + MakeJitConstant("INPUT1_DIMS_ORDER", GetDimsOrder(params.input1_order)), + MakeJitConstant("MATMUL_AXIS", static_cast<char>(std::toupper(GetDimsOrder(params.input0_order).at(10)))), + }); + return jit; }
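ConvTo8dims and GetDimsOrder above are the core of the transposed-dims plumbing: the first pads an N-rank transpose order into the kernel selector's fixed eight-axis b,f,u,v,w,z,y,x space, and the second renders the order as the comma-separated axis string that becomes INPUT0_DIMS_ORDER / INPUT1_DIMS_ORDER; MATMUL_AXIS is then simply character 10 of that string, the name of the axis that lands innermost. A self-contained sketch of the rank-4 case, written for illustration only (the helper names here are not part of the patch):

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // Rank-4 branch of ConvTo8dims: dims 0/1 stay b/f (indices 0..1),
    // dims 2/3 become y/x (indices 6..7); u,v,w,z (2..5) pass through.
    std::vector<int64_t> conv_to_8dims_rank4(const std::vector<int64_t>& o) {
        return {o[0] < 2 ? o[0] : o[0] + 4,
                o[1] < 2 ? o[1] : o[1] + 4,
                2, 3, 4, 5,
                o[2] < 2 ? o[2] : o[2] + 4,
                o[3] < 2 ? o[3] : o[3] + 4};
    }

    // Rank-4 branch of GetDimsOrder: the position of each logical dim in the
    // order picks the storage-axis name written into INPUT*_DIMS_ORDER.
    std::string dims_order_rank4(const std::vector<int64_t>& o) {
        const std::vector<std::string> dims = {"b", "f", "y", "x"};
        auto pos = [&](int64_t d) {
            size_t loc = 0;
            while (loc < o.size() && o[loc] != d) ++loc;
            return loc;
        };
        return dims[pos(0)] + "," + dims[pos(1)] + ",w,z," + dims[pos(2)] + "," + dims[pos(3)];
    }

    int main() {
        std::vector<int64_t> order = {0, 2, 1, 3};  // swap f and y
        for (auto d : conv_to_8dims_rank4(order)) std::cout << d << ' ';
        std::cout << '\n';                          // prints: 0 6 2 3 4 5 1 7
        std::string s = dims_order_rank4(order);
        std::cout << s << '\n';                     // prints: b,y,w,z,f,x
        std::cout << s.at(10) << '\n';              // prints: x -> MATMUL_AXIS 'X'
    }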
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_base.h index 28032fb1ac591e..aa0aef24e3e982 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_base.h @@ -17,8 +17,11 @@ struct gemm_params : public base_params { float alpha; float beta; - bool transpose_input0; - bool transpose_input1; + uint32_t transpose_input0; + uint32_t transpose_input1; + std::vector<int64_t> input0_order; + std::vector<int64_t> input1_order; + std::vector<int64_t> output_order; QuantizationType quantization = QuantizationType::NONE; ParamsKey GetParamsKey() const override { @@ -49,6 +52,12 @@ class GemmKernelBase : public KernelBaseOpenCL { virtual JitConstants GetJitConstants(const gemm_params& params) const; virtual DispatchData SetDefault(const gemm_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const; + + std::string GetDimsOrder(const std::vector<int64_t>& order_idx) const; + size_t GetOuputSize(const std::vector<int64_t>& output_order, const kernel_selector::DataTensor &output, char target_dim) const; + std::vector<int64_t> ConvTo8dims(const std::vector<int64_t>& order_idx) const; + std::vector<std::string> GetTransposedDims(const std::vector<int64_t>& order_idx, bool is_tiled_opt = false) const; + // Fused ops virtual JitConstants GetFusedPrimitivesJitConstants(const gemm_params& params, const DispatchData& dispatchData) const; Datatype GetActivationType(const gemm_params& params) const; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_ref.cpp index 36d58b7cebf53a..0149527d658360 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_ref.cpp @@ -3,6 +3,7 @@ // #include "gemm_kernel_ref.h" +#include "kernel_selector_utils.h" namespace kernel_selector { ParamsKey GemmKernelRef::GetSupportedKey() const { @@ -35,6 +36,22 @@ DeviceFeaturesKey GemmKernelRef::get_required_device_features_key(const Params& return DeviceFeaturesKey(); } +GemmKernelBase::DispatchData GemmKernelRef::SetDefault(const gemm_params& params) const { + const auto& output = params.outputs[0]; + + DispatchData dispatchData; + + if (!output.is_dynamic()) { + auto total_batches = output.LogicalSize() / + (GetOuputSize(params.output_order, output, 'X') * GetOuputSize(params.output_order, output, 'Y')); + dispatchData.gws = { GetOuputSize(params.output_order, output, 'X'), GetOuputSize(params.output_order, output, 'Y'), + total_batches }; + dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); + } + + return dispatchData; +} + JitConstants GemmKernelRef::GetJitConstants(const gemm_params& params) const { JitConstants jit = Parent::GetJitConstants(params); @@ -46,6 +63,15 @@ JitConstants GemmKernelRef::GetJitConstants(const gemm_params& params) const { jit.Merge(MakeTypeJitConstants(Datatype::F32, "ACTIVATION")); } + jit.AddConstants({ + MakeJitConstant("TR_B", GetTransposedDims(params.output_order).at(0)), + MakeJitConstant("TR_F", GetTransposedDims(params.output_order).at(1)), + MakeJitConstant("TR_W", GetTransposedDims(params.output_order).at(4)), + MakeJitConstant("TR_Z", GetTransposedDims(params.output_order).at(5)), + MakeJitConstant("TR_Y", GetTransposedDims(params.output_order).at(6)), + MakeJitConstant("TR_X", GetTransposedDims(params.output_order).at(7)), + }); + if (!params.fused_ops.empty()) { auto input_dt = GetActivationType(params); FusedOpsConfiguration conf = { "", {"b", "f", "y", "x"}, "dequantized", input_dt, 1
}; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_ref.h index bdabba80d03c77..23aa8441331bf9 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_ref.h @@ -24,6 +24,7 @@ class GemmKernelRef : public GemmKernelBase { FusedOpType::ELTWISE }; } bool Validate(const Params& params, const optional_params& options) const override; + DispatchData SetDefault(const gemm_params& params) const override; JitConstants GetJitConstants(const gemm_params& params) const override; DeviceFeaturesKey get_required_device_features_key(const Params& params, const optional_params& /*options*/) const override; }; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp index e62eb5c04426c8..577e0fbea79106 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gemm/gemm_kernel_tiled_opt.cpp @@ -46,8 +46,10 @@ GemmKernelBase::DispatchData GemmKernelTiledOpt::SetDefault(const gemm_params& p if (!params.has_dynamic_tensors()) { GemmTuningData td = SetTuningParams(params); - auto total_batches = output.LogicalSize() / (output.X().v * output.Y().v); - std::vector global = { output.X().v, output.Y().v, total_batches }; + auto total_batches = output.LogicalSize() / + (GetOuputSize(params.output_order, output, 'X') * GetOuputSize(params.output_order, output, 'Y')); + std::vector global = { GetOuputSize(params.output_order, output, 'X'), GetOuputSize(params.output_order, output, 'Y'), + total_batches }; dispatchData.gws[0] = Align(global[0], td.tile_n_size) / (td.tile_n_size / td.simd_size); dispatchData.gws[1] = Align(global[1], td.tile_m_size) / td.tile_m_size; @@ -109,6 +111,27 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons GemmTuningData tuning_data = SetTuningParams(params); auto b_vec_size = tuning_data.tile_n_size / tuning_data.simd_size; + auto get_output_size = [this](const std::vector& output_order_idx, const int target_idx) { + auto output_dims_order = Parent::GetDimsOrder(output_order_idx); + + switch (output_dims_order.at(target_idx)) { + case 'b': + return "OUTPUT_BATCH_NUM"; + case 'f': + return "OUTPUT_FEATURE_NUM"; + case 'w': + return "OUTPUT_SIZE_W"; + case 'z': + return "OUTPUT_SIZE_Z"; + case 'y': + return "OUTPUT_SIZE_Y"; + case 'x': + return "OUTPUT_SIZE_X"; + default: + return ""; + } + }; + jit.Merge(MakeTypeJitConstants(params.inputs[0].GetDType(), "ACCUMULATOR")); if (params.has_dynamic_tensors()) { DimensionAccessHelper dims0(params.inputs[0]); @@ -117,13 +140,13 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons DimensionAccessHelper dims1_padded(params.inputs[1], true); // Note: Actually currently this kernel is not being selected if it is shape agnostic impl && transposed inputs // Because we cannot get the original rank - auto m_size = params.transpose_input0 ? dims0.x() : dims0.y(); - auto n_size = params.transpose_input1 ? dims1.y() : dims1.x(); - auto n_padded_size = params.transpose_input1 ? "(" + dims1_padded.y() + ")" - : "(" + dims1_padded.x() + ")"; - auto k_size = params.transpose_input0 ? dims0.y() : dims0.x(); - auto k_padded_size_in0 = params.transpose_input0 ? 
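In the dynamic branch above, M, N and K are picked straight out of the ConvTo8dims slots (slot 6 is the y axis, slot 7 the x axis of the padded order); in the static branch below, get_untransposed_dim_size instead walks the order to find which storage axis holds the logical row or column dimension, since the tensors are stored already transposed. A compact illustration of that lookup for the rank-4 case:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Illustration of the lookup inside get_untransposed_dim_size (rank-4):
    // given a transpose order, return which storage axis (0=b, 1=f, 2=y, 3=x)
    // holds the logical axis target_dim_idx.
    size_t storage_axis_of(const std::vector<int64_t>& order, int64_t target_dim_idx) {
        size_t loc = 0;
        for (auto d : order) {
            if (d == target_dim_idx) break;
            ++loc;
        }
        return loc;
    }

    int main() {
        std::vector<int64_t> output_order = {0, 2, 1, 3};
        // Logical M lives at rank-2 == axis 2; the order placed it at position 1,
        // so m_size is read from Feature().v of the transposed tensor.
        std::cout << storage_axis_of(output_order, 2) << '\n';  // prints: 1
        // Logical N (rank-1 == axis 3) stayed at position 3 -> X().v.
        std::cout << storage_axis_of(output_order, 3) << '\n';  // prints: 3
    }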
"(" + dims0_padded.y() + ")" - : "(" + dims0_padded.x() + ")"; + auto input0_dims = ConvTo8dims(params.input0_order); + auto input1_dims = ConvTo8dims(params.input1_order); + auto m_size = dims0.dims_sizes[input0_dims[6]]; + auto n_size = dims1.dims_sizes[input1_dims[7]]; + auto n_padded_size = "(" + dims1_padded.dims_sizes[input1_dims[7]] + ")"; + auto k_size = dims0.dims_sizes[input0_dims[7]]; + auto k_padded_size_in0 = "(" + dims0_padded.dims_sizes[input0_dims[7]] + ")"; const std::string leftover_m = "(" + m_size + "%" + std::to_string(tuning_data.tile_m_size) + ")"; const std::string leftover_n = "(" + n_size + "%" + std::to_string(tuning_data.tile_n_size) + ")"; const std::string leftover_k = "(" + k_size + "%" + std::to_string(tuning_data.tile_k_size) + ")"; @@ -149,6 +172,16 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons MakeJitConstant("TILE_M_LEFTOVER", leftover_m), MakeJitConstant("TILE_K_LEFTOVER", leftover_k), MakeJitConstant("TILE_N_LEFTOVER", leftover_n), + MakeJitConstant("TR_B", GetTransposedDims(params.output_order, true).at(0)), + MakeJitConstant("TR_F", GetTransposedDims(params.output_order, true).at(1)), + MakeJitConstant("TR_W", GetTransposedDims(params.output_order, true).at(4)), + MakeJitConstant("TR_Z", GetTransposedDims(params.output_order, true).at(5)), + MakeJitConstant("TR_Y", GetTransposedDims(params.output_order, true).at(6)), + MakeJitConstant("TR_X", GetTransposedDims(params.output_order, true).at(7)), + MakeJitConstant("TR_OUTPUT_SIZE_Z", get_output_size(params.output_order, 6)), + MakeJitConstant("TR_OUTPUT_SIZE_W", get_output_size(params.output_order, 4)), + MakeJitConstant("TR_OUTPUT_FEATURE_NUM", get_output_size(params.output_order, 2)), + MakeJitConstant("TR_OUTPUT_BATCH_NUM", get_output_size(params.output_order, 0)), }); bool has_dynamic_k_padding = params.transpose_input0 ? params.inputs[0].Y().pad.is_dynamic @@ -160,9 +193,49 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons if (has_dynamic_n_padding) jit.AddConstant(MakeJitConstant("HAS_DYNAMIC_N_PADDING", 1)); } else { - auto m_size = output.Y().v; - auto n_size = output.X().v; - auto k_size = params.transpose_input0 ? params.inputs[0].Y().v : params.inputs[0].X().v; + auto get_untransposed_dim_size = [](const kernel_selector::DataTensor &data_tensor, + const std::vector& dims_order, const std::string dim) { + int64_t target_dim_idx; + const size_t rank = data_tensor.GetDims().size(); + if (dim.compare("Y") == 0) { + target_dim_idx = rank - 2; + } else if (dim.compare("X") == 0) { + target_dim_idx = rank - 1; + } else { + OPENVINO_THROW("Unsupported dimension: ", dim); + } + + size_t loc = (dims_order.size() < rank) ? (rank - dims_order.size()) : 0; + if (dims_order.size() == 0) { + loc = static_cast(target_dim_idx); + } else { + target_dim_idx = (dims_order.size() < rank) ? 
(target_dim_idx + dims_order.size() - rank) : target_dim_idx; + for (auto dim_idx : dims_order) { + if (dim_idx == target_dim_idx) + break; + loc += 1; + } + } + + if (loc == 0) { + return data_tensor.Batch().v; + } else if (loc == 1) { + return data_tensor.Feature().v; + } else if (loc == (rank - 1) && rank >= 3) { + return data_tensor.X().v; + } else if (loc == (rank - 2) && rank >= 4) { + return data_tensor.Y().v; + } else if (loc == (rank - 3) && rank >= 5) { + return data_tensor.Z().v; + } else if (loc == (rank - 4) && rank >= 6) { + return data_tensor.W().v; + } + OPENVINO_THROW("Target dimension is not found."); + }; + + auto m_size = get_untransposed_dim_size(output, params.output_order, "Y"); + auto n_size = get_untransposed_dim_size(output, params.output_order, "X"); + auto k_size = get_untransposed_dim_size(params.inputs[0], params.input0_order, "X"); auto leftover_m = m_size % tuning_data.tile_m_size; auto leftover_n = n_size % tuning_data.tile_n_size; auto leftover_k = k_size % tuning_data.tile_k_size; @@ -184,6 +257,16 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons MakeJitConstant("TILE_M_LEFTOVER", leftover_m), MakeJitConstant("TILE_K_LEFTOVER", leftover_k), MakeJitConstant("TILE_N_LEFTOVER", leftover_n), + MakeJitConstant("TR_B", GetTransposedDims(params.output_order, true).at(0)), + MakeJitConstant("TR_F", GetTransposedDims(params.output_order, true).at(1)), + MakeJitConstant("TR_W", GetTransposedDims(params.output_order, true).at(4)), + MakeJitConstant("TR_Z", GetTransposedDims(params.output_order, true).at(5)), + MakeJitConstant("TR_Y", GetTransposedDims(params.output_order, true).at(6)), + MakeJitConstant("TR_X", GetTransposedDims(params.output_order, true).at(7)), + MakeJitConstant("TR_OUTPUT_SIZE_Z", get_output_size(params.output_order, 6)), + MakeJitConstant("TR_OUTPUT_SIZE_W", get_output_size(params.output_order, 4)), + MakeJitConstant("TR_OUTPUT_FEATURE_NUM", get_output_size(params.output_order, 2)), + MakeJitConstant("TR_OUTPUT_BATCH_NUM", get_output_size(params.output_order, 0)), }); } @@ -283,7 +366,8 @@ bool GemmKernelTiledOpt::Validate(const Params& params, const optional_params& o gmm_params.inputs[1].X().v % 16 || gmm_params.inputs[1].Y().v % 16; // If gmm_params has dynamic inputs, the correct dimension value cannot be obtained // and leftovers cannot be calculated, so it returns false - if ((gmm_params.transpose_input0 || gmm_params.transpose_input1) && (gemm_leftovers || gmm_params.has_dynamic_inputs())) + if ((gmm_params.transpose_input0 || gmm_params.transpose_input1) && (gemm_leftovers || gmm_params.has_dynamic_inputs()) && + !gmm_params.is_shape_agnostic) return false; for (size_t i = 1; i < gmm_params.inputs.size(); i++) diff --git a/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp b/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp index b3b69fee255287..2cea239ae4d9c8 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp @@ -10,6 +10,7 @@ #include "openvino/op/matmul.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/fake_quantize.hpp" +#include "intel_gpu/op/gemm.hpp" #include "intel_gpu/primitives/gemm.hpp" #include "intel_gpu/primitives/fully_connected.hpp" @@ -17,6 +18,14 @@ #include "intel_gpu/primitives/reorder.hpp" #include "intel_gpu/primitives/permute.hpp" +namespace ov { +namespace op { +namespace internal { +using Gemm = ov::intel_gpu::op::Gemm; +} // namespace internal +} // namespace op +} // namespace ov + namespace ov { namespace 
intel_gpu { @@ -133,7 +142,52 @@ static void CreateMatMulOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v0::MatMul>& op) { +static void CreateGemmOp(ProgramBuilder& p, const std::shared_ptr<ov::intel_gpu::op::Gemm>& op) { + validate_inputs_count(op, {2}); + auto inputs = p.GetInputInfo(op); + std::string layerName = layer_type_name_ID(op); + + auto alpha = 1.0f; + auto beta = 0.0f; + + auto shape_a = op->get_input_partial_shape(0); + auto shape_b = op->get_input_partial_shape(1); + auto out_shape = op->get_output_partial_shape(0); + + size_t rank_a = shape_a.rank().get_length(); + size_t rank_b = shape_b.rank().get_length(); + size_t output_rank = out_shape.rank().get_length(); + + OPENVINO_ASSERT(rank_a == op->get_input0_order().size(), "[GPU] Length of input0_order is not same as rank of input0"); + OPENVINO_ASSERT(rank_b == op->get_input1_order().size(), "[GPU] Length of input1_order is not same as rank of input1"); + OPENVINO_ASSERT(output_rank == op->get_output_order().size(), "[GPU] Length of output_order is not same as rank of output"); + + auto gemmPrim = cldnn::gemm(layerName, + inputs, + cldnn::element_type_to_data_type(op->get_output_element_type(0)), + op->get_input0_order(), + op->get_input1_order(), + op->get_output_order(), + alpha, + beta); + + p.add_primitive(*op, gemmPrim); + + if (!p.use_new_shape_infer()) { + auto outDims = op->get_output_shape(0); + auto outDimsN = outDims.size(); + // Reshape output if gemm specific shape does not match default one + if (outDimsN < 4) { + auto outputShape = tensor_from_dims(outDims); + auto outReshapeName = layerName + "_cldnn_out_reshape"; + auto outReshapePrim = cldnn::reshape(outReshapeName, cldnn::input_info(layerName), outputShape); + p.add_primitive(*op, outReshapePrim); + } + } +} + REGISTER_FACTORY_IMPL(v0, MatMul); +REGISTER_FACTORY_IMPL(internal, Gemm); } // namespace intel_gpu } // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/gemm.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/gemm.cpp new file mode 100644 index 00000000000000..1bc5261cbdbb22 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/gemm.cpp @@ -0,0 +1,88 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/op/gemm.hpp" +#include "matmul_shape_inference.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/op/matmul.hpp" + +namespace ov { +namespace intel_gpu { +namespace op { + +Gemm::Gemm(const ov::Output<Node>& A, + const ov::Output<Node>& B, + const std::vector<int64_t>& order_a, + const std::vector<int64_t>& order_b, + const std::vector<int64_t>& order_c, + const ov::element::Type output_type) + : ov::op::v0::MatMul() , m_order_a(order_a) , m_order_b(order_b) , m_order_c(order_c) , m_output_type(output_type) { + set_arguments({A, B}); + set_transpose_a(false); + set_transpose_b(false); + validate_and_infer_types(); +} + +std::shared_ptr<ov::Node> Gemm::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + + return std::make_shared<Gemm>(new_args.at(0), new_args.at(1), m_order_a, m_order_b, m_order_c, m_output_type); +} + +void Gemm::validate_and_infer_types() { + const auto input_size = get_input_size(); + NODE_VALIDATION_CHECK(this, + input_size == 2, + "Number of inputs is incorrect. Current value is: ", + input_size, + ", expected 2."); + + auto out_shapes = shape_infer(this, std::vector<ov::PartialShape>{get_input_partial_shape(0), get_input_partial_shape(1)}, m_order_a, m_order_b, m_order_c); + + auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type; + set_output_type(0, output_type, out_shapes[0]); +} + +bool Gemm::visit_attributes(ov::AttributeVisitor &visitor) { + visitor.on_attribute("order_a", m_order_a); + visitor.on_attribute("order_b", m_order_b); + visitor.on_attribute("order_c", m_order_c); + visitor.on_attribute("output_type", m_output_type); + return true; +} +
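The free function shape_infer below reuses MatMul shape inference on permuted shapes: order_a and order_b are applied to the inputs, MatMul inference runs on the results, and order_c permutes the product. A quick standalone check with static extents, using the shapes from test_transpose_matmul added later in this patch (B=19, M=37, K=23, N=29; the permute helper here is illustrative):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Same permutation rule as the transpose_shape lambda in Gemm::shape_infer,
    // specialized to static extents for a quick sanity check.
    std::vector<size_t> permute(const std::vector<size_t>& s, const std::vector<int64_t>& order) {
        std::vector<size_t> out(s.size());
        for (size_t i = 0; i < order.size(); ++i)
            out[i] = s[order[i]];
        return out;
    }

    int main() {
        std::vector<size_t> a = {19, 23, 1, 37};           // stored transposed
        std::vector<size_t> b = {29, 19, 1, 23};
        auto at = permute(a, {0, 2, 3, 1});                // -> {19, 1, 37, 23}
        auto bt = permute(b, {1, 2, 3, 0});                // -> {19, 1, 23, 29}
        // MatMul over the last two axes of the permuted shapes:
        std::vector<size_t> c = {at[0], at[1], at[2], bt[3]};  // -> {19, 1, 37, 29}
        for (auto d : c) std::cout << d << ' ';
        std::cout << '\n';
    }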
+std::vector<ov::PartialShape> shape_infer(const Gemm* op, + std::vector<ov::PartialShape> input_shapes, + const std::vector<int64_t>& order_a, + const std::vector<int64_t>& order_b, + const std::vector<int64_t>& order_c) { + auto transpose_shape = [](const ov::PartialShape shape, const std::vector<int64_t>& order) { + auto shape_transposed = ov::PartialShape::dynamic(shape.rank()); + for (size_t i = 0; i < order.size(); i++) { + shape_transposed[i] = shape[order[i]]; + } + + return shape_transposed; + }; + auto shape_a = input_shapes[0]; + auto shape_b = input_shapes[1]; + + auto shape_a_t = (order_a.size() > 1) ? transpose_shape(shape_a, order_a) : shape_a; + auto shape_b_t = (order_b.size() > 1) ? transpose_shape(shape_b, order_b) : shape_b; + auto out_shapes = ov::op::v0::shape_infer(dynamic_cast<const ov::op::v0::MatMul*>(op), std::vector<ov::PartialShape>{shape_a_t, shape_b_t}); + + if (order_c.size() > 0) { + return { transpose_shape(out_shapes[0], order_c) }; + } else { + return { out_shapes[0] }; + } +} + +} // namespace op +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/transpose_matmul_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/transpose_matmul_fusion.cpp new file mode 100644 index 00000000000000..5cdb0b75d6b787 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/transpose_matmul_fusion.cpp @@ -0,0 +1,193 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/op/gemm.hpp" +#include "openvino/core/node_vector.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "transpose_matmul_fusion.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/any.hpp" +#include "transformations/utils/utils.hpp" + +using namespace ov::pass::pattern; +using ov::pass::pattern::op::Or; + +namespace ov { +namespace intel_gpu { + +namespace { +std::vector<int64_t> default_order(size_t rank) { + std::vector<int64_t> order(rank); + std::iota(order.begin(), order.end(), 0); + return order; +} +} // namespace + +class TransposeMatMulMatcher : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("TransposeMatMulMatcher", "0"); + TransposeMatMulMatcher(); +}; + +class TransposeMatMulTransposeMatcher : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("TransposeMatMulTransposeMatcher", "0"); + TransposeMatMulTransposeMatcher(); +}; + +TransposeMatMulFusion::TransposeMatMulFusion() { + add_matcher<TransposeMatMulMatcher>(); + add_matcher<TransposeMatMulTransposeMatcher>(); +} +
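The matchers that follow fold an adjacent Transpose's order constant into the Gemm input orders, and additionally fold the MatMul node's own transpose_a/transpose_b attributes by swapping the last two entries of the corresponding order, so a MatMul with transpose_b=true but no explicit Transpose still lowers to the right layout. A minimal illustration of that swap:

    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    int main() {
        // Default (identity) order for a rank-4 input, as built by default_order().
        std::vector<int64_t> order_b = {0, 1, 2, 3};
        bool transpose_b = true;  // MatMul attribute, no explicit Transpose node
        if (transpose_b && order_b.size() > 1)
            std::swap(*(order_b.end() - 1), *(order_b.end() - 2));
        for (auto d : order_b) std::cout << d << ' ';  // prints: 0 1 3 2
        std::cout << '\n';
    }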
+TransposeMatMulMatcher::TransposeMatMulMatcher() { + auto is_fp_type = [](const ov::Output<ov::Node>& output) -> bool { + switch (output.get_element_type()) { + case ov::element::f16: + case ov::element::f32: return true; + default: return false; + } + }; + auto not_transpose = [is_fp_type](const ov::Output<ov::Node>& output) -> bool { + return std::dynamic_pointer_cast<ov::op::v1::Transpose>(output.get_node_shared_ptr()) == nullptr + && is_fp_type(output); + }; + auto input_a_m = any_input(not_transpose); + auto input_b_m = any_input(not_transpose); + auto transpose_a_order_m = wrap_type<ov::op::v0::Constant>(consumers_count(1)); + auto transpose_b_order_m = wrap_type<ov::op::v0::Constant>(consumers_count(1)); + auto transpose_a_m = wrap_type<ov::op::v1::Transpose>({input_a_m, transpose_a_order_m}, is_fp_type); + auto transpose_b_m = wrap_type<ov::op::v1::Transpose>({input_b_m, transpose_b_order_m}, is_fp_type); + + auto matmul_in_a = std::make_shared<Or>(OutputVector{input_a_m, transpose_a_m}); + auto matmul_in_b = std::make_shared<Or>(OutputVector{input_b_m, transpose_b_m}); + + auto matmul_m = wrap_type<ov::op::v0::MatMul>({ matmul_in_a, matmul_in_b }); + + ov::matcher_pass_callback callback = [=](Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + + auto matmul = std::dynamic_pointer_cast<ov::op::v0::MatMul>(pattern_map.at(matmul_m).get_node_shared_ptr()); + if (!matmul || transformation_callback(matmul)) { + return false; + } + + auto users = matmul->get_output_target_inputs(0); + if (users.size() == 1 && dynamic_cast<ov::op::v1::Transpose*>(users.begin()->get_node()) != nullptr) { + return false; + } + + auto order_a = default_order(matmul->get_input_partial_shape(0).size()); + auto order_b = default_order(matmul->get_input_partial_shape(1).size()); + auto order_c = default_order(matmul->get_output_partial_shape(0).size()); + + if (pattern_map.count(transpose_a_m) > 0) { + auto tranpose_a_order = std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(transpose_a_order_m).get_node_shared_ptr()); + order_a = tranpose_a_order->cast_vector<int64_t>(); + } + if (matmul->get_transpose_a() && order_a.size() > 1) { + std::swap(*(order_a.end() - 1), *(order_a.end() - 2)); + } + if (pattern_map.count(transpose_b_m) > 0) { + auto tranpose_b_order = std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(transpose_b_order_m).get_node_shared_ptr()); + order_b = tranpose_b_order->cast_vector<int64_t>(); + } + if (matmul->get_transpose_b() && order_b.size() > 1) { + std::swap(*(order_b.end() - 1), *(order_b.end() - 2)); + } + + auto input_a = pattern_map.at(input_a_m).get_node_shared_ptr(); + auto input_b = pattern_map.at(input_b_m).get_node_shared_ptr(); + + auto gemm = std::make_shared<op::Gemm>(input_a, input_b, order_a, order_b, order_c); + gemm->set_friendly_name(matmul->get_friendly_name()); + ov::copy_runtime_info(m.get_matched_nodes(), gemm); + ov::replace_node(matmul, gemm); + return true; + }; + + auto m = std::make_shared<Matcher>(matmul_m, "TransposeMatMulMatcher"); + this->register_matcher(m, callback); +} +
+TransposeMatMulTransposeMatcher::TransposeMatMulTransposeMatcher() { + auto is_fp_type = [](const ov::Output<ov::Node>& output) -> bool { + switch (output.get_element_type()) { + case ov::element::f16: + case ov::element::f32: return true; + default: return false; + } + }; + auto not_transpose = [is_fp_type](const ov::Output<ov::Node>& output) -> bool { + return std::dynamic_pointer_cast<ov::op::v1::Transpose>(output.get_node_shared_ptr()) == nullptr + && is_fp_type(output); + }; + auto input_a_m = any_input(not_transpose); + auto input_b_m = any_input(not_transpose); + auto transpose_a_order_m = wrap_type<ov::op::v0::Constant>(consumers_count(1)); + auto transpose_b_order_m = wrap_type<ov::op::v0::Constant>(consumers_count(1)); + auto transpose_a_m = wrap_type<ov::op::v1::Transpose>({input_a_m, transpose_a_order_m}, is_fp_type); + auto transpose_b_m = wrap_type<ov::op::v1::Transpose>({input_b_m, transpose_b_order_m}, is_fp_type); + + auto matmul_in_a = std::make_shared<Or>(OutputVector{input_a_m, transpose_a_m}); + auto matmul_in_b = std::make_shared<Or>(OutputVector{input_b_m, transpose_b_m}); + + auto matmul_m = wrap_type<ov::op::v0::MatMul>({ matmul_in_a, matmul_in_b }); + auto transpose_c_order_m = wrap_type<ov::op::v0::Constant>(consumers_count(1)); + auto transpose_c_m = wrap_type<ov::op::v1::Transpose>({matmul_m, transpose_c_order_m}); + + ov::matcher_pass_callback callback = [=](Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + + auto matmul = std::dynamic_pointer_cast<ov::op::v0::MatMul>(pattern_map.at(matmul_m).get_node_shared_ptr()); + if (!matmul || transformation_callback(matmul)) { + return false; + } + + auto tranpose_c_order = std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(transpose_c_order_m).get_node_shared_ptr()); + auto order_a = default_order(matmul->get_input_partial_shape(0).size()); + auto order_b = default_order(matmul->get_input_partial_shape(1).size()); + auto order_c = tranpose_c_order->cast_vector<int64_t>(); + + if (pattern_map.count(transpose_a_m) > 0) { + auto tranpose_a_order = std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(transpose_a_order_m).get_node_shared_ptr()); + order_a = tranpose_a_order->cast_vector<int64_t>(); + } + if (matmul->get_transpose_a() && order_a.size() > 1) { + std::swap(*(order_a.end() - 1), *(order_a.end() - 2)); + } + if (pattern_map.count(transpose_b_m) > 0) { + auto tranpose_b_order = std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(transpose_b_order_m).get_node_shared_ptr()); + order_b = tranpose_b_order->cast_vector<int64_t>(); + } + if (matmul->get_transpose_b() && order_b.size() > 1) { + std::swap(*(order_b.end() - 1), *(order_b.end() - 2)); + } + + auto input_a = pattern_map.at(input_a_m).get_node_shared_ptr(); + auto input_b = pattern_map.at(input_b_m).get_node_shared_ptr(); + + auto gemm = std::make_shared<op::Gemm>(input_a, input_b, order_a, order_b, order_c); + gemm->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info(m.get_matched_nodes(), gemm); + ov::replace_node(m.get_match_root(), gemm); + return true; + }; + + auto m = std::make_shared<Matcher>(transpose_c_m, "TransposeMatMulTransposeMatcher"); + this->register_matcher(m, callback); +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations/transpose_matmul_fusion.hpp b/src/plugins/intel_gpu/src/plugin/transformations/transpose_matmul_fusion.hpp new file mode 100644 index 00000000000000..38752f0866e079 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/transformations/transpose_matmul_fusion.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_gpu { + +class TransposeMatMulFusion: public ov::pass::GraphRewrite { +public: + OPENVINO_RTTI("TransposeMatMulFusion", "0"); + TransposeMatMulFusion(); +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 0c57b56671349c..1e90482984d169 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -124,6 +124,7 @@ #include "plugin/transformations/kv_cache_fusion.hpp" #include "plugin/transformations/fc_convert_fusion.hpp" #include "plugin/transformations/clamp_fp16_output.hpp" +#include "plugin/transformations/transpose_matmul_fusion.hpp" #include "transformations/low_precision/mark_dequantization_subgraph.hpp" #include "low_precision/pull_reshape_through_dequantization.hpp" @@ -702,6 +703,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + if (!device_info.supports_immad) + manager.register_pass<TransposeMatMulFusion>(); // This is supposed to be the last
pass to ensure that we don't have name collisions until // GPU plugin stops using friendly names for program creation diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 5ba72f7ac0e99c..906e8276de8c80 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -62,8 +62,6 @@ std::vector disabledTestPatterns() { R"(.*smoke_LPT/InterpolateTransformation.*)", // Issue: 128924 R"(.*OVClassModelTestP/OVClassModelTestP.ImportModelWithNullContextThrows.*)", - // Issue: 129802 - R"(.*smoke_OVClassBasicTestP/OVClassBasicTestP.registerExistingPluginThrows.*)", #if defined(_WIN32) R"(.*KernelCachingSupportCase.*CanCreateCacheDirAndDumpBinariesUnicodePath.*)", #endif diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/primitive_comparison_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/primitive_comparison_test.cpp index 7fafb55beeccb8..0390593b591dfb 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/primitive_comparison_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/primitive_comparison_test.cpp @@ -69,7 +69,7 @@ TEST(primitive_comparison, gemm) { auto gemm_prim_eq = gemm("gemm_eq", {input_info("input0_eq"), input_info("input1_eq")}, data_types::f32); auto gemm_prim_rank = gemm("gemm", def_inputs, data_types::f32, false, false, 1.0f, 0.0f, 2, 2); auto gemm_prim_alpha = gemm("gemm", def_inputs, data_types::f32, false, false, 1.5f); - auto gemm_prim_transpose = gemm("gemm", def_inputs, data_types::f32, true); + auto gemm_prim_transpose = gemm("gemm", def_inputs, data_types::f32, true, false); ASSERT_EQ(gemm_prim, gemm_prim_eq); ASSERT_NE(gemm_prim, gemm_prim_rank); diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/matmul_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/matmul_si_test.cpp index a641305b4329af..ae5fcceeecaeef 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/matmul_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/matmul_si_test.cpp @@ -42,7 +42,9 @@ TEST_P(gemm_test, shape_infer) { auto matrix_a_layout_prim = std::make_shared("matrix_a", p.matrix_a_layout); auto matrix_b_layout_prim = std::make_shared("matrix_b", p.matrix_b_layout); auto gemm_prim = std::make_shared("output", std::vector{ input_info("matrix_a"), input_info("matrix_b") }, - p.data_type, p.transpose_a, p.transpose_b); + p.data_type, p.transpose_a, p.transpose_b, 1.0f, 0.0f, + p.matrix_a_layout.get_partial_shape().rank().get_length(), + p.matrix_b_layout.get_partial_shape().rank().get_length()); cldnn::program prog(engine); @@ -166,7 +168,9 @@ TEST_P(gemm_test_preferred_output_format, shape_infer) { auto matrix_a_layout_prim = std::make_shared("matrix_a", p.matrix_a_layout); auto matrix_b_layout_prim = std::make_shared("matrix_b", p.matrix_b_layout); auto gemm_prim = std::make_shared("output", std::vector{ input_info("matrix_a"), input_info("matrix_b") }, - p.data_type, p.transpose_a, p.transpose_b); + p.data_type, p.transpose_a, p.transpose_b, 1.0f, 0.0f, + p.matrix_a_layout.get_partial_shape().rank().get_length(), + p.matrix_b_layout.get_partial_shape().rank().get_length()); cldnn::program prog(engine, {ov::intel_gpu::allow_new_shape_infer(true)}); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp 
b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index 4ef078f7df7ee4..96d046b7ba224f 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -9,6 +9,7 @@ #include #include #include "openvino/reference/matmul.hpp" +#include "openvino/reference/transpose.hpp" #include "intel_gpu/runtime/compilation_context.hpp" #include "gemm_inst.h" @@ -690,6 +691,229 @@ class gemm_gpu_tests: public ::testing::Test { } } } + + void test_transpose_matmul(bool is_caching_test) { + tests::random_generator rg; + rg.set_seed(GET_SUITE_NAME); + + const unsigned long BATCH_SIZE = 19; + const unsigned long M_SIZE = 37; + const unsigned long K_SIZE = 23; + const unsigned long N_SIZE = 29; + + auto fill_mem = [&](cldnn::memory_ptr mem, std::vector& data) { + cldnn::mem_lock mem_ptr(mem, get_test_stream()); + auto&& l = mem->get_layout(); + auto data_idx = 0; + for (cldnn::tensor::value_type b = 0; b < l.batch(); ++b) { + for (cldnn::tensor::value_type f = 0; f < l.feature(); ++f) { + for (cldnn::tensor::value_type y = 0; y < l.spatial(1); ++y) { + for (cldnn::tensor::value_type x = 0; x < l.spatial(0); ++x) { + auto tensor_coord = cldnn::tensor{{b, f, x, y}, 0}; + auto buffer_idx = l.get_linear_offset(tensor_coord); + mem_ptr[buffer_idx] = data[data_idx++]; + } + } + } + } + }; + + auto& engine = get_test_engine(); + ov::Shape input0_shape = { BATCH_SIZE, K_SIZE, 1, M_SIZE }; + ov::Shape input1_shape = { N_SIZE, BATCH_SIZE, 1, K_SIZE }; + std::vector input0_order = {0, 2, 3, 1}; + std::vector input1_order = {1, 2, 3, 0}; + auto input0_layout = layout{ov::PartialShape::dynamic(input0_shape.size()), data_types::f32, format::bfyx}; + auto input1_layout = layout{ov::PartialShape::dynamic(input1_shape.size()), data_types::f32, format::bfyx}; + auto input0_mem = engine.allocate_memory(layout{ov::PartialShape(input0_shape), data_types::f32, format::bfyx}); + auto input1_mem = engine.allocate_memory(layout{ov::PartialShape(input1_shape), data_types::f32, format::bfyx}); + + auto input_0_data = rg.generate_random_1d(ov::shape_size(input0_shape), -2, 2); + auto input_1_data = rg.generate_random_1d(ov::shape_size(input1_shape), -2, 2); + + fill_mem(input0_mem, input_0_data); + fill_mem(input1_mem, input_1_data); + + topology topology; + topology.add(input_layout("input0", input0_layout), + input_layout("input1", input1_layout), + gemm("gemm", { input_info("input0"), input_info("input1") }, data_types::f32, input0_order, input1_order) + ); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + network->set_input_data("input0", input0_mem); + network->set_input_data("input1", input1_mem); + + auto inst = network->get_primitive("gemm"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + auto outputs = network->execute(); + + auto output_mem = outputs.at("gemm").get_memory(); + cldnn::mem_lock output_ptr(output_mem, get_test_stream()); + + ov::Shape ref_input0_shape = { BATCH_SIZE, 1, M_SIZE, K_SIZE }; + ov::Shape ref_input1_shape = { BATCH_SIZE, 1, K_SIZE, N_SIZE }; + ov::Shape ref_output_shape = { BATCH_SIZE, 1, M_SIZE, N_SIZE }; + + std::vector ref_out_data; + ref_out_data.resize(ov::shape_size(ref_output_shape)); + + std::vector 
ref_input_0_data(input_0_data.size()); + std::vector ref_input_1_data(input_1_data.size()); + + ov::reference::transpose((const char *)(input_0_data.data()), + (char *)(ref_input_0_data.data()), + input0_shape, + sizeof(float), + input0_order, + ref_input0_shape); + + ov::reference::transpose((const char *)(input_1_data.data()), + (char *)(ref_input_1_data.data()), + input1_shape, + sizeof(float), + input1_order, + ref_input1_shape); + + ov::reference::matmul(ref_input_0_data.data(), + ref_input_1_data.data(), + ref_out_data.data(), + ref_input0_shape, + ref_input1_shape, + ref_output_shape, + false, + false); + + ASSERT_EQ(output_ptr.size(), ref_out_data.size()); + + const auto abs_error = 0.0001; + for (uint32_t i = 0; i < ref_out_data.size(); ++i) { + ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error); + } + } + + void test_transpose_matmul_transpose(bool is_caching_test) { + tests::random_generator rg; + rg.set_seed(GET_SUITE_NAME); + + const unsigned long BATCH_SIZE = 19; + const unsigned long M_SIZE = 17; + const unsigned long K_SIZE = 22; + const unsigned long N_SIZE = 32; + + auto fill_mem = [&](cldnn::memory_ptr mem, std::vector& data) { + cldnn::mem_lock mem_ptr(mem, get_test_stream()); + auto&& l = mem->get_layout(); + auto data_idx = 0; + for (cldnn::tensor::value_type b = 0; b < l.batch(); ++b) { + for (cldnn::tensor::value_type f = 0; f < l.feature(); ++f) { + for (cldnn::tensor::value_type y = 0; y < l.spatial(1); ++y) { + for (cldnn::tensor::value_type x = 0; x < l.spatial(0); ++x) { + auto tensor_coord = cldnn::tensor{{b, f, x, y}, 0}; + auto buffer_idx = l.get_linear_offset(tensor_coord); + mem_ptr[buffer_idx] = data[data_idx++]; + } + } + } + } + }; + + auto& engine = get_test_engine(); + ov::Shape input0_shape = { M_SIZE, K_SIZE, 1, BATCH_SIZE }; + ov::Shape input1_shape = { N_SIZE, 1, BATCH_SIZE, K_SIZE }; + std::vector input0_order = {3, 2, 0, 1}; + std::vector input1_order = {2, 1, 3, 0}; + std::vector output_order = {1, 0, 3, 2}; + auto input0_layout = layout{ov::PartialShape::dynamic(input0_shape.size()), data_types::f16, format::bfyx}; + auto input1_layout = layout{ov::PartialShape::dynamic(input1_shape.size()), data_types::f16, format::bfyx}; + auto input0_mem = engine.allocate_memory(layout{ov::PartialShape(input0_shape), data_types::f16, format::bfyx}); + auto input1_mem = engine.allocate_memory(layout{ov::PartialShape(input1_shape), data_types::f16, format::bfyx}); + + auto input_0_data = rg.generate_random_1d(ov::shape_size(input0_shape), -2, 2); + auto input_1_data = rg.generate_random_1d(ov::shape_size(input1_shape), -2, 2); + + fill_mem(input0_mem, input_0_data); + fill_mem(input1_mem, input_1_data); + + topology topology; + topology.add(input_layout("input0", input0_layout), + input_layout("input1", input1_layout), + gemm("gemm", { input_info("input0"), input_info("input1") }, data_types::f16, input0_order, input1_order, output_order) + ); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + network->set_input_data("input0", input0_mem); + network->set_input_data("input1", input1_mem); + + auto inst = network->get_primitive("gemm"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + auto outputs = network->execute(); + + auto output_mem = 
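Both tests validate against a reference built the same way the fusion decomposes the op: permute each input by its order, run a plain matmul, and (for the second test) permute the product by output_order; computing the f16 reference in ov::float16 keeps rounding comparable to the device so the tight abs_error holds. The shared shape of that check could be factored as below (reference_gemm is a hypothetical helper, assuming only the ov::reference calls already used in these tests):

    #include <cstdint>
    #include <vector>
    #include "openvino/core/shape.hpp"
    #include "openvino/reference/matmul.hpp"
    #include "openvino/reference/transpose.hpp"

    // Hypothetical refactoring of the reference path shared by both tests:
    // expected = matmul(transpose(A, order_a), transpose(B, order_b)),
    // with the permuted shapes precomputed by the caller. T is float or
    // ov::float16 so the reference rounds the same way the device does.
    template <typename T>
    std::vector<T> reference_gemm(const std::vector<T>& a, const ov::Shape& a_shape,
                                  const std::vector<int64_t>& order_a, const ov::Shape& a_t_shape,
                                  const std::vector<T>& b, const ov::Shape& b_shape,
                                  const std::vector<int64_t>& order_b, const ov::Shape& b_t_shape,
                                  const ov::Shape& out_shape) {
        std::vector<T> a_t(a.size()), b_t(b.size()), out(ov::shape_size(out_shape));
        ov::reference::transpose(reinterpret_cast<const char*>(a.data()), reinterpret_cast<char*>(a_t.data()),
                                 a_shape, sizeof(T), order_a, a_t_shape);
        ov::reference::transpose(reinterpret_cast<const char*>(b.data()), reinterpret_cast<char*>(b_t.data()),
                                 b_shape, sizeof(T), order_b, b_t_shape);
        ov::reference::matmul(a_t.data(), b_t.data(), out.data(), a_t_shape, b_t_shape, out_shape,
                              false, false);
        return out;
    }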
outputs.at("gemm").get_memory(); + cldnn::mem_lock output_ptr(output_mem, get_test_stream()); + + ov::Shape ref_input0_shape = { BATCH_SIZE, 1, M_SIZE, K_SIZE }; + ov::Shape ref_input1_shape = { BATCH_SIZE, 1, K_SIZE, N_SIZE }; + ov::Shape ref_output_shape = { BATCH_SIZE, 1, M_SIZE, N_SIZE }; + ov::Shape transposed_output_shape = { 1, BATCH_SIZE, N_SIZE, M_SIZE }; + + std::vector ref_out_data; + ref_out_data.resize(ov::shape_size(ref_output_shape)); + std::vector transposed_out_data; + transposed_out_data.resize(ov::shape_size(ref_output_shape)); + + std::vector ref_input_0_data(input_0_data.size()); + std::vector ref_input_1_data(input_1_data.size()); + + ov::reference::transpose((const char *)(input_0_data.data()), + (char *)(ref_input_0_data.data()), + input0_shape, + sizeof(ov::float16), + input0_order, + ref_input0_shape); + + ov::reference::transpose((const char *)(input_1_data.data()), + (char *)(ref_input_1_data.data()), + input1_shape, + sizeof(ov::float16), + input1_order, + ref_input1_shape); + + ov::reference::matmul(ref_input_0_data.data(), + ref_input_1_data.data(), + ref_out_data.data(), + ref_input0_shape, + ref_input1_shape, + ref_output_shape, + false, + false); + + ov::reference::transpose((const char *)(ref_out_data.data()), + (char *)(transposed_out_data.data()), + ref_output_shape, + sizeof(ov::float16), + output_order, + transposed_output_shape); + + ASSERT_EQ(output_ptr.size(), transposed_out_data.size()); + + const auto abs_error = 0.0001; + for (uint32_t i = 0; i < transposed_out_data.size(); ++i) { + ASSERT_NEAR(output_ptr[i], transposed_out_data[i], abs_error); + } + } }; TEST_F(gemm_gpu_tests, basic_bfyx_t2_inplace_crop_with_pad) { @@ -712,6 +936,14 @@ TEST_F(gemm_gpu_tests, dynamic_multi_inference_different_shape) { this->test_dynamic_multi_inference_different_shape(false); } +TEST_F(gemm_gpu_tests, transpose_matmul) { + this->test_transpose_matmul(false); +} + +TEST_F(gemm_gpu_tests, transpose_matmul_transpose) { + this->test_transpose_matmul_transpose(false); +} + INSTANTIATE_TEST_SUITE_P( GemmGPUTest_t1t2, GemmGPUTestRandom, @@ -2133,4 +2365,12 @@ TEST_F(gemm_gpu_tests, dynamic_multi_inference_different_shape_cached) { TEST_F(gemm_gpu_tests, basic_bfyx_t2_inplace_crop_with_pad_cached) { this->test_basic_bfyx_t2_inplace_crop_with_pad(true); } + +TEST_F(gemm_gpu_tests, transpose_matmul_cached) { + this->test_transpose_matmul(true); +} + +TEST_F(gemm_gpu_tests, transpose_matmul_transpose_cached) { + this->test_transpose_matmul_transpose(true); +} } // namespace diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp index 2c11e2ec1ea1dd..1e3917e16fc0a4 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp @@ -127,8 +127,8 @@ class check_hash_value: public ::testing::Test { const auto primitive_hash = primitve->hash(); const auto params_hash = prim_inst->get_impl_params()->hash(); - ASSERT_EQ(primitive_hash, 8009877756431655269UL); - ASSERT_EQ(params_hash, 12585836190897043350UL); + ASSERT_EQ(primitive_hash, 6333308204192016515UL); + ASSERT_EQ(params_hash, 5512364123521496254UL); } void test_permute_basic(bool is_caching_test) { diff --git a/src/plugins/intel_gpu/tests/unit/transformations/transpose_matmul_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/transpose_matmul_fusion_test.cpp new file mode 100644 index 00000000000000..61638930c3b63f --- /dev/null 
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/transpose_matmul_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/transpose_matmul_fusion_test.cpp
new file mode 100644
index 00000000000000..61638930c3b63f
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/transformations/transpose_matmul_fusion_test.cpp
@@ -0,0 +1,129 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/ov_test_utils.hpp"
+
+#include "openvino/core/model.hpp"
+#include "openvino/op/matmul.hpp"
+#include "openvino/pass/manager.hpp"
+#include "openvino/op/transpose.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/result.hpp"
+#include "intel_gpu/op/gemm.hpp"
+
+#include "plugin/transformations/transpose_matmul_fusion.hpp"
+
+#include <memory>
+
+using namespace testing;
+using namespace ov::intel_gpu;
+
+namespace ov {
+namespace test {
+namespace intel_gpu {
+
+TEST_F(TransformationTestsF, TransposeMatmulFusion1) {
+    {
+        auto input_a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto input_b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(input_a, input_b);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input_a, input_b });
+        manager.register_pass<TransposeMatMulFusion>();
+    }
+    {
+        std::vector<int64_t> order_a = {0, 1, 2, 3};
+        std::vector<int64_t> order_b = {0, 1, 2, 3};
+        std::vector<int64_t> order_c = {0, 1, 2, 3};
+        auto input_a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto input_b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto gemm = std::make_shared<ov::intel_gpu::op::Gemm>(input_a, input_b, order_a, order_b, order_c, ov::element::undefined);
+
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ gemm }, ov::ParameterVector{ input_a, input_b });
+        comparator.enable(FunctionsComparator::ATTRIBUTES);
+    }
+}
+
+TEST_F(TransformationTestsF, TransposeMatmulFusion2) {
+    {
+        auto input_a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto transpose_a_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {0, 2, 1, 3});
+        auto transpose_a = std::make_shared<ov::op::v1::Transpose>(input_a, transpose_a_const);
+        auto input_b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(transpose_a, input_b);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input_a, input_b });
+        manager.register_pass<TransposeMatMulFusion>();
+    }
+    {
+        std::vector<int64_t> order_a = {0, 2, 1, 3};
+        std::vector<int64_t> order_b = {0, 1, 2, 3};
+        std::vector<int64_t> order_c = {0, 1, 2, 3};
+        auto input_a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto input_b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto gemm = std::make_shared<ov::intel_gpu::op::Gemm>(input_a, input_b, order_a, order_b, order_c, ov::element::undefined);
+
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ gemm }, ov::ParameterVector{ input_a, input_b });
+        comparator.enable(FunctionsComparator::ATTRIBUTES);
+    }
+}
+
+TEST_F(TransformationTestsF, TransposeMatmulFusion3) {
+    {
+        auto input_a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto transpose_a_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {0, 2, 1, 3});
+        auto transpose_a = std::make_shared<ov::op::v1::Transpose>(input_a, transpose_a_const);
+        auto input_b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto transpose_b_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {0, 1, 3, 2});
+        auto transpose_b = std::make_shared<ov::op::v1::Transpose>(input_b, transpose_b_const);
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(transpose_a, transpose_b);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input_a, input_b });
+        manager.register_pass<TransposeMatMulFusion>();
+    }
+    {
+        std::vector<int64_t> order_a = {0, 2, 1, 3};
+        std::vector<int64_t> order_b = {0, 1, 3, 2};
+        std::vector<int64_t> order_c = {0, 1, 2, 3};
+        auto input_a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto input_b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto gemm = std::make_shared<ov::intel_gpu::op::Gemm>(input_a, input_b, order_a, order_b, order_c, ov::element::undefined);
+
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ gemm }, ov::ParameterVector{ input_a, input_b });
+        comparator.enable(FunctionsComparator::ATTRIBUTES);
+    }
+}
+
+TEST_F(TransformationTestsF, TransposeMatmulFusion4) {
+    {
+        auto input_a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto transpose_a_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {0, 2, 1, 3});
+        auto transpose_a = std::make_shared<ov::op::v1::Transpose>(input_a, transpose_a_const);
+        auto input_b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto transpose_b_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {0, 2, 1, 3});
+        auto transpose_b = std::make_shared<ov::op::v1::Transpose>(input_b, transpose_b_const);
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(transpose_a, transpose_b);
+        auto transpose_c_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {0, 2, 1, 3});
+        auto transpose_c = std::make_shared<ov::op::v1::Transpose>(matmul, transpose_c_const);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{ transpose_c }, ov::ParameterVector{ input_a, input_b });
+        manager.register_pass<TransposeMatMulFusion>();
+    }
+    {
+        std::vector<int64_t> order_a = {0, 2, 1, 3};
+        std::vector<int64_t> order_b = {0, 2, 1, 3};
+        std::vector<int64_t> order_c = {0, 2, 1, 3};
+        auto input_a = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto input_b = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
+        auto gemm = std::make_shared<ov::intel_gpu::op::Gemm>(input_a, input_b, order_a, order_b, order_c, ov::element::undefined);
+
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ gemm }, ov::ParameterVector{ input_a, input_b });
+        comparator.enable(FunctionsComparator::ATTRIBUTES);
+    }
+}
+
+} // namespace intel_gpu
+} // namespace test
+} // namespace ov
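The four cases above cover the whole pattern space: no Transpose at all, so the Gemm carries identity orders (Fusion1); a Transpose on input A (Fusion2); Transposes on both inputs (Fusion3); and Transposes on both inputs plus one on the MatMul output (Fusion4). In every case the expected graph is a single Gemm whose order_a/order_b/order_c attributes record the permutations. A small standalone sketch of the underlying bookkeeping; compose is a hypothetical helper, not part of the pass, and would only be needed if permutations had to be chained:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Compose two axis permutations: applying "first" and then "second"
    // is the same as applying the returned order once.
    std::vector<int64_t> compose(const std::vector<int64_t>& first,
                                 const std::vector<int64_t>& second) {
        std::vector<int64_t> result(second.size());
        for (size_t i = 0; i < second.size(); ++i)
            result[i] = first[static_cast<size_t>(second[i])];
        return result;
    }

    int main() {
        const std::vector<int64_t> identity = {0, 1, 2, 3};  // no Transpose present
        const std::vector<int64_t> swapped  = {0, 2, 1, 3};  // order from a Transpose constant
        const auto folded = compose(identity, swapped);      // == {0, 2, 1, 3}
        for (auto axis : folded) std::cout << axis << ' ';
        std::cout << '\n';
    }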
diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/fuse_convert_transformation.hpp b/src/tests/functional/plugin/shared/include/low_precision_transformations/fuse_convert_transformation.hpp
index b1ee4e0156c820..e90a2bad6c2b39 100644
--- a/src/tests/functional/plugin/shared/include/low_precision_transformations/fuse_convert_transformation.hpp
+++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/fuse_convert_transformation.hpp
@@ -17,7 +17,7 @@ namespace LayerTestsDefinitions {
 
 typedef std::tuple <
     element::Type,
-    PartialShape,
+    ov::PartialShape,
     std::string,
     ov::builder::subgraph::DequantizationOperations,
     bool> FuseConvertTransformationParams;
diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/mvn_transformation.hpp b/src/tests/functional/plugin/shared/include/low_precision_transformations/mvn_transformation.hpp
index 8111f6f05375ed..9d57299b61fb90 100644
--- a/src/tests/functional/plugin/shared/include/low_precision_transformations/mvn_transformation.hpp
+++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/mvn_transformation.hpp
@@ -16,7 +16,7 @@ namespace LayerTestsDefinitions {
 
 typedef std::tuple <
     element::Type,
-    PartialShape,
+    ov::PartialShape,
     std::string,
     AxisSet,
     bool> MVNTransformationParams;
diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/pad.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/pad.hpp
index 0a0a02adf0771c..2fa4e951e47f52 100644
--- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/pad.hpp
+++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/pad.hpp
@@ -33,7 +33,7 @@ class PadLayerTest : public testing::WithParamInterface<padLayerTestParamsSet>,
 protected:
     void SetUp() override;
 
-    virtual std::shared_ptr<ov::Node> CreatePadOp(const ngraph::Output<ov::Node>& data,
+    virtual std::shared_ptr<ov::Node> CreatePadOp(const ov::Output<ov::Node>& data,
                                                   const std::vector<int64_t>& padsBegin,
                                                   const std::vector<int64_t>& padsEnd,
                                                   float argPadValue,
@@ -47,7 +47,7 @@ class PadLayerTest : public testing::WithParamInterface<padLayerTestParamsSet>,
 
 class PadLayerTest12 : public PadLayerTest {
 protected:
-    std::shared_ptr<ov::Node> CreatePadOp(const ngraph::Output<ov::Node>& data,
+    std::shared_ptr<ov::Node> CreatePadOp(const ov::Output<ov::Node>& data,
                                           const std::vector<int64_t>& padsBegin,
                                           const std::vector<int64_t>& padsEnd,
                                           float argPadValue,
diff --git a/src/tests/functional/shared_test_classes/src/single_layer/logical.cpp b/src/tests/functional/shared_test_classes/src/single_layer/logical.cpp
index 76c383662e67ce..db3760f067599d 100644
--- a/src/tests/functional/shared_test_classes/src/single_layer/logical.cpp
+++ b/src/tests/functional/shared_test_classes/src/single_layer/logical.cpp
@@ -76,7 +76,7 @@ void LogicalLayerTest::SetUp() {
         }
         logicalNode = ngraph::builder::makeLogical(inputs[0], secondInput, logicalOpType);
     } else {
-        logicalNode = ngraph::builder::makeLogical(inputs[0], ngraph::Output<ov::Node>(), logicalOpType);
+        logicalNode = ngraph::builder::makeLogical(inputs[0], ov::Output<ov::Node>(), logicalOpType);
     }
 
     function = std::make_shared<ov::Model>(logicalNode, inputs, "Logical");
diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/common/builders.hpp b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/common/builders.hpp
index 6d65a76dc5c42f..319d9752569d12 100644
--- a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/common/builders.hpp
+++ b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/common/builders.hpp
@@ -64,29 +64,29 @@ std::shared_ptr<Node> makeElementwise(const std::shared_ptr<Node> data, cons
 }
 
 std::shared_ptr<Node> makeDequantization(
-    const Output<Node>& data,
+    const ov::Output<Node>& data,
     const DequantizationOperations& dequantizationOperations);
 
-std::shared_ptr<Node> makeMultiply(const Output<Node>& data, const DequantizationOperations::Multiply& multiply);
+std::shared_ptr<Node> makeMultiply(const ov::Output<Node>& data, const DequantizationOperations::Multiply& multiply);
 
-std::shared_ptr<Node> makeReshape(const Output<Node>& data, const Reshape& reshape);
+std::shared_ptr<Node> makeReshape(const ov::Output<Node>& data, const Reshape& reshape);
 
-std::shared_ptr<Node> makeTranspose(const Output<Node>& data, const Transpose& reshape);
+std::shared_ptr<Node> makeTranspose(const ov::Output<Node>& data, const Transpose& reshape);
 
 std::shared_ptr<Node> makeFakeQuantize(
-    const Output<Node>& output,
+    const ov::Output<Node>& output,
     const ov::element::Type precision,
     const FakeQuantizeOnData& fqOnData);
 
-std::shared_ptr<Node> makeConvolution(const Output<Node>& output, const Convolution& convolution);
+std::shared_ptr<Node> makeConvolution(const ov::Output<Node>& output, const Convolution& convolution);
 
 std::shared_ptr<Node> makeFakeQuantizeTypeRelaxed(
-    const Output<Node>& output,
+    const ov::Output<Node>& output,
     const ov::element::Type precision,
     const FakeQuantizeOnData& fqOnData);
 
 std::shared_ptr<Node> makeFakeQuantize(
-    const Output<Node>& input,
+    const ov::Output<Node>& input,
     const ov::element::Type constantPrecision,
     const FakeQuantizeOnDataWithConstant& fqOnData,
     const bool subgraphOnConstantPath = false);
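The remaining files are a mechanical migration of the test helpers from the deprecated ngraph-prefixed aliases to their ov:: equivalents; ngraph::Output<Node> is effectively an alias of ov::Output<Node>, so call sites keep compiling unchanged. A minimal sketch of the migrated signature style (make_relu is a hypothetical helper; precision and shape are assumed):

    #include <memory>

    #include "openvino/op/parameter.hpp"
    #include "openvino/op/relu.hpp"

    // Helpers now take ov::Output<ov::Node> directly instead of the
    // deprecated ngraph::Output alias.
    std::shared_ptr<ov::Node> make_relu(const ov::Output<ov::Node>& data) {
        return std::make_shared<ov::op::v0::Relu>(data);
    }

    int main() {
        auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 3});
        auto relu = make_relu(param->output(0));
        return relu != nullptr ? 0 : 1;
    }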
diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/concat.hpp b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/concat.hpp
index c7863b42708baa..95e21ee1c8d359 100644
--- a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/concat.hpp
+++ b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/concat.hpp
@@ -292,7 +292,7 @@ class ConcatFunction {
         const DequantizationOperations& dequantizationAfter);
 
 private:
-    static std::shared_ptr<Node> makeMaxPool(const Output<Node>& parent, const std::vector<size_t>& kernel);
+    static std::shared_ptr<Node> makeMaxPool(const ov::Output<Node>& parent, const std::vector<size_t>& kernel);
 };
 
 } // namespace subgraph
diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/precision_propagation.hpp b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/precision_propagation.hpp
index ce4ca3609b8c5e..2c2d452c3731b2 100644
--- a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/precision_propagation.hpp
+++ b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/precision_propagation.hpp
@@ -42,7 +42,7 @@ class PrecisionPropagationFunction {
         const DequantizationOperations& dequantizationOperations2);
 
 private:
-    static std::shared_ptr<Node> makeMaxPool(const Output<Node>& parent, const std::vector<size_t>& kernel);
+    static std::shared_ptr<Node> makeMaxPool(const ov::Output<Node>& parent, const std::vector<size_t>& kernel);
 };
 
 } // namespace subgraph
diff --git a/src/tests/ov_helpers/ov_lpt_models/src/common/builders.cpp b/src/tests/ov_helpers/ov_lpt_models/src/common/builders.cpp
index f4d5234d7496b3..5ec16a126e1e2d 100644
--- a/src/tests/ov_helpers/ov_lpt_models/src/common/builders.cpp
+++ b/src/tests/ov_helpers/ov_lpt_models/src/common/builders.cpp
@@ -20,9 +20,9 @@ namespace subgraph {
 using namespace ov::pass::low_precision;
 
 std::shared_ptr<Node> makeDequantization(
-    const Output<Node>& data,
+    const ov::Output<Node>& data,
     const DequantizationOperations& dequantizationOperations) {
-    Output<Node> parent = data;
+    ov::Output<Node> parent = data;
 
     if (!dequantizationOperations.convert.empty()) {
         auto convert = std::make_shared<ov::opset1::Convert>(data, dequantizationOperations.convert.outPrecision);
@@ -73,8 +73,8 @@ std::shared_ptr<Node> makeDequantization(
             subtractConst = subtractConstConvert;
         }
 
-        Output<Node> leftBranchParent = dequantizationOperations.subtract.constantIndex == 1 ? parent : subtractConst;
-        Output<Node> rightBranchParent = dequantizationOperations.subtract.constantIndex == 1 ? subtractConst : parent;
+        ov::Output<Node> leftBranchParent = dequantizationOperations.subtract.constantIndex == 1 ? parent : subtractConst;
+        ov::Output<Node> rightBranchParent = dequantizationOperations.subtract.constantIndex == 1 ? subtractConst : parent;
 
         if (((dequantizationOperations.subtract.outPrecision == element::undefined) ||
              (dequantizationOperations.subtract.outPrecision == parent.get_element_type())) &&
@@ -125,7 +125,7 @@ std::shared_ptr<Node> makeDequantization(
     return parent.get_node_shared_ptr();
 }
 
-std::shared_ptr<Node> makeMultiply(const Output<Node>& parent, const DequantizationOperations::Multiply& multiply) {
+std::shared_ptr<Node> makeMultiply(const ov::Output<Node>& parent, const DequantizationOperations::Multiply& multiply) {
     std::vector<size_t> shape;
     auto values = multiply.values;
    if (multiply.constantShapeIsDefined) {
@@ -183,18 +183,18 @@ std::shared_ptr<Node> makeMultiply(const Output<Node>& parent, const Dequantizat
     return newMultiply;
 }
 
-std::shared_ptr<Node> makeReshape(const Output<Node>& data, const Reshape& reshape) {
+std::shared_ptr<Node> makeReshape(const ov::Output<Node>& data, const Reshape& reshape) {
     auto constant = ov::test::utils::deprecated::make_constant(ov::element::i64, Shape({ reshape.values.size() }), reshape.values);
     return std::make_shared<ov::opset1::Reshape>(data, constant->output(0), reshape.special_zero);
 }
 
-std::shared_ptr<Node> makeTranspose(const Output<Node>& data, const Transpose& transpose) {
+std::shared_ptr<Node> makeTranspose(const ov::Output<Node>& data, const Transpose& transpose) {
     auto constant = ov::test::utils::deprecated::make_constant(ov::element::i64, Shape({ transpose.values.size() }), transpose.values);
     return std::make_shared<ov::opset1::Transpose>(data, constant->output(0));
 }
 
 std::shared_ptr<Node> makeFakeQuantize(
-    const Output<Node>& output,
+    const ov::Output<Node>& output,
     const ov::element::Type constantType,
     const FakeQuantizeOnData& fqOnData) {
     return ov::as_type_ptr<ov::opset1::FakeQuantize>(ov::test::utils::make_fake_quantize(
@@ -208,7 +208,7 @@ std::shared_ptr<Node> makeFakeQuantize(
         fqOnData.outputHighValues));
 }
 
-std::shared_ptr<Node> makeConvolution(const Output<Node>& output, const Convolution& convolution) {
+std::shared_ptr<Node> makeConvolution(const ov::Output<Node>& output, const Convolution& convolution) {
     auto parentOnActivations = output;
     if (!convolution.zeroPointOnActivations.empty()) {
         auto constant = std::make_shared<ov::opset1::Constant>(
@@ -239,7 +239,7 @@ std::shared_ptr<Node> makeConvolution(const Output<Node>& out
 }
 
 std::shared_ptr<Node> makeFakeQuantizeTypeRelaxed(
-    const Output<Node>& output,
+    const ov::Output<Node>& output,
     const ov::element::Type precision,
     const FakeQuantizeOnData& fqOnData) {
     const std::shared_ptr<Node> fq = makeFakeQuantize(output, precision, fqOnData);
@@ -249,7 +249,7 @@ std::shared_ptr<Node> makeFakeQuantizeTypeRelaxed(
 }
 
 std::shared_ptr<Node> makeFakeQuantize(
-    const Output<Node>& input,
+    const ov::Output<Node>& input,
     const ov::element::Type constantPrecision,
     const FakeQuantizeOnDataWithConstant& fqOnData,
     const bool subgraphOnConstantPath) {
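As the hunks above show, makeDequantization assembles the standard LPT chain: an optional Convert to the dequantization precision, an optional Subtract of a zero point (on either constant index), and a Multiply by a scale. A self-contained sketch of a subgraph with that shape; the u8 input precision, the zero point of 128 and the scale of 0.02 are illustrative assumptions, not values taken from the helper:

    #include <memory>

    #include "openvino/op/constant.hpp"
    #include "openvino/op/convert.hpp"
    #include "openvino/op/multiply.hpp"
    #include "openvino/op/parameter.hpp"
    #include "openvino/op/subtract.hpp"

    // Convert -> Subtract(zero point) -> Multiply(scale): the usual
    // dequantization chain built by the LPT test helpers.
    ov::Output<ov::Node> make_dequantization(const ov::Output<ov::Node>& data) {
        auto convert = std::make_shared<ov::op::v0::Convert>(data, ov::element::f32);
        auto zero_point = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {128.f});
        auto subtract = std::make_shared<ov::op::v1::Subtract>(convert, zero_point);
        auto scale = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {0.02f});
        auto multiply = std::make_shared<ov::op::v1::Multiply>(subtract, scale);
        return multiply->output(0);
    }

    int main() {
        auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::Shape{1, 16});
        auto dq = make_dequantization(param->output(0));
        return dq.get_node() != nullptr ? 0 : 1;
    }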
diff --git a/src/tests/ov_helpers/ov_lpt_models/src/concat.cpp b/src/tests/ov_helpers/ov_lpt_models/src/concat.cpp
index 951b82e62281f4..0b18d5bb67e650 100644
--- a/src/tests/ov_helpers/ov_lpt_models/src/concat.cpp
+++ b/src/tests/ov_helpers/ov_lpt_models/src/concat.cpp
@@ -450,7 +450,7 @@ std::shared_ptr<ov::Model> ConcatFunction::getOriginalWithSplitedIntermediate(
     auto& rtInfo = concat->get_rt_info();
     rtInfo["Variant::std::string"] = "concat";
 
-    Output<Node> lastOutput = intermediateOp->output(1);
+    ov::Output<Node> lastOutput = intermediateOp->output(1);
     if (addConvolution) {
         auto weights = ov::opset1::Constant::create(
             precision, ov::Shape{ static_cast<size_t>(inputShape[1].get_length() / numSplit),
@@ -1511,7 +1511,7 @@ std::shared_ptr<ov::Model> ConcatFunction::getReferenceWithSplitedIntermediate(
     const auto lastDequantization2 = makeDequantization(intermediateOp->output(1), dequantizationOperations2);
     lastDequantization1->set_friendly_name("output_1");
 
-    Output<Node> lastOutput = lastDequantization2;
+    ov::Output<Node> lastOutput = lastDequantization2;
     if (addConvolution) {
         auto weights = ov::opset1::Constant::create(
             precision,
@@ -2027,7 +2027,7 @@ std::shared_ptr<ov::Model> ConcatFunction::getReferenceWithIntermediateReshape(
     return function;
 }
 
-std::shared_ptr<Node> ConcatFunction::makeMaxPool(const Output<Node>& parent, const std::vector<size_t>& kernel) {
+std::shared_ptr<Node> ConcatFunction::makeMaxPool(const ov::Output<Node>& parent, const std::vector<size_t>& kernel) {
     const std::vector<size_t> stride = { 1, 1 };
     const std::vector<size_t> padBegin = { 0, 0 };
     const std::vector<size_t> padEnd = { 0, 0 };
diff --git a/src/tests/ov_helpers/ov_lpt_models/src/elementwise.cpp b/src/tests/ov_helpers/ov_lpt_models/src/elementwise.cpp
index 32e23946813c0d..29cf2813869e01 100644
--- a/src/tests/ov_helpers/ov_lpt_models/src/elementwise.cpp
+++ b/src/tests/ov_helpers/ov_lpt_models/src/elementwise.cpp
@@ -17,7 +17,7 @@ namespace subgraph {
 
 namespace {
 std::shared_ptr<Node> makeFakeQuantizeWithNames(
-    const Output<Node>& parent,
+    const ov::Output<Node>& parent,
     const ov::element::Type precision,
     const ov::builder::subgraph::FakeQuantizeOnData& fqOnData,
     const std::string name) {
diff --git a/src/tests/ov_helpers/ov_lpt_models/src/precision_propagation.cpp b/src/tests/ov_helpers/ov_lpt_models/src/precision_propagation.cpp
index 5773d5bb2f8041..7db40bc36126ed 100644
--- a/src/tests/ov_helpers/ov_lpt_models/src/precision_propagation.cpp
+++ b/src/tests/ov_helpers/ov_lpt_models/src/precision_propagation.cpp
@@ -280,7 +280,7 @@ std::shared_ptr<ov::Model> PrecisionPropagationFunction::getReferenceWithNeighbo
     return function;
 }
 
-std::shared_ptr<Node> PrecisionPropagationFunction::makeMaxPool(const Output<Node>& parent, const std::vector<size_t>& kernel) {
+std::shared_ptr<Node> PrecisionPropagationFunction::makeMaxPool(const ov::Output<Node>& parent, const std::vector<size_t>& kernel) {
     const std::vector<size_t> stride = { 1, 1 };
     const std::vector<size_t> padBegin = { 0, 0 };
     const std::vector<size_t> padEnd = { 0, 0 };
diff --git a/src/tests/ov_helpers/ov_lpt_models/src/transformations_after_split.cpp b/src/tests/ov_helpers/ov_lpt_models/src/transformations_after_split.cpp
index dded4b2f891a2b..469125962037df 100644
--- a/src/tests/ov_helpers/ov_lpt_models/src/transformations_after_split.cpp
+++ b/src/tests/ov_helpers/ov_lpt_models/src/transformations_after_split.cpp
@@ -40,7 +40,7 @@ std::shared_ptr<ov::Model> TransformationsAfterSplitFunction::get(const std::str
 
 std::shared_ptr<Node> TransformationsAfterSplitFunction::getLayerByTransformationName(
     const std::string transformationName,
-    const Output<Node> parent) {
+    const ov::Output<Node> parent) {
     if (transformationName == "AddTransformationWithoutConcat") {
         const auto dequantization = makeDequantization(parent, { {}, {}, { 3.f } });
         const auto addConstant = ov::opset1::Constant::create(element::u8, Shape{}, { 128.f });
diff --git a/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp b/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp
index 73389379ee58e9..98d08578d66603 100644
--- a/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp
+++ b/src/tests/ov_helpers/ov_models/include/ov_models/builders.hpp
@@ -311,8 +311,8 @@ std::shared_ptr<ov::Node> makeMVN(const ov::Output<Node>& in,
                                   double eps);
 
 OPENVINO_DEPRECATED("This function is deprecated and will be removed soon.")
-std::shared_ptr<ov::Node> makeMVN6(const Output<Node>& in,
-                                   const Output<Node>& axesNode,
+std::shared_ptr<ov::Node> makeMVN6(const ov::Output<Node>& in,
+                                   const ov::Output<Node>& axesNode,
                                    bool normalizeVariance,
                                    float eps,
                                    std::string& epsMode);
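makeMVN6 is now tagged OPENVINO_DEPRECATED and, as the name suggests, is essentially a thin wrapper over the opset6 MVN operation, so new code can construct the op directly. A hedged sketch of that direct construction; the INSIDE_SQRT eps mode, the shapes and the eps value are assumptions for illustration:

    #include <memory>

    #include "openvino/op/constant.hpp"
    #include "openvino/op/mvn.hpp"
    #include "openvino/op/parameter.hpp"

    // Direct opset6 MVN over explicit reduction axes, roughly what the
    // deprecated makeMVN6 helper produces.
    std::shared_ptr<ov::Node> make_mvn6(const ov::Output<ov::Node>& in,
                                        const ov::Output<ov::Node>& axes,
                                        bool normalize_variance, float eps) {
        return std::make_shared<ov::op::v6::MVN>(in, axes, normalize_variance, eps,
                                                 ov::op::MVNEpsMode::INSIDE_SQRT);
    }

    int main() {
        auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 3, 8, 8});
        auto axes = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {2, 3});
        auto mvn = make_mvn6(data, axes, true, 1e-9f);
        return mvn != nullptr ? 0 : 1;
    }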
diff --git a/src/tests/ov_helpers/ov_models/src/binary_convolution.cpp b/src/tests/ov_helpers/ov_models/src/binary_convolution.cpp
index 70d4c5695d85d7..2cf302b8a79726 100644
--- a/src/tests/ov_helpers/ov_models/src/binary_convolution.cpp
+++ b/src/tests/ov_helpers/ov_models/src/binary_convolution.cpp
@@ -13,7 +13,7 @@
 namespace ngraph {
 namespace builder {
 
-std::shared_ptr<Node> makeBinaryConvolution(const Output<Node>& in,
+std::shared_ptr<Node> makeBinaryConvolution(const ov::Output<Node>& in,
                                             const std::vector<size_t>& filterSize,
                                             const std::vector<size_t>& strides,
                                             const std::vector<ptrdiff_t>& padsBegin,
diff --git a/src/tests/ov_helpers/ov_models/src/mvn.cpp b/src/tests/ov_helpers/ov_models/src/mvn.cpp
index cbbdb32c4fa95c..a55bbec6dbc9f2 100644
--- a/src/tests/ov_helpers/ov_models/src/mvn.cpp
+++ b/src/tests/ov_helpers/ov_models/src/mvn.cpp
@@ -32,8 +32,8 @@ std::shared_ptr<ov::Node> makeMVN(const ov::Output<Node>& in,
     return mvnNode;
 }
 
-std::shared_ptr<ov::Node> makeMVN6(const Output<Node>& in,
-                                   const Output<Node>& axesNode,
+std::shared_ptr<ov::Node> makeMVN6(const ov::Output<Node>& in,
+                                   const ov::Output<Node>& axesNode,
                                    bool normalizeVariance,
                                    float eps,
                                    std::string& epsMode) {
diff --git a/src/tests/ov_helpers/ov_models/src/utils/ov_helpers.cpp b/src/tests/ov_helpers/ov_models/src/utils/ov_helpers.cpp
index 0524b92572cd18..d670b11118057e 100644
--- a/src/tests/ov_helpers/ov_models/src/utils/ov_helpers.cpp
+++ b/src/tests/ov_helpers/ov_models/src/utils/ov_helpers.cpp
@@ -199,7 +199,7 @@ std::shared_ptr<ov::Model> foldFunction(const std::shared_ptr<ov::Model>& functi
     }
 
     std::vector<element::Type> paramElementTypes;
-    std::vector<PartialShape> paramShapes;
+    std::vector<ov::PartialShape> paramShapes;
     std::vector<std::vector<std::uint8_t>> vecTmpConvertedInputs;
     vecTmpConvertedInputs.reserve(inputs.size());