Commit 7af7c9f

Merge branch 'master' into github_actions/local_cache_impl

mryzhov authored Mar 25, 2024
2 parents 618b4ab + 5b3216d commit 7af7c9f
Showing 60 changed files with 1,513 additions and 259 deletions.
1 change: 1 addition & 0 deletions .github/workflows/linux_arm64.yml
@@ -172,6 +172,7 @@ jobs:
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \
-DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \
-DOV_CPU_AARCH64_USE_MULTI_ISA=OFF \
-S ${OPENVINO_REPO} \
-B ${BUILD_DIR}
1 change: 1 addition & 0 deletions .gitignore
@@ -61,4 +61,5 @@ __pycache__
/tools/mo/*.svg
/src/plugins/intel_cpu/tools/commit_slider/*.json
/src/plugins/intel_cpu/tools/commit_slider/slider_cache/*
/src/plugins/intel_cpu/thirdparty/ComputeLibrary/build/*
.github/GITHUB_OUTPUT
@@ -274,6 +274,17 @@ void util::DictAttributeSerializer::on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) {
if (m_attributes.contains(name)) {
OPENVINO_THROW("No AttributeVisitor support for accessing attribute named: ", name);
}

if (auto _adapter = dynamic_cast<ov::AttributeAdapter<std::shared_ptr<ov::op::util::Variable>>*>(&adapter)) {
m_attributes[name.c_str()] = _adapter->get()->get_info().variable_id;
} else if (auto _adapter = dynamic_cast<ov::AttributeAdapter<ov::PartialShape>*>(&adapter)) {
auto partial_shape = _adapter->get();
std::vector<ov::Dimension::value_type> shape;
for (const auto& dim : partial_shape) {
shape.push_back(dim.is_dynamic() ? -1 : dim.get_length());
}
m_attributes[name.c_str()] = shape;
}
}
void util::DictAttributeSerializer::on_adapter(const std::string& name, ov::ValueAccessor<bool>& adapter) {
m_attributes[name.c_str()] = adapter.get();
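The branch added above serializes a variable's id and shape into the Python-visible attribute dict, encoding each dynamic dimension as -1. A minimal sketch of the resulting Python behavior (the variable id is hypothetical, and the import path and dynamic-shape read_value call are assumptions mirroring the tests below):

```python
import numpy as np
import openvino.runtime.opset13 as ov  # assumed import path

# Variable with a dynamic first dimension (-1).
init_value = ov.parameter([-1, 2], name="init_value", dtype=np.int32)
node = ov.read_value(init_value, "var_id_1", np.int32, [-1, 2])

attrs = node.get_attributes()
assert attrs["variable_id"] == "var_id_1"
assert attrs["variable_shape"] == [-1, 2]  # dynamic dim serialized as -1
```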
13 changes: 13 additions & 0 deletions src/bindings/python/tests/test_graph/test_create_op.py
@@ -1183,11 +1183,15 @@ def test_read_value():
init_value = ov.parameter([2, 2], name="init_value", dtype=np.int32)

node = ov.read_value(init_value, "var_id_667", np.int32, [2, 2])
read_value_attributes = node.get_attributes()

assert node.get_type_name() == "ReadValue"
assert node.get_output_size() == 1
assert list(node.get_output_shape(0)) == [2, 2]
assert node.get_output_element_type(0) == Type.i32
assert read_value_attributes["variable_type"] == "i32"
assert read_value_attributes["variable_id"] == "var_id_667"
assert read_value_attributes["variable_shape"] == [2, 2]


def test_read_value_dyn_variable_pshape():
@@ -1205,11 +1209,13 @@ def test_assign():
input_data = ov.parameter([5, 7], name="input_data", dtype=np.int32)
rv = ov.read_value(input_data, "var_id_667", np.int32, [5, 7])
node = ov.assign(rv, "var_id_667")
assign_attributes = node.get_attributes()

assert node.get_type_name() == "Assign"
assert node.get_output_size() == 1
assert list(node.get_output_shape(0)) == [5, 7]
assert node.get_output_element_type(0) == Type.i32
assert assign_attributes["variable_id"] == "var_id_667"


def test_extract_image_patches():
@@ -2353,3 +2359,10 @@ def test_topk_opset11():
assert node.get_output_size() == 2
assert list(node.get_output_shape(0)) == [1, 3, 3]
assert list(node.get_output_shape(1)) == [1, 3, 3]


def test_parameter_get_attributes():
parameter = ov.parameter([2, 2], dtype=np.float32, name="InputData")
parameter_attributes = parameter.get_attributes()
assert parameter_attributes["element_type"] == "f32"
assert parameter_attributes["shape"] == [2, 2]
@@ -69,7 +69,6 @@
#include "transformations/op_conversions/convert_bitwise_to_logical_bool.hpp"
#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp"
#include "transformations/op_conversions/convert_convertlike.hpp"
#include "transformations/op_conversions/convert_convertpromotetypes.hpp"
#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp"
#include "transformations/op_conversions/convert_depth_to_space.hpp"
#include "transformations/op_conversions/convert_divide.hpp"
@@ -174,7 +173,6 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model>& model) {
ADD_MATCHER(decomp, ConvertDivide)
ADD_MATCHER(decomp, ConvertDepthToSpace)
ADD_MATCHER(decomp, ConvertSpaceToDepth)
ADD_MATCHER(decomp, ConvertConvertPromoteTypes)
ADD_MATCHER(decomp, ConvertConvertLike)
ADD_MATCHER(decomp, BatchNormDecomposition)
ADD_MATCHER(decomp, GroupNormalizationDecomposition)
@@ -81,6 +81,7 @@
#include "transformations/init_node_info.hpp"
#include "transformations/low_precision/mark_dequantization_subgraph.hpp"
#include "transformations/op_conversions/batch_norm_decomposition.hpp"
#include "transformations/op_conversions/convert_convertpromotetypes.hpp"
#include "transformations/op_conversions/convert_divide.hpp"
#include "transformations/op_conversions/convert_negative.hpp"
#include "transformations/op_conversions/convert_scatter_elements_to_scatter.hpp"
@@ -238,7 +239,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr<ov::Model>& model) {
ADD_MATCHER(decomp, ConvertDivideWithConstant)
ADD_MATCHER(decomp, ConvertSubtractWithConstant)
ADD_MATCHER(decomp, ConvertNegative)

ADD_MATCHER(decomp, ConvertConvertPromoteTypes)
manager.register_pass<ov::pass::LinOpSequenceFusion>();

auto multiply_fusions = manager.register_pass<ov::pass::GraphRewrite>();
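For background, ConvertPromoteTypes aligns its two inputs to a common element type following PyTorch-style promotion rules, and the ConvertConvertPromoteTypes matcher decomposes it into plain converts; registering it here runs that decomposition during offline model optimization. A PyTorch-only illustration of the promotion rule being modeled (an analogy, not the transformation's implementation):

```python
import torch

# Float beats integer in PyTorch type promotion, regardless of bit width:
a = torch.tensor(1, dtype=torch.int64)
b = torch.tensor(1.0, dtype=torch.float16)
print(torch.result_type(a, b))  # torch.float16
```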
5 changes: 3 additions & 2 deletions src/frontends/onnx/tests/tests_python/utils/model_importer.py
@@ -141,14 +141,15 @@ def _execute_pb_data(
executed_tests = executed_tests + 1
return executed_tests


def _add_model_import_test(self, model_test: ExtOnnxTestCase) -> None:
# the model is loaded at runtime; note that it may never be
# loaded if the test is skipped
model_marker = [None] # type: List[Optional[Union[ModelProto, NodeProto]]]

def run_import(test_self: Any, device: Text) -> None:
model = ModelImportRunner._load_onnx_model(model_test.model_dir, model_test.model)
model_marker[0] = model
model_marker[0] = model_test.model_dir / model_test.model
assert import_onnx_model(model)

self._add_test("ModelImport", model_test.name, run_import, model_marker)
@@ -160,7 +161,7 @@ def _add_model_execution_test(self, model_test: ExtOnnxTestCase) -> None:

def run_execution(test_self: Any, device: Text) -> None:
model = ModelImportRunner._load_onnx_model(model_test.model_dir, model_test.model)
model_marker[0] = model
model_marker[0] = model_test.model_dir / model_test.model
prepared_model = self.backend.prepare(model, device)
assert prepared_model is not None
executed_tests = ModelImportRunner._execute_npz_data(
50 changes: 50 additions & 0 deletions src/frontends/pytorch/src/op/bucketize.cpp
@@ -0,0 +1,50 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/op/bucketize.hpp"

#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/convert_like.hpp"
#include "openvino/op/logical_or.hpp"
#include "openvino/op/multiply.hpp"
#include "utils.hpp"

namespace ov {
namespace frontend {
namespace pytorch {
namespace op {

using namespace ov::op;

OutputVector translate_bucketize(const NodeContext& context) {
num_inputs_check(context, 2, 5);
auto input = context.get_input(0);
auto boundaries = context.get_input(1);

element::Type output_type = ov::element::i64;
if (!context.input_is_none(2) && context.const_input<bool>(2)) {
output_type = ov::element::i32;
}

bool with_right_bound = true;
if (!context.input_is_none(3)) {
with_right_bound = !context.const_input<bool>(3);
}

auto bucketize =
context.mark_node(std::make_shared<v3::Bucketize>(input, boundaries, output_type, with_right_bound));

if (!context.input_is_none(4)) {
context.mutate_input(4, bucketize);
}

return {bucketize};
}

} // namespace op
} // namespace pytorch
} // namespace frontend
} // namespace ov
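Note the flag inversion above: PyTorch's `right=False` (the default) buckets with boundaries[i-1] < v <= boundaries[i], which corresponds to OpenVINO Bucketize's `with_right_bound=true`. A small PyTorch-only sketch of the semantics being translated (not part of the commit):

```python
import torch

boundaries = torch.tensor([1, 3, 5, 7, 9])
v = torch.tensor([3, 6, 9])

# right=False (default): boundaries[i-1] < v <= boundaries[i]
print(torch.bucketize(v, boundaries))                        # tensor([1, 3, 4])
# right=True: boundaries[i-1] <= v < boundaries[i]
print(torch.bucketize(v, boundaries, right=True))            # tensor([2, 3, 5])
# out_int32=True maps to Bucketize output_type=i32
print(torch.bucketize(v, boundaries, out_int32=True).dtype)  # torch.int32
```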
3 changes: 3 additions & 0 deletions src/frontends/pytorch/src/op_table.cpp
@@ -47,6 +47,7 @@ OP_CONVERTER(translate_bitwise_and);
OP_CONVERTER(translate_bitwise_not);
OP_CONVERTER(translate_bitwise_or);
OP_CONVERTER(translate_bitwise_xor);
OP_CONVERTER(translate_bucketize);
OP_CONVERTER(translate_cat);
OP_CONVERTER(translate_cdist);
OP_CONVERTER(translate_celu);
@@ -374,6 +375,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops_ts() {
{"aten::Bool", op::translate_bool},
// aten::broadcast_tensors - Supported in limited set of patterns
{"aten::broadcast_to", op::translate_expand},
{"aten::bucketize", op::translate_bucketize},
{"aten::cat", op::translate_cat},
{"aten::cdist", op::translate_cdist},
{"aten::ceil", op::optional_out<op::translate_1to1_match_1_inputs<opset10::Ceiling>, 1>},
@@ -522,6 +524,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops_ts() {
{"aten::masked_scatter_", op::inplace_op<op::translate_masked_scatter>},
{"aten::matmul", op::translate_1to1_match_2_inputs<opset10::MatMul>},
{"aten::max", op::translate_max},
{"aten::mv", op::translate_1to1_match_2_inputs<opset10::MatMul>},
{"aten::maximum", op::translate_maximum},
{"aten::max_pool1d", op::quantizable_op<op::translate_max_poolnd>},
{"aten::max_pool1d_with_indices", op::quantizable_op<op::translate_max_poolnd>},
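The new `aten::mv` entry reuses the generic 1-to-1 MatMul mapping, which works because a matrix-vector product is a special case of matmul. A quick PyTorch-only sanity check (not part of the commit):

```python
import torch

mat = torch.randn(3, 4)
vec = torch.randn(4)
# mv (matrix-vector product) and matmul agree for a 2-D x 1-D pair.
assert torch.allclose(torch.mv(mat, vec), torch.matmul(mat, vec))
```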
2 changes: 1 addition & 1 deletion src/frontends/tensorflow/docs/supported_ops.md
@@ -26,7 +26,7 @@ A "supported operation" is one that TensorFlow Frontend can convert to the OpenVINO representation.
| All | YES | |
| AllCandidateSampler | NO | |
| AllToAll | NO | |
| Angle | NO | |
| Angle | YES | |
| AnonymousHashTable | NO | |
| AnonymousIterator | NO | |
| AnonymousIteratorV2 | NO | |
1 change: 1 addition & 0 deletions src/frontends/tensorflow/src/op_table.cpp
@@ -205,6 +205,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
// Separate translators:
{"AddN", CreatorFunction(translate_add_n_op)},
{"AdjustContrastv2", CreatorFunction(translate_adjust_contrast_op)},
{"Angle", CreatorFunction(translate_angle_op)},
{"ArgMax", CreatorFunction(translate_arg_max_op)},
{"ArgMin", CreatorFunction(translate_arg_min_op)},
{"Assert", CreatorFunction(translate_no_op)},
@@ -35,6 +35,7 @@ OP_CONVERTER(translate_addv2_op);
OP_CONVERTER(translate_add_n_op);
OP_CONVERTER(translate_approximate_equal_op);
OP_CONVERTER(translate_adjust_contrast_op);
OP_CONVERTER(translate_angle_op);
OP_CONVERTER(translate_arg_max_op);
OP_CONVERTER(translate_arg_min_op);
OP_CONVERTER(translate_atan2_op);
90 changes: 90 additions & 0 deletions src/frontends/tensorflow_common/src/op/angle.cpp
@@ -0,0 +1,90 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "common_op_table.hpp"
#include "helper_ops/complex_type_mark.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/atan.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/equal.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/greater.hpp"
#include "openvino/op/greater_eq.hpp"
#include "openvino/op/less.hpp"
#include "openvino/op/logical_and.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/select.hpp"
#include "openvino/op/subtract.hpp"

using namespace std;
using namespace ov::op;

namespace ov {
namespace frontend {
namespace tensorflow {
namespace op {

OutputVector translate_angle_op(const NodeContext& node) {
default_op_checks(node, 1, {"Angle"}, true);
auto complex = node.get_input(0);
auto result_type = node.get_attribute<ov::element::Type>("Tout");

auto complex_type_mark = as_type_ptr<ComplexTypeMark>(complex.get_node_shared_ptr());

TENSORFLOW_OP_VALIDATION(
node,
complex_type_mark,
"[TensorFlow Frontend] inconsistent model: Angle operation expects complex type tensor on input");

complex = complex_type_mark->input_value(0);
auto real_index = make_shared<v0::Constant>(element::i32, Shape{}, 0);
auto imag_index = make_shared<v0::Constant>(element::i32, Shape{}, 1);
auto gather_axis = make_shared<v0::Constant>(element::i32, Shape{1}, -1);

auto x = make_shared<v8::Gather>(complex, real_index, gather_axis)->output(0);
auto y = make_shared<v8::Gather>(complex, imag_index, gather_axis)->output(0);

// handle the first condition : x>0
auto div_y_x = make_shared<v1::Divide>(y, x);
auto atan = make_shared<v0::Atan>(div_y_x);
auto const_zero = create_same_type_const_scalar<int32_t>(x, 0);
auto result = atan->output(0);

// handle the second condition : x<0 && y>=0
auto const_pi = create_same_type_const_scalar<double>(x, std::atan(1.0) * 4);
auto is_x_negative = make_shared<v1::Less>(x, const_zero);
auto y_non_negative = make_shared<v1::GreaterEqual>(y, const_zero);
auto cond1 = make_shared<v1::LogicalAnd>(is_x_negative, y_non_negative);
auto atan_y_x_plus_pi = make_shared<v1::Add>(atan, const_pi);
result = make_shared<v1::Select>(cond1, atan_y_x_plus_pi, result);

// handle the third condition : x<0 && y<0
auto is_y_negative = make_shared<v1::Less>(y, const_zero);
auto cond2 = make_shared<v1::LogicalAnd>(is_x_negative, is_y_negative);
auto atan_y_x_minus_pi = make_shared<v1::Subtract>(atan, const_pi);
result = make_shared<v1::Select>(cond2, atan_y_x_minus_pi, result);

// handle the fourth condition : x=0 && y>0
auto is_x_zero = make_shared<v1::Equal>(x, const_zero);
auto is_y_positive = make_shared<v1::Greater>(y, const_zero);
auto cond3 = make_shared<v1::LogicalAnd>(is_x_zero, is_y_positive);
auto const_two = create_same_type_const_scalar<int32_t>(x, 2);
auto pi_div_two = make_shared<v1::Divide>(const_pi, const_two);
result = make_shared<v1::Select>(cond3, pi_div_two, result);

// handle the fifth condition : x=0 && y<0
auto cond4 = make_shared<v1::LogicalAnd>(is_x_zero, is_y_negative);
auto const_minus_two = create_same_type_const_scalar<int32_t>(x, -2);
auto pi_div_minus_two = make_shared<v1::Divide>(const_pi, const_minus_two);
result = make_shared<v1::Select>(cond4, pi_div_minus_two, result);
auto result_changed_type = make_shared<v0::Convert>(result, result_type)->output(0);

set_node_name(node.get_name(), result_changed_type.get_node_shared_ptr());
return {result_changed_type};
}
} // namespace op
} // namespace tensorflow
} // namespace frontend
} // namespace ov
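The translator computes atan2(imag, real) from atan(y/x) plus the four quadrant/axis corrections above; the fifth branch (x = 0, y < 0) selects -pi/2 via pi_div_minus_two. A NumPy sketch of the same branch logic, checked against np.angle (not part of the commit):

```python
import numpy as np

def angle_branches(x, y):
    # Suppress divide-by-zero warnings for the x == 0 lanes; those lanes
    # are overwritten by the explicit +/- pi/2 branches below.
    with np.errstate(divide="ignore", invalid="ignore"):
        atan = np.arctan(y / x)
    result = atan                                              # x > 0
    result = np.where((x < 0) & (y >= 0), atan + np.pi, result)
    result = np.where((x < 0) & (y < 0), atan - np.pi, result)
    result = np.where((x == 0) & (y > 0), np.pi / 2, result)
    result = np.where((x == 0) & (y < 0), -np.pi / 2, result)
    return result

x = np.array([1.0, -1.0, -1.0, 0.0, 0.0])
y = np.array([1.0, 1.0, -1.0, 1.0, -1.0])
assert np.allclose(angle_branches(x, y), np.angle(x + 1j * y))
```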
26 changes: 22 additions & 4 deletions src/plugins/intel_cpu/CMakeLists.txt
@@ -30,6 +30,16 @@ elseif(OV_COMPILER_IS_CLANG)
endif()
endif()

if (AARCH64 AND NOT APPLE AND CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2)
# According to https://github.com/ARM-software/ComputeLibrary/issues/1053#issuecomment-1846903707,
# 'multi_isa=1' below enables the FP32, FP16 and SVE / SVE2 kernels.
# However, the arm_sve.h header is not available on GCC older than 10.2, so we have to check the compiler version.
set(OV_CPU_AARCH64_USE_MULTI_ISA_DEFAULT ON)
else()
set(OV_CPU_AARCH64_USE_MULTI_ISA_DEFAULT OFF)
endif()
set(OV_CPU_AARCH64_USE_MULTI_ISA ${OV_CPU_AARCH64_USE_MULTI_ISA_DEFAULT} CACHE BOOL "Build multi-ISA ACL")

set(OV_CPU_ARM_TARGET_GENERIC_ARCHS armv8a
armv8.2-a
armv8.6-a armv8.6-a-sve armv8.6-a-sve2 armv8.6-a-sve2-sme2
@@ -41,17 +51,25 @@ if(ARM)
# requires estate=32
${OV_CPU_ARM_TARGET_GENERIC_ARCHS})
elseif(AARCH64)
set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a)
if(APPLE)
set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a)
else()
if(OV_CPU_AARCH64_USE_MULTI_ISA)
# Set v8a even though we want fp16 kernels, because
# we use multi_isa=1 in ACLConfig.cmake to enable both fp16 and fp32 kernels;
# the actual kernel is selected at runtime based on CPU capabilities
set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8a)
else()
set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a)
endif()
endif()
set(OV_CPU_ARM_TARGET_ARCHS arm64-v8a
arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2
# used with estate=64
${OV_CPU_ARM_TARGET_GENERIC_ARCHS})
endif()
set(OV_CPU_ARM_TARGET_ARCH ${OV_CPU_ARM_TARGET_ARCH_DEFAULT} CACHE STRING "Architecture for ARM ComputeLibrary")
set_property(CACHE OV_CPU_ARM_TARGET_ARCH PROPERTY STRINGS ${OV_CPU_ARM_TARGET_ARCHS})
if(OV_CPU_ARM_TARGET_ARCH MATCHES "(armv|arm64-v)[8-9]\\.")
add_definitions(-DOV_CPU_ARM_ENABLE_FP16)
endif()

if(X86 OR X86_64 OR AARCH64)
# disable mlas with webassembly