
Commit

Merge branch 'master' into xc/fix_sdpa_fusion_for_f16
xczhai authored Nov 26, 2024
2 parents 5e33466 + 147d0af commit 0dba6ff
Showing 13 changed files with 489 additions and 47 deletions.
@@ -145,6 +145,7 @@ def __init__(self, options):
"torch.ops.aten.hardtanh.default": None,
"torch.ops.aten.hardtanh_.default": None,
"torch.ops.aten.index.Tensor": None,
"torch.ops.aten._unsafe_index.Tensor": None,
"torch.ops.aten.index_select.default": None,
"torch.ops.aten.isfinite.default": None,
"torch.ops.aten.isinf.default": None,
@@ -0,0 +1,77 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "core/operator_set.hpp"
#include "exceptions.hpp"
#include "openvino/frontend/exception.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/range.hpp"
#include "openvino/op/reduce_mean.hpp"
#include "openvino/op/shape_of.hpp"
#include "openvino/op/sqrt.hpp"
#include "utils/common.hpp"

using namespace ov::op;
using ::ONNX_NAMESPACE::TensorProto_DataType;

namespace ov {
namespace frontend {
namespace onnx {
namespace com_microsoft {
namespace opset_1 {

ov::OutputVector simplified_layer_normalization(const ov::frontend::onnx::Node& node) {
    common::default_op_checks(node, 2);

    const auto inputs = node.get_ov_inputs();
    auto X = inputs[0];
    const auto scale = inputs[1];

    CHECK_VALID_NODE(node,
                     X.get_element_type() == scale.get_element_type(),
                     "X and scale must be of same type, got :",
                     X.get_element_type(),
                     scale.get_element_type());

    float epsilon = node.get_attribute_value<float>("epsilon", 1e-5f);
    int64_t axis = node.get_attribute_value<int64_t>("axis", -1);
    int64_t default_stash_type = static_cast<int64_t>(TensorProto_DataType::TensorProto_DataType_FLOAT);
    int64_t stash_type_i = node.get_attribute_value<int64_t>("stash_type", default_stash_type);
    element::Type stash_type = common::get_ov_element_type(stash_type_i);

    auto rank = std::make_shared<v0::ShapeOf>(X);
    auto axes = std::make_shared<v4::Range>(v0::Constant::create(element::i64, {}, {axis}),
                                            (axis < 0 ? v0::Constant::create(element::i64, {}, {0})->output(0) : rank),
                                            v0::Constant::create(element::i64, {}, {1}),
                                            element::i64);

    bool needs_type_casting = stash_type != X.get_element_type();
    if (needs_type_casting) {
        X = std::make_shared<v0::Convert>(X, stash_type);
    }

    auto squared_X = std::make_shared<v1::Multiply>(X, X);  // X^2
    auto mean = std::make_shared<v1::ReduceMean>(squared_X, axes, true);  // mean = (1/N) * Σ(j=1 to N) X_j^2
    auto rms_value =
        std::make_shared<v0::Sqrt>(std::make_shared<v1::Add>(mean, v0::Constant::create(stash_type, {}, {epsilon})));
    auto inv_std_var = std::make_shared<v1::Divide>(v0::Constant::create(stash_type, {}, {1.0}), rms_value);
    auto normalized = std::make_shared<v1::Multiply>(X, inv_std_var);  // X / RMS(X)

    auto scaled = std::make_shared<v1::Multiply>(normalized, scale);  // (X / RMS(X)) * scale

    return ov::OutputVector{scaled, inv_std_var};
}

ONNX_OP("SimplifiedLayerNormalization",
        OPSET_SINCE(1),
        com_microsoft::opset_1::simplified_layer_normalization,
        MICROSOFT_DOMAIN);
} // namespace opset_1
} // namespace com_microsoft
} // namespace onnx
} // namespace frontend
} // namespace ov
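The translator above expands SimplifiedLayerNormalization (an RMS-style layer normalization from the com.microsoft domain) into basic OpenVINO ops: square the input, take the mean over the axes [axis, rank), add epsilon, take the square root to get RMS(X), then return both X / RMS(X) * scale and the inverse RMS, with the arithmetic optionally carried out in stash_type precision. A rough NumPy sketch of the same math (for illustration only; the helper name and signature are ours, not part of this commit):

import numpy as np

def simplified_layer_norm(x, scale, axis=-1, epsilon=1e-5, stash_dtype=np.float32):
    """RMS-style normalization: y = x / sqrt(mean(x^2, axes) + eps) * scale."""
    x = x.astype(stash_dtype)  # mimic the stash_type cast
    axes = tuple(range(axis, 0)) if axis < 0 else tuple(range(axis, x.ndim))
    mean_sq = np.mean(x * x, axis=axes, keepdims=True)
    inv_rms = 1.0 / np.sqrt(mean_sq + epsilon)
    return x * inv_rms * scale, inv_rms

With scale set to ones this reproduces the expected values used in the tests further down.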
@@ -0,0 +1,69 @@
ir_version: 3
producer_name: "OpenVINO ONNX Frontend"
graph {
  node {
    input: "X"
    input: "scale"
    output: "simplified_layer_norm"
    name: "test_simplified_layer_norm"
    op_type: "SimplifiedLayerNormalization"
    attribute {
      name: "epsilon"
      f: 1e-05
      type: FLOAT
    }
    attribute {
      name: "axis"
      i: -1
      type: INT
    }
    attribute {
      name: "stash_type"
      i: 1
      type: INT
    }
    domain: "com.microsoft"
  }
  initializer {
    dims: 8
    data_type: 1
    name: "scale"
    raw_data: "\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f"
  }
  input {
    name: "X"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
  output {
    name: "simplified_layer_norm"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
}
opset_import {
  domain: "com.microsoft"
  version: 1
}
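The prototxt above (and the two variants that follow) describes a one-node test graph: a single SimplifiedLayerNormalization node from the com.microsoft domain with a constant scale initializer of eight 1.0f values. If you wanted to build an equivalent model programmatically, a sketch with the onnx Python helpers could look like the following (the output file name is illustrative and this is not how the repository's test models were generated):

import numpy as np
import onnx
from onnx import helper, numpy_helper

node = helper.make_node(
    "SimplifiedLayerNormalization",
    inputs=["X", "scale"],
    outputs=["simplified_layer_norm"],
    name="test_simplified_layer_norm",
    domain="com.microsoft",
    epsilon=1e-5,
    axis=-1,
    stash_type=1,  # 1 == FLOAT
)

scale = numpy_helper.from_array(np.ones(8, dtype=np.float32), name="scale")
X = helper.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [2, 8])
Y = helper.make_tensor_value_info("simplified_layer_norm", onnx.TensorProto.FLOAT, [2, 8])

graph = helper.make_graph([node], "graph", [X], [Y], initializer=[scale])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("com.microsoft", 1)])
onnx.save(model, "simplified_layer_normalization_2x8.onnx")  # illustrative name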
@@ -0,0 +1,75 @@
ir_version: 3
producer_name: "OpenVINO ONNX Frontend"
graph {
  node {
    input: "X"
    input: "scale"
    output: "simplified_layer_norm"
    name: "test_simplified_layer_norm"
    op_type: "SimplifiedLayerNormalization"
    attribute {
      name: "epsilon"
      f: 1e-05
      type: FLOAT
    }
    attribute {
      name: "axis"
      i: -1
      type: INT
    }
    attribute {
      name: "stash_type"
      i: 1
      type: INT
    }
    domain: "com.microsoft"
  }
  initializer {
    dims: 8
    data_type: 1
    name: "scale"
    raw_data: "\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f"
  }
  input {
    name: "X"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
  output {
    name: "simplified_layer_norm"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
}
opset_import {
  domain: "com.microsoft"
  version: 1
}
@@ -0,0 +1,69 @@
ir_version: 3
producer_name: "OpenVINO ONNX Frontend"
graph {
  node {
    input: "X"
    input: "scale"
    output: "simplified_layer_norm"
    name: "test_simplified_layer_norm"
    op_type: "SimplifiedLayerNormalization"
    attribute {
      name: "epsilon"
      f: 1e-05
      type: FLOAT
    }
    attribute {
      name: "axis"
      i: -1
      type: INT
    }
    attribute {
      name: "stash_type"
      i: 1
      type: INT
    }
    domain: "com.microsoft"
  }
  initializer {
    dims: 8
    data_type: 1
    name: "scale"
    raw_data: "\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f"
  }
  input {
    name: "X"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
  output {
    name: "simplified_layer_norm"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
}
opset_import {
  domain: "com.microsoft"
  version: 1
}
39 changes: 39 additions & 0 deletions src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp
@@ -1356,3 +1356,42 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_quickgelu) {
test_case.run();
}
}

OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_simplified_layer_normalization_2x2x8) {
    const auto model = convert_model("com.microsoft/simplified_layer_normalization_2x2x8.onnx");
    auto test_case = ov::test::TestCase(model, s_device);

    const std::vector<float> X{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f,
                               12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f,
                               23.f, 24.f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f, 32.f};

    test_case.add_input<float>(Shape{2, 2, 8}, X);

    test_case.add_expected_output<float>(
        Shape{2, 2, 8},
        {0.19802947f, 0.39605895f, 0.59408844f, 0.7921179f, 0.9901474f, 1.1881769f, 1.3862064f, 1.5842358f,
         0.7082005f, 0.78688943f, 0.8655784f, 0.94426733f, 1.0229563f, 1.1016452f, 1.1803342f, 1.2590232f,
         0.8241365f, 0.8726151f, 0.9210937f, 0.96957237f, 1.0180509f, 1.0665295f, 1.1150082f, 1.1634868f,
         0.87437177f, 0.90934664f, 0.9443215f, 0.9792964f, 1.0142713f, 1.0492461f, 1.084221f, 1.1191958f});

    test_case.run();
}

OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_simplified_layer_normalization_3x8) {
    const auto model = convert_model("com.microsoft/simplified_layer_normalization_3x8.onnx");
    auto test_case = ov::test::TestCase(model, s_device);

    const std::vector<float> X{0.198f, 0.396f, 0.594f, 0.792f, 0.990f, 1.188f, 1.386f, 1.584f,
                               0.708f, 0.786f, 0.865f, 0.944f, 1.023f, 1.102f, 1.180f, 1.259f,
                               0.824f, 0.873f, 0.921f, 0.970f, 1.018f, 1.067f, 1.115f, 1.163f};

    test_case.add_input<float>(Shape{3, 8}, X);

    test_case.add_expected_output<float>(
        Shape{3, 8},
        {0.19802852f, 0.39605704f, 0.5940855f, 0.7921141f, 0.9901426f, 1.188171f, 1.3861997f, 1.5842282f,
         0.70813656f, 0.7861516f, 0.86516684f, 0.94418204f, 1.0231973f, 1.1022125f, 1.1802275f, 1.2592428f,
         0.82395196f, 0.8729491f, 0.9209463f, 0.96994346f, 1.0179406f, 1.0669378f, 1.114935f, 1.1629322f});

    test_case.run();
}
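The expected outputs in both tests follow directly from the RMS formula with scale = 1. For the first row of the 2x2x8 case, X = [1, ..., 8], so mean(X^2) = 204 / 8 = 25.5 and 1 / sqrt(25.5 + 1e-5) ≈ 0.19803, matching the first expected value 0.19802947f. A quick NumPy check (illustrative only, not part of the commit):

import numpy as np

row = np.arange(1.0, 9.0, dtype=np.float32)         # first row of the 2x2x8 input
inv_rms = 1.0 / np.sqrt(np.mean(row * row) + 1e-5)  # 1 / sqrt(25.5 + 1e-5) ≈ 0.19803
print(row * inv_rms)  # ≈ [0.19803, 0.39606, ..., 1.58424]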
1 change: 1 addition & 0 deletions src/frontends/pytorch/src/op_table.cpp
@@ -862,6 +862,7 @@ const std::unordered_map<std::string, CreatorFunction> get_supported_ops_fx() {
{"aten.hardtanh.default", op::translate_hardtanh},
{"aten.hardtanh_.default", op::inplace_op<op::translate_hardtanh>},
{"aten.index.Tensor", op::translate_index_fx},
{"aten._unsafe_index.Tensor", op::translate_index_fx},
{"aten.index_select.default", op::translate_index_select},
{"aten.isfinite.default", op::translate_1to1_match_1_inputs<opset10::IsFinite>},
{"aten.isinf.default", op::translate_1to1_match_1_inputs<opset10::IsInf>},
@@ -25,8 +25,9 @@ namespace {
static const std::map<std::string, std::string> ISOL_PRESETS = {{"COMPUTE",
"P:DQMatMulGQu4/compute,P:DQMatMulCWu4/compute,"
"P:DQMatMulGQi4/compute,P:DQMatMulCWi4/compute,"
"P:DQMatMulConv/compute,"
"P:VocabMatMul/compute,"
"P:RMSNorm/compute"}};
"P:RMSNorm/compute,P:RMSNorm2/compute"}};
}

// For missing declaration warning