
Commit

Merge branch 'master' into xc/fix_sdpa_fusion_for_f16
xczhai authored Nov 26, 2024
2 parents 5e33466 + 147d0af commit 0dba6ff
Showing 13 changed files with 489 additions and 47 deletions.
@@ -145,6 +145,7 @@ def __init__(self, options):
"torch.ops.aten.hardtanh.default": None,
"torch.ops.aten.hardtanh_.default": None,
"torch.ops.aten.index.Tensor": None,
"torch.ops.aten._unsafe_index.Tensor": None,
"torch.ops.aten.index_select.default": None,
"torch.ops.aten.isfinite.default": None,
"torch.ops.aten.isinf.default": None,
@@ -0,0 +1,77 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "core/operator_set.hpp"
#include "exceptions.hpp"
#include "openvino/frontend/exception.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/range.hpp"
#include "openvino/op/reduce_mean.hpp"
#include "openvino/op/shape_of.hpp"
#include "openvino/op/sqrt.hpp"
#include "utils/common.hpp"

using namespace ov::op;
using ::ONNX_NAMESPACE::TensorProto_DataType;

namespace ov {
namespace frontend {
namespace onnx {
namespace com_microsoft {
namespace opset_1 {

ov::OutputVector simplified_layer_normalization(const ov::frontend::onnx::Node& node) {
    common::default_op_checks(node, 2);

    const auto inputs = node.get_ov_inputs();
    auto X = inputs[0];
    const auto scale = inputs[1];

    CHECK_VALID_NODE(node,
                     X.get_element_type() == scale.get_element_type(),
                     "X and scale must be of same type, got :",
                     X.get_element_type(),
                     scale.get_element_type());

    float epsilon = node.get_attribute_value<float>("epsilon", 1e-5f);
    int64_t axis = node.get_attribute_value<int64_t>("axis", -1);
    int64_t default_stash_type = static_cast<int64_t>(TensorProto_DataType::TensorProto_DataType_FLOAT);
    int64_t stash_type_i = node.get_attribute_value<int64_t>("stash_type", default_stash_type);
    element::Type stash_type = common::get_ov_element_type(stash_type_i);

    auto rank = std::make_shared<v0::ShapeOf>(X);
    auto axes = std::make_shared<v4::Range>(v0::Constant::create(element::i64, {}, {axis}),
                                            (axis < 0 ? v0::Constant::create(element::i64, {}, {0})->output(0) : rank),
                                            v0::Constant::create(element::i64, {}, {1}),
                                            element::i64);

    bool needs_type_casting = stash_type != X.get_element_type();
    if (needs_type_casting) {
        X = std::make_shared<v0::Convert>(X, stash_type);
    }

    auto squared_X = std::make_shared<v1::Multiply>(X, X);  // X^2
    auto mean = std::make_shared<v1::ReduceMean>(squared_X, axes, true);  // mean = (1/N) * Σ(j=1 to N) X_j^2
    auto rms_value =
        std::make_shared<v0::Sqrt>(std::make_shared<v1::Add>(mean, v0::Constant::create(stash_type, {}, {epsilon})));
    auto inv_std_var = std::make_shared<v1::Divide>(v0::Constant::create(stash_type, {}, {1.0}), rms_value);
    auto normalized = std::make_shared<v1::Multiply>(X, inv_std_var);  // X / RMS(X)

    auto scaled = std::make_shared<v1::Multiply>(normalized, scale);  // (X / RMS(X)) * scale

    return ov::OutputVector{scaled, inv_std_var};
}

ONNX_OP("SimplifiedLayerNormalization",
        OPSET_SINCE(1),
        com_microsoft::opset_1::simplified_layer_normalization,
        MICROSOFT_DOMAIN);
} // namespace opset_1
} // namespace com_microsoft
} // namespace onnx
} // namespace frontend
} // namespace ov
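The translator above expands SimplifiedLayerNormalization (an RMS-style layer normalization from the com.microsoft domain) into basic OpenVINO ops: square the input, take the mean over the axes [axis, rank), add epsilon, take the square root to get RMS(X), then return both X / RMS(X) * scale and the inverse RMS, with the arithmetic optionally carried out in stash_type precision. A rough NumPy sketch of the same math (for illustration only; the helper name and signature are ours, not part of this commit):

import numpy as np

def simplified_layer_norm(x, scale, axis=-1, epsilon=1e-5, stash_dtype=np.float32):
    """RMS-style normalization: y = x / sqrt(mean(x^2, axes) + eps) * scale."""
    x = x.astype(stash_dtype)  # mimic the stash_type cast
    axes = tuple(range(axis, 0)) if axis < 0 else tuple(range(axis, x.ndim))
    mean_sq = np.mean(x * x, axis=axes, keepdims=True)
    inv_rms = 1.0 / np.sqrt(mean_sq + epsilon)
    return x * inv_rms * scale, inv_rms

With scale set to ones this reproduces the expected values used in the tests further down.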
@@ -0,0 +1,69 @@
ir_version: 3
producer_name: "OpenVINO ONNX Frontend"
graph {
  node {
    input: "X"
    input: "scale"
    output: "simplified_layer_norm"
    name: "test_simplified_layer_norm"
    op_type: "SimplifiedLayerNormalization"
    attribute {
      name: "epsilon"
      f: 1e-05
      type: FLOAT
    }
    attribute {
      name: "axis"
      i: -1
      type: INT
    }
    attribute {
      name: "stash_type"
      i: 1
      type: INT
    }
    domain: "com.microsoft"
  }
  initializer {
    dims: 8
    data_type: 1
    name: "scale"
    raw_data: "\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f"
  }
  input {
    name: "X"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
  output {
    name: "simplified_layer_norm"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
}
opset_import {
  domain: "com.microsoft"
  version: 1
}
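The prototxt above (and the two variants that follow) describes a one-node test graph: a single SimplifiedLayerNormalization node from the com.microsoft domain with a constant scale initializer of eight 1.0f values. If you wanted to build an equivalent model programmatically, a sketch with the onnx Python helpers could look like the following (the output file name is illustrative and this is not how the repository's test models were generated):

import numpy as np
import onnx
from onnx import helper, numpy_helper

node = helper.make_node(
    "SimplifiedLayerNormalization",
    inputs=["X", "scale"],
    outputs=["simplified_layer_norm"],
    name="test_simplified_layer_norm",
    domain="com.microsoft",
    epsilon=1e-5,
    axis=-1,
    stash_type=1,  # 1 == FLOAT
)

scale = numpy_helper.from_array(np.ones(8, dtype=np.float32), name="scale")
X = helper.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [2, 8])
Y = helper.make_tensor_value_info("simplified_layer_norm", onnx.TensorProto.FLOAT, [2, 8])

graph = helper.make_graph([node], "graph", [X], [Y], initializer=[scale])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("com.microsoft", 1)])
onnx.save(model, "simplified_layer_normalization_2x8.onnx")  # illustrative name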
@@ -0,0 +1,75 @@
ir_version: 3
producer_name: "OpenVINO ONNX Frontend"
graph {
  node {
    input: "X"
    input: "scale"
    output: "simplified_layer_norm"
    name: "test_simplified_layer_norm"
    op_type: "SimplifiedLayerNormalization"
    attribute {
      name: "epsilon"
      f: 1e-05
      type: FLOAT
    }
    attribute {
      name: "axis"
      i: -1
      type: INT
    }
    attribute {
      name: "stash_type"
      i: 1
      type: INT
    }
    domain: "com.microsoft"
  }
  initializer {
    dims: 8
    data_type: 1
    name: "scale"
    raw_data: "\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f"
  }
  input {
    name: "X"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
  output {
    name: "simplified_layer_norm"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 2
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
}
opset_import {
  domain: "com.microsoft"
  version: 1
}
@@ -0,0 +1,69 @@
ir_version: 3
producer_name: "OpenVINO ONNX Frontend"
graph {
  node {
    input: "X"
    input: "scale"
    output: "simplified_layer_norm"
    name: "test_simplified_layer_norm"
    op_type: "SimplifiedLayerNormalization"
    attribute {
      name: "epsilon"
      f: 1e-05
      type: FLOAT
    }
    attribute {
      name: "axis"
      i: -1
      type: INT
    }
    attribute {
      name: "stash_type"
      i: 1
      type: INT
    }
    domain: "com.microsoft"
  }
  initializer {
    dims: 8
    data_type: 1
    name: "scale"
    raw_data: "\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f"
  }
  input {
    name: "X"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
  output {
    name: "simplified_layer_norm"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 8
          }
        }
      }
    }
  }
}
opset_import {
  domain: "com.microsoft"
  version: 1
}
39 changes: 39 additions & 0 deletions src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp
@@ -1356,3 +1356,42 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_quickgelu) {
test_case.run();
}
}

OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_simplified_layer_normalization_2x2x8) {
    const auto model = convert_model("com.microsoft/simplified_layer_normalization_2x2x8.onnx");
    auto test_case = ov::test::TestCase(model, s_device);

    const std::vector<float> X{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f,
                               12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f,
                               23.f, 24.f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f, 32.f};

    test_case.add_input<float>(Shape{2, 2, 8}, X);

    test_case.add_expected_output<float>(
        Shape{2, 2, 8},
        {0.19802947f, 0.39605895f, 0.59408844f, 0.7921179f, 0.9901474f, 1.1881769f, 1.3862064f, 1.5842358f,
         0.7082005f, 0.78688943f, 0.8655784f, 0.94426733f, 1.0229563f, 1.1016452f, 1.1803342f, 1.2590232f,
         0.8241365f, 0.8726151f, 0.9210937f, 0.96957237f, 1.0180509f, 1.0665295f, 1.1150082f, 1.1634868f,
         0.87437177f, 0.90934664f, 0.9443215f, 0.9792964f, 1.0142713f, 1.0492461f, 1.084221f, 1.1191958f});

    test_case.run();
}

OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_simplified_layer_normalization_3x8) {
    const auto model = convert_model("com.microsoft/simplified_layer_normalization_3x8.onnx");
    auto test_case = ov::test::TestCase(model, s_device);

    const std::vector<float> X{0.198f, 0.396f, 0.594f, 0.792f, 0.990f, 1.188f, 1.386f, 1.584f,
                               0.708f, 0.786f, 0.865f, 0.944f, 1.023f, 1.102f, 1.180f, 1.259f,
                               0.824f, 0.873f, 0.921f, 0.970f, 1.018f, 1.067f, 1.115f, 1.163f};

    test_case.add_input<float>(Shape{3, 8}, X);

    test_case.add_expected_output<float>(
        Shape{3, 8},
        {0.19802852f, 0.39605704f, 0.5940855f, 0.7921141f, 0.9901426f, 1.188171f, 1.3861997f, 1.5842282f,
         0.70813656f, 0.7861516f, 0.86516684f, 0.94418204f, 1.0231973f, 1.1022125f, 1.1802275f, 1.2592428f,
         0.82395196f, 0.8729491f, 0.9209463f, 0.96994346f, 1.0179406f, 1.0669378f, 1.114935f, 1.1629322f});

    test_case.run();
}
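The expected outputs in both tests follow directly from the RMS formula with scale = 1. For the first row of the 2x2x8 case, X = [1, ..., 8], so mean(X^2) = 204 / 8 = 25.5 and 1 / sqrt(25.5 + 1e-5) ≈ 0.19803, matching the first expected value 0.19802947f. A quick NumPy check (illustrative only, not part of the commit):

import numpy as np

row = np.arange(1.0, 9.0, dtype=np.float32)         # first row of the 2x2x8 input
inv_rms = 1.0 / np.sqrt(np.mean(row * row) + 1e-5)  # 1 / sqrt(25.5 + 1e-5) ≈ 0.19803
print(row * inv_rms)  # ≈ [0.19803, 0.39606, ..., 1.58424]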
1 change: 1 addition & 0 deletions src/frontends/pytorch/src/op_table.cpp
@@ -862,6 +862,7 @@ const std::unordered_map<std::string, CreatorFunction> get_supported_ops_fx() {
{"aten.hardtanh.default", op::translate_hardtanh},
{"aten.hardtanh_.default", op::inplace_op<op::translate_hardtanh>},
{"aten.index.Tensor", op::translate_index_fx},
{"aten._unsafe_index.Tensor", op::translate_index_fx},
{"aten.index_select.default", op::translate_index_select},
{"aten.isfinite.default", op::translate_1to1_match_1_inputs<opset10::IsFinite>},
{"aten.isinf.default", op::translate_1to1_match_1_inputs<opset10::IsInf>},
@@ -25,8 +25,9 @@ namespace {
static const std::map<std::string, std::string> ISOL_PRESETS = {{"COMPUTE",
"P:DQMatMulGQu4/compute,P:DQMatMulCWu4/compute,"
"P:DQMatMulGQi4/compute,P:DQMatMulCWi4/compute,"
"P:DQMatMulConv/compute,"
"P:VocabMatMul/compute,"
"P:RMSNorm/compute"}};
"P:RMSNorm/compute,P:RMSNorm2/compute"}};
}

// For missing declaration warning