From c1ee5d2b68f94c5a3fa812df43c056ce90633f70 Mon Sep 17 00:00:00 2001
From: Mateusz Tabaka
Date: Fri, 12 Nov 2021 15:35:46 +0100
Subject: [PATCH] Add support for ONNX operator com.microsoft.Attention (#8008)

Ticket: 62890
---
 .../src/op/com.microsoft/attention.cpp        | 548 ++++++++++++++++++
 .../src/op/com.microsoft/attention.hpp        |  17 +
 .../frontend/onnx/frontend/src/ops_bridge.cpp |   2 +
 .../onnx/com.microsoft/attention.prototxt     | 123 ++++
 .../attention_dynamic_shapes.prototxt         |  90 +++
 .../attention_extra_add.prototxt              | 190 ++++++
 .../attention_mask_index_1.prototxt           | 163 ++++++
 .../attention_mask_index_2.prototxt           | 168 ++++++
 .../attention_mask_index_3.prototxt           | 166 ++++++
 .../attention_mask_index_4.prototxt           | 169 ++++++
 .../com.microsoft/attention_past.prototxt     | 193 ++++++
 .../attention_qkv_hidden_sizes.prototxt       | 130 +++++
 .../attention_unidirectional.prototxt         | 154 +++++
 .../{ => com.microsoft}/bias_gelu.prototxt    |   0
 .../embed_layer_normalization.prototxt        |   0
 ...ayer_normalization_dynamic_shapes.prototxt |   0
 ...malization_with_segment_embedding.prototxt |   0
 ...n_with_segment_embedding_and_mask.prototxt |   0
 ...yer_normalization_dynamic_shapes.prototxt} |   0
 ...ip_layer_normalization_with_gamma.prototxt |   0
 ...yer_normalization_with_gamma_beta.prototxt |   0
 ...ormalization_with_gamma_beta_bias.prototxt |   0
 .../onnx/onnx_import_com_microsoft.in.cpp     | 493 +++++++++++++++-
 ngraph/test/runtime/ie/unit_test.manifest     |   2 +
 24 files changed, 2595 insertions(+), 13 deletions(-)
 create mode 100644 ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.cpp
 create mode 100644 ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.hpp
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_dynamic_shapes.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_extra_add.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_mask_index_1.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_mask_index_2.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_mask_index_3.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_mask_index_4.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_past.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_qkv_hidden_sizes.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_unidirectional.prototxt
 rename ngraph/test/models/onnx/{ => com.microsoft}/bias_gelu.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/embed_layer_normalization.prototxt (100%)
 rename ngraph/test/models/onnx/{dynamic_shapes => com.microsoft}/embed_layer_normalization_dynamic_shapes.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/embed_layer_normalization_with_segment_embedding.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/embed_layer_normalization_with_segment_embedding_and_mask.prototxt (100%)
 rename ngraph/test/models/onnx/{dynamic_shapes/skip_layer_normalization.prototxt => com.microsoft/skip_layer_normalization_dynamic_shapes.prototxt} (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/skip_layer_normalization_with_gamma.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/skip_layer_normalization_with_gamma_beta.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/skip_layer_normalization_with_gamma_beta_bias.prototxt (100%)
diff --git a/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.cpp b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.cpp
new file mode 100644
index 00000000000000..f874f96bb3cf3e
--- /dev/null
+++ b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.cpp
@@ -0,0 +1,548 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "op/com.microsoft/attention.hpp"
+
+#include "default_opset.hpp"
+#include "ngraph/builder/split.hpp"
+#include "onnx_import/core/null_node.hpp"
+
+namespace ngraph {
+namespace onnx_import {
+namespace op {
+namespace detail {
+namespace {
+NodeVector split_to_QKV(const std::shared_ptr<ngraph::Node>& node,
+                        int64_t num_heads,
+                        const std::vector<int64_t>& qkv_hidden_sizes);
+
+using NodeTuple = std::tuple<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>>;
+
+NodeTuple get_attention_mask(const OutputVector& op_inputs, bool unidirectional);
+
+std::shared_ptr<ngraph::Node> attention_softmax(const OutputVector& op_inputs,
+                                                const std::shared_ptr<ngraph::Node>& Q,
+                                                std::shared_ptr<ngraph::Node> K,
+                                                std::shared_ptr<ngraph::Node> V,
+                                                const std::shared_ptr<ngraph::Node>& attention_mask,
+                                                const std::shared_ptr<ngraph::Node>& bin_mask,
+                                                const std::shared_ptr<ngraph::Node>& head_size,
+                                                bool unidirectional);
+
+std::shared_ptr<ngraph::Node> get_present_state(const std::shared_ptr<ngraph::Node>& K,
+                                                const std::shared_ptr<ngraph::Node>& V,
+                                                const OutputVector& op_inputs);
+}  // namespace
+}  // namespace detail
+
+namespace set_1 {
+OutputVector attention(const Node& node) {
+    auto nodes = node.get_ng_inputs();
+    const auto& input = nodes[0];
+    const auto& weights = nodes[1];
+    const auto& bias = nodes[2];
+
+    // Attention is defined as:
+    //   Q = input x Wq, K = input x Wk, V = input x Wv
+    //   attention = softmax((Q x K') / sqrt(head_size)) x V
+    //
+    // In this operator, Wq, Wk and Wv are combined in a single 'weights' input along the second axis,
+    // so the approach here is to do a single big matrix multiplication
+    // and then split the result into Q, K and V matrices.
+
+    auto matmul = std::make_shared<default_opset::MatMul>(input, weights);
+    auto add = std::make_shared<default_opset::Add>(matmul, bias);
+
+    const auto num_heads = node.get_attribute_value<int64_t>("num_heads");
+    const auto qkv_hidden_sizes = node.get_attribute_value<std::vector<int64_t>>("qkv_hidden_sizes", {});
+    const auto split_result = detail::split_to_QKV(add, num_heads, qkv_hidden_sizes);
+
+    bool unidirectional = static_cast<bool>(node.get_attribute_value<int64_t>("unidirectional", 0));
+    // the mask has values of either 0 or -10000 and its shape must be
+    // broadcastable to (batch_size, num_heads, sequence_length, past_sequence_length + sequence_length)
+    // so it can be added to Q x K' later
+    // past_sequence_length can be 0 if the 'past' input is not available
+    std::shared_ptr<ngraph::Node> attention_mask = nullptr, bin_mask = nullptr;
+    std::tie(attention_mask, bin_mask) = detail::get_attention_mask(nodes, unidirectional);
+
+    const auto& Q = split_result[0];
+    const auto& K = split_result[1];
+    const auto& V = split_result[2];
+    const auto& head_size = split_result[3];
+
+    // compute softmax((Q x K' + mask) / sqrt(head_size))
+    const auto output = detail::attention_softmax(nodes, Q, K, V, attention_mask, bin_mask, head_size, unidirectional);
+
+    // present = concat(K, V) if the 'past' input is unavailable
+    // or
+    // present = concat(past, K, V)
+    const auto present = detail::get_present_state(K, V, nodes);
+
+    return {output, present};
+}
+}  // namespace set_1
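+
+// A concrete illustration of the shapes involved (numbers restated from the attention.prototxt and
+// attention_mask_index_1.prototxt test models added by this patch, not an additional constraint):
+//   input:   (batch_size=2, sequence_length=4, input_hidden_size=3)
+//   weights: (3, 12) = (input_hidden_size, 3 * hidden_size), so hidden_size = 4
+//   bias:    (12)
+// With num_heads = 2, head_size = hidden_size / num_heads = 2, which gives:
+//   output:  (2, 4, 4)       = (batch_size, sequence_length, hidden_size)
+//   present: (2, 2, 2, 4, 2) = (2, batch_size, num_heads, sequence_length, head_size)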
+
+namespace detail {
+namespace {
+
+std::shared_ptr<ngraph::Node> get_dimensions(const std::shared_ptr<default_opset::ShapeOf>& shape,
+                                             const std::vector<int>& dims) {
+    static const auto zero = default_opset::Constant::create(element::i32, Shape{}, {0});
+    const auto dims_const = default_opset::Constant::create(element::i32, Shape{dims.size()}, dims);
+    return std::make_shared<default_opset::Gather>(shape, dims_const, zero);
+}
+
+std::shared_ptr<ngraph::Node> get_dimensions(const std::shared_ptr<ngraph::Node>& node, const std::vector<int>& dims) {
+    return get_dimensions(std::make_shared<default_opset::ShapeOf>(node), dims);
+}
+
+std::shared_ptr<ngraph::Node> get_hidden_size(const std::shared_ptr<default_opset::ShapeOf>& node_shape) {
+    // node has shape (batch_size, sequence_length, 3 * hidden_size)
+    const auto zero = default_opset::Constant::create(element::i32, Shape{}, {0});
+    const auto hidden_size_x3 = get_dimensions(node_shape, {2});
+    const auto three = default_opset::Constant::create(element::i64, Shape{}, {3});
+    const auto hidden_size = std::make_shared<default_opset::Divide>(hidden_size_x3, three);
+    return hidden_size;
+}
+
+NodeVector split_to_QKV(const std::shared_ptr<ngraph::Node>& node,
+                        int64_t num_heads,
+                        const std::vector<int64_t>& qkv_hidden_sizes) {
+    OutputVector split;
+    std::shared_ptr<ngraph::Node> head_size = nullptr;
+    const auto& node_type = node->get_element_type();
+    const auto node_shape = std::make_shared<default_opset::ShapeOf>(node);
+    // node has shape (batch_size, sequence_length, 3 * hidden_size)
+    // fetch the first two dimensions
+    const auto batch_size_seq_len = get_dimensions(node_shape, {0, 1});
+    const auto num_heads_node = default_opset::Constant::create(element::i64, Shape{1}, {num_heads});
+    if (qkv_hidden_sizes.size() == 0) {
+        const auto hidden_size = get_hidden_size(node_shape);
+        // head_size = hidden_size / num_heads
+        head_size = std::make_shared<default_opset::Divide>(hidden_size, num_heads_node);
+        // split the node into 3 even parts Q, K, V with shape (batch_size, sequence_len, hidden_size)
+        split = ngraph::builder::opset1::split(node, 3, 2);
+        // and reshape each part to the new shape (batch_size, sequence_len, num_heads, head_size)
+        auto new_shape =
+            std::make_shared<default_opset::Concat>(NodeVector{batch_size_seq_len, num_heads_node, head_size}, 0);
+        for (size_t i = 0; i < split.size(); i++) {
+            split[i] = std::make_shared<default_opset::Reshape>(split[i], new_shape, false);
+        }
+        head_size = std::make_shared<default_opset::Convert>(head_size, node_type);
+    } else {
+        // in this case, weights have shape
+        // (input_hidden_size, qkv_hidden_sizes[0] + qkv_hidden_sizes[1] + qkv_hidden_sizes[2]),
+        // so the user specified hidden sizes for Q, K and V
+        NGRAPH_CHECK(qkv_hidden_sizes.size() == 3, "qkv_hidden_sizes attribute needs to have 3 values");
+        NGRAPH_CHECK(qkv_hidden_sizes[0] == qkv_hidden_sizes[1],
+                     "qkv_hidden_sizes first element should be the same as the second");
+        // split the node into 3 parts Q, K, V with shapes
+        // Q: (batch_size, sequence_len, qkv_hidden_sizes[0])
+        // K: (batch_size, sequence_len, qkv_hidden_sizes[1])
+        // V: (batch_size, sequence_len, qkv_hidden_sizes[2])
+        split = ngraph::builder::opset1::split(node, qkv_hidden_sizes, 2);
+        // and reshape each part to the new shape (batch_size, sequence_len, num_heads, qkv_hidden_sizes[i] / num_heads)
+        for (size_t i = 0; i < split.size(); i++) {
+            auto new_shape = std::make_shared<default_opset::Concat>(
+                NodeVector{batch_size_seq_len,
+                           num_heads_node,
+                           default_opset::Constant::create(element::i64, Shape{1}, {qkv_hidden_sizes[i] / num_heads})},
+                0);
+            split[i] = std::make_shared<default_opset::Reshape>(split[i], new_shape, false);
+        }
+        float head_size_val = qkv_hidden_sizes[0] > 0 ? static_cast<float>(qkv_hidden_sizes[0]) / num_heads
+                                                      : static_cast<float>(qkv_hidden_sizes[2]) / num_heads;
+        head_size = default_opset::Constant::create(node_type, Shape{1}, {head_size_val});
+    }
+
+    // transpose Q, K and V to (batch_size, num_heads, sequence_len, head_size)
+    auto perm = default_opset::Constant::create(element::i64, Shape{4}, {0, 2, 1, 3});
+    auto Q = std::make_shared<default_opset::Transpose>(split[0], perm);
+    auto K = std::make_shared<default_opset::Transpose>(split[1], perm);
+    auto V = std::make_shared<default_opset::Transpose>(split[2], perm);
+
+    return {Q, K, V, head_size};
+}
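+
+// A worked example of the uneven split above (numbers from the attention_qkv_hidden_sizes.prototxt test model):
+// with weights of shape (3, 12), qkv_hidden_sizes = [2, 2, 8] and num_heads = 2, the split produces
+//   Q, K: (batch_size, sequence_len, 2) -> reshaped to (batch_size, sequence_len, 2, 1)
+//   V:    (batch_size, sequence_len, 8) -> reshaped to (batch_size, sequence_len, 2, 4)
+// and head_size_val = qkv_hidden_sizes[0] / num_heads = 1 - the Q/K head size, which is the length
+// the Q x K' dot product runs over, so it is the appropriate scale for the softmax input.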
+
+// This function handles the case when mask_index rank is 1, i.e. its shape is (batch_size) or (2 * batch_size).
+// The returned mask consists of 0 and -10000 values and has shape (batch_size, 1, 1, all_seq_len). The 'mask_index'
+// input contains, per batch, the positions from which the -10000 values start appearing in the final mask
+// (if its shape is (batch_size)); if the shape is (2 * batch_size), the user can define two ranges of -10000 values
+// appearing in the final mask. For example:
+//
+// batch_size = 3, all_seq_len = 5, mask_index = [2, 4, 3]
+// the function returns the following mask with shape (3, 1, 1, 5):
+// 0, 0, -10000, -10000, -10000
+// 0, 0, 0, 0, -10000
+// 0, 0, 0, -10000, -10000
+//
+// e.g., for batch = 2, -10000 values appear within the range [mask_index[2]:5] (or [3:5])
+//
+// Another example, but with mask_index shape (2 * batch_size):
+// batch_size = 3, all_seq_len = 5, mask_index = [2, 4, 3, 1, 2, 2]
+// the function returns the following mask with shape (3, 1, 1, 5):
+// -10000, 0, -10000, -10000, -10000
+// -10000, -10000, 0, 0, -10000
+// -10000, -10000, 0, -10000, -10000
+//
+// e.g., for batch = 1, -10000 values appear within two ranges [0:mask_index[4]] and [mask_index[1]:5] (or [0:2], [4:5])
+//
+//
+// This is how it's done with nGraph operations:
+//
+// First the 'base' is generated by range + broadcast:
+//   base = range(0, all_seq_len)
+//   base = broadcast(base, shape=(batch_size, all_seq_len))
+//
+// With batch_size = 3 and all_seq_len = 5, 'base' looks as follows:
+// [[0, 1, 2, 3, 4],
+//  [0, 1, 2, 3, 4],
+//  [0, 1, 2, 3, 4]]
+//
+// The next step is to reshape mask_index:
+//   mask_index = reshape(mask_index, shape=(-1, batch_size))
+//
+// With the second example above (mask_index = [2, 4, 3, 1, 2, 2]), it now looks like:
+//   mask_index = [[2, 4, 3],
+//                 [1, 2, 2]]
+//
+// Now we take the first row and reshape it to (batch_size, 1) to have the indices laid out in a column:
+//   tail_range_indices = gather(mask_index, indices=[0], axis=0)  # tail_range_indices = [2, 4, 3]
+//   tail_range_indices = reshape(tail_range_indices, shape=(batch_size, 1))
+//   # tail_range_indices = [[2],
+//   #                       [4],
+//   #                       [3]]
+//
+// Then the base is compared with the indices:
+//   tail_range_mask = base >= tail_range_indices
+//
+// Thanks to autobroadcast in elementwise operators, the comparison conceptually happens between:
+// [[0, 1, 2, 3, 4],      [[2, 2, 2, 2, 2],
+//  [0, 1, 2, 3, 4],  >=   [4, 4, 4, 4, 4],
+//  [0, 1, 2, 3, 4]]       [3, 3, 3, 3, 3]]
+//
+// and the result is:
+// [[0, 0, 1, 1, 1],
+//  [0, 0, 0, 0, 1],
+//  [0, 0, 0, 1, 1]]
+//
+// So we get the final tail range mask by multiplying this by -10000.
+//
+// Similarly we proceed with the head range - we fetch the second row from the reshaped mask_index,
+// compare it with 'base' (but with the 'Less' operator instead of 'GreaterEqual') and combine the result
+// with tail_range_mask.
+//
+// Handling both mask_index variants (so (batch_size) and (2 * batch_size)) is tricky since we don't
+// know its dimensions upfront. So we compute both variants and use the Select operator to pick
+// the right one at runtime (unless it gets constant-folded earlier).
+std::shared_ptr<ngraph::Node> attention_mask_from_indices(const Output<ngraph::Node>& mask_index,
+                                                          const element::Type_t& type,
+                                                          const std::shared_ptr<ngraph::Node>& batch_size,
+                                                          const std::shared_ptr<ngraph::Node>& all_seq_len) {
+    const auto zero = default_opset::Constant::create(element::i64, Shape{}, {0});
+    const auto one = default_opset::Constant::create(element::i64, Shape{}, {1});
+    const auto stop = std::make_shared<default_opset::Squeeze>(all_seq_len, zero);
+    std::shared_ptr<ngraph::Node> base =
+        std::make_shared<default_opset::Range>(zero, stop, one, mask_index.get_element_type());
+    const auto target_shape = std::make_shared<default_opset::Concat>(NodeVector{batch_size, all_seq_len}, 0);
+    // broadcast 'base' to (batch_size, all_seq_len)
+    base = std::make_shared<default_opset::Broadcast>(base, target_shape);
+    const auto indices_shape = std::make_shared<default_opset::Concat>(
+        NodeVector{default_opset::Constant::create(element::i64, Shape{1}, {-1}), batch_size},
+        0);
+    std::shared_ptr<ngraph::Node> indices = std::make_shared<default_opset::Reshape>(mask_index, indices_shape, false);
+    // fetch the first row from indices
+    std::shared_ptr<ngraph::Node> tail_range_indices = std::make_shared<default_opset::Gather>(indices, zero, zero);
+    tail_range_indices =
+        std::make_shared<default_opset::Reshape>(tail_range_indices,
+                                                 default_opset::Constant::create(element::i32, Shape{2}, {-1, 1}),
+                                                 false);
+    const auto greater_eq = std::make_shared<default_opset::GreaterEqual>(base, tail_range_indices);
+    std::shared_ptr<ngraph::Node> tail_range_mask =
+        std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Convert>(greater_eq, type),
+                                                  default_opset::Constant::create(type, Shape{}, {-10000}));
+    tail_range_mask =
+        std::make_shared<default_opset::Unsqueeze>(tail_range_mask,
+                                                   default_opset::Constant::create(element::i64, Shape{2}, {1, 2}));
+
+    const auto gather_index =
+        std::make_shared<default_opset::Mod>(default_opset::Constant::create(element::i64, Shape{}, {1}),
+                                             get_dimensions(indices, {0}));
+    // fetch indices from the second row (or the first one if the second is not available)
+    std::shared_ptr<ngraph::Node> head_range_indices =
+        std::make_shared<default_opset::Gather>(indices, gather_index, zero);
+    head_range_indices =
+        std::make_shared<default_opset::Reshape>(head_range_indices,
+                                                 default_opset::Constant::create(element::i32, Shape{2}, {-1, 1}),
+                                                 false);
+    const auto less = std::make_shared<default_opset::Less>(base, head_range_indices);
+    std::shared_ptr<ngraph::Node> mask = std::make_shared<default_opset::LogicalOr>(less, greater_eq);
+    mask = std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Convert>(mask, type),
+                                                     default_opset::Constant::create(type, Shape{}, {-10000}));
+    // reshape from (batch_size, all_seq_len) to (batch_size, 1, 1, all_seq_len)
+    mask = std::make_shared<default_opset::Unsqueeze>(mask,
+                                                      default_opset::Constant::create(element::i64, Shape{2}, {1, 2}));
+
+    const auto mask_index_first_dim = get_dimensions(mask_index.get_node_shared_ptr(), {0});
+    // compare mask_index.shape[0] with the batch_size value;
+    // if they're equal - select tail_range_mask,
+    // otherwise select the full mask
+    mask = std::make_shared<default_opset::Select>(
+        std::make_shared<default_opset::Equal>(batch_size, mask_index_first_dim),
+        tail_range_mask,
+        mask);
+
+    return mask;
+}
+
+// Prepare the unidirectional mask like it's done in
+// https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_helper.h#L87-L96
+//
+// The function returns two masks - the first one is an attention mask with 0 or -10000 values and shape
+// (seq_len, all_seq_len); the second one is a binary mask which has 0 at the positions where the attention
+// mask has -10000 values and 1 otherwise.
+//
+// For example:
+// seq_len = 4, all_seq_len = 7, past_seq_len = 3. The returned attention mask has shape (4, 7) and contains:
+// 0 0 0 0 -10000 -10000 -10000
+// 0 0 0 0 0 -10000 -10000
+// 0 0 0 0 0 0 -10000
+// 0 0 0 0 0 0 0
+//
+// The returned binary mask has shape (4, 7) and the following values:
+// 1 1 1 1 0 0 0
+// 1 1 1 1 1 0 0
+// 1 1 1 1 1 1 0
+// 1 1 1 1 1 1 1
+//
+// The binary mask is used later, before softmax, to achieve
+// https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h#L158-L166
+//
+// The approach used to generate those masks is similar to the one from the attention_mask_from_indices function
+// (see the comments there).
+NodeTuple unidirectional_mask(const element::Type_t& type,
+                              const std::shared_ptr<ngraph::Node>& seq_len,
+                              const std::shared_ptr<ngraph::Node>& all_seq_len,
+                              const std::shared_ptr<ngraph::Node>& past_seq_len) {
+    const auto zero = default_opset::Constant::create(element::i64, Shape{}, {0});
+    const auto one = default_opset::Constant::create(element::i64, Shape{}, {1});
+    const auto stop = std::make_shared<default_opset::Squeeze>(all_seq_len, zero);
+    std::shared_ptr<ngraph::Node> bin_mask = std::make_shared<default_opset::Range>(zero, stop, one, element::i32);
+    auto target_shape = std::make_shared<default_opset::Concat>(NodeVector{seq_len, all_seq_len}, 0);
+    bin_mask = std::make_shared<default_opset::Broadcast>(bin_mask, target_shape);
+    auto start =
+        std::make_shared<default_opset::Squeeze>(std::make_shared<default_opset::Add>(past_seq_len, one), zero);
+    auto end = std::make_shared<default_opset::Squeeze>(std::make_shared<default_opset::Add>(all_seq_len, one), zero);
+    auto indices = std::make_shared<default_opset::Unsqueeze>(
+        std::make_shared<default_opset::Range>(start, end, one, element::i32),
+        default_opset::Constant::create(element::i32, Shape{1}, {1}));
+    bin_mask = std::make_shared<default_opset::GreaterEqual>(bin_mask, indices);
+    std::shared_ptr<ngraph::Node> attention_mask =
+        std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Convert>(bin_mask, type),
+                                                  default_opset::Constant::create(type, Shape{}, {-10000}));
+    bin_mask = std::make_shared<default_opset::Convert>(std::make_shared<default_opset::LogicalNot>(bin_mask), type);
+    return NodeTuple{attention_mask, bin_mask};
+}
+
+// This is the easiest variant of the 'mask_index' input - it consists of 0 or 1 values
+// and we transform them to:
+// * -10000 for positions where mask_index == 0
+// * 0 for positions where mask_index == 1
+//
+// It handles mask_index with the following shapes:
+// (batch_size, past_sequence_length + sequence_length) or
+// (batch_size, sequence_length, past_sequence_length + sequence_length)
+//
+// The shape (batch_size, 1, max_sequence_length, max_sequence_length) is not supported in onnxruntime:
+// https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_helper.h#L78
+std::shared_ptr<ngraph::Node> raw_mask(const Output<ngraph::Node>& mask_index,
+                                       Dimension::value_type mask_rank,
+                                       const element::Type_t& type) {
+    std::shared_ptr<ngraph::Node> mask = std::make_shared<default_opset::Convert>(mask_index, type);
+    mask = std::make_shared<default_opset::Convert>(mask, type);
+    mask = std::make_shared<default_opset::Subtract>(default_opset::Constant::create(type, Shape{}, {1}), mask);
+    mask = std::make_shared<default_opset::Multiply>(mask, default_opset::Constant::create(type, Shape{}, {-10000}));
+    switch (mask_rank) {
+    // Handle mask_index with the (batch_size, past_sequence_length + sequence_length) shape
+    // Reshape it to (batch_size, 1, 1, past_sequence_length + sequence_length)
+    case 2:
+        mask = std::make_shared<default_opset::Reshape>(
+            mask,
+            default_opset::Constant::create(element::i64, Shape{4}, {0, 1, 1, -1}),
+            true);
+        break;
+    // Handle mask_index with the (batch_size, sequence_length, past_sequence_length + sequence_length) shape
+    // Reshape it to (batch_size, 1, sequence_length, past_sequence_length + sequence_length)
+    case 3:
+        mask = std::make_shared<default_opset::Reshape>(
+            mask,
+            default_opset::Constant::create(element::i64, Shape{4}, {0, 1, 0, -1}),
+            true);
+        break;
+    }
+    return mask;
+}
+
+bool is_past_input_available(const OutputVector& op_inputs) {
+    return op_inputs.size() > 4 && !ngraph::op::is_null(op_inputs[4]);
+}
+
+NodeTuple get_attention_mask(const OutputVector& op_inputs, bool unidirectional) {
+    const auto zero = default_opset::Constant::create(element::i64, Shape{1}, {0});
+    const auto one = default_opset::Constant::create(element::i64, Shape{1}, {1});
+
+    std::shared_ptr<ngraph::Node> past_seq_len;
+    // get the value of past_sequence_length
+    if (is_past_input_available(op_inputs)) {
+        const auto& past = op_inputs[4];
+        // the 'past' node has shape (2, batch_size, num_heads, past_sequence_length, head_size)
+        past_seq_len = get_dimensions(past.get_node_shared_ptr(), {3});
+    } else {
+        past_seq_len = zero;
+    }
+
+    // the 'input' node has shape (batch_size, sequence_length, input_hidden_size)
+    auto input_shape = std::make_shared<default_opset::ShapeOf>(op_inputs[0]);
+    auto seq_len = get_dimensions(input_shape, {1});
+    auto all_seq_len = std::make_shared<default_opset::Add>(seq_len, past_seq_len);
+    const auto& type = op_inputs[0].get_element_type();
+    std::shared_ptr<ngraph::Node> attention_mask = nullptr;
+    std::shared_ptr<ngraph::Node> bin_mask = nullptr;
+    if (unidirectional) {
+        std::tie(attention_mask, bin_mask) = unidirectional_mask(type, seq_len, all_seq_len, past_seq_len);
+    }
+    if (op_inputs.size() > 3 && !ngraph::op::is_null(op_inputs[3])) {
+        const auto& mask_index = op_inputs[3];
+        NGRAPH_CHECK(mask_index.get_element_type() == element::i32, "'mask_index' type must be int32");
+        auto batch_size = get_dimensions(input_shape, {0});
+        const auto mask_rank = mask_index.get_partial_shape().rank();
+        NGRAPH_CHECK(mask_rank.is_static(), "'mask_index' rank must be static");
+        auto mask_rank_val = mask_rank.get_length();
+        std::shared_ptr<ngraph::Node> mask;
+        if (mask_rank_val == 1) {
+            // the case when mask_index has shape (batch_size) or (2 * batch_size),
+            // so it contains positions that specify how the mask should be generated
+            mask = attention_mask_from_indices(mask_index, type, batch_size, all_seq_len);
+        } else if (mask_rank_val < 4) {
+            mask = raw_mask(mask_index, mask_rank.get_length(), type);
+        } else {
+            NGRAPH_CHECK(false, "mask_index with rank " + std::to_string(mask_rank_val) + " is not supported");
+        }
+        // add the mask to the unidirectional mask if available
+        if (attention_mask) {
+            attention_mask = std::make_shared<default_opset::Add>(attention_mask, mask);
+        } else {
+            attention_mask = mask;
+        }
+    }
+    return NodeTuple{attention_mask, bin_mask};
+}
+
+// Compute softmax(Q x K' / sqrt(head_size)) x V
+std::shared_ptr<ngraph::Node> attention_softmax(const OutputVector& op_inputs,
+                                                const std::shared_ptr<ngraph::Node>& Q,
+                                                std::shared_ptr<ngraph::Node> K,
+                                                std::shared_ptr<ngraph::Node> V,
+                                                const std::shared_ptr<ngraph::Node>& attention_mask,
+                                                const std::shared_ptr<ngraph::Node>& bin_mask,
+                                                const std::shared_ptr<ngraph::Node>& head_size,
+                                                bool unidirectional) {
+    auto zero = default_opset::Constant::create(element::i64, Shape{}, {0});
+    if (is_past_input_available(op_inputs)) {
+        // concat past K and V with the present ones
+        const auto& past = op_inputs[4];
+        // the 'past' input has two matrices K and V, each with shape
+        // (1, batch_size, num_heads, past_sequence_length, head_size), concatenated along the first axis
+        // into a single (2, batch_size, num_heads, past_sequence_length, head_size) tensor,
+        // so we need to split it into two parts, remove the first dimension from each part and concatenate
+        // the first part with the current K and the second part with the current V
+        const auto split = ngraph::builder::opset1::split(past, 2, 0);
+        const auto past_K = std::make_shared<default_opset::Squeeze>(split[0], zero);
+        K = std::make_shared<default_opset::Concat>(NodeVector{past_K, K}, 2);
+        const auto past_V = std::make_shared<default_opset::Squeeze>(split[1], zero);
+        V = std::make_shared<default_opset::Concat>(NodeVector{past_V, V}, 2);
+    }
+    // perform Q x K'
+    std::shared_ptr<ngraph::Node> softmax_input = std::make_shared<default_opset::MatMul>(Q, K, false, true);
+    // Q x K' + mask
+    if (attention_mask) {
+        if (unidirectional) {
+            // Perform the equivalent of
+            // https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h#L158-L166
+            // For positions where the unidirectional mask has -10000 values, Q x K' is zeroed out by bin_mask,
+            // so only the mask value reaches the softmax input
+            softmax_input = std::make_shared<default_opset::Multiply>(softmax_input, bin_mask);
+        }
+        softmax_input = std::make_shared<default_opset::Add>(softmax_input, attention_mask);
+    }
+    const auto sqrt = std::make_shared<default_opset::Sqrt>(head_size);
+    // (Q x K' + mask) / sqrt(head_size)
+    softmax_input = std::make_shared<default_opset::Divide>(softmax_input, sqrt);
+    // handle the 'extra_add' input
+    if (op_inputs.size() > 5 && !ngraph::op::is_null(op_inputs[5])) {
+        NGRAPH_CHECK(!is_past_input_available(op_inputs),
+                     "Cannot use both 'past' and 'extra_add' inputs in the same node");
+        const auto& extra_add = op_inputs[5];
+        softmax_input = std::make_shared<default_opset::Add>(softmax_input, extra_add);
+    }
+    // softmax((Q x K' + mask) / sqrt(head_size))
+    const auto softmax = std::make_shared<default_opset::Softmax>(softmax_input, 3);
+
+    // softmax((Q x K' + mask) / sqrt(head_size)) x V
+    std::shared_ptr<ngraph::Node> output = std::make_shared<default_opset::MatMul>(softmax, V);
+    // transpose the result from (batch_size, num_heads, sequence_length, head_size)
+    // to (batch_size, sequence_length, num_heads, head_size)
+    const auto perm = default_opset::Constant::create(element::i64, Shape{4}, {0, 2, 1, 3});
+    output = std::make_shared<default_opset::Transpose>(output, perm);
+    auto new_shape = default_opset::Constant::create(element::i32, Shape{3}, {0, 0, -1});
+    // reshape the result from (batch_size, sequence_length, num_heads, head_size)
+    // to (batch_size, sequence_length, num_heads * head_size)
+    output = std::make_shared<default_opset::Reshape>(output, new_shape, true);
+
+    return output;
+}
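+
+// Putting the steps above together, attention_softmax builds a subgraph that computes (informally):
+//   scores = (Q x K' [* bin_mask] + attention_mask) / sqrt(head_size) [+ extra_add]
+//   output = Reshape(Transpose(Softmax(scores, axis=3) x V))
+// where the bracketed terms appear only in unidirectional mode and when the optional sixth input
+// is provided, respectively.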
+
+// Make the present state from the K and V matrices by reshaping them from
+// (batch_size, num_heads, sequence_length, head_size) to (1, batch_size, num_heads, sequence_length, head_size)
+// and concatenating them along the first axis to make the 'present' output.
+// If the fifth input ('past') is available, it gets concatenated with the 'present' output along the fourth axis.
+std::shared_ptr<ngraph::Node> get_present_state(const std::shared_ptr<ngraph::Node>& K,
+                                                const std::shared_ptr<ngraph::Node>& V,
+                                                const OutputVector& op_inputs) {
+    auto zero = default_opset::Constant::create(element::i64, Shape{1}, {0});
+    // expand the K shape (batch_size, num_heads, sequence_length, head_size) to
+    // (1, batch_size, num_heads, sequence_length, head_size)
+    auto K_unsqueezed = std::make_shared<default_opset::Unsqueeze>(K, zero);
+    // similarly expand the V shape
+    auto V_unsqueezed = std::make_shared<default_opset::Unsqueeze>(V, zero);
+
+    // add padding in case K and V have different shapes (it happens when the user provided uneven
+    // qkv_hidden_sizes); if the shapes are equal (so the padding is zero), Pad gets eliminated in the
+    // NopElimination pass
+    const auto K_shape = std::make_shared<default_opset::ShapeOf>(K_unsqueezed);
+    const auto V_shape = std::make_shared<default_opset::ShapeOf>(V_unsqueezed);
+    const auto K_pads_end =
+        std::make_shared<default_opset::Maximum>(std::make_shared<default_opset::Subtract>(V_shape, K_shape), zero);
+    const auto V_pads_end =
+        std::make_shared<default_opset::Maximum>(std::make_shared<default_opset::Subtract>(K_shape, V_shape), zero);
+    const auto pads_begin =
+        std::make_shared<default_opset::Broadcast>(zero, std::make_shared<default_opset::ShapeOf>(K_shape));
+    const auto K_padded =
+        std::make_shared<default_opset::Pad>(K_unsqueezed, pads_begin, K_pads_end, ngraph::op::PadMode::CONSTANT);
+    const auto V_padded =
+        std::make_shared<default_opset::Pad>(V_unsqueezed, pads_begin, V_pads_end, ngraph::op::PadMode::CONSTANT);
+
+    // concat the key and value tensors along the first axis to make the 'present' state;
+    // after that operation, 'present' has shape (2, batch_size, num_heads, sequence_length, head_size)
+    std::shared_ptr<ngraph::Node> present = std::make_shared<default_opset::Concat>(NodeVector{K_padded, V_padded}, 0);
+    if (is_past_input_available(op_inputs)) {
+        const auto& past = op_inputs[4];
+        // concat 'past' with the 'present' output along the fourth axis;
+        // after that operation, 'present' has shape:
+        // (2, batch_size, num_heads, past_sequence_length + sequence_length, head_size)
+        present = std::make_shared<default_opset::Concat>(OutputVector{past, present}, 3);
+    }
+    return present;
+}
+}  // namespace
+}  // namespace detail
+}  // namespace op
+}  // namespace onnx_import
+}  // namespace ngraph
diff --git a/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.hpp b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.hpp
new file mode 100644
index 00000000000000..50ecbd82ef57fe
--- /dev/null
+++ b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.hpp
@@ -0,0 +1,17 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "onnx_import/core/node.hpp"
+
+namespace ngraph {
+namespace onnx_import {
+namespace op {
+namespace set_1 {
+OutputVector attention(const Node& node);
+}  // namespace set_1
+}  // namespace op
+}  // namespace onnx_import
+}  // namespace ngraph
diff --git a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
index ffe15e5b1e5e59..7d12281d44674e 100644
--- a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
+++ b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
@@ -31,6 +31,7 @@
 #include "op/cast_like.hpp"
 #include "op/ceil.hpp"
 #include "op/clip.hpp"
+#include "op/com.microsoft/attention.hpp"
 #include "op/com.microsoft/bias_gelu.hpp"
 #include "op/com.microsoft/embed_layer_normalization.hpp"
 #include "op/com.microsoft/skip_layer_normalization.hpp"
@@ -490,6 +491,7 @@ OperatorsBridge::OperatorsBridge() {
     REGISTER_OPERATOR_WITH_DOMAIN(OPENVINO_ONNX_DOMAIN, "PriorBoxClustered", 1, prior_box_clustered);
     REGISTER_OPERATOR_WITH_DOMAIN(OPENVINO_ONNX_DOMAIN, "Swish", 1, swish);
 
+    REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "Attention", 1, attention);
REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "BiasGelu", 1, bias_gelu); REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "EmbedLayerNormalization", 1, embed_layer_normalization); REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "SkipLayerNormalization", 1, skip_layer_normalization); diff --git a/ngraph/test/models/onnx/com.microsoft/attention.prototxt b/ngraph/test/models/onnx/com.microsoft/attention.prototxt new file mode 100644 index 00000000000000..53ac350573b055 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention.prototxt @@ -0,0 +1,123 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + output: "output" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_dynamic_shapes.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_dynamic_shapes.prototxt new file mode 100644 index 00000000000000..97a4f3f1f9134a --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_dynamic_shapes.prototxt @@ -0,0 +1,90 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: 
"bias" + input: "mask" + input: "past" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + input { + name: "input" + type { + tensor_type { + elem_type: 1 + } + } + } + input { + name: "weights" + type { + tensor_type { + elem_type: 1 + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim {} + dim {} + } + } + } + } + input { + name: "past" + type { + tensor_type { + elem_type: 1 + } + } + } + + output { + name: "output" + type { + tensor_type { + elem_type: 1 + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_extra_add.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_extra_add.prototxt new file mode 100644 index 00000000000000..f8664f4507f459 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_extra_add.prototxt @@ -0,0 +1,190 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask" + input: "" + input: "extra_add" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + 
dim_value: 3 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "extra_add" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_mask_index_1.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_1.prototxt new file mode 100644 index 00000000000000..56d4e1d1142a4e --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_1.prototxt @@ -0,0 +1,163 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask_index" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: 
"mask_index" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_mask_index_2.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_2.prototxt new file mode 100644 index 00000000000000..481d9ea86f5488 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_2.prototxt @@ -0,0 +1,168 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask_index" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + attribute { + name: "unidirectional" + i: 1 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "mask_index" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + 
dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_mask_index_3.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_3.prototxt new file mode 100644 index 00000000000000..67558f33599282 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_3.prototxt @@ -0,0 +1,166 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 
2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_mask_index_4.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_4.prototxt new file mode 100644 index 00000000000000..9b9387991a9c64 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_4.prototxt @@ -0,0 +1,169 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_past.prototxt 
b/ngraph/test/models/onnx/com.microsoft/attention_past.prototxt new file mode 100644 index 00000000000000..7625195fa044c8 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_past.prototxt @@ -0,0 +1,193 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask" + input: "past" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 9 + } + } + } + } + } + input { + name: "past" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 5 + } + dim { + dim_value: 2 + } + } + } + } + } + + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 9 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_qkv_hidden_sizes.prototxt 
b/ngraph/test/models/onnx/com.microsoft/attention_qkv_hidden_sizes.prototxt new file mode 100644 index 00000000000000..5ee43aa5c3624a --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_qkv_hidden_sizes.prototxt @@ -0,0 +1,130 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + output: "output" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + attribute { + name: "qkv_hidden_sizes" + ints: 2 + ints: 2 + ints: 8 + type: INTS + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 8 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_unidirectional.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_unidirectional.prototxt new file mode 100644 index 00000000000000..31a65b299d60ee --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_unidirectional.prototxt @@ -0,0 +1,154 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + attribute { + name: "unidirectional" + i: 1 + type: INT + } + domain: "com.microsoft" 
+ } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/bias_gelu.prototxt b/ngraph/test/models/onnx/com.microsoft/bias_gelu.prototxt similarity index 100% rename from ngraph/test/models/onnx/bias_gelu.prototxt rename to ngraph/test/models/onnx/com.microsoft/bias_gelu.prototxt diff --git a/ngraph/test/models/onnx/embed_layer_normalization.prototxt b/ngraph/test/models/onnx/com.microsoft/embed_layer_normalization.prototxt similarity index 100% rename from ngraph/test/models/onnx/embed_layer_normalization.prototxt rename to ngraph/test/models/onnx/com.microsoft/embed_layer_normalization.prototxt diff --git a/ngraph/test/models/onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.prototxt b/ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_dynamic_shapes.prototxt similarity index 100% rename from ngraph/test/models/onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.prototxt rename to ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_dynamic_shapes.prototxt diff --git 
a/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding.prototxt b/ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_with_segment_embedding.prototxt similarity index 100% rename from ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding.prototxt rename to ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_with_segment_embedding.prototxt diff --git a/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding_and_mask.prototxt b/ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_with_segment_embedding_and_mask.prototxt similarity index 100% rename from ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding_and_mask.prototxt rename to ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_with_segment_embedding_and_mask.prototxt diff --git a/ngraph/test/models/onnx/dynamic_shapes/skip_layer_normalization.prototxt b/ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.prototxt similarity index 100% rename from ngraph/test/models/onnx/dynamic_shapes/skip_layer_normalization.prototxt rename to ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.prototxt diff --git a/ngraph/test/models/onnx/skip_layer_normalization_with_gamma.prototxt b/ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma.prototxt similarity index 100% rename from ngraph/test/models/onnx/skip_layer_normalization_with_gamma.prototxt rename to ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma.prototxt diff --git a/ngraph/test/models/onnx/skip_layer_normalization_with_gamma_beta.prototxt b/ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.prototxt similarity index 100% rename from ngraph/test/models/onnx/skip_layer_normalization_with_gamma_beta.prototxt rename to ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.prototxt diff --git a/ngraph/test/models/onnx/skip_layer_normalization_with_gamma_beta_bias.prototxt b/ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.prototxt similarity index 100% rename from ngraph/test/models/onnx/skip_layer_normalization_with_gamma_beta_bias.prototxt rename to ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.prototxt diff --git a/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp b/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp index 63611843a27dfd..5fb5999cf6c244 100644 --- a/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp +++ b/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp @@ -25,13 +25,11 @@ using namespace ngraph; static std::string s_manifest = "${MANIFEST}"; -using Inputs = std::vector>; -using Outputs = std::vector>; - using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); NGRAPH_TEST(${BACKEND_NAME}, onnx_model_bias_gelu) { - const auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/bias_gelu.onnx")); + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/bias_gelu.onnx")); auto test_case = test::TestCase(function); test_case.add_input({0.5488135, @@ -52,7 +50,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_bias_gelu) { NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta_bias) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, 
"onnx/skip_layer_normalization_with_gamma_beta_bias.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.onnx")); std::vector input = { 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, @@ -78,7 +76,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/skip_layer_normalization_with_gamma_beta.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.onnx")); std::vector input = { 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, @@ -104,7 +102,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/skip_layer_normalization_with_gamma.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/skip_layer_normalization_with_gamma.onnx")); std::vector input = { 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, @@ -130,7 +128,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/dynamic_shapes/skip_layer_normalization.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.onnx")); std::vector input = { 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, @@ -177,8 +175,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization) { - const auto function = - onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization.onnx")); + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/embed_layer_normalization.onnx")); std::vector input_ids = { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, @@ -209,7 +207,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization) { NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization_with_segment_embedding.onnx")); + file_util::path_join(SERIALIZED_ZOO, + "onnx/com.microsoft/embed_layer_normalization_with_segment_embedding.onnx")); std::vector input_ids = { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, @@ -251,7 +250,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding_and_mask) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization_with_segment_embedding_and_mask.onnx")); + file_util::path_join(SERIALIZED_ZOO, + 
"onnx/com.microsoft/embed_layer_normalization_with_segment_embedding_and_mask.onnx")); std::vector input_ids = { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, @@ -296,7 +296,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/embed_layer_normalization_dynamic_shapes.onnx")); std::vector input_ids = { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, @@ -389,3 +389,470 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes test_case.add_expected_output(Shape{3}, expected_mask_index); test_case.run_with_tolerance_as_fp(1e-6); } + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention) { + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.91475844, 0.91523546, 0.82536930, 0.37491974, 0.22384071, 0.05941105, 0.01902100, 0.70131350, + 0.09603709, 0.44200060, 0.53106076, 0.79464376, 0.35469049, 0.25225943, 0.25179818, 0.29592562, + 0.24836586, 0.65088797, 0.93126643, 0.67980725, 0.85708112, 0.59808528, 0.46321425, 0.19301885, + }; + std::vector output = { + 0.07966283, 0.10783536, -0.19424979, 0.54514766, 0.07965867, 0.10783093, -0.19424866, 0.54510003, + 0.07965846, 0.10783067, -0.19424550, 0.54509139, 0.07966217, 0.10783640, -0.19424903, 0.54512268, + 0.06940663, 0.10962760, -0.19698445, 0.53492010, 0.06940675, 0.10962828, -0.19698484, 0.53492326, + 0.06940714, 0.10963022, -0.19698712, 0.53494006, 0.06940673, 0.10962812, -0.19698519, 0.53492481, + }; + + test_case.add_input(input); + test_case.add_expected_output(output); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_qkv_hidden_sizes) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_qkv_hidden_sizes.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.56477863, 0.60309958, 0.35158035, 0.03123519, 0.81918180, 0.76905495, 0.47219241, 0.72016627, + 0.59377003, 0.91380632, 0.56797302, 0.34846428, 0.83839595, 0.16394103, 0.34676281, 0.09161621, + 0.45562279, 0.23317528, 0.37197968, 0.06727808, 0.08500192, 0.84915495, 0.68266946, 0.00227691, + }; + std::vector output = { + -0.59370947, -0.30300471, 0.12048547, -0.09029539, 0.08041390, 0.10250041, -0.19381392, 0.55126983, + -0.59370828, -0.30301332, 0.12049319, -0.09029691, 0.08041921, 0.10250521, -0.19381438, 0.55127531, + -0.59370869, -0.30301058, 0.12049074, -0.09029643, 0.08041564, 0.10250199, -0.19381410, 0.55127168, + -0.59370929, -0.30300608, 0.12048667, -0.09029562, 0.08041184, 0.10249855, -0.19381374, 0.55126774, + -0.59681994, -0.26327702, 0.07638434, -0.06311120, 0.06671587, 0.10916986, -0.19412412, 0.51977092, + -0.59682053, -0.26328400, 0.07638102, -0.06311222, 0.06671817, 0.10917170, -0.19412397, 0.51977223, + -0.59682077, -0.26328647, 0.07637984, -0.06311259, 0.06671739, 0.10917108, -0.19412403, 0.51977175, + -0.59682101, -0.26328778, 0.07637922, -0.06311278, 0.06671065, 0.10916568, -0.19412443, 0.51976782, + }; + + test_case.add_input(input); + 
test_case.add_expected_output(output); + test_case.run_with_tolerance_as_fp(1e-6); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_unidirectional) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_unidirectional.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.89578921, 0.42421508, 0.35630688, 0.77461642, 0.65753633, 0.09723099, 0.62597734, 0.72117692, + 0.57636845, 0.17104276, 0.13245547, 0.59879875, 0.15624641, 0.44903454, 0.50483286, 0.92975074, + 0.36934483, 0.29919949, 0.57185954, 0.83036488, 0.08384345, 0.20378476, 0.74684393, 0.46716982, + }; + std::vector output = { + 0.05604819, 0.09000472, -0.19437021, 0.52487367, 0.06211422, 0.08740954, -0.19139624, 0.52762908, + 0.06708897, 0.08992603, -0.19214047, 0.53631783, 0.06896879, 0.10248676, -0.19485690, 0.53477794, + 0.08577005, 0.12807365, -0.19762954, 0.54432857, 0.06929274, 0.10893210, -0.19599904, 0.53184807, + 0.07348281, 0.10215081, -0.19280069, 0.53552240, 0.07861833, 0.10517240, -0.19285706, 0.54126489, + }; + std::vector present = { + -0.60427380, -0.25958878, -0.59609234, -0.24055196, -0.59613681, -0.30088067, -0.59633607, -0.33270463, + 0.06899665, -0.09284544, 0.08059876, -0.06146053, 0.11841078, -0.10019838, 0.10605468, -0.09273906, + -0.59036821, -0.32410735, -0.60532302, -0.25127757, -0.58926487, -0.25271094, -0.58640373, -0.31730092, + 0.12509561, -0.07968873, 0.06005794, -0.08937149, 0.10523240, -0.05083811, 0.14162725, -0.07438751, + 0.05604819, 0.09000472, 0.06819826, 0.08480665, 0.07700446, 0.09494394, 0.07459175, 0.14003153, + -0.19437021, 0.52487367, -0.18843602, 0.53037173, -0.19362189, 0.55360907, -0.20299932, 0.53020388, + 0.08577005, 0.12807365, 0.05276009, 0.08972625, 0.08190014, 0.08852972, 0.09400313, 0.11423884, + -0.19762954, 0.54432857, -0.19435294, 0.51924801, -0.18643703, 0.54280555, -0.19302703, 0.55837619, + }; + + test_case.add_input(input); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_mask_index_1.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.02841483, 0.47845092, 0.14633700, 0.54597300, 0.40160629, 0.55281311, 0.14931096, 0.64483738, + 0.96559167, 0.05262021, 0.12391864, 0.20093553, 0.74290562, 0.19367455, 0.19253619, 0.41593507, + 0.91188699, 0.61606920, 0.72673517, 0.86981291, 0.19963337, 0.22747350, 0.34308898, 0.57267183, + }; + std::vector mask_index = { + 0, + 1, + }; + std::vector output = { + 0.08298690, 0.12711772, -0.19757506, 0.54029012, 0.08298548, 0.12711433, -0.19757731, 0.54031140, + 0.08298430, 0.12711799, -0.19757695, 0.54031777, 0.08298548, 0.12711433, -0.19757444, 0.54028159, + 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, + 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, + }; + std::vector present = { + -0.58437425, -0.29483819, -0.59927911, -0.30336475, -0.59104657, -0.37327260, -0.59078789, -0.29863101, + 0.11751597, -0.04114649, 0.09933343, -0.09884726, 0.16250694, -0.12028439, 0.09319257, -0.05129660, + -0.60341775, -0.25221461, -0.58933026, -0.31912822, -0.59271193, -0.25470981, -0.59399152, -0.32643768, + 0.05398282, -0.07468132, 0.14743008, 
-0.09407346, 0.10399222, -0.06682440, 0.11632499, -0.08986320, + 0.09104910, 0.12973849, 0.06917210, 0.11059431, 0.09356256, 0.12594685, 0.07814129, 0.14221822, + -0.19329809, 0.53526556, -0.19787431, 0.53673857, -0.20045389, 0.57165766, -0.19869246, 0.51749766, + 0.05380550, 0.10459180, 0.09169570, 0.09892380, 0.07746917, 0.08042616, 0.07953370, 0.12909687, + -0.19593412, 0.50907606, -0.19202785, 0.56904894, -0.18689045, 0.54643762, -0.19969353, 0.53976399, + }; + + test_case.add_input(input); + test_case.add_input(mask_index); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_2) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_mask_index_2.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.75259578, 0.81492645, 0.46713001, 0.29483622, 0.06768602, 0.95105755, 0.32065326, 0.52417183, + 0.73136383, 0.77176476, 0.60997742, 0.64625764, 0.16311000, 0.89680773, 0.01331447, 0.42468646, + 0.58711547, 0.00345124, 0.13053808, 0.46278623, 0.13786320, 0.65182054, 0.74864876, 0.81506181, + }; + std::vector mask_index = { + 3, + 3, + 1, + 1, + }; + std::vector output = { + 0.07524174, 0.11320241, -0.19909523, 0.54785377, 0.06825337, 0.13981669, -0.20774621, 0.53718704, + 0.07531278, 0.12957911, -0.20330518, 0.54547405, 0.07531209, 0.12958010, -0.20330583, 0.54547292, + 0.08900890, 0.11150353, -0.18931937, 0.53757656, 0.07915881, 0.10416336, -0.18914750, 0.52921104, + 0.08285815, 0.11462159, -0.19115375, 0.53077918, 0.08285838, 0.11462225, -0.19115454, 0.53077984, + }; + std::vector present = { + -0.59630549, -0.28110915, -0.60274345, -0.36154836, -0.59437746, -0.33717164, -0.60134649, -0.29849592, + 0.11169122, -0.09345293, 0.11103803, -0.13096604, 0.13131849, -0.10597084, 0.10463209, -0.11332577, + -0.57949269, -0.27235535, -0.58941406, -0.25372508, -0.58658379, -0.28718373, -0.59821802, -0.32433146, + 0.13244939, -0.02865628, 0.09308393, -0.04083736, 0.10948701, -0.04423397, 0.13060363, -0.12316251, + 0.07509718, 0.08392500, 0.06825337, 0.13981669, 0.08239168, 0.11931328, 0.06770951, 0.09240761, + -0.19074154, 0.55260652, -0.20774621, 0.53718704, -0.19888818, 0.55371630, -0.19559640, 0.54754448, + 0.09983939, 0.10603377, 0.07915881, 0.10416336, 0.08655046, 0.12505992, 0.07738422, 0.09509270, + -0.18571433, 0.55095005, -0.18914750, 0.52921104, -0.19315663, 0.53234470, -0.19601485, 0.56322992, + }; + + test_case.add_input(input); + test_case.add_input(mask_index); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_3) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_mask_index_3.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.33093750, 0.39181390, 0.14586255, 0.39709702, 0.98086524, 0.03891133, 0.72234219, 0.21966648, + 0.79986620, 0.97251678, 0.04131543, 0.43971965, 0.50185394, 0.11452501, 0.88111717, 0.76076663, + 0.31870860, 0.54107893, 0.91756296, 0.58112669, 0.99117357, 0.00256292, 0.58885485, 0.93481058, + }; + std::vector mask = { + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + }; + std::vector output = { + 0.07551830, 0.10666487, -0.19357042, 0.53683108, 0.07551410, 0.10666656, -0.19356072, 0.53684169, + 
0.07552745, 0.10666100, -0.19358172, 0.53682435, 0.07552218, 0.10666317, -0.19358677, 0.53681952, + 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, + 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, + }; + std::vector present = { + -0.59174627, -0.27471560, -0.58307797, -0.25967693, -0.60766846, -0.31754097, -0.61241394, -0.26291698, + 0.09206123, -0.05307099, 0.12491645, -0.03853742, 0.08732655, -0.13050151, 0.04073093, -0.10792807, + -0.60556883, -0.34055573, -0.60474855, -0.28785610, -0.60757709, -0.32514900, -0.58872569, -0.37967020, + 0.09779400, -0.13136166, 0.07915612, -0.10649752, 0.11043755, -0.15124020, 0.16626491, -0.11274654, + 0.07639833, 0.11762549, 0.09370039, 0.09133558, 0.05661478, 0.11096847, 0.04019671, 0.10117501, + -0.19371650, 0.52530587, -0.18429738, 0.55240726, -0.20283231, 0.53265429, -0.20036045, 0.50568837, + 0.06171235, 0.12687264, 0.05802051, 0.10266830, 0.06172965, 0.08967118, 0.09727416, 0.13513327, + -0.20576829, 0.53365225, -0.19832623, 0.52809310, -0.19971462, 0.55584043, -0.20121223, 0.57003713, + }; + + test_case.add_input(input); + test_case.add_input(mask); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_4) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_mask_index_4.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.23565151, 0.58627969, 0.75137484, 0.68586946, 0.62750375, 0.13284931, 0.13347220, 0.36357051, + 0.56910241, 0.48275986, 0.49440190, 0.45483324, 0.63547862, 0.97893149, 0.40630588, 0.38783622, + 0.07172249, 0.46385381, 0.99764502, 0.22219376, 0.67735291, 0.40799847, 0.74337566, 0.87263006, + }; + std::vector mask = { + 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, + }; + std::vector output = { + 0.07771622, 0.10724538, -0.19453585, 0.54342043, 0.07459468, 0.10934003, -0.19561143, 0.53936625, + 0.07927690, 0.10619678, -0.19399606, 0.54543519, 0.07459468, 0.10934003, -0.19561143, 0.53936625, + 0.05485561, 0.11278091, -0.20117569, 0.52096349, 0.06629646, 0.10195158, -0.19900991, 0.54654449, + 0.06491723, 0.10292297, -0.19678673, 0.53451663, 0.06549793, 0.11126325, -0.19989857, 0.53717279, + }; + std::vector present = { + -0.59188855, -0.34495637, -0.59508181, -0.25013468, -0.59176934, -0.33229247, -0.59576762, -0.29731843, + 0.14217430, -0.10403840, 0.08584045, -0.06193545, 0.12358667, -0.08588549, 0.10515238, -0.08629489, + -0.59092808, -0.28260738, -0.60047609, -0.30411413, -0.61210287, -0.28645760, -0.59391296, -0.34649473, + 0.12789863, -0.08159252, 0.08122411, -0.08866425, 0.06395009, -0.12896645, 0.14855847, -0.11978809, + 0.08783118, 0.12152332, 0.07067389, 0.09078297, 0.08385989, 0.13306075, 0.07459468, 0.10934003, + -0.19849420, 0.55928540, -0.18948570, 0.53154731, -0.19960676, 0.54237455, -0.19561143, 0.53936625, + 0.08509844, 0.08314656, 0.06388859, 0.12990499, 0.04582624, 0.09566365, 0.08674107, 0.10823163, + -0.18808734, 0.56137776, -0.20168513, 0.51830697, -0.20066255, 0.52363914, -0.19737384, 0.56921995, + }; + + test_case.add_input(input); + test_case.add_input(mask); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, 
onnx_model_attention_past) { + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_past.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.82966000, 0.77751911, 0.08977074, 0.06076468, 0.40659550, 0.19995944, 0.55544919, 0.83971608, + 0.86254036, 0.30894691, 0.80156928, 0.83092463, 0.14506543, 0.32196075, 0.42209163, 0.24465553, + 0.93944097, 0.73528159, 0.23347616, 0.60544974, 0.93329269, 0.67604774, 0.56349903, 0.26199624, + }; + std::vector mask = { + 1, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 1, + 1, + }; + std::vector past = { + 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, 0.73566031, + 0.75168055, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, 0.88056499, 0.08436447, + 0.54744655, 0.25466520, 0.08500137, 0.19271941, 0.86525357, 0.21717627, 0.97158766, 0.42288730, 0.09890039, + 0.01148765, 0.97024685, 0.19697112, 0.67671591, 0.67960924, 0.46656516, 0.30850092, 0.73536104, 0.73938161, + 0.91650903, 0.57628596, 0.51164514, 0.11695814, 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, + 0.90233624, 0.84986305, 0.26141909, 0.84528726, 0.81416380, 0.00429944, 0.31476986, 0.00440918, 0.77413058, + 0.13409913, 0.20965169, 0.61764991, 0.55266041, 0.56107825, 0.42051074, 0.16804738, 0.80362344, 0.52392679, + 0.27550557, 0.66738850, 0.39348483, 0.31801429, 0.30325863, 0.37068403, 0.92767614, 0.60799408, 0.01458820, + 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, + }; + std::vector output = { + 0.26186451, 0.45950246, -0.04001215, 0.47680017, 0.26333901, 0.46158865, -0.04006424, 0.47588652, + 0.26875457, 0.47031689, -0.03951600, 0.47674999, 0.26851410, 0.46987134, -0.03919901, 0.47629333, + 0.18083976, 0.16579385, -0.05161894, 0.63075018, 0.18228555, 0.16642828, -0.04873618, 0.63316816, + 0.18362364, 0.16702136, -0.05045432, 0.63178891, 0.18000112, 0.16541445, -0.05139139, 0.63105792, + }; + std::vector present = { + 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, + 0.73566031, 0.75168055, -0.59527576, -0.23625080, -0.58657664, -0.29827437, -0.59528387, -0.33578828, + -0.59068960, -0.34870598, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, + 0.88056499, 0.08436447, 0.54744655, 0.25466520, 0.08536442, -0.06134639, 0.11295843, -0.04818217, + 0.14562836, -0.12305059, 0.15695867, -0.11161390, 0.08500137, 0.19271941, 0.86525357, 0.21717627, + 0.97158766, 0.42288730, 0.09890039, 0.01148765, 0.97024685, 0.19697112, -0.59141791, -0.31600696, + -0.58647990, -0.34302223, -0.59306550, -0.36427227, -0.59695083, -0.26431620, 0.67671591, 0.67960924, + 0.46656516, 0.30850092, 0.73536104, 0.73938161, 0.91650903, 0.57628596, 0.51164514, 0.11695814, + 0.11255538, -0.07302766, 0.16620418, -0.09871224, 0.15272795, -0.12076923, 0.08827571, -0.07442430, + 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, 0.90233624, 0.84986305, 0.26141909, + 0.84528726, 0.81416380, 0.07014155, 0.07749540, 0.08745074, 0.13131952, 0.08430066, 0.09709007, + 0.09247591, 0.11065811, 0.00429944, 0.31476986, 0.00440918, 0.77413058, 0.13409913, 0.20965169, + 0.61764991, 0.55266041, 0.56107825, 0.42051074, -0.18658412, 0.53568852, -0.19482780, 0.53271860, + -0.19558203, 0.57155901, -0.19633618, 0.57260245, 0.16804738, 0.80362344, 0.52392679, 0.27550557, + 0.66738850, 0.39348483, 0.31801429, 
0.30325863, 0.37068403, 0.92767614, 0.08172131, 0.13249113, + 0.09947956, 0.10781212, 0.08890627, 0.12280971, 0.06911418, 0.09499176, 0.60799408, 0.01458820, + 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, + -0.19839945, 0.53462923, -0.19349247, 0.57778782, -0.20039621, 0.56689924, -0.19190890, 0.53286803, + }; + + test_case.add_input(input); + test_case.add_input(mask); + test_case.add_input(past); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-6); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_extra_add.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.14930259, 0.11199699, 0.81292826, 0.08368169, 0.05704883, 0.41276145, 0.38760167, 0.00146112, + 0.14275745, 0.54254925, 0.07962929, 0.31023681, 0.09597706, 0.60583973, 0.90233743, 0.33360451, + 0.18193199, 0.19159532, 0.07869831, 0.86026299, 0.20683478, 0.40150928, 0.93124926, 0.31805834, + }; + std::vector mask = { + 0, + 0, + 1, + 0, + 1, + 1, + 1, + 0, + }; + std::vector extra_add = { + 0.73230380, 0.61824518, 0.19738488, 0.57034588, 0.22331032, 0.53262889, 0.60098642, 0.72943515, + 0.09009175, 0.81116527, 0.47240964, 0.49679127, 0.41110733, 0.29418564, 0.93818313, 0.64175284, + 0.06807775, 0.66733366, 0.78848422, 0.48788327, 0.38806340, 0.14002480, 0.72263688, 0.22772972, + 0.24000823, 0.75820386, 0.64254439, 0.19385594, 0.95595860, 0.59840417, 0.93769604, 0.62474734, + 0.36690548, 0.76047903, 0.62352085, 0.58574778, 0.64251810, 0.78072041, 0.43344691, 0.75383639, + 0.73950553, 0.92625278, 0.05066428, 0.08448382, 0.25980917, 0.50312829, 0.97800279, 0.05422170, + 0.05171391, 0.82828254, 0.42234898, 0.95752198, 0.96325767, 0.97909677, 0.35578200, 0.48091716, + 0.03637243, 0.91552693, 0.43403026, 0.94275808, 0.51182085, 0.86773109, 0.38459453, 0.87822068, + }; + std::vector output = { + 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, + 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, + 0.08714182, 0.12259886, -0.19516067, 0.54010558, 0.08671370, 0.12369543, -0.19658084, 0.54502594, + 0.08458151, 0.12488046, -0.19519810, 0.53906947, 0.09063499, 0.12088943, -0.19583938, 0.54266596, + }; + std::vector present = { + -0.59800303, -0.35666457, -0.59420627, -0.31881350, -0.59887993, -0.27025288, -0.60216135, -0.27772796, + 0.11659990, -0.11224300, 0.09693416, -0.07304113, 0.06023501, -0.05941332, 0.06434284, -0.07978789, + -0.59005713, -0.37009716, -0.59542215, -0.27914333, -0.57998544, -0.29826957, -0.58625919, -0.28872511, + 0.15994480, -0.11288825, 0.07906821, -0.05991337, 0.14479136, -0.04415035, 0.13493451, -0.06541853, + 0.07513385, 0.14411135, 0.07505661, 0.14532046, 0.06090815, 0.12919067, 0.05788904, 0.12018456, + -0.20586906, 0.53715372, -0.20203318, 0.52092510, -0.19883196, 0.50295448, -0.19937295, 0.51055026, + 0.09417956, 0.12943678, 0.06923291, 0.12574309, 0.10221909, 0.11366953, 0.09235901, 0.09584601, + -0.20036517, 0.56818324, -0.19709785, 0.51547027, -0.18871340, 0.55736589, -0.18826833, 0.55965197, + }; + + test_case.add_input(input); + test_case.add_input(mask); + test_case.add_input(extra_add); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-7); +} + 
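+// The model used below declares its inputs with dynamic shapes
+// (attention_dynamic_shapes.prototxt), so concrete shapes are passed together
+// with every input and expected output; the test itself is disabled in
+// unit_test.manifest because the CPU plug-in doesn't support operations with
+// dynamic rank.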
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_dynamic_shapes.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.42226878, 0.50984067, 0.80440795, 0.68040705, 0.93614250, 0.45104721, 0.71767306, 0.48596525, + 0.70076728, 0.04500086, 0.28930107, 0.77435863, 0.19392140, 0.90290719, 0.91955870, 0.58811885, + 0.76795286, 0.62884814, 0.23377730, 0.49212688, 0.87256873, 0.11944817, 0.57715887, 0.91886938, + }; + std::vector weights = { + 0.99377930, 0.22733542, 0.43217131, 0.60717988, 0.97224706, 0.70020503, 0.92439449, 0.41512674, 0.47728160, + 0.40306625, 0.72619593, 0.37954643, 0.36950976, 0.84305370, 0.61671126, 0.22251014, 0.73839295, 0.73471880, + 0.37428924, 0.80240524, 0.23120961, 0.06072779, 0.92840081, 0.71558088, 0.08719950, 0.51666921, 0.53768843, + 0.48113129, 0.46389169, 0.01036468, 0.37341005, 0.67195475, 0.53599644, 0.41795707, 0.58081782, 0.97939289, + }; + std::vector bias = { + 0.77122736, + 0.75600564, + 0.86177206, + 0.69982684, + 0.74719858, + 0.78054035, + 0.80007398, + 0.74902135, + 0.81258053, + 0.01575289, + 0.08463049, + 0.39671996, + }; + std::vector mask = { + 0, + 1, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + }; + std::vector past = { + 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, + 0.91846281, 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, + 0.89891797, 0.27753425, 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, + 0.62769043, 0.61990744, 0.59077013, 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 0.54185718, + 0.80831683, 0.29390740, 0.29051417, 0.51964313, 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, + 0.98571628, 0.07566493, 0.37537411, 0.42080343, 0.21715857, 0.57869565, 0.55962265, 0.82500041, 0.60776925, + 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, 0.98562658, 0.64355153, 0.69856495, 0.30377558, + 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, 0.94460547, 0.90907097, 0.22525074, + 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, 0.22606593, + }; + std::vector output = { + 1.47439122, 0.50951630, 1.17974961, 1.58501005, 1.49403512, 0.51560062, 1.18972027, 1.59668207, + 1.48384988, 0.51248586, 1.18596375, 1.59219086, 1.44181466, 0.50219649, 1.15537691, 1.55348074, + 0.83429223, 0.59521818, 0.87688094, 0.13611843, 0.82936716, 0.61004817, 0.87633312, 0.13887596, + 0.83155584, 0.59382534, 0.87496555, 0.14041223, 0.83309680, 0.58982348, 0.87517864, 0.13930768, + }; + std::vector present = { + 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, + 0.91846281, 1.90736914, 1.45914197, 2.30920029, 1.94944119, 2.12886763, 1.64736962, 1.36378694, 1.03263116, + 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, 0.89891797, + 0.27753425, 1.68161881, 1.87394094, 1.94785213, 2.08572555, 1.90705216, 1.90777159, 1.23910809, 1.52017307, + 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, 0.62769043, 0.61990744, + 0.59077013, 2.02901411, 1.58923888, 2.17776394, 1.76309133, 1.74264824, 1.31485105, 1.71575761, 1.29775190, + 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 
0.54185718, 0.80831683, 0.29390740, 0.29051417, + 0.51964313, 1.66065478, 2.17192268, 1.86598253, 2.03193212, 1.52620018, 1.82728052, 1.46963060, 1.87916136, + 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, 0.98571628, 0.07566493, 0.37537411, 0.42080343, + 0.21715857, 1.56316149, 0.55312467, 1.59553123, 0.53537023, 1.64308119, 0.62742490, 1.31600118, 0.37510848, + 0.57869565, 0.55962265, 0.82500041, 0.60776925, 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, + 0.98562658, 1.33183134, 1.70965421, 1.70983100, 1.76660407, 1.46399045, 1.70318413, 0.83565855, 1.37921953, + 0.64355153, 0.69856495, 0.30377558, 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, + 0.94460547, 1.60677671, 0.53308368, 1.60789728, 0.56227136, 1.50563633, 0.50456268, 1.49554634, 0.48299593, + 0.90907097, 0.22525074, 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, + 0.22606593, 1.59781134, 2.01703453, 1.58993423, 1.78536010, 1.21809304, 1.69219351, 1.24090374, 1.75499403, + }; + + test_case.add_input(Shape{2, 4, 3}, input); + test_case.add_input(Shape{3, 12}, weights); + test_case.add_input(Shape{12}, bias); + test_case.add_input(Shape{2, 9}, mask); + test_case.add_input(Shape{2, 2, 2, 5, 2}, past); + test_case.add_expected_output(Shape{2, 4, 4}, output); + test_case.add_expected_output(Shape{2, 2, 2, 9, 2}, present); + test_case.run_with_tolerance_as_fp(1e-6); +} diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 5942e64e2563db..c818f7c00eae4a 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -1564,3 +1564,5 @@ IE_CPU.onnx_model_gather_float_2D_neg_indices onnx_model_skip_layer_normalization_dynamic_shapes # Doesn't support op with dynamic shapes onnx_model_embed_layer_normalization_dynamic_shapes +# CPU plug-in doesn't support operation with dynamic rank +onnx_model_attention_dynamic_shapes
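For reference, the arithmetic these tests exercise is fused-QKV scaled dot-product attention as defined by the com.microsoft Attention contract. The sketch below is a minimal single-head, single-batch, float-only reference with no mask and no past state; it is illustrative only, the function name attention_reference and the row-major layout conventions are assumptions, and it does not mirror the importer's actual decomposition in attention.cpp (which additionally handles multiple heads, the mask_index variants, past/present state, unidirectional masking, extra_add, and qkv_hidden_sizes).

// A minimal single-head reference for the computation exercised by the tests
// above: fused Q/K/V projection followed by scaled dot-product attention.
// Assumed layouts (row-major):
//   input   [seq_len, hidden]
//   weights [hidden, 3 * hidden]   (Q, K, V blocks concatenated column-wise)
//   bias    [3 * hidden]
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

std::vector<float> attention_reference(const std::vector<float>& input,
                                       const std::vector<float>& weights,
                                       const std::vector<float>& bias,
                                       std::size_t seq_len,
                                       std::size_t hidden) {
    // Fused projection: qkv[s][j] = bias[j] + sum_k input[s][k] * weights[k][j]
    std::vector<float> qkv(seq_len * 3 * hidden);
    for (std::size_t s = 0; s < seq_len; ++s) {
        for (std::size_t j = 0; j < 3 * hidden; ++j) {
            float acc = bias[j];
            for (std::size_t c = 0; c < hidden; ++c) {
                acc += input[s * hidden + c] * weights[c * 3 * hidden + j];
            }
            qkv[s * 3 * hidden + j] = acc;
        }
    }
    // Views into the Q, K and V blocks of the projected tensor.
    const auto q = [&](std::size_t s, std::size_t d) { return qkv[s * 3 * hidden + d]; };
    const auto k = [&](std::size_t s, std::size_t d) { return qkv[s * 3 * hidden + hidden + d]; };
    const auto v = [&](std::size_t s, std::size_t d) { return qkv[s * 3 * hidden + 2 * hidden + d]; };

    std::vector<float> out(seq_len * hidden, 0.0f);
    // With a single head, the head size equals the hidden size.
    const float scale = 1.0f / std::sqrt(static_cast<float>(hidden));
    for (std::size_t i = 0; i < seq_len; ++i) {
        // Scaled scores for query row i, softmax-normalized with the usual max shift.
        std::vector<float> scores(seq_len);
        float max_score = -std::numeric_limits<float>::infinity();
        for (std::size_t j = 0; j < seq_len; ++j) {
            float dot = 0.0f;
            for (std::size_t d = 0; d < hidden; ++d) {
                dot += q(i, d) * k(j, d);
            }
            scores[j] = dot * scale;
            max_score = std::max(max_score, scores[j]);
        }
        float denom = 0.0f;
        for (std::size_t j = 0; j < seq_len; ++j) {
            scores[j] = std::exp(scores[j] - max_score);
            denom += scores[j];
        }
        // out_i = sum_j softmax(scores)_j * V_j
        for (std::size_t j = 0; j < seq_len; ++j) {
            for (std::size_t d = 0; d < hidden; ++d) {
                out[i * hidden + d] += (scores[j] / denom) * v(j, d);
            }
        }
    }
    return out;
}

With num_heads > 1, the projected Q, K and V are split along the hidden dimension into per-head slices of size hidden / num_heads, each head attends independently, and the heads' outputs are concatenated back. The five-dimensional present output seen in the expected values above stacks the per-head K and V as [2, batch, num_heads, total_sequence_length, head_size], which is why the attention_past test prepends the past values to the newly computed keys and values.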