From c1ee5d2b68f94c5a3fa812df43c056ce90633f70 Mon Sep 17 00:00:00 2001
From: Mateusz Tabaka
Date: Fri, 12 Nov 2021 15:35:46 +0100
Subject: [PATCH] Add support for ONNX operator com.microsoft.Attention (#8008)

Ticket: 62890
---
 .../src/op/com.microsoft/attention.cpp        | 548 ++++++++++++++++++
 .../src/op/com.microsoft/attention.hpp        |  17 +
 .../frontend/onnx/frontend/src/ops_bridge.cpp |   2 +
 .../onnx/com.microsoft/attention.prototxt     | 123 ++++
 .../attention_dynamic_shapes.prototxt         |  90 +++
 .../attention_extra_add.prototxt              | 190 ++++++
 .../attention_mask_index_1.prototxt           | 163 ++++++
 .../attention_mask_index_2.prototxt           | 168 ++++++
 .../attention_mask_index_3.prototxt           | 166 ++++++
 .../attention_mask_index_4.prototxt           | 169 ++++++
 .../com.microsoft/attention_past.prototxt     | 193 ++++++
 .../attention_qkv_hidden_sizes.prototxt       | 130 +++++
 .../attention_unidirectional.prototxt         | 154 +++++
 .../{ => com.microsoft}/bias_gelu.prototxt    |   0
 .../embed_layer_normalization.prototxt        |   0
 ...ayer_normalization_dynamic_shapes.prototxt |   0
 ...malization_with_segment_embedding.prototxt |   0
 ...n_with_segment_embedding_and_mask.prototxt |   0
 ...yer_normalization_dynamic_shapes.prototxt} |   0
 ...ip_layer_normalization_with_gamma.prototxt |   0
 ...yer_normalization_with_gamma_beta.prototxt |   0
 ...ormalization_with_gamma_beta_bias.prototxt |   0
 .../onnx/onnx_import_com_microsoft.in.cpp     | 493 +++++++++++++++-
 ngraph/test/runtime/ie/unit_test.manifest     |   2 +
 24 files changed, 2595 insertions(+), 13 deletions(-)
 create mode 100644 ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.cpp
 create mode 100644 ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.hpp
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_dynamic_shapes.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_extra_add.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_mask_index_1.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_mask_index_2.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_mask_index_3.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_mask_index_4.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_past.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_qkv_hidden_sizes.prototxt
 create mode 100644 ngraph/test/models/onnx/com.microsoft/attention_unidirectional.prototxt
 rename ngraph/test/models/onnx/{ => com.microsoft}/bias_gelu.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/embed_layer_normalization.prototxt (100%)
 rename ngraph/test/models/onnx/{dynamic_shapes => com.microsoft}/embed_layer_normalization_dynamic_shapes.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/embed_layer_normalization_with_segment_embedding.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/embed_layer_normalization_with_segment_embedding_and_mask.prototxt (100%)
 rename ngraph/test/models/onnx/{dynamic_shapes/skip_layer_normalization.prototxt => com.microsoft/skip_layer_normalization_dynamic_shapes.prototxt} (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/skip_layer_normalization_with_gamma.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/skip_layer_normalization_with_gamma_beta.prototxt (100%)
 rename ngraph/test/models/onnx/{ => com.microsoft}/skip_layer_normalization_with_gamma_beta_bias.prototxt (100%)
diff --git a/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.cpp b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.cpp
new file mode 100644
index 00000000000000..f874f96bb3cf3e
--- /dev/null
+++ b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.cpp
@@ -0,0 +1,548 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "op/com.microsoft/attention.hpp"
+
+#include "default_opset.hpp"
+#include "ngraph/builder/split.hpp"
+#include "onnx_import/core/null_node.hpp"
+
+namespace ngraph {
+namespace onnx_import {
+namespace op {
+namespace detail {
+namespace {
+NodeVector split_to_QKV(const std::shared_ptr<ngraph::Node>& node,
+                        int64_t num_heads,
+                        const std::vector<int64_t>& qkv_hidden_sizes);
+
+using NodeTuple = std::tuple<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>>;
+
+NodeTuple get_attention_mask(const OutputVector& op_inputs, bool unidirectional);
+
+std::shared_ptr<ngraph::Node> attention_softmax(const OutputVector& op_inputs,
+                                                const std::shared_ptr<ngraph::Node>& Q,
+                                                std::shared_ptr<ngraph::Node> K,
+                                                std::shared_ptr<ngraph::Node> V,
+                                                const std::shared_ptr<ngraph::Node>& attention_mask,
+                                                const std::shared_ptr<ngraph::Node>& bin_mask,
+                                                const std::shared_ptr<ngraph::Node>& head_size,
+                                                bool unidirectional);
+
+std::shared_ptr<ngraph::Node> get_present_state(const std::shared_ptr<ngraph::Node>& K,
+                                                const std::shared_ptr<ngraph::Node>& V,
+                                                const OutputVector& op_inputs);
+}  // namespace
+}  // namespace detail
+
+namespace set_1 {
+OutputVector attention(const Node& node) {
+    auto nodes = node.get_ng_inputs();
+    const auto& input = nodes[0];
+    const auto& weights = nodes[1];
+    const auto& bias = nodes[2];
+
+    // Attention is defined as:
+    //   Q = input x Wq, K = input x Wk, V = input x Wv
+    //   attention = softmax((Q x K') / sqrt(head_size)) x V
+    //
+    // In this operator, Wq, Wk and Wv are combined in a single 'weights' input along the second axis,
+    // so the approach here is to do a single big matrix multiplication
+    // and then split the result into Q, K and V matrices.
+
+    auto matmul = std::make_shared<default_opset::MatMul>(input, weights);
+    auto add = std::make_shared<default_opset::Add>(matmul, bias);
+
+    const auto num_heads = node.get_attribute_value<int64_t>("num_heads");
+    const auto qkv_hidden_sizes = node.get_attribute_value<std::vector<int64_t>>("qkv_hidden_sizes", {});
+    const auto split_result = detail::split_to_QKV(add, num_heads, qkv_hidden_sizes);
+
+    bool unidirectional = static_cast<bool>(node.get_attribute_value<int64_t>("unidirectional", 0));
+    // the mask has values of either 0 or -10000 and its shape must be
+    // broadcastable to (batch_size, num_heads, sequence_length, past_sequence_length + sequence_length)
+    // so it can be added to Q x K' later
+    // past_sequence_length can be 0 if the 'past' input is not available
+    std::shared_ptr<ngraph::Node> attention_mask = nullptr, bin_mask = nullptr;
+    std::tie(attention_mask, bin_mask) = detail::get_attention_mask(nodes, unidirectional);
+
+    const auto& Q = split_result[0];
+    const auto& K = split_result[1];
+    const auto& V = split_result[2];
+    const auto& head_size = split_result[3];
+
+    // compute softmax((Q x K' + mask) / sqrt(head_size))
+    const auto output = detail::attention_softmax(nodes, Q, K, V, attention_mask, bin_mask, head_size, unidirectional);
+
+    // present = concat(K, V) if the 'past' input is unavailable
+    // or
+    // present = concat(past, K, V)
+    const auto present = detail::get_present_state(K, V, nodes);
+
+    return {output, present};
+}
+}  // namespace set_1
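+
+// A concrete illustration of the shapes involved (numbers restated from the attention.prototxt and
+// attention_mask_index_1.prototxt test models added by this patch, not an additional constraint):
+//   input:   (batch_size=2, sequence_length=4, input_hidden_size=3)
+//   weights: (3, 12) = (input_hidden_size, 3 * hidden_size), so hidden_size = 4
+//   bias:    (12)
+// With num_heads = 2, head_size = hidden_size / num_heads = 2, which gives:
+//   output:  (2, 4, 4)       = (batch_size, sequence_length, hidden_size)
+//   present: (2, 2, 2, 4, 2) = (2, batch_size, num_heads, sequence_length, head_size)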
+
+namespace detail {
+namespace {
+
+std::shared_ptr<ngraph::Node> get_dimensions(const std::shared_ptr<default_opset::ShapeOf>& shape,
+                                             const std::vector<int>& dims) {
+    static const auto zero = default_opset::Constant::create(element::i32, Shape{}, {0});
+    const auto dims_const = default_opset::Constant::create(element::i32, Shape{dims.size()}, dims);
+    return std::make_shared<default_opset::Gather>(shape, dims_const, zero);
+}
+
+std::shared_ptr<ngraph::Node> get_dimensions(const std::shared_ptr<ngraph::Node>& node, const std::vector<int>& dims) {
+    return get_dimensions(std::make_shared<default_opset::ShapeOf>(node), dims);
+}
+
+std::shared_ptr<ngraph::Node> get_hidden_size(const std::shared_ptr<default_opset::ShapeOf>& node_shape) {
+    // node has shape (batch_size, sequence_length, 3 * hidden_size)
+    const auto zero = default_opset::Constant::create(element::i32, Shape{}, {0});
+    const auto hidden_size_x3 = get_dimensions(node_shape, {2});
+    const auto three = default_opset::Constant::create(element::i64, Shape{}, {3});
+    const auto hidden_size = std::make_shared<default_opset::Divide>(hidden_size_x3, three);
+    return hidden_size;
+}
+
+NodeVector split_to_QKV(const std::shared_ptr<ngraph::Node>& node,
+                        int64_t num_heads,
+                        const std::vector<int64_t>& qkv_hidden_sizes) {
+    OutputVector split;
+    std::shared_ptr<ngraph::Node> head_size = nullptr;
+    const auto& node_type = node->get_element_type();
+    const auto node_shape = std::make_shared<default_opset::ShapeOf>(node);
+    // node has shape (batch_size, sequence_length, 3 * hidden_size)
+    // fetch the first two dimensions
+    const auto batch_size_seq_len = get_dimensions(node_shape, {0, 1});
+    const auto num_heads_node = default_opset::Constant::create(element::i64, Shape{1}, {num_heads});
+    if (qkv_hidden_sizes.size() == 0) {
+        const auto hidden_size = get_hidden_size(node_shape);
+        // head_size = hidden_size / num_heads
+        head_size = std::make_shared<default_opset::Divide>(hidden_size, num_heads_node);
+        // split the node into 3 even parts Q, K, V with shape (batch_size, sequence_len, hidden_size)
+        split = ngraph::builder::opset1::split(node, 3, 2);
+        // and reshape each part to the new shape (batch_size, sequence_len, num_heads, head_size)
+        auto new_shape =
+            std::make_shared<default_opset::Concat>(NodeVector{batch_size_seq_len, num_heads_node, head_size}, 0);
+        for (size_t i = 0; i < split.size(); i++) {
+            split[i] = std::make_shared<default_opset::Reshape>(split[i], new_shape, false);
+        }
+        head_size = std::make_shared<default_opset::Convert>(head_size, node_type);
+    } else {
+        // in this case, weights have shape
+        // (input_hidden_size, qkv_hidden_sizes[0] + qkv_hidden_sizes[1] + qkv_hidden_sizes[2]),
+        // so the user specified hidden sizes for Q, K and V
+        NGRAPH_CHECK(qkv_hidden_sizes.size() == 3, "qkv_hidden_sizes attribute needs to have 3 values");
+        NGRAPH_CHECK(qkv_hidden_sizes[0] == qkv_hidden_sizes[1],
+                     "qkv_hidden_sizes first element should be the same as the second");
+        // split the node into 3 parts Q, K, V with shapes
+        // Q: (batch_size, sequence_len, qkv_hidden_sizes[0])
+        // K: (batch_size, sequence_len, qkv_hidden_sizes[1])
+        // V: (batch_size, sequence_len, qkv_hidden_sizes[2])
+        split = ngraph::builder::opset1::split(node, qkv_hidden_sizes, 2);
+        // and reshape each part to the new shape (batch_size, sequence_len, num_heads, qkv_hidden_sizes[i] / num_heads)
+        for (size_t i = 0; i < split.size(); i++) {
+            auto new_shape = std::make_shared<default_opset::Concat>(
+                NodeVector{batch_size_seq_len,
+                           num_heads_node,
+                           default_opset::Constant::create(element::i64, Shape{1}, {qkv_hidden_sizes[i] / num_heads})},
+                0);
+            split[i] = std::make_shared<default_opset::Reshape>(split[i], new_shape, false);
+        }
+        float head_size_val = qkv_hidden_sizes[0] > 0 ? static_cast<float>(qkv_hidden_sizes[0]) / num_heads
+                                                      : static_cast<float>(qkv_hidden_sizes[2]) / num_heads;
+        head_size = default_opset::Constant::create(node_type, Shape{1}, {head_size_val});
+    }
+
+    // transpose Q, K and V to (batch_size, num_heads, sequence_len, head_size)
+    auto perm = default_opset::Constant::create(element::i64, Shape{4}, {0, 2, 1, 3});
+    auto Q = std::make_shared<default_opset::Transpose>(split[0], perm);
+    auto K = std::make_shared<default_opset::Transpose>(split[1], perm);
+    auto V = std::make_shared<default_opset::Transpose>(split[2], perm);
+
+    return {Q, K, V, head_size};
+}
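+
+// A worked example of the uneven split above (numbers from the attention_qkv_hidden_sizes.prototxt test model):
+// with weights of shape (3, 12), qkv_hidden_sizes = [2, 2, 8] and num_heads = 2, the split produces
+//   Q, K: (batch_size, sequence_len, 2) -> reshaped to (batch_size, sequence_len, 2, 1)
+//   V:    (batch_size, sequence_len, 8) -> reshaped to (batch_size, sequence_len, 2, 4)
+// and head_size_val = qkv_hidden_sizes[0] / num_heads = 1 - the Q/K head size, which is the length
+// the Q x K' dot product runs over, so it is the appropriate scale for the softmax input.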
+
+// This function handles the case when mask_index rank is 1, i.e. its shape is (batch_size) or (2 * batch_size).
+// The returned mask consists of 0 and -10000 values and has shape (batch_size, 1, 1, all_seq_len). The 'mask_index'
+// input contains, per batch, the positions from which the -10000 values start appearing in the final mask
+// (if its shape is (batch_size)); if the shape is (2 * batch_size), the user can define two ranges of -10000 values
+// appearing in the final mask. For example:
+//
+// batch_size = 3, all_seq_len = 5, mask_index = [2, 4, 3]
+// the function returns the following mask with shape (3, 1, 1, 5):
+// 0, 0, -10000, -10000, -10000
+// 0, 0, 0, 0, -10000
+// 0, 0, 0, -10000, -10000
+//
+// e.g., for batch = 2, -10000 values appear within the range [mask_index[2]:5] (or [3:5])
+//
+// Another example, but with mask_index shape (2 * batch_size):
+// batch_size = 3, all_seq_len = 5, mask_index = [2, 4, 3, 1, 2, 2]
+// the function returns the following mask with shape (3, 1, 1, 5):
+// -10000, 0, -10000, -10000, -10000
+// -10000, -10000, 0, 0, -10000
+// -10000, -10000, 0, -10000, -10000
+//
+// e.g., for batch = 1, -10000 values appear within two ranges [0:mask_index[4]] and [mask_index[1]:5] (or [0:2], [4:5])
+//
+//
+// This is how it's done with nGraph operations:
+//
+// First the 'base' is generated by range + broadcast:
+//   base = range(0, all_seq_len)
+//   base = broadcast(base, shape=(batch_size, all_seq_len))
+//
+// With batch_size = 3 and all_seq_len = 5, 'base' looks as follows:
+// [[0, 1, 2, 3, 4],
+//  [0, 1, 2, 3, 4],
+//  [0, 1, 2, 3, 4]]
+//
+// The next step is to reshape mask_index:
+//   mask_index = reshape(mask_index, shape=(-1, batch_size))
+//
+// With the second example above (mask_index = [2, 4, 3, 1, 2, 2]), it now looks like:
+//   mask_index = [[2, 4, 3],
+//                 [1, 2, 2]]
+//
+// Now we take the first row and reshape it to (batch_size, 1) to have the indices laid out in a column:
+//   tail_range_indices = gather(mask_index, indices=[0], axis=0)  # tail_range_indices = [2, 4, 3]
+//   tail_range_indices = reshape(tail_range_indices, shape=(batch_size, 1))
+//   # tail_range_indices = [[2],
+//   #                       [4],
+//   #                       [3]]
+//
+// Then the base is compared with the indices:
+//   tail_range_mask = base >= tail_range_indices
+//
+// Thanks to autobroadcast in elementwise operators, the comparison conceptually happens between:
+// [[0, 1, 2, 3, 4],      [[2, 2, 2, 2, 2],
+//  [0, 1, 2, 3, 4],  >=   [4, 4, 4, 4, 4],
+//  [0, 1, 2, 3, 4]]       [3, 3, 3, 3, 3]]
+//
+// and the result is:
+// [[0, 0, 1, 1, 1],
+//  [0, 0, 0, 0, 1],
+//  [0, 0, 0, 1, 1]]
+//
+// So we get the final tail range mask by multiplying this by -10000.
+//
+// Similarly we proceed with the head range - we fetch the second row from the reshaped mask_index,
+// compare it with 'base' (but with the 'Less' operator instead of 'GreaterEqual') and combine the result
+// with tail_range_mask.
+//
+// Handling both mask_index variants (so (batch_size) and (2 * batch_size)) is tricky since we don't
+// know its dimensions upfront. So we compute both variants and use the Select operator to pick
+// the right one at runtime (unless it gets constant-folded earlier).
+std::shared_ptr<ngraph::Node> attention_mask_from_indices(const Output<ngraph::Node>& mask_index,
+                                                          const element::Type_t& type,
+                                                          const std::shared_ptr<ngraph::Node>& batch_size,
+                                                          const std::shared_ptr<ngraph::Node>& all_seq_len) {
+    const auto zero = default_opset::Constant::create(element::i64, Shape{}, {0});
+    const auto one = default_opset::Constant::create(element::i64, Shape{}, {1});
+    const auto stop = std::make_shared<default_opset::Squeeze>(all_seq_len, zero);
+    std::shared_ptr<ngraph::Node> base =
+        std::make_shared<default_opset::Range>(zero, stop, one, mask_index.get_element_type());
+    const auto target_shape = std::make_shared<default_opset::Concat>(NodeVector{batch_size, all_seq_len}, 0);
+    // broadcast 'base' to (batch_size, all_seq_len)
+    base = std::make_shared<default_opset::Broadcast>(base, target_shape);
+    const auto indices_shape = std::make_shared<default_opset::Concat>(
+        NodeVector{default_opset::Constant::create(element::i64, Shape{1}, {-1}), batch_size},
+        0);
+    std::shared_ptr<ngraph::Node> indices = std::make_shared<default_opset::Reshape>(mask_index, indices_shape, false);
+    // fetch the first row from indices
+    std::shared_ptr<ngraph::Node> tail_range_indices = std::make_shared<default_opset::Gather>(indices, zero, zero);
+    tail_range_indices =
+        std::make_shared<default_opset::Reshape>(tail_range_indices,
+                                                 default_opset::Constant::create(element::i32, Shape{2}, {-1, 1}),
+                                                 false);
+    const auto greater_eq = std::make_shared<default_opset::GreaterEqual>(base, tail_range_indices);
+    std::shared_ptr<ngraph::Node> tail_range_mask =
+        std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Convert>(greater_eq, type),
+                                                  default_opset::Constant::create(type, Shape{}, {-10000}));
+    tail_range_mask =
+        std::make_shared<default_opset::Unsqueeze>(tail_range_mask,
+                                                   default_opset::Constant::create(element::i64, Shape{2}, {1, 2}));
+
+    const auto gather_index =
+        std::make_shared<default_opset::Mod>(default_opset::Constant::create(element::i64, Shape{}, {1}),
+                                             get_dimensions(indices, {0}));
+    // fetch indices from the second row (or the first one if the second is not available)
+    std::shared_ptr<ngraph::Node> head_range_indices =
+        std::make_shared<default_opset::Gather>(indices, gather_index, zero);
+    head_range_indices =
+        std::make_shared<default_opset::Reshape>(head_range_indices,
+                                                 default_opset::Constant::create(element::i32, Shape{2}, {-1, 1}),
+                                                 false);
+    const auto less = std::make_shared<default_opset::Less>(base, head_range_indices);
+    std::shared_ptr<ngraph::Node> mask = std::make_shared<default_opset::LogicalOr>(less, greater_eq);
+    mask = std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Convert>(mask, type),
+                                                     default_opset::Constant::create(type, Shape{}, {-10000}));
+    // reshape from (batch_size, all_seq_len) to (batch_size, 1, 1, all_seq_len)
+    mask = std::make_shared<default_opset::Unsqueeze>(mask,
+                                                      default_opset::Constant::create(element::i64, Shape{2}, {1, 2}));
+
+    const auto mask_index_first_dim = get_dimensions(mask_index.get_node_shared_ptr(), {0});
+    // compare mask_index.shape[0] with the batch_size value;
+    // if they're equal - select tail_range_mask,
+    // otherwise select the full mask
+    mask = std::make_shared<default_opset::Select>(
+        std::make_shared<default_opset::Equal>(batch_size, mask_index_first_dim),
+        tail_range_mask,
+        mask);
+
+    return mask;
+}
+
+// Prepare the unidirectional mask like it's done in
+// https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_helper.h#L87-L96
+//
+// The function returns two masks - the first one is an attention mask with 0 or -10000 values and shape
+// (seq_len, all_seq_len); the second one is a binary mask which has 0 at the positions where the attention
+// mask has -10000 values and 1 otherwise.
+//
+// For example:
+// seq_len = 4, all_seq_len = 7, past_seq_len = 3. The returned attention mask has shape (4, 7) and contains:
+// 0 0 0 0 -10000 -10000 -10000
+// 0 0 0 0 0 -10000 -10000
+// 0 0 0 0 0 0 -10000
+// 0 0 0 0 0 0 0
+//
+// The returned binary mask has shape (4, 7) and the following values:
+// 1 1 1 1 0 0 0
+// 1 1 1 1 1 0 0
+// 1 1 1 1 1 1 0
+// 1 1 1 1 1 1 1
+//
+// The binary mask is used later, before softmax, to achieve
+// https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h#L158-L166
+//
+// The approach used to generate those masks is similar to the one from the attention_mask_from_indices function
+// (see the comments there).
+NodeTuple unidirectional_mask(const element::Type_t& type,
+                              const std::shared_ptr<ngraph::Node>& seq_len,
+                              const std::shared_ptr<ngraph::Node>& all_seq_len,
+                              const std::shared_ptr<ngraph::Node>& past_seq_len) {
+    const auto zero = default_opset::Constant::create(element::i64, Shape{}, {0});
+    const auto one = default_opset::Constant::create(element::i64, Shape{}, {1});
+    const auto stop = std::make_shared<default_opset::Squeeze>(all_seq_len, zero);
+    std::shared_ptr<ngraph::Node> bin_mask = std::make_shared<default_opset::Range>(zero, stop, one, element::i32);
+    auto target_shape = std::make_shared<default_opset::Concat>(NodeVector{seq_len, all_seq_len}, 0);
+    bin_mask = std::make_shared<default_opset::Broadcast>(bin_mask, target_shape);
+    auto start =
+        std::make_shared<default_opset::Squeeze>(std::make_shared<default_opset::Add>(past_seq_len, one), zero);
+    auto end = std::make_shared<default_opset::Squeeze>(std::make_shared<default_opset::Add>(all_seq_len, one), zero);
+    auto indices = std::make_shared<default_opset::Unsqueeze>(
+        std::make_shared<default_opset::Range>(start, end, one, element::i32),
+        default_opset::Constant::create(element::i32, Shape{1}, {1}));
+    bin_mask = std::make_shared<default_opset::GreaterEqual>(bin_mask, indices);
+    std::shared_ptr<ngraph::Node> attention_mask =
+        std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Convert>(bin_mask, type),
+                                                  default_opset::Constant::create(type, Shape{}, {-10000}));
+    bin_mask = std::make_shared<default_opset::Convert>(std::make_shared<default_opset::LogicalNot>(bin_mask), type);
+    return NodeTuple{attention_mask, bin_mask};
+}
+
+// This is the easiest variant of the 'mask_index' input - it consists of 0 or 1 values
+// and we transform them to:
+// * -10000 for positions where mask_index == 0
+// * 0 for positions where mask_index == 1
+//
+// It handles mask_index with the following shapes:
+// (batch_size, past_sequence_length + sequence_length) or
+// (batch_size, sequence_length, past_sequence_length + sequence_length)
+//
+// The shape (batch_size, 1, max_sequence_length, max_sequence_length) is not supported in onnxruntime:
+// https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_helper.h#L78
+std::shared_ptr<ngraph::Node> raw_mask(const Output<ngraph::Node>& mask_index,
+                                       Dimension::value_type mask_rank,
+                                       const element::Type_t& type) {
+    std::shared_ptr<ngraph::Node> mask = std::make_shared<default_opset::Convert>(mask_index, type);
+    mask = std::make_shared<default_opset::Convert>(mask, type);
+    mask = std::make_shared<default_opset::Subtract>(default_opset::Constant::create(type, Shape{}, {1}), mask);
+    mask = std::make_shared<default_opset::Multiply>(mask, default_opset::Constant::create(type, Shape{}, {-10000}));
+    switch (mask_rank) {
+    // Handle mask_index with the (batch_size, past_sequence_length + sequence_length) shape
+    // Reshape it to (batch_size, 1, 1, past_sequence_length + sequence_length)
+    case 2:
+        mask = std::make_shared<default_opset::Reshape>(
+            mask,
+            default_opset::Constant::create(element::i64, Shape{4}, {0, 1, 1, -1}),
+            true);
+        break;
+    // Handle mask_index with the (batch_size, sequence_length, past_sequence_length + sequence_length) shape
+    // Reshape it to (batch_size, 1, sequence_length, past_sequence_length + sequence_length)
+    case 3:
+        mask = std::make_shared<default_opset::Reshape>(
+            mask,
+            default_opset::Constant::create(element::i64, Shape{4}, {0, 1, 0, -1}),
+            true);
+        break;
+    }
+    return mask;
+}
+
+bool is_past_input_available(const OutputVector& op_inputs) {
+    return op_inputs.size() > 4 && !ngraph::op::is_null(op_inputs[4]);
+}
+
+NodeTuple get_attention_mask(const OutputVector& op_inputs, bool unidirectional) {
+    const auto zero = default_opset::Constant::create(element::i64, Shape{1}, {0});
+    const auto one = default_opset::Constant::create(element::i64, Shape{1}, {1});
+
+    std::shared_ptr<ngraph::Node> past_seq_len;
+    // get the value of past_sequence_length
+    if (is_past_input_available(op_inputs)) {
+        const auto& past = op_inputs[4];
+        // the 'past' node has shape (2, batch_size, num_heads, past_sequence_length, head_size)
+        past_seq_len = get_dimensions(past.get_node_shared_ptr(), {3});
+    } else {
+        past_seq_len = zero;
+    }
+
+    // the 'input' node has shape (batch_size, sequence_length, input_hidden_size)
+    auto input_shape = std::make_shared<default_opset::ShapeOf>(op_inputs[0]);
+    auto seq_len = get_dimensions(input_shape, {1});
+    auto all_seq_len = std::make_shared<default_opset::Add>(seq_len, past_seq_len);
+    const auto& type = op_inputs[0].get_element_type();
+    std::shared_ptr<ngraph::Node> attention_mask = nullptr;
+    std::shared_ptr<ngraph::Node> bin_mask = nullptr;
+    if (unidirectional) {
+        std::tie(attention_mask, bin_mask) = unidirectional_mask(type, seq_len, all_seq_len, past_seq_len);
+    }
+    if (op_inputs.size() > 3 && !ngraph::op::is_null(op_inputs[3])) {
+        const auto& mask_index = op_inputs[3];
+        NGRAPH_CHECK(mask_index.get_element_type() == element::i32, "'mask_index' type must be int32");
+        auto batch_size = get_dimensions(input_shape, {0});
+        const auto mask_rank = mask_index.get_partial_shape().rank();
+        NGRAPH_CHECK(mask_rank.is_static(), "'mask_index' rank must be static");
+        auto mask_rank_val = mask_rank.get_length();
+        std::shared_ptr<ngraph::Node> mask;
+        if (mask_rank_val == 1) {
+            // the case when mask_index has shape (batch_size) or (2 * batch_size),
+            // so it contains positions that specify how the mask should be generated
+            mask = attention_mask_from_indices(mask_index, type, batch_size, all_seq_len);
+        } else if (mask_rank_val < 4) {
+            mask = raw_mask(mask_index, mask_rank.get_length(), type);
+        } else {
+            NGRAPH_CHECK(false, "mask_index with rank " + std::to_string(mask_rank_val) + " is not supported");
+        }
+        // add the mask to the unidirectional mask if available
+        if (attention_mask) {
+            attention_mask = std::make_shared<default_opset::Add>(attention_mask, mask);
+        } else {
+            attention_mask = mask;
+        }
+    }
+    return NodeTuple{attention_mask, bin_mask};
+}
+
+// Compute softmax(Q x K' / sqrt(head_size)) x V
+std::shared_ptr<ngraph::Node> attention_softmax(const OutputVector& op_inputs,
+                                                const std::shared_ptr<ngraph::Node>& Q,
+                                                std::shared_ptr<ngraph::Node> K,
+                                                std::shared_ptr<ngraph::Node> V,
+                                                const std::shared_ptr<ngraph::Node>& attention_mask,
+                                                const std::shared_ptr<ngraph::Node>& bin_mask,
+                                                const std::shared_ptr<ngraph::Node>& head_size,
+                                                bool unidirectional) {
+    auto zero = default_opset::Constant::create(element::i64, Shape{}, {0});
+    if (is_past_input_available(op_inputs)) {
+        // concat past K and V with the present ones
+        const auto& past = op_inputs[4];
+        // the 'past' input has two matrices K and V, each with shape
+        // (1, batch_size, num_heads, past_sequence_length, head_size), concatenated along the first axis
+        // into a single (2, batch_size, num_heads, past_sequence_length, head_size) tensor,
+        // so we need to split it into two parts, remove the first dimension from each part and concatenate
+        // the first part with the current K and the second part with the current V
+        const auto split = ngraph::builder::opset1::split(past, 2, 0);
+        const auto past_K = std::make_shared<default_opset::Squeeze>(split[0], zero);
+        K = std::make_shared<default_opset::Concat>(NodeVector{past_K, K}, 2);
+        const auto past_V = std::make_shared<default_opset::Squeeze>(split[1], zero);
+        V = std::make_shared<default_opset::Concat>(NodeVector{past_V, V}, 2);
+    }
+    // perform Q x K'
+    std::shared_ptr<ngraph::Node> softmax_input = std::make_shared<default_opset::MatMul>(Q, K, false, true);
+    // Q x K' + mask
+    if (attention_mask) {
+        if (unidirectional) {
+            // Perform the equivalent of
+            // https://github.com/microsoft/onnxruntime/blob/851554536ca8185b3413ee57449ea5ac93370193/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h#L158-L166
+            // For positions where the unidirectional mask has -10000 values, Q x K' is zeroed out by bin_mask,
+            // so only the mask value reaches the softmax input
+            softmax_input = std::make_shared<default_opset::Multiply>(softmax_input, bin_mask);
+        }
+        softmax_input = std::make_shared<default_opset::Add>(softmax_input, attention_mask);
+    }
+    const auto sqrt = std::make_shared<default_opset::Sqrt>(head_size);
+    // (Q x K' + mask) / sqrt(head_size)
+    softmax_input = std::make_shared<default_opset::Divide>(softmax_input, sqrt);
+    // handle the 'extra_add' input
+    if (op_inputs.size() > 5 && !ngraph::op::is_null(op_inputs[5])) {
+        NGRAPH_CHECK(!is_past_input_available(op_inputs),
+                     "Cannot use both 'past' and 'extra_add' inputs in the same node");
+        const auto& extra_add = op_inputs[5];
+        softmax_input = std::make_shared<default_opset::Add>(softmax_input, extra_add);
+    }
+    // softmax((Q x K' + mask) / sqrt(head_size))
+    const auto softmax = std::make_shared<default_opset::Softmax>(softmax_input, 3);
+
+    // softmax((Q x K' + mask) / sqrt(head_size)) x V
+    std::shared_ptr<ngraph::Node> output = std::make_shared<default_opset::MatMul>(softmax, V);
+    // transpose the result from (batch_size, num_heads, sequence_length, head_size)
+    // to (batch_size, sequence_length, num_heads, head_size)
+    const auto perm = default_opset::Constant::create(element::i64, Shape{4}, {0, 2, 1, 3});
+    output = std::make_shared<default_opset::Transpose>(output, perm);
+    auto new_shape = default_opset::Constant::create(element::i32, Shape{3}, {0, 0, -1});
+    // reshape the result from (batch_size, sequence_length, num_heads, head_size)
+    // to (batch_size, sequence_length, num_heads * head_size)
+    output = std::make_shared<default_opset::Reshape>(output, new_shape, true);
+
+    return output;
+}
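+
+// Putting the steps above together, attention_softmax builds a subgraph that computes (informally):
+//   scores = (Q x K' [* bin_mask] + attention_mask) / sqrt(head_size) [+ extra_add]
+//   output = Reshape(Transpose(Softmax(scores, axis=3) x V))
+// where the bracketed terms appear only in unidirectional mode and when the optional sixth input
+// is provided, respectively.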
+
+// Make the present state from the K and V matrices by reshaping them from
+// (batch_size, num_heads, sequence_length, head_size) to (1, batch_size, num_heads, sequence_length, head_size)
+// and concatenating them along the first axis to make the 'present' output.
+// If the fifth input ('past') is available, it gets concatenated with the 'present' output along the fourth axis.
+std::shared_ptr<ngraph::Node> get_present_state(const std::shared_ptr<ngraph::Node>& K,
+                                                const std::shared_ptr<ngraph::Node>& V,
+                                                const OutputVector& op_inputs) {
+    auto zero = default_opset::Constant::create(element::i64, Shape{1}, {0});
+    // expand the K shape (batch_size, num_heads, sequence_length, head_size) to
+    // (1, batch_size, num_heads, sequence_length, head_size)
+    auto K_unsqueezed = std::make_shared<default_opset::Unsqueeze>(K, zero);
+    // similarly expand the V shape
+    auto V_unsqueezed = std::make_shared<default_opset::Unsqueeze>(V, zero);
+
+    // add padding in case K and V have different shapes (it happens when the user provided uneven
+    // qkv_hidden_sizes); if the shapes are equal (so the padding is zero), Pad gets eliminated in the
+    // NopElimination pass
+    const auto K_shape = std::make_shared<default_opset::ShapeOf>(K_unsqueezed);
+    const auto V_shape = std::make_shared<default_opset::ShapeOf>(V_unsqueezed);
+    const auto K_pads_end =
+        std::make_shared<default_opset::Maximum>(std::make_shared<default_opset::Subtract>(V_shape, K_shape), zero);
+    const auto V_pads_end =
+        std::make_shared<default_opset::Maximum>(std::make_shared<default_opset::Subtract>(K_shape, V_shape), zero);
+    const auto pads_begin =
+        std::make_shared<default_opset::Broadcast>(zero, std::make_shared<default_opset::ShapeOf>(K_shape));
+    const auto K_padded =
+        std::make_shared<default_opset::Pad>(K_unsqueezed, pads_begin, K_pads_end, ngraph::op::PadMode::CONSTANT);
+    const auto V_padded =
+        std::make_shared<default_opset::Pad>(V_unsqueezed, pads_begin, V_pads_end, ngraph::op::PadMode::CONSTANT);
+
+    // concat the key and value tensors along the first axis to make the 'present' state;
+    // after that operation, 'present' has shape (2, batch_size, num_heads, sequence_length, head_size)
+    std::shared_ptr<ngraph::Node> present = std::make_shared<default_opset::Concat>(NodeVector{K_padded, V_padded}, 0);
+    if (is_past_input_available(op_inputs)) {
+        const auto& past = op_inputs[4];
+        // concat 'past' with the 'present' output along the fourth axis;
+        // after that operation, 'present' has shape:
+        // (2, batch_size, num_heads, past_sequence_length + sequence_length, head_size)
+        present = std::make_shared<default_opset::Concat>(OutputVector{past, present}, 3);
+    }
+    return present;
+}
+}  // namespace
+}  // namespace detail
+}  // namespace op
+}  // namespace onnx_import
+}  // namespace ngraph
diff --git a/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.hpp b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.hpp
new file mode 100644
index 00000000000000..50ecbd82ef57fe
--- /dev/null
+++ b/ngraph/frontend/onnx/frontend/src/op/com.microsoft/attention.hpp
@@ -0,0 +1,17 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "onnx_import/core/node.hpp"
+
+namespace ngraph {
+namespace onnx_import {
+namespace op {
+namespace set_1 {
+OutputVector attention(const Node& node);
+}  // namespace set_1
+}  // namespace op
+}  // namespace onnx_import
+}  // namespace ngraph
diff --git a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
index ffe15e5b1e5e59..7d12281d44674e 100644
--- a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
+++ b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp
@@ -31,6 +31,7 @@
 #include "op/cast_like.hpp"
 #include "op/ceil.hpp"
 #include "op/clip.hpp"
+#include "op/com.microsoft/attention.hpp"
 #include "op/com.microsoft/bias_gelu.hpp"
 #include "op/com.microsoft/embed_layer_normalization.hpp"
 #include "op/com.microsoft/skip_layer_normalization.hpp"
@@ -490,6 +491,7 @@ OperatorsBridge::OperatorsBridge() {
     REGISTER_OPERATOR_WITH_DOMAIN(OPENVINO_ONNX_DOMAIN, "PriorBoxClustered", 1, prior_box_clustered);
     REGISTER_OPERATOR_WITH_DOMAIN(OPENVINO_ONNX_DOMAIN, "Swish", 1, swish);
 
+    REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "Attention", 1, attention);
REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "BiasGelu", 1, bias_gelu); REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "EmbedLayerNormalization", 1, embed_layer_normalization); REGISTER_OPERATOR_WITH_DOMAIN(MICROSOFT_DOMAIN, "SkipLayerNormalization", 1, skip_layer_normalization); diff --git a/ngraph/test/models/onnx/com.microsoft/attention.prototxt b/ngraph/test/models/onnx/com.microsoft/attention.prototxt new file mode 100644 index 00000000000000..53ac350573b055 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention.prototxt @@ -0,0 +1,123 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + output: "output" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_dynamic_shapes.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_dynamic_shapes.prototxt new file mode 100644 index 00000000000000..97a4f3f1f9134a --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_dynamic_shapes.prototxt @@ -0,0 +1,90 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: 
"bias" + input: "mask" + input: "past" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + input { + name: "input" + type { + tensor_type { + elem_type: 1 + } + } + } + input { + name: "weights" + type { + tensor_type { + elem_type: 1 + } + } + } + input { + name: "bias" + type { + tensor_type { + elem_type: 1 + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim {} + dim {} + } + } + } + } + input { + name: "past" + type { + tensor_type { + elem_type: 1 + } + } + } + + output { + name: "output" + type { + tensor_type { + elem_type: 1 + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_extra_add.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_extra_add.prototxt new file mode 100644 index 00000000000000..f8664f4507f459 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_extra_add.prototxt @@ -0,0 +1,190 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask" + input: "" + input: "extra_add" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + 
dim_value: 3 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + input { + name: "extra_add" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_mask_index_1.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_1.prototxt new file mode 100644 index 00000000000000..56d4e1d1142a4e --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_1.prototxt @@ -0,0 +1,163 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask_index" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: 
"mask_index" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_mask_index_2.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_2.prototxt new file mode 100644 index 00000000000000..481d9ea86f5488 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_2.prototxt @@ -0,0 +1,168 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask_index" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + attribute { + name: "unidirectional" + i: 1 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "mask_index" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + 
dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_mask_index_3.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_3.prototxt new file mode 100644 index 00000000000000..67558f33599282 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_3.prototxt @@ -0,0 +1,166 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 
2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_mask_index_4.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_4.prototxt new file mode 100644 index 00000000000000..9b9387991a9c64 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_mask_index_4.prototxt @@ -0,0 +1,169 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_past.prototxt 
b/ngraph/test/models/onnx/com.microsoft/attention_past.prototxt new file mode 100644 index 00000000000000..7625195fa044c8 --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_past.prototxt @@ -0,0 +1,193 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + input: "mask" + input: "past" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + input { + name: "mask" + type { + tensor_type { + elem_type: 6 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 9 + } + } + } + } + } + input { + name: "past" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 5 + } + dim { + dim_value: 2 + } + } + } + } + } + + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 9 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_qkv_hidden_sizes.prototxt 
b/ngraph/test/models/onnx/com.microsoft/attention_qkv_hidden_sizes.prototxt new file mode 100644 index 00000000000000..5ee43aa5c3624a --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_qkv_hidden_sizes.prototxt @@ -0,0 +1,130 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + output: "output" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + attribute { + name: "qkv_hidden_sizes" + ints: 2 + ints: 2 + ints: 8 + type: INTS + } + domain: "com.microsoft" + } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 8 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/com.microsoft/attention_unidirectional.prototxt b/ngraph/test/models/onnx/com.microsoft/attention_unidirectional.prototxt new file mode 100644 index 00000000000000..31a65b299d60ee --- /dev/null +++ b/ngraph/test/models/onnx/com.microsoft/attention_unidirectional.prototxt @@ -0,0 +1,154 @@ +ir_version: 6 +producer_name: "nGraph" +graph { + node { + input: "input" + input: "weights" + input: "bias" + output: "output" + output: "present" + name: "Attention_1" + op_type: "Attention" + attribute { + name: "num_heads" + i: 2 + type: INT + } + attribute { + name: "unidirectional" + i: 1 + type: INT + } + domain: "com.microsoft" 
+ } + name: "attention-model" + initializer { + name: "weights" + dims: 3 + dims: 12 + data_type: 1 + float_data: 0.01326417364180088 + float_data: -0.017005326226353645 + float_data: 0.021556973457336426 + float_data: -0.079218357801437378 + float_data: -0.019958715885877609 + float_data: 0.066062852740287781 + float_data: -0.063465960323810577 + float_data: -0.036202378571033478 + float_data: -0.038673330098390579 + float_data: -0.050637193024158478 + float_data: 0.0024814880453050137 + float_data: -0.017267324030399323 + float_data: -0.0047671985812485218 + float_data: -0.014202062971889973 + float_data: 0.10090816766023636 + float_data: 0.044896259903907776 + float_data: 0.015443948097527027 + float_data: -0.0010053194127976894 + float_data: 0.071923978626728058 + float_data: 0.01173736434429884 + float_data: 0.034053854644298553 + float_data: -0.037060577422380447 + float_data: 0.01355923805385828 + float_data: 0.054467327892780304 + float_data: 0.088897556066513062 + float_data: 0.019563071429729462 + float_data: 0.025579970329999924 + float_data: -0.032200627028942108 + float_data: -0.0083356937393546104 + float_data: -0.10528338700532913 + float_data: 0.04967513307929039 + float_data: -0.093638911843299866 + float_data: 0.0018587876111268997 + float_data: 0.01037109550088644 + float_data: -0.011854520998895168 + float_data: 0.035907052457332611 + } + initializer { + name: "bias" + dims: 12 + data_type: 1 + float_data: -0.2587452232837677 + float_data: -0.095395378768444061 + float_data: 0.12785771489143372 + float_data: 0.16469171643257141 + float_data: -0.58997648954391479 + float_data: -0.28082749247550964 + float_data: 0.077637940645217896 + float_data: -0.03203071653842926 + float_data: 0.075582884252071381 + float_data: 0.14739133417606354 + float_data: -0.19812127947807312 + float_data: 0.50444173812866211 + } + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "present" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + dim { + dim_value: 4 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +opset_import { + domain: "com.microsoft" + version: 1 +} diff --git a/ngraph/test/models/onnx/bias_gelu.prototxt b/ngraph/test/models/onnx/com.microsoft/bias_gelu.prototxt similarity index 100% rename from ngraph/test/models/onnx/bias_gelu.prototxt rename to ngraph/test/models/onnx/com.microsoft/bias_gelu.prototxt diff --git a/ngraph/test/models/onnx/embed_layer_normalization.prototxt b/ngraph/test/models/onnx/com.microsoft/embed_layer_normalization.prototxt similarity index 100% rename from ngraph/test/models/onnx/embed_layer_normalization.prototxt rename to ngraph/test/models/onnx/com.microsoft/embed_layer_normalization.prototxt diff --git a/ngraph/test/models/onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.prototxt b/ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_dynamic_shapes.prototxt similarity index 100% rename from ngraph/test/models/onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.prototxt rename to ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_dynamic_shapes.prototxt diff --git 
a/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding.prototxt b/ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_with_segment_embedding.prototxt similarity index 100% rename from ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding.prototxt rename to ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_with_segment_embedding.prototxt diff --git a/ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding_and_mask.prototxt b/ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_with_segment_embedding_and_mask.prototxt similarity index 100% rename from ngraph/test/models/onnx/embed_layer_normalization_with_segment_embedding_and_mask.prototxt rename to ngraph/test/models/onnx/com.microsoft/embed_layer_normalization_with_segment_embedding_and_mask.prototxt diff --git a/ngraph/test/models/onnx/dynamic_shapes/skip_layer_normalization.prototxt b/ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.prototxt similarity index 100% rename from ngraph/test/models/onnx/dynamic_shapes/skip_layer_normalization.prototxt rename to ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.prototxt diff --git a/ngraph/test/models/onnx/skip_layer_normalization_with_gamma.prototxt b/ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma.prototxt similarity index 100% rename from ngraph/test/models/onnx/skip_layer_normalization_with_gamma.prototxt rename to ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma.prototxt diff --git a/ngraph/test/models/onnx/skip_layer_normalization_with_gamma_beta.prototxt b/ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.prototxt similarity index 100% rename from ngraph/test/models/onnx/skip_layer_normalization_with_gamma_beta.prototxt rename to ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.prototxt diff --git a/ngraph/test/models/onnx/skip_layer_normalization_with_gamma_beta_bias.prototxt b/ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.prototxt similarity index 100% rename from ngraph/test/models/onnx/skip_layer_normalization_with_gamma_beta_bias.prototxt rename to ngraph/test/models/onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.prototxt diff --git a/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp b/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp index 63611843a27dfd..5fb5999cf6c244 100644 --- a/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp +++ b/ngraph/test/onnx/onnx_import_com_microsoft.in.cpp @@ -25,13 +25,11 @@ using namespace ngraph; static std::string s_manifest = "${MANIFEST}"; -using Inputs = std::vector>; -using Outputs = std::vector>; - using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); NGRAPH_TEST(${BACKEND_NAME}, onnx_model_bias_gelu) { - const auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/bias_gelu.onnx")); + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/bias_gelu.onnx")); auto test_case = test::TestCase(function); test_case.add_input({0.5488135, @@ -52,7 +50,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_bias_gelu) { NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta_bias) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, 
"onnx/skip_layer_normalization_with_gamma_beta_bias.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.onnx")); std::vector input = { 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, @@ -78,7 +76,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/skip_layer_normalization_with_gamma_beta.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.onnx")); std::vector input = { 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, @@ -104,7 +102,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/skip_layer_normalization_with_gamma.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/skip_layer_normalization_with_gamma.onnx")); std::vector input = { 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, @@ -130,7 +128,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/dynamic_shapes/skip_layer_normalization.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.onnx")); std::vector input = { 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, @@ -177,8 +175,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization) { - const auto function = - onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization.onnx")); + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/embed_layer_normalization.onnx")); std::vector input_ids = { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, @@ -209,7 +207,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization) { NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization_with_segment_embedding.onnx")); + file_util::path_join(SERIALIZED_ZOO, + "onnx/com.microsoft/embed_layer_normalization_with_segment_embedding.onnx")); std::vector input_ids = { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, @@ -251,7 +250,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding_and_mask) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/embed_layer_normalization_with_segment_embedding_and_mask.onnx")); + file_util::path_join(SERIALIZED_ZOO, + 
"onnx/com.microsoft/embed_layer_normalization_with_segment_embedding_and_mask.onnx")); std::vector input_ids = { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, @@ -296,7 +296,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes) { const auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/dynamic_shapes/embed_layer_normalization_dynamic_shapes.onnx")); + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/embed_layer_normalization_dynamic_shapes.onnx")); std::vector input_ids = { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, @@ -389,3 +389,470 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes test_case.add_expected_output(Shape{3}, expected_mask_index); test_case.run_with_tolerance_as_fp(1e-6); } + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention) { + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.91475844, 0.91523546, 0.82536930, 0.37491974, 0.22384071, 0.05941105, 0.01902100, 0.70131350, + 0.09603709, 0.44200060, 0.53106076, 0.79464376, 0.35469049, 0.25225943, 0.25179818, 0.29592562, + 0.24836586, 0.65088797, 0.93126643, 0.67980725, 0.85708112, 0.59808528, 0.46321425, 0.19301885, + }; + std::vector output = { + 0.07966283, 0.10783536, -0.19424979, 0.54514766, 0.07965867, 0.10783093, -0.19424866, 0.54510003, + 0.07965846, 0.10783067, -0.19424550, 0.54509139, 0.07966217, 0.10783640, -0.19424903, 0.54512268, + 0.06940663, 0.10962760, -0.19698445, 0.53492010, 0.06940675, 0.10962828, -0.19698484, 0.53492326, + 0.06940714, 0.10963022, -0.19698712, 0.53494006, 0.06940673, 0.10962812, -0.19698519, 0.53492481, + }; + + test_case.add_input(input); + test_case.add_expected_output(output); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_qkv_hidden_sizes) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_qkv_hidden_sizes.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.56477863, 0.60309958, 0.35158035, 0.03123519, 0.81918180, 0.76905495, 0.47219241, 0.72016627, + 0.59377003, 0.91380632, 0.56797302, 0.34846428, 0.83839595, 0.16394103, 0.34676281, 0.09161621, + 0.45562279, 0.23317528, 0.37197968, 0.06727808, 0.08500192, 0.84915495, 0.68266946, 0.00227691, + }; + std::vector output = { + -0.59370947, -0.30300471, 0.12048547, -0.09029539, 0.08041390, 0.10250041, -0.19381392, 0.55126983, + -0.59370828, -0.30301332, 0.12049319, -0.09029691, 0.08041921, 0.10250521, -0.19381438, 0.55127531, + -0.59370869, -0.30301058, 0.12049074, -0.09029643, 0.08041564, 0.10250199, -0.19381410, 0.55127168, + -0.59370929, -0.30300608, 0.12048667, -0.09029562, 0.08041184, 0.10249855, -0.19381374, 0.55126774, + -0.59681994, -0.26327702, 0.07638434, -0.06311120, 0.06671587, 0.10916986, -0.19412412, 0.51977092, + -0.59682053, -0.26328400, 0.07638102, -0.06311222, 0.06671817, 0.10917170, -0.19412397, 0.51977223, + -0.59682077, -0.26328647, 0.07637984, -0.06311259, 0.06671739, 0.10917108, -0.19412403, 0.51977175, + -0.59682101, -0.26328778, 0.07637922, -0.06311278, 0.06671065, 0.10916568, -0.19412443, 0.51976782, + }; + + test_case.add_input(input); + 
test_case.add_expected_output(output); + test_case.run_with_tolerance_as_fp(1e-6); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_unidirectional) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_unidirectional.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.89578921, 0.42421508, 0.35630688, 0.77461642, 0.65753633, 0.09723099, 0.62597734, 0.72117692, + 0.57636845, 0.17104276, 0.13245547, 0.59879875, 0.15624641, 0.44903454, 0.50483286, 0.92975074, + 0.36934483, 0.29919949, 0.57185954, 0.83036488, 0.08384345, 0.20378476, 0.74684393, 0.46716982, + }; + std::vector output = { + 0.05604819, 0.09000472, -0.19437021, 0.52487367, 0.06211422, 0.08740954, -0.19139624, 0.52762908, + 0.06708897, 0.08992603, -0.19214047, 0.53631783, 0.06896879, 0.10248676, -0.19485690, 0.53477794, + 0.08577005, 0.12807365, -0.19762954, 0.54432857, 0.06929274, 0.10893210, -0.19599904, 0.53184807, + 0.07348281, 0.10215081, -0.19280069, 0.53552240, 0.07861833, 0.10517240, -0.19285706, 0.54126489, + }; + std::vector present = { + -0.60427380, -0.25958878, -0.59609234, -0.24055196, -0.59613681, -0.30088067, -0.59633607, -0.33270463, + 0.06899665, -0.09284544, 0.08059876, -0.06146053, 0.11841078, -0.10019838, 0.10605468, -0.09273906, + -0.59036821, -0.32410735, -0.60532302, -0.25127757, -0.58926487, -0.25271094, -0.58640373, -0.31730092, + 0.12509561, -0.07968873, 0.06005794, -0.08937149, 0.10523240, -0.05083811, 0.14162725, -0.07438751, + 0.05604819, 0.09000472, 0.06819826, 0.08480665, 0.07700446, 0.09494394, 0.07459175, 0.14003153, + -0.19437021, 0.52487367, -0.18843602, 0.53037173, -0.19362189, 0.55360907, -0.20299932, 0.53020388, + 0.08577005, 0.12807365, 0.05276009, 0.08972625, 0.08190014, 0.08852972, 0.09400313, 0.11423884, + -0.19762954, 0.54432857, -0.19435294, 0.51924801, -0.18643703, 0.54280555, -0.19302703, 0.55837619, + }; + + test_case.add_input(input); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_mask_index_1.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.02841483, 0.47845092, 0.14633700, 0.54597300, 0.40160629, 0.55281311, 0.14931096, 0.64483738, + 0.96559167, 0.05262021, 0.12391864, 0.20093553, 0.74290562, 0.19367455, 0.19253619, 0.41593507, + 0.91188699, 0.61606920, 0.72673517, 0.86981291, 0.19963337, 0.22747350, 0.34308898, 0.57267183, + }; + std::vector mask_index = { + 0, + 1, + }; + std::vector output = { + 0.08298690, 0.12711772, -0.19757506, 0.54029012, 0.08298548, 0.12711433, -0.19757731, 0.54031140, + 0.08298430, 0.12711799, -0.19757695, 0.54031777, 0.08298548, 0.12711433, -0.19757444, 0.54028159, + 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, + 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, + }; + std::vector present = { + -0.58437425, -0.29483819, -0.59927911, -0.30336475, -0.59104657, -0.37327260, -0.59078789, -0.29863101, + 0.11751597, -0.04114649, 0.09933343, -0.09884726, 0.16250694, -0.12028439, 0.09319257, -0.05129660, + -0.60341775, -0.25221461, -0.58933026, -0.31912822, -0.59271193, -0.25470981, -0.59399152, -0.32643768, + 0.05398282, -0.07468132, 0.14743008, 
-0.09407346, 0.10399222, -0.06682440, 0.11632499, -0.08986320, + 0.09104910, 0.12973849, 0.06917210, 0.11059431, 0.09356256, 0.12594685, 0.07814129, 0.14221822, + -0.19329809, 0.53526556, -0.19787431, 0.53673857, -0.20045389, 0.57165766, -0.19869246, 0.51749766, + 0.05380550, 0.10459180, 0.09169570, 0.09892380, 0.07746917, 0.08042616, 0.07953370, 0.12909687, + -0.19593412, 0.50907606, -0.19202785, 0.56904894, -0.18689045, 0.54643762, -0.19969353, 0.53976399, + }; + + test_case.add_input(input); + test_case.add_input(mask_index); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_2) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_mask_index_2.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.75259578, 0.81492645, 0.46713001, 0.29483622, 0.06768602, 0.95105755, 0.32065326, 0.52417183, + 0.73136383, 0.77176476, 0.60997742, 0.64625764, 0.16311000, 0.89680773, 0.01331447, 0.42468646, + 0.58711547, 0.00345124, 0.13053808, 0.46278623, 0.13786320, 0.65182054, 0.74864876, 0.81506181, + }; + std::vector mask_index = { + 3, + 3, + 1, + 1, + }; + std::vector output = { + 0.07524174, 0.11320241, -0.19909523, 0.54785377, 0.06825337, 0.13981669, -0.20774621, 0.53718704, + 0.07531278, 0.12957911, -0.20330518, 0.54547405, 0.07531209, 0.12958010, -0.20330583, 0.54547292, + 0.08900890, 0.11150353, -0.18931937, 0.53757656, 0.07915881, 0.10416336, -0.18914750, 0.52921104, + 0.08285815, 0.11462159, -0.19115375, 0.53077918, 0.08285838, 0.11462225, -0.19115454, 0.53077984, + }; + std::vector present = { + -0.59630549, -0.28110915, -0.60274345, -0.36154836, -0.59437746, -0.33717164, -0.60134649, -0.29849592, + 0.11169122, -0.09345293, 0.11103803, -0.13096604, 0.13131849, -0.10597084, 0.10463209, -0.11332577, + -0.57949269, -0.27235535, -0.58941406, -0.25372508, -0.58658379, -0.28718373, -0.59821802, -0.32433146, + 0.13244939, -0.02865628, 0.09308393, -0.04083736, 0.10948701, -0.04423397, 0.13060363, -0.12316251, + 0.07509718, 0.08392500, 0.06825337, 0.13981669, 0.08239168, 0.11931328, 0.06770951, 0.09240761, + -0.19074154, 0.55260652, -0.20774621, 0.53718704, -0.19888818, 0.55371630, -0.19559640, 0.54754448, + 0.09983939, 0.10603377, 0.07915881, 0.10416336, 0.08655046, 0.12505992, 0.07738422, 0.09509270, + -0.18571433, 0.55095005, -0.18914750, 0.52921104, -0.19315663, 0.53234470, -0.19601485, 0.56322992, + }; + + test_case.add_input(input); + test_case.add_input(mask_index); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_3) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_mask_index_3.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.33093750, 0.39181390, 0.14586255, 0.39709702, 0.98086524, 0.03891133, 0.72234219, 0.21966648, + 0.79986620, 0.97251678, 0.04131543, 0.43971965, 0.50185394, 0.11452501, 0.88111717, 0.76076663, + 0.31870860, 0.54107893, 0.91756296, 0.58112669, 0.99117357, 0.00256292, 0.58885485, 0.93481058, + }; + std::vector mask = { + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + }; + std::vector output = { + 0.07551830, 0.10666487, -0.19357042, 0.53683108, 0.07551410, 0.10666656, -0.19356072, 0.53684169, + 
0.07552745, 0.10666100, -0.19358172, 0.53682435, 0.07552218, 0.10666317, -0.19358677, 0.53681952, + 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, + 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, + }; + std::vector present = { + -0.59174627, -0.27471560, -0.58307797, -0.25967693, -0.60766846, -0.31754097, -0.61241394, -0.26291698, + 0.09206123, -0.05307099, 0.12491645, -0.03853742, 0.08732655, -0.13050151, 0.04073093, -0.10792807, + -0.60556883, -0.34055573, -0.60474855, -0.28785610, -0.60757709, -0.32514900, -0.58872569, -0.37967020, + 0.09779400, -0.13136166, 0.07915612, -0.10649752, 0.11043755, -0.15124020, 0.16626491, -0.11274654, + 0.07639833, 0.11762549, 0.09370039, 0.09133558, 0.05661478, 0.11096847, 0.04019671, 0.10117501, + -0.19371650, 0.52530587, -0.18429738, 0.55240726, -0.20283231, 0.53265429, -0.20036045, 0.50568837, + 0.06171235, 0.12687264, 0.05802051, 0.10266830, 0.06172965, 0.08967118, 0.09727416, 0.13513327, + -0.20576829, 0.53365225, -0.19832623, 0.52809310, -0.19971462, 0.55584043, -0.20121223, 0.57003713, + }; + + test_case.add_input(input); + test_case.add_input(mask); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_4) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_mask_index_4.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.23565151, 0.58627969, 0.75137484, 0.68586946, 0.62750375, 0.13284931, 0.13347220, 0.36357051, + 0.56910241, 0.48275986, 0.49440190, 0.45483324, 0.63547862, 0.97893149, 0.40630588, 0.38783622, + 0.07172249, 0.46385381, 0.99764502, 0.22219376, 0.67735291, 0.40799847, 0.74337566, 0.87263006, + }; + std::vector mask = { + 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, + }; + std::vector output = { + 0.07771622, 0.10724538, -0.19453585, 0.54342043, 0.07459468, 0.10934003, -0.19561143, 0.53936625, + 0.07927690, 0.10619678, -0.19399606, 0.54543519, 0.07459468, 0.10934003, -0.19561143, 0.53936625, + 0.05485561, 0.11278091, -0.20117569, 0.52096349, 0.06629646, 0.10195158, -0.19900991, 0.54654449, + 0.06491723, 0.10292297, -0.19678673, 0.53451663, 0.06549793, 0.11126325, -0.19989857, 0.53717279, + }; + std::vector present = { + -0.59188855, -0.34495637, -0.59508181, -0.25013468, -0.59176934, -0.33229247, -0.59576762, -0.29731843, + 0.14217430, -0.10403840, 0.08584045, -0.06193545, 0.12358667, -0.08588549, 0.10515238, -0.08629489, + -0.59092808, -0.28260738, -0.60047609, -0.30411413, -0.61210287, -0.28645760, -0.59391296, -0.34649473, + 0.12789863, -0.08159252, 0.08122411, -0.08866425, 0.06395009, -0.12896645, 0.14855847, -0.11978809, + 0.08783118, 0.12152332, 0.07067389, 0.09078297, 0.08385989, 0.13306075, 0.07459468, 0.10934003, + -0.19849420, 0.55928540, -0.18948570, 0.53154731, -0.19960676, 0.54237455, -0.19561143, 0.53936625, + 0.08509844, 0.08314656, 0.06388859, 0.12990499, 0.04582624, 0.09566365, 0.08674107, 0.10823163, + -0.18808734, 0.56137776, -0.20168513, 0.51830697, -0.20066255, 0.52363914, -0.19737384, 0.56921995, + }; + + test_case.add_input(input); + test_case.add_input(mask); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-7); +} + +NGRAPH_TEST(${BACKEND_NAME}, 
onnx_model_attention_past) { + const auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_past.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.82966000, 0.77751911, 0.08977074, 0.06076468, 0.40659550, 0.19995944, 0.55544919, 0.83971608, + 0.86254036, 0.30894691, 0.80156928, 0.83092463, 0.14506543, 0.32196075, 0.42209163, 0.24465553, + 0.93944097, 0.73528159, 0.23347616, 0.60544974, 0.93329269, 0.67604774, 0.56349903, 0.26199624, + }; + std::vector mask = { + 1, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 1, + 1, + }; + std::vector past = { + 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, 0.73566031, + 0.75168055, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, 0.88056499, 0.08436447, + 0.54744655, 0.25466520, 0.08500137, 0.19271941, 0.86525357, 0.21717627, 0.97158766, 0.42288730, 0.09890039, + 0.01148765, 0.97024685, 0.19697112, 0.67671591, 0.67960924, 0.46656516, 0.30850092, 0.73536104, 0.73938161, + 0.91650903, 0.57628596, 0.51164514, 0.11695814, 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, + 0.90233624, 0.84986305, 0.26141909, 0.84528726, 0.81416380, 0.00429944, 0.31476986, 0.00440918, 0.77413058, + 0.13409913, 0.20965169, 0.61764991, 0.55266041, 0.56107825, 0.42051074, 0.16804738, 0.80362344, 0.52392679, + 0.27550557, 0.66738850, 0.39348483, 0.31801429, 0.30325863, 0.37068403, 0.92767614, 0.60799408, 0.01458820, + 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, + }; + std::vector output = { + 0.26186451, 0.45950246, -0.04001215, 0.47680017, 0.26333901, 0.46158865, -0.04006424, 0.47588652, + 0.26875457, 0.47031689, -0.03951600, 0.47674999, 0.26851410, 0.46987134, -0.03919901, 0.47629333, + 0.18083976, 0.16579385, -0.05161894, 0.63075018, 0.18228555, 0.16642828, -0.04873618, 0.63316816, + 0.18362364, 0.16702136, -0.05045432, 0.63178891, 0.18000112, 0.16541445, -0.05139139, 0.63105792, + }; + std::vector present = { + 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, + 0.73566031, 0.75168055, -0.59527576, -0.23625080, -0.58657664, -0.29827437, -0.59528387, -0.33578828, + -0.59068960, -0.34870598, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, + 0.88056499, 0.08436447, 0.54744655, 0.25466520, 0.08536442, -0.06134639, 0.11295843, -0.04818217, + 0.14562836, -0.12305059, 0.15695867, -0.11161390, 0.08500137, 0.19271941, 0.86525357, 0.21717627, + 0.97158766, 0.42288730, 0.09890039, 0.01148765, 0.97024685, 0.19697112, -0.59141791, -0.31600696, + -0.58647990, -0.34302223, -0.59306550, -0.36427227, -0.59695083, -0.26431620, 0.67671591, 0.67960924, + 0.46656516, 0.30850092, 0.73536104, 0.73938161, 0.91650903, 0.57628596, 0.51164514, 0.11695814, + 0.11255538, -0.07302766, 0.16620418, -0.09871224, 0.15272795, -0.12076923, 0.08827571, -0.07442430, + 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, 0.90233624, 0.84986305, 0.26141909, + 0.84528726, 0.81416380, 0.07014155, 0.07749540, 0.08745074, 0.13131952, 0.08430066, 0.09709007, + 0.09247591, 0.11065811, 0.00429944, 0.31476986, 0.00440918, 0.77413058, 0.13409913, 0.20965169, + 0.61764991, 0.55266041, 0.56107825, 0.42051074, -0.18658412, 0.53568852, -0.19482780, 0.53271860, + -0.19558203, 0.57155901, -0.19633618, 0.57260245, 0.16804738, 0.80362344, 0.52392679, 0.27550557, + 0.66738850, 0.39348483, 0.31801429, 
0.30325863, 0.37068403, 0.92767614, 0.08172131, 0.13249113, + 0.09947956, 0.10781212, 0.08890627, 0.12280971, 0.06911418, 0.09499176, 0.60799408, 0.01458820, + 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, + -0.19839945, 0.53462923, -0.19349247, 0.57778782, -0.20039621, 0.56689924, -0.19190890, 0.53286803, + }; + + test_case.add_input(input); + test_case.add_input(mask); + test_case.add_input(past); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-6); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_extra_add.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.14930259, 0.11199699, 0.81292826, 0.08368169, 0.05704883, 0.41276145, 0.38760167, 0.00146112, + 0.14275745, 0.54254925, 0.07962929, 0.31023681, 0.09597706, 0.60583973, 0.90233743, 0.33360451, + 0.18193199, 0.19159532, 0.07869831, 0.86026299, 0.20683478, 0.40150928, 0.93124926, 0.31805834, + }; + std::vector mask = { + 0, + 0, + 1, + 0, + 1, + 1, + 1, + 0, + }; + std::vector extra_add = { + 0.73230380, 0.61824518, 0.19738488, 0.57034588, 0.22331032, 0.53262889, 0.60098642, 0.72943515, + 0.09009175, 0.81116527, 0.47240964, 0.49679127, 0.41110733, 0.29418564, 0.93818313, 0.64175284, + 0.06807775, 0.66733366, 0.78848422, 0.48788327, 0.38806340, 0.14002480, 0.72263688, 0.22772972, + 0.24000823, 0.75820386, 0.64254439, 0.19385594, 0.95595860, 0.59840417, 0.93769604, 0.62474734, + 0.36690548, 0.76047903, 0.62352085, 0.58574778, 0.64251810, 0.78072041, 0.43344691, 0.75383639, + 0.73950553, 0.92625278, 0.05066428, 0.08448382, 0.25980917, 0.50312829, 0.97800279, 0.05422170, + 0.05171391, 0.82828254, 0.42234898, 0.95752198, 0.96325767, 0.97909677, 0.35578200, 0.48091716, + 0.03637243, 0.91552693, 0.43403026, 0.94275808, 0.51182085, 0.86773109, 0.38459453, 0.87822068, + }; + std::vector output = { + 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, + 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, + 0.08714182, 0.12259886, -0.19516067, 0.54010558, 0.08671370, 0.12369543, -0.19658084, 0.54502594, + 0.08458151, 0.12488046, -0.19519810, 0.53906947, 0.09063499, 0.12088943, -0.19583938, 0.54266596, + }; + std::vector present = { + -0.59800303, -0.35666457, -0.59420627, -0.31881350, -0.59887993, -0.27025288, -0.60216135, -0.27772796, + 0.11659990, -0.11224300, 0.09693416, -0.07304113, 0.06023501, -0.05941332, 0.06434284, -0.07978789, + -0.59005713, -0.37009716, -0.59542215, -0.27914333, -0.57998544, -0.29826957, -0.58625919, -0.28872511, + 0.15994480, -0.11288825, 0.07906821, -0.05991337, 0.14479136, -0.04415035, 0.13493451, -0.06541853, + 0.07513385, 0.14411135, 0.07505661, 0.14532046, 0.06090815, 0.12919067, 0.05788904, 0.12018456, + -0.20586906, 0.53715372, -0.20203318, 0.52092510, -0.19883196, 0.50295448, -0.19937295, 0.51055026, + 0.09417956, 0.12943678, 0.06923291, 0.12574309, 0.10221909, 0.11366953, 0.09235901, 0.09584601, + -0.20036517, 0.56818324, -0.19709785, 0.51547027, -0.18871340, 0.55736589, -0.18826833, 0.55965197, + }; + + test_case.add_input(input); + test_case.add_input(mask); + test_case.add_input(extra_add); + test_case.add_expected_output(output); + test_case.add_expected_output(present); + test_case.run_with_tolerance_as_fp(1e-7); +} + 
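+// The model used below declares its inputs with dynamic shapes
+// (attention_dynamic_shapes.prototxt), so concrete shapes are passed together
+// with every input and expected output; the test itself is disabled in
+// unit_test.manifest because the CPU plug-in doesn't support operations with
+// dynamic rank.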
+NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { + const auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/com.microsoft/attention_dynamic_shapes.onnx")); + auto test_case = test::TestCase(function); + + std::vector input = { + 0.42226878, 0.50984067, 0.80440795, 0.68040705, 0.93614250, 0.45104721, 0.71767306, 0.48596525, + 0.70076728, 0.04500086, 0.28930107, 0.77435863, 0.19392140, 0.90290719, 0.91955870, 0.58811885, + 0.76795286, 0.62884814, 0.23377730, 0.49212688, 0.87256873, 0.11944817, 0.57715887, 0.91886938, + }; + std::vector weights = { + 0.99377930, 0.22733542, 0.43217131, 0.60717988, 0.97224706, 0.70020503, 0.92439449, 0.41512674, 0.47728160, + 0.40306625, 0.72619593, 0.37954643, 0.36950976, 0.84305370, 0.61671126, 0.22251014, 0.73839295, 0.73471880, + 0.37428924, 0.80240524, 0.23120961, 0.06072779, 0.92840081, 0.71558088, 0.08719950, 0.51666921, 0.53768843, + 0.48113129, 0.46389169, 0.01036468, 0.37341005, 0.67195475, 0.53599644, 0.41795707, 0.58081782, 0.97939289, + }; + std::vector bias = { + 0.77122736, + 0.75600564, + 0.86177206, + 0.69982684, + 0.74719858, + 0.78054035, + 0.80007398, + 0.74902135, + 0.81258053, + 0.01575289, + 0.08463049, + 0.39671996, + }; + std::vector mask = { + 0, + 1, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + }; + std::vector past = { + 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, + 0.91846281, 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, + 0.89891797, 0.27753425, 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, + 0.62769043, 0.61990744, 0.59077013, 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 0.54185718, + 0.80831683, 0.29390740, 0.29051417, 0.51964313, 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, + 0.98571628, 0.07566493, 0.37537411, 0.42080343, 0.21715857, 0.57869565, 0.55962265, 0.82500041, 0.60776925, + 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, 0.98562658, 0.64355153, 0.69856495, 0.30377558, + 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, 0.94460547, 0.90907097, 0.22525074, + 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, 0.22606593, + }; + std::vector output = { + 1.47439122, 0.50951630, 1.17974961, 1.58501005, 1.49403512, 0.51560062, 1.18972027, 1.59668207, + 1.48384988, 0.51248586, 1.18596375, 1.59219086, 1.44181466, 0.50219649, 1.15537691, 1.55348074, + 0.83429223, 0.59521818, 0.87688094, 0.13611843, 0.82936716, 0.61004817, 0.87633312, 0.13887596, + 0.83155584, 0.59382534, 0.87496555, 0.14041223, 0.83309680, 0.58982348, 0.87517864, 0.13930768, + }; + std::vector present = { + 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, + 0.91846281, 1.90736914, 1.45914197, 2.30920029, 1.94944119, 2.12886763, 1.64736962, 1.36378694, 1.03263116, + 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, 0.89891797, + 0.27753425, 1.68161881, 1.87394094, 1.94785213, 2.08572555, 1.90705216, 1.90777159, 1.23910809, 1.52017307, + 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, 0.62769043, 0.61990744, + 0.59077013, 2.02901411, 1.58923888, 2.17776394, 1.76309133, 1.74264824, 1.31485105, 1.71575761, 1.29775190, + 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 
0.54185718, 0.80831683, 0.29390740, 0.29051417, + 0.51964313, 1.66065478, 2.17192268, 1.86598253, 2.03193212, 1.52620018, 1.82728052, 1.46963060, 1.87916136, + 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, 0.98571628, 0.07566493, 0.37537411, 0.42080343, + 0.21715857, 1.56316149, 0.55312467, 1.59553123, 0.53537023, 1.64308119, 0.62742490, 1.31600118, 0.37510848, + 0.57869565, 0.55962265, 0.82500041, 0.60776925, 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, + 0.98562658, 1.33183134, 1.70965421, 1.70983100, 1.76660407, 1.46399045, 1.70318413, 0.83565855, 1.37921953, + 0.64355153, 0.69856495, 0.30377558, 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, + 0.94460547, 1.60677671, 0.53308368, 1.60789728, 0.56227136, 1.50563633, 0.50456268, 1.49554634, 0.48299593, + 0.90907097, 0.22525074, 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, + 0.22606593, 1.59781134, 2.01703453, 1.58993423, 1.78536010, 1.21809304, 1.69219351, 1.24090374, 1.75499403, + }; + + test_case.add_input(Shape{2, 4, 3}, input); + test_case.add_input(Shape{3, 12}, weights); + test_case.add_input(Shape{12}, bias); + test_case.add_input(Shape{2, 9}, mask); + test_case.add_input(Shape{2, 2, 2, 5, 2}, past); + test_case.add_expected_output(Shape{2, 4, 4}, output); + test_case.add_expected_output(Shape{2, 2, 2, 9, 2}, present); + test_case.run_with_tolerance_as_fp(1e-6); +} diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 5942e64e2563db..c818f7c00eae4a 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -1564,3 +1564,5 @@ IE_CPU.onnx_model_gather_float_2D_neg_indices onnx_model_skip_layer_normalization_dynamic_shapes # Doesn't support op with dynamic shapes onnx_model_embed_layer_normalization_dynamic_shapes +# CPU plug-in doesn't support operation with dynamic rank +onnx_model_attention_dynamic_shapes
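For reference, the arithmetic these tests exercise is fused-QKV scaled dot-product attention as defined by the com.microsoft Attention contract. The sketch below is a minimal single-head, single-batch, float-only reference with no mask and no past state; it is illustrative only, the function name attention_reference and the row-major layout conventions are assumptions, and it does not mirror the importer's actual decomposition in attention.cpp (which additionally handles multiple heads, the mask_index variants, past/present state, unidirectional masking, extra_add, and qkv_hidden_sizes).

// A minimal single-head reference for the computation exercised by the tests
// above: fused Q/K/V projection followed by scaled dot-product attention.
// Assumed layouts (row-major):
//   input   [seq_len, hidden]
//   weights [hidden, 3 * hidden]   (Q, K, V blocks concatenated column-wise)
//   bias    [3 * hidden]
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

std::vector<float> attention_reference(const std::vector<float>& input,
                                       const std::vector<float>& weights,
                                       const std::vector<float>& bias,
                                       std::size_t seq_len,
                                       std::size_t hidden) {
    // Fused projection: qkv[s][j] = bias[j] + sum_k input[s][k] * weights[k][j]
    std::vector<float> qkv(seq_len * 3 * hidden);
    for (std::size_t s = 0; s < seq_len; ++s) {
        for (std::size_t j = 0; j < 3 * hidden; ++j) {
            float acc = bias[j];
            for (std::size_t c = 0; c < hidden; ++c) {
                acc += input[s * hidden + c] * weights[c * 3 * hidden + j];
            }
            qkv[s * 3 * hidden + j] = acc;
        }
    }
    // Views into the Q, K and V blocks of the projected tensor.
    const auto q = [&](std::size_t s, std::size_t d) { return qkv[s * 3 * hidden + d]; };
    const auto k = [&](std::size_t s, std::size_t d) { return qkv[s * 3 * hidden + hidden + d]; };
    const auto v = [&](std::size_t s, std::size_t d) { return qkv[s * 3 * hidden + 2 * hidden + d]; };

    std::vector<float> out(seq_len * hidden, 0.0f);
    // With a single head, the head size equals the hidden size.
    const float scale = 1.0f / std::sqrt(static_cast<float>(hidden));
    for (std::size_t i = 0; i < seq_len; ++i) {
        // Scaled scores for query row i, softmax-normalized with the usual max shift.
        std::vector<float> scores(seq_len);
        float max_score = -std::numeric_limits<float>::infinity();
        for (std::size_t j = 0; j < seq_len; ++j) {
            float dot = 0.0f;
            for (std::size_t d = 0; d < hidden; ++d) {
                dot += q(i, d) * k(j, d);
            }
            scores[j] = dot * scale;
            max_score = std::max(max_score, scores[j]);
        }
        float denom = 0.0f;
        for (std::size_t j = 0; j < seq_len; ++j) {
            scores[j] = std::exp(scores[j] - max_score);
            denom += scores[j];
        }
        // out_i = sum_j softmax(scores)_j * V_j
        for (std::size_t j = 0; j < seq_len; ++j) {
            for (std::size_t d = 0; d < hidden; ++d) {
                out[i * hidden + d] += (scores[j] / denom) * v(j, d);
            }
        }
    }
    return out;
}

With num_heads > 1, the projected Q, K and V are split along the hidden dimension into per-head slices of size hidden / num_heads, each head attends independently, and the heads' outputs are concatenated back. The five-dimensional present output seen in the expected values above stacks the per-head K and V as [2, batch, num_heads, total_sequence_length, head_size], which is why the attention_past test prepends the past values to the newly computed keys and values.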