From 3a5f211519ec6644ee7840769530b28e3fc43554 Mon Sep 17 00:00:00 2001 From: Mateusz Mikolajczyk Date: Thu, 20 Jun 2024 07:49:18 +0200 Subject: [PATCH] [PyAPI][Opset15] Add EmbeddingBagPacked and EmbeddingBagOffsets to PyAPI (#25101) ### Details: - *Add EmbeddingBagPacked and EmbeddingBagOffsets to PyAPI* - *...* ### Tickets: - *141864* - *141865* --------- Co-authored-by: Michal Lukaszewski --- .../src/openvino/runtime/opset15/__init__.py | 2 + .../src/openvino/runtime/opset15/ops.py | 63 +++++++++++- .../tests/test_graph/test_ops_embedding.py | 96 +++++++++++++++++++ 3 files changed, 160 insertions(+), 1 deletion(-) create mode 100644 src/bindings/python/tests/test_graph/test_ops_embedding.py diff --git a/src/bindings/python/src/openvino/runtime/opset15/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/__init__.py index 3cecb6e5e03540..d7040d974c1ab9 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset15/__init__.py @@ -7,4 +7,6 @@ # TODO (ticket 138273): Add previous opset operators at the end of opset15 development from openvino.runtime.opset1.ops import parameter from openvino.runtime.opset15.ops import col2im +from openvino.runtime.opset15.ops import embedding_bag_offsets +from openvino.runtime.opset15.ops import embedding_bag_packed from openvino.runtime.opset15.ops import scatter_nd_update diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops.py b/src/bindings/python/src/openvino/runtime/opset15/ops.py index 2b7406099c71ba..5613de6bd8267a 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset15/ops.py @@ -4,9 +4,12 @@ """Factory functions for ops added to openvino opset15.""" from functools import partial -from typing import Optional, Literal, List +from typing import List, Literal, Optional +import numpy as np from openvino.runtime import Node, Type +from openvino.runtime.opset1 import convert_like +from openvino.runtime.opset14 import constant from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import nameable_op from openvino.runtime.utils.types import NodeInput, as_nodes @@ -83,3 +86,61 @@ def col2im( "pads_end": pads_end, }, ) + + +@nameable_op +def embedding_bag_offsets( + emb_table: NodeInput, + indices: NodeInput, + offsets: NodeInput, + default_index: Optional[NodeInput] = None, + per_sample_weights: Optional[NodeInput] = None, + reduction: Literal["sum", "mean"] = "sum", + name: Optional[str] = None, +) -> Node: + """Return a node which performs sums or means of bags of embeddings without the intermediate embeddings. + + :param emb_table: Tensor containing the embedding lookup table. + :param indices: 1D Tensor with indices. + :param offsets: 1D Tensor containing the starting index positions of each bag in indices. + :param per_sample_weights: Tensor with weights for each sample. + :param default_index: Scalar containing default index in embedding table to fill empty bags. + If unset or set to -1, empty bags will be filled with 0. + Reverse indexing using negative indices is not supported. + :param reduction: String to select algorithm used to perform reduction of elements in bag. + :param name: Optional name for output node. + :return: The new node performing EmbeddingBagOffsets operation. + """ + inputs = [emb_table, indices, offsets] + if default_index is not None: + inputs.append(default_index) + elif per_sample_weights is not None: + inputs.append(convert_like(constant(np.array(-1, np.int32)), inputs[1])) + if per_sample_weights is not None: + inputs.append(per_sample_weights) + + return _get_node_factory_opset15().create("EmbeddingBagOffsets", as_nodes(*inputs, name=name), {"reduction": reduction}) + + +@nameable_op +def embedding_bag_packed( + emb_table: NodeInput, + indices: NodeInput, + per_sample_weights: Optional[NodeInput] = None, + reduction: Literal["sum", "mean"] = "sum", + name: Optional[str] = None, +) -> Node: + """Return a node which performs sums or means of "bags" of embeddings, without the intermediate embeddings. + + :param emb_table: Tensor containing the embedding lookup table. + :param indices: 2D Tensor of shape [batch, indices_per_bag] with indices. + :param per_sample_weights: Tensor of weights to be multiplied with embedding table with same shape as indices. + :param reduction: Operator to perform reduction of elements in bag. + :param name: Optional name for output node. + :return: The new node performing EmbeddingBagPacked operation. + """ + inputs = [emb_table, indices] + if per_sample_weights is not None: + inputs.append(per_sample_weights) + + return _get_node_factory_opset15().create("EmbeddingBagPacked", as_nodes(*inputs, name=name), {"reduction": reduction}) diff --git a/src/bindings/python/tests/test_graph/test_ops_embedding.py b/src/bindings/python/tests/test_graph/test_ops_embedding.py new file mode 100644 index 00000000000000..4560506df4f3b7 --- /dev/null +++ b/src/bindings/python/tests/test_graph/test_ops_embedding.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from openvino import Type +from openvino.runtime import opset15 + + +def test_embedding_bag_offsets_15(): + emb_table = opset15.parameter([5, 2], name="emb_table", dtype=np.float32) + indices = opset15.parameter([4], name="indices", dtype=np.int64) + offsets = opset15.parameter([3], name="offsets", dtype=np.int64) + + node = opset15.embedding_bag_offsets(emb_table, indices, offsets) + + assert node.get_type_name() == "EmbeddingBagOffsets" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == [3, 2] + assert node.get_output_element_type(0) == Type.f32 + assert node.get_attributes()["reduction"] == "sum" + + +def test_embedding_bag_offsets_15_default_index(): + emb_table = opset15.parameter([5, 2], name="emb_table", dtype=np.float32) + indices = opset15.parameter([4], name="indices", dtype=np.int64) + offsets = opset15.parameter([3], name="offsets", dtype=np.int64) + default_index = opset15.parameter([], name="default_index", dtype=np.int64) + + node = opset15.embedding_bag_offsets(emb_table, indices, offsets, default_index, reduction="MeAn") + + assert node.get_type_name() == "EmbeddingBagOffsets" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == [3, 2] + assert node.get_output_element_type(0) == Type.f32 + assert node.get_attributes()["reduction"] == "mean" + + +def test_embedding_bag_offsets_15_per_sample_weights(): + emb_table = opset15.parameter([5, 2], name="emb_table", dtype=np.float32) + indices = opset15.parameter([4], name="indices", dtype=np.int64) + offsets = opset15.parameter([3], name="offsets", dtype=np.int64) + per_sample_weights = opset15.parameter([4], name="per_sample_weights", dtype=np.float32) + + node = opset15.embedding_bag_offsets(emb_table, indices, offsets, per_sample_weights=per_sample_weights, reduction="SUM") + + assert node.get_type_name() == "EmbeddingBagOffsets" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == [3, 2] + assert node.get_output_element_type(0) == Type.f32 + assert node.get_attributes()["reduction"] == "sum" + + +def test_embedding_bag_offsets_15_default_index_per_sample_weights(): + emb_table = opset15.parameter([5, 2], name="emb_table", dtype=np.float32) + indices = opset15.parameter([4], name="indices", dtype=np.int64) + offsets = opset15.parameter([3], name="offsets", dtype=np.int64) + default_index = opset15.parameter([], name="default_index", dtype=np.int64) + per_sample_weights = opset15.parameter([4], name="per_sample_weights", dtype=np.float32) + + node = opset15.embedding_bag_offsets(emb_table, indices, offsets, default_index, per_sample_weights, "sum") + + assert node.get_type_name() == "EmbeddingBagOffsets" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == [3, 2] + assert node.get_output_element_type(0) == Type.f32 + assert node.get_attributes()["reduction"] == "sum" + + +def test_embedding_bag_packed_15(): + emb_table = opset15.parameter([5, 2], name="emb_table", dtype=np.float32) + indices = opset15.parameter([3, 3], name="indices", dtype=np.int64) + + node = opset15.embedding_bag_packed(emb_table, indices, reduction="mEaN") + + assert node.get_type_name() == "EmbeddingBagPacked" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == [3, 2] + assert node.get_output_element_type(0) == Type.f32 + assert node.get_attributes()["reduction"] == "mean" + + +def test_embedding_bag_packed_15_per_sample_weights(): + emb_table = opset15.parameter([5, 2], name="emb_table", dtype=np.float32) + indices = opset15.parameter([3, 3], name="indices", dtype=np.int64) + per_sample_weights = opset15.parameter([3, 3], name="per_sample_weights", dtype=np.float32) + + node = opset15.embedding_bag_packed(emb_table, indices, per_sample_weights) + + assert node.get_type_name() == "EmbeddingBagPacked" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == [3, 2] + assert node.get_output_element_type(0) == Type.f32 + assert node.get_attributes()["reduction"] == "sum"