From f8b6e3cb6eedb34f2cf58f1d80a5355517ffbb77 Mon Sep 17 00:00:00 2001 From: Lyamin-Roman Date: Thu, 30 May 2024 08:27:39 +0900 Subject: [PATCH] [GPU] Shared RoPE func tests --- .../subgraph_tests/src/rotary_pos_emb.cpp | 621 ----------------- .../subgraph_tests/rotary_pos_emb.cpp | 32 + .../skip_tests_config.cpp | 3 - .../subgraph_tests/rotary_pos_emb.cpp | 624 +----------------- .../include/subgraph_tests/rotary_pos_emb.hpp | 52 ++ .../subgraph/rotary_pos_emb.hpp | 67 ++ .../src/subgraph/rotary_pos_emb.cpp | 590 +++++++++++++++++ 7 files changed, 751 insertions(+), 1238 deletions(-) delete mode 100644 src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp create mode 100644 src/tests/functional/plugin/shared/include/subgraph_tests/rotary_pos_emb.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/rotary_pos_emb.hpp create mode 100644 src/tests/functional/shared_test_classes/src/subgraph/rotary_pos_emb.cpp diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp deleted file mode 100644 index a505a010a20910..00000000000000 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/rotary_pos_emb.cpp +++ /dev/null @@ -1,621 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include - -#include "common_test_utils/common_utils.hpp" -#include "shared_test_classes/base/ov_subgraph.hpp" -#include "utils/cpu_test_utils.hpp" -#include "utils/fusing_test_utils.hpp" -#include "transformations/utils/gen_pattern.hpp" - -using namespace CPUTestUtils; -using namespace ov::gen_pattern; -using namespace ov; - -namespace ov { -namespace test { - -static 
ov::OutputVector makeCosSinCache(int max_position_embeddings, int rotary_ndims) { - std::vector lut_sin(max_position_embeddings * rotary_ndims, 0.0f); - std::vector lut_cos(max_position_embeddings * rotary_ndims, 0.0f); - - // rotate_half style cos/sin table: - // y1 = cos(m*xita_i) * x1 - sin(m*xita_i) * x2 - // y2 = cos(m*xita_i) * x2 + sin(m*xita_i) * x1 - // - for (int i = 0, k = 0; i < rotary_ndims; i += 2, k++) { - auto xita_i = 1.0 / std::pow(10000.0, static_cast(i) / rotary_ndims); - float* psin = lut_sin.data(); - float* pcos = lut_cos.data(); - for (int m = 0; m < max_position_embeddings; m++, psin += rotary_ndims, pcos += rotary_ndims) { - auto vsin = std::sin(xita_i * m); - auto vcos = std::cos(xita_i * m); - pcos[k] = pcos[k + rotary_ndims / 2] = vcos; - psin[k] = psin[k + rotary_ndims / 2] = vsin; - } - } - auto shape = ov::Shape({1, 1, static_cast(max_position_embeddings), static_cast(rotary_ndims)}); - auto Cos = makeConst(ov::element::f32, shape, lut_cos); - auto Sin = makeConst(ov::element::f32, shape, lut_sin); - return {Cos, Sin}; -} - -static std::shared_ptr buildROPE_Llama2(const int batch, - const int seq_length, - const int max_position_embeddings, - const int num_head, - const int ndims) { - auto input = std::make_shared(ov::element::f32, PartialShape{batch, -1, num_head, ndims}); - auto pos_id_end = std::make_shared(ov::element::i32, ov::Shape{}); - auto pos_ids = std::make_shared(ov::element::i32, PartialShape{1, -1}); - - auto cos_sin_cache = makeCosSinCache(max_position_embeddings, ndims); - auto Constant582 = cos_sin_cache[0]; - auto Constant585 = cos_sin_cache[1]; - - // concat KV length - auto transpose_Transpose = makeOP({input, {0, 2, 1, 3}}); - auto slice_Unsqueeze_426 = makeOP({pos_id_end, 0}); - auto ScatterUpdate_152236 = makeOP({{0, 0, 0}, {2}, slice_Unsqueeze_426, {0}}); - auto slice_Slice = makeOP({Constant582, {0, 0, 0}, ScatterUpdate_152236, {1, 1, 1}}, - {{"begin_mask", {1, 1, 0}}, - {"end_mask", {1, 1, 0}}, - 
{"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto squeeze_Squeeze = makeOP({slice_Slice, 1}); - auto squeeze_Squeeze_435 = makeOP({squeeze_Squeeze, 0}); - auto index_441_Gather = makeOP({squeeze_Squeeze_435, pos_ids, 0}, {{"batch_dims", 0}}); - auto unsqueeze_Unsqueeze = makeOP({index_441_Gather, 1}); - auto mul_Multiply = - makeOP({transpose_Transpose, unsqueeze_Unsqueeze}, {{"auto_broadcast", "numpy"}}); - auto size_ShapeOf_448 = makeOP({transpose_Transpose}, {{"output_type", "i32"}}); - auto size_Gather_450 = makeOP({size_ShapeOf_448, 3, 0}, {{"batch_dims", 0}}); - auto floor_divide_Divide = - makeOP({size_Gather_450, 2}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); - auto floor_divide_Floor = makeOP({floor_divide_Divide}); - auto slice_Unsqueeze_452 = makeOP({floor_divide_Floor, 0}); - auto ScatterUpdate_152312 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_452, {0}}); - auto slice_Slice_459 = makeOP( - {transpose_Transpose, ScatterUpdate_152312, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto Constant_182988 = makeConst(element::f32, - ov::Shape({ - 1, - 1, - 1, - 1, - }), - {-1.000000f}); - auto neg_Multiply = makeOP({slice_Slice_459, Constant_182988}, {{"auto_broadcast", "numpy"}}); - auto ScatterUpdate_152368 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_452, {0}}); - auto slice_Slice2 = - makeOP({transpose_Transpose, {0, 0, 0, 0}, ScatterUpdate_152368, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto cat_Concat = makeOP({neg_Multiply, slice_Slice2}, {{"axis", -1}}); - auto ScatterUpdate_152421 = makeOP({{0, 0, 0}, {2}, slice_Unsqueeze_426, {0}}); - auto slice_Slice_433 = makeOP({Constant585, {0, 0, 0}, ScatterUpdate_152421, {1, 1, 1}}, - {{"begin_mask", {1, 
1, 0}}, - {"end_mask", {1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto squeeze_Squeeze_436 = makeOP({slice_Slice_433, 1}); - auto squeeze_Squeeze_437 = makeOP({squeeze_Squeeze_436, 0}); - auto index_446_Gather = makeOP({squeeze_Squeeze_437, pos_ids, 0}, {{"batch_dims", 0}}); - auto unsqueeze_Unsqueeze_447 = makeOP({index_446_Gather, 1}); - auto mul_Multiply_463 = - makeOP({cat_Concat, unsqueeze_Unsqueeze_447}, {{"auto_broadcast", "numpy"}}); - auto add_Add = makeOP({mul_Multiply, mul_Multiply_463}, {{"auto_broadcast", "numpy"}}); - - return std::make_shared(ov::NodeVector{add_Add}, ov::ParameterVector{input, pos_id_end, pos_ids}); -} - -class RoPECPUTestLlama2 : public SubgraphBaseTest { -public: - ov::Tensor create_i32_tensor(const ov::Shape& shape, int start, int step = 1) { - auto tensor = ov::Tensor(ov::element::i32, shape); - auto* ptr = static_cast(tensor.data()); - for (size_t i = 0; i < tensor.get_size(); i++) { - ptr[i] = start; - start += step; - } - return tensor; - } - - void generate_inputs(const std::vector& targetInputStaticShapes) override { - const auto& funcInputs = function->inputs(); - - const int position_id_start = 15; - auto& input_shape = targetInputStaticShapes[0]; - auto seq_length = input_shape[1]; - - ov::test::utils::InputGenerateData in_data; - in_data.start_from = -1; - in_data.range = 2; - in_data.resolution = 32768; - ov::Tensor t_input = utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, in_data); - ov::Tensor t_position_id_end = create_i32_tensor(ov::Shape({}), position_id_start + seq_length); - ov::Tensor t_position_ids = create_i32_tensor(ov::Shape({1, seq_length}), position_id_start); - - inputs.clear(); - inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); - inputs.insert({funcInputs[1].get_node_shared_ptr(), t_position_id_end}); - inputs.insert({funcInputs[2].get_node_shared_ptr(), t_position_ids}); - } - -protected: - void SetUp() 
override { - targetDevice = ov::test::utils::DEVICE_CPU; - - const int batch = 2; - const int seq_length = 7; - const size_t max_position_embeddings = 2048; - const size_t ndims = 128; - const size_t num_head = 32; - - InputShape inpShape = {{batch, seq_length, num_head, ndims}, {{batch, seq_length, num_head, ndims}}}; - init_input_shapes({inpShape}); - function = buildROPE_Llama2(batch, seq_length, max_position_embeddings, num_head, ndims); - } -}; - -TEST_F(RoPECPUTestLlama2, smoke_CompareWithRefs) { - run(); - CheckNumberOfNodesWithType(compiledModel, "RoPE", 1); -} - -class RoPECPUTestChatGLM : public SubgraphBaseTest { -public: - ov::Tensor create_i32_tensor(const ov::Shape& shape, int start, int step = 1) { - auto tensor = ov::Tensor(ov::element::i32, shape); - auto* ptr = static_cast(tensor.data()); - for (size_t i = 0; i < tensor.get_size(); i++) { - ptr[i] = start; - start += step; - } - return tensor; - } - - void generate_inputs(const std::vector& targetInputStaticShapes) override { - const auto& funcInputs = function->inputs(); - - auto& input_shape = targetInputStaticShapes[0]; - auto seq_length = input_shape[0]; - // auto batch = input_shape[1]; - - ov::Tensor t_input = - utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768); - ov::Tensor t_cos_sin_cache = - utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {32768, 32, 2}, 2, -1.0f, 32768); - ov::Tensor t_position_ids = create_i32_tensor(ov::Shape({1, seq_length}), 15); - - inputs.clear(); - inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); - inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_sin_cache}); - inputs.insert({funcInputs[2].get_node_shared_ptr(), t_position_ids}); - } - -protected: - std::shared_ptr buildROPE_ChatGLM(int batch, int head_cnt, int rotary_dims) { - auto input = - std::make_shared(ov::element::f32, PartialShape{-1, batch, 4096 + 256 + 256}); - auto cos_sin_cache = std::make_shared(ov::element::f32, 
PartialShape{32768, 32, 2}); - auto position_ids = std::make_shared(ov::element::i32, PartialShape{-1, -1}); - - auto __module_transformer_index_67_Gather = - makeOP({cos_sin_cache, position_ids, 0}, {{"batch_dims", 0}}); - auto __module_transformer_transpose_Transpose = - makeOP({__module_transformer_index_67_Gather, {1, 0, 2, 3}}); - auto size_ShapeOf_110 = - makeOP({__module_transformer_transpose_Transpose}, {{"output_type", "i32"}}); - auto __getitem___Gather = makeOP({size_ShapeOf_110, -2, 0}, {{"batch_dims", 0}}); - auto mul_Multiply = makeOP({__getitem___Gather, 2}, {{"auto_broadcast", "numpy"}}); - auto slice_Unsqueeze_112 = makeOP({mul_Multiply, 0}); - - auto floordiv_Divide = - makeOP({mul_Multiply, 2}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); - auto floordiv_Floor = makeOP({floordiv_Divide}); - auto ListConstruct_126_Reshape_2 = makeOP({floordiv_Floor, {-1}}, {{"special_zero", false}}); - - auto ListUnpack_321 = makeOP({input, -1, {4096, 256, 256}}); - auto view_Reshape = - makeOP({ListUnpack_321->output(0), {0, 0, 32, 128}}, {{"special_zero", true}}); - - auto ScatterUpdate_229053 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_112, {0}}); - auto slice_Slice_357 = - makeOP({view_Reshape, {0, 0, 0, 0}, ScatterUpdate_229053, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto size_ShapeOf_346 = makeOP({view_Reshape}, {{"output_type", "i32"}}); - auto size_Gather_348 = makeOP({size_ShapeOf_346, 0, 0}, {{"batch_dims", 0}}); - auto ListConstruct_372_Reshape = makeOP({size_Gather_348, {-1}}, {{"special_zero", false}}); - auto size_Gather_351 = makeOP({size_ShapeOf_346, {2}, 0}, {{"batch_dims", 0}}); - auto ListConstruct_372_Concat = - makeOP({ListConstruct_372_Reshape, {-1}, size_Gather_351, ListConstruct_126_Reshape_2, {2}}, - {{"axis", 0}}); - auto reshape_Reshape_373 = - makeOP({slice_Slice_357, ListConstruct_372_Concat}, 
{{"special_zero", false}}); - auto select_Gather_381 = makeOP({reshape_Reshape_373, 0, -1}, {{"batch_dims", 0}}); - auto slice_Unsqueeze_367 = makeOP({size_Gather_348, 0}); - auto slice_Slice_369 = - makeOP({__module_transformer_transpose_Transpose, {0}, slice_Unsqueeze_367, {1}}, - {{"begin_mask", {0}}, - {"end_mask", {0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto size_ShapeOf_374 = makeOP({reshape_Reshape_373}, {{"output_type", "i32"}}); - auto size_Gather_376 = makeOP({size_ShapeOf_374, {3}, 0}, {{"batch_dims", 0}}); - auto ListConstruct_379_Concat = - makeOP({ListConstruct_372_Reshape, {-1}, {1}, size_Gather_376, {2}}, {{"axis", 0}}); - auto view_Reshape_380 = - makeOP({slice_Slice_369, ListConstruct_379_Concat}, {{"special_zero", false}}); - auto select_Gather_382 = makeOP({view_Reshape_380, 0, -1}, {{"batch_dims", 0}}); - auto mul_Multiply_383 = - makeOP({select_Gather_381, select_Gather_382}, {{"auto_broadcast", "numpy"}}); - auto select_Gather_384 = makeOP({reshape_Reshape_373, 1, -1}, {{"batch_dims", 0}}); - auto select_Gather_385 = makeOP({view_Reshape_380, 1, -1}, {{"batch_dims", 0}}); - auto mul_Multiply_386 = - makeOP({select_Gather_384, select_Gather_385}, {{"auto_broadcast", "numpy"}}); - auto sub_Subtract_389 = - makeOP({mul_Multiply_383, mul_Multiply_386}, {{"auto_broadcast", "numpy"}}); - auto Unsqueeze_62716 = makeOP({sub_Subtract_389, -1}); - auto mul_Multiply_391 = - makeOP({select_Gather_384, select_Gather_382}, {{"auto_broadcast", "numpy"}}); - auto mul_Multiply_393 = - makeOP({select_Gather_381, select_Gather_385}, {{"auto_broadcast", "numpy"}}); - auto add_Add_396 = makeOP({mul_Multiply_391, mul_Multiply_393}, {{"auto_broadcast", "numpy"}}); - auto Unsqueeze_62717 = makeOP({add_Add_396, -1}); - auto stack_401 = makeOP({Unsqueeze_62716, Unsqueeze_62717}, {{"axis", -1}}); - auto flatten_ShapeOf_402 = makeOP({stack_401}, {{"output_type", "i32"}}); - auto flatten_Slice_417 = 
makeOP({flatten_ShapeOf_402, {0}, {3}, {1}}, - {{"begin_mask", {0}}, - {"end_mask", {0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto flatten_Concat_420 = makeOP({flatten_Slice_417, {-1}}, {{"axis", 0}}); - auto flatten_Reshape_421 = makeOP({stack_401, flatten_Concat_420}, {{"special_zero", true}}); - auto ScatterUpdate_229067 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_112, {0}}); - auto slice_Slice_363 = - makeOP({view_Reshape, ScatterUpdate_229067, {0, 0, 0, INT_MAX}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto cat_Concat_425 = makeOP({flatten_Reshape_421, slice_Slice_363}, {{"axis", -1}}); - return std::make_shared(ov::NodeVector{cat_Concat_425}, - ov::ParameterVector{input, cos_sin_cache, position_ids}); - } - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_CPU; - - const int batch = 2; - const int seq_length = 7; - const int num_head = 32; - const int rotary_dims = 64; - - InputShape inpShape = {{-1, batch, 4096 + 256 + 256}, {{seq_length, batch, 4096 + 256 + 256}}}; - init_input_shapes({inpShape}); - function = buildROPE_ChatGLM(batch, num_head, rotary_dims); - } -}; - -TEST_F(RoPECPUTestChatGLM, smoke_CompareWithRefs) { - run(); - CheckNumberOfNodesWithType(compiledModel, "RoPE", 1); -} - -class RoPECPUTestQwen7b : public SubgraphBaseTest, public testing::WithParamInterface { -public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - const bool specialReshape = obj.param; - std::ostringstream result; - result << "specialReshape=" << specialReshape << std::endl; - return result.str(); - } - void generate_inputs(const std::vector& targetInputStaticShapes) override { - const auto& funcInputs = function->inputs(); - - auto& input_shape = targetInputStaticShapes[0]; - - ov::Tensor t_input = - utils::create_and_fill_tensor(funcInputs[0].get_element_type(), 
input_shape, 2, -1.0f, 32768); - ov::Tensor t_cos_cache = - utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {1, 4096, 1, 128}, 2, -1.0f, 32768); - ov::Tensor t_sin_cache = - utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {1, 4096, 1, 128}, 2, -1.0f, 32768); - - inputs.clear(); - inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); - inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_cache}); - inputs.insert({funcInputs[2].get_node_shared_ptr(), t_sin_cache}); - } - -protected: - std::shared_ptr buildROPE_QWen7b(bool specialReshape) { - auto input = - std::make_shared(ov::element::f32, PartialShape{-1, -1, 4096 + 4096 + 4096}); - auto cos_cache = std::make_shared(ov::element::f32, PartialShape{1, -1, 1, 128}); - auto sin_cache = std::make_shared(ov::element::f32, PartialShape{1, -1, 1, 128}); - - auto ListUnpack_389_VariadicSplit = makeOP({input, 2, {4096, 4096, -1}}); - auto view_Reshape = makeOP({ListUnpack_389_VariadicSplit->output(0), {0, 0, 32, 128}}, - {{"special_zero", true}}); - auto size_ShapeOf_414 = makeOP({view_Reshape}, {{"output_type", "i32"}}); - auto size_Gather_416 = makeOP({size_ShapeOf_414, 1, 0}, {{"batch_dims", 0}}); - auto neg_Multiply = makeOP({size_Gather_416, -1}, {{"auto_broadcast", "numpy"}}); - auto slice_Unsqueeze_422 = makeOP({neg_Multiply, 0}); - auto ScatterUpdate_261437 = makeOP({{0, 0}, {1}, slice_Unsqueeze_422, {0}}); - auto slice_Slice_425 = makeOP({cos_cache, ScatterUpdate_261437, {0ll, LLONG_MAX}, {1, 1}}, - {{"begin_mask", {1, 0}}, - {"end_mask", {1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto slice_Slice_431 = - makeOP({slice_Slice_425, {0, 0, 0}, {0ll, 0ll, LLONG_MAX}, {1, 1, 1}}, - {{"begin_mask", {1, 1, 0}}, - {"end_mask", {1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto slice_Slice_437 = - makeOP({slice_Slice_431, {0, 0, 0, 0}, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}}, - 
{{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto size_ShapeOf_462 = makeOP({slice_Slice_437}, {{"output_type", "i32"}}); - auto size_Gather_464 = makeOP({size_ShapeOf_462, {3}, 0}, {{"batch_dims", 0}}); - auto ScatterUpdate_261533 = makeOP({{0, 0, 0, 0}, {3}, size_Gather_464, {0}}); - auto slice_Slice_470 = - makeOP({view_Reshape, {0, 0, 0, 0}, ScatterUpdate_261533, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto mul_Multiply = makeOP({slice_Slice_470, slice_Slice_437}, {{"auto_broadcast", "numpy"}}); - auto size_ShapeOf_478 = makeOP({slice_Slice_470}, {{"output_type", "i32"}}); - auto Gather_239390 = makeOP({size_ShapeOf_478, {0, 1, 2}, 0}, {{"batch_dims", 0}}); - auto size_Gather_489 = makeOP({size_ShapeOf_478, 3, 0}, {{"batch_dims", 0}}); - auto floor_divide_Divide = - makeOP({size_Gather_489, 2}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); - auto floor_divide_Floor = makeOP({floor_divide_Divide}); - auto ListConstruct_493_Reshape_3 = - makeOP({floor_divide_Floor, {-1}}, {{"special_zero", false}}); - auto ListConstruct_493_Concat = - makeOP({Gather_239390, {2}, ListConstruct_493_Reshape_3}, {{"axis", 0}}); - std::shared_ptr reshape_Reshape = nullptr; - if (specialReshape) { - reshape_Reshape = makeOP({slice_Slice_470, {0, 0, 32, 2, 64}}, {{"special_zero", true}}); - } else { - reshape_Reshape = - makeOP({slice_Slice_470, ListConstruct_493_Concat}, {{"special_zero", false}}); - } - auto ListUnpack_496_Split = makeOP({reshape_Reshape, -2}, {{"num_splits", 2}}); - auto ListUnpack_496_Squeeze_0 = makeOP({ListUnpack_496_Split->output(1), -2}); - auto Constant_296840_compressed = makeConst(element::f16, - ov::Shape({ - 1, - 1, - 1, - 1, - }), - {-1}); - auto Constant_296840 = makeOP({Constant_296840_compressed}, {{"destination_type", "f32"}}); - 
auto neg_Multiply_499 = - makeOP({ListUnpack_496_Squeeze_0, Constant_296840}, {{"auto_broadcast", "numpy"}}); - auto ListUnpack_496_Squeeze = makeOP({ListUnpack_496_Split->output(0), -2}); - auto cat_Concat = makeOP({neg_Multiply_499, ListUnpack_496_Squeeze}, {{"axis", -1}}); - auto slice_Slice_449 = makeOP({sin_cache, ScatterUpdate_261437, {0ll, LLONG_MAX}, {1, 1}}, - {{"begin_mask", {1, 0}}, - {"end_mask", {1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto slice_Slice_455 = - makeOP({slice_Slice_449, {0, 0, 0}, {0ll, 0ll, LLONG_MAX}, {1, 1, 1}}, - {{"begin_mask", {1, 1, 0}}, - {"end_mask", {1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto slice_Slice_461 = - makeOP({slice_Slice_455, {0, 0, 0, 0}, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto mul_Multiply_503 = makeOP({cat_Concat, slice_Slice_461}, {{"auto_broadcast", "numpy"}}); - auto add_Add = makeOP({mul_Multiply, mul_Multiply_503}, {{"auto_broadcast", "numpy"}}); - return std::make_shared(ov::NodeVector{add_Add}, ov::ParameterVector{input, cos_cache, sin_cache}); - } - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_CPU; - const bool specialReshape = this->GetParam(); - const int batch = 2; - const int seq_length = 7; - InputShape inpShape = {{batch, -1, 4096 + 4096 + 4096}, {{batch, seq_length, 4096 + 4096 + 4096}}}; - init_input_shapes({inpShape}); - function = buildROPE_QWen7b(specialReshape); - } -}; - -TEST_P(RoPECPUTestQwen7b, smoke_CompareWithRefs) { - run(); - CheckNumberOfNodesWithType(compiledModel, "RoPE", 1); -} - -INSTANTIATE_TEST_SUITE_P(smoke_RoPECPUTestQwen7b, - RoPECPUTestQwen7b, - ::testing::Values(true, false), - RoPECPUTestQwen7b::getTestCaseName); - -class RoPECPUTestGPTJ : public SubgraphBaseTest, public testing::WithParamInterface { 
-public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - bool hasShapeOf; - hasShapeOf = obj.param; - std::ostringstream result; - result << "hasShapeOf=" << hasShapeOf << std::endl; - return result.str(); - } - void generate_inputs(const std::vector& targetInputStaticShapes) override { - const auto& funcInputs = function->inputs(); - - auto& input_shape = targetInputStaticShapes[0]; - auto& sincos_shape = targetInputStaticShapes[1]; - ov::Tensor t_input = - utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768); - ov::Tensor t_cos_sin_cache = - utils::create_and_fill_tensor(funcInputs[1].get_element_type(), sincos_shape, 2, -1.0f, 32768); - - inputs.clear(); - inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); - inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_sin_cache}); - } - -protected: - std::shared_ptr buildROPE_GPTJ(const int num_head, - const int hidden_dims, - const int rotary_dims, - bool hasShapeOf) { - auto int32_max = std::numeric_limits::max(); - auto input = - std::make_shared(ov::element::f32, PartialShape{-1, -1, num_head, hidden_dims}); - auto sincos = std::make_shared(ov::element::f32, PartialShape{-1, -1, rotary_dims}); - - auto slice_Slice_965 = - makeOP({input, {0, 0, 0, 0}, {0, 0, 0, rotary_dims}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - slice_Slice_965->set_friendly_name("slice_Slice_965"); - - auto varsplit = makeOP({sincos, -1, {rotary_dims / 2, -1}}); - varsplit->set_output_size(2); - varsplit->set_friendly_name("varsplit"); - auto unsqueeze_sin = makeOP({varsplit->output(0), 2}); - auto unsqueeze_cos = makeOP({varsplit->output(1), 2}); - std::vector gather_idx(rotary_dims, 1); - int32_t v = 0; - for (size_t i = 0; i < gather_idx.size(); i += 2, v++) { - gather_idx[i] = v; - gather_idx[i + 1] = v; - } - - auto const_idx = 
makeConst(ov::element::i32, ov::Shape({static_cast(rotary_dims)}), gather_idx); - auto constant_155588 = makeConst(element::f32, - ov::Shape({ - 1, - 1, - 1, - 1, - }), - {-1.000000f}); - auto repeat_interleave_sin = makeOP({unsqueeze_sin, const_idx, 3}, {{"batch_dims", 0}}); - auto repeat_interleave_cos = makeOP({unsqueeze_cos, const_idx, 3}, {{"batch_dims", 0}}); - repeat_interleave_sin->set_friendly_name("repeat_interleave_sin"); - repeat_interleave_cos->set_friendly_name("repeat_interleave_cos"); - // x interleave (-x[:,:,:, 1::2], x[:,:,:, 0::2]) - auto slice_Slice_1174 = - makeOP({slice_Slice_965, {0, 0, 0, 1}, {0, 0, 0, int32_max}, {1, 1, 1, 2}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto neg_Multiply_1177 = - makeOP({slice_Slice_1174, constant_155588}, {{"auto_broadcast", "numpy"}}); - auto Unsqueeze_65524 = makeOP({neg_Multiply_1177, -1}); - - auto slice_Slice_1168 = - makeOP({slice_Slice_965, {0, 0, 0, 0}, {0, 0, 0, int32_max}, {1, 1, 1, 2}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto Unsqueeze_65525 = makeOP({slice_Slice_1168, -1}); - auto stack_1182 = makeOP({Unsqueeze_65524, Unsqueeze_65525}, {{"axis", -1}}); - auto flatten_Reshape_1198 = - makeOP({stack_1182, {0, 0, num_head, rotary_dims}}, {{"special_zero", true}}); - // x*cos [B,L,H,ndims] - auto mul_cos = - makeOP({slice_Slice_965, repeat_interleave_cos}, {{"auto_broadcast", "numpy"}}); - mul_cos->set_friendly_name("mul_cos"); - auto mul_sin = - makeOP({flatten_Reshape_1198, repeat_interleave_sin}, {{"auto_broadcast", "numpy"}}); - // *cos + *sin - auto rotary_emb = makeOP({mul_cos, mul_sin}, {{"auto_broadcast", "numpy"}}); - - auto slice_Slice_971 = - makeOP({input, {0, 0, 0, rotary_dims}, {0, 0, 0, int32_max}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, 
- {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto cat_Concat_1211 = makeOP({rotary_emb, slice_Slice_971}, {{"axis", -1}}); - auto permute_Transpose_1213 = makeOP({cat_Concat_1211, {0, 2, 1, 3}}); - ov::NodeVector model_output = {permute_Transpose_1213}; - if (hasShapeOf) { - auto shapeOf = makeOP({rotary_emb}, {{"output_type", "i32"}}); - auto gather = makeOP({shapeOf, {1}, 0}, {{"batch_dims", 0}}); - model_output.push_back(gather); - } - return std::make_shared(model_output, ov::ParameterVector{input, sincos}); - } - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_CPU; - bool hasShapeOf = this->GetParam(); - const int batch = 2; - const int seq_length = 7; - const int num_head = 16; - const int hidden_dims = 256; - const int rotary_dims = 64; - - InputShape input = {{batch, seq_length, num_head, hidden_dims}, {{batch, seq_length, num_head, hidden_dims}}}; - InputShape sincos = {{batch, seq_length, rotary_dims}, {{batch, seq_length, rotary_dims}}}; - init_input_shapes({input, sincos}); - function = buildROPE_GPTJ(num_head, hidden_dims, rotary_dims, hasShapeOf); - } -}; - -TEST_P(RoPECPUTestGPTJ, smoke_CompareWithRefs) { - run(); - CheckNumberOfNodesWithType(compiledModel, "RoPE", 1); -} - -INSTANTIATE_TEST_SUITE_P(smoke_RoPECPUTestGPTJ, - RoPECPUTestGPTJ, - ::testing::Values(true, false), - RoPECPUTestGPTJ::getTestCaseName); - -} // namespace test -} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp new file mode 100644 index 00000000000000..0ff1d18ae09ff7 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/rotary_pos_emb.hpp" + +namespace ov { 
+namespace test { + +INSTANTIATE_TEST_SUITE_P(smoke_RoPETestLlama2, + RoPETestLlama2, + ::testing::Values(ov::test::utils::DEVICE_CPU), + RoPETestLlama2::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RoPETestChatGLM, + RoPETestChatGLM, + ::testing::Values(ov::test::utils::DEVICE_CPU), + RoPETestChatGLM::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RoPETestQwen7b, + RoPETestQwen7b, + ::testing::Combine(::testing::Values(true, false), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + RoPETestQwen7b::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RoPETestGPTJ, + RoPETestGPTJ, + ::testing::Combine(::testing::Values(true, false), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + RoPETestGPTJ::getTestCaseName); +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 44ef49b36b59e7..4e265bb41c89a9 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -202,9 +202,6 @@ std::vector disabledTestPatterns() { // Issue: 136862 R"(.*smoke_ConditionGPUTest_static/StaticConditionLayerGPUTest.CompareWithRefs/IS=\(3.6\)_netPRC=i8_ifCond=PARAM_targetDevice=GPU_.*)", - // TODO: Add RoPE support for Llama2, GPTJ models - R"(.*(RoPEGPUTestLlama2).*)", - R"(.*(RoPEGPUTestGPTJ).*)", #if defined(_WIN32) // by calc abs_threshold with expected value R"(.*smoke_RemoteTensor/OVRemoteTensorBatched_Test.NV12toBGR_buffer/(num_batch_4|num_batch_2).*)", diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp index 66b322620c5845..bed957cc35fc0d 100644 --- 
a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp @@ -2,625 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include - -#include "common_test_utils/common_utils.hpp" -#include "shared_test_classes/base/ov_subgraph.hpp" -#include "transformations/utils/gen_pattern.hpp" -#include "openvino/runtime/exec_model_info.hpp" - -#include - -using namespace ov::gen_pattern; -using namespace ov; +#include "subgraph_tests/rotary_pos_emb.hpp" namespace ov { namespace test { -inline void CheckNumberOfNodesWithType(std::shared_ptr function, - const std::unordered_set& nodeTypes, - size_t expectedCount) { - ASSERT_NE(nullptr, function); - int num_ops = 0; - for (const auto& node : function->get_ordered_ops()) { - const auto& rt_info = node->get_rt_info(); - const auto layer_type = rt_info.find("layerType")->second.as(); - std::cout << layer_type << std::endl; - if (nodeTypes.count(layer_type)) { - num_ops++; - } - } - ASSERT_EQ(num_ops, expectedCount); -} - -static ov::OutputVector makeCosSinCache(int max_position_embeddings, int rotary_ndims) { - std::vector lut_sin(max_position_embeddings * rotary_ndims, 0.0f); - std::vector lut_cos(max_position_embeddings * rotary_ndims, 0.0f); - - // rotate_half style cos/sin table: - // y1 = cos(m*xita_i) * x1 - sin(m*xita_i) * x2 - // y2 = cos(m*xita_i) * x2 + sin(m*xita_i) * x1 - // - for (int i = 0, k = 0; i < rotary_ndims; i += 2, k++) { - auto xita_i = 1.0 / std::pow(10000.0, static_cast(i) / rotary_ndims); - float* psin = lut_sin.data(); - float* pcos = lut_cos.data(); - for (int m = 0; m < max_position_embeddings; m++, psin += rotary_ndims, pcos += rotary_ndims) { - auto vsin = std::sin(xita_i * m); - auto vcos = std::cos(xita_i * m); - pcos[k] = pcos[k + rotary_ndims / 2] = vcos; - psin[k] = psin[k + rotary_ndims / 2] = vsin; - } - } - auto shape = ov::Shape({1, 1, 
static_cast(max_position_embeddings), static_cast(rotary_ndims)}); - auto Cos = makeConst(ov::element::f32, shape, lut_cos); - auto Sin = makeConst(ov::element::f32, shape, lut_sin); - return {Cos, Sin}; -} - -static std::shared_ptr buildROPE_Llama2(const int batch, - const int seq_length, - const int max_position_embeddings, - const int num_head, - const int ndims) { - auto input = std::make_shared(ov::element::f32, PartialShape{batch, -1, num_head, ndims}); - auto pos_id_end = std::make_shared(ov::element::i32, ov::Shape{}); - auto pos_ids = std::make_shared(ov::element::i32, PartialShape{1, -1}); - - auto cos_sin_cache = makeCosSinCache(max_position_embeddings, ndims); - auto Constant582 = cos_sin_cache[0]; - auto Constant585 = cos_sin_cache[1]; - - // concat KV length - auto transpose_Transpose = makeOP({input, {0, 2, 1, 3}}); - auto slice_Unsqueeze_426 = makeOP({pos_id_end, 0}); - auto ScatterUpdate_152236 = makeOP({{0, 0, 0}, {2}, slice_Unsqueeze_426, {0}}); - auto slice_Slice = makeOP({Constant582, {0, 0, 0}, ScatterUpdate_152236, {1, 1, 1}}, - {{"begin_mask", {1, 1, 0}}, - {"end_mask", {1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto squeeze_Squeeze = makeOP({slice_Slice, 1}); - auto squeeze_Squeeze_435 = makeOP({squeeze_Squeeze, 0}); - auto index_441_Gather = makeOP({squeeze_Squeeze_435, pos_ids, 0}, {{"batch_dims", 0}}); - auto unsqueeze_Unsqueeze = makeOP({index_441_Gather, 1}); - auto mul_Multiply = - makeOP({transpose_Transpose, unsqueeze_Unsqueeze}, {{"auto_broadcast", "numpy"}}); - auto size_ShapeOf_448 = makeOP({transpose_Transpose}, {{"output_type", "i32"}}); - auto size_Gather_450 = makeOP({size_ShapeOf_448, 3, 0}, {{"batch_dims", 0}}); - auto floor_divide_Divide = - makeOP({size_Gather_450, 2}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); - auto floor_divide_Floor = makeOP({floor_divide_Divide}); - auto slice_Unsqueeze_452 = makeOP({floor_divide_Floor, 0}); - auto ScatterUpdate_152312 = 
makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_452, {0}}); - auto slice_Slice_459 = makeOP( - {transpose_Transpose, ScatterUpdate_152312, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto Constant_182988 = makeConst(element::f32, - ov::Shape({ - 1, - 1, - 1, - 1, - }), - {-1.000000f}); - auto neg_Multiply = makeOP({slice_Slice_459, Constant_182988}, {{"auto_broadcast", "numpy"}}); - auto ScatterUpdate_152368 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_452, {0}}); - auto slice_Slice2 = - makeOP({transpose_Transpose, {0, 0, 0, 0}, ScatterUpdate_152368, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto cat_Concat = makeOP({neg_Multiply, slice_Slice2}, {{"axis", -1}}); - auto ScatterUpdate_152421 = makeOP({{0, 0, 0}, {2}, slice_Unsqueeze_426, {0}}); - auto slice_Slice_433 = makeOP({Constant585, {0, 0, 0}, ScatterUpdate_152421, {1, 1, 1}}, - {{"begin_mask", {1, 1, 0}}, - {"end_mask", {1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto squeeze_Squeeze_436 = makeOP({slice_Slice_433, 1}); - auto squeeze_Squeeze_437 = makeOP({squeeze_Squeeze_436, 0}); - auto index_446_Gather = makeOP({squeeze_Squeeze_437, pos_ids, 0}, {{"batch_dims", 0}}); - auto unsqueeze_Unsqueeze_447 = makeOP({index_446_Gather, 1}); - auto mul_Multiply_463 = - makeOP({cat_Concat, unsqueeze_Unsqueeze_447}, {{"auto_broadcast", "numpy"}}); - auto add_Add = makeOP({mul_Multiply, mul_Multiply_463}, {{"auto_broadcast", "numpy"}}); - - return std::make_shared(ov::NodeVector{add_Add}, ov::ParameterVector{input, pos_id_end, pos_ids}); -} - -class RoPEGPUTestLlama2 : public SubgraphBaseTest { -public: - ov::Tensor create_i32_tensor(const ov::Shape& shape, int start, int step = 1) { - auto tensor = 
ov::Tensor(ov::element::i32, shape); - auto* ptr = static_cast(tensor.data()); - for (size_t i = 0; i < tensor.get_size(); i++) { - ptr[i] = start; - start += step; - } - return tensor; - } - - void generate_inputs(const std::vector& targetInputStaticShapes) override { - const auto& funcInputs = function->inputs(); - - const int position_id_start = 15; - auto& input_shape = targetInputStaticShapes[0]; - auto seq_length = input_shape[1]; - - ov::test::utils::InputGenerateData in_data; - in_data.start_from = -1; - in_data.range = 2; - in_data.resolution = 32768; - ov::Tensor t_input = utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, in_data); - ov::Tensor t_position_id_end = create_i32_tensor(ov::Shape({}), position_id_start + seq_length); - ov::Tensor t_position_ids = create_i32_tensor(ov::Shape({1, seq_length}), position_id_start); - - inputs.clear(); - inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); - inputs.insert({funcInputs[1].get_node_shared_ptr(), t_position_id_end}); - inputs.insert({funcInputs[2].get_node_shared_ptr(), t_position_ids}); - } - -protected: - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_GPU; - - const int batch = 2; - const int seq_length = 7; - const size_t max_position_embeddings = 2048; - const size_t ndims = 128; - const size_t num_head = 32; - - InputShape inpShape = {{batch, seq_length, num_head, ndims}, {{batch, seq_length, num_head, ndims}}}; - init_input_shapes({inpShape}); - function = buildROPE_Llama2(batch, seq_length, max_position_embeddings, num_head, ndims); - } -}; - -TEST_F(RoPEGPUTestLlama2, smoke_CompareWithRefs) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - run(); - std::shared_ptr function = compiledModel.get_runtime_model(); - CheckNumberOfNodesWithType(function, {"RoPE"}, 1); -} - -class RoPEGPUTestChatGLM : public SubgraphBaseTest { -public: - ov::Tensor create_i32_tensor(const ov::Shape& shape, int start, int step = 1) { - auto tensor = 
ov::Tensor(ov::element::i32, shape); - auto* ptr = static_cast(tensor.data()); - for (size_t i = 0; i < tensor.get_size(); i++) { - ptr[i] = start; - start += step; - } - return tensor; - } - - void generate_inputs(const std::vector& targetInputStaticShapes) override { - const auto& funcInputs = function->inputs(); - - auto& input_shape = targetInputStaticShapes[0]; - auto seq_length = input_shape[0]; - // auto batch = input_shape[1]; - - ov::Tensor t_input = - utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768); - ov::Tensor t_cos_sin_cache = - utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {32768, 32, 2}, 2, -1.0f, 32768); - ov::Tensor t_position_ids = create_i32_tensor(ov::Shape({1, seq_length}), 15); - - inputs.clear(); - inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); - inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_sin_cache}); - inputs.insert({funcInputs[2].get_node_shared_ptr(), t_position_ids}); - } - -protected: - std::shared_ptr buildROPE_ChatGLM(int batch, int head_cnt, int rotary_dims) { - auto input = - std::make_shared(ov::element::f32, PartialShape{-1, batch, 4096 + 256 + 256}); - auto cos_sin_cache = std::make_shared(ov::element::f32, PartialShape{32768, 32, 2}); - auto position_ids = std::make_shared(ov::element::i32, PartialShape{-1, -1}); - - auto __module_transformer_index_67_Gather = - makeOP({cos_sin_cache, position_ids, 0}, {{"batch_dims", 0}}); - auto __module_transformer_transpose_Transpose = - makeOP({__module_transformer_index_67_Gather, {1, 0, 2, 3}}); - auto size_ShapeOf_110 = - makeOP({__module_transformer_transpose_Transpose}, {{"output_type", "i32"}}); - auto __getitem___Gather = makeOP({size_ShapeOf_110, -2, 0}, {{"batch_dims", 0}}); - auto mul_Multiply = makeOP({__getitem___Gather, 2}, {{"auto_broadcast", "numpy"}}); - auto slice_Unsqueeze_112 = makeOP({mul_Multiply, 0}); - - auto floordiv_Divide = - makeOP({mul_Multiply, 2}, {{"auto_broadcast", 
"numpy"}, {"m_pythondiv", true}}); - auto floordiv_Floor = makeOP({floordiv_Divide}); - auto ListConstruct_126_Reshape_2 = makeOP({floordiv_Floor, {-1}}, {{"special_zero", false}}); - - auto ListUnpack_321 = makeOP({input, -1, {4096, 256, 256}}); - auto view_Reshape = - makeOP({ListUnpack_321->output(0), {0, 0, 32, 128}}, {{"special_zero", true}}); - - auto ScatterUpdate_229053 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_112, {0}}); - auto slice_Slice_357 = - makeOP({view_Reshape, {0, 0, 0, 0}, ScatterUpdate_229053, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto size_ShapeOf_346 = makeOP({view_Reshape}, {{"output_type", "i32"}}); - auto size_Gather_348 = makeOP({size_ShapeOf_346, 0, 0}, {{"batch_dims", 0}}); - auto ListConstruct_372_Reshape = makeOP({size_Gather_348, {-1}}, {{"special_zero", false}}); - auto size_Gather_351 = makeOP({size_ShapeOf_346, {2}, 0}, {{"batch_dims", 0}}); - auto ListConstruct_372_Concat = - makeOP({ListConstruct_372_Reshape, {-1}, size_Gather_351, ListConstruct_126_Reshape_2, {2}}, - {{"axis", 0}}); - auto reshape_Reshape_373 = - makeOP({slice_Slice_357, ListConstruct_372_Concat}, {{"special_zero", false}}); - auto select_Gather_381 = makeOP({reshape_Reshape_373, 0, -1}, {{"batch_dims", 0}}); - auto slice_Unsqueeze_367 = makeOP({size_Gather_348, 0}); - auto slice_Slice_369 = - makeOP({__module_transformer_transpose_Transpose, {0}, slice_Unsqueeze_367, {1}}, - {{"begin_mask", {0}}, - {"end_mask", {0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto size_ShapeOf_374 = makeOP({reshape_Reshape_373}, {{"output_type", "i32"}}); - auto size_Gather_376 = makeOP({size_ShapeOf_374, {3}, 0}, {{"batch_dims", 0}}); - auto ListConstruct_379_Concat = - makeOP({ListConstruct_372_Reshape, {-1}, {1}, size_Gather_376, {2}}, {{"axis", 0}}); - auto view_Reshape_380 = - makeOP({slice_Slice_369, 
ListConstruct_379_Concat}, {{"special_zero", false}}); - auto select_Gather_382 = makeOP({view_Reshape_380, 0, -1}, {{"batch_dims", 0}}); - auto mul_Multiply_383 = - makeOP({select_Gather_381, select_Gather_382}, {{"auto_broadcast", "numpy"}}); - auto select_Gather_384 = makeOP({reshape_Reshape_373, 1, -1}, {{"batch_dims", 0}}); - auto select_Gather_385 = makeOP({view_Reshape_380, 1, -1}, {{"batch_dims", 0}}); - auto mul_Multiply_386 = - makeOP({select_Gather_384, select_Gather_385}, {{"auto_broadcast", "numpy"}}); - auto sub_Subtract_389 = - makeOP({mul_Multiply_383, mul_Multiply_386}, {{"auto_broadcast", "numpy"}}); - auto Unsqueeze_62716 = makeOP({sub_Subtract_389, -1}); - auto mul_Multiply_391 = - makeOP({select_Gather_384, select_Gather_382}, {{"auto_broadcast", "numpy"}}); - auto mul_Multiply_393 = - makeOP({select_Gather_381, select_Gather_385}, {{"auto_broadcast", "numpy"}}); - auto add_Add_396 = makeOP({mul_Multiply_391, mul_Multiply_393}, {{"auto_broadcast", "numpy"}}); - auto Unsqueeze_62717 = makeOP({add_Add_396, -1}); - auto stack_401 = makeOP({Unsqueeze_62716, Unsqueeze_62717}, {{"axis", -1}}); - auto flatten_ShapeOf_402 = makeOP({stack_401}, {{"output_type", "i32"}}); - auto flatten_Slice_417 = makeOP({flatten_ShapeOf_402, {0}, {3}, {1}}, - {{"begin_mask", {0}}, - {"end_mask", {0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto flatten_Concat_420 = makeOP({flatten_Slice_417, {-1}}, {{"axis", 0}}); - auto flatten_Reshape_421 = makeOP({stack_401, flatten_Concat_420}, {{"special_zero", true}}); - auto ScatterUpdate_229067 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_112, {0}}); - auto slice_Slice_363 = - makeOP({view_Reshape, ScatterUpdate_229067, {0, 0, 0, INT_MAX}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto cat_Concat_425 = makeOP({flatten_Reshape_421, slice_Slice_363}, {{"axis", -1}}); - 
return std::make_shared(ov::NodeVector{cat_Concat_425}, - ov::ParameterVector{input, cos_sin_cache, position_ids}); - } - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_GPU; - - const int batch = 2; - const int seq_length = 7; - const int num_head = 32; - const int rotary_dims = 64; - - InputShape inpShape = {{-1, batch, 4096 + 256 + 256}, {{seq_length, batch, 4096 + 256 + 256}}}; - init_input_shapes({inpShape}); - function = buildROPE_ChatGLM(batch, num_head, rotary_dims); - } -}; - -TEST_F(RoPEGPUTestChatGLM, smoke_CompareWithRefs) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - run(); - std::shared_ptr function = compiledModel.get_runtime_model(); - CheckNumberOfNodesWithType(function, {"RoPE"}, 1); -} - -class RoPEGPUTestQwen7b : public SubgraphBaseTest { -public: - void generate_inputs(const std::vector& targetInputStaticShapes) override { - const auto& funcInputs = function->inputs(); - - auto& input_shape = targetInputStaticShapes[0]; - - ov::Tensor t_input = - utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768); - ov::Tensor t_cos_cache = - utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {1, 4096, 1, 128}, 2, -1.0f, 32768); - ov::Tensor t_sin_cache = - utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {1, 4096, 1, 128}, 2, -1.0f, 32768); - - inputs.clear(); - inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); - inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_cache}); - inputs.insert({funcInputs[2].get_node_shared_ptr(), t_sin_cache}); - } - -protected: - std::shared_ptr buildROPE_QWen7b() { - auto input = - std::make_shared(ov::element::f32, PartialShape{-1, -1, 4096 + 4096 + 4096}); - auto cos_cache = std::make_shared(ov::element::f32, PartialShape{1, -1, 1, 128}); - auto sin_cache = std::make_shared(ov::element::f32, PartialShape{1, -1, 1, 128}); - - auto ListUnpack_389_VariadicSplit = makeOP({input, 2, {4096, 4096, -1}}); - auto view_Reshape = 
makeOP({ListUnpack_389_VariadicSplit->output(0), {0, 0, 32, 128}}, - {{"special_zero", true}}); - auto size_ShapeOf_414 = makeOP({view_Reshape}, {{"output_type", "i32"}}); - auto size_Gather_416 = makeOP({size_ShapeOf_414, 1, 0}, {{"batch_dims", 0}}); - auto neg_Multiply = makeOP({size_Gather_416, -1}, {{"auto_broadcast", "numpy"}}); - auto slice_Unsqueeze_422 = makeOP({neg_Multiply, 0}); - auto ScatterUpdate_261437 = makeOP({{0, 0}, {1}, slice_Unsqueeze_422, {0}}); - auto slice_Slice_425 = makeOP({cos_cache, ScatterUpdate_261437, {0ll, LLONG_MAX}, {1, 1}}, - {{"begin_mask", {1, 0}}, - {"end_mask", {1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto slice_Slice_431 = - makeOP({slice_Slice_425, {0, 0, 0}, {0ll, 0ll, LLONG_MAX}, {1, 1, 1}}, - {{"begin_mask", {1, 1, 0}}, - {"end_mask", {1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto slice_Slice_437 = - makeOP({slice_Slice_431, {0, 0, 0, 0}, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto size_ShapeOf_462 = makeOP({slice_Slice_437}, {{"output_type", "i32"}}); - auto size_Gather_464 = makeOP({size_ShapeOf_462, {3}, 0}, {{"batch_dims", 0}}); - auto ScatterUpdate_261533 = makeOP({{0, 0, 0, 0}, {3}, size_Gather_464, {0}}); - auto slice_Slice_470 = - makeOP({view_Reshape, {0, 0, 0, 0}, ScatterUpdate_261533, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto mul_Multiply = makeOP({slice_Slice_470, slice_Slice_437}, {{"auto_broadcast", "numpy"}}); - auto size_ShapeOf_478 = makeOP({slice_Slice_470}, {{"output_type", "i32"}}); - auto Gather_239390 = makeOP({size_ShapeOf_478, {0, 1, 2}, 0}, {{"batch_dims", 0}}); - auto size_Gather_489 = makeOP({size_ShapeOf_478, 3, 0}, 
{{"batch_dims", 0}}); - auto floor_divide_Divide = - makeOP({size_Gather_489, 2}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); - auto floor_divide_Floor = makeOP({floor_divide_Divide}); - auto ListConstruct_493_Reshape_3 = - makeOP({floor_divide_Floor, {-1}}, {{"special_zero", false}}); - auto ListConstruct_493_Concat = - makeOP({Gather_239390, {2}, ListConstruct_493_Reshape_3}, {{"axis", 0}}); - auto reshape_Reshape = - makeOP({slice_Slice_470, ListConstruct_493_Concat}, {{"special_zero", false}}); - auto ListUnpack_496_Split = makeOP({reshape_Reshape, -2}, {{"num_splits", 2}}); - auto ListUnpack_496_Squeeze_0 = makeOP({ListUnpack_496_Split->output(1), -2}); - auto Constant_296840_compressed = makeConst(element::f16, - ov::Shape({ - 1, - 1, - 1, - 1, - }), - {-1}); - auto Constant_296840 = makeOP({Constant_296840_compressed}, {{"destination_type", "f32"}}); - auto neg_Multiply_499 = - makeOP({ListUnpack_496_Squeeze_0, Constant_296840}, {{"auto_broadcast", "numpy"}}); - auto ListUnpack_496_Squeeze = makeOP({ListUnpack_496_Split->output(0), -2}); - auto cat_Concat = makeOP({neg_Multiply_499, ListUnpack_496_Squeeze}, {{"axis", -1}}); - auto slice_Slice_449 = makeOP({sin_cache, ScatterUpdate_261437, {0ll, LLONG_MAX}, {1, 1}}, - {{"begin_mask", {1, 0}}, - {"end_mask", {1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto slice_Slice_455 = - makeOP({slice_Slice_449, {0, 0, 0}, {0ll, 0ll, LLONG_MAX}, {1, 1, 1}}, - {{"begin_mask", {1, 1, 0}}, - {"end_mask", {1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto slice_Slice_461 = - makeOP({slice_Slice_455, {0, 0, 0, 0}, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto mul_Multiply_503 = makeOP({cat_Concat, slice_Slice_461}, {{"auto_broadcast", "numpy"}}); - auto add_Add = 
makeOP({mul_Multiply, mul_Multiply_503}, {{"auto_broadcast", "numpy"}}); - return std::make_shared(ov::NodeVector{add_Add}, ov::ParameterVector{input, cos_cache, sin_cache}); - } - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_GPU; - const int batch = 2; - const int seq_length = 7; - InputShape inpShape = {{batch, -1, 4096 + 4096 + 4096}, {{batch, seq_length, 4096 + 4096 + 4096}}}; - init_input_shapes({inpShape}); - function = buildROPE_QWen7b(); - } -}; - -TEST_F(RoPEGPUTestQwen7b, smoke_CompareWithRefs) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - run(); - std::shared_ptr function = compiledModel.get_runtime_model(); - CheckNumberOfNodesWithType(function, {"RoPE"}, 1); -} - -class RoPEGPUTestGPTJ : public SubgraphBaseTest, public testing::WithParamInterface { -public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - bool hasShapeOf; - hasShapeOf = obj.param; - std::ostringstream result; - result << "hasShapeOf=" << hasShapeOf << std::endl; - return result.str(); - } - void generate_inputs(const std::vector& targetInputStaticShapes) override { - const auto& funcInputs = function->inputs(); - - auto& input_shape = targetInputStaticShapes[0]; - auto& sincos_shape = targetInputStaticShapes[1]; - ov::Tensor t_input = - utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768); - ov::Tensor t_cos_sin_cache = - utils::create_and_fill_tensor(funcInputs[1].get_element_type(), sincos_shape, 2, -1.0f, 32768); - - inputs.clear(); - inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); - inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_sin_cache}); - } - -protected: - std::shared_ptr buildROPE_GPTJ(const int num_head, - const int hidden_dims, - const int rotary_dims, - bool hasShapeOf) { - auto int32_max = std::numeric_limits::max(); - auto input = - std::make_shared(ov::element::f32, PartialShape{-1, -1, num_head, hidden_dims}); - auto sincos = std::make_shared(ov::element::f32, 
PartialShape{-1, -1, rotary_dims}); - - auto slice_Slice_965 = - makeOP({input, {0, 0, 0, 0}, {0, 0, 0, rotary_dims}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - slice_Slice_965->set_friendly_name("slice_Slice_965"); - - auto varsplit = makeOP({sincos, -1, {rotary_dims / 2, -1}}); - varsplit->set_output_size(2); - varsplit->set_friendly_name("varsplit"); - auto unsqueeze_sin = makeOP({varsplit->output(0), 2}); - auto unsqueeze_cos = makeOP({varsplit->output(1), 2}); - std::vector gather_idx(rotary_dims, 1); - int32_t v = 0; - for (size_t i = 0; i < gather_idx.size(); i += 2, v++) { - gather_idx[i] = v; - gather_idx[i + 1] = v; - } - - auto const_idx = makeConst(ov::element::i32, ov::Shape({static_cast(rotary_dims)}), gather_idx); - auto constant_155588 = makeConst(element::f32, - ov::Shape({ - 1, - 1, - 1, - 1, - }), - {-1.000000f}); - auto repeat_interleave_sin = makeOP({unsqueeze_sin, const_idx, 3}, {{"batch_dims", 0}}); - auto repeat_interleave_cos = makeOP({unsqueeze_cos, const_idx, 3}, {{"batch_dims", 0}}); - repeat_interleave_sin->set_friendly_name("repeat_interleave_sin"); - repeat_interleave_cos->set_friendly_name("repeat_interleave_cos"); - // x interleave (-x[:,:,:, 1::2], x[:,:,:, 0::2]) - auto slice_Slice_1174 = - makeOP({slice_Slice_965, {0, 0, 0, 1}, {0, 0, 0, int32_max}, {1, 1, 1, 2}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto neg_Multiply_1177 = - makeOP({slice_Slice_1174, constant_155588}, {{"auto_broadcast", "numpy"}}); - auto Unsqueeze_65524 = makeOP({neg_Multiply_1177, -1}); - - auto slice_Slice_1168 = - makeOP({slice_Slice_965, {0, 0, 0, 0}, {0, 0, 0, int32_max}, {1, 1, 1, 2}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto 
Unsqueeze_65525 = makeOP({slice_Slice_1168, -1}); - auto stack_1182 = makeOP({Unsqueeze_65524, Unsqueeze_65525}, {{"axis", -1}}); - auto flatten_Reshape_1198 = - makeOP({stack_1182, {0, 0, num_head, rotary_dims}}, {{"special_zero", true}}); - // x*cos [B,L,H,ndims] - auto mul_cos = - makeOP({slice_Slice_965, repeat_interleave_cos}, {{"auto_broadcast", "numpy"}}); - mul_cos->set_friendly_name("mul_cos"); - auto mul_sin = - makeOP({flatten_Reshape_1198, repeat_interleave_sin}, {{"auto_broadcast", "numpy"}}); - // *cos + *sin - auto rotary_emb = makeOP({mul_cos, mul_sin}, {{"auto_broadcast", "numpy"}}); - - auto slice_Slice_971 = - makeOP({input, {0, 0, 0, rotary_dims}, {0, 0, 0, int32_max}, {1, 1, 1, 1}}, - {{"begin_mask", {1, 1, 1, 0}}, - {"end_mask", {1, 1, 1, 0}}, - {"new_axis_mask", {}}, - {"shrink_axis_mask", {}}, - {"ellipsis_mask", {}}}); - auto cat_Concat_1211 = makeOP({rotary_emb, slice_Slice_971}, {{"axis", -1}}); - auto permute_Transpose_1213 = makeOP({cat_Concat_1211, {0, 2, 1, 3}}); - ov::NodeVector model_output = {permute_Transpose_1213}; - if (hasShapeOf) { - auto shapeOf = makeOP({rotary_emb}, {{"output_type", "i32"}}); - auto gather = makeOP({shapeOf, {1}, 0}, {{"batch_dims", 0}}); - model_output.push_back(gather); - } - return std::make_shared(model_output, ov::ParameterVector{input, sincos}); - } - void SetUp() override { - targetDevice = ov::test::utils::DEVICE_GPU; - bool hasShapeOf = this->GetParam(); - const int batch = 2; - const int seq_length = 7; - const int num_head = 16; - const int hidden_dims = 256; - const int rotary_dims = 64; - - InputShape input = {{batch, seq_length, num_head, hidden_dims}, {{batch, seq_length, num_head, hidden_dims}}}; - InputShape sincos = {{batch, seq_length, rotary_dims}, {{batch, seq_length, rotary_dims}}}; - init_input_shapes({input, sincos}); - function = buildROPE_GPTJ(num_head, hidden_dims, rotary_dims, hasShapeOf); - } -}; - -TEST_P(RoPEGPUTestGPTJ, smoke_CompareWithRefs) { - GTEST_SKIP(); - run(); - 
std::shared_ptr function = compiledModel.get_runtime_model(); - CheckNumberOfNodesWithType(function, {"RoPE"}, 1); -} +INSTANTIATE_TEST_SUITE_P(smoke_RoPETestChatGLM, + RoPETestChatGLM, + ::testing::Values(ov::test::utils::DEVICE_GPU), + RoPETestChatGLM::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_RoPEGPUTestGPTJ, - RoPEGPUTestGPTJ, - ::testing::Values(true, false), - RoPEGPUTestGPTJ::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_RoPETestQwen7b, + RoPETestQwen7b, + ::testing::Combine(::testing::Values(true, false), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + RoPETestQwen7b::getTestCaseName); } // namespace test } // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/rotary_pos_emb.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/rotary_pos_emb.hpp new file mode 100644 index 00000000000000..9b0f8a401690bd --- /dev/null +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/rotary_pos_emb.hpp @@ -0,0 +1,52 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/rotary_pos_emb.hpp" + +namespace ov { +namespace test { + +inline void CheckNumberOfNodesWithType(std::shared_ptr function, + const std::unordered_set& nodeTypes, + size_t expectedCount) { + ASSERT_NE(nullptr, function); + int num_ops = 0; + for (const auto& node : function->get_ordered_ops()) { + const auto& rt_info = node->get_rt_info(); + const auto layer_type = rt_info.find("layerType")->second.as(); + if (nodeTypes.count(layer_type)) { + num_ops++; + } + } + ASSERT_EQ(num_ops, expectedCount); +} + +TEST_P(RoPETestLlama2, CompareWithRefs) { + run(); + auto function = compiledModel.get_runtime_model(); + CheckNumberOfNodesWithType(function, {"RoPE"}, 1); +}; + +TEST_P(RoPETestChatGLM, CompareWithRefs) { + run(); + auto function = compiledModel.get_runtime_model(); + CheckNumberOfNodesWithType(function, {"RoPE"}, 1); +}; + 
+TEST_P(RoPETestQwen7b, CompareWithRefs) { + run(); + auto function = compiledModel.get_runtime_model(); + CheckNumberOfNodesWithType(function, {"RoPE"}, 1); +}; + +TEST_P(RoPETestGPTJ, CompareWithRefs) { + run(); + auto function = compiledModel.get_runtime_model(); + CheckNumberOfNodesWithType(function, {"RoPE"}, 1); +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/rotary_pos_emb.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/rotary_pos_emb.hpp new file mode 100644 index 00000000000000..c18b57062f0295 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/rotary_pos_emb.hpp @@ -0,0 +1,67 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +class RoPETestLlama2 : public SubgraphBaseTest, public testing::WithParamInterface { +private: + ov::OutputVector makeCosSinCache(int max_position_embeddings, int rotary_ndims); + std::shared_ptr buildROPE_Llama2(int batch, + int seq_length, + int max_position_embeddings, + int num_head, + int ndims); + ov::Tensor create_i32_tensor(const ov::Shape& shape, int start, int step = 1); +protected: + void generate_inputs(const std::vector& targetInputStaticShapes) override; + void SetUp() override; + +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); +}; + +class RoPETestChatGLM : public SubgraphBaseTest, public testing::WithParamInterface { +private: + std::shared_ptr buildROPE_ChatGLM(int batch, int head_cnt, int rotary_dims); + ov::Tensor create_i32_tensor(const ov::Shape& shape, int start, int step = 1); +protected: + void generate_inputs(const std::vector& targetInputStaticShapes) override; + void SetUp() override; + +public: + static std::string getTestCaseName(const 
testing::TestParamInfo& obj); +}; + +class RoPETestQwen7b : public SubgraphBaseTest, public testing::WithParamInterface> { +private: + std::shared_ptr buildROPE_QWen7b(bool specialReshape); +protected: + void generate_inputs(const std::vector& targetInputStaticShapes) override; + void SetUp() override; + +public: + static std::string getTestCaseName(const testing::TestParamInfo>& obj); +}; + +class RoPETestGPTJ : public SubgraphBaseTest, public testing::WithParamInterface> { +private: + std::shared_ptr buildROPE_GPTJ(int num_head, + int hidden_dims, + int rotary_dims, + bool hasShapeOf); +protected: + void generate_inputs(const std::vector& targetInputStaticShapes) override; + void SetUp() override; + +public: + static std::string getTestCaseName(const testing::TestParamInfo>& obj); +}; + +} // namespace test +} // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/rotary_pos_emb.cpp b/src/tests/functional/shared_test_classes/src/subgraph/rotary_pos_emb.cpp new file mode 100644 index 00000000000000..829c0a0562ad36 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/subgraph/rotary_pos_emb.cpp @@ -0,0 +1,590 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/ov_tensor_utils.hpp" +#include "shared_test_classes/subgraph/rotary_pos_emb.hpp" +#include "transformations/utils/gen_pattern.hpp" + +using namespace ov::gen_pattern; +using namespace ov; + +namespace ov { +namespace test { + +ov::OutputVector RoPETestLlama2::makeCosSinCache(int max_position_embeddings, int rotary_ndims) { + std::vector lut_sin(max_position_embeddings * rotary_ndims, 0.0f); + std::vector lut_cos(max_position_embeddings * rotary_ndims, 0.0f); + + // rotate_half style cos/sin table: + // y1 = cos(m*xita_i) * x1 - sin(m*xita_i) * x2 + // y2 = cos(m*xita_i) * x2 + sin(m*xita_i) * x1 + // + for (int i = 0, k = 0; i < rotary_ndims; i += 2, k++) { + auto xita_i = 1.0 / std::pow(10000.0, 
static_cast(i) / rotary_ndims); + float* psin = lut_sin.data(); + float* pcos = lut_cos.data(); + for (int m = 0; m < max_position_embeddings; m++, psin += rotary_ndims, pcos += rotary_ndims) { + auto vsin = std::sin(xita_i * m); + auto vcos = std::cos(xita_i * m); + pcos[k] = pcos[k + rotary_ndims / 2] = vcos; + psin[k] = psin[k + rotary_ndims / 2] = vsin; + } + } + auto shape = ov::Shape({1, 1, static_cast(max_position_embeddings), static_cast(rotary_ndims)}); + auto Cos = makeConst(ov::element::f32, shape, lut_cos); + auto Sin = makeConst(ov::element::f32, shape, lut_sin); + return {Cos, Sin}; +} + +std::shared_ptr RoPETestLlama2::buildROPE_Llama2(int batch, + int seq_length, + int max_position_embeddings, + int num_head, + int ndims) { + auto input = std::make_shared(ov::element::f32, PartialShape{batch, -1, num_head, ndims}); + auto pos_id_end = std::make_shared(ov::element::i32, ov::Shape{}); + auto pos_ids = std::make_shared(ov::element::i32, PartialShape{1, -1}); + + auto cos_sin_cache = makeCosSinCache(max_position_embeddings, ndims); + auto Constant582 = cos_sin_cache[0]; + auto Constant585 = cos_sin_cache[1]; + + // concat KV length + auto transpose_Transpose = makeOP({input, {0, 2, 1, 3}}); + auto slice_Unsqueeze_426 = makeOP({pos_id_end, 0}); + auto ScatterUpdate_152236 = makeOP({{0, 0, 0}, {2}, slice_Unsqueeze_426, {0}}); + auto slice_Slice = makeOP({Constant582, {0, 0, 0}, ScatterUpdate_152236, {1, 1, 1}}, + {{"begin_mask", {1, 1, 0}}, + {"end_mask", {1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto squeeze_Squeeze = makeOP({slice_Slice, 1}); + auto squeeze_Squeeze_435 = makeOP({squeeze_Squeeze, 0}); + auto index_441_Gather = makeOP({squeeze_Squeeze_435, pos_ids, 0}, {{"batch_dims", 0}}); + auto unsqueeze_Unsqueeze = makeOP({index_441_Gather, 1}); + auto mul_Multiply = + makeOP({transpose_Transpose, unsqueeze_Unsqueeze}, {{"auto_broadcast", "numpy"}}); + auto size_ShapeOf_448 = 
makeOP({transpose_Transpose}, {{"output_type", "i32"}}); + auto size_Gather_450 = makeOP({size_ShapeOf_448, 3, 0}, {{"batch_dims", 0}}); + auto floor_divide_Divide = + makeOP({size_Gather_450, 2}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); + auto floor_divide_Floor = makeOP({floor_divide_Divide}); + auto slice_Unsqueeze_452 = makeOP({floor_divide_Floor, 0}); + auto ScatterUpdate_152312 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_452, {0}}); + auto slice_Slice_459 = makeOP( + {transpose_Transpose, ScatterUpdate_152312, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto Constant_182988 = makeConst(element::f32, + ov::Shape({ + 1, + 1, + 1, + 1, + }), + {-1.000000f}); + auto neg_Multiply = makeOP({slice_Slice_459, Constant_182988}, {{"auto_broadcast", "numpy"}}); + auto ScatterUpdate_152368 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_452, {0}}); + auto slice_Slice2 = + makeOP({transpose_Transpose, {0, 0, 0, 0}, ScatterUpdate_152368, {1, 1, 1, 1}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto cat_Concat = makeOP({neg_Multiply, slice_Slice2}, {{"axis", -1}}); + auto ScatterUpdate_152421 = makeOP({{0, 0, 0}, {2}, slice_Unsqueeze_426, {0}}); + auto slice_Slice_433 = makeOP({Constant585, {0, 0, 0}, ScatterUpdate_152421, {1, 1, 1}}, + {{"begin_mask", {1, 1, 0}}, + {"end_mask", {1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto squeeze_Squeeze_436 = makeOP({slice_Slice_433, 1}); + auto squeeze_Squeeze_437 = makeOP({squeeze_Squeeze_436, 0}); + auto index_446_Gather = makeOP({squeeze_Squeeze_437, pos_ids, 0}, {{"batch_dims", 0}}); + auto unsqueeze_Unsqueeze_447 = makeOP({index_446_Gather, 1}); + auto mul_Multiply_463 = + makeOP({cat_Concat, unsqueeze_Unsqueeze_447}, 
{{"auto_broadcast", "numpy"}});
+    auto add_Add = makeOP({mul_Multiply, mul_Multiply_463}, {{"auto_broadcast", "numpy"}});
+
+    return std::make_shared(ov::NodeVector{add_Add}, ov::ParameterVector{input, pos_id_end, pos_ids});
+}
+
+// NOTE(review): in this copy of the patch the template argument lists (after makeOP, static_cast,
+// std::make_shared, std::vector, std::shared_ptr, TestParamInfo, ...) appear to be missing.
+// Confirm against the upstream file before applying.
+
+// Creates an i32 tensor of the given shape and fills it, in flat element order, with the
+// arithmetic sequence start, start + step, start + 2 * step, ...
+ov::Tensor RoPETestLlama2::create_i32_tensor(const ov::Shape& shape, int start, int step) {
+    auto tensor = ov::Tensor(ov::element::i32, shape);
+    auto* ptr = static_cast(tensor.data());
+    for (size_t i = 0; i < tensor.get_size(); i++) {
+        ptr[i] = start;
+        start += step;
+    }
+    return tensor;
+}
+
+// Binds the three model inputs for the current static shape:
+//   input 0 - data tensor generated with start_from = -1, range = 2, resolution = 32768;
+//   input 1 - scalar holding position_id_start + seq_length;
+//   input 2 - {1, seq_length} position ids that begin at position_id_start (15).
+// seq_length is taken from dimension 1 of the first target shape.
+void RoPETestLlama2::generate_inputs(const std::vector& targetInputStaticShapes) {
+    const auto& funcInputs = function->inputs();
+
+    const int position_id_start = 15;
+    auto& input_shape = targetInputStaticShapes[0];
+    auto seq_length = input_shape[1];
+
+    ov::test::utils::InputGenerateData in_data;
+    in_data.start_from = -1;
+    in_data.range = 2;
+    in_data.resolution = 32768;
+    ov::Tensor t_input = utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, in_data);
+    ov::Tensor t_position_id_end = create_i32_tensor(ov::Shape({}), position_id_start + seq_length);
+    ov::Tensor t_position_ids = create_i32_tensor(ov::Shape({1, seq_length}), position_id_start);
+
+    inputs.clear();
+    inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input});
+    inputs.insert({funcInputs[1].get_node_shared_ptr(), t_position_id_end});
+    inputs.insert({funcInputs[2].get_node_shared_ptr(), t_position_ids});
+}
+
+// Takes the target device from the test parameter, registers one fully static input shape
+// {batch, seq_length, num_head, ndims} = {2, 7, 32, 128} and builds the Llama2 RoPE model
+// with max_position_embeddings = 2048.
+void RoPETestLlama2::SetUp() {
+    targetDevice = this->GetParam();
+
+    const int batch = 2;
+    const int seq_length = 7;
+    const size_t max_position_embeddings = 2048;
+    const size_t ndims = 128;
+    const size_t num_head = 32;
+
+    InputShape inpShape = {{batch, seq_length, num_head, ndims}, {{batch, seq_length, num_head, ndims}}};
+    init_input_shapes({inpShape});
+    function = buildROPE_Llama2(batch, seq_length, max_position_embeddings, num_head, ndims);
+}
+
+// Returns the parameter-dependent part of the test name: "targetDevice=<device>".
+// No line break is appended, so the generated test name stays on a single line.
+std::string RoPETestLlama2::getTestCaseName(const testing::TestParamInfo& obj) {
+    std::string targetDevice = obj.param;
+    std::ostringstream result;
+    result << "targetDevice=" << targetDevice;
+    return result.str();
+}
+
+// Builds the ChatGLM rotary-embedding subgraph (presumably the op pattern the RoPE fusion is
+// expected to recognise - confirm against the fusion pass).
+// Model parameters, in order:
+//   input         f32 {-1, batch, 4096 + 256 + 256}
+//   cos_sin_cache f32 {32768, 32, 2}
+//   position_ids  i32 {-1, -1}
+// Only `batch` is used by the body; head_cnt and rotary_dims are not referenced here, and the
+// head layout is fixed by the {0, 0, 32, 128} reshape of the first 4096 channels.
+std::shared_ptr RoPETestChatGLM::buildROPE_ChatGLM(int batch, int head_cnt, int rotary_dims) {
+    auto input =
+        std::make_shared(ov::element::f32, PartialShape{-1, batch, 4096 + 256 + 256});
+    auto cos_sin_cache = std::make_shared(ov::element::f32, PartialShape{32768, 32, 2});
+    auto position_ids = std::make_shared(ov::element::i32, PartialShape{-1, -1});
+
+    // Look up cache rows by position id and move the gathered axis order to {1, 0, 2, 3}.
+    auto __module_transformer_index_67_Gather =
+        makeOP({cos_sin_cache, position_ids, 0}, {{"batch_dims", 0}});
+    auto __module_transformer_transpose_Transpose =
+        makeOP({__module_transformer_index_67_Gather, {1, 0, 2, 3}});
+    auto size_ShapeOf_110 =
+        makeOP({__module_transformer_transpose_Transpose}, {{"output_type", "i32"}});
+    auto __getitem___Gather = makeOP({size_ShapeOf_110, -2, 0}, {{"batch_dims", 0}});
+    auto mul_Multiply = makeOP({__getitem___Gather, 2}, {{"auto_broadcast", "numpy"}});
+    auto slice_Unsqueeze_112 = makeOP({mul_Multiply, 0});
+
+    auto floordiv_Divide =
+        makeOP({mul_Multiply, 2}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}});
+    auto floordiv_Floor = makeOP({floordiv_Divide});
+    auto ListConstruct_126_Reshape_2 = makeOP({floordiv_Floor, {-1}}, {{"special_zero", false}});
+
+    // Split the last axis into 4096 / 256 / 256 and view the first part as {.., .., 32, 128}.
+    auto ListUnpack_321 = makeOP({input, -1, {4096, 256, 256}});
+    auto view_Reshape =
+        makeOP({ListUnpack_321->output(0), {0, 0, 32, 128}}, {{"special_zero", true}});
+
+    // Leading slice of the last axis (end index comes from slice_Unsqueeze_112).
+    auto ScatterUpdate_229053 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_112, {0}});
+    auto slice_Slice_357 =
+        makeOP({view_Reshape, {0, 0, 0, 0}, ScatterUpdate_229053, {1, 1, 1, 1}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto size_ShapeOf_346 = makeOP({view_Reshape}, {{"output_type", "i32"}});
+    auto size_Gather_348 = makeOP({size_ShapeOf_346, 0, 0}, {{"batch_dims", 0}});
+    auto ListConstruct_372_Reshape = makeOP({size_Gather_348, {-1}}, {{"special_zero", false}});
+    auto size_Gather_351 = makeOP({size_ShapeOf_346, {2}, 0}, {{"batch_dims", 0}});
+    auto ListConstruct_372_Concat =
+        makeOP({ListConstruct_372_Reshape, {-1}, size_Gather_351, ListConstruct_126_Reshape_2, {2}},
+               {{"axis", 0}});
+    auto reshape_Reshape_373 =
+        makeOP({slice_Slice_357, ListConstruct_372_Concat}, {{"special_zero", false}});
+    auto select_Gather_381 = makeOP({reshape_Reshape_373, 0, -1}, {{"batch_dims", 0}});
+    auto slice_Unsqueeze_367 = makeOP({size_Gather_348, 0});
+    auto slice_Slice_369 =
+        makeOP({__module_transformer_transpose_Transpose, {0}, slice_Unsqueeze_367, {1}},
+               {{"begin_mask", {0}},
+                {"end_mask", {0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto size_ShapeOf_374 = makeOP({reshape_Reshape_373}, {{"output_type", "i32"}});
+    auto size_Gather_376 = makeOP({size_ShapeOf_374, {3}, 0}, {{"batch_dims", 0}});
+    auto ListConstruct_379_Concat =
+        makeOP({ListConstruct_372_Reshape, {-1}, {1}, size_Gather_376, {2}}, {{"axis", 0}});
+    auto view_Reshape_380 =
+        makeOP({slice_Slice_369, ListConstruct_379_Concat}, {{"special_zero", false}});
+    auto select_Gather_382 = makeOP({view_Reshape_380, 0, -1}, {{"batch_dims", 0}});
+    // Pairwise products of element 0 / element 1 of the last axis of the two reshaped tensors,
+    // combined below as (a0*b0 - a1*b1) and (a1*b0 + a0*b1).
+    auto mul_Multiply_383 =
+        makeOP({select_Gather_381, select_Gather_382}, {{"auto_broadcast", "numpy"}});
+    auto select_Gather_384 = makeOP({reshape_Reshape_373, 1, -1}, {{"batch_dims", 0}});
+    auto select_Gather_385 = makeOP({view_Reshape_380, 1, -1}, {{"batch_dims", 0}});
+    auto mul_Multiply_386 =
+        makeOP({select_Gather_384, select_Gather_385}, {{"auto_broadcast", "numpy"}});
+    auto sub_Subtract_389 =
+        makeOP({mul_Multiply_383, mul_Multiply_386}, {{"auto_broadcast", "numpy"}});
+    auto Unsqueeze_62716 = makeOP({sub_Subtract_389, -1});
+    auto mul_Multiply_391 =
+        makeOP({select_Gather_384, select_Gather_382}, {{"auto_broadcast", "numpy"}});
+    auto mul_Multiply_393 =
+        makeOP({select_Gather_381, select_Gather_385}, {{"auto_broadcast", "numpy"}});
+    auto add_Add_396 = makeOP({mul_Multiply_391, mul_Multiply_393}, {{"auto_broadcast", "numpy"}});
+    auto Unsqueeze_62717 = makeOP({add_Add_396, -1});
+    auto stack_401 = makeOP({Unsqueeze_62716, Unsqueeze_62717}, {{"axis", -1}});
+    // Keep the first three dims of the stacked shape and flatten the rest via -1.
+    auto flatten_ShapeOf_402 = makeOP({stack_401}, {{"output_type", "i32"}});
+    auto flatten_Slice_417 = makeOP({flatten_ShapeOf_402, {0}, {3}, {1}},
+                                    {{"begin_mask", {0}},
+                                     {"end_mask", {0}},
+                                     {"new_axis_mask", {}},
+                                     {"shrink_axis_mask", {}},
+                                     {"ellipsis_mask", {}}});
+    auto flatten_Concat_420 = makeOP({flatten_Slice_417, {-1}}, {{"axis", 0}});
+    auto flatten_Reshape_421 = makeOP({stack_401, flatten_Concat_420}, {{"special_zero", true}});
+    // Remainder of the last axis (from slice_Unsqueeze_112 to the end) is concatenated back.
+    auto ScatterUpdate_229067 = makeOP({{0, 0, 0, 0}, {3}, slice_Unsqueeze_112, {0}});
+    auto slice_Slice_363 =
+        makeOP({view_Reshape, ScatterUpdate_229067, {0, 0, 0, INT_MAX}, {1, 1, 1, 1}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto cat_Concat_425 = makeOP({flatten_Reshape_421, slice_Slice_363}, {{"axis", -1}});
+    return std::make_shared(ov::NodeVector{cat_Concat_425},
+                            ov::ParameterVector{input, cos_sin_cache, position_ids});
+}
+
+// Creates an i32 tensor of the given shape and fills it, in flat element order, with the
+// arithmetic sequence start, start + step, start + 2 * step, ...
+ov::Tensor RoPETestChatGLM::create_i32_tensor(const ov::Shape& shape, int start, int step) {
+    auto tensor = ov::Tensor(ov::element::i32, shape);
+    auto* ptr = static_cast(tensor.data());
+    for (size_t i = 0; i < tensor.get_size(); i++) {
+        ptr[i] = start;
+        start += step;
+    }
+    return tensor;
+}
+
+// Binds the three model inputs for the current static shape:
+//   input 0 - data tensor of the target shape (fill arguments 2, -1.0f, 32768);
+//   input 1 - {32768, 32, 2} cos/sin cache generated with the same fill arguments;
+//   input 2 - {1, seq_length} position ids that begin at 15.
+// seq_length is taken from dimension 0 of the first target shape.
+void RoPETestChatGLM::generate_inputs(const std::vector& targetInputStaticShapes) {
+    const auto& funcInputs = function->inputs();
+
+    auto& input_shape = targetInputStaticShapes[0];
+    auto seq_length = input_shape[0];
+
+    ov::Tensor t_input =
+        utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768);
+    ov::Tensor t_cos_sin_cache =
+        utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {32768, 32, 2}, 2, -1.0f, 32768);
+    ov::Tensor t_position_ids = create_i32_tensor(ov::Shape({1, seq_length}), 15);
+
+    inputs.clear();
+    inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input});
+    inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_sin_cache});
+    inputs.insert({funcInputs[2].get_node_shared_ptr(), t_position_ids});
+}
+
+// Takes the target device from the test parameter, registers the input shape
+// {-1, batch, 4608} with one static instance {seq_length, batch, 4608} (batch = 2,
+// seq_length = 7) and builds the ChatGLM RoPE model.
+void RoPETestChatGLM::SetUp() {
+    targetDevice = this->GetParam();
+
+    const int batch = 2;
+    const int seq_length = 7;
+    const int num_head = 32;
+    const int rotary_dims = 64;
+
+    InputShape inpShape = {{-1, batch, 4096 + 256 + 256}, {{seq_length, batch, 4096 + 256 + 256}}};
+    init_input_shapes({inpShape});
+    function = buildROPE_ChatGLM(batch, num_head, rotary_dims);
+}
+
+// Returns the parameter-dependent part of the test name: "targetDevice=<device>".
+// No line break is appended, so the generated test name stays on a single line.
+std::string RoPETestChatGLM::getTestCaseName(const testing::TestParamInfo& obj) {
+    std::string targetDevice = obj.param;
+    std::ostringstream result;
+    result << "targetDevice=" << targetDevice;
+    return result.str();
+}
+
+// Builds the Qwen-7b rotary-embedding subgraph.
+// Model parameters, in order:
+//   input     f32 {-1, -1, 4096 + 4096 + 4096}
+//   cos_cache f32 {1, -1, 1, 128}
+//   sin_cache f32 {1, -1, 1, 128}
+// specialReshape selects how the sliced tensor is reshaped before the split: with a constant
+// target shape {0, 0, 32, 2, 64} (special_zero = true) or with a shape assembled at runtime
+// from ShapeOf of the sliced tensor.
+std::shared_ptr RoPETestQwen7b::buildROPE_QWen7b(bool specialReshape) {
+    auto input =
+        std::make_shared(ov::element::f32, PartialShape{-1, -1, 4096 + 4096 + 4096});
+    auto cos_cache = std::make_shared(ov::element::f32, PartialShape{1, -1, 1, 128});
+    auto sin_cache = std::make_shared(ov::element::f32, PartialShape{1, -1, 1, 128});
+
+    // First 4096 channels of the input viewed as {.., .., 32, 128}.
+    auto ListUnpack_389_VariadicSplit = makeOP({input, 2, {4096, 4096, -1}});
+    auto view_Reshape = makeOP({ListUnpack_389_VariadicSplit->output(0), {0, 0, 32, 128}},
+                               {{"special_zero", true}});
+    // Negative start index (-dim1 of view_Reshape) used to take the tail of both caches.
+    auto size_ShapeOf_414 = makeOP({view_Reshape}, {{"output_type", "i32"}});
+    auto size_Gather_416 = makeOP({size_ShapeOf_414, 1, 0}, {{"batch_dims", 0}});
+    auto neg_Multiply = makeOP({size_Gather_416, -1}, {{"auto_broadcast", "numpy"}});
+    auto slice_Unsqueeze_422 = makeOP({neg_Multiply, 0});
+    auto ScatterUpdate_261437 = makeOP({{0, 0}, {1}, slice_Unsqueeze_422, {0}});
+    auto slice_Slice_425 = makeOP({cos_cache, ScatterUpdate_261437, {0ll, LLONG_MAX}, {1, 1}},
+                                  {{"begin_mask", {1, 0}},
+                                   {"end_mask", {1, 0}},
+                                   {"new_axis_mask", {}},
+                                   {"shrink_axis_mask", {}},
+                                   {"ellipsis_mask", {}}});
+    auto slice_Slice_431 =
+        makeOP({slice_Slice_425, {0, 0, 0}, {0ll, 0ll, LLONG_MAX}, {1, 1, 1}},
+               {{"begin_mask", {1, 1, 0}},
+                {"end_mask", {1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto slice_Slice_437 =
+        makeOP({slice_Slice_431, {0, 0, 0, 0}, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto size_ShapeOf_462 = makeOP({slice_Slice_437}, {{"output_type", "i32"}});
+    auto size_Gather_464 = makeOP({size_ShapeOf_462, {3}, 0}, {{"batch_dims", 0}});
+    auto ScatterUpdate_261533 = makeOP({{0, 0, 0, 0}, {3}, size_Gather_464, {0}});
+    auto slice_Slice_470 =
+        makeOP({view_Reshape, {0, 0, 0, 0}, ScatterUpdate_261533, {1, 1, 1, 1}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    // First addend: sliced input multiplied by the sliced cos cache.
+    auto mul_Multiply = makeOP({slice_Slice_470, slice_Slice_437}, {{"auto_broadcast", "numpy"}});
+    auto size_ShapeOf_478 = makeOP({slice_Slice_470}, {{"output_type", "i32"}});
+    auto Gather_239390 = makeOP({size_ShapeOf_478, {0, 1, 2}, 0}, {{"batch_dims", 0}});
+    auto size_Gather_489 = makeOP({size_ShapeOf_478, 3, 0}, {{"batch_dims", 0}});
+    auto floor_divide_Divide =
+        makeOP({size_Gather_489, 2}, {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}});
+    auto floor_divide_Floor = makeOP({floor_divide_Divide});
+    auto ListConstruct_493_Reshape_3 =
+        makeOP({floor_divide_Floor, {-1}}, {{"special_zero", false}});
+    auto ListConstruct_493_Concat =
+        makeOP({Gather_239390, {2}, ListConstruct_493_Reshape_3}, {{"axis", 0}});
+    std::shared_ptr reshape_Reshape = nullptr;
+    if (specialReshape) {
+        reshape_Reshape = makeOP({slice_Slice_470, {0, 0, 32, 2, 64}}, {{"special_zero", true}});
+    } else {
+        reshape_Reshape =
+            makeOP({slice_Slice_470, ListConstruct_493_Concat}, {{"special_zero", false}});
+    }
+    // Split into two halves along axis -2; the second half is negated and placed first.
+    auto ListUnpack_496_Split = makeOP({reshape_Reshape, -2}, {{"num_splits", 2}});
+    auto ListUnpack_496_Squeeze_0 = makeOP({ListUnpack_496_Split->output(1), -2});
+    auto Constant_296840_compressed = makeConst(element::f16, ov::Shape({1, 1, 1, 1}), {-1});
+    auto Constant_296840 = makeOP({Constant_296840_compressed}, {{"destination_type", "f32"}});
+    auto neg_Multiply_499 =
+        makeOP({ListUnpack_496_Squeeze_0, Constant_296840}, {{"auto_broadcast", "numpy"}});
+    auto ListUnpack_496_Squeeze = makeOP({ListUnpack_496_Split->output(0), -2});
+    auto cat_Concat = makeOP({neg_Multiply_499, ListUnpack_496_Squeeze}, {{"axis", -1}});
+    // Same slicing chain as for cos_cache, applied to sin_cache.
+    auto slice_Slice_449 = makeOP({sin_cache, ScatterUpdate_261437, {0ll, LLONG_MAX}, {1, 1}},
+                                  {{"begin_mask", {1, 0}},
+                                   {"end_mask", {1, 0}},
+                                   {"new_axis_mask", {}},
+                                   {"shrink_axis_mask", {}},
+                                   {"ellipsis_mask", {}}});
+    auto slice_Slice_455 =
+        makeOP({slice_Slice_449, {0, 0, 0}, {0ll, 0ll, LLONG_MAX}, {1, 1, 1}},
+               {{"begin_mask", {1, 1, 0}},
+                {"end_mask", {1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto slice_Slice_461 =
+        makeOP({slice_Slice_455, {0, 0, 0, 0}, {0ll, 0ll, 0ll, LLONG_MAX}, {1, 1, 1, 1}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto mul_Multiply_503 = makeOP({cat_Concat, slice_Slice_461}, {{"auto_broadcast", "numpy"}});
+    auto add_Add = makeOP({mul_Multiply, mul_Multiply_503}, {{"auto_broadcast", "numpy"}});
+    return std::make_shared(ov::NodeVector{add_Add}, ov::ParameterVector{input, cos_cache, sin_cache});
+}
+
+// Binds the three model inputs for the current static shape:
+//   input 0 - data tensor of the target shape;
+//   input 1 - {1, 4096, 1, 128} cos cache;
+//   input 2 - {1, 4096, 1, 128} sin cache.
+// All three use the fill arguments 2, -1.0f, 32768. Each tensor takes its element type from
+// the model input it is bound to.
+void RoPETestQwen7b::generate_inputs(const std::vector& targetInputStaticShapes) {
+    const auto& funcInputs = function->inputs();
+
+    auto& input_shape = targetInputStaticShapes[0];
+
+    ov::Tensor t_input =
+        utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768);
+    ov::Tensor t_cos_cache =
+        utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {1, 4096, 1, 128}, 2, -1.0f, 32768);
+    ov::Tensor t_sin_cache =
+        utils::create_and_fill_tensor(funcInputs[2].get_element_type(), {1, 4096, 1, 128}, 2, -1.0f, 32768);
+
+    inputs.clear();
+    inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input});
+    inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_cache});
+    inputs.insert({funcInputs[2].get_node_shared_ptr(), t_sin_cache});
+}
+
+// Unpacks (specialReshape, targetDevice) from the test parameter, registers the input shape
+// {batch, -1, 12288} with one static instance {batch, seq_length, 12288} (batch = 2,
+// seq_length = 7) and builds the Qwen-7b RoPE model.
+void RoPETestQwen7b::SetUp() {
+    bool specialReshape;
+    std::tie(specialReshape, targetDevice) = this->GetParam();
+    const int batch = 2;
+    const int seq_length = 7;
+    InputShape inpShape = {{batch, -1, 4096 + 4096 + 4096}, {{batch, seq_length, 4096 + 4096 + 4096}}};
+    init_input_shapes({inpShape});
+    function = buildROPE_QWen7b(specialReshape);
+}
+
+// Returns the parameter-dependent part of the test name:
+// "specialReshape=<0|1>_targetDevice=<device>".
+// No line break is appended, so the generated test name stays on a single line.
+std::string RoPETestQwen7b::getTestCaseName(const testing::TestParamInfo>& obj) {
+    bool specialReshape;
+    std::string targetDevice;
+    std::tie(specialReshape, targetDevice) = obj.param;
+    std::ostringstream result;
+    result << "specialReshape=" << specialReshape << "_"
+           << "targetDevice=" << targetDevice;
+    return result.str();
+}
+
+// Builds the GPT-J rotary-embedding subgraph.
+// Model parameters, in order:
+//   input  f32 {-1, -1, num_head, hidden_dims}
+//   sincos f32 {-1, -1, rotary_dims}
+// The first rotary_dims channels of the last axis are rotated, the remaining channels are
+// passed through unchanged and concatenated back, and the result is transposed with order
+// {0, 2, 1, 3}. When hasShapeOf is true a second model output is added: element 1 of
+// ShapeOf(rotary_emb).
+// rotary_dims is expected to be even (sincos is split at rotary_dims / 2 and indices are
+// written in pairs).
+std::shared_ptr RoPETestGPTJ::buildROPE_GPTJ(int num_head,
+                                             int hidden_dims,
+                                             int rotary_dims,
+                                             bool hasShapeOf) {
+    auto int32_max = std::numeric_limits::max();
+    auto input =
+        std::make_shared(ov::element::f32, PartialShape{-1, -1, num_head, hidden_dims});
+    auto sincos = std::make_shared(ov::element::f32, PartialShape{-1, -1, rotary_dims});
+
+    // Channels [0, rotary_dims) of the last axis.
+    auto slice_Slice_965 =
+        makeOP({input, {0, 0, 0, 0}, {0, 0, 0, rotary_dims}, {1, 1, 1, 1}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    slice_Slice_965->set_friendly_name("slice_Slice_965");
+
+    // sincos is split on the last axis: output 0 is used as sin, output 1 as cos.
+    auto varsplit = makeOP({sincos, -1, {rotary_dims / 2, -1}});
+    varsplit->set_output_size(2);
+    varsplit->set_friendly_name("varsplit");
+    auto unsqueeze_sin = makeOP({varsplit->output(0), 2});
+    auto unsqueeze_cos = makeOP({varsplit->output(1), 2});
+    // Gather indices 0, 0, 1, 1, 2, 2, ... repeat every sin/cos value twice.
+    // The bound is "i + 1 < size" so the paired write never goes past the end of the vector,
+    // even if an odd rotary_dims is passed.
+    std::vector gather_idx(rotary_dims, 1);
+    int32_t v = 0;
+    for (size_t i = 0; i + 1 < gather_idx.size(); i += 2, v++) {
+        gather_idx[i] = v;
+        gather_idx[i + 1] = v;
+    }
+
+    auto const_idx = makeConst(ov::element::i32, ov::Shape({static_cast(rotary_dims)}), gather_idx);
+    auto constant_155588 = makeConst(element::f32, ov::Shape({1, 1, 1, 1}), {-1.000000f});
+    auto repeat_interleave_sin = makeOP({unsqueeze_sin, const_idx, 3}, {{"batch_dims", 0}});
+    auto repeat_interleave_cos = makeOP({unsqueeze_cos, const_idx, 3}, {{"batch_dims", 0}});
+    repeat_interleave_sin->set_friendly_name("repeat_interleave_sin");
+    repeat_interleave_cos->set_friendly_name("repeat_interleave_cos");
+    // x interleave (-x[:,:,:, 1::2], x[:,:,:, 0::2])
+    auto slice_Slice_1174 =
+        makeOP({slice_Slice_965, {0, 0, 0, 1}, {0, 0, 0, int32_max}, {1, 1, 1, 2}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto neg_Multiply_1177 =
+        makeOP({slice_Slice_1174, constant_155588}, {{"auto_broadcast", "numpy"}});
+    auto Unsqueeze_65524 = makeOP({neg_Multiply_1177, -1});
+
+    auto slice_Slice_1168 =
+        makeOP({slice_Slice_965, {0, 0, 0, 0}, {0, 0, 0, int32_max}, {1, 1, 1, 2}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto Unsqueeze_65525 = makeOP({slice_Slice_1168, -1});
+    auto stack_1182 = makeOP({Unsqueeze_65524, Unsqueeze_65525}, {{"axis", -1}});
+    auto flatten_Reshape_1198 =
+        makeOP({stack_1182, {0, 0, num_head, rotary_dims}}, {{"special_zero", true}});
+    // x*cos [B,L,H,ndims]
+    auto mul_cos =
+        makeOP({slice_Slice_965, repeat_interleave_cos}, {{"auto_broadcast", "numpy"}});
+    mul_cos->set_friendly_name("mul_cos");
+    auto mul_sin =
+        makeOP({flatten_Reshape_1198, repeat_interleave_sin}, {{"auto_broadcast", "numpy"}});
+    // *cos + *sin
+    auto rotary_emb = makeOP({mul_cos, mul_sin}, {{"auto_broadcast", "numpy"}});
+
+    // Channels [rotary_dims, end) of the last axis are appended unchanged.
+    auto slice_Slice_971 =
+        makeOP({input, {0, 0, 0, rotary_dims}, {0, 0, 0, int32_max}, {1, 1, 1, 1}},
+               {{"begin_mask", {1, 1, 1, 0}},
+                {"end_mask", {1, 1, 1, 0}},
+                {"new_axis_mask", {}},
+                {"shrink_axis_mask", {}},
+                {"ellipsis_mask", {}}});
+    auto cat_Concat_1211 = makeOP({rotary_emb, slice_Slice_971}, {{"axis", -1}});
+    auto permute_Transpose_1213 = makeOP({cat_Concat_1211, {0, 2, 1, 3}});
+    ov::NodeVector model_output = {permute_Transpose_1213};
+    if (hasShapeOf) {
+        auto shapeOf = makeOP({rotary_emb}, {{"output_type", "i32"}});
+        auto gather = makeOP({shapeOf, {1}, 0}, {{"batch_dims", 0}});
+        model_output.push_back(gather);
+    }
+    return std::make_shared(model_output, ov::ParameterVector{input, sincos});
+}
+
+// Binds the two model inputs for the current static shapes: the data tensor (target shape 0)
+// and the sin/cos tensor (target shape 1), both generated with fill arguments 2, -1.0f, 32768.
+void RoPETestGPTJ::generate_inputs(const std::vector& targetInputStaticShapes) {
+    const auto& funcInputs = function->inputs();
+
+    auto& input_shape = targetInputStaticShapes[0];
+    auto& sincos_shape = targetInputStaticShapes[1];
+    ov::Tensor t_input =
+        utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768);
+    ov::Tensor t_cos_sin_cache =
+        utils::create_and_fill_tensor(funcInputs[1].get_element_type(), sincos_shape, 2, -1.0f, 32768);
+
+    inputs.clear();
+    inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input});
+    inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_sin_cache});
+}
+
+// Returns the parameter-dependent part of the test name:
+// "hasShapeOf=<0|1>_targetDevice=<device>".
+// No line break is appended, so the generated test name stays on a single line.
+std::string RoPETestGPTJ::getTestCaseName(const testing::TestParamInfo>& obj) {
+    bool hasShapeOf;
+    std::string targetDevice;
+    std::tie(hasShapeOf, targetDevice) = obj.param;
+    std::ostringstream result;
+    result << "hasShapeOf=" << hasShapeOf << "_"
+           << "targetDevice=" << targetDevice;
+    return result.str();
+}
+
+// Unpacks (hasShapeOf, targetDevice) from the test parameter, registers two fully static
+// input shapes - data {2, 7, 16, 256} and sincos {2, 7, 64} - and builds the GPT-J RoPE model.
+void RoPETestGPTJ::SetUp() {
+    bool hasShapeOf;
+    std::tie(hasShapeOf, targetDevice) = this->GetParam();
+
+    const int batch = 2;
+    const int seq_length = 7;
+    const int num_head = 16;
+    const int hidden_dims = 256;
+    const int rotary_dims = 64;
+
+    InputShape input = {{batch, seq_length, num_head, hidden_dims}, {{batch, seq_length, num_head, hidden_dims}}};
+    InputShape sincos = {{batch, seq_length, rotary_dims}, {{batch, seq_length, rotary_dims}}};
+    init_input_shapes({input, sincos});
+    function = buildROPE_GPTJ(num_head, hidden_dims, rotary_dims, hasShapeOf);
+}
+
+}  // namespace test
+}  // namespace ov