diff --git a/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp b/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp index d2ef3f578407f8..e4c9e488708e18 100644 --- a/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp +++ b/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp @@ -380,8 +380,7 @@ class Qwen7bChatPA { } static std::shared_ptr gen_Q(const std::shared_ptr& total_seq_len, - const std::shared_ptr& rope_Q, - std::shared_ptr& scale) { + const std::shared_ptr& rope_Q) { auto Constant_463 = makeConst(element::f32, {1, 32767, 1, 1}, MOCK_VALUE); auto ShapeOf_489 = makeOP({rope_Q}, {{"output_type", "i32"}}); auto Gather_492 = makeOP({ShapeOf_489, {1}, 0ll}, {{"batch_dims", 0}}); @@ -390,12 +389,6 @@ class Qwen7bChatPA { auto Multiply_631 = makeOP({rope_Q, Slice_496}, {numpy_broadcast}); auto Transpose_633 = makeOP({Multiply_631, {0, 2, 1, 3}}); - auto ShapeOf_1238 = makeOP({Transpose_633}, {{"output_type", "i64"}}); - auto Gather_1241 = makeOP({ShapeOf_1238, -1ll, 0ll}, {{"batch_dims", 0}}); - auto Convert_1242 = makeOP({Gather_1241}, {dest_type_f32}); - auto Sqrt_1243 = makeOP({Convert_1242}); - scale = makeOP({1.000000f, Sqrt_1243}, {numpy_broadcast, {"m_pythondiv", true}}); - auto Transpose_1223 = makeOP({Transpose_633, {0, 2, 1, 3}}); return makeOP({Transpose_1223, {0, -1}}, {special_zero_true}); } @@ -506,15 +499,16 @@ TEST_F(TransformationTestsF, SDPAToPA_Qwen) { auto total_seq_len = Qwen7bChatPA::gen_total_len(current_seq_len, past_seq_len); // Q, K, V: - shared_ptr scale; + shared_ptr head_size_2; - auto Q = Qwen7bChatPA::gen_Q(total_seq_len, rope_Q, scale); + auto Q = Qwen7bChatPA::gen_Q(total_seq_len, rope_Q); auto K = Qwen7bChatPA::gen_K(rope_K); auto V = Qwen7bChatPA::gen_V(qkv_proj, head_size_2); // Additional PA arguments: auto sliding_window = std::make_shared(element::i32, Shape{}, 0); auto alibi_slopes = std::make_shared(element::f32, Shape{0}); + auto scale = std::make_shared(element::f32, Shape{}, MOCK_VALUE); // PagedAttention: auto pa = std::make_shared(OutputVector{Q,