Skip to content

Commit

Permalink
[GPU] rope optimization (#27907)
Browse files Browse the repository at this point in the history
### Details:
 - Optimize the RoPE OpenCL kernel to improve its performance.
 - Test results show that it improves RoPE performance by about 50% on
   average.

<html xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
xmlns="http://www.w3.org/TR/REC-html40">

<head>

<meta name=ProgId content=OneNote.File>
<meta name=Generator content="Microsoft OneNote 15">
</head>

<body lang=en-US style='font-family:Calibri;font-size:11.0pt'>
<!--StartFragment-->

<div style='direction:ltr'>


| Kernel (batch=128, seq_length = 7) | base latency(ns) | optimized latency(ns) | latency decreased | Test | Precision |
| -- | -- | -- | -- | -- | -- |
| rope_ref_5266667119713786613_0_0__sa | 921352 | 872395 | 5.31% | RoPETestQwen7b | f32 |
| rope_ref_2672092794364911740_0_0__sa | 1724374 | 514790 | 70.15% | RoPETestChatGLM | f32 |
| rope_ref_8061762790816124098_0_0__sa | 633019 | 127186 | 79.91% | RoPETestQwen7b | f16 |
| rope_ref_4392014836945391706_0_0__sa | 629791 | 518749 | 17.63% | RoPETestLlama2 | f32 |
| rope_ref_13829176589243505378_0_0__sa | 870312 | 259583 | 70.17% | RoPETestChatGLM | f32 |
| rope_ref_6813544162411765619_0_0__sa | 749895 | 421875 | 43.74% | RoPETestChatGLM | f16 |
| rope_ref_15054358246334082928_0_0__sa | 637708 | 45208 | 92.91% | RoPETestFlux | f32 |
| rope_ref_3898891400599565440_0_0__sa | 378333 | 335937 | 11.21% | RoPETestRotateHalfWithoutTranspose | f32 |
| rope_ref_18119704851383556529_0_0__sa | 371250 | 208645 | 43.80% | RoPETestChatGLM | f16 |
| rope_ref_17460680473512025171_0_0__sa | 299166 | 98958 | 66.92% | RoPETestFlux | f16 |



</div>

<!--EndFragment-->
</body>

</html>


![image](https://github.com/user-attachments/assets/4328b1a7-18ec-485f-abd0-b0fe16785854)


### Tickets:
 - *CVS-157438*
  • Loading branch information
riverlijunjie authored Dec 16, 2024
1 parent 357eb54 commit 9651768
Show file tree
Hide file tree
Showing 13 changed files with 760 additions and 379 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,51 +9,65 @@ namespace test {

// Smoke-test instantiation for the RoPETestLlama2StridedSlice fixture; test
// names are produced by RoPETestLlama2StridedSlice::getTestCaseName.
// NOTE(review): this hunk is rendered without +/- markers, so it lists both a
// bare ::testing::Values(DEVICE_CPU) generator and a
// ::testing::Combine(f32, DEVICE_CPU) generator. Presumably the former is the
// removed line and the latter its replacement -- confirm against the commit
// before treating this text as compilable code.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestLlama2StridedSlice,
RoPETestLlama2StridedSlice,
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::Combine(
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestLlama2StridedSlice::getTestCaseName);

// Smoke-test instantiation for the RoPETestChatGLMStridedSlice fixture; test
// names are produced by RoPETestChatGLMStridedSlice::getTestCaseName.
// NOTE(review): shown without +/- markers. The bare
// ::testing::Values(DEVICE_CPU) line is presumably the removed generator and
// the ::testing::Combine(f32, DEVICE_CPU) lines its replacement -- confirm
// against the commit.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestChatGLMStridedSlice,
RoPETestChatGLMStridedSlice,
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::Combine(
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestChatGLMStridedSlice::getTestCaseName);

// Smoke-test instantiation for the RoPETestQwen7bStridedSlice fixture. The
// parameter set is the cross product of a boolean flag (true and false), the
// f32 element type and the CPU device; test names are produced by
// RoPETestQwen7bStridedSlice::getTestCaseName.
// NOTE(review): the meaning of the boolean parameter is defined by the
// fixture, which is not visible here.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestQwen7bStridedSlice,
RoPETestQwen7bStridedSlice,
::testing::Combine(::testing::Values(true, false),
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestQwen7bStridedSlice::getTestCaseName);

// Smoke-test instantiation for the RoPETestGPTJStridedSlice fixture. The
// parameter set is the cross product of a boolean flag (true and false), the
// f32 element type and the CPU device; test names are produced by
// RoPETestGPTJStridedSlice::getTestCaseName.
// NOTE(review): the meaning of the boolean parameter is defined by the
// fixture, which is not visible here.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestGPTJStridedSlice,
RoPETestGPTJStridedSlice,
::testing::Combine(::testing::Values(true, false),
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestGPTJStridedSlice::getTestCaseName);

// Smoke-test instantiation for the RoPETestLlama2Slice fixture; test names are
// produced by RoPETestLlama2Slice::getTestCaseName.
// NOTE(review): shown without +/- markers. The bare
// ::testing::Values(DEVICE_CPU) line is presumably the removed generator and
// the ::testing::Combine(f32, DEVICE_CPU) lines its replacement -- confirm
// against the commit.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestLlama2Slice,
RoPETestLlama2Slice,
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::Combine(
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestLlama2Slice::getTestCaseName);

// Smoke-test instantiation for the RoPETestChatGLMSlice fixture; test names
// are produced by RoPETestChatGLMSlice::getTestCaseName.
// NOTE(review): shown without +/- markers. The bare
// ::testing::Values(DEVICE_CPU) line is presumably the removed generator and
// the ::testing::Combine(f32, DEVICE_CPU) lines its replacement -- confirm
// against the commit.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestChatGLMSlice,
RoPETestChatGLMSlice,
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::Combine(
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestChatGLMSlice::getTestCaseName);

// Smoke-test instantiation for the RoPETestQwen7bSlice fixture. The parameter
// set is the cross product of a boolean flag (true and false), the f32 element
// type and the CPU device; test names are produced by
// RoPETestQwen7bSlice::getTestCaseName.
// NOTE(review): the meaning of the boolean parameter is defined by the
// fixture, which is not visible here.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestQwen7bSlice,
RoPETestQwen7bSlice,
::testing::Combine(::testing::Values(true, false),
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestQwen7bSlice::getTestCaseName);

// Smoke-test instantiation for the RoPETestGPTJSlice fixture. The parameter
// set is the cross product of a boolean flag (true and false), the f32 element
// type and the CPU device; test names are produced by
// RoPETestGPTJSlice::getTestCaseName.
// NOTE(review): the meaning of the boolean parameter is defined by the
// fixture, which is not visible here.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestGPTJSlice,
RoPETestGPTJSlice,
::testing::Combine(::testing::Values(true, false),
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestGPTJSlice::getTestCaseName);

// Smoke-test instantiation for the RoPETestChatGLM2DRoPEStridedSlice fixture;
// test names are produced by RoPETestChatGLM2DRoPEStridedSlice::getTestCaseName.
// NOTE(review): the instantiation prefix is smoke_RoPETestChatGLM, which does
// not follow the smoke_<FixtureName> pattern used by the sibling
// instantiations -- verify whether that is intentional.
// NOTE(review): shown without +/- markers. The bare
// ::testing::Values(DEVICE_CPU) line is presumably the removed generator and
// the ::testing::Combine(f32, DEVICE_CPU) lines its replacement -- confirm
// against the commit.
INSTANTIATE_TEST_SUITE_P(smoke_RoPETestChatGLM,
RoPETestChatGLM2DRoPEStridedSlice,
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::Combine(
::testing::Values(ov::element::f32),
::testing::Values(ov::test::utils::DEVICE_CPU)),
RoPETestChatGLM2DRoPEStridedSlice::getTestCaseName);

} // namespace test
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/impls/ocl/rope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include "rope_inst.h"
#include "rope/rope_kernel_selector.h"
#include "rope/rope_kernel_ref.h"
#include "rope/rope_kernel_opt.h"

namespace cldnn {
namespace ocl {
Expand Down
Loading

0 comments on commit 9651768

Please sign in to comment.