Skip to content

Commit

Permalink
[CPU]add cache precision check
Browse files Browse the repository at this point in the history
Signed-off-by: Zhang Yi3 <[email protected]>
  • Loading branch information
zhangYiIntel committed Dec 11, 2024
1 parent e56639a commit a34ce8b
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/plugins/intel_cpu/src/nodes/paged_attn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@ void PagedAttention::execute(dnnl::stream strm) {

bool PagedAttention::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
try {
auto vCachePrecision = op->get_input_element_type(PagedAttentionExecutor::ID_VCACHE);
auto kCachePrecision = op->get_input_element_type(PagedAttentionExecutor::ID_KCACHE);
if (one_of(vCachePrecision, ov::element::i4, ov::element::u4, ov::element::u8)) {
if (kCachePrecision != ov::element::u8) {
errorMessage = "PageAttn key value cache compression doesn't support key cache prec " + kCachePrecision.to_string() + " value cache prec " + vCachePrecision.to_string();
return false;
}
}
int orgInput = static_cast<int>(op->get_input_size());
if (op->get_type_name() == std::string("PagedAttentionExtension") && orgInput == PagedAttentionExecutor::ID_SLIDING_WINDOW + 1) {
return true;
Expand Down

0 comments on commit a34ce8b

Please sign in to comment.