[CPU] Add cache precision check

Signed-off-by: Zhang Yi3 <[email protected]>
openvinotoolkit · Dec 11, 2024 · a34ce8b · a34ce8b
1 parent e56639a
commit a34ce8b
Showing 1 changed file with 8 additions and 0 deletions.
diff --git a/src/plugins/intel_cpu/src/nodes/paged_attn.cpp b/src/plugins/intel_cpu/src/nodes/paged_attn.cpp
@@ -196,6 +196,14 @@ void PagedAttention::execute(dnnl::stream strm) {
 
 bool PagedAttention::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
+        auto vCachePrecision = op->get_input_element_type(PagedAttentionExecutor::ID_VCACHE);
+        auto kCachePrecision = op->get_input_element_type(PagedAttentionExecutor::ID_KCACHE);
+        if (one_of(vCachePrecision, ov::element::i4, ov::element::u4, ov::element::u8)) {
+            if (kCachePrecision != ov::element::u8) {
+                errorMessage = "PageAttn key value cache compression doesn't support key cache prec " + kCachePrecision.to_string() + " value cache prec " + vCachePrecision.to_string();
+                return false;
+            }
+        }
         int orgInput = static_cast<int>(op->get_input_size());
         if (op->get_type_name() == std::string("PagedAttentionExtension") && orgInput == PagedAttentionExecutor::ID_SLIDING_WINDOW + 1) {
             return true;