Merge branch 'master' into yi3/4bit-cache

Signed-off-by: [email protected] <[email protected]>
openvinotoolkit · Dec 12, 2024 · 5bc75f8 · 5bc75f8
2 parents fe6c311 + 45bf77b
commit 5bc75f8
Show file tree

Hide file tree

Showing 651 changed files with 30,642 additions and 23,530 deletions.
diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt
@@ -242,7 +242,8 @@ ov_add_plugin(NAME ${TARGET_NAME}
               DEVICE_NAME "CPU"
               AS_EXTENSION
               VERSION_DEFINES_FOR src/plugin.cpp
-              SOURCES ${SOURCES} ${HEADERS})
+              SOURCES ${SOURCES} ${HEADERS}
+              ADD_CLANG_FORMAT)
 
 # give a different file name depending on target platform architecture
 if(ARM OR AARCH64)

diff --git a/src/plugins/intel_cpu/src/cache/cache_entry.h b/src/plugins/intel_cpu/src/cache/cache_entry.h
@@ -4,36 +4,34 @@
 
 #pragma once
 
-#include <memory>
 #include <functional>
+#include <memory>
+
 #include "lru_cache.h"
 
 namespace ov {
 namespace intel_cpu {
 
 class CacheEntryBase {
 public:
-    enum class LookUpStatus : int8_t {
-        Hit,
-        Miss
-    };
+    enum class LookUpStatus : int8_t { Hit, Miss };
+
 public:
     virtual ~CacheEntryBase() = default;
 };
 
 /**
  * @brief Class represents a templated record in multi cache
- * @tparam KeyType is a key type that must define hash() const method with return type convertible to size_t and define comparison operator.
+ * @tparam KeyType is a key type that must define hash() const method with return type convertible to size_t and define
+ * comparison operator.
  * @tparam ValType is a type that must meet all the requirements to the std::unordered_map mapped type
- * @tparam ImplType is a type for the internal storage. It must provide put(KeyType, ValueType) and ValueType get(const KeyType&)
- *         interface and must have constructor of type ImplType(size_t).
+ * @tparam ImplType is a type for the internal storage. It must provide put(KeyType, ValueType) and ValueType get(const
+ * KeyType&) interface and must have constructor of type ImplType(size_t).
  *
  * @note In this implementation default constructed value objects are treated as empty objects.
  */
 
-template<typename KeyType,
-         typename ValType,
-         typename ImplType = LruCache<KeyType, ValType>>
+template <typename KeyType, typename ValType, typename ImplType = LruCache<KeyType, ValType>>
 class CacheEntry : public CacheEntryBase {
 public:
     using ResultType = std::pair<ValType, LookUpStatus>;
@@ -42,11 +40,12 @@ class CacheEntry : public CacheEntryBase {
     explicit CacheEntry(size_t capacity) : _impl(capacity) {}
 
     /**
-     * @brief Searches the key in the underlying storage and returns value if it exists, or creates a value using the builder functor and adds it to
-     *        the underlying storage.
+     * @brief Searches the key in the underlying storage and returns value if it exists, or creates a value using the
+     * builder functor and adds it to the underlying storage.
      * @param key is the search key
      * @param builder is a callable object that creates the ValType object from the KeyType lval reference
-     * @return result of the operation which is a pair of the requested object of ValType and the status of whether the cache hit or miss occurred
+     * @return result of the operation which is a pair of the requested object of ValType and the status of whether the
+     * cache hit or miss occurred
      */
 
     ResultType getOrCreate(const KeyType& key, std::function<ValType(const KeyType&)> builder) {
@@ -70,5 +69,5 @@ class CacheEntry : public CacheEntryBase {
     ImplType _impl;
 };
 
-}   // namespace intel_cpu
-}   // namespace ov
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/cache/lru_cache.h b/src/plugins/intel_cpu/src/cache/lru_cache.h
@@ -10,7 +10,8 @@
 
 /**
  * @brief This is yet another implementation of a preemptive cache with LRU eviction policy.
- * @tparam Key is a key type that must define hash() const method with return type convertible to size_t and define comparison operator.
+ * @tparam Key is a key type that must define hash() const method with return type convertible to size_t and define
+ * comparison operator.
  * @tparam Value is a type that must meet all the requirements to the std::unordered_map mapped type
  *
  * @attention This cache implementation IS NOT THREAD SAFE!
@@ -19,7 +20,7 @@
 namespace ov {
 namespace intel_cpu {
 
-template<typename Key, typename Value>
+template <typename Key, typename Value>
 class LruCache {
 public:
     using value_type = std::pair<Key, Value>;
@@ -33,7 +34,7 @@ class LruCache {
      * @param value
      */
 
-    void put(const Key &key, const Value &val) {
+    void put(const Key& key, const Value& val) {
         if (0 == _capacity) {
             return;
         }
@@ -56,7 +57,7 @@ class LruCache {
      * @return Value associated with the key or default constructed instance of the Value type.
      */
 
-    Value get(const Key &key) {
+    Value get(const Key& key) {
         auto itr = _cacheMapper.find(key);
         if (itr == _cacheMapper.end()) {
             return Value();
@@ -82,13 +83,13 @@ class LruCache {
      * @brief Returns the current capacity value
      * @return the current capacity value
      */
-     size_t getCapacity() const noexcept {
-         return _capacity;
-     }
+    size_t getCapacity() const noexcept {
+        return _capacity;
+    }
 
 private:
     struct key_hasher {
-        std::size_t operator()(const Key &k) const {
+        std::size_t operator()(const Key& k) const {
             return k.hash();
         }
     };
@@ -105,5 +106,5 @@ class LruCache {
     size_t _capacity;
 };
 
-}   // namespace intel_cpu
-}   // namespace ov
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/cache/multi_cache.cpp b/src/plugins/intel_cpu/src/cache/multi_cache.cpp
@@ -9,5 +9,5 @@ namespace intel_cpu {
 
 std::atomic_size_t MultiCache::_typeIdCounter{0};
 
-}   // namespace intel_cpu
-}   // namespace ov
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/cache/multi_cache.h b/src/plugins/intel_cpu/src/cache/multi_cache.h
@@ -4,9 +4,10 @@
 
 #pragma once
 
+#include <atomic>
 #include <functional>
 #include <unordered_map>
-#include <atomic>
+
 #include "cache_entry.h"
 
 namespace ov {
@@ -20,27 +21,28 @@ namespace intel_cpu {
 
 class MultiCache {
 public:
-    template<typename KeyType, typename ValueType>
+    template <typename KeyType, typename ValueType>
     using EntryTypeT = CacheEntry<KeyType, ValueType>;
     using EntryBasePtr = std::shared_ptr<CacheEntryBase>;
-    template<typename KeyType, typename ValueType>
+    template <typename KeyType, typename ValueType>
     using EntryPtr = std::shared_ptr<EntryTypeT<KeyType, ValueType>>;
 
 public:
     /**
-    * @param capacity here means maximum records limit FOR EACH entry specified by a pair of Key/Value types.
-    * @note zero capacity means empty cache so no records are stored and no entries are created
-    */
+     * @param capacity here means maximum records limit FOR EACH entry specified by a pair of Key/Value types.
+     * @note zero capacity means empty cache so no records are stored and no entries are created
+     */
     explicit MultiCache(size_t capacity) : _capacity(capacity) {}
 
     /**
-    * @brief Searches a value of ValueType in the cache using the provided key or creates a new ValueType instance (if nothing was found)
-    *       using the key and the builder functor and adds the new record to the cache
-    * @param key is the search key
-    * @param builder is a callable object that creates the ValType object from the KeyType lval reference.
-    *       Also the builder type is used for the ValueType deduction
-    * @return result of the operation which is a pair of the requested object of ValType and the status of whether the cache hit or miss occurred
-    */
+     * @brief Searches a value of ValueType in the cache using the provided key or creates a new ValueType instance (if
+     * nothing was found) using the key and the builder functor and adds the new record to the cache
+     * @param key is the search key
+     * @param builder is a callable object that creates the ValType object from the KeyType lval reference.
+     *       Also the builder type is used for the ValueType deduction
+     * @return result of the operation which is a pair of the requested object of ValType and the status of whether the
+     * cache hit or miss occurred
+     */
     template <typename KeyType,
               typename BuilderType,
 #if (defined(_MSVC_LANG) && (_MSVC_LANG > 201703L)) || (defined(__cplusplus) && (__cplusplus > 201703L))
@@ -54,9 +56,9 @@ class MultiCache {
     }
 
 private:
-    template<typename T>
+    template <typename T>
     size_t getTypeId();
-    template<typename KeyType, typename ValueType>
+    template <typename KeyType, typename ValueType>
     EntryPtr<KeyType, ValueType> getEntry();
 
 private:
@@ -65,13 +67,13 @@ class MultiCache {
     std::unordered_map<size_t, EntryBasePtr> _storage;
 };
 
-template<typename T>
+template <typename T>
 size_t MultiCache::getTypeId() {
     static size_t id = _typeIdCounter.fetch_add(1);
     return id;
 }
 
-template<typename KeyType, typename ValueType>
+template <typename KeyType, typename ValueType>
 MultiCache::EntryPtr<KeyType, ValueType> MultiCache::getEntry() {
     using EntryType = EntryTypeT<KeyType, ValueType>;
     size_t id = getTypeId<EntryType>();
@@ -88,5 +90,5 @@ using MultiCacheWeakCPtr = std::weak_ptr<const MultiCache>;
 using MultiCachePtr = std::shared_ptr<MultiCache>;
 using MultiCacheCPtr = std::shared_ptr<const MultiCache>;
 
-}   // namespace intel_cpu
-}   // namespace ov
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -3,29 +3,30 @@
 //
 
 #include "compiled_model.h"
+
+#include <cstring>
+#include <utility>
+
 #include "async_infer_request.h"
+#include "cpu/x64/cpu_isa_traits.hpp"
 #include "infer_request.h"
 #include "itt.h"
 #include "low_precision/low_precision.hpp"
 #include "memory_state.h"
 #include "openvino/core/type/element_type.hpp"
 #include "openvino/runtime/intel_cpu/properties.hpp"
-#include "openvino/runtime/threading/executor_manager.hpp"
-#include "transformations/transformation_pipeline.h"
 #include "openvino/runtime/properties.hpp"
-#include "openvino/util/common_util.hpp"
+#include "openvino/runtime/threading/cpu_message.hpp"
 #include "openvino/runtime/threading/cpu_streams_executor.hpp"
-#include "transformations/utils/utils.hpp"
 #include "openvino/runtime/threading/cpu_streams_info.hpp"
-#include "openvino/runtime/threading/cpu_message.hpp"
+#include "openvino/runtime/threading/executor_manager.hpp"
+#include "openvino/util/common_util.hpp"
+#include "transformations/transformation_pipeline.h"
+#include "transformations/utils/utils.hpp"
 #include "utils/serialize.hpp"
 
-#include "cpu/x64/cpu_isa_traits.hpp"
-#include <cstring>
-#include <utility>
-
 #if defined(OV_CPU_WITH_ACL)
-#include "nodes/executors/acl/acl_ie_scheduler.hpp"
+#    include "nodes/executors/acl/acl_ie_scheduler.hpp"
 #endif
 
 using namespace ov::threading;
@@ -333,8 +334,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
         return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type(
             config.fcSparseWeiDecompressionRate);
     } else if (name == ov::hint::dynamic_quantization_group_size) {
-        return decltype(ov::hint::dynamic_quantization_group_size)::value_type(
-            config.fcDynamicQuantizationGroupSize);
+        return decltype(ov::hint::dynamic_quantization_group_size)::value_type(config.fcDynamicQuantizationGroupSize);
     } else if (name == ov::hint::kv_cache_precision) {
         return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision);
     } else if (name == ov::hint::key_cache_precision) {

diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
@@ -94,5 +94,5 @@ class CompiledModel : public ov::ICompiledModel {
     bool m_has_sub_compiled_models = false;
 };
 
-}   // namespace intel_cpu
-}   // namespace ov
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
@@ -4,19 +4,19 @@
 
 #include "config.h"
 
+#include <algorithm>
+#include <map>
+#include <string>
+
 #include "cpu/x64/cpu_isa_traits.hpp"
 #include "openvino/core/parallel.hpp"
 #include "openvino/core/type/element_type_traits.hpp"
 #include "openvino/runtime/intel_cpu/properties.hpp"
 #include "openvino/runtime/internal_properties.hpp"
 #include "openvino/runtime/properties.hpp"
+#include "utils/cpu_utils.hpp"
 #include "utils/debug_capabilities.h"
 #include "utils/precision_support.h"
-#include "utils/cpu_utils.hpp"
-
-#include <algorithm>
-#include <map>
-#include <string>
 
 namespace ov {
 namespace intel_cpu {
@@ -61,9 +61,7 @@ Config::Config() {
  */
 void Config::applyDebugCapsProperties() {
     // always enable perf counters for verbose, performance summary and average counters
-    if (!debugCaps.verbose.empty() ||
-        !debugCaps.summaryPerf.empty() ||
-        !debugCaps.averageCountersPath.empty()) {
+    if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty() || !debugCaps.averageCountersPath.empty()) {
         collectPerfCounters = true;
     }
 }
@@ -151,10 +149,10 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                 logLevel = val.as<ov::log::Level>();
             } catch (const ov::Exception&) {
                 OPENVINO_THROW("Wrong value ",
-                        val.as<std::string>(),
-                        " for property key ",
-                        key,
-                        ". Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE.");
+                               val.as<std::string>(),
+                               " for property key ",
+                               key,
+                               ". Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE.");
             }
         } else if (key == ov::hint::num_requests.name()) {
             try {
@@ -243,8 +241,8 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                 fcDynamicQuantizationGroupSize = val.as<uint64_t>();
             } catch (const ov::Exception&) {
                 OPENVINO_THROW("Wrong value for property key ",
-                                ov::hint::dynamic_quantization_group_size.name(),
-                                ". Expected only unsinged integer numbers");
+                               ov::hint::dynamic_quantization_group_size.name(),
+                               ". Expected only unsinged integer numbers");
             }
         } else if (key == ov::enable_profiling.name()) {
             try {
@@ -366,7 +364,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                 if (one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) {
                     kvCachePrecision = prec;
                 } else {
-                     OPENVINO_THROW("invalid value");
+                    OPENVINO_THROW("invalid value");
                 }
             } catch (ov::Exception&) {
                 OPENVINO_THROW("Wrong value ",
@@ -511,10 +509,13 @@ void Config::updateProperties() {
 
 void Config::applyRtInfo(const std::shared_ptr<const ov::Model>& model) {
     // if user sets explicitly, it will be higher priority than rt_info
-    if (!kvCachePrecisionSetExplicitly && model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) {
-        this->kvCachePrecision = model->get_rt_info<ov::element::Type>({"runtime_options", ov::hint::kv_cache_precision.name()});
+    if (!kvCachePrecisionSetExplicitly &&
+        model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) {
+        this->kvCachePrecision =
+            model->get_rt_info<ov::element::Type>({"runtime_options", ov::hint::kv_cache_precision.name()});
     }
-    if (!fcDynamicQuantizationGroupSizeSetExplicitly && model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) {
+    if (!fcDynamicQuantizationGroupSizeSetExplicitly &&
+        model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) {
         this->fcDynamicQuantizationGroupSize =
             model->get_rt_info<uint64_t>({"runtime_options", ov::hint::dynamic_quantization_group_size.name()});
     }