Skip to content

Commit

Permalink
Merge branch 'master' into yi3/4bit-cache
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangYiIntel committed Dec 12, 2024
2 parents fe6c311 + 45bf77b commit 5bc75f8
Show file tree
Hide file tree
Showing 651 changed files with 30,642 additions and 23,530 deletions.
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,8 @@ ov_add_plugin(NAME ${TARGET_NAME}
DEVICE_NAME "CPU"
AS_EXTENSION
VERSION_DEFINES_FOR src/plugin.cpp
SOURCES ${SOURCES} ${HEADERS})
SOURCES ${SOURCES} ${HEADERS}
ADD_CLANG_FORMAT)

# give a different file name depending on target platform architecture
if(ARM OR AARCH64)
Expand Down
31 changes: 15 additions & 16 deletions src/plugins/intel_cpu/src/cache/cache_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,34 @@

#pragma once

#include <memory>
#include <functional>
#include <memory>

#include "lru_cache.h"

namespace ov {
namespace intel_cpu {

class CacheEntryBase {
public:
enum class LookUpStatus : int8_t {
Hit,
Miss
};
enum class LookUpStatus : int8_t { Hit, Miss };

public:
virtual ~CacheEntryBase() = default;
};

/**
* @brief Class represents a templated record in multi cache
* @tparam KeyType is a key type that must define hash() const method with return type convertible to size_t and define comparison operator.
* @tparam KeyType is a key type that must define hash() const method with return type convertible to size_t and define
* comparison operator.
* @tparam ValType is a type that must meet all the requirements to the std::unordered_map mapped type
* @tparam ImplType is a type for the internal storage. It must provide put(KeyType, ValueType) and ValueType get(const KeyType&)
* interface and must have constructor of type ImplType(size_t).
* @tparam ImplType is a type for the internal storage. It must provide put(KeyType, ValueType) and ValueType get(const
* KeyType&) interface and must have constructor of type ImplType(size_t).
*
* @note In this implementation default constructed value objects are treated as empty objects.
*/

template<typename KeyType,
typename ValType,
typename ImplType = LruCache<KeyType, ValType>>
template <typename KeyType, typename ValType, typename ImplType = LruCache<KeyType, ValType>>
class CacheEntry : public CacheEntryBase {
public:
using ResultType = std::pair<ValType, LookUpStatus>;
Expand All @@ -42,11 +40,12 @@ class CacheEntry : public CacheEntryBase {
explicit CacheEntry(size_t capacity) : _impl(capacity) {}

/**
* @brief Searches the key in the underlying storage and returns value if it exists, or creates a value using the builder functor and adds it to
* the underlying storage.
* @brief Searches the key in the underlying storage and returns value if it exists, or creates a value using the
* builder functor and adds it to the underlying storage.
* @param key is the search key
* @param builder is a callable object that creates the ValType object from the KeyType lval reference
* @return result of the operation which is a pair of the requested object of ValType and the status of whether the cache hit or miss occurred
* @return result of the operation which is a pair of the requested object of ValType and the status of whether the
* cache hit or miss occurred
*/

ResultType getOrCreate(const KeyType& key, std::function<ValType(const KeyType&)> builder) {
Expand All @@ -70,5 +69,5 @@ class CacheEntry : public CacheEntryBase {
ImplType _impl;
};

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
21 changes: 11 additions & 10 deletions src/plugins/intel_cpu/src/cache/lru_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

/**
* @brief This is yet another implementation of a preemptive cache with LRU eviction policy.
* @tparam Key is a key type that must define hash() const method with return type convertible to size_t and define comparison operator.
* @tparam Key is a key type that must define hash() const method with return type convertible to size_t and define
* comparison operator.
* @tparam Value is a type that must meet all the requirements to the std::unordered_map mapped type
*
* @attention This cache implementation IS NOT THREAD SAFE!
Expand All @@ -19,7 +20,7 @@
namespace ov {
namespace intel_cpu {

template<typename Key, typename Value>
template <typename Key, typename Value>
class LruCache {
public:
using value_type = std::pair<Key, Value>;
Expand All @@ -33,7 +34,7 @@ class LruCache {
* @param value
*/

void put(const Key &key, const Value &val) {
void put(const Key& key, const Value& val) {
if (0 == _capacity) {
return;
}
Expand All @@ -56,7 +57,7 @@ class LruCache {
* @return Value associated with the key or default constructed instance of the Value type.
*/

Value get(const Key &key) {
Value get(const Key& key) {
auto itr = _cacheMapper.find(key);
if (itr == _cacheMapper.end()) {
return Value();
Expand All @@ -82,13 +83,13 @@ class LruCache {
* @brief Returns the current capacity value
* @return the current capacity value
*/
size_t getCapacity() const noexcept {
return _capacity;
}
size_t getCapacity() const noexcept {
return _capacity;
}

private:
struct key_hasher {
std::size_t operator()(const Key &k) const {
std::size_t operator()(const Key& k) const {
return k.hash();
}
};
Expand All @@ -105,5 +106,5 @@ class LruCache {
size_t _capacity;
};

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/cache/multi_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ namespace intel_cpu {

std::atomic_size_t MultiCache::_typeIdCounter{0};

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
40 changes: 21 additions & 19 deletions src/plugins/intel_cpu/src/cache/multi_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

#pragma once

#include <atomic>
#include <functional>
#include <unordered_map>
#include <atomic>

#include "cache_entry.h"

namespace ov {
Expand All @@ -20,27 +21,28 @@ namespace intel_cpu {

class MultiCache {
public:
template<typename KeyType, typename ValueType>
template <typename KeyType, typename ValueType>
using EntryTypeT = CacheEntry<KeyType, ValueType>;
using EntryBasePtr = std::shared_ptr<CacheEntryBase>;
template<typename KeyType, typename ValueType>
template <typename KeyType, typename ValueType>
using EntryPtr = std::shared_ptr<EntryTypeT<KeyType, ValueType>>;

public:
/**
* @param capacity here means maximum records limit FOR EACH entry specified by a pair of Key/Value types.
* @note zero capacity means empty cache so no records are stored and no entries are created
*/
* @param capacity here means maximum records limit FOR EACH entry specified by a pair of Key/Value types.
* @note zero capacity means empty cache so no records are stored and no entries are created
*/
explicit MultiCache(size_t capacity) : _capacity(capacity) {}

/**
* @brief Searches a value of ValueType in the cache using the provided key or creates a new ValueType instance (if nothing was found)
* using the key and the builder functor and adds the new record to the cache
* @param key is the search key
* @param builder is a callable object that creates the ValType object from the KeyType lval reference.
* Also the builder type is used for the ValueType deduction
* @return result of the operation which is a pair of the requested object of ValType and the status of whether the cache hit or miss occurred
*/
* @brief Searches a value of ValueType in the cache using the provided key or creates a new ValueType instance (if
* nothing was found) using the key and the builder functor and adds the new record to the cache
* @param key is the search key
* @param builder is a callable object that creates the ValType object from the KeyType lval reference.
* Also the builder type is used for the ValueType deduction
* @return result of the operation which is a pair of the requested object of ValType and the status of whether the
* cache hit or miss occurred
*/
template <typename KeyType,
typename BuilderType,
#if (defined(_MSVC_LANG) && (_MSVC_LANG > 201703L)) || (defined(__cplusplus) && (__cplusplus > 201703L))
Expand All @@ -54,9 +56,9 @@ class MultiCache {
}

private:
template<typename T>
template <typename T>
size_t getTypeId();
template<typename KeyType, typename ValueType>
template <typename KeyType, typename ValueType>
EntryPtr<KeyType, ValueType> getEntry();

private:
Expand All @@ -65,13 +67,13 @@ class MultiCache {
std::unordered_map<size_t, EntryBasePtr> _storage;
};

template<typename T>
/**
 * @brief Returns the numeric identifier associated with the type T.
 * @note The identifier is drawn from the atomic _typeIdCounter the first time a given instantiation is called and is
 * reused on every later call, so each distinct T keeps its own id.
 */
template <typename T>
size_t MultiCache::getTypeId() {
    static const size_t typeId = _typeIdCounter.fetch_add(1);
    return typeId;
}

template<typename KeyType, typename ValueType>
template <typename KeyType, typename ValueType>
MultiCache::EntryPtr<KeyType, ValueType> MultiCache::getEntry() {
using EntryType = EntryTypeT<KeyType, ValueType>;
size_t id = getTypeId<EntryType>();
Expand All @@ -88,5 +90,5 @@ using MultiCacheWeakCPtr = std::weak_ptr<const MultiCache>;
using MultiCachePtr = std::shared_ptr<MultiCache>;
using MultiCacheCPtr = std::shared_ptr<const MultiCache>;

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
24 changes: 12 additions & 12 deletions src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,30 @@
//

#include "compiled_model.h"

#include <cstring>
#include <utility>

#include "async_infer_request.h"
#include "cpu/x64/cpu_isa_traits.hpp"
#include "infer_request.h"
#include "itt.h"
#include "low_precision/low_precision.hpp"
#include "memory_state.h"
#include "openvino/core/type/element_type.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#include "transformations/transformation_pipeline.h"
#include "openvino/runtime/properties.hpp"
#include "openvino/util/common_util.hpp"
#include "openvino/runtime/threading/cpu_message.hpp"
#include "openvino/runtime/threading/cpu_streams_executor.hpp"
#include "transformations/utils/utils.hpp"
#include "openvino/runtime/threading/cpu_streams_info.hpp"
#include "openvino/runtime/threading/cpu_message.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#include "openvino/util/common_util.hpp"
#include "transformations/transformation_pipeline.h"
#include "transformations/utils/utils.hpp"
#include "utils/serialize.hpp"

#include "cpu/x64/cpu_isa_traits.hpp"
#include <cstring>
#include <utility>

#if defined(OV_CPU_WITH_ACL)
#include "nodes/executors/acl/acl_ie_scheduler.hpp"
# include "nodes/executors/acl/acl_ie_scheduler.hpp"
#endif

using namespace ov::threading;
Expand Down Expand Up @@ -333,8 +334,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type(
config.fcSparseWeiDecompressionRate);
} else if (name == ov::hint::dynamic_quantization_group_size) {
return decltype(ov::hint::dynamic_quantization_group_size)::value_type(
config.fcDynamicQuantizationGroupSize);
return decltype(ov::hint::dynamic_quantization_group_size)::value_type(config.fcDynamicQuantizationGroupSize);
} else if (name == ov::hint::kv_cache_precision) {
return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision);
} else if (name == ov::hint::key_cache_precision) {
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/compiled_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,5 @@ class CompiledModel : public ov::ICompiledModel {
bool m_has_sub_compiled_models = false;
};

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
37 changes: 19 additions & 18 deletions src/plugins/intel_cpu/src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

#include "config.h"

#include <algorithm>
#include <map>
#include <string>

#include "cpu/x64/cpu_isa_traits.hpp"
#include "openvino/core/parallel.hpp"
#include "openvino/core/type/element_type_traits.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
#include "openvino/runtime/internal_properties.hpp"
#include "openvino/runtime/properties.hpp"
#include "utils/cpu_utils.hpp"
#include "utils/debug_capabilities.h"
#include "utils/precision_support.h"
#include "utils/cpu_utils.hpp"

#include <algorithm>
#include <map>
#include <string>

namespace ov {
namespace intel_cpu {
Expand Down Expand Up @@ -61,9 +61,7 @@ Config::Config() {
*/
void Config::applyDebugCapsProperties() {
// always enable perf counters for verbose, performance summary and average counters
if (!debugCaps.verbose.empty() ||
!debugCaps.summaryPerf.empty() ||
!debugCaps.averageCountersPath.empty()) {
if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty() || !debugCaps.averageCountersPath.empty()) {
collectPerfCounters = true;
}
}
Expand Down Expand Up @@ -151,10 +149,10 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
logLevel = val.as<ov::log::Level>();
} catch (const ov::Exception&) {
OPENVINO_THROW("Wrong value ",
val.as<std::string>(),
" for property key ",
key,
". Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE.");
val.as<std::string>(),
" for property key ",
key,
". Expected only ov::log::Level::NO/ERR/WARNING/INFO/DEBUG/TRACE.");
}
} else if (key == ov::hint::num_requests.name()) {
try {
Expand Down Expand Up @@ -243,8 +241,8 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
fcDynamicQuantizationGroupSize = val.as<uint64_t>();
} catch (const ov::Exception&) {
OPENVINO_THROW("Wrong value for property key ",
ov::hint::dynamic_quantization_group_size.name(),
". Expected only unsinged integer numbers");
ov::hint::dynamic_quantization_group_size.name(),
". Expected only unsinged integer numbers");
}
} else if (key == ov::enable_profiling.name()) {
try {
Expand Down Expand Up @@ -366,7 +364,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
if (one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) {
kvCachePrecision = prec;
} else {
OPENVINO_THROW("invalid value");
OPENVINO_THROW("invalid value");
}
} catch (ov::Exception&) {
OPENVINO_THROW("Wrong value ",
Expand Down Expand Up @@ -511,10 +509,13 @@ void Config::updateProperties() {

void Config::applyRtInfo(const std::shared_ptr<const ov::Model>& model) {
// if user sets explicitly, it will be higher priority than rt_info
if (!kvCachePrecisionSetExplicitly && model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) {
this->kvCachePrecision = model->get_rt_info<ov::element::Type>({"runtime_options", ov::hint::kv_cache_precision.name()});
if (!kvCachePrecisionSetExplicitly &&
model->has_rt_info({"runtime_options", ov::hint::kv_cache_precision.name()})) {
this->kvCachePrecision =
model->get_rt_info<ov::element::Type>({"runtime_options", ov::hint::kv_cache_precision.name()});
}
if (!fcDynamicQuantizationGroupSizeSetExplicitly && model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) {
if (!fcDynamicQuantizationGroupSizeSetExplicitly &&
model->has_rt_info({"runtime_options", ov::hint::dynamic_quantization_group_size.name()})) {
this->fcDynamicQuantizationGroupSize =
model->get_rt_info<uint64_t>({"runtime_options", ov::hint::dynamic_quantization_group_size.name()});
}
Expand Down
Loading

0 comments on commit 5bc75f8

Please sign in to comment.