Skip to content

Commit

Permalink
CB: fixed scheduler perf on old platforms (openvinotoolkit#1284)
Browse files Browse the repository at this point in the history
Ported openvinotoolkit#1283 to
current master
  • Loading branch information
ilya-lavrenov committed Dec 3, 2024
1 parent f8c7226 commit 91bdf95
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
20 changes: 14 additions & 6 deletions src/cpp/src/block_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

#include "sequence_group.hpp"


namespace ov::genai {

class KVCacheBlock {
Expand Down Expand Up @@ -188,7 +187,10 @@ class CacheStateDumper;
*/
class BlockAllocator {
std::vector<std::list<KVCacheBlock::Ptr>> m_free_blocks;
int m_total_num_blocks;
// We track the per-layer free block count in m_free_blocks_num instead of calling m_free_blocks[X].size(),
// as a workaround for old C++ standard library implementations in which std::list::size() is not constant-time;
// see https://stackoverflow.com/questions/13157164/why-isnt-stdlist-size-constant-time
std::vector<size_t> m_free_blocks_num;
size_t m_total_num_blocks;
friend class CacheStateDumper;
size_t m_num_layers;
bool m_enable_prefix_caching;
Expand All @@ -202,8 +204,8 @@ class BlockAllocator {
* @param num_layers The number of separate attention layers with KV caches in the LLM associated with the pipeline.
* Blocks returned will be vectors with this size, each vector entry to be associated with a separate layer's KV cache.
*/
BlockAllocator(int num_blocks, bool enable_prefix_caching, size_t num_layers = 1) :
m_total_num_blocks(num_blocks), m_num_layers(num_layers), m_enable_prefix_caching(enable_prefix_caching), m_overwriteable_blocks(num_layers) {
BlockAllocator(size_t num_blocks, bool enable_prefix_caching, size_t num_layers = 1) :
m_free_blocks_num(num_layers, num_blocks), m_total_num_blocks(num_blocks), m_num_layers(num_layers), m_enable_prefix_caching(enable_prefix_caching), m_overwriteable_blocks(num_layers) {
OPENVINO_ASSERT(num_layers != 0, "num_layers must be non-zero");
m_free_blocks.resize(m_num_layers);
for (auto& per_layer_block_list : m_free_blocks) {
Expand All @@ -224,7 +226,7 @@ class BlockAllocator {
* @return Number of free blocks for this layer.
*/
size_t num_free_blocks(size_t layer_idx) const {
return m_free_blocks[layer_idx].size() + m_overwriteable_blocks.num_blocks();
return m_free_blocks_num[layer_idx] + num_overwriteable_blocks();
}

/**
Expand Down Expand Up @@ -270,6 +272,7 @@ class BlockAllocator {
block_ptr->release();
if (block_ptr->is_free()) {
m_free_blocks[layer_idx].push_back(block_ptr);
++m_free_blocks_num[layer_idx];
}
}

Expand Down Expand Up @@ -325,6 +328,7 @@ class BlockAllocator {
// actual collision case
for (size_t layer_idx = 0; layer_idx < colliding_blocks_per_layer.size(); layer_idx++) {
m_free_blocks[layer_idx].push_back(colliding_blocks_per_layer[layer_idx]);
++m_free_blocks_num[layer_idx];
}
}
m_overwriteable_blocks.add(blocks_for_all_layers);
Expand All @@ -333,12 +337,14 @@ class BlockAllocator {
// TODO (vshampor): more fine-grained hash store control
for (size_t layer_idx = 0; layer_idx < blocks_for_all_layers.size(); layer_idx++) {
m_free_blocks[layer_idx].push_back(blocks_for_all_layers[layer_idx]);
++m_free_blocks_num[layer_idx];
}
}
}
else {
for (size_t layer_idx = 0; layer_idx < blocks_for_all_layers.size(); layer_idx++) {
m_free_blocks[layer_idx].push_back(blocks_for_all_layers[layer_idx]);
++m_free_blocks_num[layer_idx];
}
}
}
Expand Down Expand Up @@ -368,6 +374,7 @@ class BlockAllocator {
KVCacheBlock::Ptr allocated_block = m_free_blocks[layer_idx].front();
allocated_block->increment();
m_free_blocks[layer_idx].pop_front();
--m_free_blocks_num[layer_idx];
return allocated_block;
}

Expand All @@ -386,7 +393,7 @@ class BlockAllocator {
OPENVINO_ASSERT(m_enable_prefix_caching);
OPENVINO_ASSERT(can_allocate_blocks(1));

if (m_free_blocks[0].size() > 0) {
if (m_free_blocks_num[0] > 0) {
// allocate new empty block
BlocksPerLayer allocated_blocks;
allocated_blocks.reserve(m_num_layers);
Expand All @@ -396,6 +403,7 @@ class BlockAllocator {
allocated_block->set_hash(hash);
allocated_blocks.push_back(allocated_block);
m_free_blocks[i].pop_front();
--m_free_blocks_num[i];
}
cached_blocks[hash] = allocated_blocks;
return allocated_blocks;
Expand Down
3 changes: 3 additions & 0 deletions src/cpp/src/sequence_group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,9 @@ class SequenceGroup {
}

void clear_waiting_sequences() {
if (!is_waiting())
return;

for (size_t seq_id = 0; seq_id < m_sequences.size(); ++seq_id) {
if (m_sequences[seq_id]->is_waiting()) {
m_sequences[seq_id]->set_status(SequenceStatus::RUNNING);
Expand Down

0 comments on commit 91bdf95

Please sign in to comment.