Skip to content

Commit

Permalink
Removed not needed sample.
Browse files Browse the repository at this point in the history
  • Loading branch information
popovaan committed Jul 23, 2024
1 parent 48c06f4 commit 480d382
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 155 deletions.
1 change: 0 additions & 1 deletion samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
add_subdirectory(cpp/beam_search_causal_lm)
add_subdirectory(cpp/chat_sample)
add_subdirectory(cpp/continuous_batching_accuracy)
add_subdirectory(cpp/continuous_batching_multiple_generation_sample)
add_subdirectory(cpp/continuous_batching_benchmark)
add_subdirectory(cpp/greedy_causal_lm)
add_subdirectory(cpp/multinomial_causal_lm)
Expand Down

This file was deleted.

This file was deleted.

9 changes: 5 additions & 4 deletions src/cpp/src/block_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ class BlockAllocator {
}
// TODO: Currently we cache all allocated blocks which might be redundant for beam search,
// where blocks of non-used candidates are not needed in cache.
// This part can be probably improved if we cache only blocks for resulting finished sequences.
// This part can probably be improved if we cache only the blocks for the prompt.
if (cached_blocks.find(hash) != cached_blocks.end()) {
// use cached block from cached_blocks
block = cached_blocks[hash];
Expand Down Expand Up @@ -205,7 +205,7 @@ class BlockAllocator {
return nullptr;
}

KVCacheBlock::Ptr get_cashed_block(size_t hash, std::map<uint64_t, KVCacheBlock::Ptr>& cached_blocks) {
KVCacheBlock::Ptr get_cached_block(size_t hash, std::map<uint64_t, KVCacheBlock::Ptr>& cached_blocks) {
auto block = m_evictor.get_block(hash);
if (block != nullptr) {
// use cached block from evictor
Expand All @@ -231,6 +231,7 @@ class BlockManager {
BlockAllocator m_allocator;
bool m_enable_prefix_caching;
size_t m_block_size;
// TODO: caching time can probably be improved if we use a prefix tree
std::map<uint64_t, KVCacheBlock::Ptr> cached_blocks;

// stores blocks for each sequence (not sequence group)
Expand Down Expand Up @@ -539,7 +540,7 @@ class BlockManager {
}
// restore fully filled blocks
auto hash = sequence->get_hash(content_len, prompt_ids);
auto block = m_allocator.get_cashed_block(hash, cached_blocks);
auto block = m_allocator.get_cached_block(hash, cached_blocks);
if (block != nullptr) {
block->set_timestamp(std::chrono::system_clock::now());
m_block_table[seq_id].push_back(block);
Expand All @@ -554,7 +555,7 @@ class BlockManager {
break;
}
auto hash = sequence->get_hash(prev_iteration_content_len + i, prompt_ids);
auto block = m_allocator.get_cashed_block(hash, cached_blocks);
auto block = m_allocator.get_cached_block(hash, cached_blocks);
if (block != nullptr) {
block->set_timestamp(std::chrono::system_clock::now());
m_block_table[seq_id].push_back(block);
Expand Down

0 comments on commit 480d382

Please sign in to comment.