Skip to content

Commit

Permalink
Fixed missed cache error.
Browse files — browse the repository at this point in the history
  • Loading branch information
popovaan committed Jul 17, 2024
1 parent e67b79c commit a154dba
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ int main(int argc, char* argv[]) try {
ov::genai::greedy(),
};

- size_t num_chat_iterations = 5;
+ size_t num_chat_iterations = 10;

std::vector<std::string> prompts(num_prompts);
std::vector<ov::genai::GenerationConfig> sampling_params(num_prompts);
Expand All @@ -67,7 +67,7 @@ int main(int argc, char* argv[]) try {

ov::genai::SchedulerConfig scheduler_config {
// batch size
- .max_num_batched_tokens = 32,
+ .max_num_batched_tokens = 64,
// cache params
.num_kv_blocks = 364,
.block_size = 32,
Expand All @@ -83,7 +83,7 @@ int main(int argc, char* argv[]) try {
for(size_t i = 0; i<num_chat_iterations; i++) {
std::string question = conversation_history + prompt_examples[i % prompt_examples.size()];

- std::cout << "History: " << question << std::endl;
+ std::cout <<"Iteration "<< i << std::endl << "History: " << question << std::endl;
std::vector<ov::genai::GenerationResult> generation_results = pipe.generate({question}, sampling_params);

for (size_t request_id = 0; request_id < generation_results.size(); ++request_id) {
Expand Down
4 changes: 2 additions & 2 deletions src/cpp/src/block_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,8 @@ class BlockManager {

ov::genai::KVCacheBlock::Ptr block = nullptr;
if (m_enable_prefix_caching) {
- size_t num_hashed_tokens = allocated_content + (i + 1) * m_block_size < content_length ? m_block_size : num_hashed_tokens_in_last_block;
- auto hash = sequence->get_hash(allocated_content + i * m_block_size + num_hashed_tokens, prompt_ids);
+ size_t num_hashed_tokens = (i + 1) * m_block_size + allocated_content <= content_length ? (i + 1) * m_block_size + allocated_content: num_hashed_tokens_in_last_block + allocated_content;
+ auto hash = sequence->get_hash(num_hashed_tokens, prompt_ids);
block = m_allocator.allocate_block(hash, num_hashed_tokens, cashed_blocks);
}
else {
Expand Down
3 changes: 1 addition & 2 deletions src/cpp/src/sequence_group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,10 @@ class Sequence {
OPENVINO_ASSERT(content_length <= prompt_ids.size() + m_generated_ids.size());
content.insert( content.end(), prompt_ids.begin(), prompt_ids.begin() + std::min(prompt_ids.size(), content_length));
if (content_length > prompt_ids.size()) {
- content.insert(content.end(), m_generated_ids.begin(), m_generated_ids.begin() + std::min(m_generated_ids.size(), content_length - prompt_ids.size()));
+ content.insert(content.end(), m_generated_ids.begin(), m_generated_ids.begin() + content_length - prompt_ids.size());
}
const char* data = reinterpret_cast<const char*>(content.data());
std::size_t size = content.size() * sizeof(content[0]);

return std::hash<std::string_view>{}(std::string_view(data, size));
}
};
Expand Down

0 comments on commit a154dba

Please sign in to comment.