From 3441735dd13e7148fcc4d3396c6185ac35e38179 Mon Sep 17 00:00:00 2001
From: Jacob-Junqi Tian
Date: Thu, 7 Mar 2024 15:49:46 -0500
Subject: [PATCH] Added test case of lora block_hash conflict.

---
 tests/test_cache_block_hashing.py | 45 ++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py
index c2067e52b59c0..3c2f22ab8a0d4 100644
--- a/tests/test_cache_block_hashing.py
+++ b/tests/test_cache_block_hashing.py
@@ -2,8 +2,11 @@
 
 Run `pytest tests/test_cache_block_hashing.py`.
 """
+from typing import List, Optional
+
 import pytest
 
+from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizer import TokenizerGroup
 from vllm.sequence import Sequence
 
@@ -36,7 +39,9 @@ def flatten_2d(li):
 
 @pytest.mark.parametrize("model", ["facebook/opt-125m"])
 @pytest.mark.parametrize("block_size", [16])
 @pytest.mark.parametrize("max_num_seqs", [256])
-def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int):
+@pytest.mark.parametrize("concurrent_lora_int_ids", [[None], [1], [None, 1], [None, 1, 2], [1, 2]])
+def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, concurrent_lora_int_ids: List[Optional[int]]):
+
     tokenizer = TokenizerGroup(
         tokenizer_id="facebook/opt-125m",
@@ -48,20 +53,30 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int):
     hashes = []
 
     for prefix in prefixes:
-        hashes.append([])
-        prompts = [prefix + prompt for prompt in sample_prompts]
-        seq_id = 0
-        for prompt in prompts:
-            hashes[-1].append([])
-            prompt_token_ids = tokenizer.encode(prompt)
-            seq = Sequence(seq_id, prompt, prompt_token_ids, block_size,
-                           tokenizer.tokenizer.eos_token_id)
-
-            num_blocks = len(prompt_token_ids) // block_size
-            for idx in range(num_blocks):
-                hashes[-1][-1].append(seq.hash_of_block(idx))
-
-            seq_id += 1
+        for lora_int_id in concurrent_lora_int_ids:
+            lora_request = None
+
+            if lora_int_id is not None:
+                lora_request = LoRARequest(
+                    f"example_lora_{lora_int_id}",
+                    lora_int_id,
+                    f"example/path/to/lora_{lora_int_id}",
+                )
+
+            hashes.append([])
+            prompts = [prefix + prompt for prompt in sample_prompts]
+            seq_id = 0
+            for prompt in prompts:
+                hashes[-1].append([])
+                prompt_token_ids = tokenizer.encode(prompt)
+                seq = Sequence(seq_id, prompt, prompt_token_ids, block_size,
+                               tokenizer.tokenizer.eos_token_id, lora_request)
+
+                num_blocks = len(prompt_token_ids) // block_size
+                for idx in range(num_blocks):
+                    hashes[-1][-1].append(seq.hash_of_block(idx))
+
+                seq_id += 1
 
     # Check that hashes made with two prefixes with different first blocks are
     # different everywhere.
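
Note for reviewers: the invariant this test is meant to pin down can be sketched
outside of vLLM in a few lines of standalone Python. The sketch below is a
minimal illustration only; the helper name block_hash, its signature, and the
"None means base model, treated as ID 0" convention are assumptions made here
for the example and are not vLLM's Sequence.hash_of_block API. It shows why a
prefix-cache block hash has to fold in the LoRA integer ID: two sequences with
identical token IDs but different adapters must never map to the same cached
block.

    from typing import List, Optional


    def block_hash(token_ids: List[int], block_size: int, block_idx: int,
                   lora_int_id: Optional[int]) -> int:
        """Hash one logical block from all tokens up to the end of that block,
        folding in the LoRA ID (None is treated as the base model, ID 0)."""
        end = (block_idx + 1) * block_size
        prefix = tuple(token_ids[:end])
        return hash((prefix, lora_int_id or 0))


    # Identical prompts under different adapters must hash differently, while
    # the same prompt under the same adapter must hash identically (cache hit).
    tokens = list(range(32))
    assert block_hash(tokens, 16, 0, None) != block_hash(tokens, 16, 0, 1)
    assert block_hash(tokens, 16, 0, 1) != block_hash(tokens, 16, 0, 2)
    assert block_hash(tokens, 16, 0, 1) == block_hash(list(range(32)), 16, 0, 1)

In this sketch the hash covers the whole token prefix up to the end of the
block plus the adapter ID, since both determine the KV contents a cached block
would hold; that is exactly the collision the new parametrized test cases
([None, 1], [None, 1, 2], [1, 2]) exercise by hashing the same prompts under
several concurrent LoRA IDs.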