Added test case for LoRA block_hash conflicts.

vllm-project · Mar 7, 2024 · 3441735 · 3441735
1 parent 385da2d
commit 3441735
Showing 1 changed file with 30 additions and 15 deletions.
diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py
@@ -2,8 +2,11 @@
 
 Run `pytest tests/test_cache_block_hashing.py`.
 """
+from typing import List, Optional
+
 import pytest
 
+from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizer import TokenizerGroup
 from vllm.sequence import Sequence
 
@@ -36,7 +39,9 @@ def flatten_2d(li):
 @pytest.mark.parametrize("model", ["facebook/opt-125m"])
 @pytest.mark.parametrize("block_size", [16])
 @pytest.mark.parametrize("max_num_seqs", [256])
-def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int):
+@pytest.mark.parametrize("concurrent_lora_int_ids", [[None], [1], [None, 1], [None, 1, 2], [1, 2]])
+def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, concurrent_lora_int_ids: List[Optional[int]]):
+
 
     tokenizer = TokenizerGroup(
         tokenizer_id="facebook/opt-125m",
@@ -48,20 +53,30 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int):
     hashes = []
 
     for prefix in prefixes:
-        hashes.append([])
-        prompts = [prefix + prompt for prompt in sample_prompts]
-        seq_id = 0
-        for prompt in prompts:
-            hashes[-1].append([])
-            prompt_token_ids = tokenizer.encode(prompt)
-            seq = Sequence(seq_id, prompt, prompt_token_ids, block_size,
-                           tokenizer.tokenizer.eos_token_id)
-
-            num_blocks = len(prompt_token_ids) // block_size
-            for idx in range(num_blocks):
-                hashes[-1][-1].append(seq.hash_of_block(idx))
-
-            seq_id += 1
+        for lora_int_id in concurrent_lora_int_ids:
+            lora_request = None
+
+            if lora_int_id is not None:
+                lora_request = LoRARequest(
+                    f"example_lora_{lora_int_id}", 
+                    lora_int_id, 
+                    f"example/path/to/lora_{lora_int_id}",
+                )
+
+            hashes.append([])
+            prompts = [prefix + prompt for prompt in sample_prompts]
+            seq_id = 0
+            for prompt in prompts:
+                hashes[-1].append([])
+                prompt_token_ids = tokenizer.encode(prompt)
+                seq = Sequence(seq_id, prompt, prompt_token_ids, block_size,
+                            tokenizer.tokenizer.eos_token_id, lora_request)
+
+                num_blocks = len(prompt_token_ids) // block_size
+                for idx in range(num_blocks):
+                    hashes[-1][-1].append(seq.hash_of_block(idx))
+
+                seq_id += 1
 
     # Check that hashes made with two prefixes with different first blocks are
     # different everywhere.