
Merge branch 'vllm-project:main' into main
grandiose-pizza authored Mar 6, 2024
2 parents 65669fc + a33ce60 commit c9a3db5
Showing 4 changed files with 36 additions and 23 deletions.
tests/core/test_block_manager.py (49 changes: 31 additions & 18 deletions)
@@ -6,7 +6,7 @@
 from vllm.block import PhysicalTokenBlock
 from vllm.core.block_manager import BlockAllocator, BlockSpaceManager, AllocStatus
 from vllm.utils import Device
-from vllm.sequence import Sequence, SequenceGroup, SequenceStatus
+from vllm.sequence import Sequence, SequenceGroup, SequenceStatus, Logprob
 
 from .utils import create_dummy_prompt
 
@@ -22,7 +22,8 @@ def test_block_allocator_allocate():
     for _ in range(num_cpu_blocks):
         block = cpu_allocator.allocate()
         num_free -= 1
-        assert block not in cpu_allocator.free_blocks
+
+        assert block.block_hash not in cpu_allocator.evictor
         assert cpu_allocator.get_num_free_blocks() == num_free
 
     with pytest.raises(ValueError):
@@ -39,15 +40,15 @@ def test_block_allocator_free():
     for _ in range(num_cpu_blocks):
         block = cpu_allocator.allocate()
         blocks.append(block)
-        assert block not in cpu_allocator.free_blocks
+        assert block.block_hash not in cpu_allocator.evictor
 
     # Free all allocated cpu blocks.
     num_free = 0
     assert cpu_allocator.get_num_free_blocks() == num_free
     for block in blocks:
         cpu_allocator.free(block)
         num_free += 1
-        assert block in cpu_allocator.free_blocks
+        assert block.block_hash in cpu_allocator.evictor
         assert cpu_allocator.get_num_free_blocks() == num_free
 
     with pytest.raises(ValueError):
@@ -106,7 +107,7 @@ def test_append_slot_single_seq():
     # Add block_size number of new tokens and append slot.
     for i in range(block_size):
         token_id = i + 5
-        prompt.append_token_id(token_id, {token_id: 0.0})
+        prompt.append_token_id(token_id, {token_id: Logprob(0.0)})
 
     assert block_manager.can_append_slot(seq_group)
     before_blocks = block_manager.get_num_free_gpu_blocks()
@@ -119,25 +120,37 @@ def test_append_slot_cow():
     block_size = 4
     num_cpu_blocks = 4
     num_gpu_blocks = 4
-    block_manager = BlockSpaceManager(block_size,
-                                      num_cpu_blocks,
-                                      num_gpu_blocks,
+    block_manager = BlockSpaceManager(block_size=block_size,
+                                      num_cpu_blocks=num_cpu_blocks,
+                                      num_gpu_blocks=num_gpu_blocks,
                                       watermark=0)
 
-    # Allocate prompt to gpu block.
-    prompt = Sequence(1, "one two three", [1, 2, 3], block_size)
-    child = prompt.fork(2)
-    token_id = 4
-    child.append_token_id(token_id, {token_id: 0.0})
+    # Allocate prompt to gpu block. There is one slot left in the block.
+    prompt = Sequence(seq_id=1,
+                      prompt="one two three",
+                      prompt_token_ids=[1, 2, 3],
+                      block_size=block_size)
+
+    # Fork the sequence, such that a COW will be required when we append a new
+    # token id.
+    child = prompt.fork(new_seq_id=2)
+
+    # Allocate space for the sequence group.
     seq_group = SequenceGroup("1", [prompt, child], SamplingParams(),
                               time.time(), time.perf_counter)
     block_manager.allocate(seq_group)
 
-    # Append slot for child token.
-    # Last block being modified is shared. Copy on write occurs.
+    # Fork and append a new token id. We expect a COW to be scheduled.
+    token_id = 4
+    child.append_token_id(token_id, {token_id: Logprob(0.0)})
+    block_manager.fork(prompt, child)
+
     assert block_manager.can_append_slot(seq_group)
     before_blocks = block_manager.get_num_free_gpu_blocks()
-    src_block, dst_block = block_manager.append_slot(child)
+
+    maybe_src_dst_block = block_manager.append_slot(child)
+    assert maybe_src_dst_block is not None
+    src_block, dst_block = maybe_src_dst_block
     assert src_block != dst_block
 
     after_blocks = block_manager.get_num_free_gpu_blocks()
@@ -165,7 +178,7 @@ def test_fork():
         prompt) == block_manager.get_block_table(child)
     token_id = 4
     # Append token to child. Block is shared so copy on write occurs.
-    child.append_token_id(token_id, {token_id: 0.0})
+    child.append_token_id(token_id, {token_id: Logprob(0.0)})
     block_manager.append_slot(child)
     assert block_manager.get_block_table(
         prompt) != block_manager.get_block_table(child)
@@ -189,7 +202,7 @@ def test_swap():
     # tokens will be written in the next forward pass.
     token_id = 0
     prompt.status = SequenceStatus.RUNNING
-    prompt.append_token_id(token_id, {token_id: 0.0})
+    prompt.append_token_id(token_id, {token_id: Logprob(0.0)})
 
     # Swap seq group from GPU -> CPU.
     gpu_blocks = block_manager.get_block_table(prompt)
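The recurring change in this test file is the second argument to append_token_id: raw float logprob values are replaced by Logprob objects imported from vllm.sequence. A minimal sketch of the new call pattern, mirroring the updated test code above (Logprob's internal fields are not shown in this diff; only the Logprob(0.0) constructor call is assumed):

from vllm.sequence import Logprob, Sequence

# Build a sequence the same way the updated test does.
prompt = Sequence(seq_id=1,
                  prompt="one two three",
                  prompt_token_ids=[1, 2, 3],
                  block_size=4)

# Before this change: prompt.append_token_id(token_id, {token_id: 0.0})
# After this change: each logprob value is wrapped in a Logprob object.
token_id = 4
prompt.append_token_id(token_id, {token_id: Logprob(0.0)})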
tests/core/test_scheduler.py (6 changes: 3 additions & 3 deletions)
@@ -3,7 +3,7 @@
 
 from vllm.config import CacheConfig, SchedulerConfig
 from vllm.core.scheduler import Scheduler
-from vllm.sequence import SequenceGroup
+from vllm.sequence import SequenceGroup, Logprob
 
 from .utils import create_dummy_prompt
 
@@ -108,8 +108,8 @@ def test_scheduler_schedule_preempt_abort():
     # Append "generated" tokens, allowing the sequence to mark prompt tokens as
     # processed.
     token_id = 0
-    seq_a.append_token_id(token_id, {token_id: 0.0})
-    seq_b.append_token_id(token_id, {token_id: 0.0})
+    seq_a.append_token_id(token_id, {token_id: Logprob(0.0)})
+    seq_b.append_token_id(token_id, {token_id: Logprob(0.0)})
 
     # Schedule seq groups generation and preempt seq group b.
     seq_group_meta, out = scheduler.schedule()
tests/core/utils.py (2 changes: 1 addition & 1 deletion)
@@ -18,7 +18,7 @@ def create_dummy_prompt(
     prompt_str = " ".join([str(t) for t in prompt_tokens])
     prompt = Sequence(int(request_id), prompt_str, prompt_tokens, block_size)
     seq_group = SequenceGroup(request_id, [prompt], SamplingParams(),
-                              time.time(), None, None)
+                              time.time(), None)
 
     return prompt, seq_group
 
vllm/sequence.py (2 changes: 1 addition & 1 deletion)
@@ -142,7 +142,7 @@ def __init__(
         prompt: str,
         prompt_token_ids: List[int],
         block_size: int,
-        eos_token_id: int,
+        eos_token_id: Optional[int] = None,
         lora_request: Optional[LoRARequest] = None,
     ) -> None:
         self.seq_id = seq_id
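This signature change is what lets the tests above construct a Sequence without passing eos_token_id. A short hedged sketch of the two call styles the new default permits (argument values are illustrative, taken from the test code in this commit, not from vllm/sequence.py itself):

from vllm.sequence import Sequence

# eos_token_id now defaults to None, so it can simply be omitted...
seq = Sequence(seq_id=1,
               prompt="one two three",
               prompt_token_ids=[1, 2, 3],
               block_size=4)

# ...or still be passed explicitly when the tokenizer provides one
# (the value 2 here is a placeholder, not taken from the diff).
seq_with_eos = Sequence(seq_id=2,
                        prompt="one two three",
                        prompt_token_ids=[1, 2, 3],
                        block_size=4,
                        eos_token_id=2)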
