Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
[mypy][6/N] Fix all the core subdirectory typing (vllm-project#4450)
Browse files Browse the repository at this point in the history
Co-authored-by: Cade Daniel <[email protected]>
  • Loading branch information
2 people authored and robertgshaw2-neuralmagic committed May 6, 2024
1 parent 91f8b48 commit 2017aaf
Show file tree
Hide file tree
Showing 10 changed files with 275 additions and 83 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/mypy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ jobs:
- name: Mypy
run: |
mypy vllm/attention --config-file pyproject.toml
mypy vllm/core --config-file pyproject.toml
mypy vllm/distributed --config-file pyproject.toml
mypy vllm/entrypoints --config-file pyproject.toml
mypy vllm/executor --config-file pyproject.toml
Expand All @@ -42,9 +43,6 @@ jobs:
mypy vllm/engine --config-file pyproject.toml
mypy vllm/worker --config-file pyproject.toml
mypy vllm/spec_decode --config-file pyproject.toml
mypy vllm/lora --config-file pyproject.toml
mypy vllm/model_executor --config-file pyproject.toml
# TODO(sang): Fix nested dir
mypy vllm/core/*.py --follow-imports=skip --config-file pyproject.toml
mypy vllm/lora --config-file pyproject.toml
2 changes: 1 addition & 1 deletion format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ echo 'vLLM yapf: Done'
# Run mypy
echo 'vLLM mypy:'
mypy vllm/attention --config-file pyproject.toml
mypy vllm/core/*.py --follow-imports=skip --config-file pyproject.toml
mypy vllm/core --config-file pyproject.toml
mypy vllm/distributed --config-file pyproject.toml
mypy vllm/entrypoints --config-file pyproject.toml
mypy vllm/executor --config-file pyproject.toml
Expand Down
16 changes: 10 additions & 6 deletions vllm/core/block/block_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def __init__(
):
self._block_size = block_size
self._allocator = block_allocator
self._blocks: Optional[List[Block]] = _blocks
if _blocks is None:
_blocks = []
self._blocks: List[Block] = _blocks

# Use helper method instead of directly calculating, as blocks
# may not be allocated.
Expand Down Expand Up @@ -104,7 +106,7 @@ def append_token_ids(self,
token_ids (List[int]): The sequence of token IDs to be appended.
"""
assert self._is_allocated
assert self._blocks is not None
assert len(self._blocks) > 0

self.ensure_num_empty_slots(num_empty_slots=len(token_ids) +
num_lookahead_slots)
Expand Down Expand Up @@ -141,6 +143,7 @@ def ensure_num_empty_slots(self, num_empty_slots: int) -> None:
blocks_to_allocate = cdiv(slots_to_allocate, self._block_size)

for _ in range(blocks_to_allocate):
assert len(self._blocks) > 0
self._blocks.append(
self._allocator.allocate_mutable(prev_block=self._blocks[-1],
device=device))
Expand All @@ -159,6 +162,7 @@ def fork(self) -> "BlockTable":
the current instance.
"""
assert self._is_allocated
assert len(self._blocks) > 0
forked_blocks = self._allocator.fork(self._blocks[-1])
return BlockTable(
block_size=self._block_size,
Expand All @@ -177,10 +181,10 @@ def free(self) -> None:
assert self._is_allocated
for block in self._blocks:
self._allocator.free(block)
self._blocks = None
self._blocks = []

@property
def physical_block_ids(self) -> List[int]:
def physical_block_ids(self) -> List[Optional[int]]:
"""Returns a list of physical block indices for the blocks in the
BlockTable.
Expand Down Expand Up @@ -235,7 +239,7 @@ def _allocate_blocks_for_token_ids(self, prev_block: Optional[Block],

def _get_all_token_ids(self) -> List[int]:
# NOTE: This function is O(seq_len); use sparingly.
token_ids = []
token_ids: List[int] = []

if not self._is_allocated:
return token_ids
Expand All @@ -247,7 +251,7 @@ def _get_all_token_ids(self) -> List[int]:

@property
def _is_allocated(self) -> bool:
return self._blocks is not None
return len(self._blocks) > 0

@property
def _num_empty_slots(self) -> int:
Expand Down
20 changes: 16 additions & 4 deletions vllm/core/block/common.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
from collections import defaultdict
from typing import Dict, Iterable, List, Optional
from typing import Dict, Iterable, List, Optional, Protocol

from vllm.core.block.interfaces import Block, BlockAllocator

BlockId = int
RefCount = int


class RefCounter:
class RefCounterProtocol(Protocol):

def incr(self, block_id: BlockId) -> RefCount:
raise NotImplementedError

def decr(self, block_id: BlockId) -> RefCount:
raise NotImplementedError

def get(self, block_id: BlockId) -> RefCount:
raise NotImplementedError


class RefCounter(RefCounterProtocol):
"""A class for managing reference counts for a set of block indices.
The RefCounter class maintains a dictionary that maps block indices to their
Expand Down Expand Up @@ -54,7 +66,7 @@ def as_readonly(self) -> "ReadOnlyRefCounter":
return ReadOnlyRefCounter(self)


class ReadOnlyRefCounter:
class ReadOnlyRefCounter(RefCounterProtocol):
"""A read-only view of the RefCounter class.
The ReadOnlyRefCounter class provides a read-only interface to access the
Expand Down Expand Up @@ -96,7 +108,7 @@ class CopyOnWriteTracker:

def __init__(
self,
refcounter: RefCounter,
refcounter: RefCounterProtocol,
allocator: BlockAllocator,
):
self._copy_on_writes: Dict[BlockId, List[BlockId]] = defaultdict(list)
Expand Down
49 changes: 33 additions & 16 deletions vllm/core/block/cpu_gpu_block_allocator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, List, Optional
from typing import Dict, FrozenSet, List, Optional

from vllm.core.block.interfaces import (Block, BlockAllocator,
from vllm.core.block.interfaces import (Block, BlockAllocator, BlockId,
DeviceAwareBlockAllocator)
from vllm.core.block.naive_block import NaiveBlock, NaiveBlockAllocator
from vllm.core.block.prefix_caching_block import PrefixCachingBlockAllocator
Expand Down Expand Up @@ -57,15 +57,15 @@ def create(
cpu_block_ids = block_ids[num_gpu_blocks:]

if allocator_type == "naive":
gpu_allocator = NaiveBlockAllocator(
create_block=NaiveBlock,
gpu_allocator: BlockAllocator = NaiveBlockAllocator(
create_block=NaiveBlock, # type: ignore
num_blocks=num_gpu_blocks,
block_size=block_size,
block_ids=gpu_block_ids,
)

cpu_allocator = NaiveBlockAllocator(
create_block=NaiveBlock,
cpu_allocator: BlockAllocator = NaiveBlockAllocator(
create_block=NaiveBlock, # type: ignore
num_blocks=num_cpu_blocks,
block_size=block_size,
block_ids=cpu_block_ids,
Expand Down Expand Up @@ -105,13 +105,14 @@ def __init__(
Device.GPU: gpu_block_allocator,
}

self._block_ids_to_allocator = {}
self._block_ids_to_allocator: Dict[int, BlockAllocator] = {}
for _, allocator in self._allocators.items():
for block_id in allocator.all_block_ids:
self._block_ids_to_allocator[block_id] = allocator

def allocate_mutable(self, prev_block: Optional[Block],
device: Device) -> Block:
def allocate_mutable(self,
prev_block: Optional[Block],
device: Optional[Device] = None) -> Block:
"""Allocates a new mutable block on the specified device.
Args:
Expand All @@ -122,10 +123,13 @@ def allocate_mutable(self, prev_block: Optional[Block],
Returns:
Block: The newly allocated mutable block.
"""
assert device is not None
return self._allocators[device].allocate_mutable(prev_block)

def allocate_immutable(self, prev_block: Optional[Block],
token_ids: List[int], device: Device) -> Block:
def allocate_immutable(self,
prev_block: Optional[Block],
token_ids: List[int],
device: Optional[Device] = None) -> Block:
"""Allocates a new immutable block with the provided token IDs on the
specified device.
Expand All @@ -140,6 +144,7 @@ def allocate_immutable(self, prev_block: Optional[Block],
Block: The newly allocated immutable block containing the provided
token IDs.
"""
assert device is not None
return self._allocators[device].allocate_immutable(
prev_block, token_ids)

Expand All @@ -149,7 +154,9 @@ def free(self, block: Block) -> None:
Args:
block (Block): The block to be freed.
"""
allocator = self._block_ids_to_allocator[block.block_id]
block_id = block.block_id
assert block_id is not None
allocator = self._block_ids_to_allocator[block_id]
return allocator.free(block)

def fork(self, last_block: Block) -> List[Block]:
Expand All @@ -163,19 +170,22 @@ def fork(self, last_block: Block) -> List[Block]:
List[Block]: A new list of blocks that shares the same memory as the
original sequence.
"""
allocator = self._block_ids_to_allocator[last_block.block_id]
block_id = last_block.block_id
assert block_id is not None
allocator = self._block_ids_to_allocator[block_id]
return allocator.fork(last_block)

def get_num_free_blocks(self, device: Device) -> int:
def get_num_free_blocks(self, device: Optional[Device] = None) -> int:
"""Returns the number of free blocks available on the specified device.
Args:
device (Device): The device for which to query the number of free
blocks.
blocks. AssertionError is raised if None is passed.
Returns:
int: The number of free blocks available on the specified device.
"""
assert device is not None
return self._allocators[device].get_num_free_blocks()

def clear_copy_on_writes(self) -> Dict[int, List[int]]:
Expand Down Expand Up @@ -210,5 +220,12 @@ def get_common_computed_block_ids(
return self._allocators[device].get_common_computed_block_ids(
seq_block_ids)

def all_block_ids(self) -> frozenset[int]:
@property
def all_block_ids(self) -> FrozenSet[int]:
return frozenset(self._block_ids_to_allocator.keys())

def promote_to_immutable_block(self, block: Block) -> BlockId:
raise NotImplementedError

def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
raise NotImplementedError
Loading

0 comments on commit 2017aaf

Please sign in to comment.