From fd6957204102250c86d7111ef62e5a43fc695045 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Tue, 7 May 2024 11:06:32 -0700 Subject: [PATCH] [Core][Optimization] change copy-on-write from dict[int, list] to list (#4648) --- tests/core/block/test_block_table.py | 6 ++---- tests/core/test_block_manager.py | 6 +++++- tests/core/test_scheduler.py | 4 ++-- vllm/core/block/common.py | 21 ++++++++++----------- vllm/core/block/cpu_gpu_block_allocator.py | 8 ++++---- vllm/core/block/interfaces.py | 6 +++--- vllm/core/block/naive_block.py | 8 ++++---- vllm/core/block/prefix_caching_block.py | 8 ++++---- vllm/core/block_manager_v1.py | 10 +++++----- vllm/core/block_manager_v2.py | 3 ++- vllm/core/interfaces.py | 3 ++- vllm/core/scheduler.py | 5 +---- 12 files changed, 44 insertions(+), 44 deletions(-) diff --git a/tests/core/block/test_block_table.py b/tests/core/block/test_block_table.py index 3481d6b4312c1..6fb95cfdfab81 100644 --- a/tests/core/block/test_block_table.py +++ b/tests/core/block/test_block_table.py @@ -410,8 +410,7 @@ def test_cow(block_size: int, sequence_len: int, append_len: int, expected_src = static_block_table.physical_block_ids[cow_block_id] expected_dst = appender_block_table.physical_block_ids[cow_block_id] - assert expected_src in cows - assert expected_dst in cows[expected_src] + assert (expected_src, expected_dst) in cows else: # Otherwise, there should be no copy-on-write. assert not cows @@ -490,8 +489,7 @@ def test_cow_lookahead_simple(block_size: int, sequence_len: int, expected_src = static_block_table.physical_block_ids[cow_block_id] expected_dst = appender_block_table.physical_block_ids[cow_block_id] - assert expected_src in cows - assert expected_dst in cows[expected_src] + assert (expected_src, expected_dst) in cows static_block_table.free() appender_block_table.free() diff --git a/tests/core/test_block_manager.py b/tests/core/test_block_manager.py index 9f9a6180add78..08d34efb8302c 100644 --- a/tests/core/test_block_manager.py +++ b/tests/core/test_block_manager.py @@ -1,4 +1,5 @@ import time +from collections import defaultdict from typing import List import pytest @@ -155,7 +156,10 @@ def test_append_slot_cow(): cows = block_manager.append_slots(child) assert cows - for src_block, dst_blocks in cows.items(): + dict_cows = defaultdict(list) + for src_block, dst_block in cows: + dict_cows[src_block].append(dst_block) + for src_block, dst_blocks in dict_cows.items(): assert src_block not in dst_blocks after_blocks = block_manager.get_num_free_gpu_blocks() diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py index 348169035ae97..3f0c918a89abb 100644 --- a/tests/core/test_scheduler.py +++ b/tests/core/test_scheduler.py @@ -636,7 +636,7 @@ def test_schedule_decode_blocks_to_copy_update(): # The last request should be swapped out. scheduler.block_manager.append_slots = MagicMock() - scheduler.block_manager.append_slots.return_value = {2: [3]} + scheduler.block_manager.append_slots.return_value = [(2, 3)] budget = create_token_budget() remaining_running, output = scheduler._schedule_running( @@ -845,7 +845,7 @@ def test_schedule_swapped_blocks_to_copy(): # The last request should be swapped out. scheduler.block_manager.append_slots = MagicMock() - scheduler.block_manager.append_slots.return_value = {2: [3]} + scheduler.block_manager.append_slots.return_value = [(2, 3)] budget = create_token_budget() remaining_swapped, output = scheduler._schedule_swapped( diff --git a/vllm/core/block/common.py b/vllm/core/block/common.py index 3f97a1210b096..4d7a12165cb01 100644 --- a/vllm/core/block/common.py +++ b/vllm/core/block/common.py @@ -1,5 +1,4 @@ -from collections import defaultdict -from typing import Dict, Iterable, List, Optional, Protocol +from typing import Dict, Iterable, List, Optional, Protocol, Tuple from vllm.core.block.interfaces import Block, BlockAllocator @@ -111,7 +110,7 @@ def __init__( refcounter: RefCounterProtocol, allocator: BlockAllocator, ): - self._copy_on_writes: Dict[BlockId, List[BlockId]] = defaultdict(list) + self._copy_on_writes: List[Tuple[BlockId, BlockId]] = [] self._refcounter = refcounter self._allocator = allocator @@ -152,25 +151,25 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]: # Track src/dst copy. assert src_block_id is not None assert block_id is not None - self._copy_on_writes[src_block_id].append(block_id) + self._copy_on_writes.append((src_block_id, block_id)) return block_id - def clear_cows(self) -> Dict[BlockId, List[BlockId]]: + def clear_cows(self) -> List[Tuple[BlockId, BlockId]]: """Clears the copy-on-write tracking information and returns the current state. - This method returns a dictionary mapping source block indices to lists - of destination block indices for the current copy-on-write operations. + This method returns a list mapping source block indices to + destination block indices for the current copy-on-write operations. It then clears the internal tracking information. Returns: - Dict[BlockId, List[BlockId]]: A dictionary mapping source - block indices to lists of destination block indices for the + List[Tuple[BlockId, BlockId]]: A list mapping source + block indices to destination block indices for the current copy-on-write operations. """ - cows = dict(self._copy_on_writes) - self._copy_on_writes.clear() + cows = self._copy_on_writes + self._copy_on_writes = [] return cows diff --git a/vllm/core/block/cpu_gpu_block_allocator.py b/vllm/core/block/cpu_gpu_block_allocator.py index 5b25e1bcdada0..0577ca76ea971 100644 --- a/vllm/core/block/cpu_gpu_block_allocator.py +++ b/vllm/core/block/cpu_gpu_block_allocator.py @@ -1,4 +1,4 @@ -from typing import Dict, FrozenSet, List, Optional +from typing import Dict, FrozenSet, List, Optional, Tuple from vllm.core.block.interfaces import (Block, BlockAllocator, BlockId, DeviceAwareBlockAllocator) @@ -185,13 +185,13 @@ def get_num_free_blocks(self, device: Device) -> int: def get_num_total_blocks(self, device: Device) -> int: return self._allocators[device].get_num_total_blocks() - def clear_copy_on_writes(self) -> Dict[int, List[int]]: + def clear_copy_on_writes(self) -> List[Tuple[int, int]]: """Clears the copy-on-write (CoW) state and returns the mapping of source to destination block IDs. Returns: - Dict[int, List[int]]: A dictionary mapping source block IDs to lists - of destination block IDs. + List[Tuple[int, int]]: A list mapping source block IDs to + destination block IDs. """ # CoW only supported on GPU device = Device.GPU diff --git a/vllm/core/block/interfaces.py b/vllm/core/block/interfaces.py index 634c4016ca19c..140fbbb0949cc 100644 --- a/vllm/core/block/interfaces.py +++ b/vllm/core/block/interfaces.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Dict, FrozenSet, List, Optional, Protocol +from typing import FrozenSet, List, Optional, Protocol, Tuple from vllm.utils import Device @@ -122,7 +122,7 @@ def all_block_ids(self) -> FrozenSet[int]: pass @abstractmethod - def clear_copy_on_writes(self) -> Dict[int, List[int]]: + def clear_copy_on_writes(self) -> List[Tuple[int, int]]: pass @abstractmethod @@ -187,7 +187,7 @@ def all_block_ids(self) -> FrozenSet[int]: pass @abstractmethod - def clear_copy_on_writes(self) -> Dict[int, List[int]]: + def clear_copy_on_writes(self) -> List[Tuple[int, int]]: pass @abstractmethod diff --git a/vllm/core/block/naive_block.py b/vllm/core/block/naive_block.py index a1b901bf78efc..ae01930878254 100644 --- a/vllm/core/block/naive_block.py +++ b/vllm/core/block/naive_block.py @@ -1,4 +1,4 @@ -from typing import Dict, FrozenSet, Iterable, List, Optional, Set +from typing import FrozenSet, Iterable, List, Optional, Set, Tuple from vllm.core.block.common import (CopyOnWriteTracker, RefCounter, get_all_blocks_recursively) @@ -175,12 +175,12 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]: """ return self._cow_tracker.cow_block_if_not_appendable(block) - def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]: + def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]: """Returns the copy-on-write source->destination mapping and clears it. Returns: - Dict[BlockId, List[BlockId]]: A dictionary mapping source - block indices to lists of destination block indices. + List[Tuple[BlockId, BlockId]]: A list mapping source + block indices to destination block indices. """ return self._cow_tracker.clear_cows() diff --git a/vllm/core/block/prefix_caching_block.py b/vllm/core/block/prefix_caching_block.py index 4a37e8f87c379..882f301c1f697 100644 --- a/vllm/core/block/prefix_caching_block.py +++ b/vllm/core/block/prefix_caching_block.py @@ -1,7 +1,7 @@ """Token blocks.""" from itertools import takewhile from os.path import commonprefix -from typing import Dict, FrozenSet, Iterable, List, Optional +from typing import Dict, FrozenSet, Iterable, List, Optional, Tuple from vllm.core.block.common import (CopyOnWriteTracker, get_all_blocks_recursively) @@ -337,12 +337,12 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]: """ return self._cow_tracker.cow_block_if_not_appendable(block) - def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]: + def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]: """Returns the copy-on-write source->destination mapping and clears it. Returns: - Dict[BlockId, List[BlockId]]: A dictionary mapping source - block indices to lists of destination block indices. + List[Tuple[BlockId, BlockId]]: A list mapping source + block indices to destination block indices. """ return self._cow_tracker.clear_cows() diff --git a/vllm/core/block_manager_v1.py b/vllm/core/block_manager_v1.py index 268c5c135d887..4e7392f3486c9 100644 --- a/vllm/core/block_manager_v1.py +++ b/vllm/core/block_manager_v1.py @@ -5,7 +5,7 @@ from os.path import commonprefix from typing import Dict, List, Optional from typing import Sequence as GenericSequence -from typing import Set +from typing import Set, Tuple from vllm.block import BlockTable, PhysicalTokenBlock from vllm.core.evictor_v1 import EvictionPolicy, Evictor, make_evictor @@ -386,7 +386,7 @@ def append_slots( self, seq: Sequence, num_lookahead_slots: int = 0, - ) -> Dict[int, List[int]]: + ) -> List[Tuple[int, int]]: """Allocate a physical slot for a new token.""" logical_blocks = seq.logical_token_blocks block_table = self.block_tables[seq.seq_id] @@ -405,7 +405,7 @@ def append_slots( # Allocate a new physical block. new_block = self._allocate_last_physical_block(seq) block_table.append(new_block) - return {} + return [] # We want to append the token to the last physical block. last_block = block_table[-1] @@ -418,7 +418,7 @@ def append_slots( maybe_new_block = self._maybe_promote_last_block( seq, last_block) block_table[-1] = maybe_new_block - return {} + return [] else: # The last block is shared with other sequences. # Copy on Write: Allocate a new block and copy the tokens. @@ -426,7 +426,7 @@ def append_slots( block_table[-1] = new_block self.gpu_allocator.free(last_block) - return {last_block.block_number: [new_block.block_number]} + return [(last_block.block_number, new_block.block_number)] def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None: # NOTE: fork does not allocate a new physical block. diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py index ce90ce2f17278..3b483e67ad9c1 100644 --- a/vllm/core/block_manager_v2.py +++ b/vllm/core/block_manager_v2.py @@ -1,6 +1,7 @@ """A block manager that manages token blocks.""" from typing import Dict, List, Optional from typing import Sequence as GenericSequence +from typing import Tuple from vllm.core.block.block_table import BlockTable from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator @@ -166,7 +167,7 @@ def append_slots( self, seq: Sequence, num_lookahead_slots: int, - ) -> Dict[int, List[int]]: + ) -> List[Tuple[int, int]]: block_table = self.block_tables[seq.seq_id] diff --git a/vllm/core/interfaces.py b/vllm/core/interfaces.py index 09ccaddb62615..ab2c8ea0053dd 100644 --- a/vllm/core/interfaces.py +++ b/vllm/core/interfaces.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod from typing import Dict, List from typing import Sequence as GenericSequence +from typing import Tuple from vllm.sequence import Sequence, SequenceGroup @@ -54,7 +55,7 @@ def append_slots( self, seq: Sequence, num_lookahead_slots: int, - ) -> Dict[int, List[int]]: + ) -> List[Tuple[int, int]]: pass @abstractmethod diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index de3ecd24e52db..f426ee95c0ca2 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -1027,10 +1027,7 @@ def _append_slots( for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING): cows = self.block_manager.append_slots(seq, num_lookahead_slots) - - for src, dests in cows.items(): - for dest in dests: - blocks_to_copy.append((src, dest)) + blocks_to_copy.extend(cows) def _preempt( self,