Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
[Core][Optimization] change copy-on-write from dict[int, list] to list (
Browse files Browse the repository at this point in the history
  • Loading branch information
youkaichao authored and robertgshaw2-neuralmagic committed May 19, 2024
1 parent e4ab5c6 commit fd69572
Show file tree
Hide file tree
Showing 12 changed files with 44 additions and 44 deletions.
6 changes: 2 additions & 4 deletions tests/core/block/test_block_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,8 +410,7 @@ def test_cow(block_size: int, sequence_len: int, append_len: int,
expected_src = static_block_table.physical_block_ids[cow_block_id]
expected_dst = appender_block_table.physical_block_ids[cow_block_id]

assert expected_src in cows
assert expected_dst in cows[expected_src]
assert (expected_src, expected_dst) in cows
else:
# Otherwise, there should be no copy-on-write.
assert not cows
Expand Down Expand Up @@ -490,8 +489,7 @@ def test_cow_lookahead_simple(block_size: int, sequence_len: int,
expected_src = static_block_table.physical_block_ids[cow_block_id]
expected_dst = appender_block_table.physical_block_ids[cow_block_id]

assert expected_src in cows
assert expected_dst in cows[expected_src]
assert (expected_src, expected_dst) in cows

static_block_table.free()
appender_block_table.free()
Expand Down
6 changes: 5 additions & 1 deletion tests/core/test_block_manager.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import time
from collections import defaultdict
from typing import List

import pytest
Expand Down Expand Up @@ -155,7 +156,10 @@ def test_append_slot_cow():

cows = block_manager.append_slots(child)
assert cows
for src_block, dst_blocks in cows.items():
dict_cows = defaultdict(list)
for src_block, dst_block in cows:
dict_cows[src_block].append(dst_block)
for src_block, dst_blocks in dict_cows.items():
assert src_block not in dst_blocks

after_blocks = block_manager.get_num_free_gpu_blocks()
Expand Down
4 changes: 2 additions & 2 deletions tests/core/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ def test_schedule_decode_blocks_to_copy_update():

# The last request should be swapped out.
scheduler.block_manager.append_slots = MagicMock()
scheduler.block_manager.append_slots.return_value = {2: [3]}
scheduler.block_manager.append_slots.return_value = [(2, 3)]

budget = create_token_budget()
remaining_running, output = scheduler._schedule_running(
Expand Down Expand Up @@ -845,7 +845,7 @@ def test_schedule_swapped_blocks_to_copy():

# The last request should be swapped out.
scheduler.block_manager.append_slots = MagicMock()
scheduler.block_manager.append_slots.return_value = {2: [3]}
scheduler.block_manager.append_slots.return_value = [(2, 3)]

budget = create_token_budget()
remaining_swapped, output = scheduler._schedule_swapped(
Expand Down
21 changes: 10 additions & 11 deletions vllm/core/block/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from collections import defaultdict
from typing import Dict, Iterable, List, Optional, Protocol
from typing import Dict, Iterable, List, Optional, Protocol, Tuple

from vllm.core.block.interfaces import Block, BlockAllocator

Expand Down Expand Up @@ -111,7 +110,7 @@ def __init__(
refcounter: RefCounterProtocol,
allocator: BlockAllocator,
):
self._copy_on_writes: Dict[BlockId, List[BlockId]] = defaultdict(list)
self._copy_on_writes: List[Tuple[BlockId, BlockId]] = []
self._refcounter = refcounter
self._allocator = allocator

Expand Down Expand Up @@ -152,25 +151,25 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
# Track src/dst copy.
assert src_block_id is not None
assert block_id is not None
self._copy_on_writes[src_block_id].append(block_id)
self._copy_on_writes.append((src_block_id, block_id))

return block_id

def clear_cows(self) -> Dict[BlockId, List[BlockId]]:
def clear_cows(self) -> List[Tuple[BlockId, BlockId]]:
"""Clears the copy-on-write tracking information and returns the current
state.
This method returns a dictionary mapping source block indices to lists
of destination block indices for the current copy-on-write operations.
This method returns a list of (source, destination) block index pairs,
one pair for each of the current copy-on-write operations.
It then clears the internal tracking information.
Returns:
Dict[BlockId, List[BlockId]]: A dictionary mapping source
block indices to lists of destination block indices for the
List[Tuple[BlockId, BlockId]]: A list of (source, destination)
block index pairs, one pair for each of the
current copy-on-write operations.
"""
cows = dict(self._copy_on_writes)
self._copy_on_writes.clear()
cows = self._copy_on_writes
self._copy_on_writes = []
return cows


Expand Down
8 changes: 4 additions & 4 deletions vllm/core/block/cpu_gpu_block_allocator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, FrozenSet, List, Optional
from typing import Dict, FrozenSet, List, Optional, Tuple

from vllm.core.block.interfaces import (Block, BlockAllocator, BlockId,
DeviceAwareBlockAllocator)
Expand Down Expand Up @@ -185,13 +185,13 @@ def get_num_free_blocks(self, device: Device) -> int:
def get_num_total_blocks(self, device: Device) -> int:
return self._allocators[device].get_num_total_blocks()

def clear_copy_on_writes(self) -> Dict[int, List[int]]:
def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
"""Clears the copy-on-write (CoW) state and returns the mapping of
source to destination block IDs.
Returns:
Dict[int, List[int]]: A dictionary mapping source block IDs to lists
of destination block IDs.
List[Tuple[int, int]]: A list of (source block ID,
destination block ID) pairs, one per copy-on-write operation.
"""
# CoW only supported on GPU
device = Device.GPU
Expand Down
6 changes: 3 additions & 3 deletions vllm/core/block/interfaces.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Dict, FrozenSet, List, Optional, Protocol
from typing import FrozenSet, List, Optional, Protocol, Tuple

from vllm.utils import Device

Expand Down Expand Up @@ -122,7 +122,7 @@ def all_block_ids(self) -> FrozenSet[int]:
pass

@abstractmethod
def clear_copy_on_writes(self) -> Dict[int, List[int]]:
def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
pass

@abstractmethod
Expand Down Expand Up @@ -187,7 +187,7 @@ def all_block_ids(self) -> FrozenSet[int]:
pass

@abstractmethod
def clear_copy_on_writes(self) -> Dict[int, List[int]]:
def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
pass

@abstractmethod
Expand Down
8 changes: 4 additions & 4 deletions vllm/core/block/naive_block.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, FrozenSet, Iterable, List, Optional, Set
from typing import FrozenSet, Iterable, List, Optional, Set, Tuple

from vllm.core.block.common import (CopyOnWriteTracker, RefCounter,
get_all_blocks_recursively)
Expand Down Expand Up @@ -175,12 +175,12 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
"""
return self._cow_tracker.cow_block_if_not_appendable(block)

def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
"""Returns the copy-on-write source->destination mapping and clears it.
Returns:
Dict[BlockId, List[BlockId]]: A dictionary mapping source
block indices to lists of destination block indices.
List[Tuple[BlockId, BlockId]]: A list of (source, destination)
block index pairs, one per copy-on-write operation.
"""
return self._cow_tracker.clear_cows()

Expand Down
8 changes: 4 additions & 4 deletions vllm/core/block/prefix_caching_block.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Token blocks."""
from itertools import takewhile
from os.path import commonprefix
from typing import Dict, FrozenSet, Iterable, List, Optional
from typing import Dict, FrozenSet, Iterable, List, Optional, Tuple

from vllm.core.block.common import (CopyOnWriteTracker,
get_all_blocks_recursively)
Expand Down Expand Up @@ -337,12 +337,12 @@ def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
"""
return self._cow_tracker.cow_block_if_not_appendable(block)

def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
"""Returns the copy-on-write source->destination mapping and clears it.
Returns:
Dict[BlockId, List[BlockId]]: A dictionary mapping source
block indices to lists of destination block indices.
List[Tuple[BlockId, BlockId]]: A list of (source, destination)
block index pairs, one per copy-on-write operation.
"""
return self._cow_tracker.clear_cows()

Expand Down
10 changes: 5 additions & 5 deletions vllm/core/block_manager_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from os.path import commonprefix
from typing import Dict, List, Optional
from typing import Sequence as GenericSequence
from typing import Set
from typing import Set, Tuple

from vllm.block import BlockTable, PhysicalTokenBlock
from vllm.core.evictor_v1 import EvictionPolicy, Evictor, make_evictor
Expand Down Expand Up @@ -386,7 +386,7 @@ def append_slots(
self,
seq: Sequence,
num_lookahead_slots: int = 0,
) -> Dict[int, List[int]]:
) -> List[Tuple[int, int]]:
"""Allocate a physical slot for a new token."""
logical_blocks = seq.logical_token_blocks
block_table = self.block_tables[seq.seq_id]
Expand All @@ -405,7 +405,7 @@ def append_slots(
# Allocate a new physical block.
new_block = self._allocate_last_physical_block(seq)
block_table.append(new_block)
return {}
return []

# We want to append the token to the last physical block.
last_block = block_table[-1]
Expand All @@ -418,15 +418,15 @@ def append_slots(
maybe_new_block = self._maybe_promote_last_block(
seq, last_block)
block_table[-1] = maybe_new_block
return {}
return []
else:
# The last block is shared with other sequences.
# Copy on Write: Allocate a new block and copy the tokens.
new_block = self._allocate_last_physical_block(seq)

block_table[-1] = new_block
self.gpu_allocator.free(last_block)
return {last_block.block_number: [new_block.block_number]}
return [(last_block.block_number, new_block.block_number)]

def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
# NOTE: fork does not allocate a new physical block.
Expand Down
3 changes: 2 additions & 1 deletion vllm/core/block_manager_v2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""A block manager that manages token blocks."""
from typing import Dict, List, Optional
from typing import Sequence as GenericSequence
from typing import Tuple

from vllm.core.block.block_table import BlockTable
from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator
Expand Down Expand Up @@ -166,7 +167,7 @@ def append_slots(
self,
seq: Sequence,
num_lookahead_slots: int,
) -> Dict[int, List[int]]:
) -> List[Tuple[int, int]]:

block_table = self.block_tables[seq.seq_id]

Expand Down
3 changes: 2 additions & 1 deletion vllm/core/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from abc import ABC, abstractmethod
from typing import Dict, List
from typing import Sequence as GenericSequence
from typing import Tuple

from vllm.sequence import Sequence, SequenceGroup

Expand Down Expand Up @@ -54,7 +55,7 @@ def append_slots(
self,
seq: Sequence,
num_lookahead_slots: int,
) -> Dict[int, List[int]]:
) -> List[Tuple[int, int]]:
pass

@abstractmethod
Expand Down
5 changes: 1 addition & 4 deletions vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,10 +1027,7 @@ def _append_slots(

for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING):
cows = self.block_manager.append_slots(seq, num_lookahead_slots)

for src, dests in cows.items():
for dest in dests:
blocks_to_copy.append((src, dest))
blocks_to_copy.extend(cows)

def _preempt(
self,
Expand Down

0 comments on commit fd69572

Please sign in to comment.