From 5c2e66e4871917c5d59cc4a8b89ef53e690e9bd9 Mon Sep 17 00:00:00 2001
From: Dylan Hawk <51147702+dylanwhawk@users.noreply.github.com>
Date: Fri, 12 Apr 2024 21:07:04 -0700
Subject: [PATCH] [Bugfix] More type hint fixes for py 3.8 (#4039)

---
 vllm/executor/executor_base.py | 2 +-
 vllm/worker/cpu_worker.py      | 4 ++--
 vllm/worker/neuron_worker.py   | 4 ++--
 vllm/worker/worker_base.py     | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py
index 55bccfa8e3ca9..bbfbfc689c99f 100644
--- a/vllm/executor/executor_base.py
+++ b/vllm/executor/executor_base.py
@@ -39,7 +39,7 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
         ExecutorBase may require modification of the result, e.g. to ensure the
         selected cache sizes are compatible with all workers.
 
-        Returns a tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
+        Returns a Tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
         are blocks that are "active" on the device and can be appended to.
         num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
         appended to.
diff --git a/vllm/worker/cpu_worker.py b/vllm/worker/cpu_worker.py
index 3989207e8dd83..41341b063bed7 100644
--- a/vllm/worker/cpu_worker.py
+++ b/vllm/worker/cpu_worker.py
@@ -1,5 +1,5 @@
 """A CPU worker class."""
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Tuple
 
 import torch
 import torch.distributed
@@ -157,7 +157,7 @@ def init_device(self) -> None:
     def load_model(self):
         self.model_runner.load_model()
 
-    def determine_num_available_blocks(self) -> tuple[int, int]:
+    def determine_num_available_blocks(self) -> Tuple[int, int]:
         """Determine the number of blocks available for the KV cache.
 
         This determines how many KV blocks can fit into the configured CPU
diff --git a/vllm/worker/neuron_worker.py b/vllm/worker/neuron_worker.py
index 6136d50d0c068..2f22f82c045db 100644
--- a/vllm/worker/neuron_worker.py
+++ b/vllm/worker/neuron_worker.py
@@ -1,5 +1,5 @@
 """A Neuron worker class."""
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 import torch
 import torch.distributed
@@ -40,7 +40,7 @@ def init_device(self) -> None:
     def load_model(self):
         self.model_runner.load_model()
 
-    def determine_num_available_blocks(self) -> tuple[int, int]:
+    def determine_num_available_blocks(self) -> Tuple[int, int]:
         """Determine the number of available KV blocks.
 
         Swapping is not yet supported, so always return num_cpu_blocks=0.
diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py
index e3027c406ffeb..d8c9febb11584 100644
--- a/vllm/worker/worker_base.py
+++ b/vllm/worker/worker_base.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Dict, List
+from typing import Dict, List, Tuple
 
 from vllm.lora.request import LoRARequest
 from vllm.sequence import SamplerOutput, SequenceGroupMetadata
@@ -18,14 +18,14 @@ def init_device(self) -> None:
         raise NotImplementedError
 
     @abstractmethod
-    def determine_num_available_blocks(self) -> tuple[int, int]:
+    def determine_num_available_blocks(self) -> Tuple[int, int]:
         """Determine the number of available blocks for the GPU KV cache and
         swappable CPU KV cache.
 
         The implementation may run profiling or other heuristics to determine
         the size of caches.
 
-        Returns a tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
+        Returns a Tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
         are blocks that are "active" on the device and can be appended to.
         num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
         appended to.