diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index b7de816ce454a..6c395c74c97b2 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -1,8 +1,8 @@ import asyncio import time from functools import partial -from typing import (Any, AsyncIterator, Callable, Dict, Iterable, List, - Optional, Set, Tuple, Type, Union) +from typing import (AsyncIterator, Callable, Dict, Iterable, List, Optional, + Set, Tuple, Type, Union) from transformers import PreTrainedTokenizer @@ -327,7 +327,7 @@ def __init__(self, # We need to keep a reference to unshielded # task as well to prevent it from being garbage # collected - self._background_loop_unshielded: Optional[asyncio.Task[Any]] = None + self._background_loop_unshielded: Optional[asyncio.Task] = None self.start_engine_loop = start_engine_loop self._errored_with: Optional[BaseException] = None diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index f9e294af47253..44a946f2e32d4 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -4,7 +4,7 @@ import re from contextlib import asynccontextmanager from http import HTTPStatus -from typing import Any, Set +from typing import Set import fastapi import uvicorn @@ -34,7 +34,7 @@ openai_serving_completion: OpenAIServingCompletion logger = init_logger(__name__) -_running_tasks: Set[asyncio.Task[Any]] = set() +_running_tasks: Set[asyncio.Task] = set() @asynccontextmanager