
Commit

fix python 3.8 syntax (vllm-project#2716)
simon-mo authored Feb 1, 2024
1 parent 923797f commit b9e96b1
Showing 2 changed files with 21 additions and 8 deletions.
15 changes: 14 additions & 1 deletion Dockerfile
@@ -4,8 +4,21 @@
 #################### BASE BUILD IMAGE ####################
 FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev

+# Set the DEBIAN_FRONTEND variable to noninteractive to avoid interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Preconfigure tzdata for US Central Time (build running in us-central-1 but this really doesn't matter.)
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Chicago' | debconf-set-selections
+
+# We install an older version of python here for testing to make sure vllm works with older versions of Python.
+# For the actual openai compatible server, we will use the latest version of Python.
 RUN apt-get update -y \
-    && apt-get install -y python3-pip git
+    && apt-get install -y software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update -y \
+    && apt-get install -y python3.8 python3.8-dev python3.8-venv python3-pip git \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1

 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
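As the new comments explain, the dev stage deliberately pins Python 3.8 (installed from the deadsnakes PPA and selected via update-alternatives) so that incompatibilities with older Python versions surface during testing. A quick sanity check along these lines (hypothetical, not part of the commit) could be run inside the image to confirm which interpreter python3 now resolves to:

# Hypothetical check, not part of the commit: confirm the dev image's python3
# is the 3.8 interpreter selected via update-alternatives.
import sys

major, minor = sys.version_info[:2]
assert (major, minor) == (3, 8), (
    f"expected Python 3.8 for compatibility testing, got {major}.{minor}")
print("python3 resolves to", sys.executable, "version", sys.version.split()[0])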
14 changes: 7 additions & 7 deletions vllm/entrypoints/openai/serving_completion.py
@@ -1,7 +1,7 @@
 import asyncio
 import time
 from fastapi import Request
-from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional
+from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional, Dict, Tuple
 from vllm.logger import init_logger
 from vllm.utils import random_uuid
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -19,8 +19,8 @@

 logger = init_logger(__name__)

-TypeTokenIDs = list[int]
-TypeTopLogProbs = List[Optional[dict[int, float]]]
+TypeTokenIDs = List[int]
+TypeTopLogProbs = List[Optional[Dict[int, float]]]
 TypeCreateLogProbsFn = Callable[
     [TypeTokenIDs, TypeTopLogProbs, Optional[int], int], LogProbs]

@@ -29,7 +29,7 @@ async def completion_stream_generator(
         request: CompletionRequest,
         raw_request: Request,
         on_abort,
-        result_generator: AsyncIterator[tuple[int, RequestOutput]],
+        result_generator: AsyncIterator[Tuple[int, RequestOutput]],
         create_logprobs_fn: TypeCreateLogProbsFn,
         request_id: str,
         created_time: int,
@@ -126,7 +126,7 @@ async def completion_stream_generator(
     yield "data: [DONE]\n\n"


-def parse_prompt_format(prompt) -> tuple[bool, list]:
+def parse_prompt_format(prompt) -> Tuple[bool, list]:
     # get the prompt, openai supports the following
     # "a string, array of strings, array of tokens, or array of token arrays."
     prompt_is_tokens = False
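Only the return annotation of parse_prompt_format changes here, but the comment describes its job: normalizing the four prompt shapes the OpenAI completion API accepts. A rough sketch of such a helper (an assumption — the function body is not shown in this diff) might branch on the input like this:

# Sketch of a parse_prompt_format-style helper (assumed logic; the real body
# is not part of this diff). Returns (prompt_is_tokens, list_of_prompts).
from typing import Tuple


def parse_prompt_format_sketch(prompt) -> Tuple[bool, list]:
    prompt_is_tokens = False
    prompts = [prompt]  # a single string prompt
    if isinstance(prompt, list) and len(prompt) > 0:
        if isinstance(prompt[0], str):
            prompts = prompt  # array of string prompts
        elif isinstance(prompt[0], int):
            prompt_is_tokens = True
            prompts = [prompt]  # a single array of tokens
        elif isinstance(prompt[0], list):
            prompt_is_tokens = True
            prompts = prompt  # array of token arrays
        else:
            raise ValueError("prompt must be a string, array of strings, "
                             "array of tokens, or array of token arrays")
    return prompt_is_tokens, prompts


assert parse_prompt_format_sketch("hi there") == (False, ["hi there"])
assert parse_prompt_format_sketch([[1, 2], [3]]) == (True, [[1, 2], [3]])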
@@ -151,7 +151,7 @@ def parse_prompt_format(prompt) -> tuple[bool, list]:


 def request_output_to_completion_response(
-        final_res_batch: list[RequestOutput],
+        final_res_batch: List[RequestOutput],
         request: CompletionRequest,
         create_logprobs_fn: TypeCreateLogProbsFn,
         request_id: str,
@@ -302,7 +302,7 @@ async def create_completion(self, request: CompletionRequest,
         except ValueError as e:
             return self.create_error_response(str(e))

-        result_generator: AsyncIterator[tuple[
+        result_generator: AsyncIterator[Tuple[
             int, RequestOutput]] = merge_async_iterators(*generators)

         # Similar to the OpenAI API, when n != best_of, we do not stream the
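All of the typing changes in this file follow one rule: PEP 585 built-in generics such as list[int], dict[int, float], and tuple[int, RequestOutput] are only subscriptable from Python 3.9 onward, so on the Python 3.8 interpreter now installed in the dev image the module would fail at import time. A minimal standalone reproduction (not part of the commit) of the failure and the 3.8-compatible spelling:

# Minimal sketch (not part of the commit): list[int] needs Python 3.9+, while
# the typing module aliases used by this change also work on Python 3.8.
import sys
from typing import Dict, List, Optional

if sys.version_info < (3, 9):
    try:
        list[int]  # evaluated eagerly, so importing a module that uses it fails
    except TypeError as exc:
        print("Python", sys.version.split()[0], "rejects list[int]:", exc)

# The 3.8-compatible aliases adopted by the commit:
TypeTokenIDs = List[int]
TypeTopLogProbs = List[Optional[Dict[int, float]]]
print(TypeTokenIDs, TypeTopLogProbs)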
