From d3c8180ac4143f4affd2ef26855058e96b72b5f5 Mon Sep 17 00:00:00 2001
From: Jack Gordley
Date: Tue, 23 Apr 2024 12:06:29 +0100
Subject: [PATCH] [Bugfix] Fixing max token error message for openai compatible server (#4016)

---
 vllm/entrypoints/openai/serving_engine.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 610e807cae4c7..31da27a447c6c 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -206,6 +206,12 @@ def _validate_prompt_and_tokenize(
         token_num = len(input_ids)
 
         if request.max_tokens is None:
+            if token_num >= self.max_model_len:
+                raise ValueError(
+                    f"This model's maximum context length is "
+                    f"{self.max_model_len} tokens. However, you requested "
+                    f"{token_num} tokens in the messages, "
+                    f"Please reduce the length of the messages.", )
             request.max_tokens = self.max_model_len - token_num
 
         if token_num + request.max_tokens > self.max_model_len:
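
Note: the following is a minimal, self-contained sketch (not part of the patch) of the validation behaviour this change introduces: when max_tokens is omitted and the tokenized prompt already meets or exceeds the model's context length, the request is rejected with a descriptive message instead of proceeding with a zero or negative completion budget. The helper name check_prompt_fits and its standalone signature are illustrative assumptions; in vLLM the logic lives in OpenAIServing._validate_prompt_and_tokenize as shown in the diff above.

from typing import Optional


def check_prompt_fits(token_num: int,
                      max_model_len: int,
                      max_tokens: Optional[int] = None) -> int:
    """Hypothetical stand-in for the patched validation logic."""
    if max_tokens is None:
        # New check added by this patch: an over-long prompt with no explicit
        # max_tokens now raises a clear error instead of silently producing a
        # non-positive completion budget.
        if token_num >= max_model_len:
            raise ValueError(
                f"This model's maximum context length is {max_model_len} "
                f"tokens. However, you requested {token_num} tokens in the "
                f"messages, Please reduce the length of the messages.")
        max_tokens = max_model_len - token_num
    # Pre-existing check: prompt plus requested completion must still fit.
    if token_num + max_tokens > max_model_len:
        raise ValueError(
            "Prompt and max_tokens together exceed the model's context length.")
    return max_tokens


# Example: a 4096-token prompt against a 4096-token context window now fails
# with the descriptive message above rather than computing max_tokens == 0.
# check_prompt_fits(token_num=4096, max_model_len=4096)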