feat: OpenAI Compatible Frontend #7561
Merged
Commits
81 commits
637db32
Initial code migration, start the testing structure
rmccorm4 e14128b
Restructure to recommended FastAPI project structure, add simple test…
rmccorm4 a37b0b3
Start a CONTRIBUTING.md
rmccorm4 7eb1ffc
Add simple /completions endpoint test
rmccorm4 530c871
Add some plumbing for /v1/models routes, add mock_llm python model to…
rmccorm4 9eba9c3
Add simple tests for /v1/models and remove chat_completions test unti…
rmccorm4 fb7ce72
Add some basic chat completions support and testing
rmccorm4 0cf8fae
WIP: Add OpenAI client test that works when server is already running…
rmccorm4 3d227dd
Flesh out /completions tests more, refactor to class fixture for runn…
rmccorm4 4c1ac55
Update chat completions schema to enforce max_tokens >= 0, and lower …
rmccorm4 5b15877
Add more tests around max_tokens and temperature behavior, as well as…
rmccorm4 f9f4b07
Remove unused parts from tokenizer.py
rmccorm4 773aee0
All existing tests passing for both TRT-LLM and vLLM, updated model l…
rmccorm4 567abf3
Add streaming test placeholders, add test where no tokenizer is defined
rmccorm4 6e1bfaf
Add OpenAI Python Client tests, add streaming chat completions test, …
rmccorm4 4e3a441
Add 'echo' parameter test, but skip it for TRT-LLm due to only suppor…
rmccorm4 523f369
Fix issue with finish_reason for non-streaming completion when using …
rmccorm4 75f71ce
Move triton response validation into common triton utils
rmccorm4 118887c
Reduce code copying and global variables, use conftest.py for shared …
rmccorm4 6cf2e77
Split Dockefile in 2 to capture llama3.1 requirement for vllm
rmccorm4 66afc48
Split Dockerfile in 2 to capture llama3.1 requirement for vllm
rmccorm4 0bbd248
Add configurable model parameter to examples
rmccorm4 6e59f6e
Fix streaming for genai-perf by setting the content-type to text/even…
rmccorm4 763b3a4
Update examples to default to vllm model for simplicity
rmccorm4 0328ea6
Start high level README for other developers
rmccorm4 43dd329
Move openai source code into server/python/openai folder, and flesh o…
rmccorm4 363b40e
Move openai code to server/python folder
rmccorm4 d35d336
Add disclaimer for TRT-LLM to README
rmccorm4 63fc4a7
Fix README typos
rmccorm4 4a729c0
Fix relative path for OpenAI server helper after moving locations
rmccorm4 0f459b1
Add placeholder L0_openai test folder back
rmccorm4 0b3def0
Add transformers upgrade for Llama3.1 in vllm
rmccorm4 2e897b9
Add requirements.txt files for use in testing
rmccorm4 f54a4fa
Add placeholder test script
rmccorm4 c2786b2
Cleanup test script for local file reference
rmccorm4 021c577
Fix paths and empty function
rmccorm4 a69bfd1
Install tritonserver python wheel
rmccorm4 6361bd1
Add TRT-LLM detection and model repo generation
rmccorm4 c096ba5
Fix trtllm model count comparison to 4, excluding ensemble
rmccorm4 5631231
Fail on pytest errors
rmccorm4 e77f85c
Try copying engines out of NFS mount for faster test I/O
rmccorm4 b41a6f7
Use model var
rmccorm4 8251923
Time the duration of copying from nfs mount
rmccorm4 f928a81
Try rsync over cp
rmccorm4 81ef479
Remove use of NFS mount due to slow I/O for now
rmccorm4 42676da
Propagate test failure to job failure and log collection
rmccorm4 cacaf0b
Add xml files to gitignore
rmccorm4 b6c3f9e
Test /v1/models with multiple models and remove TODOs
rmccorm4 5cc80fe
Add openai folder copy to gitignore in testing
rmccorm4 9f70a1d
Add streaming completion test, remove trtllm models from git repo
rmccorm4 d00d237
Remove unnecessary TODOs
rmccorm4 ae2fcd6
Add copyrights and replace dupe test model
rmccorm4 fc4c15a
Add disclaimer around application state and multiprocessing
rmccorm4 1ca9889
Address CodeQL warnings
rmccorm4 92a27e5
Add quickstart vllm dockerfile for sharing purposes
rmccorm4 9c3ee15
Remove workspace mount mention
rmccorm4 886ee7d
Review feedback: rename package, move tests out of package, remove ne…
rmccorm4 21c0996
Review feedback: naming nits, more type hints, helper functions
rmccorm4 f84aec4
Fix CodeQL import warning
rmccorm4 b230697
refactor: Use thinner API server with an engine interface (#7570)
rmccorm4 ea23eeb
Update dockerfile branch, fix CodeQL error
rmccorm4 156535c
Add tests for custom tokenizers by local file path
rmccorm4 9b7dc59
Expose --backend request format override to main.py, and expose env v…
rmccorm4 a1484e4
Fix tokenizer test, remove TODO
rmccorm4 33eee48
perf: Improve chat completions performance at high concurrency (#7653)
rmccorm4 0882b60
review feedback: use _to_string helper function, add some clarifying …
rmccorm4 f073fbf
feat: KServe Bindings to start tritonfrontend (#7662)
KrishnanPrash 2d0f7e6
chore: Fix argparse typo, cleanup argparse groups, make kserve fronte…
rmccorm4 78e571d
fix: Support sampling parameters of type List for vLLM backend (stop …
rmccorm4 579ad63
Review feedback: remove examples/ and docker/ folders, update README …
rmccorm4 815eebe
Add a few FIXMEs for follow-up
rmccorm4 8f92734
Add requirements.txt back in, fix test and docs accordingly
rmccorm4 5c0b2e6
Fix TRT-LLM model repo test path
rmccorm4 44b2282
Explicitly return error on unknown fields not defined in schema, excl…
rmccorm4 dc7bdf4
Merge branch 'main' of github.com:triton-inference-server/server into…
rmccorm4 49162be
Add missing copyright headers
rmccorm4 fe45d39
Review feedback: split app and test requirements to 2 requirements files
rmccorm4 2261d13
Fix whitespace pre-commit, remove auto 'git add' from copyright tool
rmccorm4 2e2a190
Disable copyright pre-commit hook until fixed on GitHub Actions side
rmccorm4 cc8657d
Fix attribution for tokenizer util
rmccorm4 fa9501e
Fix copyright header on copyright tool, remove unused import
rmccorm4
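The commit history above builds out OpenAI-compatible routes such as /v1/models and /v1/completions on FastAPI. As a rough illustration of the response shape the /v1/models route returns, here is a minimal stdlib-only sketch; the `owned_by` value is an assumption, and `mock_llm` is borrowed from the test model mentioned in the commits, not the PR's actual implementation:

```python
from dataclasses import dataclass, field, asdict
import time


@dataclass
class Model:
    """One entry in an OpenAI-style model list."""

    id: str
    object: str = "model"
    created: int = field(default_factory=lambda: int(time.time()))
    owned_by: str = "triton"  # assumed owner string, not from the PR


def list_models(names: list[str]) -> dict:
    """Build an OpenAI-style /v1/models response body."""
    return {"object": "list", "data": [asdict(Model(id=n)) for n in names]}
```

In the PR itself, a route handler would return this body as JSON; the sketch only shows the wire format.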
NOTE: It may make more sense to explicitly "include" the supported sampling parameters, but that list would be of similar length or longer. Both approaches likely require periodic updates: either to include a new sampling field, or to exclude a new non-sampling field.
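The "include" approach discussed here pairs with the earlier commit that explicitly returns an error on unknown fields not defined in the schema. A minimal hand-rolled sketch of that check, assuming a hypothetical allow-set of request fields (the PR's real schema is declared via its request models, not this helper, and the field names below are illustrative):

```python
# Hypothetical allow-set of recognized request fields; a new sampling
# parameter would need to be added here, mirroring the maintenance
# trade-off described in the comment above.
KNOWN_FIELDS = {"model", "prompt", "max_tokens", "temperature", "stream", "stop"}


def check_unknown_fields(payload: dict) -> None:
    """Raise if the request body carries fields not defined in the schema."""
    unknown = set(payload) - KNOWN_FIELDS
    if unknown:
        raise ValueError(f"unknown request fields: {sorted(unknown)}")
```

Either direction (allow-list or deny-list) centralizes the decision in one set, so the periodic update the comment anticipates is a one-line change.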