diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md index a196f8b1e574e..9b29ca66022cb 100644 --- a/docs/source/serving/openai_compatible_server.md +++ b/docs/source/serving/openai_compatible_server.md @@ -62,6 +62,32 @@ completion = client.chat.completions.create( ) ``` +### Extra HTTP Headers + +Only the `X-Request-Id` HTTP request header is supported for now. + +```python +completion = client.chat.completions.create( + model="NousResearch/Meta-Llama-3-8B-Instruct", + messages=[ + {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"} + ], + extra_headers={ + "x-request-id": "sentiment-classification-00001", + } +) +print(completion._request_id) + +completion = client.completions.create( + model="NousResearch/Meta-Llama-3-8B-Instruct", + prompt="A robot may not injure a human being", + extra_headers={ + "x-request-id": "completion-test", + } +) +print(completion._request_id) +``` + ### Extra Parameters for Completions API The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported. 
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 917b347ff1161..b8b7912742d45 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -7,6 +7,7 @@ import signal import socket import tempfile +import uuid from argparse import Namespace from contextlib import asynccontextmanager from functools import partial @@ -475,6 +476,13 @@ async def authentication(request: Request, call_next): status_code=401) return await call_next(request) + @app.middleware("http") + async def add_request_id(request: Request, call_next): + request_id = request.headers.get("X-Request-Id") or uuid.uuid4().hex + response = await call_next(request) + response.headers["X-Request-Id"] = request_id + return response + for middleware in args.middleware: module_path, object_name = middleware.rsplit(".", 1) imported = getattr(importlib.import_module(module_path), object_name)