opendatahub-io · tjohnson31415 · Nov 15, 2024 · Nov 19, 2024 · Nov 19, 2024
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -32,7 +32,7 @@ jobs:
         pyv: ["3.12"]
         vllm_version:
           # - "" # skip the pypi version as it will not work on CPU
-          - "git+https://github.com/vllm-project/vllm@v0.6.2"
+          - "git+https://github.com/vllm-project/vllm@v0.6.4"
           - "git+https://github.com/vllm-project/vllm@main"
           - "git+https://github.com/opendatahub-io/vllm@main"
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 requires-python = ">=3.9"
 dynamic = ["version"]
 dependencies = [
-  "vllm>=0.6.2",
+  "vllm>=0.6.4",
   "prometheus_client==0.21.0",
   "grpcio==1.67.0",
   "grpcio-health-checking==1.62.2",

diff --git a/src/vllm_tgis_adapter/grpc/grpc_server.py b/src/vllm_tgis_adapter/grpc/grpc_server.py
@@ -16,7 +16,7 @@
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.engine.multiprocessing import MQEngineDeadError
 from vllm.entrypoints.openai.serving_completion import merge_async_iterators
-from vllm.inputs import LLMInputs
+from vllm.inputs import token_inputs
 from vllm.sampling_params import RequestOutputKind, SamplingParams
 from vllm.tracing import (
     contains_trace_headers,
@@ -257,7 +257,7 @@ async def Generate(
                 sampling_params, truncate_input_tokens, req.text, tokenizer, context
             )
 
-            inputs = LLMInputs(
+            inputs = token_inputs(
                 prompt=req.text,
                 prompt_token_ids=input_ids,
             )
@@ -356,7 +356,7 @@ async def GenerateStream(  # noqa: PLR0915, C901
             context,
         )
 
-        inputs = LLMInputs(
+        inputs = token_inputs(
             prompt=request.request.text,
             prompt_token_ids=input_ids,
         )