diff --git a/ci/L0_backend_vllm/enabled_stream/enabled_stream_test.py b/ci/L0_backend_vllm/enabled_stream/enabled_stream_test.py
index 5d82333e..79452609 100644
--- a/ci/L0_backend_vllm/enabled_stream/enabled_stream_test.py
+++ b/ci/L0_backend_vllm/enabled_stream/enabled_stream_test.py
@@ -72,7 +72,7 @@ async def request_iterator():
             result, error = response
             if expect_error:
                 self.assertIsInstance(error, InferenceServerException)
-                self.assertEquals(
+                self.assertEqual(
                     error.message(),
                     "Error generating stream: When streaming, `exclude_input_in_output` = False is not allowed.",
                     error,
diff --git a/ci/L0_backend_vllm/test.sh b/ci/L0_backend_vllm/test.sh
index a9f89894..074c0a8c 100755
--- a/ci/L0_backend_vllm/test.sh
+++ b/ci/L0_backend_vllm/test.sh
@@ -28,7 +28,7 @@
 RET=0
 SUBTESTS="accuracy_test request_cancellation enabled_stream vllm_backend metrics_test"
 
-python3 -m pip install --upgrade pip && pip3 install tritonclient[grpc]
+python3 -m pip install tritonclient[grpc]
 
 for TEST in ${SUBTESTS}; do
     (cd ${TEST} && bash -ex test.sh && cd ..)
diff --git a/ci/L0_multi_gpu_vllm/test.sh b/ci/L0_multi_gpu_vllm/test.sh
index 50e10d0b..59123f13 100755
--- a/ci/L0_multi_gpu_vllm/test.sh
+++ b/ci/L0_multi_gpu_vllm/test.sh
@@ -28,7 +28,7 @@
 RET=0
 SUBTESTS="vllm_backend multi_lora"
 
-python3 -m pip install --upgrade pip && pip3 install tritonclient[grpc]
+python3 -m pip install tritonclient[grpc]
 
 for TEST in ${SUBTESTS}; do
     (cd ${TEST} && bash -ex test.sh && cd ..)