Commit
gc-fu authored Nov 27, 2024
1 parent cb7b089 commit 8331875
Showing 1 changed file with 2 additions and 0 deletions: docker/llm/serving/xpu/docker/vllm_offline_inference.py
@@ -54,6 +54,8 @@
     disable_async_output_proc=True,
     distributed_executor_backend="ray",
     max_model_len=2000,
+    trust_remote_code=True,
+    block_size=8,
     max_num_batched_tokens=2000)
 # Generate texts from the prompts. The output is a list of RequestOutput objects
 # that contain the prompt, generated text, and other information.
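For context, below is a minimal sketch of how the full LLM(...) construction in vllm_offline_inference.py might read after this commit, written against the upstream vLLM Python API. The model path, prompts, and sampling settings are illustrative placeholders, not values taken from this commit.

from vllm import LLM, SamplingParams

# Placeholder prompts and sampling settings (not from this commit).
prompts = ["What is AI?"]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

llm = LLM(model="YOUR_MODEL_PATH",            # placeholder model path
          disable_async_output_proc=True,
          distributed_executor_backend="ray", # requires Ray to be installed
          max_model_len=2000,
          trust_remote_code=True,             # allow custom modeling code shipped with the model
          block_size=8,                       # KV-cache block size, in tokens
          max_num_batched_tokens=2000)

# Each RequestOutput carries the prompt, the generated text, and metadata.
for out in llm.generate(prompts, sampling_params):
    print(out.prompt, "->", out.outputs[0].text)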
