From fb1c3375427cfa36e1b57da85eb1759d3796ce7e Mon Sep 17 00:00:00 2001
From: leiwen83
Date: Wed, 3 Apr 2024 02:56:26 +0800
Subject: [PATCH] Fix early CUDA init via get_architecture_class_name import (#3770)

Signed-off-by: Lei Wen
Co-authored-by: Lei Wen
---
 vllm/engine/llm_engine.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 7047b23bbe27f..cd7fc5fdfcee1 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -13,7 +13,6 @@
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
-from vllm.model_executor.model_loader import get_architecture_class_name
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import (MultiModalData, SamplerOutput, Sequence,
@@ -115,6 +114,8 @@ def __init__(
 
         # If usage stat is enabled, collect relevant info.
         if is_usage_stats_enabled():
+            from vllm.model_executor.model_loader import (
+                get_architecture_class_name)
             usage_message.report_usage(
                 get_architecture_class_name(model_config),
                 usage_context,