fix vllm load error
wuzhaoxin committed Oct 12, 2024
1 parent 0db5574 commit 4257633
Showing 3 changed files with 50 additions and 40 deletions.
21 changes: 9 additions & 12 deletions xinference/model/llm/llm_family.json
@@ -6909,18 +6909,15 @@
"model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
}
],
"prompt_style":{
"style_name":"QWEN",
"system_prompt":"You are a helpful assistant",
"roles":[
"user",
"assistant"
],
"stop": [
"<|im_end|>",
"<|endoftext|>"
]
}
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
"stop_token_ids": [
151645,
151643
],
"stop": [
"<|im_end|>",
"<|endoftext|>"
]
},
{
"version": 1,
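The chat_template added above is plain Jinja2, so the prompt it produces can be checked outside of Xinference. Below is a minimal sketch, assuming the template string is read from this llm_family.json, that the family is registered under the model name "qwen2-vl-instruct", and that jinja2 is installed; the message layout follows the multimodal content-list shape the template iterates over.

import json
from jinja2 import Template

# Hypothetical local path to the spec edited in this commit.
with open("xinference/model/llm/llm_family.json") as f:
    families = json.load(f)

# Assumes the family is registered as "qwen2-vl-instruct".
spec = next(f for f in families if f["model_name"] == "qwen2-vl-instruct")
template = Template(spec["chat_template"])

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image_url": {"url": "file:///tmp/example.png"}},
            {"type": "text", "text": "What is in this picture?"},
        ],
    }
]

print(template.render(messages=messages, add_generation_prompt=True))
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# <|vision_start|><|image_pad|><|vision_end|>What is in this picture?<|im_end|>
# <|im_start|>assistant

The rendered text matches the Qwen2-VL prompt format, with each image replaced by the <|vision_start|><|image_pad|><|vision_end|> placeholder.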
17 changes: 9 additions & 8 deletions xinference/model/llm/llm_family_modelscope.json
@@ -4627,14 +4627,15 @@
"model_hub": "modelscope"
}
],
"prompt_style": {
"style_name": "QWEN",
"system_prompt": "You are a helpful assistant",
"roles": [
"user",
"assistant"
]
}
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
"stop_token_ids": [
151645,
151643
],
"stop": [
"<|im_end|>",
"<|endoftext|>"
]
},
{
"version": 1,
52 changes: 32 additions & 20 deletions xinference/model/llm/vllm/core.py
@@ -311,14 +311,6 @@ def _sanitize_model_config(
model_config.setdefault("max_num_seqs", 256)
model_config.setdefault("quantization", None)
model_config.setdefault("max_model_len", None)
- if vllm.__version__ >= "0.6.1":
-     model_config["limit_mm_per_prompt"] = (
-         json.loads(model_config.get("limit_mm_per_prompt"))  # type: ignore
-         if model_config.get("limit_mm_per_prompt")
-         else {
-             "image": 2,  # default 2 images all chat
-         }
-     )

return model_config

@@ -738,17 +730,32 @@ def match(
return False
return VLLM_INSTALLED

- def load(self):
-     super().load()
+ def _sanitize_model_config(
+     self, model_config: Optional[VLLMModelConfig]
+ ) -> VLLMModelConfig:
+     if model_config is None:
+         model_config = VLLMModelConfig()

-     self._processor = None
-     model_family = self.model_family.model_family or self.model_family.model_name
-     if "qwen2-vl" in model_family.lower():
-         from transformers import AutoProcessor
+     cuda_count = self._get_cuda_count()

-         self._processor = AutoProcessor.from_pretrained(
-             self.model_path, trust_remote_code=True
-         )
+     model_config.setdefault("tokenizer_mode", "auto")
+     model_config.setdefault("trust_remote_code", True)
+     model_config.setdefault("tensor_parallel_size", cuda_count)
+     model_config.setdefault("block_size", 16)
+     model_config.setdefault("swap_space", 4)
+     model_config.setdefault("gpu_memory_utilization", 0.90)
+     model_config.setdefault("max_num_seqs", 256)
+     model_config.setdefault("quantization", None)
+     model_config.setdefault("max_model_len", None)
+     model_config["limit_mm_per_prompt"] = (
+         json.loads(model_config.get("limit_mm_per_prompt"))  # type: ignore
+         if model_config.get("limit_mm_per_prompt")
+         else {
+             "image": 2,  # default 2 images all chat
+         }
+     )

+     return model_config

def _sanitize_chat_config(
self,
@@ -777,14 +784,19 @@ async def async_chat(
request_id: Optional[str] = None,
) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
messages = self._transform_messages(messages)
+     tools = generate_config.pop("tools", []) if generate_config else None

model_family = self.model_family.model_family or self.model_family.model_name

if "qwen2-vl" in model_family.lower():
if "internvl2" not in model_family.lower():
from qwen_vl_utils import process_vision_info

-         prompt = self._processor.apply_chat_template(
-             messages, tokenize=False, add_generation_prompt=True
+         full_context_kwargs = {}
+         if tools and model_family in QWEN_TOOL_CALL_FAMILY:
+             full_context_kwargs["tools"] = tools
+         assert self.model_family.chat_template is not None
+         prompt = self.get_full_context(
+             messages, self.model_family.chat_template, **full_context_kwargs
)
images, video_inputs = process_vision_info(messages)
if video_inputs:
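Because limit_mm_per_prompt is now applied in the vision path's own _sanitize_model_config, the per-prompt image cap can still be raised at launch time by passing it as a JSON string; the code above decodes it with json.loads and falls back to two images per chat. Below is a rough sketch using the RESTful client, where the model name, format, and quantization values are placeholders for whatever Qwen2-VL build is actually deployed.

from xinference.client import Client

client = Client("http://127.0.0.1:9997")

# Extra keyword arguments are forwarded into the vLLM model config,
# so the JSON string below reaches _sanitize_model_config and is
# decoded into {"image": 4}.
model_uid = client.launch_model(
    model_name="qwen2-vl-instruct",   # placeholder family name
    model_engine="vllm",
    model_format="gptq",              # placeholder format
    quantization="Int4",              # placeholder quantization
    limit_mm_per_prompt='{"image": 4}',
)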
