
[Model][Bugfix] Implicit model flags and reenable Phi-3-Vision #5896

Merged · 2 commits · Jun 27, 2024
vllm/model_executor/models/baichuan.py (0 additions, 2 deletions)

@@ -295,8 +295,6 @@ def forward(
 
 
 class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "W_pack": ["W_pack"],
         "gate_up_proj": [
vllm/model_executor/models/chatglm.py (0 additions, 2 deletions)

@@ -325,8 +325,6 @@ def forward(
 
 
 class ChatGLMForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "query_key_value": ["query_key_value"],
         "dense_h_to_4h": ["dense_h_to_4h"]
vllm/model_executor/models/gemma.py (0 additions, 2 deletions)

@@ -291,8 +291,6 @@ def forward(
 
 
 class GemmaForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
vllm/model_executor/models/gpt_bigcode.py (0 additions, 2 deletions)

@@ -233,8 +233,6 @@ def forward(
 
 
 class GPTBigCodeForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {"c_attn": ["c_attn"]}
 
     supported_lora_modules = ["c_fc", "c_proj", "wte", "lm_head", "c_attn"]
vllm/model_executor/models/interfaces.py (16 additions, 2 deletions)

@@ -13,7 +13,14 @@
 class SupportsVision(Protocol):
     """The interface required for all vision language models (VLMs)."""
 
-    supports_vision: ClassVar[Literal[True]]
+    supports_vision: ClassVar[Literal[True]] = True
+    """
+    A flag that indicates this model supports vision inputs.
+
+    Note:
+        There is no need to redefine this flag if this class is in the
+        MRO of your model class.
+    """
 
     def __init__(self, *, vlm_config: VisionLanguageConfig) -> None:
         ...

@@ -52,7 +59,14 @@ def supports_vision(
 class SupportsLoRA(Protocol):
     """The interface required for all models that support LoRA."""
 
-    supports_lora: ClassVar[Literal[True]]
+    supports_lora: ClassVar[Literal[True]] = True
+    """
+    A flag that indicates this model supports LoRA.
+
+    Note:
+        There is no need to redefine this flag if this class is in the
+        MRO of your model class.
+    """
 
     packed_modules_mapping: ClassVar[Dict[str, List[str]]]
     supported_lora_modules: ClassVar[List[str]]
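Because `SupportsLoRA` and `SupportsVision` now carry a default value for their flags, any model class that lists one of these protocols as a base inherits the flag through its MRO; that is what makes the per-model `supports_lora = True` / `supports_vision = True` assignments deleted throughout this PR redundant. A minimal self-contained sketch of the mechanism (the model classes below are hypothetical, not vLLM's):

from typing import ClassVar, Dict, List, Literal, Protocol


class SupportsLoRA(Protocol):
    """Trimmed-down version of the interface above."""

    # The default lives on the protocol, so explicit subclasses inherit it.
    supports_lora: ClassVar[Literal[True]] = True

    packed_modules_mapping: ClassVar[Dict[str, List[str]]]
    supported_lora_modules: ClassVar[List[str]]


class ToyForCausalLM(SupportsLoRA):  # hypothetical model class
    packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
    supported_lora_modules = ["qkv_proj"]


class PlainForCausalLM:  # hypothetical model without the interface
    pass


# The flag is found through the MRO without being redefined on the model,
# and its absence on other classes makes a plain getattr check sufficient.
assert getattr(ToyForCausalLM, "supports_lora", False)
assert not getattr(PlainForCausalLM, "supports_lora", False)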
vllm/model_executor/models/llama.py (0 additions, 2 deletions)

@@ -299,8 +299,6 @@ def forward(
 
 
 class LlamaForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
vllm/model_executor/models/llava.py (0 additions, 2 deletions)

@@ -88,8 +88,6 @@ class LlavaImageFeatureInputs(TypedDict):
 @MULTIMODAL_REGISTRY.register_dummy_data(get_dummy_image_data)
 class LlavaForConditionalGeneration(nn.Module, SupportsVision):
 
-    supports_vision = True
-
     def __init__(self,
                  config: LlavaConfig,
                  vlm_config: VisionLanguageConfig,
vllm/model_executor/models/llava_next.py (0 additions, 2 deletions)

@@ -108,8 +108,6 @@ def _image_pixel_processor(
 @MULTIMODAL_REGISTRY.register_dummy_data(_get_dummy_image_data)
 class LlavaNextForConditionalGeneration(nn.Module, SupportsVision):
 
-    supports_vision = True
-
     def __init__(self,
                  config: LlavaNextConfig,
                  vlm_config: VisionLanguageConfig,
vllm/model_executor/models/minicpm.py (0 additions, 2 deletions)

@@ -392,8 +392,6 @@ def forward(
 
 
 class MiniCPMForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
vllm/model_executor/models/mixtral.py (0 additions, 2 deletions)

@@ -475,8 +475,6 @@ def forward(
 
 
 class MixtralForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     fall_back_to_pt_during_load = False
 
     packed_modules_mapping = {
vllm/model_executor/models/phi.py (0 additions, 2 deletions)

@@ -232,8 +232,6 @@ def forward(
 
 
 class PhiForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
vllm/model_executor/models/phi3v.py (10 additions, 6 deletions)

@@ -32,12 +32,13 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.clip import CLIPVisionModel
 from vllm.model_executor.models.llama import LlamaModel
-from vllm.model_executor.models.vlm_base import VisionLanguageModelBase
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.image import ImagePixelData, get_dummy_image_data
 from vllm.sequence import SamplerOutput
 
+from .interfaces import SupportsVision
+
 logger = init_logger(__name__)
 
 _KEYS_TO_MODIFY_MAPPING = {

@@ -317,18 +318,21 @@ def _image_processor(
 
 @MULTIMODAL_REGISTRY.register_image_pixel_input(_image_processor)
 @MULTIMODAL_REGISTRY.register_dummy_data(get_dummy_image_data)
-class Phi3VForCausalLM(VisionLanguageModelBase):
+class Phi3VForCausalLM(nn.Module, SupportsVision):
 
     def __init__(self,
                  config: PretrainedConfig,
-                 vision_language_config: VisionLanguageConfig,
+                 vlm_config: VisionLanguageConfig,
                  cache_config: Optional[CacheConfig] = None,
                  quant_config: Optional[QuantizationConfig] = None) -> None:
-        super().__init__(vision_language_config)
+        super().__init__()
+
+        self.config = config
+        self.vlm_config = vlm_config
 
         self.model = LlamaModel(config, cache_config, quant_config)
         self.vision_embed_tokens = Phi3HDImageEmbedding(
-            vision_language_config, config, self.model.embed_tokens)
+            vlm_config, config, self.model.embed_tokens)
         self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size)
         self.logits_processor = LogitsProcessor(config.vocab_size)
         self.sampler = Sampler()

@@ -338,7 +342,7 @@ def _parse_and_validate_image_input(
         pixel_values = kwargs.pop("pixel_values", None)
         image_sizes = kwargs.pop("image_sizes", None)
 
-        expected_input_type = self.vision_language_config.image_input_type
+        expected_input_type = self.vlm_config.image_input_type
         ImageInputType = VisionLanguageConfig.ImageInputType
 
         if expected_input_type != ImageInputType.PIXEL_VALUES:
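The second hunk header in interfaces.py above shows a `supports_vision(...)` helper alongside the protocols. As a hedged illustration of the pattern this refactor enables (the helper below is a hypothetical stand-in written under that assumption, not necessarily this PR's exact code), vision support can now be detected from the class flag alone:

def detect_vision_support(model: object) -> bool:
    # Sketch only: works for both classes and instances, since the flag
    # defaults to True on SupportsVision and is absent on models that do
    # not list the protocol as a base.
    return bool(getattr(model, "supports_vision", False))


# With this PR, Phi3VForCausalLM subclasses SupportsVision, so a check like
# detect_vision_support(Phi3VForCausalLM) would return True again, which is
# what re-enables Phi-3-Vision in any code path gated on this flag.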
vllm/model_executor/models/qwen2.py (0 additions, 2 deletions)

@@ -266,8 +266,6 @@ def forward(
 
 
 class Qwen2ForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
vllm/model_executor/models/xverse.py (0 additions, 2 deletions)

@@ -269,8 +269,6 @@ def forward(
 
 
 class XverseForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",