Bump transformers version for Llama 3.1 hotfix and patch Chameleon (#…
ywang96 authored Jul 23, 2024
1 parent 507ef78 commit 1bedf21
Showing 7 changed files with 33 additions and 178 deletions.
2 changes: 1 addition & 1 deletion requirements-common.txt
@@ -6,7 +6,7 @@ numpy < 2.0.0
requests
tqdm
py-cpuinfo
transformers >= 4.42.4 # Required for Gemma 2 and for additional chat template parameters.
transformers >= 4.43.1 # Required for Chameleon and Llama 3.1 hotfix.
tokenizers >= 0.19.1 # Required for Llama 3.
fastapi
aiohttp
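
Since this floor is what the rest of the commit builds on (the Chameleon code below imports its config classes straight from transformers), a quick environment check can help when picking up the change. A minimal sketch, assuming only that packaging is installed alongside transformers:

from packaging.version import Version
import transformers

# Mirror the requirements-common.txt floor: 4.43.1 is what this commit relies on
# for the Chameleon config classes and the Llama 3.1 rope-scaling hotfix.
assert Version(transformers.__version__) >= Version("4.43.1"), (
    f"transformers {transformers.__version__} is older than the required 4.43.1")
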
53 changes: 27 additions & 26 deletions tests/test_config.py
@@ -64,9 +64,8 @@ def test_get_sliding_window():


def test_rope_customization():
TEST_ROPE_SCALING = {"type": "dynamic", "factor": 2.0}
TEST_ROPE_SCALING = {"rope_type": "dynamic", "factor": 2.0}
TEST_ROPE_THETA = 16_000_000.0
LONGCHAT_ROPE_SCALING = {"type": "linear", "factor": 8.0}

llama_model_config = ModelConfig(
"meta-llama/Meta-Llama-3-8B-Instruct",
@@ -96,27 +95,29 @@ def test_rope_customization():
None) == TEST_ROPE_THETA
assert llama_model_config.max_model_len == 16384

longchat_model_config = ModelConfig(
"lmsys/longchat-13b-16k",
"lmsys/longchat-13b-16k",
tokenizer_mode="auto",
trust_remote_code=False,
dtype="float16",
seed=0,
)
assert getattr(longchat_model_config.hf_config, "rope_scaling",
None) == LONGCHAT_ROPE_SCALING
assert longchat_model_config.max_model_len == 16384

longchat_model_config = ModelConfig(
"lmsys/longchat-13b-16k",
"lmsys/longchat-13b-16k",
tokenizer_mode="auto",
trust_remote_code=False,
dtype="float16",
seed=0,
rope_scaling=TEST_ROPE_SCALING,
)
assert getattr(longchat_model_config.hf_config, "rope_scaling",
None) == TEST_ROPE_SCALING
assert longchat_model_config.max_model_len == 4096
# TODO: add these back when the rope configs are fixed
# LONGCHAT_ROPE_SCALING = {"rope_type": "linear", "factor": 8.0}
# longchat_model_config = ModelConfig(
# "lmsys/longchat-13b-16k",
# "lmsys/longchat-13b-16k",
# tokenizer_mode="auto",
# trust_remote_code=False,
# dtype="float16",
# seed=0,
# )
# assert getattr(longchat_model_config.hf_config, "rope_scaling",
# None) == LONGCHAT_ROPE_SCALING
# assert longchat_model_config.max_model_len == 16384

# longchat_model_config = ModelConfig(
# "lmsys/longchat-13b-16k",
# "lmsys/longchat-13b-16k",
# tokenizer_mode="auto",
# trust_remote_code=False,
# dtype="float16",
# seed=0,
# rope_scaling=TEST_ROPE_SCALING,
# )
# assert getattr(longchat_model_config.hf_config, "rope_scaling",
# None) == TEST_ROPE_SCALING
# assert longchat_model_config.max_model_len == 4096
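
The surviving assertions capture the substantive change: the rope-scaling override is now keyed by "rope_type", matching what transformers 4.43 expects, rather than the old "type" key. A minimal sketch of the same override outside the test suite, reusing the ModelConfig call shape shown above (the model id and assertion are illustrative):

from vllm.config import ModelConfig

# New-style rope scaling dict: "rope_type" rather than the old "type" key.
rope_scaling = {"rope_type": "dynamic", "factor": 2.0}

model_config = ModelConfig(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_mode="auto",
    trust_remote_code=False,
    dtype="float16",
    seed=0,
    rope_scaling=rope_scaling,
)
# The override should land on the underlying HF config, as the test asserts.
assert getattr(model_config.hf_config, "rope_scaling", None) == rope_scaling
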
2 changes: 0 additions & 2 deletions vllm/model_executor/models/__init__.py
@@ -16,8 +16,6 @@
"BaiChuanForCausalLM": ("baichuan", "BaiChuanForCausalLM"), # baichuan-7b
"BaichuanForCausalLM": ("baichuan", "BaichuanForCausalLM"), # baichuan-13b
"BloomForCausalLM": ("bloom", "BloomForCausalLM"),
#TODO(ywang96): remove this when huggingface fixes the model repo
"ChameleonForCausalLM": ("chameleon", "ChameleonForConditionalGeneration"),
"ChameleonForConditionalGeneration":
("chameleon", "ChameleonForConditionalGeneration"),
"ChatGLMModel": ("chatglm", "ChatGLMForCausalLM"),
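
With the Hugging Face model repo now advertising the canonical ChameleonForConditionalGeneration architecture, the workaround alias no longer needs to be registered. A small sanity check against the public registry, as a sketch (assuming ModelRegistry.get_supported_archs() behaves as it does in this vLLM version):

from vllm import ModelRegistry

# Only the canonical architecture name should remain; the temporary
# "ChameleonForCausalLM" alias has been dropped.
archs = ModelRegistry.get_supported_archs()
assert "ChameleonForConditionalGeneration" in archs
assert "ChameleonForCausalLM" not in archs
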
3 changes: 1 addition & 2 deletions vllm/model_executor/models/chameleon.py
@@ -6,6 +6,7 @@
import torch.nn.functional as F
from PIL import Image
from torch import nn
from transformers import ChameleonConfig, ChameleonVQVAEConfig

from vllm.attention import Attention, AttentionMetadata
from vllm.config import CacheConfig, MultiModalConfig
@@ -30,8 +31,6 @@
from vllm.multimodal.image import (cached_get_tokenizer,
repeat_and_pad_image_tokens)
from vllm.sequence import IntermediateTensors, SamplerOutput, SequenceData
from vllm.transformers_utils.configs import (ChameleonConfig,
ChameleonVQVAEConfig)
from vllm.utils import print_warning_once

from .interfaces import SupportsVision
9 changes: 4 additions & 5 deletions vllm/transformers_utils/config.py
@@ -5,10 +5,10 @@

from vllm.envs import VLLM_USE_MODELSCOPE
from vllm.logger import init_logger
from vllm.transformers_utils.configs import (ChameleonConfig, ChatGLMConfig,
DbrxConfig, JAISConfig,
MedusaConfig, MLPSpeculatorConfig,
MPTConfig, RWConfig)
from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
JAISConfig, MedusaConfig,
MLPSpeculatorConfig, MPTConfig,
RWConfig)

if VLLM_USE_MODELSCOPE:
from modelscope import AutoConfig
@@ -18,7 +18,6 @@
logger = init_logger(__name__)

_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
"chameleon": ChameleonConfig,
"chatglm": ChatGLMConfig,
"dbrx": DbrxConfig,
"mpt": MPTConfig,
4 changes: 0 additions & 4 deletions vllm/transformers_utils/configs/__init__.py
@@ -1,5 +1,3 @@
from vllm.transformers_utils.configs.chameleon import (ChameleonConfig,
ChameleonVQVAEConfig)
from vllm.transformers_utils.configs.chatglm import ChatGLMConfig
from vllm.transformers_utils.configs.dbrx import DbrxConfig
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and
@@ -12,8 +10,6 @@
from vllm.transformers_utils.configs.mpt import MPTConfig

__all__ = [
"ChameleonConfig",
"ChameleonVQVAEConfig",
"ChatGLMConfig",
"DbrxConfig",
"MPTConfig",
138 changes: 0 additions & 138 deletions vllm/transformers_utils/configs/chameleon.py

This file was deleted.
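
The vendored copy can go because the same classes now ship upstream, which is also why chameleon.py above imports them from transformers. A quick check of the upstream path, as a sketch (the repo id is illustrative and the checkpoint may be gated):

from transformers import AutoConfig, ChameleonConfig

# With transformers >= 4.43.1, AutoConfig resolves Chameleon checkpoints on its
# own, so vLLM no longer needs a _CONFIG_REGISTRY entry or a vendored config.
config = AutoConfig.from_pretrained("facebook/chameleon-7b")  # illustrative repo id
assert isinstance(config, ChameleonConfig)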
