Skip to content

Commit

Permalink
🧼 remove v4.44 deprecations (#34245)
Browse files Browse the repository at this point in the history
* remove v4.44 deprecations

* PR comments

* deprecations scheduled for v4.50

* hub version update

* make fixup

---------

Co-authored-by: Marc Sun <[email protected]>
Co-authored-by: Arthur <[email protected]>
  • Loading branch information
3 people authored Nov 15, 2024
1 parent 8d50fda commit 1349321
Show file tree
Hide file tree
Showing 17 changed files with 42 additions and 175 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
"fugashi>=1.0",
"GitPython<3.1.19",
"hf-doc-builder>=0.3.0",
"huggingface-hub>=0.23.2,<1.0",
"huggingface-hub>=0.24.0,<1.0",
"importlib_metadata",
"ipadic>=1.0.0,<2.0",
"isort>=5.5.4",
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/dependency_versions_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"fugashi": "fugashi>=1.0",
"GitPython": "GitPython<3.1.19",
"hf-doc-builder": "hf-doc-builder>=0.3.0",
"huggingface-hub": "huggingface-hub>=0.23.2,<1.0",
"huggingface-hub": "huggingface-hub>=0.24.0,<1.0",
"importlib_metadata": "importlib_metadata",
"ipadic": "ipadic>=1.0.0,<2.0",
"isort": "isort>=5.5.4",
Expand Down
88 changes: 1 addition & 87 deletions src/transformers/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
replace_return_docstrings,
strtobool,
)
from .utils.hub import convert_file_size_to_int, create_and_tag_model_card, get_checkpoint_shard_files
from .utils.hub import create_and_tag_model_card, get_checkpoint_shard_files
from .utils.import_utils import (
ENV_VARS_TRUE_VALUES,
is_sagemaker_mp_enabled,
Expand Down Expand Up @@ -381,92 +381,6 @@ def check_support_param_buffer_assignment(model_to_load, state_dict, start_prefi
return False


def shard_checkpoint(
    state_dict: Dict[str, torch.Tensor], max_shard_size: Union[int, str] = "10GB", weights_name: str = WEIGHTS_NAME
):
    """
    Greedily partition a model state dictionary into sub-checkpoints that each stay under a size limit.

    Weights are visited in the iteration order of `state_dict` and appended to the current sub-checkpoint until the
    next weight would push it over `max_shard_size`; a new sub-checkpoint is then started. No attempt is made to pack
    the shards optimally: with a 10GB limit and weights of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] the result is
    [6GB], [6+2GB], [6+2+2GB] rather than [6+2+2GB], [6+2GB], [6GB].

    <Tip warning={true}>

    A single weight larger than `max_shard_size` is placed alone in its own sub-checkpoint, which will therefore
    exceed `max_shard_size`.

    </Tip>

    Args:
        state_dict (`Dict[str, torch.Tensor]`): The state dictionary of a model to save.
        max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
            The maximum size of each sub-checkpoint. If expressed as a string, needs to be digits followed by a unit
            (like `"5MB"`).
        weights_name (`str`, *optional*, defaults to `"pytorch_model.bin"`):
            The name of the model save file.

    Returns:
        A `(shards, index)` tuple. With a single shard, `shards` is `{weights_name: state_dict_subset}` and `index`
        is `None`. Otherwise `shards` maps each generated shard file name to its sub-state-dict and `index` is a dict
        with a `"metadata"` entry (holding `"total_size"`) and a `"weight_map"` entry (weight name -> shard file).
    """
    logger.warning(
        "Note that `shard_checkpoint` is deprecated and will be removed in v4.44. We recommend you using "
        "split_torch_state_dict_into_shards from huggingface_hub library"
    )
    size_limit = convert_file_size_to_int(max_shard_size)

    blocks = [{}]
    current_size = 0
    cumulative_size = 0
    block_of_storage = {}

    for name, tensor in state_dict.items():
        # With bnb serialization some state dict entries are plain strings; they are left out of every shard.
        # See https://github.com/huggingface/transformers/pull/24416 for more details.
        if isinstance(tensor, str):
            continue
        storage_key = id_tensor_storage(tensor)

        # A tensor backed by an already-seen storage goes into the block that holds that storage
        # (tensors on the meta device are excluded from this shortcut).
        owner = block_of_storage.get(storage_key)
        if owner is not None and tensor.device != torch.device("meta"):
            blocks[owner][name] = tensor
            continue

        nbytes = tensor.numel() * dtype_byte_size(tensor.dtype)
        # Open a new block when this tensor would overflow the current one, unless the current block is still
        # empty (an oversized tensor then simply gets a block of its own).
        if current_size + nbytes > size_limit and blocks[-1]:
            blocks.append({})
            current_size = 0

        blocks[-1][name] = tensor
        current_size += nbytes
        cumulative_size += nbytes
        block_of_storage[storage_key] = len(blocks) - 1

    num_blocks = len(blocks)

    # Single shard: no index is needed.
    if num_blocks == 1:
        return {weights_name: blocks[0]}, None

    # Several shards: derive one file name per block and record where every weight lives.
    shards = {}
    weight_map = {}
    for position, block in enumerate(blocks, start=1):
        tag = f"-{position:05d}-of-{num_blocks:05d}"
        file_name = weights_name.replace(".bin", f"{tag}.bin").replace(".safetensors", f"{tag}.safetensors")
        shards[file_name] = block
        weight_map.update((weight_key, file_name) for weight_key in block)

    return shards, {"metadata": {"total_size": cumulative_size}, "weight_map": weight_map}


def load_sharded_checkpoint(model, folder, strict=True, prefer_safe=True):
"""
This is the same as
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/blip_2/modeling_blip_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2203,7 +2203,7 @@ def forward(
logger.warning_once(
"Expanding inputs for image tokens in BLIP-2 should be done in processing. "
"Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
inputs_embeds = torch.cat([language_model_inputs, inputs_embeds.to(language_model_inputs.device)], dim=1)
attention_mask = torch.cat(
Expand Down Expand Up @@ -2326,7 +2326,7 @@ def generate(
logger.warning_once(
"Expanding inputs for image tokens in BLIP-2 should be done in processing. "
"Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
inputs_embeds = torch.cat([language_model_inputs, inputs_embeds.to(language_model_inputs.device)], dim=1)
attention_mask = torch.cat(
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/blip_2/processing_blip_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def __call__(
logger.warning_once(
"Expanding inputs for image tokens in BLIP-2 should be done in processing. "
"Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)

# cast to desired return tensors type
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/instructblip/modeling_instructblip.py
Original file line number Diff line number Diff line change
Expand Up @@ -1471,7 +1471,7 @@ def forward(
logger.warning_once(
"Expanding inputs for image tokens in InstructBLIP should be done in processing. "
"Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your InstructBLIP model. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
inputs_embeds = torch.cat([language_model_inputs, inputs_embeds.to(language_model_inputs.device)], dim=1)
attention_mask = torch.cat(
Expand Down Expand Up @@ -1610,7 +1610,7 @@ def generate(
logger.warning_once(
"Expanding inputs for image tokens in InstructBLIP should be done in processing. "
"Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your InstructBLIP model. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
inputs_embeds = torch.cat([language_model_inputs, inputs_embeds.to(language_model_inputs.device)], dim=1)
attention_mask = torch.cat(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def __call__(
logger.warning_once(
"Expanding inputs for image tokens in InstructBLIP should be done in processing. "
"Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your InstructBLIP model. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)

# cast to desired return tensors type after concatenating
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/llava/modeling_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def forward(
"Expanding inputs for image tokens in LLaVa should be done in processing. "
"Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
"with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
# prefill stage vs decoding stage (legacy behavior copied)
if input_ids.shape[1] != 1:
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/llava/processing_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def __call__(
"Expanding inputs for image tokens in LLaVa should be done in processing. "
"Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
"with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)

text_inputs = self.tokenizer(prompt_strings, **output_kwargs["text_kwargs"])
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/llava_next/modeling_llava_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ def forward(
"Expanding inputs for image tokens in LLaVa-NeXT should be done in processing. "
"Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
"with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
if input_ids.shape[1] != 1:
inputs_embeds = inputs_embeds.to(image_features.dtype)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def __call__(
"Expanding inputs for image tokens in LLaVa-NeXT should be done in processing. "
"Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
"with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
else:
image_sizes = iter(image_inputs["image_sizes"])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@
import types

import torch
from huggingface_hub import split_torch_state_dict_into_shards
from packaging import version

from transformers import AutoTokenizer, GPT2Config
from transformers.modeling_utils import WEIGHTS_INDEX_NAME, WEIGHTS_NAME, shard_checkpoint
from transformers.modeling_utils import WEIGHTS_INDEX_NAME, WEIGHTS_NAME


def add_checkpointing_args(parser):
Expand Down Expand Up @@ -571,7 +572,15 @@ def convert_checkpoint_from_megatron_to_transformers(args):

# Store the state_dict to file.
max_shard_size = int(args.max_shard_size) if args.max_shard_size.isdigit() else args.max_shard_size
shards, index = shard_checkpoint(output_state_dict, max_shard_size=max_shard_size)
# Split the converted weights into shards. `filename_pattern` is derived from WEIGHTS_NAME so the shard files
# keep the PyTorch `.bin` naming instead of huggingface_hub's default `model{suffix}.safetensors`.
state_dict_split = split_torch_state_dict_into_shards(
    output_state_dict,
    max_shard_size=max_shard_size,
    filename_pattern=WEIGHTS_NAME.replace(".bin", "{suffix}.bin"),
)
# `filename_to_tensors` maps each shard file name to the names of the tensors stored in it. Build one
# sub-state-dict per file so the save loop below receives (shard_file, shard) pairs; tensors are looked up in
# `output_state_dict`, the dictionary that was actually split.
shards = {
    shard_file: {tensor_name: output_state_dict[tensor_name] for tensor_name in tensor_names}
    for shard_file, tensor_names in state_dict_split.filename_to_tensors.items()
}
# An index is only produced when the weights were split across several files.
index = None
if state_dict_split.is_sharded:
    index = {
        "metadata": state_dict_split.metadata,
        "weight_map": state_dict_split.tensor_to_filename,
    }

# Save the model
for shard_file, shard in shards.items():
Expand Down
15 changes: 12 additions & 3 deletions src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
import re

import torch
from huggingface_hub import hf_hub_download
from huggingface_hub import hf_hub_download, split_torch_state_dict_into_shards

from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedTokenizerFast, RwkvConfig
from transformers.modeling_utils import WEIGHTS_INDEX_NAME, shard_checkpoint
from transformers.modeling_utils import WEIGHTS_INDEX_NAME


NUM_HIDDEN_LAYERS_MAPPING = {
Expand Down Expand Up @@ -116,7 +116,16 @@ def convert_rmkv_checkpoint_to_hf_format(
state_dict = convert_state_dict(state_dict)

# 4. Split in shards and save
shards, index = shard_checkpoint(state_dict)
# The shards are written with `torch.save` below, so name them with the PyTorch `.bin` pattern rather than
# huggingface_hub's default `model{suffix}.safetensors`.
state_dict_split = split_torch_state_dict_into_shards(state_dict, filename_pattern="pytorch_model{suffix}.bin")
# `filename_to_tensors` maps each shard file name to the names of the tensors stored in it. Build one
# sub-state-dict per file so that every (shard_file, shard) pair saved below is a complete shard.
shards = {
    shard_file: {tensor_name: state_dict[tensor_name] for tensor_name in tensor_names}
    for shard_file, tensor_names in state_dict_split.filename_to_tensors.items()
}
# An index is only produced when the weights were split across several files.
index = None
if state_dict_split.is_sharded:
    index = {
        "metadata": state_dict_split.metadata,
        "weight_map": state_dict_split.tensor_to_filename,
    }

for shard_file, shard in shards.items():
torch.save(shard, os.path.join(output_dir, shard_file))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ def forward(
"Expanding inputs for image tokens in Video-LLaVa should be done in processing. "
"Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
"with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
if input_ids.shape[1] != 1:
for features, frames in ((image_features, 1), (video_features, num_frames)):
Expand Down
7 changes: 4 additions & 3 deletions src/transformers/models/video_llava/processing_video_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,10 @@ def __call__(
if encoded_images is not None and (self.patch_size is None or self.vision_feature_select_strategy is None):
logger.warning_once(
"Expanding inputs for image tokens in Video-LLaVa should be done in processing. "
"Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
"with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.44."
"Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set "
"directly with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = "
"{{vision_feature_select_strategy}}`. Using processors without these attributes in the config is "
"deprecated and will throw an error in v4.50."
)
# Replace the image/video tokens with the expanded token sequence
elif encoded_images is not None:
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/vipllava/modeling_vipllava.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def forward(
logger.warning_once(
"Expanding inputs for image tokens in VipLLaVa should be done in processing. "
"Please add `patch_size` and `vision_feature_select_strategy` to the model's image processing config. "
"Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
"Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
)
# prefill stage vs decoding stage (legacy behavior copied)
if input_ids.shape[1] != 1:
Expand Down
Loading

0 comments on commit 1349321

Please sign in to comment.