MNT: Use BitsAndBytesConfig as load_in_* is deprecated #1552

Merged
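In short, this PR swaps the deprecated `load_in_8bit=True` / `load_in_4bit=True` arguments to `from_pretrained` for an explicit `quantization_config=BitsAndBytesConfig(...)` across the examples, docstrings, and tests. A minimal sketch of the migration pattern follows (the model id is just the one used in the regression test; it assumes a transformers release that ships `BitsAndBytesConfig` and a working bitsandbytes install):

# Before (deprecated): passing the flag directly to from_pretrained
# model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", load_in_8bit=True, device_map="auto")

# After: wrap the flag in a BitsAndBytesConfig and pass it as quantization_config
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",
    quantization_config=bnb_config,
    device_map="auto",
)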
@@ -171,7 +171,9 @@ class AutoModelForSentenceEmbedding(nn.Module):
def __init__(self, model_name, tokenizer, normalize=True):
super().__init__()

self.model = AutoModel.from_pretrained(model_name) # , load_in_8bit=True, device_map={"":0})
self.model = AutoModel.from_pretrained(
model_name
) # , quantization_config=BitsAndBytesConfig(load_in_8bit=True), device_map={"":0})
self.normalize = normalize
self.tokenizer = tokenizer

@@ -82,7 +82,7 @@
" def __init__(self, model_name, tokenizer, normalize=True):\n",
" super(AutoModelForSentenceEmbedding, self).__init__()\n",
"\n",
" self.model = AutoModel.from_pretrained(model_name) # , load_in_8bit=True, device_map={\"\":0})\n",
" self.model = AutoModel.from_pretrained(model_name) # , quantizaton_config=BitsAndBytesConfig(load_in_8bit=True), device_map={\"\":0})\n",
" self.normalize = normalize\n",
" self.tokenizer = tokenizer\n",
"\n",
4 changes: 2 additions & 2 deletions examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py
@@ -165,11 +165,11 @@ def print_trainable_parameters(model):

# import torch
# from peft import PeftModel, PeftConfig
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
#
# peft_model_id = "ybelkada/opt-6.7b-lora"
# config = PeftConfig.from_pretrained(peft_model_id)
# model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
# model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, quantization_config=BitsAndBytesConfig(load_in_8bit=True), device_map='auto')
# tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
#
## Load the Lora model
4 changes: 2 additions & 2 deletions examples/int8_training/Finetune_flan_t5_large_bnb_peft.ipynb
@@ -301,11 +301,11 @@
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
"\n",
"from datasets import load_dataset\n",
"from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
"from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig\n",
"\n",
"model_name = \"google/flan-t5-large\"\n",
"\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True)\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=True))\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)"
]
},
8 changes: 4 additions & 4 deletions examples/int8_training/Finetune_opt_bnb_peft.ipynb
@@ -219,9 +219,9 @@
"import torch\n",
"import torch.nn as nn\n",
"import bitsandbytes as bnb\n",
"from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM\n",
"from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig\n",
"\n",
"model = AutoModelForCausalLM.from_pretrained(\"facebook/opt-6.7b\", load_in_8bit=True)\n",
"model = AutoModelForCausalLM.from_pretrained(\"facebook/opt-6.7b\", quantization_config=BitsAndBytesConfig(load_in_8bit=True))\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"facebook/opt-6.7b\")"
]
@@ -1459,12 +1459,12 @@
"source": [
"import torch\n",
"from peft import PeftModel, PeftConfig\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
"\n",
"peft_model_id = \"ybelkada/opt-6.7b-lora\"\n",
"config = PeftConfig.from_pretrained(peft_model_id)\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
" config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map=\"auto\"\n",
" config.base_model_name_or_path, return_dict=True, quantization_config=BitsAndBytesConfig(load_in_8bit=True), device_map=\"auto\"\n",
")\n",
"tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
"\n",
6 changes: 4 additions & 2 deletions examples/int8_training/fine_tune_blip2_int8.py
@@ -14,7 +14,7 @@
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForVision2Seq, AutoProcessor
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig

from peft import LoraConfig, get_peft_model

@@ -28,7 +28,9 @@
)

# We load our model and processor using `transformers`
model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip2-opt-2.7b", load_in_8bit=True)
model = AutoModelForVision2Seq.from_pretrained(
"Salesforce/blip2-opt-2.7b", quantization_config=BitsAndBytesConfig(load_in_8bit=True)
)
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")

# Get our peft model and print the number of trainable parameters
@@ -30,6 +30,7 @@
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import (
BitsAndBytesConfig,
SchedulerType,
WhisperForConditionalGeneration,
WhisperProcessor,
@@ -533,7 +534,9 @@ def main():
metric = evaluate.load("wer")

# model
model = WhisperForConditionalGeneration.from_pretrained(args.model_name_or_path, load_in_8bit=True)
model = WhisperForConditionalGeneration.from_pretrained(
args.model_name_or_path, quantization_config=BitsAndBytesConfig(load_in_8bit=True)
)
model.config.forced_decoder_ids = None
model.config.suppress_tokens = []
if len(set(model.hf_device_map.values()).intersection({"cpu", "disk"})) > 0:
@@ -1102,9 +1102,9 @@
},
"outputs": [],
"source": [
"from transformers import WhisperForConditionalGeneration\n",
"from transformers import WhisperForConditionalGeneration, BitsAndBytesConfig\n",
"\n",
"model = WhisperForConditionalGeneration.from_pretrained(model_name_or_path, load_in_8bit=True)\n",
"model = WhisperForConditionalGeneration.from_pretrained(model_name_or_path, quantization_config=BitsAndBytesConfig(load_in_8bit=True))\n",
"\n",
"# model.hf_device_map - this should be {\" \": 0}"
]
@@ -1645,7 +1645,7 @@
"peft_model_id = \"smangrul/openai-whisper-large-v2-LORA-colab\"\n",
"peft_config = PeftConfig.from_pretrained(peft_model_id)\n",
"model = WhisperForConditionalGeneration.from_pretrained(\n",
" peft_config.base_model_name_or_path, load_in_8bit=True, device_map=\"auto\"\n",
" peft_config.base_model_name_or_path, quantization_config=BitsAndBytesConfig(load_in_8bit=True), device_map=\"auto\"\n",
")\n",
"model = PeftModel.from_pretrained(model, peft_model_id)"
]
@@ -1884,7 +1884,7 @@
"task = \"transcribe\"\n",
"peft_config = PeftConfig.from_pretrained(peft_model_id)\n",
"model = WhisperForConditionalGeneration.from_pretrained(\n",
" peft_config.base_model_name_or_path, load_in_8bit=True, device_map=\"auto\"\n",
" peft_config.base_model_name_or_path, quantization_config=BitsAndBytesConfig(load_in_8bit=True), device_map=\"auto\"\n",
")\n",
"\n",
"model = PeftModel.from_pretrained(model, peft_model_id)\n",
@@ -56,11 +56,11 @@
"outputs": [],
"source": [
"from peft import PeftModel\n",
"from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig\n",
"from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, BitsAndBytesConfig\n",
"\n",
"model_name = \"decapoda-research/llama-7b-hf\"\n",
"tokenizer = LlamaTokenizer.from_pretrained(model_name)\n",
"model = LlamaForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map=\"auto\", use_auth_token=True)"
"model = LlamaForCausalLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=True), device_map=\"auto\", use_auth_token=True)"
]
},
{
2 changes: 1 addition & 1 deletion examples/sft/train.py
@@ -51,7 +51,7 @@ class ModelArguments:
default=False,
metadata={"help": "Enables PEFT LoRA for training."},
)
use_8bit_qunatization: Optional[bool] = field(
use_8bit_quantization: Optional[bool] = field(
default=False,
metadata={"help": "Enables loading model in 8bit."},
)
9 changes: 4 additions & 5 deletions examples/sft/utils.py
@@ -86,8 +86,6 @@ def create_and_prepare_model(args, data_args, training_args):
from unsloth import FastLanguageModel
device_map = None
bnb_config = None
load_in_8bit = args.use_8bit_qunatization
load_in_4bit = args.use_4bit_quantization

if (
torch.distributed.is_available()
@@ -113,8 +111,10 @@
print("=" * 80)
print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
print("=" * 80)
elif args.use_8bit_quantization:
bnb_config = BitsAndBytesConfig(load_in_8bit=args.use_8bit_quantization)

if args.use_4bit_quantization or args.use_8bit_qunatization:
if args.use_4bit_quantization or args.use_8bit_quantization:
device_map = (
int(os.environ.get("LOCAL_RANK", -1))
if torch.distributed.is_available() and torch.distributed.is_initialized()
@@ -127,12 +127,11 @@
model_name=args.model_name_or_path,
max_seq_length=data_args.max_seq_length,
dtype=None,
load_in_4bit=load_in_4bit,
load_in_4bit=args.use_4bit_quantization,
)
else:
model = AutoModelForCausalLM.from_pretrained(
args.model_name_or_path,
load_in_8bit=load_in_8bit,
quantization_config=bnb_config,
device_map=device_map,
trust_remote_code=True,
14 changes: 13 additions & 1 deletion src/peft/tuners/lora/model.py
@@ -79,22 +79,34 @@ class LoraModel(BaseTuner):
```

```py
>>> import torch
>>> import transformers
>>> from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_int8_training

>>> rank = ...
>>> target_modules = ["q_proj", "k_proj", "v_proj", "out_proj", "fc_in", "fc_out", "wte"]
>>> config = LoraConfig(
... r=4, lora_alpha=16, target_modules=target_modules, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM"
... )
>>> quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True)

>>> tokenizer = transformers.AutoTokenizer.from_pretrained(
... "kakaobrain/kogpt",
... revision="KoGPT6B-ryan1.5b-float16", # or float32 version: revision=KoGPT6B-ryan1.5b
... bos_token="[BOS]",
... eos_token="[EOS]",
... unk_token="[UNK]",
... pad_token="[PAD]",
... mask_token="[MASK]",
... )
>>> model = transformers.GPTJForCausalLM.from_pretrained(
... "kakaobrain/kogpt",
... revision="KoGPT6B-ryan1.5b-float16", # or float32 version: revision=KoGPT6B-ryan1.5b
... pad_token_id=tokenizer.eos_token_id,
... use_cache=False,
... device_map={"": rank},
... torch_dtype=torch.float16,
... load_in_8bit=True,
... quantization_config=quantization_config,
... )
>>> model = prepare_model_for_int8_training(model)
>>> lora_model = get_peft_model(model, config)
2 changes: 1 addition & 1 deletion tests/regression/test_regression.py
@@ -528,7 +528,7 @@ def load_base_model(self):
self.fix_seed()
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
load_in_8bit=True,
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)
return model
