diff --git a/examples/feature_extraction/peft_lora_embedding_semantic_search.py b/examples/feature_extraction/peft_lora_embedding_semantic_search.py
index fe94f6f6b7..a4a7eecb4d 100644
--- a/examples/feature_extraction/peft_lora_embedding_semantic_search.py
+++ b/examples/feature_extraction/peft_lora_embedding_semantic_search.py
@@ -27,12 +27,11 @@
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import DatasetDict, load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch import nn
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoModel, AutoTokenizer, SchedulerType, default_data_collator, get_scheduler
-from transformers.utils import get_full_repo_name
 
 from peft import LoraConfig, TaskType, get_peft_model
 
@@ -236,12 +235,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -487,7 +487,12 @@ def preprocess_function(examples):
                     if epoch < args.num_train_epochs - 1
                     else "End of training"
                 )
-                repo.push_to_hub(commit_message=commit_message, blocking=False, auto_lfs_prune=True)
+                api.upload_folder(
+                    repo_id=repo_id,
+                    folder_path=args.output_dir,
+                    commit_message=commit_message,
+                    run_as_future=True,
+                )
 
     accelerator.wait_for_everyone()
     accelerator.end_training()
diff --git a/examples/int8_training/peft_adalora_whisper_large_training.py b/examples/int8_training/peft_adalora_whisper_large_training.py
index 102d137642..0c8d02a237 100644
--- a/examples/int8_training/peft_adalora_whisper_large_training.py
+++ b/examples/int8_training/peft_adalora_whisper_large_training.py
@@ -26,7 +26,7 @@
 from datasets import Audio, DatasetDict, IterableDatasetDict, interleave_datasets, load_dataset
 
 # hf imports
-from huggingface_hub import Repository
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import (
@@ -38,7 +38,6 @@
     set_seed,
 )
 from transformers.models.whisper.english_normalizer import BasicTextNormalizer
-from transformers.utils import get_full_repo_name
 
 # peft imports
 from peft import AdaLoraConfig, LoraConfig, PeftModel, get_peft_model
@@ -450,11 +449,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -739,8 +740,11 @@ def make_inputs_require_grad(module, input, output):
             if accelerator.is_main_process:
                 processor.tokenizer.save_pretrained(args.output_dir)
-                repo.push_to_hub(
-                    commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+                api.upload_folder(
+                    repo_id=repo_id,
+                    folder_path=args.output_dir,
+                    commit_message=f"Training in progress epoch {epoch}",
+                    run_as_future=True,
                 )
 
     if args.load_best_model:
@@ -760,7 +764,11 @@ def make_inputs_require_grad(module, input, output):
     if accelerator.is_main_process:
         processor.tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+            )
 
         with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
             eval_metrics.pop("eval_samples")
diff --git a/examples/loftq_finetuning/train_gsm8k_llama.py b/examples/loftq_finetuning/train_gsm8k_llama.py
index 8c462da4c8..66b83d55e3 100644
--- a/examples/loftq_finetuning/train_gsm8k_llama.py
+++ b/examples/loftq_finetuning/train_gsm8k_llama.py
@@ -28,7 +28,7 @@
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 from transformers import (
@@ -333,14 +333,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            # Retrieve of infer repo_name
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
             repo_name = args.hub_model_id
             if repo_name is None:
                 repo_name = Path(args.output_dir).absolute().name
-            # Create repo and retrieve repo_id
-            repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
-            # Clone repo locally
-            repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -780,8 +779,11 @@ def preprocess_function_test(examples):
             )
             if accelerator.is_main_process:
                 tokenizer.save_pretrained(args.output_dir)
-                repo.push_to_hub(
-                    commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+                api.upload_folder(
+                    repo_id=repo_id,
+                    folder_path=args.output_dir,
+                    commit_message=f"Training in progress epoch {epoch}",
+                    run_as_future=True,
                 )
 
     if args.checkpointing_steps == "epoch":
@@ -802,7 +804,11 @@ def preprocess_function_test(examples):
     if accelerator.is_main_process:
         tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+            )
 
 
 PATTERN_NUMBER = re.compile(r"-?\d+\.?\d*")
diff --git a/examples/lora_dreambooth/train_dreambooth.py b/examples/lora_dreambooth/train_dreambooth.py
index 5bf46eec10..61df7340c5 100644
--- a/examples/lora_dreambooth/train_dreambooth.py
+++ b/examples/lora_dreambooth/train_dreambooth.py
@@ -9,7 +9,6 @@
 import warnings
 from contextlib import nullcontext
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import diffusers
@@ -32,7 +31,7 @@
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -576,16 +575,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -678,11 +667,13 @@ def main(args):
     # Handle the repository creation
     if accelerator.is_main_process:
        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1086,7 +1077,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
 
     accelerator.end_training()
diff --git a/examples/oft_dreambooth/train_dreambooth.py b/examples/oft_dreambooth/train_dreambooth.py
index ecd605e25e..fece05776a 100644
--- a/examples/oft_dreambooth/train_dreambooth.py
+++ b/examples/oft_dreambooth/train_dreambooth.py
@@ -9,7 +9,6 @@
 import warnings
 from contextlib import nullcontext
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import diffusers
@@ -32,7 +31,7 @@
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -586,16 +585,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -688,11 +677,13 @@ def main(args):
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1094,7 +1085,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
 
     accelerator.end_training()
diff --git a/examples/stable_diffusion/train_dreambooth.py b/examples/stable_diffusion/train_dreambooth.py
index fd6ee0967e..56406ed082 100644
--- a/examples/stable_diffusion/train_dreambooth.py
+++ b/examples/stable_diffusion/train_dreambooth.py
@@ -8,7 +8,7 @@
 import threading
 import warnings
 from pathlib import Path
-from typing import Optional, Union
+from typing import Union
 
 import datasets
 import diffusers
@@ -31,7 +31,7 @@
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -749,16 +749,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -851,11 +841,13 @@ def main(args):
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1252,7 +1244,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
 
     accelerator.end_training()
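
All six scripts land on the same three-step HfApi flow: instantiate the client once, create (or reuse) the repo up front to get a fully qualified repo_id, then push the output folder with upload_folder, non-blocking (run_as_future=True) for per-epoch checkpoints and blocking for the final push. Below is a minimal standalone sketch of that flow, assuming a recent huggingface_hub release (one that ships run_as_future) and an already-authenticated user; output_dir and the commit message are illustrative placeholders, not values from the diff.

    from pathlib import Path

    from huggingface_hub import HfApi

    output_dir = "outputs/my-model"  # stands in for args.output_dir
    api = HfApi(token=None)  # token=None falls back to the locally saved token

    # create_repo is idempotent with exist_ok=True and returns a RepoUrl whose
    # .repo_id is the fully qualified "<user>/<name>" identifier.
    repo_id = api.create_repo(Path(output_dir).absolute().name, exist_ok=True).repo_id

    # run_as_future=True schedules the upload on a background thread and returns
    # a concurrent.futures.Future, so the training loop is not blocked.
    future = api.upload_folder(
        repo_id=repo_id,
        folder_path=output_dir,
        commit_message="Training in progress",
        run_as_future=True,
    )

    # ... training continues while the checkpoint uploads in the background ...

    # Before the process exits, wait for any pending upload; the final push in
    # the scripts above is simply left blocking for the same reason.
    commit_info = future.result()  # blocks until the background commit lands
    print(commit_info.commit_url)

Unlike Repository.push_to_hub, upload_folder is a pure HTTP upload with no local git clone, so there is nothing to prune (auto_lfs_prune has no counterpart); filtering what gets pushed is done with the allow_patterns/ignore_patterns arguments, e.g. ignore_patterns=["step_*"] to skip intermediate checkpoints.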