Don't use deprecated Repository anymore (huggingface#1641)
* Don't use deprecated Repository anymore

* oops
Wauplin authored and DTennant committed Apr 19, 2024
1 parent 12b52a5 commit adbe0ef
Showing 6 changed files with 90 additions and 82 deletions.
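
The change is the same in all six scripts: the deprecated git-based Repository class (and the get_full_repo_name helper it relied on) is swapped for the HTTP-based HfApi client. A minimal sketch of the new flow, with hypothetical stand-ins for the scripts' argparse values:

from pathlib import Path

from huggingface_hub import HfApi

# Hypothetical stand-ins for the scripts' --output_dir / --hub_model_id / --hub_token flags.
output_dir = "output/peft-model"
hub_model_id = None
hub_token = None  # None falls back to the locally cached login token

api = HfApi(token=hub_token)

# Repo name from the flag, or inferred from the output directory name.
repo_name = hub_model_id or Path(output_dir).absolute().name

# create_repo(exist_ok=True) is idempotent; the returned RepoUrl's repo_id is
# fully qualified (e.g. "username/peft-model"), which the old
# get_full_repo_name() helper had to compute by hand via whoami().
repo_id = api.create_repo(repo_name, exist_ok=True).repo_id

# One HTTP commit with the folder's contents; no local git clone or git-lfs.
api.upload_folder(repo_id=repo_id, folder_path=output_dir, commit_message="End of training")
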
23 changes: 14 additions & 9 deletions examples/feature_extraction/peft_lora_embedding_semantic_search.py

@@ -27,12 +27,11 @@
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import DatasetDict, load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch import nn
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoModel, AutoTokenizer, SchedulerType, default_data_collator, get_scheduler
-from transformers.utils import get_full_repo_name
 
 from peft import LoraConfig, TaskType, get_peft_model
 
@@ -236,12 +235,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -487,7 +487,12 @@ def preprocess_function(examples):
                     if epoch < args.num_train_epochs - 1
                     else "End of training"
                 )
-                repo.push_to_hub(commit_message=commit_message, blocking=False, auto_lfs_prune=True)
+                api.upload_folder(
+                    repo_id=repo_id,
+                    folder_path=args.output_dir,
+                    commit_message=commit_message,
+                    run_as_future=True,
+                )
     accelerator.wait_for_everyone()
     accelerator.end_training()

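In the per-epoch pushes, run_as_future=True takes over the role of the old blocking=False: the commit is queued on a background thread and upload_folder returns a concurrent.futures.Future immediately, so the training loop is not blocked. Queued uploads run one at a time, in order. A small sketch of inspecting the future, which the example scripts simply discard (names as in the sketch above):

future = api.upload_folder(
    repo_id=repo_id,
    folder_path=output_dir,
    commit_message="Training in progress epoch 0",
    run_as_future=True,
)

# Training continues immediately; the upload proceeds in the background.
print(future.done())           # False while the commit is still in flight
commit_info = future.result()  # blocks until the queued upload completes
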
28 changes: 18 additions & 10 deletions examples/int8_training/peft_adalora_whisper_large_training.py

@@ -26,7 +26,7 @@
 from datasets import Audio, DatasetDict, IterableDatasetDict, interleave_datasets, load_dataset
 
 # hf imports
-from huggingface_hub import Repository
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import (
@@ -38,7 +38,6 @@
     set_seed,
 )
 from transformers.models.whisper.english_normalizer import BasicTextNormalizer
-from transformers.utils import get_full_repo_name
 
 # peft imports
 from peft import AdaLoraConfig, LoraConfig, PeftModel, get_peft_model
@@ -450,11 +449,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -739,8 +740,11 @@ def make_inputs_require_grad(module, input, output):
 
         if accelerator.is_main_process:
            processor.tokenizer.save_pretrained(args.output_dir)
-            repo.push_to_hub(
-                commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message=f"Training in progress epoch {epoch}",
+                run_as_future=True,
             )
 
     if args.load_best_model:
@@ -760,7 +764,11 @@ def make_inputs_require_grad(module, input, output):
     if accelerator.is_main_process:
         processor.tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+            )
 
     with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
         eval_metrics.pop("eval_samples")

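Note the asymmetry this script keeps: the per-epoch push is non-blocking (run_as_future=True), but the final "End of training" push omits the flag and therefore blocks, which guarantees the process does not exit before the last commit lands. A hypothetical variant that kept every push non-blocking would have to wait on the futures explicitly before exiting:

# Hypothetical variant, not what the script does: all pushes non-blocking.
num_train_epochs = 3  # stand-in for args.num_train_epochs
futures = []
for epoch in range(num_train_epochs):
    # ... train one epoch, save checkpoints into output_dir ...
    futures.append(
        api.upload_folder(
            repo_id=repo_id,
            folder_path=output_dir,
            commit_message=f"Training in progress epoch {epoch}",
            run_as_future=True,
        )
    )

for f in futures:
    f.result()  # drain the upload queue before the process exits
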
24 changes: 15 additions & 9 deletions examples/loftq_finetuning/train_gsm8k_llama.py

@@ -28,7 +28,7 @@
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 from transformers import (
@@ -333,14 +333,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            # Retrieve of infer repo_name
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
             repo_name = args.hub_model_id
             if repo_name is None:
                 repo_name = Path(args.output_dir).absolute().name
-            # Create repo and retrieve repo_id
-            repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
-            # Clone repo locally
-            repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -780,8 +779,11 @@ def preprocess_function_test(examples):
             )
         if accelerator.is_main_process:
             tokenizer.save_pretrained(args.output_dir)
-            repo.push_to_hub(
-                commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message=f"Training in progress epoch {epoch}",
+                run_as_future=True,
             )
 
     if args.checkpointing_steps == "epoch":
@@ -802,7 +804,11 @@ def preprocess_function_test(examples):
     if accelerator.is_main_process:
         tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+            )
 
 
 PATTERN_NUMBER = re.compile(r"-?\d+\.?\d*")

32 changes: 14 additions & 18 deletions examples/lora_dreambooth/train_dreambooth.py

@@ -9,7 +9,6 @@
 import warnings
 from contextlib import nullcontext
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import diffusers
@@ -32,7 +31,7 @@
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -576,16 +575,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -678,11 +667,13 @@ def main(args):
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1086,7 +1077,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
 
     accelerator.end_training()

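The three dreambooth scripts also delete their local get_full_repo_name() helper, which resolved "username/model_id" by calling whoami(). create_repo() subsumes it: a bare name is created under the token's own namespace, and the returned repo_id comes back fully qualified. A sketch, assuming a cached login and a hypothetical repo name:

from huggingface_hub import HfApi

api = HfApi()  # picks up the token saved by `huggingface-cli login`

# A bare name lands in the caller's namespace; exist_ok=True makes reruns a no-op.
repo_id = api.create_repo("my-dreambooth-model", exist_ok=True).repo_id
print(repo_id)  # e.g. "your-username/my-dreambooth-model"

# An org-qualified name still works directly, no helper needed:
# api.create_repo("my-org/my-dreambooth-model", exist_ok=True)
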
32 changes: 14 additions & 18 deletions examples/oft_dreambooth/train_dreambooth.py

@@ -9,7 +9,6 @@
 import warnings
 from contextlib import nullcontext
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import diffusers
@@ -32,7 +31,7 @@
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -586,16 +585,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -688,11 +677,13 @@ def main(args):
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1094,7 +1085,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
 
     accelerator.end_training()

33 changes: 15 additions & 18 deletions examples/stable_diffusion/train_dreambooth.py

@@ -8,7 +8,7 @@
 import threading
 import warnings
 from pathlib import Path
-from typing import Optional, Union
+from typing import Union
 
 import datasets
 import diffusers
@@ -31,7 +31,7 @@
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -749,16 +749,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -851,11 +841,13 @@ def main(args):
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
 
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1252,7 +1244,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
 
        if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
 
     accelerator.end_training()

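One more detail: the scripts still write a .gitignore with a "step_*" entry, as in the old git-clone workflow. With the HTTP client, the explicit way to exclude intermediate checkpoint folders from an upload is upload_folder's ignore_patterns argument; a hedged sketch, reusing the names from the first example:

# Hypothetical: skip intermediate checkpoint folders when uploading over HTTP.
api.upload_folder(
    repo_id=repo_id,
    folder_path=output_dir,
    commit_message="End of training",
    ignore_patterns=["step_*"],
)
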
