Skip to content

Commit

Permalink
fix push_to_ms (#1901)
Browse files Browse the repository at this point in the history
(cherry picked from commit eee2323)
  • Loading branch information
tastelikefeet committed Sep 2, 2024
1 parent 1c55186 commit adf5b3f
Showing 1 changed file with 76 additions and 77 deletions.
153 changes: 76 additions & 77 deletions swift/trainers/push_to_ms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import tempfile
from concurrent.futures import Future
from functools import partial
from pathlib import Path
from typing import List, Optional, Union
Expand All @@ -13,75 +12,75 @@
logger = logging.get_logger(__name__)


class PushToMsHubMixin:
def create_repo(repo_id: str, *, token: Union[str, bool, None] = None, private: bool = False, **kwargs) -> RepoUrl:
    """Create the ModelScope repo for ``repo_id`` and seed its housekeeping files.

    Args:
        repo_id: Model id forwarded to ``PushToMsHubMixin.create_ms_repo``.
        token: Hub token; also stored on ``PushToMsHubMixin.ms_token`` so that a
            later ``upload_folder`` call made without a token can reuse it.
        private: Forwarded to ``PushToMsHubMixin.create_ms_repo``.
        **kwargs: Accepted for call compatibility and not used here.

    Returns:
        A ``RepoUrl`` built from the model id returned by ``create_ms_repo``.
    """
    from modelscope.hub.repository import Repository

    model_id = PushToMsHubMixin.create_ms_repo(repo_id, token, private)
    PushToMsHubMixin.ms_token = token

    lfs_patterns = ['*.safetensors', '*.bin', '*.pt']
    # 'runs/' holds tensorboard files; both directories are kept out of the repo.
    ignored_dirs = ['runs/', 'images/']
    default_config = '{"framework": "pytorch", "task": "text-generation", "allow_remote": true}'
    sagemaker_patterns = ['*.sagemaker-uploading', '*.sagemaker-uploaded']

    # The repository is only needed long enough to push the seed files,
    # so it lives in a throw-away directory.
    with tempfile.TemporaryDirectory() as workdir:
        ms_repo = Repository(workdir, model_id)
        PushToMsHubMixin.add_patterns_to_gitattributes(ms_repo, lfs_patterns)
        PushToMsHubMixin.add_patterns_to_gitignore(ms_repo, ignored_dirs)
        PushToMsHubMixin.add_patterns_to_file(
            ms_repo, 'configuration.json', [default_config], ignore_push_error=True)
        # SageMaker marker files are ignored only when running under SageMaker.
        if os.environ.get('SM_TRAINING_ENV'):
            PushToMsHubMixin.add_patterns_to_gitignore(ms_repo, sagemaker_patterns,
                                                       'Add `*.sagemaker` patterns to .gitignore')
    return RepoUrl(url=model_id)


@future_compatible
def upload_folder(
    self,
    *,
    repo_id: str,
    folder_path: Union[str, Path],
    path_in_repo: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    token: Union[str, bool, None] = None,
    revision: Optional[str] = 'master',
    ignore_patterns: Optional[Union[List[str], str]] = None,
    run_as_future: bool = False,
    **kwargs,
):
    """Push a local folder to a ModelScope model repo via ``modelscope.push_to_hub``.

    Args:
        self: Bound first argument (the caller supplies it, e.g. through
            ``functools.partial``); not read by this function.
        repo_id: Target model id on ModelScope.
        folder_path: Local folder to upload. A default ``configuration.json``
            is written into it when none exists.
        path_in_repo: When truthy, partial uploads are not supported: the
            *parent* of ``folder_path`` is pushed instead, the folder's base
            name is used as the tag, and ``ignore_patterns`` is cleared.
        commit_message: Commit message; defaults to ``'Upload folder using api'``.
        commit_description: Optional text appended to the commit message on a
            new line.
        token: Hub token; falls back to ``PushToMsHubMixin.ms_token``.
        revision: Revision passed to ``push_to_hub``.
        ignore_patterns: A single pattern or a list of patterns to skip. The
            ``'_*'`` pattern is dropped before pushing.
        run_as_future: Not read here; presumably consumed by the
            ``future_compatible`` decorator — confirm against its definition.
        **kwargs: Accepted for call compatibility and not used here.

    Returns:
        A ``CommitInfo`` pointing at the repo's file listing (``oid`` is ``None``).
    """
    from modelscope import push_to_hub

    commit_message = commit_message or 'Upload folder using api'
    if commit_description:
        commit_message = commit_message + '\n' + commit_description

    config_path = os.path.join(folder_path, 'configuration.json')
    if not os.path.exists(config_path):
        with open(config_path, 'w') as f:
            f.write('{"framework": "pytorch", "task": "text-generation", "allow_remote": true}')

    # A bare string is a single pattern; without this it would be iterated
    # character by character by the filter below.
    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]
    if ignore_patterns:
        ignore_patterns = [p for p in ignore_patterns if p != '_*']

    if path_in_repo:
        # We don't support part submit for now
        path_in_repo = os.path.basename(folder_path)
        folder_path = os.path.dirname(folder_path)
        ignore_patterns = []

    push_to_hub(
        repo_id,
        folder_path,
        token or PushToMsHubMixin.ms_token,
        commit_message=commit_message,
        ignore_file_pattern=ignore_patterns,
        revision=revision,
        tag=path_in_repo)
    return CommitInfo(
        commit_url=f'https://www.modelscope.cn/models/{repo_id}/files',
        commit_message=commit_message,
        commit_description=commit_description,
        oid=None,
    )

# Flag parsed from the USE_HF environment variable (default 'False'); it is
# checked by the `if not _use_hf_hub:` block further down in the class body.
_use_hf_hub = strtobool(os.environ.get('USE_HF', 'False'))
# Token remembered by create_repo(); upload_folder() falls back to it when
# it is called without a token.
_token = None

@staticmethod
def create_repo(repo_id: str, *, token: Union[str, bool, None] = None, private: bool = False, **kwargs) -> RepoUrl:
    """Create the ModelScope repo for ``repo_id`` and seed its housekeeping files.

    Args:
        repo_id: Model id forwarded to ``PushToMsHubMixin._create_ms_repo``.
        token: Hub token; also stored on ``PushToMsHubMixin._token`` so that a
            later ``upload_folder`` call made without a token can reuse it.
        private: Forwarded to ``PushToMsHubMixin._create_ms_repo``.
        **kwargs: Accepted for call compatibility and not used here.

    Returns:
        A ``RepoUrl`` built from the model id returned by ``_create_ms_repo``.
    """
    from modelscope.hub.repository import Repository

    model_id = PushToMsHubMixin._create_ms_repo(repo_id, token, private)
    PushToMsHubMixin._token = token

    lfs_patterns = ['*.safetensors', '*.bin', '*.pt']
    # 'runs/' holds tensorboard files; both directories are kept out of the repo.
    ignored_dirs = ['runs/', 'images/']
    default_config = '{"framework": "pytorch", "task": "text-generation", "allow_remote": true}'
    sagemaker_patterns = ['*.sagemaker-uploading', '*.sagemaker-uploaded']

    # The repository is only needed long enough to push the seed files,
    # so it lives in a throw-away directory.
    with tempfile.TemporaryDirectory() as workdir:
        ms_repo = Repository(workdir, model_id)
        PushToMsHubMixin._add_patterns_to_gitattributes(ms_repo, lfs_patterns)
        PushToMsHubMixin._add_patterns_to_gitignore(ms_repo, ignored_dirs)
        PushToMsHubMixin._add_patterns_to_file(
            ms_repo, 'configuration.json', [default_config], ignore_push_error=True)
        # SageMaker marker files are ignored only when running under SageMaker.
        if os.environ.get('SM_TRAINING_ENV'):
            PushToMsHubMixin._add_patterns_to_gitignore(ms_repo, sagemaker_patterns,
                                                        'Add `*.sagemaker` patterns to .gitignore')
    return RepoUrl(url=model_id)
class PushToMsHubMixin:

@staticmethod
@future_compatible
def upload_folder(
    self,
    *,
    repo_id: str,
    folder_path: Union[str, Path],
    path_in_repo: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    token: Union[str, bool, None] = None,
    revision: Optional[str] = 'master',
    ignore_patterns: Optional[Union[List[str], str]] = None,
    run_as_future: bool = False,
    **kwargs,
) -> 'Union[CommitInfo, str, Future[CommitInfo], Future[str]]':
    """Push a local folder to a ModelScope model repo via ``modelscope.push_to_hub``.

    The return annotation is a string on purpose: subscripting
    ``concurrent.futures.Future`` at definition time raises ``TypeError`` on
    Python versions before 3.9.

    Args:
        self: Explicit first argument supplied by the caller (the function is
            a ``staticmethod``); not read by this function.
        repo_id: Target model id on ModelScope.
        folder_path: Local folder to upload. A default ``configuration.json``
            is written into it when none exists.
        path_in_repo: When truthy, partial uploads are not supported: the
            *parent* of ``folder_path`` is pushed instead, the folder's base
            name is used as the tag, and ``ignore_patterns`` is cleared.
        commit_message: Commit message; defaults to ``'Upload folder using api'``.
        commit_description: Optional text appended to the commit message on a
            new line.
        token: Hub token; falls back to ``PushToMsHubMixin._token``.
        revision: Revision passed to ``push_to_hub``.
        ignore_patterns: A single pattern or a list of patterns to skip. The
            ``'_*'`` pattern is dropped before pushing.
        run_as_future: Not read here; presumably consumed by the
            ``future_compatible`` decorator — confirm against its definition.
        **kwargs: Accepted for call compatibility and not used here.

    Returns:
        A ``CommitInfo`` pointing at the repo's file listing (``oid`` is ``None``).
    """
    from modelscope import push_to_hub

    commit_message = commit_message or 'Upload folder using api'
    if commit_description:
        commit_message = commit_message + '\n' + commit_description

    config_path = os.path.join(folder_path, 'configuration.json')
    if not os.path.exists(config_path):
        with open(config_path, 'w') as f:
            f.write('{"framework": "pytorch", "task": "text-generation", "allow_remote": true}')

    # A bare string is a single pattern; without this it would be iterated
    # character by character by the filter below.
    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]
    if ignore_patterns:
        ignore_patterns = [p for p in ignore_patterns if p != '_*']

    if path_in_repo:
        # We don't support part submit for now
        path_in_repo = os.path.basename(folder_path)
        folder_path = os.path.dirname(folder_path)
        ignore_patterns = []

    push_to_hub(
        repo_id,
        folder_path,
        token or PushToMsHubMixin._token,
        commit_message=commit_message,
        ignore_file_pattern=ignore_patterns,
        revision=revision,
        tag=path_in_repo)
    return CommitInfo(
        commit_url=f'https://www.modelscope.cn/models/{repo_id}/files',
        commit_message=commit_message,
        commit_description=commit_description,
        oid=None,
    )
# Flag parsed from the USE_HF environment variable (default 'False'); it is
# checked by the `if not _use_hf_hub:` block that follows.
_use_hf_hub = strtobool(os.environ.get('USE_HF', 'False'))
# Token remembered by create_repo(); upload_folder() falls back to it when
# it is called without a token.
ms_token = None

if not _use_hf_hub:
import huggingface_hub
Expand All @@ -93,7 +92,7 @@ def upload_folder(
trainer.upload_folder = partial(upload_folder, api)

@staticmethod
def _create_ms_repo(hub_model_id: str, hub_token: Optional[str] = None, hub_private_repo: bool = False) -> str:
def create_ms_repo(hub_model_id: str, hub_token: Optional[str] = None, hub_private_repo: bool = False) -> str:
from modelscope import HubApi
from modelscope.hub.api import ModelScopeConfig
from modelscope.hub.constants import ModelVisibility
Expand Down Expand Up @@ -121,11 +120,11 @@ def _create_ms_repo(hub_model_id: str, hub_token: Optional[str] = None, hub_priv
return hub_model_id

@staticmethod
def _add_patterns_to_file(repo,
file_name: str,
patterns: List[str],
commit_message: Optional[str] = None,
ignore_push_error=False) -> None:
def add_patterns_to_file(repo,
file_name: str,
patterns: List[str],
commit_message: Optional[str] = None,
ignore_push_error=False) -> None:
if isinstance(patterns, str):
patterns = [patterns]
if commit_message is None:
Expand Down Expand Up @@ -161,11 +160,11 @@ def _add_patterns_to_file(repo,
raise e

@staticmethod
def _add_patterns_to_gitignore(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
    """Add ``patterns`` to the repo's ``.gitignore`` via ``_add_patterns_to_file``.

    The call always passes ``ignore_push_error=True``.
    """
    PushToMsHubMixin._add_patterns_to_file(
        repo,
        file_name='.gitignore',
        patterns=patterns,
        commit_message=commit_message,
        ignore_push_error=True,
    )
def add_patterns_to_gitignore(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
    """Add ``patterns`` to the repo's ``.gitignore`` via ``add_patterns_to_file``.

    The call always passes ``ignore_push_error=True``.
    """
    PushToMsHubMixin.add_patterns_to_file(
        repo,
        file_name='.gitignore',
        patterns=patterns,
        commit_message=commit_message,
        ignore_push_error=True,
    )

@staticmethod
def _add_patterns_to_gitattributes(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
def add_patterns_to_gitattributes(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
new_patterns = []
suffix = 'filter=lfs diff=lfs merge=lfs -text'
for pattern in patterns:
Expand All @@ -175,4 +174,4 @@ def _add_patterns_to_gitattributes(repo, patterns: List[str], commit_message: Op
file_name = '.gitattributes'
if commit_message is None:
commit_message = f'Add `{patterns[0]}` patterns to {file_name}'
PushToMsHubMixin._add_patterns_to_file(repo, file_name, new_patterns, commit_message, ignore_push_error=True)
PushToMsHubMixin.add_patterns_to_file(repo, file_name, new_patterns, commit_message, ignore_push_error=True)

0 comments on commit adf5b3f

Please sign in to comment.