Skip to content

Commit

Permalink
Merge pull request #387 from DagsHub/enhancement/repo-bucket-client
Browse files Browse the repository at this point in the history
Add function to get the client for the S3 Repo Bucket
  • Loading branch information
kbolashev authored Nov 16, 2023
2 parents 86cf24d + e1e83c7 commit 5355994
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 0 deletions.
2 changes: 2 additions & 0 deletions dagshub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
from .common.init import init
from .upload.wrapper import upload_files
from . import notebook
from .repo_bucket import get_repo_bucket_client

# Public API of the package.
# `__all__` must contain *names* (strings), not the objects themselves:
# a non-string entry makes `from dagshub import *` fail with a TypeError.
__all__ = [
    "DAGsHubLogger",
    "dagshub_logger",
    "init",
    "upload_files",
    # `save_notebook` is not bound at module level, so the `notebook` module
    # is exported; the function stays reachable as `notebook.save_notebook`.
    "notebook",
    "get_repo_bucket_client",
]
9 changes: 9 additions & 0 deletions dagshub/common/api/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,15 @@ def storage_api_url(self) -> str:
"""
return multi_urljoin(self.repo_api_url, "storage")

def repo_bucket_api_url(self) -> str:
    """
    URL of the endpoint that exposes the S3-compatible repo bucket.

    Built by joining the host, the fixed "api/v1/repo-buckets/s3" path and the repo owner,
    e.g. https://dagshub.com/api/v1/repo-buckets/s3/user

    The bucket name is usually the name of the repo.
    """
    bucket_api_path = "api/v1/repo-buckets/s3"
    return multi_urljoin(self.host, bucket_api_path, self.owner)

@staticmethod
def parse_repo(repo: str) -> Tuple[str, str]:
repo = repo.strip("/")
Expand Down
63 changes: 63 additions & 0 deletions dagshub/repo_bucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal

from dagshub.common.api.repo import RepoAPI
from dagshub.auth import get_token
from dagshub.common.helpers import log_message


def get_boto_client(repo_api: RepoAPI, token: str):
    """
    Build a `boto3.client` for working with the bucket of the repository.

    The token is passed as both the access key ID and the secret access key,
    and the client is pointed at the repo bucket endpoint of ``repo_api``.

    Docs: https://boto3.amazonaws.com/v1/documentation/api/latest/index.html

    Args:
        repo_api: API object of the repository that owns the bucket
        token: token used for both credential fields

    Returns:
        The created boto3 S3 client
    """
    bucket_endpoint = repo_api.repo_bucket_api_url()
    # Imported at call time, so boto3 is only loaded when this function runs
    import boto3

    connection_kwargs = {
        "endpoint_url": bucket_endpoint,
        "aws_access_key_id": token,
        "aws_secret_access_key": token,
    }
    s3_client = boto3.client("s3", **connection_kwargs)
    log_message(f"Client created. Use the name of the repo ({repo_api.repo_name}) as the name of the bucket")
    return s3_client


def get_s3fs_client(repo_api: RepoAPI, token: str):
    """
    Build an `s3fs.S3FileSystem` for working with the bucket of the repository.

    The token is passed as both the key and the secret,
    and the filesystem is pointed at the repo bucket endpoint of ``repo_api``.

    Docs: https://s3fs.readthedocs.io/en/latest/

    Args:
        repo_api: API object of the repository that owns the bucket
        token: token used for both credential fields

    Returns:
        The created s3fs filesystem object
    """
    bucket_endpoint = repo_api.repo_bucket_api_url()
    # Imported at call time, so s3fs is only loaded when this function runs
    import s3fs

    filesystem = s3fs.S3FileSystem(key=token, secret=token, endpoint_url=bucket_endpoint)
    log_message(f"Client created. Use the name of the repo ({repo_api.repo_name}) as the name of the bucket")
    return filesystem


# Maps a flavor name to the factory that builds the matching client.
# Every factory is called as `factory(repo_api, token)`.
_s3_flavor_lookup = dict(
    boto=get_boto_client,
    s3fs=get_s3fs_client,
)

# Flavor names accepted by `get_repo_bucket_client`; keep in sync with the lookup above.
FlavorTypes = Literal["boto", "s3fs"]


def get_repo_bucket_client(repo: str, flavor: FlavorTypes = "boto"):
    """
    Creates an S3 client for the specified repository's DagsHub storage bucket

    Available flavors are:
        boto: Returns a `boto3.client` with predefined EndpointURL and credentials.
        s3fs: Returns a `s3fs.S3FileSystem` with predefined EndpointURL and credentials.

    With either flavor, the name of the bucket is the name of the repository,
    and you will need to specify it for any request you make.

    Args:
        repo: Name of the repo in the format of "username/repository"
        flavor: one of the possible s3 client flavor variants

    Returns:
        The client object built by the selected flavor

    Raises:
        KeyError: If ``flavor`` is not one of the available flavors
    """
    # Resolve the flavor first, so that a typo fails right away with a readable message
    # instead of a bare KeyError raised only after the token has been fetched.
    try:
        create_client = _s3_flavor_lookup[flavor]
    except KeyError:
        available = ", ".join(repr(name) for name in _s3_flavor_lookup)
        raise KeyError(f"Unknown S3 client flavor {flavor!r}. Available flavors: {available}") from None

    repo_api = RepoAPI(repo)
    token = get_token()
    return create_client(repo_api, token)

0 comments on commit 5355994

Please sign in to comment.