Skip to content

Commit

Permalink
use variable for client schemes, allowing override (#467) (#471)
Browse files Browse the repository at this point in the history
* use variable for client schemes, allowing override

This change is intended to make the default client implementations
more flexible so that their scheme can be customized. This can be
useful in scenarios where a subclass wants to implement a custom
scheme on e.g. a S3 compatible API [1] but with a custom scheme
so that the default S3 access is still also available.

[1] https://cloudpathlib.drivendata.org/stable/authentication/#accessing-custom-s3-compatible-object-stores

The tests have been updated to include a new s3-like rig which uses the
new scheme override functionality.

* use single cloud_prefix

* tests: switch to lighter-weight custom scheme tests

* update HISTORY file for custom scheme support

* update custom scheme tests to utilize pytest fixture

This isolates the implementation in response to PR feedback:
#467 (comment)

Co-authored-by: Aaron Taylor <[email protected]>
  • Loading branch information
pjbull and kujenga authored Sep 9, 2024
1 parent b776bee commit 8207b3d
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 15 deletions.
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# cloudpathlib Changelog

## UNRELEASED

- Added support for custom schemes in CloudPath and Client subclases. (Issue [#466](https://github.com/drivendataorg/cloudpathlib/issues/466), PR [#467](https://github.com/drivendataorg/cloudpathlib/pull/467))

## v0.19.0 (2024-08-29)

- Fixed an error that occurred when loading and dumping `CloudPath` objects using pickle multiple times. (Issue [#450](https://github.com/drivendataorg/cloudpathlib/issues/450), PR [#454](https://github.com/drivendataorg/cloudpathlib/pull/454), thanks to [@kujenga](https://github.com/kujenga))
Expand Down
14 changes: 10 additions & 4 deletions cloudpathlib/azure/azblobclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,14 @@ def _list_dir(
) -> Iterable[Tuple[AzureBlobPath, bool]]:
if not cloud_path.container:
for container in self.service_client.list_containers():
yield self.CloudPath(f"az://{container.name}"), True
yield self.CloudPath(f"{cloud_path.cloud_prefix}{container.name}"), True

if not recursive:
continue

yield from self._list_dir(self.CloudPath(f"az://{container.name}"), recursive=True)
yield from self._list_dir(
self.CloudPath(f"{cloud_path.cloud_prefix}{container.name}"), recursive=True
)
return

container_client = self.service_client.get_container_client(cloud_path.container)
Expand All @@ -295,7 +297,9 @@ def _list_dir(
paths = file_system_client.get_paths(path=cloud_path.blob, recursive=recursive)

for path in paths:
yield self.CloudPath(f"az://{cloud_path.container}/{path.name}"), path.is_directory
yield self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.container}/{path.name}"
), path.is_directory

else:
if not recursive:
Expand All @@ -306,7 +310,9 @@ def _list_dir(
for blob in blobs:
# walk_blobs returns folders with a trailing slash
blob_path = blob.name.rstrip("/")
blob_cloud_path = self.CloudPath(f"az://{cloud_path.container}/{blob_path}")
blob_cloud_path = self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.container}/{blob_path}"
)

yield blob_cloud_path, (
isinstance(blob, BlobPrefix)
Expand Down
16 changes: 11 additions & 5 deletions cloudpathlib/gs/gsclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
)

yield from (
(self.CloudPath(f"gs://{str(b)}"), True) for b in self.client.list_buckets()
(self.CloudPath(f"{cloud_path.cloud_prefix}{str(b)}"), True)
for b in self.client.list_buckets()
)
return

Expand All @@ -200,25 +201,30 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
# if we haven't surfaced this directory already
if parent not in yielded_dirs and str(parent) != ".":
yield (
self.CloudPath(f"gs://{cloud_path.bucket}/{prefix}{parent}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{prefix}{parent}"
),
True, # is a directory
)
yielded_dirs.add(parent)
yield (self.CloudPath(f"gs://{cloud_path.bucket}/{o.name}"), False) # is a file
yield (
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{o.name}"),
False,
) # is a file
else:
iterator = bucket.list_blobs(delimiter="/", prefix=prefix)

# files must be iterated first for `.prefixes` to be populated:
# see: https://github.com/googleapis/python-storage/issues/863
for file in iterator:
yield (
self.CloudPath(f"gs://{cloud_path.bucket}/{file.name}"),
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{file.name}"),
False, # is a file
)

for directory in iterator.prefixes:
yield (
self.CloudPath(f"gs://{cloud_path.bucket}/{directory}"),
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{directory}"),
True, # is a directory
)

Expand Down
18 changes: 13 additions & 5 deletions cloudpathlib/s3/s3client.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
)

yield from (
(self.CloudPath(f"s3://{b['Name']}"), True)
(self.CloudPath(f"{cloud_path.cloud_prefix}{b['Name']}"), True)
for b in self.client.list_buckets().get("Buckets", [])
)
return
Expand All @@ -241,7 +241,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
canonical = result_prefix.get("Prefix").rstrip("/") # keep a canonical form
if canonical not in yielded_dirs:
yield (
self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{canonical}"
),
True,
)
yielded_dirs.add(canonical)
Expand All @@ -254,7 +256,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
parent_canonical = prefix + str(parent).rstrip("/")
if parent_canonical not in yielded_dirs and str(parent) != ".":
yield (
self.CloudPath(f"s3://{cloud_path.bucket}/{parent_canonical}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{parent_canonical}"
),
True,
)
yielded_dirs.add(parent_canonical)
Expand All @@ -267,15 +271,19 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
# s3 fake directories have 0 size and end with "/"
if result_key.get("Key").endswith("/") and result_key.get("Size") == 0:
yield (
self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{canonical}"
),
True,
)
yielded_dirs.add(canonical)

# yield object as file
else:
yield (
self.CloudPath(f"s3://{cloud_path.bucket}/{result_key.get('Key')}"),
self.CloudPath(
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{result_key.get('Key')}"
),
False,
)

Expand Down
53 changes: 52 additions & 1 deletion tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import mimetypes
import os
from pathlib import Path
import random
import string
from pathlib import Path

import pytest

from cloudpathlib import CloudPath
from cloudpathlib.client import register_client_class
from cloudpathlib.cloudpath import implementation_registry, register_path_class
from cloudpathlib.s3.s3client import S3Client
from cloudpathlib.s3.s3path import S3Path


def test_default_client_instantiation(rig):
Expand Down Expand Up @@ -116,3 +121,49 @@ def my_content_type(path):

for suffix, content_type in mimes:
_test_write_content_type(suffix, content_type, rig)


@pytest.fixture
def custom_s3_path():
# A fixture isolates these classes as they modify the global registry of
# implementations.
@register_path_class("mys3")
class MyS3Path(S3Path):
cloud_prefix: str = "mys3://"

@register_client_class("mys3")
class MyS3Client(S3Client):
pass

yield (MyS3Path, MyS3Client)

# cleanup after use
implementation_registry.pop("mys3")


def test_custom_mys3path_instantiation(custom_s3_path):
CustomPath, _ = custom_s3_path

path = CustomPath("mys3://bucket/dir/file.txt")
assert isinstance(path, CustomPath)
assert path.cloud_prefix == "mys3://"
assert path.bucket == "bucket"
assert path.key == "dir/file.txt"


def test_custom_mys3client_instantiation(custom_s3_path):
_, CustomClient = custom_s3_path

client = CustomClient()
assert isinstance(client, CustomClient)
assert client.CloudPath("mys3://bucket/dir/file.txt").cloud_prefix == "mys3://"


def test_custom_mys3client_default_client(custom_s3_path):
_, CustomClient = custom_s3_path

CustomClient().set_as_default_client()

path = CloudPath("mys3://bucket/dir/file.txt")
assert isinstance(path.client, CustomClient)
assert path.cloud_prefix == "mys3://"

0 comments on commit 8207b3d

Please sign in to comment.