Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
madsbk committed Oct 9, 2024
1 parent ae033ed commit 3db4d7e
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 31 deletions.
14 changes: 7 additions & 7 deletions python/kvikio/kvikio/_lib/remote_handle.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,15 @@ cdef class RemoteFile:
return ret

@classmethod
def open_s3_from_http_url(
def open_s3(
cls,
url: str,
bucket_name: str,
object_name: str,
nbytes: Optional[int],
):
cdef RemoteFile ret = RemoteFile()
cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint](
_to_string(url)
_to_string(bucket_name), _to_string(object_name)
)
if nbytes is None:
ret._handle = make_unique[cpp_RemoteHandle](move(ep))
Expand All @@ -94,15 +95,14 @@ cdef class RemoteFile:
return ret

@classmethod
def open_s3(
def open_s3_from_http_url(
cls,
bucket_name: str,
object_name: str,
url: str,
nbytes: Optional[int],
):
cdef RemoteFile ret = RemoteFile()
cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint](
_to_string(bucket_name), _to_string(object_name)
_to_string(url)
)
if nbytes is None:
ret._handle = make_unique[cpp_RemoteHandle](move(ep))
Expand Down
2 changes: 1 addition & 1 deletion python/kvikio/kvikio/benchmarks/s3_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def run_numpy_like(args, xp):

def run() -> float:
t0 = time.perf_counter()
with kvikio.RemoteFile.open_s3_from_http_url(url) as f:
with kvikio.RemoteFile.open_s3_url(url) as f:
res = f.read(recv)
t1 = time.perf_counter()
assert res == args.nbytes, f"IO mismatch, expected {args.nbytes} got {res}"
Expand Down
71 changes: 49 additions & 22 deletions python/kvikio/kvikio/remote_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,26 @@ def open_s3(
object_name: str,
nbytes: Optional[int] = None,
) -> RemoteFile:
"""Open a AWS S3 file from a bucket name and object name.
Please make sure to set the AWS environment variables:
- `AWS_DEFAULT_REGION`
- `AWS_ACCESS_KEY_ID`
- `AWS_SECRET_ACCESS_KEY`
Additionally, to override the AWS endpoint, set `AWS_ENDPOINT_URL`.
See <https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-envvars.html>
Parameters
----------
bucket_name
The bucket name of the file.
object_name
The object name of the file.
nbytes
The size of the file. If None, KvikIO will ask the server
for the file size.
"""
return RemoteFile(
_get_remote_module().RemoteFile.open_s3(bucket_name, object_name, nbytes)
)
Expand All @@ -85,33 +105,40 @@ def open_s3_url(
url: str,
nbytes: Optional[int] = None,
) -> RemoteFile:
"""Open a AWS S3 file from an URL.
The `url` can take two forms:
- A full HTTP URL such as "http://127.0.0.1/my/file", or
- An S3 URL such as "s3://<bucket>/<object>".
Please make sure to set the AWS environment variables:
- `AWS_DEFAULT_REGION`
- `AWS_ACCESS_KEY_ID`
- `AWS_SECRET_ACCESS_KEY`
Additionally, if `url` is an S3 URL, it is possible to override the AWS endpoint
by setting `AWS_ENDPOINT_URL`.
See <https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-envvars.html>
Parameters
----------
url
Either an HTTP URL or an S3 URL.
nbytes
The size of the file. If None, KvikIO will ask the server
for the file size.
"""
url = url.lower()
if url.startswith("http://") or url.startswith("https://"):
return cls.open_s3_from_http_url(url, nbytes)
return RemoteFile(
_get_remote_module().RemoteFile.open_s3_from_http_url(url, nbytes)
)
if url.startswith("s://"):
return cls.open_s3_from_s3_url(url, nbytes)
return RemoteFile(
_get_remote_module().RemoteFile.open_s3_from_s3_url(url, nbytes)
)
raise ValueError(f"Unsupported protocol in url: {url}")

@classmethod
def open_s3_from_http_url(
cls,
url: str,
nbytes: Optional[int] = None,
) -> RemoteFile:
return RemoteFile(
_get_remote_module().RemoteFile.open_s3_from_http_url(url, nbytes)
)

@classmethod
def open_s3_from_s3_url(
cls,
url: str,
nbytes: Optional[int] = None,
) -> RemoteFile:
return RemoteFile(
_get_remote_module().RemoteFile.open_s3_from_s3_url(url, nbytes)
)

def close(self) -> None:
"""Close the file"""
pass
Expand Down
2 changes: 1 addition & 1 deletion python/kvikio/tests/test_s3_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def test_read_with_file_offset(s3_base, xp, start, end):
s3_base=s3_base, bucket=bucket_name, files={object_name: bytes(a)}
) as server_address:
url = f"{server_address}/{bucket_name}/{object_name}"
with kvikio.RemoteFile.open_s3_from_http_url(url) as f:
with kvikio.RemoteFile.open_s3_url(url) as f:
b = xp.zeros(shape=(end - start,), dtype=xp.int64)
assert f.read(b, file_offset=start * a.itemsize) == b.nbytes
xp.testing.assert_array_equal(a[start:end], b)

0 comments on commit 3db4d7e

Please sign in to comment.