Skip to content

Commit

Permalink
apacheGH-39779: [Python] Expose force_virtual_addressing in PyArrow (a…
Browse files Browse the repository at this point in the history
…pache#39819)

### Rationale for this change / What changes are included in this PR?

Expose the `force_virtual_addressing` S3 option in PyArrow so that it can be set when constructing `pyarrow.fs.S3FileSystem`.

### Are these changes tested?

Existing unit tests are not broken, and a new test case has been added.

### Are there any user-facing changes?

`pyarrow.fs.S3FileSystem` now accepts a `force_virtual_addressing` keyword argument (default `False`).

* Closes: apache#39779

Authored-by: yo1956 <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
teihenn authored and thisisnic committed Mar 8, 2024
1 parent ec166c3 commit 6feecb5
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 1 deletion.
11 changes: 10 additions & 1 deletion python/pyarrow/_s3fs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ cdef class S3FileSystem(FileSystem):
retry_strategy : S3RetryStrategy, default AwsStandardS3RetryStrategy(max_attempts=3)
The retry strategy to use with S3; fail after max_attempts. Available
strategies are AwsStandardS3RetryStrategy, AwsDefaultS3RetryStrategy.
force_virtual_addressing : bool, default False
Whether to use virtual addressing of buckets.
If true, then virtual addressing is always enabled.
If false, then virtual addressing is only enabled if `endpoint_override` is empty.
This can be used for non-AWS backends that only support virtual hosted-style access.
Examples
--------
Expand All @@ -268,7 +273,9 @@ cdef class S3FileSystem(FileSystem):
role_arn=None, session_name=None, external_id=None,
load_frequency=900, proxy_options=None,
allow_bucket_creation=False, allow_bucket_deletion=False,
retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3)):
retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(
max_attempts=3),
force_virtual_addressing=False):
cdef:
optional[CS3Options] options
shared_ptr[CS3FileSystem] wrapped
Expand Down Expand Up @@ -380,6 +387,7 @@ cdef class S3FileSystem(FileSystem):

options.value().allow_bucket_creation = allow_bucket_creation
options.value().allow_bucket_deletion = allow_bucket_deletion
options.value().force_virtual_addressing = force_virtual_addressing

if isinstance(retry_strategy, AwsStandardS3RetryStrategy):
options.value().retry_strategy = CS3RetryStrategy.GetAwsStandardRetryStrategy(
Expand Down Expand Up @@ -447,6 +455,7 @@ cdef class S3FileSystem(FileSystem):
opts.proxy_options.username),
'password': frombytes(
opts.proxy_options.password)},
force_virtual_addressing=opts.force_virtual_addressing,
),)
)

Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/includes/libarrow_fs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
c_bool background_writes
c_bool allow_bucket_creation
c_bool allow_bucket_deletion
c_bool force_virtual_addressing
shared_ptr[const CKeyValueMetadata] default_metadata
c_string role_arn
c_string session_name
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/tests/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,10 @@ def test_s3_options(pickle_module):
assert pickle_module.loads(pickle_module.dumps(fs2)) == fs2
assert fs2 != fs

fs = S3FileSystem(endpoint_override='localhost:8999', force_virtual_addressing=True)
assert isinstance(fs, S3FileSystem)
assert pickle_module.loads(pickle_module.dumps(fs)) == fs

with pytest.raises(ValueError):
S3FileSystem(access_key='access')
with pytest.raises(ValueError):
Expand Down

0 comments on commit 6feecb5

Please sign in to comment.