Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-38364: [Python] Initialize S3 on first use #38375

Merged
merged 4 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions python/pyarrow/_s3fs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ def finalize_s3():
check_status(CFinalizeS3())


def ensure_s3_finalized():
    """
    Finalize S3 if already initialized.

    Thin wrapper that calls the C++ function
    ``arrow::fs::EnsureS3Finalized`` and hands the returned status to
    ``check_status``.

    Unlike ``finalize_s3``, this is meant to be safe to call when S3 was
    never initialized (per the summary line above); the exact no-op
    behavior lives in the C++ implementation and is not visible here.

    Raises
    ------
    Presumably whatever ``check_status`` raises for a non-OK status
    (NOTE(review): confirm against ``check_status``).
    """
    # The status object returned by the C++ call is passed straight to
    # check_status, so it is never silently dropped here.
    check_status(CEnsureS3Finalized())


def resolve_s3_region(bucket):
"""
Resolve the S3 region of a bucket.
Expand All @@ -93,6 +100,8 @@ def resolve_s3_region(bucket):
c_string c_bucket
c_string c_region

ensure_s3_initialized()

c_bucket = tobytes(bucket)
with nogil:
c_region = GetResultValue(ResolveS3BucketRegion(c_bucket))
Expand Down Expand Up @@ -260,6 +269,8 @@ cdef class S3FileSystem(FileSystem):
load_frequency=900, proxy_options=None,
allow_bucket_creation=False, allow_bucket_deletion=False,
retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3)):
ensure_s3_initialized()

cdef:
optional[CS3Options] options
shared_ptr[CS3FileSystem] wrapped
Expand Down
9 changes: 6 additions & 3 deletions python/pyarrow/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,16 @@
from pyarrow._s3fs import ( # noqa
AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy,
S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized,
finalize_s3, initialize_s3, resolve_s3_region)
finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region)
except ImportError:
_not_imported.append("S3FileSystem")
else:
ensure_s3_initialized()
# GH-38364: we don't initialize S3 eagerly as that could lead
# to crashes at shutdown even when S3 isn't used.
# Instead, S3 is initialized lazily using `ensure_s3_initialized`
# in assorted places.
import atexit
pitrou marked this conversation as resolved.
Show resolved Hide resolved
atexit.register(finalize_s3)
pitrou marked this conversation as resolved.
Show resolved Hide resolved
atexit.register(ensure_s3_finalized)


def __getattr__(name):
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/includes/libarrow_fs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
const CS3GlobalOptions& options)
cdef CStatus CEnsureS3Initialized "arrow::fs::EnsureS3Initialized"()
cdef CStatus CFinalizeS3 "arrow::fs::FinalizeS3"()
cdef CStatus CEnsureS3Finalized "arrow::fs::EnsureS3Finalized"()

cdef CResult[c_string] ResolveS3BucketRegion(const c_string& bucket)

Expand Down