Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-16521 [C++][Python] Configure curl timeout policy for S3 #13385

Merged
merged 10 commits into from
Aug 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -386,10 +386,11 @@ bool S3Options::Equals(const S3Options& other) const {
default_metadata_size
? (other.default_metadata && other.default_metadata->Equals(*default_metadata))
: (!other.default_metadata || other.default_metadata->size() == 0);
return (region == other.region && endpoint_override == other.endpoint_override &&
scheme == other.scheme && role_arn == other.role_arn &&
session_name == other.session_name && external_id == other.external_id &&
load_frequency == other.load_frequency &&
return (region == other.region && connect_timeout == other.connect_timeout &&
request_timeout == other.request_timeout &&
endpoint_override == other.endpoint_override && scheme == other.scheme &&
role_arn == other.role_arn && session_name == other.session_name &&
external_id == other.external_id && load_frequency == other.load_frequency &&
proxy_options.Equals(other.proxy_options) &&
credentials_kind == other.credentials_kind &&
background_writes == other.background_writes &&
Expand Down Expand Up @@ -718,6 +719,14 @@ class ClientBuilder {
if (!options_.region.empty()) {
client_config_.region = ToAwsString(options_.region);
}
if (options_.request_timeout > 0) {
// Use ceil() to avoid setting it to 0 as that probably means no timeout.
client_config_.requestTimeoutMs = ceil(options_.request_timeout * 1000);
}
if (options_.connect_timeout > 0) {
client_config_.connectTimeoutMs = ceil(options_.connect_timeout * 1000);
}

client_config_.endpointOverride = ToAwsString(options_.endpoint_override);
if (options_.scheme == "http") {
client_config_.scheme = Aws::Http::Scheme::HTTP;
Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/filesystem/s3fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,17 @@ struct ARROW_EXPORT S3Options {
/// server).
std::string region;

/// \brief Socket connection timeout, in seconds
///
/// If negative, the AWS SDK default value is used (typically 1 second).
double connect_timeout = -1;

/// \brief Socket read timeout on Windows and macOS, in seconds
///
/// If negative, the AWS SDK default value is used (typically 3 seconds).
/// This option is ignored on non-Windows, non-macOS systems.
double request_timeout = -1;

/// If non-empty, override region with a connect string such as "localhost:9000"
// XXX perhaps instead take a URL like "http://localhost:9000"?
std::string endpoint_override;
Expand Down
22 changes: 18 additions & 4 deletions python/pyarrow/_s3fs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,13 @@ cdef class S3FileSystem(FileSystem):
assumed role session will be refreshed.
region : str, default 'us-east-1'
AWS region to connect to.
request_timeout : double, default None
Socket read timeouts on Windows and macOS, in seconds.
If omitted, the AWS SDK default value is used (typically 3 seconds).
This option is ignored on non-Windows, non-macOS systems.
connect_timeout : double, default None
Socket connection timeout, in seconds.
If omitted, the AWS SDK default value is used (typically 1 second).
scheme : str, default 'https'
S3 connection transport scheme.
endpoint_override : str, default None
Expand Down Expand Up @@ -183,10 +190,11 @@ cdef class S3FileSystem(FileSystem):
CS3FileSystem* s3fs

def __init__(self, *, access_key=None, secret_key=None, session_token=None,
bint anonymous=False, region=None, scheme=None,
endpoint_override=None, bint background_writes=True,
default_metadata=None, role_arn=None, session_name=None,
external_id=None, load_frequency=900, proxy_options=None,
bint anonymous=False, region=None, request_timeout=None,
connect_timeout=None, scheme=None, endpoint_override=None,
bint background_writes=True, default_metadata=None,
role_arn=None, session_name=None, external_id=None,
load_frequency=900, proxy_options=None,
allow_bucket_creation=False, allow_bucket_deletion=False):
cdef:
CS3Options options
Expand Down Expand Up @@ -254,6 +262,10 @@ cdef class S3FileSystem(FileSystem):

if region is not None:
options.region = tobytes(region)
if request_timeout is not None:
options.request_timeout = request_timeout
if connect_timeout is not None:
options.connect_timeout = connect_timeout
if scheme is not None:
options.scheme = tobytes(scheme)
if endpoint_override is not None:
Expand Down Expand Up @@ -324,6 +336,8 @@ cdef class S3FileSystem(FileSystem):
CS3CredentialsKind_Anonymous),
region=frombytes(opts.region),
scheme=frombytes(opts.scheme),
connect_timeout=opts.connect_timeout,
request_timeout=opts.request_timeout,
endpoint_override=frombytes(opts.endpoint_override),
role_arn=frombytes(opts.role_arn),
session_name=frombytes(opts.session_name),
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/includes/libarrow_fs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:

cdef cppclass CS3Options "arrow::fs::S3Options":
c_string region
double connect_timeout
double request_timeout
c_string endpoint_override
c_string scheme
c_bool background_writes
Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/tests/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,15 @@ def test_s3_options():
assert isinstance(fs, S3FileSystem)
assert pickle.loads(pickle.dumps(fs)) == fs

fs = S3FileSystem(request_timeout=0.5, connect_timeout=0.25)
assert isinstance(fs, S3FileSystem)
assert pickle.loads(pickle.dumps(fs)) == fs

fs2 = S3FileSystem(request_timeout=0.25, connect_timeout=0.5)
assert isinstance(fs2, S3FileSystem)
assert pickle.loads(pickle.dumps(fs2)) == fs2
assert fs2 != fs

with pytest.raises(ValueError):
S3FileSystem(access_key='access')
with pytest.raises(ValueError):
Expand Down