Skip to content

Commit

Permalink
ARROW-16521 [C++][Python] Configure curl timeout policy for S3 (#13385)
Browse files Browse the repository at this point in the history
added timeout options to c++ and python s3fs

Lead-authored-by: Ziheng Wang <[email protected]>
Co-authored-by: Ziheng Wang <[email protected]>
Co-authored-by: Ziheng Wang <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
3 people authored Aug 4, 2022
1 parent ee874d6 commit 669009e
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 8 deletions.
17 changes: 13 additions & 4 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -386,10 +386,11 @@ bool S3Options::Equals(const S3Options& other) const {
default_metadata_size
? (other.default_metadata && other.default_metadata->Equals(*default_metadata))
: (!other.default_metadata || other.default_metadata->size() == 0);
return (region == other.region && endpoint_override == other.endpoint_override &&
scheme == other.scheme && role_arn == other.role_arn &&
session_name == other.session_name && external_id == other.external_id &&
load_frequency == other.load_frequency &&
return (region == other.region && connect_timeout == other.connect_timeout &&
request_timeout == other.request_timeout &&
endpoint_override == other.endpoint_override && scheme == other.scheme &&
role_arn == other.role_arn && session_name == other.session_name &&
external_id == other.external_id && load_frequency == other.load_frequency &&
proxy_options.Equals(other.proxy_options) &&
credentials_kind == other.credentials_kind &&
background_writes == other.background_writes &&
Expand Down Expand Up @@ -718,6 +719,14 @@ class ClientBuilder {
if (!options_.region.empty()) {
client_config_.region = ToAwsString(options_.region);
}
if (options_.request_timeout > 0) {
// Use ceil() to avoid setting it to 0 as that probably means no timeout.
client_config_.requestTimeoutMs = ceil(options_.request_timeout * 1000);
}
if (options_.connect_timeout > 0) {
client_config_.connectTimeoutMs = ceil(options_.connect_timeout * 1000);
}

client_config_.endpointOverride = ToAwsString(options_.endpoint_override);
if (options_.scheme == "http") {
client_config_.scheme = Aws::Http::Scheme::HTTP;
Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/filesystem/s3fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,17 @@ struct ARROW_EXPORT S3Options {
/// server).
std::string region;

/// \brief Socket connection timeout, in seconds
///
/// If negative, the AWS SDK default value is used (typically 1 second).
double connect_timeout = -1;

/// \brief Socket read timeout on Windows and macOS, in seconds
///
/// If negative, the AWS SDK default value is used (typically 3 seconds).
/// This option is ignored on non-Windows, non-macOS systems.
double request_timeout = -1;

/// If non-empty, override region with a connect string such as "localhost:9000"
// XXX perhaps instead take a URL like "http://localhost:9000"?
std::string endpoint_override;
Expand Down
22 changes: 18 additions & 4 deletions python/pyarrow/_s3fs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,13 @@ cdef class S3FileSystem(FileSystem):
assumed role session will be refreshed.
region : str, default 'us-east-1'
AWS region to connect to.
request_timeout : double, default None
Socket read timeouts on Windows and macOS, in seconds.
If omitted, the AWS SDK default value is used (typically 3 seconds).
This option is ignored on non-Windows, non-macOS systems.
connect_timeout : double, default None
Socket connection timeout, in seconds.
If omitted, the AWS SDK default value is used (typically 1 second).
scheme : str, default 'https'
S3 connection transport scheme.
endpoint_override : str, default None
Expand Down Expand Up @@ -183,10 +190,11 @@ cdef class S3FileSystem(FileSystem):
CS3FileSystem* s3fs

def __init__(self, *, access_key=None, secret_key=None, session_token=None,
bint anonymous=False, region=None, scheme=None,
endpoint_override=None, bint background_writes=True,
default_metadata=None, role_arn=None, session_name=None,
external_id=None, load_frequency=900, proxy_options=None,
bint anonymous=False, region=None, request_timeout=None,
connect_timeout=None, scheme=None, endpoint_override=None,
bint background_writes=True, default_metadata=None,
role_arn=None, session_name=None, external_id=None,
load_frequency=900, proxy_options=None,
allow_bucket_creation=False, allow_bucket_deletion=False):
cdef:
CS3Options options
Expand Down Expand Up @@ -254,6 +262,10 @@ cdef class S3FileSystem(FileSystem):

if region is not None:
options.region = tobytes(region)
if request_timeout is not None:
options.request_timeout = request_timeout
if connect_timeout is not None:
options.connect_timeout = connect_timeout
if scheme is not None:
options.scheme = tobytes(scheme)
if endpoint_override is not None:
Expand Down Expand Up @@ -324,6 +336,8 @@ cdef class S3FileSystem(FileSystem):
CS3CredentialsKind_Anonymous),
region=frombytes(opts.region),
scheme=frombytes(opts.scheme),
connect_timeout=opts.connect_timeout,
request_timeout=opts.request_timeout,
endpoint_override=frombytes(opts.endpoint_override),
role_arn=frombytes(opts.role_arn),
session_name=frombytes(opts.session_name),
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/includes/libarrow_fs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:

cdef cppclass CS3Options "arrow::fs::S3Options":
c_string region
double connect_timeout
double request_timeout
c_string endpoint_override
c_string scheme
c_bool background_writes
Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/tests/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,15 @@ def test_s3_options():
assert isinstance(fs, S3FileSystem)
assert pickle.loads(pickle.dumps(fs)) == fs

fs = S3FileSystem(request_timeout=0.5, connect_timeout=0.25)
assert isinstance(fs, S3FileSystem)
assert pickle.loads(pickle.dumps(fs)) == fs

fs2 = S3FileSystem(request_timeout=0.25, connect_timeout=0.5)
assert isinstance(fs2, S3FileSystem)
assert pickle.loads(pickle.dumps(fs2)) == fs2
assert fs2 != fs

with pytest.raises(ValueError):
S3FileSystem(access_key='access')
with pytest.raises(ValueError):
Expand Down

0 comments on commit 669009e

Please sign in to comment.