Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-43535: [C++] Support the AWS S3 SSE-C encryption #43601

Merged
merged 18 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions cpp/src/arrow/filesystem/s3_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,38 @@
#include <aws/core/client/RetryStrategy.h>
#include <aws/core/http/HttpTypes.h>
#include <aws/core/utils/DateTime.h>
#include <aws/core/utils/HashingUtils.h>
#include <aws/core/utils/StringUtils.h>

#include "arrow/filesystem/filesystem.h"
#include "arrow/filesystem/s3fs.h"
#include "arrow/status.h"
#include "arrow/util/base64.h"
#include "arrow/util/logging.h"
#include "arrow/util/print.h"
#include "arrow/util/string.h"

// ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) is a preprocessor predicate
// usable in `#if`: it is non-zero when the AWS SDK being compiled against is at
// least version major.minor.patch. When the SDK does not define its version
// macros, the check always evaluates to 0.
#ifndef ARROW_AWS_SDK_VERSION_CHECK
// AWS_SDK_VERSION_{MAJOR,MINOR,PATCH} are available since 1.9.7.
# if defined(AWS_SDK_VERSION_MAJOR) && defined(AWS_SDK_VERSION_MINOR) && \
defined(AWS_SDK_VERSION_PATCH)
// Redundant "(...)" are for suppressing "Weird number of spaces at
// line-start. Are you using a 2-space indent? [whitespace/indent]
// [3]" errors...
# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) \
((AWS_SDK_VERSION_MAJOR > (major) || \
(AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR > (minor)) || \
((AWS_SDK_VERSION_MAJOR == (major) && AWS_SDK_VERSION_MINOR == (minor) && \
AWS_SDK_VERSION_PATCH >= (patch)))))
# else
# define ARROW_AWS_SDK_VERSION_CHECK(major, minor, patch) 0
# endif
#endif // !ARROW_AWS_SDK_VERSION_CHECK

// Feature gate for SSE-C (server-side encryption with customer-provided keys):
// defined only when the AWS SDK is at least 1.9.201. Code that sets SSE-C
// request fields is compiled only when this macro is defined; otherwise the
// SSE-C helpers report Status::NotImplemented for a non-empty key.
#if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 201)
# define ARROW_S3_HAS_SSE_CUSTOMER_KEY
#endif

namespace arrow {
namespace fs {
namespace internal {
Expand Down Expand Up @@ -291,6 +314,70 @@ class ConnectRetryStrategy : public Aws::Client::RetryStrategy {
int32_t max_retry_duration_;
};

/// \brief Compute the base64-encoded MD5 digest of a raw SSE-C key
///
/// \param sse_customer_key the raw SSE-C key (binary, not base64 encoded);
/// it must be exactly 32 bytes long
/// \return the base64-encoded MD5 digest of the key, or Status::Invalid if the
/// key does not have the expected length
inline Result<std::string> CalculateSSECustomerKeyMD5(
    const std::string& sse_customer_key) {
  // SSE-C requires a 256-bit (32-byte) key, see
  // https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html#specifying-s3-c-encryption
  if (sse_customer_key.size() != 32) {
    return Status::Invalid("32 bytes SSE-C key is expected");
  }

  // Hash the raw key bytes. The Aws::String is built from pointer + length
  // because the key is binary data rather than a C string.
  Aws::Utils::ByteBuffer digest = Aws::Utils::HashingUtils::CalculateMD5(
      Aws::String(sse_customer_key.data(), sse_customer_key.size()));

  // The MD5 is transmitted base64-encoded.
  const char* digest_bytes = reinterpret_cast<const char*>(digest.GetUnderlyingData());
  return arrow::util::base64_encode(std::string_view(digest_bytes, digest.GetLength()));
}

// Values to set on an S3 request when using SSE-C (server-side encryption with
// a customer-provided key). Filled in by GetSSECustomerKeyHeaders(); the field
// order matters because instances are built with brace initialization.
struct SSECustomerKeyHeaders {
// Base64 encoding of the raw customer key.
std::string sse_customer_key;
// Base64 encoding of the MD5 digest of the raw customer key.
std::string sse_customer_key_md5;
// Encryption algorithm name (GetSSECustomerKeyHeaders() sets "AES256").
std::string sse_customer_algorithm;
};

/// \brief Build the SSE-C header values for a raw customer key
///
/// \param sse_customer_key the raw SSE-C key (binary, not base64 encoded);
/// an empty string means that SSE-C is not used
/// \return std::nullopt if the key is empty, otherwise the header values
/// (base64-encoded key, base64-encoded key MD5 and the "AES256" algorithm).
/// Returns Status::NotImplemented for a non-empty key if the AWS SDK in use
/// does not support SSE-C, and propagates the error of
/// CalculateSSECustomerKeyMD5() if the key is invalid.
inline Result<std::optional<SSECustomerKeyHeaders>> GetSSECustomerKeyHeaders(
    const std::string& sse_customer_key) {
  // No key configured: nothing to send.
  if (sse_customer_key.empty()) {
    return std::nullopt;
  }
#ifndef ARROW_S3_HAS_SSE_CUSTOMER_KEY
  return Status::NotImplemented(
      "SSE customer key not supported by this version of the AWS SDK");
#else
  // Validate the key and compute its digest first, so that an invalid key is
  // rejected before anything else is done with it.
  ARROW_ASSIGN_OR_RAISE(auto key_md5,
                        internal::CalculateSSECustomerKeyMD5(sse_customer_key));
  return SSECustomerKeyHeaders{arrow::util::base64_encode(sse_customer_key), key_md5,
                               "AES256"};
#endif
}

/// \brief Set the SSE-C fields on an S3 request
///
/// \param request the S3 request to update; it must provide
/// SetSSECustomerKey(), SetSSECustomerKeyMD5() and SetSSECustomerAlgorithm()
/// \param sse_customer_key the raw SSE-C key (binary, not base64 encoded);
/// if empty, the request is left untouched
/// \return Status::OK() on success or if the key is empty, otherwise the error
/// returned by GetSSECustomerKeyHeaders()
template <typename S3RequestType>
Status SetSSECustomerKey(S3RequestType* request, const std::string& sse_customer_key) {
  ARROW_ASSIGN_OR_RAISE(auto sse_headers, GetSSECustomerKeyHeaders(sse_customer_key));
  if (!sse_headers) {
    // SSE-C is not in use for this request.
    return Status::OK();
  }
#ifndef ARROW_S3_HAS_SSE_CUSTOMER_KEY
  return Status::NotImplemented(
      "SSE customer key not supported by this version of the AWS SDK");
#else
  const SSECustomerKeyHeaders& headers = *sse_headers;
  request->SetSSECustomerKey(headers.sse_customer_key);
  request->SetSSECustomerKeyMD5(headers.sse_customer_key_md5);
  request->SetSSECustomerAlgorithm(headers.sse_customer_algorithm);
  return Status::OK();
#endif
}

} // namespace internal
} // namespace fs
} // namespace arrow
77 changes: 77 additions & 0 deletions cpp/src/arrow/filesystem/s3_test_cert_internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

namespace arrow::fs {
// The two static strings below (a private key and a self-signed certificate)
// were generated by following
// https://github.com/minio/minio/tree/RELEASE.2024-09-22T00-33-43Z/docs/tls#323-generate-a-self-signed-certificate
// with the command:
// `openssl req -new -x509 -nodes -days 36500 -keyout private.key -out public.crt -config
// openssl.conf`
static constexpr const char* kMinioPrivateKey = R"(-----BEGIN PRIVATE KEY-----
MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCqwKYHsTSciGqP
uU3qkTWpnXIi3iC0eeW7JSzJHGFs880WdR5JdK4WufPK+1xzgiYjMEPfAcuSWz3b
qYyCI61q+a9Iu2nj7cFTW9bfZrmWlnI0YOLJc+q0AAdAjF1lvRKenH8tbjz/2jyl
i/cYQ+I5Tg4nngrX8OmOfluNzwD/nwGLq6/DVbzDUdPI9q1XtVT/0Vf7qwbDG1HD
NkIzKT5B+YdSLaOCRYNK3x7RPsfazKIBrTmRy1v454wKe8TjTmTB7+m5wKqfCJcq
lI253WHcK0lsw6zCNtX/kahPAvm/8mniPolW4qxoD6xwebgMVkrNTs3ztcPIG9O4
pmCbATijAgMBAAECggEACL5swiAU7Z8etdVrZAOjl9f0LEzrp9JGLVst++50Hrwt
WGUO8/wBnjBPh6lvhoq3oT2rfBP/dLMva7w28cMZ8kxu6W6PcZiPOdGOI0qDXm69
0mjTtDU3Y5hMxsVpUvhnp6+j45Otk/x89o1ATgHL59tTZjv1mjFABIf78DsVdgF9
CMi2q6Lv7NLftieyWmz1K3p109z9+xkDNSOkVrv1JFChviKqWgIS0rdFjySvTgoy
rHYT+TweDliKJrZCeoUJmNB0uVW/dM9lXhcvkvkJZKPPurylx1oH5a7K/sWFPf7A
Ed1vjvZQFlaXu/bOUUSOZtkErAir/oCxrUDsHxGsAQKBgQDZghyy7jNGNdjZe1Xs
On1ZVgIS3Nt+OLGCVH7tTsfZsCOb+SkrhB1RQva3YzPMfgoZScI9+bN/pRVf49Pj
qGEHkW/wozutUve7UMzeTOm1aWxUuaKSrmYST7muvAnlYEtO7agd0wrcusYXlMoG
KQwghkufO9I7wXcrudMKXZalIwKBgQDI+FaUwhgfThkgq6bRbdMEeosgohrCM9Wm
E5JMePQq4VaGcgGveWUoNOgT8kvJa0qQwQOqLZj7kUIdj+SCRt0u+Wu3p5IMqdOq
6tMnLNQ3wzUC2KGFLSfISR3L/bo5Bo6Jqz4hVtjMk3PV9bu50MNTNaofYb2xlf/f
/WgiEG0WgQKBgAr8RVLMMQ7EvXUOg6Jwuc//Rg+J1BQl7OE2P0rhBbr66HGCPhAS
liB6j1dnzT/wxbXNQeA7clNqFRBIw3TmFjB5qfuvYt44KIbvZ8l6fPtKncwRrCJY
aJNYL3qhyKYrHOKZojoPZKcNT9/1BdcVz6T842jhbpbSCKDOu9f0Lh2dAoGATZeM
Hh0eISAPFY0QeDV1znnds3jC6g4HQ/q0dnAQnWmo9XmY6v3sr2xV2jWnSxnwjRjo
aFD4itBXfYBr0ly30wYbr6mz+s2q2oeVhL+LJAhrNDEdk4SOooaQSY0p1BCTAdYq
w8Z7J+kaRRZ+J0zRzROgHkOncKQgSYPWK6i55YECgYAC+ECrHhUlPsfusjKpFsEe
stW1HCt3wXtKQn6SJ6IAesbxwALZS6Da/ZC2x1mdBHS3GwWvtGLc0BPnPVfJjr9V
m82qkgJ+p5d7qp7pRA7SFD+5809yVqRnEF3rSLafgGet9ah0ZjZvQ3fwnYZNnNH9
t9pJcv2E5xY7/nFNIorpKg==
-----END PRIVATE KEY-----
)";

static constexpr const char* kMinioCert = R"(-----BEGIN CERTIFICATE-----
MIIDiTCCAnGgAwIBAgIUXbHZ6FAhKSXg4WSGUQySlSyE4U0wDQYJKoZIhvcNAQEL
BQAwXzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAlZBMQ4wDAYDVQQHDAVBcnJvdzEO
MAwGA1UECgwFQXJyb3cxDjAMBgNVBAsMBUFycm93MRMwEQYDVQQDDApBcnJyb3dU
ZXN0MB4XDTI0MDkyNDA5MzUxNloXDTM0MDkyMjA5MzUxNlowXzELMAkGA1UEBhMC
VVMxCzAJBgNVBAgMAlZBMQ4wDAYDVQQHDAVBcnJvdzEOMAwGA1UECgwFQXJyb3cx
DjAMBgNVBAsMBUFycm93MRMwEQYDVQQDDApBcnJyb3dUZXN0MIIBIjANBgkqhkiG
9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqsCmB7E0nIhqj7lN6pE1qZ1yIt4gtHnluyUs
yRxhbPPNFnUeSXSuFrnzyvtcc4ImIzBD3wHLkls926mMgiOtavmvSLtp4+3BU1vW
32a5lpZyNGDiyXPqtAAHQIxdZb0Snpx/LW48/9o8pYv3GEPiOU4OJ54K1/Dpjn5b
jc8A/58Bi6uvw1W8w1HTyPatV7VU/9FX+6sGwxtRwzZCMyk+QfmHUi2jgkWDSt8e
0T7H2syiAa05kctb+OeMCnvE405kwe/pucCqnwiXKpSNud1h3CtJbMOswjbV/5Go
TwL5v/Jp4j6JVuKsaA+scHm4DFZKzU7N87XDyBvTuKZgmwE4owIDAQABoz0wOzAa
BgNVHREEEzARhwR/AAABgglsb2NhbGhvc3QwHQYDVR0OBBYEFOUNqUSfROf1dz3o
hAVBhgd3UIvKMA0GCSqGSIb3DQEBCwUAA4IBAQBSwWJ2dSw3jlHU0l2V3ozqthTt
XFo07AyWGw8AWNCM6mQ+GKBf0JJ1d7e4lyTf2lCobknS94EgGPORWeiucKYAoCjS
dh1eKGsSevz1rNbp7wsO7DoiRPciK+S95DbsPowloGI6fvOeE12Cf1udeNIpEYWs
OBFwN0HxfYqdPALCtw7l0icpTrJ2Us06UfL9kbkdZwQhXvOscG7JDRtNjBxl9XNm
TFeMNKROmrEPCWaYr6MJ+ItHtb5Cawapea4THz9GCjR9eLq2CbMqLezZ8xBHPzc4
ixI2l0uCfg7ZUSA+90yaScc7bhEQ8CMiPtJgNKaKIqB58DpY7028xJpW7Ma2
-----END CERTIFICATE-----
)";
} // namespace arrow::fs
88 changes: 76 additions & 12 deletions cpp/src/arrow/filesystem/s3_test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# include <sys/wait.h>
#endif

#include "arrow/filesystem/s3_test_cert_internal.h"
#include "arrow/filesystem/s3_test_util.h"
#include "arrow/filesystem/s3fs.h"
#include "arrow/testing/process.h"
Expand All @@ -31,6 +32,11 @@
namespace arrow {
namespace fs {

using ::arrow::internal::FileClose;
using ::arrow::internal::FileDescriptor;
using ::arrow::internal::FileOpenWritable;
using ::arrow::internal::FileWrite;
using ::arrow::internal::PlatformFilename;
using ::arrow::internal::TemporaryDir;

namespace {
Expand All @@ -44,16 +50,16 @@ const char* kEnvConnectString = "ARROW_TEST_S3_CONNECT_STRING";
const char* kEnvAccessKey = "ARROW_TEST_S3_ACCESS_KEY";
const char* kEnvSecretKey = "ARROW_TEST_S3_SECRET_KEY";

std::string GenerateConnectString() { return GetListenAddress(); }

} // namespace

// Private state of MinioTestServer.
//
// NOTE: members are destroyed in reverse declaration order, so the server
// process is destroyed before the temporary directories. Keep this in mind
// before reordering the fields.
struct MinioTestServer::Impl {
// Temporary directory passed to the Minio server as its data directory.
std::unique_ptr<TemporaryDir> temp_dir_;
// Temporary directory holding the TLS certificate and private key files.
// Only set by GenerateCertificateFile(); null otherwise.
std::unique_ptr<TemporaryDir> temp_dir_ca_;
// Address of the server, passed to Minio through "--address".
std::string connect_string_;
// Credentials reported by access_key() and secret_key(); they default to the
// built-in Minio test credentials.
std::string access_key_ = kMinioAccessKey;
std::string secret_key_ = kMinioSecretKey;
// Handle on the spawned Minio server process.
std::unique_ptr<util::Process> server_process_;
// URL scheme of the server: "http" by default, switched to "https" when the
// server is started with TLS enabled.
std::string scheme_ = "http";
};

MinioTestServer::MinioTestServer() : impl_(new Impl) {}
Expand All @@ -69,7 +75,45 @@ std::string MinioTestServer::access_key() const { return impl_->access_key_; }

std::string MinioTestServer::secret_key() const { return impl_->secret_key_; }

Status MinioTestServer::Start() {
// Return the path of the directory holding the TLS certificate files.
//
// The directory only exists once GenerateCertificateFile() has run (i.e. when
// the server was started with TLS enabled). Otherwise an empty string is
// returned instead of dereferencing a null pointer.
std::string MinioTestServer::ca_dir_path() const {
  const auto& ca_dir = impl_->temp_dir_ca_;
  if (ca_dir == nullptr) {
    return "";
  }
  return ca_dir->path().ToString();
}

// Return the path of the TLS certificate file ("public.crt") inside the
// certificate directory.
//
// The certificate only exists once GenerateCertificateFile() has run (i.e.
// when the server was started with TLS enabled). Otherwise an empty string is
// returned instead of dereferencing a null pointer.
std::string MinioTestServer::ca_file_path() const {
  const auto& ca_dir = impl_->temp_dir_ca_;
  if (ca_dir == nullptr) {
    return "";
  }
  return ca_dir->path().ToString() + "/public.crt";
}

// Return the URL scheme of the server: "http" by default, "https" once the
// server has been started with TLS enabled.
std::string MinioTestServer::scheme() const {
  const std::string& current_scheme = impl_->scheme_;
  return current_scheme;
}

// Write the embedded test certificate ("public.crt") and private key
// ("private.key") into a fresh temporary directory, then initialize the
// filesystem layer's global options with that certificate as the TLS CA file.
//
// Returns the first error encountered while creating the directory, writing
// either file, or initializing the global options.
Status MinioTestServer::GenerateCertificateFile() {
  // Use a dedicated directory for the certificate files instead of reusing the
  // data directory, since there is a test case checking that the data directory
  // is empty.
  ARROW_ASSIGN_OR_RAISE(impl_->temp_dir_ca_, TemporaryDir::Make("s3fs-test-ca-"));

  // Create `file_name` (given with its leading '/') inside the certificate
  // directory and fill it with the NUL-terminated `contents`.
  auto write_pem_file = [this](const char* file_name, const char* contents) -> Status {
    ARROW_ASSIGN_OR_RAISE(auto pem_path,
                          PlatformFilename::FromString(ca_dir_path() + file_name));
    ARROW_ASSIGN_OR_RAISE(auto pem_fd, FileOpenWritable(pem_path));
    ARROW_RETURN_NOT_OK(FileWrite(
        pem_fd.fd(), reinterpret_cast<const uint8_t*>(contents), strlen(contents)));
    ARROW_RETURN_NOT_OK(pem_fd.Close());
    return Status::OK();
  };

  ARROW_RETURN_NOT_OK(write_pem_file("/public.crt", kMinioCert));
  ARROW_RETURN_NOT_OK(write_pem_file("/private.key", kMinioPrivateKey));

  // Make the self-signed certificate trusted by the S3 client.
  arrow::fs::FileSystemGlobalOptions tls_options;
  tls_options.tls_ca_file_path = ca_file_path();
  ARROW_RETURN_NOT_OK(arrow::fs::Initialize(tls_options));

  return Status::OK();
}

Status MinioTestServer::Start(bool enable_tls) {
const char* connect_str = std::getenv(kEnvConnectString);
const char* access_key = std::getenv(kEnvAccessKey);
const char* secret_key = std::getenv(kEnvSecretKey);
Expand All @@ -88,12 +132,27 @@ Status MinioTestServer::Start() {
impl_->server_process_->SetEnv("MINIO_SECRET_KEY", kMinioSecretKey);
// Disable the embedded console (one less listening address to care about)
impl_->server_process_->SetEnv("MINIO_BROWSER", "off");
impl_->connect_string_ = GenerateConnectString();
ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName));
// NOTE: --quiet makes startup faster by suppressing remote version check
impl_->server_process_->SetArgs({"server", "--quiet", "--compat", "--address",
impl_->connect_string_,
impl_->temp_dir_->path().ToString()});
std::vector<std::string> minio_args({"server", "--quiet", "--compat"});
if (enable_tls) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can always enable TLS.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, removed the enable_tls flag and always enable the TLS

ARROW_RETURN_NOT_OK(GenerateCertificateFile());
minio_args.emplace_back("--certs-dir");
minio_args.emplace_back(ca_dir_path());
impl_->scheme_ = "https";
// With TLS enabled, we need the connection hostname to match the certificate's
// subject name. This also constrains the actual listening IP address.
impl_->connect_string_ = GetListenAddress("localhost");
} else {
// Without TLS enabled, we want to minimize the likelihood of address collisions
// by varying the listening IP address (note that most tests don't enable TLS).
impl_->connect_string_ = GetListenAddress();
}
minio_args.emplace_back("--address");
minio_args.emplace_back(impl_->connect_string_);
minio_args.emplace_back(impl_->temp_dir_->path().ToString());

ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName));
impl_->server_process_->SetArgs(minio_args);
ARROW_RETURN_NOT_OK(impl_->server_process_->Execute());
return Status::OK();
}
Expand All @@ -105,24 +164,29 @@ Status MinioTestServer::Stop() {

struct MinioTestEnvironment::Impl {
std::function<Future<std::shared_ptr<MinioTestServer>>()> server_generator_;
bool enable_tls_;

explicit Impl(bool enable_tls) : enable_tls_(enable_tls) {}

Result<std::shared_ptr<MinioTestServer>> LaunchOneServer() {
auto server = std::make_shared<MinioTestServer>();
RETURN_NOT_OK(server->Start());
RETURN_NOT_OK(server->Start(enable_tls_));
return server;
}
};

MinioTestEnvironment::MinioTestEnvironment() : impl_(new Impl) {}
MinioTestEnvironment::MinioTestEnvironment(bool enable_tls)
: impl_(new Impl(enable_tls)) {}

MinioTestEnvironment::~MinioTestEnvironment() = default;

void MinioTestEnvironment::SetUp() {
auto pool = ::arrow::internal::GetCpuThreadPool();

auto launch_one_server = []() -> Result<std::shared_ptr<MinioTestServer>> {
auto launch_one_server =
[enable_tls = impl_->enable_tls_]() -> Result<std::shared_ptr<MinioTestServer>> {
auto server = std::make_shared<MinioTestServer>();
RETURN_NOT_OK(server->Start());
RETURN_NOT_OK(server->Start(enable_tls));
return server;
};
impl_->server_generator_ = [pool, launch_one_server]() {
Expand Down
11 changes: 9 additions & 2 deletions cpp/src/arrow/filesystem/s3_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class MinioTestServer {
MinioTestServer();
~MinioTestServer();

Status Start();
Status Start(bool enable_tls = false);

Status Stop();

Expand All @@ -50,7 +50,14 @@ class MinioTestServer {

std::string secret_key() const;

std::string ca_dir_path() const;

std::string ca_file_path() const;

std::string scheme() const;

private:
Status GenerateCertificateFile();
struct Impl;
std::unique_ptr<Impl> impl_;
};
Expand All @@ -60,7 +67,7 @@ class MinioTestServer {

class MinioTestEnvironment : public ::testing::Environment {
public:
MinioTestEnvironment();
explicit MinioTestEnvironment(bool enable_tls = false);
~MinioTestEnvironment();

void SetUp() override;
Expand Down
Loading
Loading