Skip to content

Commit

Permalink
apacheGH-39292 [C++][FS]: Remove the AzureBackend enum and add more f…
Browse files Browse the repository at this point in the history
…lexible connection options (apache#39293)

### Rationale for this change

It's good to avoid mentioning the specific test environment in the implementation code.

### What changes are included in this PR?

 - Removal of the enum
 - Removal of the `AzureOptions::backend` class member
 - Addition of more options to `AzureOptions`
 - Removal of some private string members of `AzureOptions` -- the URLs are built on-the-fly when the clients are instantiated now

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Changes to the public interface (`azurefs.h`) that won't affect users because the `AzureFS` implementation is not used yet.
* Closes: apache#39292

Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
  • Loading branch information
felipecrv authored and clayburn committed Jan 23, 2024
1 parent 17ab4ff commit 4e0483e
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 42 deletions.
61 changes: 40 additions & 21 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,12 @@ AzureOptions::~AzureOptions() = default;

bool AzureOptions::Equals(const AzureOptions& other) const {
// TODO(GH-38598): update here when more auth methods are added.
const bool equals = backend == other.backend &&
const bool equals = blob_storage_authority == other.blob_storage_authority &&
dfs_storage_authority == other.dfs_storage_authority &&
blob_storage_scheme == other.blob_storage_scheme &&
dfs_storage_scheme == other.dfs_storage_scheme &&
default_metadata == other.default_metadata &&
account_blob_url_ == other.account_blob_url_ &&
account_dfs_url_ == other.account_dfs_url_ &&
account_name_ == other.account_name_ &&
credential_kind_ == other.credential_kind_;
if (!equals) {
return false;
Expand All @@ -72,42 +74,59 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
return false;
}

void AzureOptions::SetUrlsForAccountName(const std::string& account_name) {
if (this->backend == AzureBackend::kAzurite) {
account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
} else {
account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
namespace {
std::string BuildBaseUrl(const std::string& scheme, const std::string& authority,
const std::string& account_name) {
std::string url;
url += scheme + "://";
if (!authority.empty()) {
if (authority[0] == '.') {
url += account_name;
url += authority;
} else {
url += authority;
url += "/";
url += account_name;
}
}
url += "/";
return url;
}
} // namespace

Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) {
AzureOptions::SetUrlsForAccountName(account_name);
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
std::string AzureOptions::AccountBlobUrl(const std::string& account_name) const {
return BuildBaseUrl(blob_storage_scheme, blob_storage_authority, account_name);
}

std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const {
return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
}

Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name,
const std::string& account_key) {
AzureOptions::SetUrlsForAccountName(account_name);
credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
account_name_ = account_name;
storage_shared_key_credential_ =
std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
return Status::OK();
}

Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) {
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
}

Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceClient()
const {
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
Expand All @@ -119,11 +138,11 @@ AzureOptions::MakeDataLakeServiceClient() const {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(account_dfs_url_,
token_credential_);
return std::make_unique<DataLake::DataLakeServiceClient>(
AccountDfsUrl(account_name_), token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
account_dfs_url_, storage_shared_key_credential_);
AccountDfsUrl(account_name_), storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth configuration");
}
Expand Down
51 changes: 33 additions & 18 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,37 @@ class DataLakeServiceClient;

namespace arrow::fs {

enum class AzureBackend {
/// \brief Official Azure Remote Backend
kAzure,
/// \brief Local Simulated Storage
kAzurite
};

/// Options for the AzureFileSystem implementation.
struct ARROW_EXPORT AzureOptions {
/// \brief The backend to connect to: Azure or Azurite (for testing).
AzureBackend backend = AzureBackend::kAzure;
/// \brief hostname[:port] of the Azure Blob Storage Service.
///
/// If the hostname is a relative domain name (one that starts with a '.'), then storage
/// account URLs will be constructed by prepending the account name to the hostname.
/// If the hostname is a fully qualified domain name, then the hostname will be used
/// as-is and the account name will follow the hostname in the URL path.
///
/// Default: ".blob.core.windows.net"
std::string blob_storage_authority = ".blob.core.windows.net";

/// \brief hostname[:port] of the Azure Data Lake Storage Gen 2 Service.
///
/// If the hostname is a relative domain name (one that starts with a '.'), then storage
/// account URLs will be constructed by prepending the account name to the hostname.
/// If the hostname is a fully qualified domain name, then the hostname will be used
/// as-is and the account name will follow the hostname in the URL path.
///
/// Default: ".dfs.core.windows.net"
std::string dfs_storage_authority = ".dfs.core.windows.net";

/// \brief Azure Blob Storage connection transport.
///
/// Default: "https"
std::string blob_storage_scheme = "https";

/// \brief Azure Data Lake Storage Gen 2 connection transport.
///
/// Default: "https"
std::string dfs_storage_scheme = "https";

// TODO(GH-38598): Add support for more auth methods.
// std::string connection_string;
Expand All @@ -65,22 +85,17 @@ struct ARROW_EXPORT AzureOptions {
std::shared_ptr<const KeyValueMetadata> default_metadata;

private:
std::string account_blob_url_;
std::string account_dfs_url_;

enum class CredentialKind {
kAnonymous,
kTokenCredential,
kStorageSharedKeyCredential,
} credential_kind_ = CredentialKind::kAnonymous;

std::string account_name_;
std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;

std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;

void SetUrlsForAccountName(const std::string& account_name);

public:
AzureOptions();
~AzureOptions();
Expand All @@ -92,8 +107,8 @@ struct ARROW_EXPORT AzureOptions {

bool Equals(const AzureOptions& other) const;

const std::string& AccountBlobUrl() const { return account_blob_url_; }
const std::string& AccountDfsUrl() const { return account_dfs_url_; }
std::string AccountBlobUrl(const std::string& account_name) const;
std::string AccountDfsUrl(const std::string& account_name) const;

Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
MakeBlobServiceClient() const;
Expand Down
21 changes: 18 additions & 3 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ namespace Blobs = Azure::Storage::Blobs;
namespace Core = Azure::Core;
namespace DataLake = Azure::Storage::Files::DataLake;

enum class AzureBackend {
/// \brief Official Azure Remote Backend
kAzure,
/// \brief Local Simulated Storage
kAzurite
};

class BaseAzureEnv : public ::testing::Environment {
protected:
std::string account_name_;
Expand Down Expand Up @@ -265,8 +272,6 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl<AzureHierarchicalNSEnv> {

TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
AzureOptions options;
options.backend = AzureBackend::kAzurite; // Irrelevant for this test because it
// doesn't connect to the server.
ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name"));
EXPECT_OK_AND_ASSIGN(auto default_credential_fs, AzureFileSystem::Make(options));
}
Expand Down Expand Up @@ -352,7 +357,17 @@ class TestAzureFileSystem : public ::testing::Test {

static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
AzureOptions options;
options.backend = env->backend();
switch (env->backend()) {
case AzureBackend::kAzurite:
options.blob_storage_authority = "127.0.0.1:10000";
options.dfs_storage_authority = "127.0.0.1:10000";
options.blob_storage_scheme = "http";
options.dfs_storage_scheme = "http";
break;
case AzureBackend::kAzure:
// Use the default values
break;
}
ARROW_EXPECT_OK(
options.ConfigureAccountKeyCredential(env->account_name(), env->account_key()));
return options;
Expand Down

0 comments on commit 4e0483e

Please sign in to comment.