From 3d5a4e248adf24f0f7c3ebfc05c5a63ad30e59ef Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 15 Dec 2023 18:12:46 -0300 Subject: [PATCH] Remove the AzureBackend enum and add more flexible connection options to AzureOptions --- cpp/src/arrow/filesystem/azurefs.cc | 61 ++++++++++++++++-------- cpp/src/arrow/filesystem/azurefs.h | 51 +++++++++++++------- cpp/src/arrow/filesystem/azurefs_test.cc | 21 ++++++-- 3 files changed, 91 insertions(+), 42 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index dd267aac36d35..1aa3e86a6f926 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -51,10 +51,12 @@ AzureOptions::~AzureOptions() = default; bool AzureOptions::Equals(const AzureOptions& other) const { // TODO(GH-38598): update here when more auth methods are added. - const bool equals = backend == other.backend && + const bool equals = blob_storage_authority == other.blob_storage_authority && + dfs_storage_authority == other.dfs_storage_authority && + blob_storage_scheme == other.blob_storage_scheme && + dfs_storage_scheme == other.dfs_storage_scheme && default_metadata == other.default_metadata && - account_blob_url_ == other.account_blob_url_ && - account_dfs_url_ == other.account_dfs_url_ && + account_name_ == other.account_name_ && credential_kind_ == other.credential_kind_; if (!equals) { return false; @@ -72,42 +74,59 @@ bool AzureOptions::Equals(const AzureOptions& other) const { return false; } -void AzureOptions::SetUrlsForAccountName(const std::string& account_name) { - if (this->backend == AzureBackend::kAzurite) { - account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/"; - account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/"; - } else { - account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/"; - account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/"; +namespace { +std::string BuildBaseUrl(const std::string& scheme, const std::string& authority, + const std::string& account_name) { + std::string url; + url += scheme + "://"; + if (!authority.empty()) { + if (authority[0] == '.') { + url += account_name; + url += authority; + } else { + url += authority; + url += "/"; + url += account_name; + } } + url += "/"; + return url; } +} // namespace -Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) { - AzureOptions::SetUrlsForAccountName(account_name); - credential_kind_ = CredentialKind::kTokenCredential; - token_credential_ = std::make_shared(); - return Status::OK(); +std::string AzureOptions::AccountBlobUrl(const std::string& account_name) const { + return BuildBaseUrl(blob_storage_scheme, blob_storage_authority, account_name); +} + +std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const { + return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name); } Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, const std::string& account_key) { - AzureOptions::SetUrlsForAccountName(account_name); credential_kind_ = CredentialKind::kStorageSharedKeyCredential; + account_name_ = account_name; storage_shared_key_credential_ = std::make_shared(account_name, account_key); return Status::OK(); } +Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) { + credential_kind_ = CredentialKind::kTokenCredential; + token_credential_ = std::make_shared(); + return Status::OK(); +} + Result> AzureOptions::MakeBlobServiceClient() const { switch (credential_kind_) { case CredentialKind::kAnonymous: break; case CredentialKind::kTokenCredential: - return std::make_unique(account_blob_url_, + return std::make_unique(AccountBlobUrl(account_name_), token_credential_); case CredentialKind::kStorageSharedKeyCredential: - return std::make_unique(account_blob_url_, + return std::make_unique(AccountBlobUrl(account_name_), storage_shared_key_credential_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); @@ -119,11 +138,11 @@ AzureOptions::MakeDataLakeServiceClient() const { case CredentialKind::kAnonymous: break; case CredentialKind::kTokenCredential: - return std::make_unique(account_dfs_url_, - token_credential_); + return std::make_unique( + AccountDfsUrl(account_name_), token_credential_); case CredentialKind::kStorageSharedKeyCredential: return std::make_unique( - account_dfs_url_, storage_shared_key_credential_); + AccountDfsUrl(account_name_), storage_shared_key_credential_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index b2c7010ff3758..35c140b1097c7 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -43,17 +43,37 @@ class DataLakeServiceClient; namespace arrow::fs { -enum class AzureBackend { - /// \brief Official Azure Remote Backend - kAzure, - /// \brief Local Simulated Storage - kAzurite -}; - /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { - /// \brief The backend to connect to: Azure or Azurite (for testing). - AzureBackend backend = AzureBackend::kAzure; + /// \brief hostname[:port] of the Azure Blob Storage Service. + /// + /// If the hostname is a relative domain name (one that starts with a '.'), then storage + /// account URLs will be constructed by prepending the account name to the hostname. + /// If the hostname is a fully qualified domain name, then the hostname will be used + /// as-is and the account name will follow the hostname in the URL path. + /// + /// Default: ".blob.core.windows.net" + std::string blob_storage_authority = ".blob.core.windows.net"; + + /// \brief hostname[:port] of the Azure Data Lake Storage Gen 2 Service. + /// + /// If the hostname is a relative domain name (one that starts with a '.'), then storage + /// account URLs will be constructed by prepending the account name to the hostname. + /// If the hostname is a fully qualified domain name, then the hostname will be used + /// as-is and the account name will follow the hostname in the URL path. + /// + /// Default: ".dfs.core.windows.net" + std::string dfs_storage_authority = ".dfs.core.windows.net"; + + /// \brief Azure Blob Storage connection transport. + /// + /// Default: "https" + std::string blob_storage_scheme = "https"; + + /// \brief Azure Data Lake Storage Gen 2 connection transport. + /// + /// Default: "https" + std::string dfs_storage_scheme = "https"; // TODO(GH-38598): Add support for more auth methods. // std::string connection_string; @@ -65,22 +85,17 @@ struct ARROW_EXPORT AzureOptions { std::shared_ptr default_metadata; private: - std::string account_blob_url_; - std::string account_dfs_url_; - enum class CredentialKind { kAnonymous, kTokenCredential, kStorageSharedKeyCredential, } credential_kind_ = CredentialKind::kAnonymous; + std::string account_name_; + std::shared_ptr token_credential_; std::shared_ptr storage_shared_key_credential_; - std::shared_ptr token_credential_; - - void SetUrlsForAccountName(const std::string& account_name); - public: AzureOptions(); ~AzureOptions(); @@ -92,8 +107,8 @@ struct ARROW_EXPORT AzureOptions { bool Equals(const AzureOptions& other) const; - const std::string& AccountBlobUrl() const { return account_blob_url_; } - const std::string& AccountDfsUrl() const { return account_dfs_url_; } + std::string AccountBlobUrl(const std::string& account_name) const; + std::string AccountDfsUrl(const std::string& account_name) const; Result> MakeBlobServiceClient() const; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 799f3992a2210..8a39c4c554897 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -73,6 +73,13 @@ namespace Blobs = Azure::Storage::Blobs; namespace Core = Azure::Core; namespace DataLake = Azure::Storage::Files::DataLake; +enum class AzureBackend { + /// \brief Official Azure Remote Backend + kAzure, + /// \brief Local Simulated Storage + kAzurite +}; + class BaseAzureEnv : public ::testing::Environment { protected: std::string account_name_; @@ -265,8 +272,6 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl { TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { AzureOptions options; - options.backend = AzureBackend::kAzurite; // Irrelevant for this test because it - // doesn't connect to the server. ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name")); EXPECT_OK_AND_ASSIGN(auto default_credential_fs, AzureFileSystem::Make(options)); } @@ -352,7 +357,17 @@ class TestAzureFileSystem : public ::testing::Test { static Result MakeOptions(BaseAzureEnv* env) { AzureOptions options; - options.backend = env->backend(); + switch (env->backend()) { + case AzureBackend::kAzurite: + options.blob_storage_authority = "127.0.0.1:10000"; + options.dfs_storage_authority = "127.0.0.1:10000"; + options.blob_storage_scheme = "http"; + options.dfs_storage_scheme = "http"; + break; + case AzureBackend::kAzure: + // Use the default values + break; + } ARROW_EXPECT_OK( options.ConfigureAccountKeyCredential(env->account_name(), env->account_key())); return options;