diff --git a/ci/docker/alpine-linux-3.16-cpp.dockerfile b/ci/docker/alpine-linux-3.16-cpp.dockerfile index 8828e717a53a1..72489c6eae1a7 100644 --- a/ci/docker/alpine-linux-3.16-cpp.dockerfile +++ b/ci/docker/alpine-linux-3.16-cpp.dockerfile @@ -74,6 +74,7 @@ COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default ENV ARROW_ACERO=ON \ + ARROW_AZURE=OFF \ ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile index c8e98bdd00b11..59db84034bec7 100644 --- a/ci/docker/fedora-39-cpp.dockerfile +++ b/ci/docker/fedora-39-cpp.dockerfile @@ -80,6 +80,7 @@ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin # Python process explicitly if we use LLVM 17 or later. ENV absl_SOURCE=BUNDLED \ ARROW_ACERO=ON \ + ARROW_AZURE=OFF \ ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_DATASET=ON \ diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 93412ca81cdd5..c424d04653dbe 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -96,6 +96,7 @@ RUN /arrow/ci/scripts/r_deps.sh /arrow && \ R -e "install.packages('pkgdown')" ENV ARROW_ACERO=ON \ + ARROW_AZURE=OFF \ ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ ARROW_BUILD_UTILITIES=OFF \ diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index 3df895b427c9e..ae2ba9421cd55 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -76,6 +76,7 @@ COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin ENV ARROW_ACERO=ON \ + ARROW_AZURE=OFF \ ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ ARROW_FLIGHT=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index 7eba541a63af2..dd887a6d00ceb 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -76,6 +76,7 @@ COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin ENV ARROW_ACERO=ON \ + ARROW_AZURE=OFF \ ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ ARROW_FLIGHT=ON \ diff --git a/cpp/src/arrow/filesystem/api.h b/cpp/src/arrow/filesystem/api.h index 732be5f928f58..562b7c1808ec1 100644 --- a/cpp/src/arrow/filesystem/api.h +++ b/cpp/src/arrow/filesystem/api.h @@ -20,10 +20,13 @@ #include "arrow/util/config.h" // IWYU pragma: export #include "arrow/filesystem/filesystem.h" // IWYU pragma: export -#include "arrow/filesystem/hdfs.h" // IWYU pragma: export +#ifdef ARROW_AZURE +#include "arrow/filesystem/azurefs.h" // IWYU pragma: export +#endif #ifdef ARROW_GCS #include "arrow/filesystem/gcsfs.h" // IWYU pragma: export #endif +#include "arrow/filesystem/hdfs.h" // IWYU pragma: export #include "arrow/filesystem/localfs.h" // IWYU pragma: export #include "arrow/filesystem/mockfs.h" // IWYU pragma: export #ifdef ARROW_S3 diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 0ce84043a537c..bd741fde8cc23 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -2458,6 +2458,7 @@ TEST_F(TestAzuriteFileSystem, WriteMetadata) { ASSERT_OK(output->Close()); // Verify the metadata has been set. 
+ // TODO(GH-40025): Use `AzureFileSystem` to fetch metadata for this assertion. auto blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) .GetBlockBlobClient(blob_path) .GetProperties() @@ -2470,6 +2471,7 @@ TEST_F(TestAzuriteFileSystem, WriteMetadata) { full_path, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); + // TODO(GH-40025): Use `AzureFileSystem` to fetch metadata for this assertion. blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) .GetBlockBlobClient(blob_path) .GetProperties() diff --git a/cpp/src/arrow/filesystem/type_fwd.h b/cpp/src/arrow/filesystem/type_fwd.h index 892f7ad2e1b16..92c70799be16c 100644 --- a/cpp/src/arrow/filesystem/type_fwd.h +++ b/cpp/src/arrow/filesystem/type_fwd.h @@ -42,11 +42,12 @@ struct FileInfo; struct FileSelector; class FileSystem; -class SubTreeFileSystem; -class SlowFileSystem; +class AzureFileSystem; +class GcsFileSystem; class LocalFileSystem; class S3FileSystem; -class GcsFileSystem; +class SlowFileSystem; +class SubTreeFileSystem; } // namespace fs } // namespace arrow diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index fb42a53139f44..9fbd685084fd5 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -52,6 +52,7 @@ #cmakedefine ARROW_PARQUET #cmakedefine ARROW_SUBSTRAIT +#cmakedefine ARROW_AZURE #cmakedefine ARROW_ENABLE_THREADING #cmakedefine ARROW_GCS #cmakedefine ARROW_HDFS diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 8c98e269d6ff4..af65ea7d61483 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -554,6 +554,10 @@ set_source_files_properties(pyarrow/lib.pyx PROPERTIES CYTHON_API TRUE) set(LINK_LIBS arrow_python) +if(PYARROW_BUILD_AZURE) + list(APPEND CYTHON_EXTENSIONS _azurefs) +endif() + if(PYARROW_BUILD_GCS) list(APPEND CYTHON_EXTENSIONS _gcsfs) endif() diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 7ede69da665ab..936f4736977c8 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -145,7 +145,8 @@ def print_entry(label, value): print(f" {module: <20}: {status: <8}") print("\nFilesystems:") - filesystems = ["GcsFileSystem", "HadoopFileSystem", "S3FileSystem"] + filesystems = ["AzureFileSystem", "GcsFileSystem", + "HadoopFileSystem", "S3FileSystem"] for fs in filesystems: status = "Enabled" if _filesystem_is_available(fs) else "-" print(f" {fs: <20}: {status: <8}") diff --git a/python/pyarrow/_azurefs.pyx b/python/pyarrow/_azurefs.pyx new file mode 100644 index 0000000000000..5cd6300c18c6a --- /dev/null +++ b/python/pyarrow/_azurefs.pyx @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# cython: language_level = 3 + +from cython cimport binding + + +from pyarrow.lib import frombytes, tobytes +from pyarrow.includes.libarrow_fs cimport * +from pyarrow._fs cimport FileSystem + + +cdef class AzureFileSystem(FileSystem): + """ + Azure Blob Storage backed FileSystem implementation + + This implementation supports flat namespace and hierarchical namespace (HNS) a.k.a. + Data Lake Gen2 storage accounts. HNS will be automatically detected and HNS specific + features will be used when they provide a performance advantage. Azurite emulator is + also supported. Note: `/` is the only supported delimiter. + + The storage account is considered the root of the filesystem. When enabled, containers + will be created or deleted during relevant directory operations. Obviously, this also + requires authentication with the additional permissions. + + By default `DefaultAzureCredential` + is used for authentication. This means it will try several types of authentication + and go with the first one that works. If any authentication parameters are provided when + initialising the FileSystem, they will be used instead of the default credential. + + Parameters + ---------- + account_name : str + Azure Blob Storage account name. This is the globally unique identifier for the + storage account. + account_key : str, default None + Account key of the storage account. Pass None to use default credential. + blob_storage_authority : str, default None + hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful + for connecting to a local emulator, like Azurite. + dfs_storage_authority : str, default None + hostname[:port] of the Data Lake Gen 2 Service. Defaults to + `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite. + blob_storage_scheme : str, default None + Either `http` or `https`. Defaults to `https`. Useful for connecting to a local + emulator, like Azurite. + dfs_storage_scheme : str, default None + Either `http` or `https`. Defaults to `https`. Useful for connecting to a local + emulator, like Azurite. + + Examples + -------- + >>> from pyarrow import fs + >>> azure_fs = fs.AzureFileSystem(account_name='myaccount') + >>> azurite_fs = fs.AzureFileSystem( + ... account_name='devstoreaccount1', + ... account_key='Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', + ... blob_storage_authority='127.0.0.1:10000', + ... dfs_storage_authority='127.0.0.1:10000', + ... blob_storage_scheme='http', + ... dfs_storage_scheme='http', + ... ) + + For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
+ """ + cdef: + CAzureFileSystem* azurefs + c_string account_key + + def __init__(self, account_name, *, account_key=None, blob_storage_authority=None, + dfs_storage_authority=None, blob_storage_scheme=None, + dfs_storage_scheme=None): + cdef: + CAzureOptions options + shared_ptr[CAzureFileSystem] wrapped + + options.account_name = tobytes(account_name) + if blob_storage_authority: + options.blob_storage_authority = tobytes(blob_storage_authority) + if dfs_storage_authority: + options.dfs_storage_authority = tobytes(dfs_storage_authority) + if blob_storage_scheme: + options.blob_storage_scheme = tobytes(blob_storage_scheme) + if dfs_storage_scheme: + options.dfs_storage_scheme = tobytes(dfs_storage_scheme) + + if account_key: + options.ConfigureAccountKeyCredential(tobytes(account_key)) + self.account_key = tobytes(account_key) + else: + options.ConfigureDefaultCredential() + + with nogil: + wrapped = GetResultValue(CAzureFileSystem.Make(options)) + + self.init( wrapped) + + cdef init(self, const shared_ptr[CFileSystem]& wrapped): + FileSystem.init(self, wrapped) + self.azurefs = wrapped.get() + + @staticmethod + @binding(True) # Required for cython < 3 + def _reconstruct(kwargs): + # __reduce__ doesn't allow passing named arguments directly to the + # reconstructor, hence this wrapper. + return AzureFileSystem(**kwargs) + + def __reduce__(self): + cdef CAzureOptions opts = self.azurefs.options() + return ( + AzureFileSystem._reconstruct, (dict( + account_name=frombytes(opts.account_name), + account_key=frombytes(self.account_key), + blob_storage_authority=frombytes(opts.blob_storage_authority), + dfs_storage_authority=frombytes(opts.dfs_storage_authority), + blob_storage_scheme=frombytes(opts.blob_storage_scheme), + dfs_storage_scheme=frombytes(opts.dfs_storage_scheme) + ),)) diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 395f488144331..86cf39e993c1b 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -491,6 +491,9 @@ cdef class FileSystem(_Weakrefable): elif typ == 'gcs': from pyarrow._gcsfs import GcsFileSystem self = GcsFileSystem.__new__(GcsFileSystem) + elif typ == 'abfs': + from pyarrow._azurefs import AzureFileSystem + self = AzureFileSystem.__new__(AzureFileSystem) elif typ == 'hdfs': from pyarrow._hdfs import HadoopFileSystem self = HadoopFileSystem.__new__(HadoopFileSystem) diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py index 6f6807e907d62..2ac8427de17e7 100644 --- a/python/pyarrow/conftest.py +++ b/python/pyarrow/conftest.py @@ -24,6 +24,7 @@ groups = [ 'acero', + 'azure', 'brotli', 'bz2', 'cython', @@ -54,6 +55,7 @@ defaults = { 'acero': False, + 'azure': False, 'brotli': Codec.is_available('brotli'), 'bz2': Codec.is_available('bz2'), 'cython': False, @@ -142,13 +144,18 @@ except ImportError: pass +try: + from pyarrow.fs import AzureFileSystem # noqa + defaults['azure'] = True +except ImportError: + pass + try: from pyarrow.fs import GcsFileSystem # noqa defaults['gcs'] = True except ImportError: pass - try: from pyarrow.fs import S3FileSystem # noqa defaults['s3'] = True diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index a256cc540f742..abdd1a995751a 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -39,6 +39,10 @@ FileStats = FileInfo _not_imported = [] +try: + from pyarrow._azurefs import AzureFileSystem # noqa +except ImportError: + _not_imported.append("AzureFileSystem") try: from pyarrow._hdfs import HadoopFileSystem # noqa diff --git a/python/pyarrow/includes/libarrow_fs.pxd 
b/python/pyarrow/includes/libarrow_fs.pxd index 7876fb0f96671..328b426a498db 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -251,6 +251,22 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: CResult[shared_ptr[CGcsFileSystem]] Make(const CGcsOptions& options) CGcsOptions options() + cdef cppclass CAzureOptions "arrow::fs::AzureOptions": + c_string account_name + c_string blob_storage_authority + c_string dfs_storage_authority + c_string blob_storage_scheme + c_string dfs_storage_scheme + + c_bool Equals(const CAzureOptions& other) + CStatus ConfigureDefaultCredential() + CStatus ConfigureAccountKeyCredential(c_string account_key) + + cdef cppclass CAzureFileSystem "arrow::fs::AzureFileSystem": + @staticmethod + CResult[shared_ptr[CAzureFileSystem]] Make(const CAzureOptions& options) + CAzureOptions options() + cdef cppclass CHdfsOptions "arrow::fs::HdfsOptions": HdfsConnectionConfig connection_config int32_t buffer_size diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index 0da757a4bc56e..57bc3c8fc6616 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -256,6 +256,37 @@ def gcs_server(): proc.wait() +@pytest.fixture(scope='session') +def azure_server(tmpdir_factory): + port = find_free_port() + env = os.environ.copy() + tmpdir = tmpdir_factory.getbasetemp() + # We only need blob service emulator, not queue or table. + args = ['azurite-blob', "--location", tmpdir, "--blobPort", str(port)] + proc = None + try: + proc = subprocess.Popen(args, env=env) + # Make sure the server is alive. + if proc.poll() is not None: + pytest.skip(f"Command {args} did not start server successfully!") + except (ModuleNotFoundError, OSError) as e: + pytest.skip(f"Command {args} failed to execute: {e}") + else: + yield { + # Use the standard azurite account_name and account_key. 
+ # https://learn.microsoft.com/en-us/azure/storage/common/storage-use-emulator#authorize-with-shared-key-credentials + 'connection': ('127.0.0.1', port, 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2' + 'UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=='), + 'process': proc, + 'tempdir': tmpdir, + } + finally: + if proc is not None: + proc.kill() + proc.wait() + + @pytest.fixture( params=[ 'builtin_pickle', diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 543c4399ddb47..845f1eccecc72 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -291,6 +291,35 @@ def subtree_s3fs(request, s3fs): }""" +@pytest.fixture +def azurefs(request, azure_server): + request.config.pyarrow.requires('azure') + from pyarrow.fs import AzureFileSystem + + host, port, account_name, account_key = azure_server['connection'] + azurite_authority = f"{host}:{port}" + azurite_scheme = "http" + + container = 'pyarrow-filesystem/' + + fs = AzureFileSystem(account_name=account_name, + account_key=account_key, + blob_storage_authority=azurite_authority, + dfs_storage_authority=azurite_authority, + blob_storage_scheme=azurite_scheme, + dfs_storage_scheme=azurite_scheme) + + fs.create_dir(container) + + yield dict( + fs=fs, + pathfn=container.__add__, + allow_move_dir=True, + allow_append_to_file=True, + ) + fs.delete_dir(container) + + @pytest.fixture def hdfs(request, hdfs_connection): request.config.pyarrow.requires('hdfs') @@ -383,6 +412,11 @@ def py_fsspec_s3fs(request, s3_server): id='GcsFileSystem', marks=pytest.mark.gcs ), + pytest.param( + 'azurefs', + id='AzureFileSystem', + marks=pytest.mark.azure + ), pytest.param( 'hdfs', id='HadoopFileSystem', @@ -467,6 +501,11 @@ def skip_fsspec_s3fs(fs): pytest.xfail(reason="Not working with fsspec's s3fs") +def skip_azure(fs, reason): + if fs.type_name == "abfs": + pytest.skip(reason=reason) + + @pytest.mark.s3 def test_s3fs_limited_permissions_create_bucket(s3_server): from pyarrow.fs import S3FileSystem @@ -857,6 +896,9 @@ def test_copy_file(fs, pathfn): def test_move_directory(fs, pathfn, allow_move_dir): + # TODO(GH-40025): Stop skipping this test + skip_azure(fs, "Not implemented yet for Azure. See GH-40025") + # move directory (doesn't work with S3) s = pathfn('source-dir/') t = pathfn('target-dir/') @@ -878,6 +920,9 @@ def test_move_file(fs, pathfn): # (https://github.com/dask/s3fs/issues/394) skip_fsspec_s3fs(fs) + + # TODO(GH-40025): Stop skipping this test + skip_azure(fs, "Not implemented yet for Azure. See GH-40025") + s = pathfn('test-move-source-file') t = pathfn('test-move-target-file') @@ -1029,7 +1074,11 @@ def test_open_output_stream_metadata(fs, pathfn): assert f.read() == data got_metadata = f.metadata() - if fs.type_name in ['s3', 'gcs'] or 'mock' in fs.type_name: + if fs.type_name in ['s3', 'gcs', 'abfs'] or 'mock' in fs.type_name: + # TODO(GH-40026): Stop skipping this test + skip_azure( + fs, "Azure filesystem currently only returns system metadata not user " + "metadata.
See GH-40026") for k, v in metadata.items(): assert got_metadata[k] == v.encode() else: @@ -1379,6 +1428,33 @@ def test_s3fs_wrong_region(): fs.get_file_info("voltrondata-labs-datasets") +@pytest.mark.azure +def test_azurefs_options(pickle_module): + from pyarrow.fs import AzureFileSystem + + fs1 = AzureFileSystem(account_name='fake-account-name') + assert isinstance(fs1, AzureFileSystem) + assert pickle_module.loads(pickle_module.dumps(fs1)) == fs1 + + fs2 = AzureFileSystem(account_name='fake-account-name', + account_key='fakeaccountkey') + assert isinstance(fs2, AzureFileSystem) + assert pickle_module.loads(pickle_module.dumps(fs2)) == fs2 + assert fs2 != fs1 + + fs3 = AzureFileSystem(account_name='fake-account', account_key='fakeaccount', + blob_storage_authority='fake-blob-authority', + dfs_storage_authority='fake-dfs-authority', + blob_storage_scheme='fake-blob-scheme', + dfs_storage_scheme='fake-dfs-scheme') + assert isinstance(fs3, AzureFileSystem) + assert pickle_module.loads(pickle_module.dumps(fs3)) == fs3 + assert fs3 != fs2 + + with pytest.raises(TypeError): + AzureFileSystem() + + @pytest.mark.hdfs def test_hdfs_options(hdfs_connection, pickle_module): from pyarrow.fs import HadoopFileSystem diff --git a/python/setup.py b/python/setup.py index 798bd6b05fd0b..ce6996dc1d439 100755 --- a/python/setup.py +++ b/python/setup.py @@ -113,6 +113,8 @@ def run(self): ('with-parquet', None, 'build the Parquet extension'), ('with-parquet-encryption', None, 'build the Parquet encryption extension'), + ('with-azure', None, + 'build the Azure Blob Storage extension'), ('with-gcs', None, 'build the Google Cloud Storage (GCS) extension'), ('with-s3', None, 'build the Amazon S3 extension'), @@ -150,6 +152,8 @@ def initialize_options(self): if not hasattr(sys, 'gettotalrefcount'): self.build_type = 'release' + self.with_azure = strtobool( + os.environ.get('PYARROW_WITH_AZURE', '0')) self.with_gcs = strtobool( os.environ.get('PYARROW_WITH_GCS', '0')) self.with_s3 = strtobool( @@ -207,6 +211,7 @@ def initialize_options(self): '_parquet_encryption', '_pyarrow_cpp_tests', '_orc', + '_azurefs', '_gcsfs', '_s3fs', '_substrait', @@ -278,6 +283,7 @@ def append_cmake_bool(value, varname): append_cmake_bool(self.with_parquet, 'PYARROW_BUILD_PARQUET') append_cmake_bool(self.with_parquet_encryption, 'PYARROW_BUILD_PARQUET_ENCRYPTION') + append_cmake_bool(self.with_azure, 'PYARROW_BUILD_AZURE') append_cmake_bool(self.with_gcs, 'PYARROW_BUILD_GCS') append_cmake_bool(self.with_s3, 'PYARROW_BUILD_S3') append_cmake_bool(self.with_hdfs, 'PYARROW_BUILD_HDFS') @@ -344,6 +350,8 @@ def _failure_permitted(self, name): return True if name == '_substrait' and not self.with_substrait: return True + if name == '_azurefs' and not self.with_azure: + return True if name == '_gcsfs' and not self.with_gcs: return True if name == '_s3fs' and not self.with_s3: