From 817e0d7b86abf477a25b5ae8f52f1093f24aaa55 Mon Sep 17 00:00:00 2001 From: Andre Luis Anastacio Date: Tue, 10 Sep 2024 10:09:20 -0300 Subject: [PATCH] Deprecate ADLFS prefix in favor of ADLS (#961) * Deprecate ADLFS prefix in favor of ADLS * Add missing renaming --- Makefile | 4 +- mkdocs/docs/configuration.md | 18 +++--- mkdocs/docs/contributing.md | 6 +- pyiceberg/io/__init__.py | 22 ++++--- pyiceberg/io/fsspec.py | 65 +++++++++++++++++---- pyproject.toml | 2 +- tests/conftest.py | 24 ++++---- tests/io/test_fsspec.py | 108 +++++++++++++++++------------------ 8 files changed, 151 insertions(+), 98 deletions(-) diff --git a/Makefile b/Makefile index a40efd17c8..ee0b405570 100644 --- a/Makefile +++ b/Makefile @@ -59,9 +59,9 @@ test-integration-rebuild: docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache -test-adlfs: ## Run tests marked with adlfs, can add arguments with PYTEST_ARGS="-vv" +test-adls: ## Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-azurite.sh - poetry run pytest tests/ -m adlfs ${PYTEST_ARGS} + poetry run pytest tests/ -m adls ${PYTEST_ARGS} test-gcs: ## Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-gcs-server.sh diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 422800675f..8495437f23 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -109,15 +109,15 @@ For the FileIO there are several configuration options available: -| Key | Example | Description | -| ----------------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
-| adlfs.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint= | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adlfs-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). | -| adlfs.account-name | devstoreaccount1 | The account that you want to connect to | -| adlfs.account-key | Eby8vdM02xNOcqF... | The key to authentication against the account. | -| adlfs.sas-token | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature | -| adlfs.tenant-id | ad667be4-b811-11ed-afa1-0242ac120002 | The tenant-id | -| adlfs.client-id | ad667be4-b811-11ed-afa1-0242ac120002 | The client-id | -| adlfs.client-secret | oCA3R6P\*ka#oa1Sms2J74z... | The client-secret | +| Key | Example | Description | +| ---------------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| adls.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint= | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adls-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). | +| adls.account-name | devstoreaccount1 | The account that you want to connect to | +| adls.account-key | Eby8vdM02xNOcqF... | The key to authentication against the account. 
| +| adls.sas-token | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature | +| adls.tenant-id | ad667be4-b811-11ed-afa1-0242ac120002 | The tenant-id | +| adls.client-id | ad667be4-b811-11ed-afa1-0242ac120002 | The client-id | +| adls.client-secret | oCA3R6P\*ka#oa1Sms2J74z... | The client-secret | diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md index d87f2ec6a3..62db11c6b7 100644 --- a/mkdocs/docs/contributing.md +++ b/mkdocs/docs/contributing.md @@ -106,17 +106,17 @@ For Python, `pytest` is used a testing framework in combination with `coverage` make test ``` -By default, S3 and ADLFS tests are ignored because that require minio and azurite to be running. +By default, S3 and ADLS tests are ignored because they require minio and azurite to be running. To run the S3 suite: ```bash make test-s3 ``` -To run the ADLFS suite: +To run the ADLS suite: ```bash -make test-adlfs +make test-adls ``` To pass additional arguments to pytest, you can use `PYTEST_ARGS`. 
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index be06bc0458..fe3ea43e10 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -48,6 +48,14 @@ logger = logging.getLogger(__name__) +ADLFS_CONNECTION_STRING = "adlfs.connection-string" +ADLFS_ACCOUNT_NAME = "adlfs.account-name" +ADLFS_ACCOUNT_KEY = "adlfs.account-key" +ADLFS_SAS_TOKEN = "adlfs.sas-token" +ADLFS_TENANT_ID = "adlfs.tenant-id" +ADLFS_CLIENT_ID = "adlfs.client-id" +ADLFS_ClIENT_SECRET = "adlfs.client-secret" +ADLFS_PREFIX = "adlfs" AWS_REGION = "client.region" AWS_ACCESS_KEY_ID = "client.access-key-id" AWS_SECRET_ACCESS_KEY = "client.secret-access-key" @@ -66,13 +74,13 @@ HDFS_PORT = "hdfs.port" HDFS_USER = "hdfs.user" HDFS_KERB_TICKET = "hdfs.kerberos_ticket" -ADLFS_CONNECTION_STRING = "adlfs.connection-string" -ADLFS_ACCOUNT_NAME = "adlfs.account-name" -ADLFS_ACCOUNT_KEY = "adlfs.account-key" -ADLFS_SAS_TOKEN = "adlfs.sas-token" -ADLFS_TENANT_ID = "adlfs.tenant-id" -ADLFS_CLIENT_ID = "adlfs.client-id" -ADLFS_ClIENT_SECRET = "adlfs.client-secret" +ADLS_CONNECTION_STRING = "adls.connection-string" +ADLS_ACCOUNT_NAME = "adls.account-name" +ADLS_ACCOUNT_KEY = "adls.account-key" +ADLS_SAS_TOKEN = "adls.sas-token" +ADLS_TENANT_ID = "adls.tenant-id" +ADLS_CLIENT_ID = "adls.client-id" +ADLS_ClIENT_SECRET = "adls.client-secret" GCS_TOKEN = "gcs.oauth2.token" GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at" GCS_PROJECT_ID = "gcs.project-id" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index cbe5d5b64b..1632c4bb28 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -44,8 +44,15 @@ ADLFS_ACCOUNT_NAME, ADLFS_CLIENT_ID, ADLFS_CONNECTION_STRING, + ADLFS_PREFIX, ADLFS_SAS_TOKEN, ADLFS_TENANT_ID, + ADLS_ACCOUNT_KEY, + ADLS_ACCOUNT_NAME, + ADLS_CLIENT_ID, + ADLS_CONNECTION_STRING, + ADLS_SAS_TOKEN, + ADLS_TENANT_ID, AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, @@ -71,6 +78,7 @@ S3_SIGNER_ENDPOINT_DEFAULT, S3_SIGNER_URI, 
ADLFS_ClIENT_SECRET, + ADLS_ClIENT_SECRET, FileIO, InputFile, InputStream, @@ -78,6 +86,7 @@ OutputStream, ) from pyiceberg.typedef import Properties +from pyiceberg.utils.deprecated import deprecation_message from pyiceberg.utils.properties import get_first_property_value, property_as_bool logger = logging.getLogger(__name__) @@ -176,17 +185,53 @@ def _gs(properties: Properties) -> AbstractFileSystem: ) -def _adlfs(properties: Properties) -> AbstractFileSystem: +def _adls(properties: Properties) -> AbstractFileSystem: from adlfs import AzureBlobFileSystem + for property_name in properties: + if property_name.startswith(ADLFS_PREFIX): + deprecation_message( + deprecated_in="0.8.0", + removed_in="0.9.0", + help_message=f"The property {property_name} is deprecated. Please use properties that start with adls.", + ) + return AzureBlobFileSystem( - connection_string=properties.get(ADLFS_CONNECTION_STRING), - account_name=properties.get(ADLFS_ACCOUNT_NAME), - account_key=properties.get(ADLFS_ACCOUNT_KEY), - sas_token=properties.get(ADLFS_SAS_TOKEN), - tenant_id=properties.get(ADLFS_TENANT_ID), - client_id=properties.get(ADLFS_CLIENT_ID), - client_secret=properties.get(ADLFS_ClIENT_SECRET), + connection_string=get_first_property_value( + properties, + ADLS_CONNECTION_STRING, + ADLFS_CONNECTION_STRING, + ), + account_name=get_first_property_value( + properties, + ADLS_ACCOUNT_NAME, + ADLFS_ACCOUNT_NAME, + ), + account_key=get_first_property_value( + properties, + ADLS_ACCOUNT_KEY, + ADLFS_ACCOUNT_KEY, + ), + sas_token=get_first_property_value( + properties, + ADLS_SAS_TOKEN, + ADLFS_SAS_TOKEN, + ), + tenant_id=get_first_property_value( + properties, + ADLS_TENANT_ID, + ADLFS_TENANT_ID, + ), + client_id=get_first_property_value( + properties, + ADLS_CLIENT_ID, + ADLFS_CLIENT_ID, + ), + client_secret=get_first_property_value( + properties, + ADLS_ClIENT_SECRET, + ADLFS_ClIENT_SECRET, + ), ) @@ -196,8 +241,8 @@ def _adlfs(properties: Properties) -> AbstractFileSystem: "s3": 
_s3, "s3a": _s3, "s3n": _s3, - "abfs": _adlfs, - "abfss": _adlfs, + "abfs": _adls, + "abfss": _adls, "gs": _gs, "gcs": _gs, } diff --git a/pyproject.toml b/pyproject.toml index b5e6d12d37..8bf8452194 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -603,7 +603,7 @@ gcsfs = ["gcsfs"] markers = [ "unmarked: marks a test as a unittest", "s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)", - "adlfs: marks a test as requiring access to adlfs compliant storage (use with --adlfs.account-name, --adlfs.account-key, and --adlfs.endpoint args)", + "adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)", "integration: marks integration tests against Apache Spark", "gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)", ] diff --git a/tests/conftest.py b/tests/conftest.py index f65f4ed55f..b05947ebe6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -111,23 +111,23 @@ def pytest_addoption(parser: pytest.Parser) -> None: parser.addoption( "--s3.secret-access-key", action="store", default="password", help="The AWS secret access key ID for tests marked as s3" ) - # ADLFS options + # ADLS options # Azurite provides default account name and key. Those can be customized using env variables. 
# For more information, see README file at https://github.com/azure/azurite#default-storage-account parser.addoption( - "--adlfs.endpoint", + "--adls.endpoint", action="store", default="http://127.0.0.1:10000", - help="The ADLS endpoint URL for tests marked as adlfs", + help="The ADLS endpoint URL for tests marked as adls", ) parser.addoption( - "--adlfs.account-name", action="store", default="devstoreaccount1", help="The ADLS account key for tests marked as adlfs" + "--adls.account-name", action="store", default="devstoreaccount1", help="The ADLS account key for tests marked as adls" ) parser.addoption( - "--adlfs.account-key", + "--adls.account-key", action="store", default="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==", - help="The ADLS secret account key for tests marked as adlfs", + help="The ADLS secret account key for tests marked as adls", ) parser.addoption( "--gcs.endpoint", action="store", default="http://0.0.0.0:4443", help="The GCS endpoint URL for tests marked gcs" @@ -1955,16 +1955,16 @@ def fixture_dynamodb(_aws_credentials: None) -> Generator[boto3.client, None, No @pytest.fixture -def adlfs_fsspec_fileio(request: pytest.FixtureRequest) -> Generator[FsspecFileIO, None, None]: +def adls_fsspec_fileio(request: pytest.FixtureRequest) -> Generator[FsspecFileIO, None, None]: from azure.storage.blob import BlobServiceClient - azurite_url = request.config.getoption("--adlfs.endpoint") - azurite_account_name = request.config.getoption("--adlfs.account-name") - azurite_account_key = request.config.getoption("--adlfs.account-key") + azurite_url = request.config.getoption("--adls.endpoint") + azurite_account_name = request.config.getoption("--adls.account-name") + azurite_account_key = request.config.getoption("--adls.account-key") azurite_connection_string = f"DefaultEndpointsProtocol=http;AccountName={azurite_account_name};AccountKey={azurite_account_key};BlobEndpoint={azurite_url}/{azurite_account_name};" 
properties = { - "adlfs.connection-string": azurite_connection_string, - "adlfs.account-name": azurite_account_name, + "adls.connection-string": azurite_connection_string, + "adls.account-name": azurite_account_name, } bbs = BlobServiceClient.from_connection_string(conn_str=azurite_connection_string) diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py index fbb184910a..64dc68b9ca 100644 --- a/tests/io/test_fsspec.py +++ b/tests/io/test_fsspec.py @@ -290,70 +290,70 @@ def test_fsspec_unified_session_properties() -> None: ) -@pytest.mark.adlfs -def test_fsspec_new_input_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_new_input_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test creating a new input file from an fsspec file-io""" filename = str(uuid.uuid4()) - input_file = adlfs_fsspec_fileio.new_input(f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(f"abfss://tests/{filename}") assert isinstance(input_file, fsspec.FsspecInputFile) assert input_file.location == f"abfss://tests/{filename}" -@pytest.mark.adlfs -def test_fsspec_new_abfss_output_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_new_abfss_output_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test creating a new output file from an fsspec file-io""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(f"abfss://tests/{filename}") assert isinstance(output_file, fsspec.FsspecOutputFile) assert output_file.location == f"abfss://tests/{filename}" -@pytest.mark.adlfs -def test_fsspec_write_and_read_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_write_and_read_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test writing and reading a file using FsspecInputFile and FsspecOutputFile""" filename = str(uuid.uuid4()) - output_file = 
adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as f: f.write(b"foo") - input_file = adlfs_fsspec_fileio.new_input(f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(f"abfss://tests/{filename}") assert input_file.open().read() == b"foo" - adlfs_fsspec_fileio.delete(input_file) + adls_fsspec_fileio.delete(input_file) -@pytest.mark.adlfs -def test_fsspec_getting_length_of_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_getting_length_of_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test getting the length of an FsspecInputFile and FsspecOutputFile""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as f: f.write(b"foobar") assert len(output_file) == 6 - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") assert len(input_file) == 6 - adlfs_fsspec_fileio.delete(output_file) + adls_fsspec_fileio.delete(output_file) -@pytest.mark.adlfs -def test_fsspec_file_tell_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_file_tell_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test finding cursor position for an fsspec file-io file""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as write_file: write_file.write(b"foobar") - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") 
f = input_file.open() f.seek(0) @@ -365,19 +365,19 @@ def test_fsspec_file_tell_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: f.seek(0) assert f.tell() == 0 - adlfs_fsspec_fileio.delete(f"abfss://tests/{filename}") + adls_fsspec_fileio.delete(f"abfss://tests/{filename}") -@pytest.mark.adlfs -def test_fsspec_read_specified_bytes_for_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_read_specified_bytes_for_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test reading a specified number of bytes from an fsspec file-io file""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as write_file: write_file.write(b"foo") - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") f = input_file.open() f.seek(0) @@ -391,89 +391,89 @@ def test_fsspec_read_specified_bytes_for_file_adlfs(adlfs_fsspec_fileio: FsspecF f.seek(0) assert b"foo" == f.read(999) # test reading amount larger than entire content length - adlfs_fsspec_fileio.delete(input_file) + adls_fsspec_fileio.delete(input_file) -@pytest.mark.adlfs -def test_fsspec_raise_on_opening_file_not_found_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: - """Test that an fsspec input file raises appropriately when the adlfs file is not found""" +@pytest.mark.adls +def test_fsspec_raise_on_opening_file_not_found_adls(adls_fsspec_fileio: FsspecFileIO) -> None: + """Test that an fsspec input file raises appropriately when the adls file is not found""" filename = str(uuid.uuid4()) - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") with pytest.raises(FileNotFoundError) as exc_info: 
input_file.open().read() assert filename in str(exc_info.value) -@pytest.mark.adlfs -def test_checking_if_a_file_exists_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_checking_if_a_file_exists_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test checking if a file exists""" - non_existent_file = adlfs_fsspec_fileio.new_input(location="abfss://tests/does-not-exist.txt") + non_existent_file = adls_fsspec_fileio.new_input(location="abfss://tests/does-not-exist.txt") assert not non_existent_file.exists() filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") assert not output_file.exists() with output_file.create() as f: f.write(b"foo") - existing_input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + existing_input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") assert existing_input_file.exists() - existing_output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + existing_output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") assert existing_output_file.exists() - adlfs_fsspec_fileio.delete(existing_output_file) + adls_fsspec_fileio.delete(existing_output_file) -@pytest.mark.adlfs -def test_closing_a_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_closing_a_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test closing an output file and input file""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as write_file: write_file.write(b"foo") assert not write_file.closed # type: ignore assert write_file.closed # type: ignore - input_file = 
adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") f = input_file.open() assert not f.closed # type: ignore f.close() assert f.closed # type: ignore - adlfs_fsspec_fileio.delete(f"abfss://tests/{filename}") + adls_fsspec_fileio.delete(f"abfss://tests/{filename}") -@pytest.mark.adlfs -def test_fsspec_converting_an_outputfile_to_an_inputfile_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_converting_an_outputfile_to_an_inputfile_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test converting an output file to an input file""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") input_file = output_file.to_input_file() assert input_file.location == output_file.location -@pytest.mark.adlfs -def test_writing_avro_file_adlfs(generated_manifest_entry_file: str, adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_writing_avro_file_adls(generated_manifest_entry_file: str, adls_fsspec_fileio: FsspecFileIO) -> None: """Test that bytes match when reading a local avro file, writing it using fsspec file-io, and then reading it again""" filename = str(uuid.uuid4()) with PyArrowFileIO().new_input(location=generated_manifest_entry_file).open() as f: b1 = f.read() - with adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}").create() as out_f: + with adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}").create() as out_f: out_f.write(b1) - with adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}").open() as in_f: + with adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}").open() as in_f: b2 = in_f.read() - assert b1 == b2 # Check that bytes of read from local avro file match bytes written to adlfs + assert b1 == b2 # Check that 
bytes of read from local avro file match bytes written to adls - adlfs_fsspec_fileio.delete(f"abfss://tests/{filename}") + adls_fsspec_fileio.delete(f"abfss://tests/{filename}") -@pytest.mark.adlfs -def test_fsspec_pickle_round_trip_aldfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: - _test_fsspec_pickle_round_trip(adlfs_fsspec_fileio, "abfss://tests/foo.txt") +@pytest.mark.adls +def test_fsspec_pickle_round_trip_aldfs(adls_fsspec_fileio: FsspecFileIO) -> None: + _test_fsspec_pickle_round_trip(adls_fsspec_fileio, "abfss://tests/foo.txt") @pytest.mark.gcs