From dc0f5208e25a31a73df1ce351a283231394bc6ab Mon Sep 17 00:00:00 2001 From: Marc Cenac <547446+mrcnc@users.noreply.github.com> Date: Wed, 18 Dec 2024 13:47:40 -0600 Subject: [PATCH] Revert "Support WASB scheme in ADLSFileIO (#11504)" This reverts commit 09634857e4a1333f5dc742d1dca3921e9a9f62dd. --- .../apache/iceberg/azure/AzureProperties.java | 13 +----------- .../iceberg/azure/adlsv2/ADLSLocation.java | 20 ++++++------------- .../iceberg/azure/AzurePropertiesTest.java | 10 ++++------ .../azure/adlsv2/ADLSLocationTest.java | 19 ++++-------------- .../apache/iceberg/io/ResolvingFileIO.java | 4 +--- 5 files changed, 16 insertions(+), 50 deletions(-) diff --git a/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java b/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java index a7f9885a4726..2d363cbc5231 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java +++ b/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java @@ -77,17 +77,6 @@ public Optional adlsWriteBlockSize() { return Optional.ofNullable(adlsWriteBlockSize); } - /** - * Applies configuration to the {@link DataLakeFileSystemClientBuilder} to provide the endpoint - * and credentials required to create an instance of the client. - * - *

The default endpoint is constructed in the form {@code - * https://{account}.dfs.core.windows.net} and default credentials are provided via the {@link - * com.azure.identity.DefaultAzureCredential}. - * - * @param account the service account name - * @param builder the builder instance - */ public void applyClientConfiguration(String account, DataLakeFileSystemClientBuilder builder) { String sasToken = adlsSasTokens.get(account); if (sasToken != null && !sasToken.isEmpty()) { @@ -104,7 +93,7 @@ public void applyClientConfiguration(String account, DataLakeFileSystemClientBui if (connectionString != null && !connectionString.isEmpty()) { builder.endpoint(connectionString); } else { - builder.endpoint("https://" + account + ".dfs.core.windows.net"); + builder.endpoint("https://" + account); } } } diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java index fb91c4cb3233..5af590628fe8 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java +++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java @@ -30,21 +30,14 @@ * *

Locations follow a URI like structure to identify resources * - *

<pre>{@code abfs[s]://[<container>@]<storage account>.dfs.core.windows.net/<file path>}</pre>
- * - * or - * - *
<pre>{@code wasb[s]://<container>@<storage account>.blob.core.windows.net/<file path>}</pre>
- * - * For compatibility, locations using the wasb scheme are also accepted but will use the Azure Data - * Lake Storage Gen2 REST APIs instead of the Blob Storage REST APIs. + *
<pre>{@code abfs[s]://[<container>@]<storage account host>/<file path>}</pre>
* *

See Azure * Data Lake Storage URI */ class ADLSLocation { - private static final Pattern URI_PATTERN = Pattern.compile("^(abfss?|wasbs?)://([^/?#]+)(.*)?$"); + private static final Pattern URI_PATTERN = Pattern.compile("^abfss?://([^/?#]+)(.*)?$"); private final String storageAccount; private final String container; @@ -62,18 +55,17 @@ class ADLSLocation { ValidationException.check(matcher.matches(), "Invalid ADLS URI: %s", location); - String authority = matcher.group(2); + String authority = matcher.group(1); String[] parts = authority.split("@", -1); if (parts.length > 1) { this.container = parts[0]; - String host = parts[1]; - this.storageAccount = host.split("\\.", -1)[0]; + this.storageAccount = parts[1]; } else { this.container = null; - this.storageAccount = authority.split("\\.", -1)[0]; + this.storageAccount = authority; } - String uriPath = matcher.group(3); + String uriPath = matcher.group(2); this.path = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath; } diff --git a/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java b/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java index 4f032d7ab125..6b8287c44e58 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java @@ -97,13 +97,11 @@ public void testNoSasToken() { @Test public void testWithConnectionString() { AzureProperties props = - new AzureProperties( - ImmutableMap.of( - "adls.connection-string.account1", "https://account1.dfs.core.usgovcloudapi.net")); + new AzureProperties(ImmutableMap.of("adls.connection-string.account1", "http://endpoint")); DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account1", clientBuilder); - verify(clientBuilder).endpoint("https://account1.dfs.core.usgovcloudapi.net"); + verify(clientBuilder).endpoint("http://endpoint"); } @Test 
@@ -113,7 +111,7 @@ public void testNoMatchingConnectionString() { DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account1", clientBuilder); - verify(clientBuilder).endpoint("https://account1.dfs.core.windows.net"); + verify(clientBuilder).endpoint("https://account1"); } @Test @@ -122,7 +120,7 @@ public void testNoConnectionString() { DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account", clientBuilder); - verify(clientBuilder).endpoint("https://account.dfs.core.windows.net"); + verify(clientBuilder).endpoint("https://account"); } @Test diff --git a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java index 10b5e1877cca..403886f4b28e 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java @@ -33,18 +33,7 @@ public void testLocationParsing(String scheme) { String p1 = scheme + "://container@account.dfs.core.windows.net/path/to/file"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageAccount()).isEqualTo("account"); - assertThat(location.container().get()).isEqualTo("container"); - assertThat(location.path()).isEqualTo("path/to/file"); - } - - @ParameterizedTest - @ValueSource(strings = {"wasb", "wasbs"}) - public void testWasbLocatonParsing(String scheme) { - String p1 = scheme + "://container@account.blob.core.windows.net/path/to/file"; - ADLSLocation location = new ADLSLocation(p1); - - assertThat(location.storageAccount()).isEqualTo("account"); + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo("path/to/file"); } @@ -54,7 +43,7 @@ public void 
testEncodedString() { String p1 = "abfs://container@account.dfs.core.windows.net/path%20to%20file"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageAccount()).isEqualTo("account"); + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo("path%20to%20file"); } @@ -78,7 +67,7 @@ public void testNoContainer() { String p1 = "abfs://account.dfs.core.windows.net/path/to/file"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageAccount()).isEqualTo("account"); + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().isPresent()).isFalse(); assertThat(location.path()).isEqualTo("path/to/file"); } @@ -88,7 +77,7 @@ public void testNoPath() { String p1 = "abfs://container@account.dfs.core.windows.net"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageAccount()).isEqualTo("account"); + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo(""); } diff --git a/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java b/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java index a8adf979f85a..a858045aab8b 100644 --- a/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java +++ b/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java @@ -62,9 +62,7 @@ public class ResolvingFileIO implements HadoopConfigurable, DelegateFileIO { "s3n", S3_FILE_IO_IMPL, "gs", GCS_FILE_IO_IMPL, "abfs", ADLS_FILE_IO_IMPL, - "abfss", ADLS_FILE_IO_IMPL, - "wasb", ADLS_FILE_IO_IMPL, - "wasbs", ADLS_FILE_IO_IMPL); + "abfss", ADLS_FILE_IO_IMPL); private final Map ioInstances = Maps.newConcurrentMap(); private final AtomicBoolean isClosed = new AtomicBoolean(false);