Skip to content

Commit

Permalink
[#5582] improvement(hadoop3-filesystem): Remove configuration `fs.gvf…
Browse files Browse the repository at this point in the history
…s.filesystem.providers` from GVFS client. (#5634)

### What changes were proposed in this pull request?

Configuration `fs.gvfs.filesystem.providers` is redundant, so we'd
better remove this configuration.
 
### Why are the changes needed?

This configuration is redundant. 
Fix: #5582


### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

Existing tests.
  • Loading branch information
yuqi1129 authored Dec 16, 2024
1 parent fce1bd9 commit 1d92626
Show file tree
Hide file tree
Showing 18 changed files with 103 additions and 58 deletions.
4 changes: 2 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ tasks {
!it.name.startsWith("client") && !it.name.startsWith("filesystem") && !it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name != "trino-connector" &&
it.name != "integration-test" && it.name != "bundled-catalog" && !it.name.startsWith("flink") &&
it.name != "integration-test" && it.name != "hive-metastore-common" && !it.name.startsWith("flink") &&
it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle"
it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle" && it.name != "hadoop-common"
) {
from(it.configurations.runtimeClasspath)
into("distribution/package/libs")
Expand All @@ -802,7 +802,7 @@ tasks {
it.name != "bundled-catalog" &&
it.name != "hive-metastore-common" && it.name != "gcp-bundle" &&
it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle" &&
it.name != "docs"
it.name != "hadoop-common" && it.name != "docs"
) {
dependsOn("${it.name}:build")
from("${it.name}/build/libs")
Expand Down
3 changes: 3 additions & 0 deletions bundles/aliyun-bundle/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ dependencies {
compileOnly(project(":core"))
compileOnly(project(":catalogs:catalog-common"))
compileOnly(project(":catalogs:catalog-hadoop"))
compileOnly(project(":catalogs:hadoop-common")) {
exclude("*")
}
compileOnly(libs.hadoop3.common)

implementation(libs.aliyun.credentials.sdk)
Expand Down
3 changes: 3 additions & 0 deletions bundles/aws-bundle/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ dependencies {
compileOnly(project(":core"))
compileOnly(project(":catalogs:catalog-common"))
compileOnly(project(":catalogs:catalog-hadoop"))
compileOnly(project(":catalogs:hadoop-common")) {
exclude("*")
}
compileOnly(libs.hadoop3.common)

implementation(libs.aws.iam)
Expand Down
3 changes: 3 additions & 0 deletions bundles/azure-bundle/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ dependencies {
compileOnly(project(":api"))
compileOnly(project(":core"))
compileOnly(project(":catalogs:catalog-hadoop"))
compileOnly(project(":catalogs:hadoop-common")) {
exclude("*")
}

compileOnly(libs.hadoop3.common)

Expand Down
3 changes: 3 additions & 0 deletions bundles/gcp-bundle/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ dependencies {
compileOnly(project(":core"))
compileOnly(project(":catalogs:catalog-common"))
compileOnly(project(":catalogs:catalog-hadoop"))
compileOnly(project(":catalogs:hadoop-common")) {
exclude("*")
}

compileOnly(libs.hadoop3.common)

Expand Down
41 changes: 23 additions & 18 deletions catalogs/catalog-hadoop/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dependencies {
implementation(project(":core")) {
exclude(group = "*")
}

implementation(project(":common")) {
exclude(group = "*")
}
Expand All @@ -40,7 +41,9 @@ dependencies {
exclude(group = "*")
}

compileOnly(libs.guava)
implementation(project(":catalogs:hadoop-common")) {
exclude(group = "*")
}

implementation(libs.hadoop3.common) {
exclude("com.sun.jersey")
Expand All @@ -54,6 +57,14 @@ dependencies {
exclude("com.sun.jersey", "jersey-servlet")
}

implementation(libs.hadoop3.client) {
exclude("org.apache.hadoop", "hadoop-mapreduce-client-core")
exclude("org.apache.hadoop", "hadoop-mapreduce-client-jobclient")
exclude("org.apache.hadoop", "hadoop-yarn-api")
exclude("org.apache.hadoop", "hadoop-yarn-client")
exclude("com.squareup.okhttp", "okhttp")
}

implementation(libs.hadoop3.hdfs) {
exclude("com.sun.jersey")
exclude("javax.servlet", "servlet-api")
Expand All @@ -63,38 +74,32 @@ dependencies {
exclude("io.netty")
exclude("org.fusesource.leveldbjni")
}
implementation(libs.hadoop3.client) {
exclude("org.apache.hadoop", "hadoop-mapreduce-client-core")
exclude("org.apache.hadoop", "hadoop-mapreduce-client-jobclient")
exclude("org.apache.hadoop", "hadoop-yarn-api")
exclude("org.apache.hadoop", "hadoop-yarn-client")
exclude("com.squareup.okhttp", "okhttp")
}

implementation(libs.slf4j.api)

testImplementation(project(":clients:client-java"))
testImplementation(project(":integration-test-common", "testArtifacts"))
testImplementation(project(":server"))
testImplementation(project(":server-common"))
compileOnly(libs.guava)

testImplementation(project(":bundles:aws-bundle"))
testImplementation(project(":bundles:gcp-bundle"))
testImplementation(project(":bundles:aliyun-bundle"))
testImplementation(project(":bundles:azure-bundle"))

testImplementation(libs.minikdc)
testImplementation(libs.hadoop3.minicluster)
testImplementation(project(":clients:client-java"))
testImplementation(project(":integration-test-common", "testArtifacts"))
testImplementation(project(":server"))
testImplementation(project(":server-common"))

testImplementation(libs.bundles.log4j)
testImplementation(libs.hadoop3.gcs)
testImplementation(libs.hadoop3.minicluster)
testImplementation(libs.junit.jupiter.api)
testImplementation(libs.junit.jupiter.params)
testImplementation(libs.minikdc)
testImplementation(libs.mockito.core)
testImplementation(libs.mockito.inline)
testImplementation(libs.mysql.driver)
testImplementation(libs.postgresql.driver)
testImplementation(libs.junit.jupiter.api)
testImplementation(libs.junit.jupiter.params)
testImplementation(libs.testcontainers)
testImplementation(libs.testcontainers.mysql)
testImplementation(libs.hadoop3.gcs)

testRuntimeOnly(libs.junit.jupiter.engine)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ public static Map<String, FileSystemProvider> getFileSystemProviders(String file
if (resultMap.containsKey(fileSystemProvider.scheme())) {
throw new UnsupportedOperationException(
String.format(
"File system provider: '%s' with scheme '%s' already exists in the use provider list "
+ "Please make sure the file system provider scheme is unique.",
"File system provider: '%s' with scheme '%s' already exists in the provider list,"
+ "please make sure the file system provider scheme is unique.",
fileSystemProvider.getClass().getName(), fileSystemProvider.scheme()));
}
resultMap.put(fileSystemProvider.scheme(), fileSystemProvider);
Expand Down
28 changes: 28 additions & 0 deletions catalogs/hadoop-common/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

plugins {
id("java")
}

// try to avoid adding extra dependencies because it is used by catalogs and connectors.
dependencies {
implementation(libs.commons.lang3)
implementation(libs.hadoop3.common)
}
13 changes: 10 additions & 3 deletions clients/filesystem-hadoop3/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@ plugins {
dependencies {
compileOnly(project(":clients:client-java-runtime", configuration = "shadow"))
compileOnly(libs.hadoop3.common)
implementation(project(":catalogs:catalog-hadoop")) {

implementation(project(":catalogs:catalog-common")) {
exclude(group = "*")
}
implementation(project(":catalogs:catalog-common")) {
implementation(project(":catalogs:hadoop-common")) {
exclude(group = "*")
}

Expand All @@ -42,16 +43,19 @@ dependencies {
testImplementation(project(":server-common"))
testImplementation(project(":clients:client-java"))
testImplementation(project(":integration-test-common", "testArtifacts"))
testImplementation(project(":catalogs:catalog-hadoop"))
testImplementation(project(":bundles:gcp-bundle"))
testImplementation(project(":bundles:aliyun-bundle"))
testImplementation(project(":bundles:aws-bundle"))
testImplementation(project(":bundles:azure-bundle"))
testImplementation(project(":bundles:gcp-bundle"))

testImplementation(libs.awaitility)
testImplementation(libs.bundles.jetty)
testImplementation(libs.bundles.jersey)
testImplementation(libs.bundles.jwt)
testImplementation(libs.testcontainers)
testImplementation(libs.guava)

testImplementation(libs.hadoop3.client)
testImplementation(libs.hadoop3.common) {
exclude("com.sun.jersey")
Expand All @@ -75,6 +79,8 @@ dependencies {
}
testImplementation(libs.mysql.driver)
testImplementation(libs.postgresql.driver)
testImplementation(libs.testcontainers)

testRuntimeOnly(libs.junit.jupiter.engine)
}

Expand All @@ -99,6 +105,7 @@ tasks.test {
dependsOn(":bundles:aws-bundle:jar")
dependsOn(":bundles:aliyun-bundle:jar")
dependsOn(":bundles:gcp-bundle:jar")
dependsOn(":bundles:azure-bundle:jar")
}

tasks.javadoc {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@
*/
package org.apache.gravitino.filesystem.hadoop;

import static org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystemConfiguration.FS_FILESYSTEM_PROVIDERS;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.Scheduler;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Streams;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
Expand All @@ -46,7 +46,6 @@
import org.apache.gravitino.audit.FilesetDataOperation;
import org.apache.gravitino.audit.InternalClientType;
import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider;
import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
import org.apache.gravitino.client.DefaultOAuth2TokenProvider;
import org.apache.gravitino.client.GravitinoClient;
import org.apache.gravitino.client.KerberosTokenProvider;
Expand Down Expand Up @@ -135,8 +134,7 @@ public void initialize(URI name, Configuration configuration) throws IOException
initializeClient(configuration);

// Register the default local and HDFS FileSystemProvider
String fileSystemProviders = configuration.get(FS_FILESYSTEM_PROVIDERS);
fileSystemProvidersMap.putAll(FileSystemUtils.getFileSystemProviders(fileSystemProviders));
fileSystemProvidersMap.putAll(getFileSystemProviders());

this.workingDirectory = new Path(name);
this.uri = URI.create(name.getScheme() + "://" + name.getAuthority());
Expand Down Expand Up @@ -618,4 +616,24 @@ public FileSystem getFileSystem() {
return fileSystem;
}
}

  /**
   * Discovers all {@link FileSystemProvider} implementations registered via the JDK {@link
   * ServiceLoader} mechanism and indexes them by their URI scheme.
   *
   * <p>This replaces the removed {@code fs.gvfs.filesystem.providers} configuration: providers are
   * now picked up automatically from the classpath instead of being listed by the user.
   *
   * @return a map from each provider's scheme to the provider instance
   * @throws UnsupportedOperationException if two discovered providers declare the same scheme,
   *     since the scheme is the lookup key and must be unique
   */
  private static Map<String, FileSystemProvider> getFileSystemProviders() {
    Map<String, FileSystemProvider> resultMap = Maps.newHashMap();
    // Load every FileSystemProvider implementation declared in
    // META-INF/services on the current classpath.
    ServiceLoader<FileSystemProvider> allFileSystemProviders =
        ServiceLoader.load(FileSystemProvider.class);

    Streams.stream(allFileSystemProviders.iterator())
        .forEach(
            fileSystemProvider -> {
              // Duplicate schemes are ambiguous — fail fast rather than let one
              // provider silently shadow another.
              if (resultMap.containsKey(fileSystemProvider.scheme())) {
                throw new UnsupportedOperationException(
                    String.format(
                        "File system provider: '%s' with scheme '%s' already exists in the provider list, "
                            + "please make sure the file system provider scheme is unique.",
                        fileSystemProvider.getClass().getName(), fileSystemProvider.scheme()));
              }
              resultMap.put(fileSystemProvider.scheme(), fileSystemProvider);
            });
    return resultMap;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
*/
package org.apache.gravitino.filesystem.hadoop;

import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider;

/** Configuration class for Gravitino Virtual File System. */
public class GravitinoVirtualFileSystemConfiguration {

Expand All @@ -44,13 +42,6 @@ public class GravitinoVirtualFileSystemConfiguration {
/** The configuration key for the Gravitino client auth type. */
public static final String FS_GRAVITINO_CLIENT_AUTH_TYPE_KEY = "fs.gravitino.client.authType";

/**
* File system provider names configuration key. The value is a comma separated list of file
* system provider name which is defined in the service loader. Users can custom their own file
* system by implementing the {@link FileSystemProvider} interface.
*/
public static final String FS_FILESYSTEM_PROVIDERS = "fs.gvfs.filesystem.providers";

/** The authentication type for simple authentication. */
public static final String SIMPLE_AUTH_TYPE = "simple";
/** The authentication type for oauth2 authentication. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ public void startUp() throws Exception {
conf.set("fs.gravitino.server.uri", serverUri);
conf.set("fs.gravitino.client.metalake", metalakeName);

conf.set("fs.gvfs.filesystem.providers", AzureFileSystemProvider.ABS_PROVIDER_NAME);
// Pass this configuration to the real file system
conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME);
conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.gravitino.filesystem.hadoop.integration.test;

import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS;
import static org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystemConfiguration.FS_FILESYSTEM_PROVIDERS;

import com.google.common.collect.Maps;
import java.io.IOException;
Expand Down Expand Up @@ -92,15 +91,14 @@ public void startUp() throws Exception {

// Pass this configuration to the real file system
conf.set(GCSProperties.GCS_SERVICE_ACCOUNT_JSON_PATH, SERVICE_ACCOUNT_FILE);
conf.set(FS_FILESYSTEM_PROVIDERS, "gcs");
}

@AfterAll
public void tearDown() throws IOException {
Catalog catalog = metalake.loadCatalog(catalogName);
catalog.asSchemas().dropSchema(schemaName, true);
metalake.dropCatalog(catalogName);
client.dropMetalake(metalakeName);
metalake.dropCatalog(catalogName, true);
client.dropMetalake(metalakeName, true);

if (client != null) {
client.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.gravitino.filesystem.hadoop.integration.test;

import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS;
import static org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystemConfiguration.FS_FILESYSTEM_PROVIDERS;

import com.google.common.collect.Maps;
import java.io.IOException;
Expand Down Expand Up @@ -100,7 +99,6 @@ public void startUp() throws Exception {
conf.set(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_SECRET, OSS_SECRET_KEY);
conf.set(OSSProperties.GRAVITINO_OSS_ENDPOINT, OSS_ENDPOINT);
conf.set("fs.oss.impl", "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem");
conf.set(FS_FILESYSTEM_PROVIDERS, "oss");
}

@AfterAll
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.gravitino.filesystem.hadoop.integration.test;

import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS;
import static org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystemConfiguration.FS_FILESYSTEM_PROVIDERS;

import com.google.common.collect.Maps;
import java.io.IOException;
Expand Down Expand Up @@ -157,7 +156,6 @@ public void startUp() throws Exception {
conf.set(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, accessKey);
conf.set(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, secretKey);
conf.set(S3Properties.GRAVITINO_S3_ENDPOINT, s3Endpoint);
conf.set(FS_FILESYSTEM_PROVIDERS, "s3");
}

@AfterAll
Expand Down
Loading

0 comments on commit 1d92626

Please sign in to comment.