diff --git a/LICENSE.bin b/LICENSE.bin index 46b097b53f9..ee65d4d6952 100644 --- a/LICENSE.bin +++ b/LICENSE.bin @@ -282,6 +282,8 @@ ApacheDS I18n ApacheDS Protocol Kerberos Codec Apache Hadoop + Apache Hadoop Aliyun connector + Apache Hadoop GCS connector Apache Hadoop Annotatations Apache Hadoop Auth Apache Hadoop Client Aggregator diff --git a/build.gradle.kts b/build.gradle.kts index 9733a17912f..ff287db9110 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -745,7 +745,7 @@ tasks { if (!it.name.startsWith("catalog") && !it.name.startsWith("authorization") && !it.name.startsWith("client") && !it.name.startsWith("filesystem") && !it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name != "trino-connector" && - it.name != "integration-test" && it.name != "hive-metastore-common" && !it.name.startsWith("flink") && it.name != "gcp-bundle" + it.name != "integration-test" && it.name != "hive-metastore-common" && !it.name.startsWith("flink") && it.name != "gcp-bundle" && it.name != "aliyun-bundle" ) { from(it.configurations.runtimeClasspath) into("distribution/package/libs") @@ -764,7 +764,8 @@ tasks { !it.name.startsWith("integration-test") && !it.name.startsWith("flink") && !it.name.startsWith("trino-connector") && - it.name != "hive-metastore-common" && it.name != "gcp-bundle" + it.name != "hive-metastore-common" && it.name != "gcp-bundle" && + it.name != "aliyun-bundle" ) { dependsOn("${it.name}:build") from("${it.name}/build/libs") diff --git a/bundles/aliyun-bundle/build.gradle.kts b/bundles/aliyun-bundle/build.gradle.kts new file mode 100644 index 00000000000..5858147e270 --- /dev/null +++ b/bundles/aliyun-bundle/build.gradle.kts @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `maven-publish` + id("java") + alias(libs.plugins.shadow) +} + +dependencies { + compileOnly(project(":catalogs:catalog-hadoop")) + compileOnly(libs.hadoop3.common) + implementation(libs.hadoop3.oss) + + // oss needs StringUtils from commons-lang or the following error will occur in 3.1.0 + // java.lang.NoClassDefFoundError: org/apache/commons/lang/StringUtils + // org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystemStore.initialize(AliyunOSSFileSystemStore.java:111) + // org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem.initialize(AliyunOSSFileSystem.java:323) + // org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3611) + implementation(libs.commons.lang) +} + +tasks.withType(ShadowJar::class.java) { + isZip64 = true + configurations = listOf(project.configurations.runtimeClasspath.get()) + archiveClassifier.set("") + mergeServiceFiles() + + // Relocate dependencies to avoid conflicts + relocate("org.jdom", "org.apache.gravitino.shaded.org.jdom") + relocate("org.apache.commons.lang", "org.apache.gravitino.shaded.org.apache.commons.lang") +} + +tasks.jar { + dependsOn(tasks.named("shadowJar")) + archiveClassifier.set("empty") +} + +tasks.compileJava { + dependsOn(":catalogs:catalog-hadoop:runtimeJars") +} diff --git 
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java b/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java new file mode 100644 index 00000000000..97bce16f05c --- /dev/null +++ b/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.oss.fs; + +import java.io.IOException; +import java.util.Map; +import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem; + +public class OSSFileSystemProvider implements FileSystemProvider { + @Override + public FileSystem getFileSystem(Path path, Map config) throws IOException { + Configuration configuration = new Configuration(); + config.forEach( + (k, v) -> { + configuration.set(k.replace("gravitino.bypass.", ""), v); + }); + + return AliyunOSSFileSystem.newInstance(path.toUri(), configuration); + } + + @Override + public String scheme() { + return "oss"; + } + + @Override + public String name() { + return "oss"; + } +} diff --git a/bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider b/bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider new file mode 100644 index 00000000000..20cedbd8b1a --- /dev/null +++ b/bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +org.apache.gravitino.oss.fs.OSSFileSystemProvider \ No newline at end of file diff --git a/bundles/gcp-bundle/build.gradle.kts b/bundles/gcp-bundle/build.gradle.kts index 9433a600429..6b373578c9d 100644 --- a/bundles/gcp-bundle/build.gradle.kts +++ b/bundles/gcp-bundle/build.gradle.kts @@ -34,6 +34,12 @@ tasks.withType(ShadowJar::class.java) { isZip64 = true configurations = listOf(project.configurations.runtimeClasspath.get()) archiveClassifier.set("") + + // Relocate dependencies to avoid conflicts + relocate("org.apache.httpcomponents", "org.apache.gravitino.shaded.org.apache.httpcomponents") + relocate("org.apache.commons", "org.apache.gravitino.shaded.org.apache.commons") + relocate("com.google.guava", "org.apache.gravitino.shaded.com.google.guava") + relocate("com.google.code", "org.apache.gravitino.shaded.com.google.code") } tasks.jar { diff --git a/catalogs/catalog-hadoop/build.gradle.kts b/catalogs/catalog-hadoop/build.gradle.kts index 9ff3cc0e31c..4c091b14946 100644 --- a/catalogs/catalog-hadoop/build.gradle.kts +++ b/catalogs/catalog-hadoop/build.gradle.kts @@ -74,6 +74,7 @@ dependencies { testImplementation(project(":server")) testImplementation(project(":server-common")) testImplementation(project(":bundles:gcp-bundle")) + testImplementation(project(":bundles:aliyun-bundle")) testImplementation(libs.minikdc) testImplementation(libs.hadoop3.minicluster) diff --git a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopGCSCatalogIT.java 
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopGCSCatalogIT.java index 74ae2a77cdb..db1d01336ca 100644 --- a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopGCSCatalogIT.java +++ b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopGCSCatalogIT.java @@ -42,8 +42,21 @@ public class HadoopGCSCatalogIT extends HadoopCatalogIT { public static final String BUCKET_NAME = "YOUR_BUCKET"; public static final String SERVICE_ACCOUNT_FILE = "YOUR_KEY_FILE"; + @Override + public void startIntegrationTest() throws Exception { + // Do nothing. + } + @BeforeAll public void setup() throws IOException { + copyBundleJarsToHadoop("gcp-bundle"); + + try { + super.startIntegrationTest(); + } catch (Exception e) { + throw new RuntimeException(e); + } + metalakeName = GravitinoITUtils.genRandomName("CatalogFilesetIT_metalake"); catalogName = GravitinoITUtils.genRandomName("CatalogFilesetIT_catalog"); schemaName = GravitinoITUtils.genRandomName("CatalogFilesetIT_schema"); diff --git a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopOSSCatalogIT.java b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopOSSCatalogIT.java new file mode 100644 index 00000000000..0bd07739993 --- /dev/null +++ b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopOSSCatalogIT.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.hadoop.integration.test; + +import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; +import java.io.IOException; +import java.net.URI; +import java.util.Map; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.integration.test.util.GravitinoITUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Disabled( + "Disabled due to we don't have a real OSS account to test. 
If you have an OSS account," + + "please change the configuration(BUCKET_NAME, OSS_ACCESS_KEY, OSS_SECRET_KEY, OSS_ENDPOINT) and enable this test.") +public class HadoopOSSCatalogIT extends HadoopCatalogIT { + private static final Logger LOG = LoggerFactory.getLogger(HadoopOSSCatalogIT.class); + public static final String BUCKET_NAME = "YOUR_BUCKET"; + public static final String OSS_ACCESS_KEY = "YOUR_OSS_ACCESS_KEY"; + public static final String OSS_SECRET_KEY = "YOUR_OSS_SECRET_KEY"; + public static final String OSS_ENDPOINT = "YOUR_OSS_ENDPOINT"; + + @VisibleForTesting + public void startIntegrationTest() throws Exception {} + + @BeforeAll + public void setup() throws IOException { + copyBundleJarsToHadoop("aliyun-bundle"); + + try { + super.startIntegrationTest(); + } catch (Exception e) { + throw new RuntimeException("Failed to start integration test", e); + } + + metalakeName = GravitinoITUtils.genRandomName("CatalogFilesetIT_metalake"); + catalogName = GravitinoITUtils.genRandomName("CatalogFilesetIT_catalog"); + schemaName = GravitinoITUtils.genRandomName("CatalogFilesetIT_schema"); + + schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX); + Configuration conf = new Configuration(); + + conf.set("fs.oss.accessKeyId", OSS_ACCESS_KEY); + conf.set("fs.oss.accessKeySecret", OSS_SECRET_KEY); + conf.set("fs.oss.endpoint", OSS_ENDPOINT); + conf.set("fs.oss.impl", "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"); + fileSystem = FileSystem.get(URI.create(String.format("oss://%s", BUCKET_NAME)), conf); + + createMetalake(); + createCatalog(); + createSchema(); + } + + @AfterAll + public void stop() throws IOException { + Catalog catalog = metalake.loadCatalog(catalogName); + catalog.asSchemas().dropSchema(schemaName, true); + metalake.dropCatalog(catalogName); + client.dropMetalake(metalakeName); + + try { + closer.close(); + } catch (Exception e) { + LOG.error("Failed to close CloseableGroup", e); + } + } + + protected String defaultBaseLocation() { + if 
(defaultBaseLocation == null) { + try { + Path bucket = + new Path( + String.format( + "oss://%s/%s", + BUCKET_NAME, GravitinoITUtils.genRandomName("CatalogFilesetIT"))); + if (!fileSystem.exists(bucket)) { + fileSystem.mkdirs(bucket); + } + + defaultBaseLocation = bucket.toString(); + } catch (IOException e) { + throw new RuntimeException("Failed to create default base location", e); + } + } + + return defaultBaseLocation; + } + + protected void createCatalog() { + Map map = Maps.newHashMap(); + map.put("gravitino.bypass.fs.oss.accessKeyId", OSS_ACCESS_KEY); + map.put("gravitino.bypass.fs.oss.accessKeySecret", OSS_SECRET_KEY); + map.put("gravitino.bypass.fs.oss.endpoint", OSS_ENDPOINT); + map.put("gravitino.bypass.fs.oss.impl", "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"); + map.put(FILESYSTEM_PROVIDERS, "oss"); + + metalake.createCatalog(catalogName, Catalog.Type.FILESET, provider, "comment", map); + + catalog = metalake.loadCatalog(catalogName); + } + + protected String generateLocation(String filesetName) { + return String.format("%s/%s", defaultBaseLocation, filesetName); + } +} diff --git a/clients/filesystem-hadoop3/build.gradle.kts b/clients/filesystem-hadoop3/build.gradle.kts index cae1888185a..c3f8c6d7bcf 100644 --- a/clients/filesystem-hadoop3/build.gradle.kts +++ b/clients/filesystem-hadoop3/build.gradle.kts @@ -40,6 +40,7 @@ dependencies { testImplementation(project(":clients:client-java")) testImplementation(project(":integration-test-common", "testArtifacts")) testImplementation(project(":bundles:gcp-bundle")) + testImplementation(project(":bundles:aliyun-bundle")) testImplementation(libs.awaitility) testImplementation(libs.bundles.jetty) testImplementation(libs.bundles.jersey) diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemGCSIT.java 
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemGCSIT.java index a42d1c4b7b3..73a45006f03 100644 --- a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemGCSIT.java +++ b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemGCSIT.java @@ -26,11 +26,8 @@ import java.io.IOException; import java.util.Collections; import java.util.Map; -import java.util.Objects; import org.apache.gravitino.Catalog; -import org.apache.gravitino.integration.test.util.DownloaderUtils; import org.apache.gravitino.integration.test.util.GravitinoITUtils; -import org.apache.gravitino.integration.test.util.ITUtils; import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; @@ -55,7 +52,7 @@ public void startIntegrationTest() { @BeforeAll public void startUp() throws Exception { - copyGCPJars(); + copyBundleJarsToHadoop("gcp-bundle"); // Need to download jars to gravitino server super.startIntegrationTest(); @@ -133,37 +130,6 @@ protected String genStorageLocation(String fileset) { return String.format("gs://%s/%s", BUCKET_NAME, fileset); } - private static boolean isDeploy() { - String mode = - System.getProperty(ITUtils.TEST_MODE) == null - ? 
ITUtils.EMBEDDED_TEST_MODE - : System.getProperty(ITUtils.TEST_MODE); - - return Objects.equals(mode, ITUtils.DEPLOY_TEST_MODE); - } - - private void copyGCPJars() { - if (!isDeploy()) { - return; - } - - String gravitinoHome = System.getenv("GRAVITINO_HOME"); - String jarName = String.format("gravitino-gcp-bundle-%s.jar", System.getenv("PROJECT_VERSION")); - String gcsJars = - ITUtils.joinPath( - gravitinoHome, "..", "..", "bundles", "gcp-bundle", "build", "libs", jarName); - gcsJars = "file://" + gcsJars; - try { - if (!ITUtils.EMBEDDED_TEST_MODE.equals(testMode)) { - String hadoopLibDirs = ITUtils.joinPath(gravitinoHome, "catalogs", "hadoop", "libs"); - DownloaderUtils.downloadFile(gcsJars, hadoopLibDirs); - } - } catch (Exception e) { - throw new RuntimeException( - String.format("Failed to copy the gcs dependency jars: %s", gcsJars), e); - } - } - @Disabled( "GCS does not support append, java.io.IOException: The append operation is not supported") public void testAppend() throws IOException {} diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemOSSIT.java b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemOSSIT.java new file mode 100644 index 00000000000..67c76be3db5 --- /dev/null +++ b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemOSSIT.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.filesystem.hadoop.integration.test; + +import static org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS; +import static org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystemConfiguration.FS_FILESYSTEM_PROVIDERS; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.google.common.collect.Maps; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.file.Fileset; +import org.apache.gravitino.integration.test.util.GravitinoITUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Disabled( + "Disabled due to we don't have a real OSS account to test. 
If you have an OSS account," + + "please change the configuration(BUCKET_NAME, OSS_ACCESS_KEY, OSS_SECRET_KEY, OSS_ENDPOINT) and enable this test.") +public class GravitinoVirtualFileSystemOSSIT extends GravitinoVirtualFileSystemIT { + private static final Logger LOG = LoggerFactory.getLogger(GravitinoVirtualFileSystemOSSIT.class); + + public static final String BUCKET_NAME = "YOUR_BUCKET"; + public static final String OSS_ACCESS_KEY = "YOUR_OSS_ACCESS_KEY"; + public static final String OSS_SECRET_KEY = "YOUR_OSS_SECRET_KEY"; + public static final String OSS_ENDPOINT = "YOUR_OSS_ENDPOINT"; + + @BeforeAll + public void startIntegrationTest() { + // Do nothing + } + + @BeforeAll + public void startUp() throws Exception { + copyBundleJarsToHadoop("aliyun-bundle"); + // Need to download jars to gravitino server + super.startIntegrationTest(); + + // This value can be tuned by the user, please change it accordingly. + defaultBockSize = 64 * 1024 * 1024; + + metalakeName = GravitinoITUtils.genRandomName("gvfs_it_metalake"); + catalogName = GravitinoITUtils.genRandomName("catalog"); + schemaName = GravitinoITUtils.genRandomName("schema"); + + Assertions.assertFalse(client.metalakeExists(metalakeName)); + metalake = client.createMetalake(metalakeName, "metalake comment", Collections.emptyMap()); + Assertions.assertTrue(client.metalakeExists(metalakeName)); + + Map properties = Maps.newHashMap(); + properties.put(FILESYSTEM_PROVIDERS, "oss"); + properties.put("gravitino.bypass.fs.oss.accessKeyId", OSS_ACCESS_KEY); + properties.put("gravitino.bypass.fs.oss.accessKeySecret", OSS_SECRET_KEY); + properties.put("gravitino.bypass.fs.oss.endpoint", OSS_ENDPOINT); + properties.put( + "gravitino.bypass.fs.oss.impl", "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"); + + Catalog catalog = + metalake.createCatalog( + catalogName, Catalog.Type.FILESET, "hadoop", "catalog comment", properties); + Assertions.assertTrue(metalake.catalogExists(catalogName)); + + 
catalog.asSchemas().createSchema(schemaName, "schema comment", properties); + Assertions.assertTrue(catalog.asSchemas().schemaExists(schemaName)); + + conf.set("fs.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem"); + conf.set("fs.AbstractFileSystem.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.Gvfs"); + conf.set("fs.gvfs.impl.disable.cache", "true"); + conf.set("fs.gravitino.server.uri", serverUri); + conf.set("fs.gravitino.client.metalake", metalakeName); + + // Pass this configuration to the real file system + conf.set("gravitino.bypass.fs.oss.accessKeyId", OSS_ACCESS_KEY); + conf.set("gravitino.bypass.fs.oss.accessKeySecret", OSS_SECRET_KEY); + conf.set("gravitino.bypass.fs.oss.endpoint", OSS_ENDPOINT); + conf.set("gravitino.bypass.fs.oss.impl", "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"); + + conf.set(FS_FILESYSTEM_PROVIDERS, "oss"); + } + + @AfterAll + public void tearDown() throws IOException { + Catalog catalog = metalake.loadCatalog(catalogName); + catalog.asSchemas().dropSchema(schemaName, true); + metalake.dropCatalog(catalogName); + client.dropMetalake(metalakeName); + + if (client != null) { + client.close(); + client = null; + } + + try { + closer.close(); + } catch (Exception e) { + LOG.error("Exception in closing CloseableGroup", e); + } + } + + /** + * Remove the `gravitino.bypass` prefix from the configuration and pass it to the real file system + * This method corresponds to the method org.apache.gravitino.filesystem.hadoop + * .GravitinoVirtualFileSystem#getConfigMap(Configuration) in the original code. 
+ */ + protected Configuration convertGvfsConfigToRealFileSystemConfig(Configuration gvfsConf) { + Configuration gcsConf = new Configuration(); + gvfsConf.forEach( + entry -> { + gcsConf.set(entry.getKey().replace("gravitino.bypass.", ""), entry.getValue()); + }); + + return gcsConf; + } + + protected String genStorageLocation(String fileset) { + return String.format("oss://%s/%s", BUCKET_NAME, fileset); + } + + @Test + public void testGetDefaultReplications() throws IOException { + String filesetName = GravitinoITUtils.genRandomName("test_get_default_replications"); + NameIdentifier filesetIdent = NameIdentifier.of(schemaName, filesetName); + Catalog catalog = metalake.loadCatalog(catalogName); + String storageLocation = genStorageLocation(filesetName); + catalog + .asFilesetCatalog() + .createFileset( + filesetIdent, + "fileset comment", + Fileset.Type.MANAGED, + storageLocation, + new HashMap<>()); + Assertions.assertTrue(catalog.asFilesetCatalog().filesetExists(filesetIdent)); + Path gvfsPath = genGvfsPath(filesetName); + try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) { + // Here HDFS is 3, but for oss is 1. 
+ assertEquals(1, gvfs.getDefaultReplication(gvfsPath)); + } + + catalog.asFilesetCatalog().dropFileset(filesetIdent); + } + + @Disabled( + "OSS does not support append, java.io.IOException: The append operation is not supported") + public void testAppend() throws IOException {} +} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index af04c63146c..fc06975c19d 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -33,11 +33,13 @@ hive2 = "2.3.9" hadoop2 = "2.10.2" hadoop3 = "3.1.0" hadoop3-gcs = "1.9.4-hadoop3" +hadoop3-aliyun = "3.1.0" hadoop-minikdc = "3.3.6" htrace-core4 = "4.1.0-incubating" httpclient5 = "5.2.1" mockserver = "5.15.0" commons-lang3 = "3.14.0" +commons-lang = "2.6" commons-io = "2.15.0" commons-collections4 = "4.4" commons-collections3 = "3.2.2" @@ -154,12 +156,14 @@ hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common", version. hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", version.ref = "hadoop3"} hadoop3-minicluster = { group = "org.apache.hadoop", name = "hadoop-minicluster", version.ref = "hadoop-minikdc"} hadoop3-gcs = { group = "com.google.cloud.bigdataoss", name = "gcs-connector", version.ref = "hadoop3-gcs"} +hadoop3-oss = { group = "org.apache.hadoop", name = "hadoop-aliyun", version.ref = "hadoop3-aliyun"} htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", version.ref = "htrace-core4" } airlift-json = { group = "io.airlift", name = "json", version.ref = "airlift-json"} airlift-resolver = { group = "io.airlift.resolver", name = "resolver", version.ref = "airlift-resolver"} httpclient5 = { group = "org.apache.httpcomponents.client5", name = "httpclient5", version.ref = "httpclient5" } mockserver-netty = { group = "org.mock-server", name = "mockserver-netty", version.ref = "mockserver" } mockserver-client-java = { group = "org.mock-server", name = "mockserver-client-java", version.ref = "mockserver" } +commons-lang = { group = 
"commons-lang", name = "commons-lang", version.ref = "commons-lang" } commons-lang3 = { group = "org.apache.commons", name = "commons-lang3", version.ref = "commons-lang3" } commons-io = { group = "commons-io", name = "commons-io", version.ref = "commons-io" } caffeine = { group = "com.github.ben-manes.caffeine", name = "caffeine", version.ref = "caffeine" } diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/BaseIT.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/BaseIT.java index e8f688f96ea..8bbb5a3b23f 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/BaseIT.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/BaseIT.java @@ -38,6 +38,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.ArrayUtils; @@ -386,4 +387,36 @@ protected String readGitCommitIdFromGitFile() { return ""; } } + + private static boolean isDeploy() { + String mode = + System.getProperty(ITUtils.TEST_MODE) == null + ? 
ITUtils.EMBEDDED_TEST_MODE + : System.getProperty(ITUtils.TEST_MODE); + + return Objects.equals(mode, ITUtils.DEPLOY_TEST_MODE); + } + + protected void copyBundleJarsToHadoop(String bundleName) { + if (!isDeploy()) { + return; + } + + String gravitinoHome = System.getenv("GRAVITINO_HOME"); + String jarName = + String.format("gravitino-%s-%s.jar", bundleName, System.getenv("PROJECT_VERSION")); + String gcsJars = + ITUtils.joinPath( + gravitinoHome, "..", "..", "bundles", bundleName, "build", "libs", jarName); + gcsJars = "file://" + gcsJars; + try { + if (!ITUtils.EMBEDDED_TEST_MODE.equals(testMode)) { + String hadoopLibDirs = ITUtils.joinPath(gravitinoHome, "catalogs", "hadoop", "libs"); + DownloaderUtils.downloadFile(gcsJars, hadoopLibDirs); + } + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to copy the %s dependency jars: %s", bundleName, gcsJars), e); + } + } } diff --git a/settings.gradle.kts b/settings.gradle.kts index 36d66504f47..10cf107497c 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -71,3 +71,5 @@ include("web:web", "web:integration-test") include("docs") include("integration-test-common") include(":bundles:gcp-bundle") +include("bundles:aliyun-bundle") +findProject(":bundles:aliyun-bundle")?.name = "aliyun-bundle"