Skip to content

Commit

Permalink
[#5492] feat(hadoop-catalog): Support Azure blob storage for Gravitin…
Browse files Browse the repository at this point in the history
…o server and GVFS Java client (#5508)

### What changes were proposed in this pull request?

Add support for Azure Blob Storage in the Gravitino server and the GVFS
Java client.

### Why are the changes needed?

It's a big improvement for fileset usage. 

Fix: #5492

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

ITs

---------

Co-authored-by: Jerry Shao <[email protected]>
  • Loading branch information
yuqi1129 and jerryshao authored Nov 14, 2024
1 parent 3907b04 commit 79c362c
Show file tree
Hide file tree
Showing 14 changed files with 585 additions and 4 deletions.
1 change: 1 addition & 0 deletions LICENSE.bin
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@
Apache Hadoop Aliyun connector
Apache Hadoop GCS connector
Apache Hadoop AWS connector
Apache Hadoop Azure connector
Apache Hadoop Annotations
Apache Hadoop Auth
Apache Hadoop Client Aggregator
Expand Down
4 changes: 2 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ tasks {
!it.name.startsWith("client") && !it.name.startsWith("filesystem") && !it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name != "trino-connector" &&
it.name != "integration-test" && it.name != "bundled-catalog" && !it.name.startsWith("flink") &&
it.name != "integration-test" && it.name != "hive-metastore-common" && !it.name.startsWith("flink") &&
it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle"
it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle"
) {
from(it.configurations.runtimeClasspath)
into("distribution/package/libs")
Expand All @@ -796,7 +796,7 @@ tasks {
!it.name.startsWith("trino-connector") &&
it.name != "bundled-catalog" &&
it.name != "hive-metastore-common" && it.name != "gcp-bundle" &&
it.name != "aliyun-bundle" && it.name != "aws-bundle"
it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != "azure-bundle"
) {
dependsOn("${it.name}:build")
from("${it.name}/build/libs")
Expand Down
62 changes: 62 additions & 0 deletions bundles/azure-bundle/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar

// Plugins applied to the azure-bundle module: Maven publishing, plain Java compilation, and the
// Shadow plugin (looked up through the `libs` version catalog) that provides the ShadowJar task
// type configured further down in this script.
plugins {
`maven-publish`
id("java")
alias(libs.plugins.shadow)
}

// Dependencies of the Azure bundle.
// `compileOnly` entries are visible to the compiler only; they are not part of
// `runtimeClasspath`, which is the sole configuration the shadow jar in this script packages.
// `implementation` entries are on `runtimeClasspath` and therefore end up inside the shadow jar.
dependencies {
// Gravitino modules needed only to compile against.
compileOnly(project(":api"))
compileOnly(project(":core"))
compileOnly(project(":catalogs:catalog-hadoop"))

// Hadoop common is compiled against but not bundled.
compileOnly(libs.hadoop3.common)

implementation(libs.commons.lang3)
// Not referenced at compile time by the sources in this bundle; required when the jar runs.
implementation(libs.commons.logging)
// Hadoop's Azure connector (version-catalog alias `hadoop3.abs`).
implementation(libs.hadoop3.abs)
implementation(project(":catalogs:catalog-common")) {
// NOTE(review): presumably intended to drop every transitive dependency of catalog-common so
// only that module's own classes are bundled - confirm `exclude("*")` has that effect.
exclude("*")
}
}

// Configures every ShadowJar task: bundle the runtime classpath into one jar and relocate
// commonly clashing third-party packages under `org.apache.gravitino.azure.shaded`.
tasks.withType(ShadowJar::class.java) {
// Use the Zip64 format so the archive is not bound by the classic zip size/entry limits.
isZip64 = true
// Only the runtime classpath is packaged; compileOnly dependencies are left out.
configurations = listOf(project.configurations.runtimeClasspath.get())
// Empty classifier: the shadow jar carries the module's plain artifact name.
archiveClassifier.set("")

// Relocate dependencies to avoid conflicts
// NOTE(review): Shadow's relocate() matches on Java package/class-name prefixes, not on Maven
// coordinates. `org.apache.httpcomponents` and `com.google.guava` look like Maven group IDs
// (the corresponding classes normally live under `org.apache.http` and `com.google.common`),
// so these two rules may not match any class - confirm against the built jar.
relocate("org.apache.httpcomponents", "org.apache.gravitino.azure.shaded.org.apache.httpcomponents")
relocate("org.apache.commons", "org.apache.gravitino.azure.shaded.org.apache.commons")
relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml")
relocate("com.google.guava", "org.apache.gravitino.azure.shaded.com.google.guava")
}

// The plain `jar` task is made to depend on `shadowJar`, so running `jar` also builds the
// shaded jar. Its own output gets the "empty" classifier, which keeps it from colliding with
// the shadow jar that uses an empty classifier in this script.
tasks.jar {
dependsOn(tasks.named("shadowJar"))
archiveClassifier.set("empty")
}

// Forces the catalog-hadoop `runtimeJars` task to run before this module is compiled.
// NOTE(review): `runtimeJars` is defined in another build script; presumably it produces the
// catalog-hadoop jars this bundle compiles against - confirm.
tasks.compileJava {
dependsOn(":catalogs:catalog-hadoop:runtimeJars")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.gravitino.abs.fs;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import java.io.IOException;
import java.util.Map;
import javax.annotation.Nonnull;
import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider;
import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
import org.apache.gravitino.storage.ABSProperties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * {@link FileSystemProvider} that creates Hadoop {@link FileSystem} instances for Azure Blob
 * Storage locations addressed through the {@code abfss} scheme.
 */
public class AzureFileSystemProvider implements FileSystemProvider {

  /** URI scheme reported by {@link #scheme()}. */
  @VisibleForTesting public static final String ABS_PROVIDER_SCHEME = "abfss";

  /** Provider name reported by {@link #name()}. */
  @VisibleForTesting public static final String ABS_PROVIDER_NAME = "abs";

  /** File system implementation class registered for the {@code abfss} scheme by default. */
  private static final String ABFS_IMPL = "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem";

  /** Hadoop configuration key that selects the {@code abfss} implementation class. */
  private static final String ABFS_IMPL_KEY = "fs.abfss.impl";

  /**
   * Builds a Hadoop configuration from {@code config} and returns the file system for {@code
   * path}.
   *
   * <p>When {@code config} contains both the account name and the account key, the key is
   * registered under the per-account Hadoop property for that account. {@value #ABFS_IMPL} is
   * set as the {@code abfss} implementation unless {@code config} already contains {@value
   * #ABFS_IMPL_KEY}. The converted properties are applied last, so they take precedence over
   * that default.
   *
   * @param path location whose URI selects the file system
   * @param config properties to convert into Hadoop configuration entries
   * @return the file system Hadoop resolves for the URI of {@code path}
   * @throws IOException if Hadoop fails to obtain the file system
   */
  @Override
  public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map<String, String> config)
      throws IOException {
    Configuration hadoopConf = new Configuration();

    Map<String, String> convertedProps =
        FileSystemUtils.toHadoopConfigMap(config, ImmutableMap.of());

    boolean hasAccountName = config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME);
    boolean hasAccountKey = config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY);
    if (hasAccountName && hasAccountKey) {
      String accountName = config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME);
      String accountKey = config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY);
      convertedProps.put(accountKeyProperty(accountName), accountKey);
    }

    // Default implementation first; an explicit entry in the converted map overrides it below.
    boolean implSuppliedByCaller = config.containsKey(ABFS_IMPL_KEY);
    if (!implSuppliedByCaller) {
      hadoopConf.set(ABFS_IMPL_KEY, ABFS_IMPL);
    }

    for (Map.Entry<String, String> property : convertedProps.entrySet()) {
      hadoopConf.set(property.getKey(), property.getValue());
    }

    return FileSystem.get(path.toUri(), hadoopConf);
  }

  /** Returns the Hadoop property name under which the key of {@code accountName} is stored. */
  private static String accountKeyProperty(String accountName) {
    return String.format("fs.azure.account.key.%s.dfs.core.windows.net", accountName);
  }

  /** Returns the URI scheme handled by this provider. */
  @Override
  public String scheme() {
    return ABS_PROVIDER_SCHEME;
  }

  /** Returns the name identifying this provider. */
  @Override
  public String name() {
    return ABS_PROVIDER_NAME;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

org.apache.gravitino.abs.fs.AzureFileSystemProvider
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.gravitino.storage;

/**
 * Property keys used to configure access to Azure Blob Storage.
 */
public class ABSProperties {

/** Property key for the account name of the Azure Blob Storage. */
public static final String GRAVITINO_ABS_ACCOUNT_NAME = "abs-account-name";

/** Property key for the account key of the Azure Blob Storage. */
public static final String GRAVITINO_ABS_ACCOUNT_KEY = "abs-account-key";
}
1 change: 1 addition & 0 deletions catalogs/catalog-hadoop/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ dependencies {
testImplementation(project(":bundles:aws-bundle"))
testImplementation(project(":bundles:gcp-bundle"))
testImplementation(project(":bundles:aliyun-bundle"))
testImplementation(project(":bundles:azure-bundle"))

testImplementation(libs.minikdc)
testImplementation(libs.hadoop3.minicluster)
Expand Down
Loading

0 comments on commit 79c362c

Please sign in to comment.