Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#478] feat(core, catalogs): Refactor catalog configurations #489

Merged
merged 25 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
1cd1fc9
Refactor config
yuqi1129 Oct 10, 2023
9dd8165
Merge branch 'main' of github.com:datastrato/graviton into issue_478
yuqi1129 Oct 10, 2023
99cf636
Merge branch 'main' of github.com:datastrato/graviton into issue_478
yuqi1129 Oct 11, 2023
37c7947
Changes according to discussion
yuqi1129 Oct 11, 2023
1d051a3
Remove unused code and add some description
yuqi1129 Oct 11, 2023
109a2ad
Add some description
yuqi1129 Oct 11, 2023
805d511
Fix some minor issues
yuqi1129 Oct 11, 2023
2de0e66
Format code
yuqi1129 Oct 11, 2023
69aa11b
Fix discussion one
yuqi1129 Oct 11, 2023
ec70017
Fix discussion about file name and descriptions
yuqi1129 Oct 12, 2023
78941ca
Fix comments
yuqi1129 Oct 12, 2023
9d24e45
Remove used code
yuqi1129 Oct 12, 2023
1f6c235
Remove used code again
yuqi1129 Oct 12, 2023
97d31e8
Change the format of the name for template file
yuqi1129 Oct 12, 2023
249334b
Only configuration files with specific names will be copied to classpath
yuqi1129 Oct 12, 2023
b0432dd
Changes names and some minor logic according to review comments
yuqi1129 Oct 12, 2023
c1b6e33
Remove some incorrect comments
yuqi1129 Oct 12, 2023
6973eaf
Changes name of some variants in `buildLibAndResourcePaths`
yuqi1129 Oct 12, 2023
30ab0ea
Remove unnecessary code
yuqi1129 Oct 12, 2023
299c33c
Separate config and package
yuqi1129 Oct 12, 2023
2677a7c
Remove unused code
yuqi1129 Oct 12, 2023
2473b24
Remove unused code
yuqi1129 Oct 12, 2023
1138d3f
Refactor the logic of getting config file for specific catalog
yuqi1129 Oct 12, 2023
8474e5f
Fix comments by xiaojin
yuqi1129 Oct 12, 2023
7b42c7d
refine code
yuqi1129 Oct 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ tasks {
val outputDir = projectDir.dir("distribution")

val compileDistribution by registering {
dependsOn("copySubprojectDependencies", "copyCatalogLibs", "copySubprojectLib")
dependsOn("copySubprojectDependencies", "copyCatalogLibAndConfigs", "copySubprojectLib")

group = "graviton distribution"
outputs.dir(projectDir.dir("distribution/package"))
Expand Down Expand Up @@ -230,9 +230,9 @@ tasks {
}
}

val copyCatalogLibs by registering(Copy::class) {
dependsOn(":catalogs:catalog-hive:copyCatalogLibs",
":catalogs:catalog-lakehouse-iceberg:copyCatalogLibs")
val copyCatalogLibAndConfigs by registering(Copy::class) {
dependsOn(":catalogs:catalog-hive:copyLibAndConfig",
":catalogs:catalog-lakehouse-iceberg:copyLibAndConfig")
}

clean {
Expand Down
22 changes: 22 additions & 0 deletions catalogs/catalog-hive/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,26 @@ tasks {
from("build/libs")
into("${rootDir}/distribution/package/catalogs/hive/libs")
}

val copyCatalogConfig by registering(Copy::class) {
from("src/main/resources")
yuqi1129 marked this conversation as resolved.
Show resolved Hide resolved
FANNG1 marked this conversation as resolved.
Show resolved Hide resolved
into("${rootDir}/distribution/package/catalogs/hive/conf")

include("hive.conf")
include("hive-site.xml.template")

rename { original -> if (original.endsWith(".template")) {
original.replace(".template", "")
} else {
original
}}

exclude { details ->
details.file.isDirectory()
}
}

val copyLibAndConfig by registering(Copy::class) {
dependsOn(copyCatalogConfig, copyCatalogLibs)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/
package com.datastrato.graviton.catalog.hive;

import static com.datastrato.graviton.catalog.BaseCatalog.CATALOG_BYPASS_PREFIX;
import static com.datastrato.graviton.catalog.hive.HiveTable.SUPPORT_TABLE_TYPES;
import static com.datastrato.graviton.catalog.hive.HiveTablePropertiesMetadata.COMMENT;
import static com.datastrato.graviton.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE;
Expand Down Expand Up @@ -63,7 +64,7 @@ public class HiveCatalogOperations implements CatalogOperations, SupportsSchemas

@VisibleForTesting HiveClientPool clientPool;

private HiveConf hiveConf;
@VisibleForTesting HiveConf hiveConf;

private final CatalogEntity entity;

Expand All @@ -90,6 +91,15 @@ public void initialize(Map<String, String> conf) throws RuntimeException {
conf.forEach(hadoopConf::set);
yuqi1129 marked this conversation as resolved.
Show resolved Hide resolved
hiveConf = new HiveConf(hadoopConf, HiveCatalogOperations.class);

// Overwrite hive conf with graviton conf if exists
conf.forEach(
(key, value) -> {
if (key.startsWith(CATALOG_BYPASS_PREFIX)) {
// Trim bypass prefix and pass it to hive conf
hiveConf.set(key.substring(CATALOG_BYPASS_PREFIX.length()), value);
}
});

// todo(xun): add hive client pool size in config
this.clientPool = new HiveClientPool(1, hiveConf);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!--
~ Copyright 2023 Datastrato.
~ This software is licensed under the Apache License version 2.
-->
<configuration>
</configuration>

12 changes: 12 additions & 0 deletions catalogs/catalog-hive/src/main/resources/hive.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# Copyright 2023 Datastrato.
# This software is licensed under the Apache License version 2.
#

## This file holds common properties for hive catalog and metastore, for more, please refer to
## `org.apache.hadoop.hive.conf.HiveConf`

## If we want to specify Hive catalog-related configuration like 'hive.metastore.client.capability.check', we can do it like this:
## graviton.bypass.hive.metastore.client.capability.check = true, and 'graviton.bypass' is the prefix that
## the configuration will be directly by pass to backend engine, and 'hive.metastore.client.capability.check' is the configuration key.
graviton.bypass.hive.metastore.client.capability.check = false
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright 2023 Datastrato.
* This software is licensed under the Apache License version 2.
*/

package com.datastrato.graviton.catalog.hive;

import static com.datastrato.graviton.catalog.BaseCatalog.CATALOG_BYPASS_PREFIX;

import com.google.common.collect.Maps;
import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

class TestHiveCatalogOperations {
@Test
void testInitialize() {
Map<String, String> properties = Maps.newHashMap();
HiveCatalogOperations hiveCatalogOperations = new HiveCatalogOperations(null);
hiveCatalogOperations.initialize(properties);
String v = hiveCatalogOperations.hiveConf.get("mapreduce.job.reduces");
Assertions.assertEquals("10", v);

// Test If we can override the value in hive-site.xml
properties.put(CATALOG_BYPASS_PREFIX + "mapreduce.job.reduces", "20");
hiveCatalogOperations.initialize(properties);
v = hiveCatalogOperations.hiveConf.get("mapreduce.job.reduces");
Assertions.assertEquals("20", v);

// Test If user properties can override the value in hive-site.xml
properties.clear();
properties.put("mapreduce.job.reduces", "30");
hiveCatalogOperations.initialize(properties);
v = hiveCatalogOperations.hiveConf.get("mapreduce.job.reduces");
Assertions.assertEquals("30", v);
}
}
16 changes: 16 additions & 0 deletions catalogs/catalog-hive/src/test/resources/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!--
~ Copyright 2023 Datastrato.
~ This software is licensed under the Apache License version 2.
-->
<configuration>
<property>
<name>hive.metastore.client.capability.check</name>
<value>true</value>
</property>

<property>
FANNG1 marked this conversation as resolved.
Show resolved Hide resolved
<name>mapreduce.job.reduces</name>
<value>10</value>
</property>
</configuration>

23 changes: 23 additions & 0 deletions catalogs/catalog-lakehouse-iceberg/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,27 @@ tasks {
from("build/libs")
into("${rootDir}/distribution/package/catalogs/lakehouse-iceberg/libs")
}

val copyCatalogConfig by registering(Copy::class) {
from("src/main/resources")
into("${rootDir}/distribution/package/catalogs/lakehouse-iceberg/conf")

include("lakehouse-iceberg.conf")
include("hive-site.xml.template")
include("hdfs-site.xml.template")

rename { original -> if (original.endsWith(".template")) {
original.replace(".template", "")
} else {
original
}}

exclude { details ->
details.file.isDirectory()
}
}

val copyLibAndConfig by registering(Copy::class) {
dependsOn(copyCatalogLibs, copyCatalogConfig)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!--
~ Copyright 2023 Datastrato.
~ This software is licensed under the Apache License version 2.
-->
<configuration>
</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!--
~ Copyright 2023 Datastrato.
~ This software is licensed under the Apache License version 2.
-->
<configuration>
</configuration>

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#
# Copyright 2023 Datastrato.
yuqi1129 marked this conversation as resolved.
Show resolved Hide resolved
# This software is licensed under the Apache License version 2.
#

## This file holds common configurations for Lakehouse-iceberg catalog. The format of the key is
## 'graviton.bypass.{iceberg-inner-config-key}' and `iceberg-inner-config-key` is the
## real key that pass to Lakehouse-iceberg catalog.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Streams;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -79,8 +80,8 @@ public GravitonAuxiliaryService loadAuxService(
}

@VisibleForTesting
public IsolatedClassLoader getIsolatedClassLoader(String classPath) {
return IsolatedClassLoader.buildClassLoader(classPath);
public IsolatedClassLoader getIsolatedClassLoader(List<String> classPaths) {
return IsolatedClassLoader.buildClassLoader(classPaths);
}

@VisibleForTesting
Expand Down Expand Up @@ -115,7 +116,7 @@ private void registerAuxService(String auxServiceName, Map<String, String> confi
classPath = getValidPath(auxServiceName, classPath);
LOG.info("AuxService name:{}, config:{}, classpath:{}", auxServiceName, config, classPath);

IsolatedClassLoader isolatedClassLoader = getIsolatedClassLoader(classPath);
IsolatedClassLoader isolatedClassLoader = getIsolatedClassLoader(Lists.newArrayList(classPath));
try {
GravitonAuxiliaryService gravitonAuxiliaryService =
loadAuxService(auxServiceName, isolatedClassLoader);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* The abstract base class for Catalog implementations.
Expand All @@ -27,13 +29,22 @@
public abstract class BaseCatalog<T extends BaseCatalog>
implements Catalog, CatalogProvider, HasPropertyMetadata {

private static final Logger LOG = LoggerFactory.getLogger(BaseCatalog.class);

private CatalogEntity entity;

private Map<String, String> conf;

private volatile CatalogOperations ops;

private volatile Map<String, String> properties;

// Any Graviton configuration that starts with this prefix will be trim and passed to the specific
// catalog implementation. For example, if the configuration is
// "graviton.bypass.hive.metastore.uris",
// then we will trim the prefix and pass "hive.metastore.uris" to the hive client configurations.
public static final String CATALOG_BYPASS_PREFIX = "graviton.bypass.";

/**
* Creates a new instance of CatalogOperations.
*
Expand Down
Loading