Skip to content

Commit

Permalink
[#3919] feat(catalog-lakehouse-paimon): Support hive backend for Paim…
Browse files Browse the repository at this point in the history
…on Catalog (#5092)

### What changes were proposed in this pull request?
Support hive backend for Paimon Catalog

### Why are the changes needed?

Fix: #3919

### Does this PR introduce _any_ user-facing change?
Documentation will be added in a follow-up PR.

### How was this patch tested?
New unit tests and integration tests were added.

---------

Co-authored-by: caican <[email protected]>
  • Loading branch information
caican00 and caican authored Oct 21, 2024
1 parent 577c166 commit c8c66a4
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 10 deletions.
34 changes: 32 additions & 2 deletions catalogs/catalog-lakehouse-paimon/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,9 @@ dependencies {
exclude("com.sun.jersey")
exclude("javax.servlet")
exclude("org.apache.curator")
exclude("org.apache.hive")
exclude("org.apache.hbase")
exclude("org.apache.zookeeper")
exclude("org.eclipse.jetty.aggregate:jetty-all")
exclude("org.eclipse.jetty.aggregate")
exclude("org.mortbay.jetty")
exclude("org.mortbay.jetty:jetty")
exclude("org.mortbay.jetty:jetty-util")
Expand All @@ -67,9 +66,40 @@ dependencies {
exclude("org.apache.parquet:parquet-encoding")
exclude("org.apache.parquet:parquet-common")
exclude("org.apache.parquet:parquet-hadoop")
exclude("org.apache.parquet:parquet-hadoop-bundle")
exclude("org.apache.paimon:paimon-codegen-loader")
exclude("org.apache.paimon:paimon-shade-caffeine-2")
exclude("org.apache.paimon:paimon-shade-guava-30")
exclude("org.apache.hive:hive-service-rpc")
exclude("org.apache.logging.log4j")
exclude("com.google.guava")
exclude("commons-lang")
exclude("org.slf4j")
exclude("org.apache.orc")
exclude("org.apache.httpcomponents")
exclude("jline")
exclude("org.eclipse.jetty.orbit")
exclude("org.apache.ant")
exclude("com.tdunning")
exclude("io.dropwizard.metrics")
exclude("com.github.joshelser")
exclude("commons-codec")
exclude("commons-cli")
exclude("tomcat")
exclude("org.apache.avro")
exclude("net.sf.opencsv")
exclude("javolution")
exclude("com.jolbox")
exclude("com.zaxxer")
exclude("org.apache.derby")
exclude("org.datanucleus")
exclude("commons-pool")
exclude("commons-dbcp")
exclude("javax.jdo")
exclude("org.antlr")
exclude("co.cask.tephra")
exclude("com.google.code.findbugs")
exclude("com.github.spotbugs")
}
implementation(libs.bundles.log4j)
implementation(libs.commons.lang3)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@
/** The type of Apache Paimon catalog backend. */
public enum PaimonCatalogBackend {
  /** Warehouse stored directly on a filesystem (local, HDFS, S3, OSS). */
  FILESYSTEM,
  /** Metadata tracked in a JDBC-accessible database. */
  JDBC,
  /** Metadata tracked in a Hive Metastore. */
  HIVE
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
protected String jdbcPassword;
protected Catalog catalog;
protected org.apache.paimon.catalog.Catalog paimonCatalog;
protected SparkSession spark;
protected String metalakeName = GravitinoITUtils.genRandomName("paimon_it_metalake");
protected String catalogName = GravitinoITUtils.genRandomName("paimon_it_catalog");
protected String schemaName = GravitinoITUtils.genRandomName("paimon_it_schema");
Expand All @@ -115,8 +116,8 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
private static final String alertTableName = "alert_table_name";
private static String INSERT_BATCH_WITHOUT_PARTITION_TEMPLATE = "INSERT INTO paimon.%s VALUES %s";
private static final String SELECT_ALL_TEMPLATE = "SELECT * FROM paimon.%s";
private static final String DEFAULT_DB = "default";
private GravitinoMetalake metalake;
protected SparkSession spark;
private Map<String, String> catalogProperties;

@BeforeAll
Expand Down Expand Up @@ -727,7 +728,7 @@ public void testAlterPaimonTable() {
// update column position
Column col1 = Column.of("name", Types.StringType.get(), "comment");
Column col2 = Column.of("address", Types.StringType.get(), "comment");
Column col3 = Column.of("date_of_birth", Types.DateType.get(), "comment");
Column col3 = Column.of("date_of_birth", Types.StringType.get(), "comment");

Column[] newColumns = new Column[] {col1, col2, col3};
NameIdentifier tableIdentifier =
Expand Down Expand Up @@ -874,7 +875,13 @@ void testOperationDataOfPaimonTable() {
private void clearTableAndSchema() {
SupportsSchemas supportsSchema = catalog.asSchemas();
Arrays.stream(supportsSchema.listSchemas())
.forEach(schema -> supportsSchema.dropSchema(schema, true));
.forEach(
schema -> {
// can not drop default database for hive backend.
if (!DEFAULT_DB.equalsIgnoreCase(schema)) {
supportsSchema.dropSchema(schema, true);
}
});
}

private void createMetalake() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.gravitino.catalog.lakehouse.paimon.integration.test;

import com.google.common.collect.Maps;
import java.util.Map;
import org.apache.gravitino.NameIdentifier;
import org.apache.gravitino.Schema;
import org.apache.gravitino.SupportsSchemas;
import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata;
import org.apache.gravitino.integration.test.container.HiveContainer;
import org.apache.gravitino.integration.test.util.GravitinoITUtils;
import org.apache.paimon.catalog.Catalog;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;

@Tag("gravitino-docker-test")
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public class CatalogPaimonHiveIT extends CatalogPaimonBaseIT {

  /**
   * Builds the catalog properties for a Paimon catalog backed by a Hive Metastore.
   *
   * <p>Points the warehouse at the dockerized Hive container's HDFS and the URI at its Thrift
   * metastore endpoint; `containerSuite` is provided by the base IT class.
   *
   * @return catalog properties with backend, warehouse, and URI configured for the Hive backend
   */
  @Override
  protected Map<String, String> initPaimonCatalogProperties() {
    Map<String, String> catalogProperties = Maps.newHashMap();
    catalogProperties.put("key1", "val1");
    catalogProperties.put("key2", "val2");

    TYPE = "hive";
    WAREHOUSE =
        String.format(
            "hdfs://%s:%d/user/hive/warehouse-catalog-paimon/",
            containerSuite.getHiveContainer().getContainerIpAddress(),
            HiveContainer.HDFS_DEFAULTFS_PORT);
    URI =
        String.format(
            "thrift://%s:%d",
            containerSuite.getHiveContainer().getContainerIpAddress(),
            HiveContainer.HIVE_METASTORE_PORT);

    catalogProperties.put(PaimonCatalogPropertiesMetadata.GRAVITINO_CATALOG_BACKEND, TYPE);
    catalogProperties.put(PaimonCatalogPropertiesMetadata.WAREHOUSE, WAREHOUSE);
    catalogProperties.put(PaimonCatalogPropertiesMetadata.URI, URI);

    return catalogProperties;
  }

  /**
   * Verifies that a schema property set at creation time survives a create/load round trip both
   * through the Gravitino schema API and through the underlying Paimon catalog.
   *
   * @throws Catalog.DatabaseNotExistException if the schema is missing from the Paimon catalog
   */
  @Test
  void testPaimonSchemaProperties() throws Catalog.DatabaseNotExistException {
    SupportsSchemas schemas = catalog.asSchemas();

    // create schema check.
    String testSchemaName = GravitinoITUtils.genRandomName("test_schema_1");
    NameIdentifier schemaIdent = NameIdentifier.of(metalakeName, catalogName, testSchemaName);
    Map<String, String> schemaProperties = Maps.newHashMap();
    schemaProperties.put("key", "hive");
    Schema createdSchema =
        schemas.createSchema(schemaIdent.name(), schema_comment, schemaProperties);
    // JUnit's assertEquals is (expected, actual); keeping that order makes failure
    // messages read correctly.
    Assertions.assertEquals("hive", createdSchema.properties().get("key"));

    // load schema check.
    Schema schema = schemas.loadSchema(schemaIdent.name());
    Assertions.assertEquals("hive", schema.properties().get("key"));
    Map<String, String> loadedProps = paimonCatalog.loadDatabaseProperties(schemaIdent.name());
    Assertions.assertEquals("hive", loadedProps.get("key"));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@
import java.util.function.Consumer;
import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogBackend;
import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConfig;
import org.apache.gravitino.integration.test.container.ContainerSuite;
import org.apache.gravitino.integration.test.container.HiveContainer;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.FileSystemCatalog;
import org.apache.paimon.factories.FactoryException;
import org.apache.paimon.hive.HiveCatalog;
import org.apache.paimon.jdbc.JdbcCatalog;
import org.junit.jupiter.api.Test;

Expand All @@ -44,6 +47,8 @@ void testLoadCatalogBackend() throws Exception {
assertCatalog(PaimonCatalogBackend.FILESYSTEM.name(), FileSystemCatalog.class);
// Test load JdbcCatalog for jdbc metastore.
assertCatalog(PaimonCatalogBackend.JDBC.name(), JdbcCatalog.class);
// Test load HiveCatalog for hive metastore.
assertCatalog(PaimonCatalogBackend.HIVE.name(), HiveCatalog.class);
// Test load catalog exception for other metastore.
assertThrowsExactly(FactoryException.class, () -> assertCatalog("other", catalog -> {}));
}
Expand All @@ -66,7 +71,7 @@ private void assertCatalog(String metastore, Consumer<Catalog> consumer) throws
System.getProperty("java.io.tmpdir"),
"paimon_catalog_warehouse"),
PaimonConfig.CATALOG_URI.getKey(),
"jdbc:h2:mem:testdb",
generateUri(metastore),
PaimonConfig.CATALOG_JDBC_USER.getKey(),
"user",
PaimonConfig.CATALOG_JDBC_PASSWORD.getKey(),
Expand All @@ -75,4 +80,20 @@ private void assertCatalog(String metastore, Consumer<Catalog> consumer) throws
consumer.accept(catalog);
}
}

private static String generateUri(String metastore) {
    // The Hive backend needs a live metastore; spin up the shared Hive container lazily.
    if (PaimonCatalogBackend.HIVE.name().equalsIgnoreCase(metastore)) {
      ContainerSuite suite = ContainerSuite.getInstance();
      suite.startHiveContainer();
      return String.format(
          "thrift://%s:%d",
          suite.getHiveContainer().getContainerIpAddress(),
          HiveContainer.HIVE_METASTORE_PORT);
    }
    // The JDBC backend runs against an in-memory H2 database.
    if (PaimonCatalogBackend.JDBC.name().equalsIgnoreCase(metastore)) {
      return "jdbc:h2:mem:testdb";
    }
    // Other backends (e.g. filesystem) ignore the URI, so any placeholder works.
    return "uri";
  }
}
8 changes: 5 additions & 3 deletions docs/lakehouse-paimon-catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,16 @@ Builds with Apache Paimon `0.8.0`.

### Catalog capabilities

- Works as a catalog proxy, supporting `FilesystemCatalog` and `JdbcCatalog`.
- Works as a catalog proxy, supporting `FilesystemCatalog`, `JdbcCatalog` and `HiveCatalog`.
- Supports DDL operations for Paimon schemas and tables.

- Doesn't support `HiveCatalog` catalog backend now.
- Doesn't support alterSchema.

### Catalog properties

| Property name | Description | Default value | Required | Since Version |
|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------------------------------------------|-------------------|
| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Supports `filesystem` and `jdbc` now. | (none) | Yes | 0.6.0-incubating |
| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Supports `filesystem`, `jdbc` and `hive`. | (none) | Yes | 0.6.0-incubating |
| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0-incubating |
| `warehouse` | Warehouse directory of catalog. `file:///user/hive/warehouse-paimon/` for local fs, `hdfs://namespace/hdfs/path` for HDFS , `s3://{bucket-name}/path/` for S3 or `oss://{bucket-name}/path` for Aliyun OSS | (none) | Yes | 0.6.0-incubating |
| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0-incubating |
Expand All @@ -51,6 +50,9 @@ Builds with Apache Paimon `0.8.0`.
If you want to use the `oss` or `s3` warehouse, you need to place related jars in the `catalogs/lakehouse-paimon/lib` directory, more information can be found in the [Paimon S3](https://paimon.apache.org/docs/master/filesystems/s3/).
:::

:::note
The Hive backend does not currently support Kerberos authentication.
:::

Any properties not defined by Gravitino with `gravitino.bypass.` prefix will pass to Paimon catalog properties and HDFS configuration. For example, if specify `gravitino.bypass.table.type`, `table.type` will pass to Paimon catalog properties.

Expand Down

0 comments on commit c8c66a4

Please sign in to comment.