From 73eb7be3a9147f79aaa9ccdf69752fe2c5b4abdb Mon Sep 17 00:00:00 2001 From: mchades Date: Fri, 20 Sep 2024 17:24:54 +0800 Subject: [PATCH] add user doc for Hudi catalog --- MAINTAINERS.md | 4 +- .../storage/kv/TestEntityKeyEncoding.java | 3 +- .../storage/kv/TestKvEntityStorage.java | 3 +- .../storage/kv/TestKvGarbageCollector.java | 3 +- .../storage/kv/TestKvNameMappingService.java | 3 +- .../storage/kv/TestRocksDBKvBackend.java | 3 +- .../storage/kv/TestStorageVersion.java | 3 +- .../kv/TestTransactionIdGenerator.java | 3 +- .../kv/TestTransactionalKvBackend.java | 3 +- docs/apache-hive-catalog.md | 2 +- docs/gravitino-server-config.md | 66 +++++------ docs/hadoop-catalog.md | 14 +-- docs/how-to-use-relational-backend-storage.md | 6 +- docs/iceberg-rest-service.md | 18 +-- docs/jdbc-doris-catalog.md | 20 ++-- docs/jdbc-mysql-catalog.md | 42 +++---- docs/jdbc-postgresql-catalog.md | 2 +- docs/lakehouse-hudi-catalog.md | 110 ++++++++++++++++++ docs/lakehouse-iceberg-catalog.md | 58 ++++----- docs/lakehouse-paimon-catalog.md | 24 ++-- ...age-relational-metadata-using-gravitino.md | 30 +++-- 21 files changed, 272 insertions(+), 148 deletions(-) create mode 100644 docs/lakehouse-hudi-catalog.md diff --git a/MAINTAINERS.md b/MAINTAINERS.md index f9b693345dc..5c8a466cef5 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -61,8 +61,8 @@ something like that, all PRs should have related issues. 6. After PR is merged, please check the related issue: - If the issue is not closed, please close it as fixed manually. - Assign the issue "Assignees" to the PR author. - - Starting from 0.6.0, we will use the "labels" to manage the release versions, so please add - the corresponding labels to the issue. For example, if the issue is fixed in 0.6.0, please + - Starting from 0.6.0-incubating, we will use the "labels" to manage the release versions, so please add + the corresponding labels to the issue. 
For example, if the issue is fixed in 0.6.0-incubating, please add the label "0.6.0". If the issue is fixed both in 0.6.0 and 0.5.1, please add both labels. ## Policy on backporting bug fixes diff --git a/core/src/test/java/org/apache/gravitino/storage/kv/TestEntityKeyEncoding.java b/core/src/test/java/org/apache/gravitino/storage/kv/TestEntityKeyEncoding.java index dcf16f6884c..720af031695 100644 --- a/core/src/test/java/org/apache/gravitino/storage/kv/TestEntityKeyEncoding.java +++ b/core/src/test/java/org/apache/gravitino/storage/kv/TestEntityKeyEncoding.java @@ -56,7 +56,8 @@ import org.mockito.Mockito; @TestInstance(Lifecycle.PER_CLASS) -@Disabled("Gravitino will not support KV entity store since 0.6.0, so we disable this test.") +@Disabled( + "Gravitino will not support KV entity store since 0.6.0-incubating, so we disable this test.") public class TestEntityKeyEncoding { private Config getConfig() throws IOException { File baseDir = new File(System.getProperty("java.io.tmpdir")); diff --git a/core/src/test/java/org/apache/gravitino/storage/kv/TestKvEntityStorage.java b/core/src/test/java/org/apache/gravitino/storage/kv/TestKvEntityStorage.java index 75c3f30ba7c..b731696e288 100644 --- a/core/src/test/java/org/apache/gravitino/storage/kv/TestKvEntityStorage.java +++ b/core/src/test/java/org/apache/gravitino/storage/kv/TestKvEntityStorage.java @@ -63,7 +63,8 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; -@Disabled("Gravitino will not support KV entity store since 0.6.0, so we disable this test.") +@Disabled( + "Gravitino will not support KV entity store since 0.6.0-incubating, so we disable this test.") public class TestKvEntityStorage extends TestEntityStorage { @BeforeEach @AfterEach diff --git a/core/src/test/java/org/apache/gravitino/storage/kv/TestKvGarbageCollector.java b/core/src/test/java/org/apache/gravitino/storage/kv/TestKvGarbageCollector.java index 457ecdae9ba..eee6f42aa89 100644 --- 
a/core/src/test/java/org/apache/gravitino/storage/kv/TestKvGarbageCollector.java +++ b/core/src/test/java/org/apache/gravitino/storage/kv/TestKvGarbageCollector.java @@ -64,7 +64,8 @@ import org.mockito.Mockito; @SuppressWarnings("DefaultCharset") -@Disabled("Gravitino will not support KV entity store since 0.6.0, so we disable this test.") +@Disabled( + "Gravitino will not support KV entity store since 0.6.0-incubating, so we disable this test.") class TestKvGarbageCollector { public Config getConfig() throws IOException { Config config = Mockito.mock(Config.class); diff --git a/core/src/test/java/org/apache/gravitino/storage/kv/TestKvNameMappingService.java b/core/src/test/java/org/apache/gravitino/storage/kv/TestKvNameMappingService.java index 6736e8b8879..d9c85311a30 100644 --- a/core/src/test/java/org/apache/gravitino/storage/kv/TestKvNameMappingService.java +++ b/core/src/test/java/org/apache/gravitino/storage/kv/TestKvNameMappingService.java @@ -41,7 +41,8 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; -@Disabled("Gravitino will not support KV entity store since 0.6.0, so we disable this test.") +@Disabled( + "Gravitino will not support KV entity store since 0.6.0-incubating, so we disable this test.") public class TestKvNameMappingService { private Config getConfig() throws IOException { File baseDir = new File(System.getProperty("java.io.tmpdir")); diff --git a/core/src/test/java/org/apache/gravitino/storage/kv/TestRocksDBKvBackend.java b/core/src/test/java/org/apache/gravitino/storage/kv/TestRocksDBKvBackend.java index 123f153a805..77b03beeb29 100644 --- a/core/src/test/java/org/apache/gravitino/storage/kv/TestRocksDBKvBackend.java +++ b/core/src/test/java/org/apache/gravitino/storage/kv/TestRocksDBKvBackend.java @@ -36,7 +36,8 @@ import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; -@Disabled("Gravitino will not support KV entity store since 0.6.0, so we disable this test.") +@Disabled( + "Gravitino will not support KV 
entity store since 0.6.0-incubating, so we disable this test.") public class TestRocksDBKvBackend { private KvBackend getKvBackEnd() throws IOException { diff --git a/core/src/test/java/org/apache/gravitino/storage/kv/TestStorageVersion.java b/core/src/test/java/org/apache/gravitino/storage/kv/TestStorageVersion.java index 84e0713edeb..2290d223721 100644 --- a/core/src/test/java/org/apache/gravitino/storage/kv/TestStorageVersion.java +++ b/core/src/test/java/org/apache/gravitino/storage/kv/TestStorageVersion.java @@ -41,7 +41,8 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; -@Disabled("Gravitino will not support KV entity store since 0.6.0, so we disable this test.") +@Disabled( + "Gravitino will not support KV entity store since 0.6.0-incubating, so we disable this test.") class TestStorageVersion { @Test diff --git a/core/src/test/java/org/apache/gravitino/storage/kv/TestTransactionIdGenerator.java b/core/src/test/java/org/apache/gravitino/storage/kv/TestTransactionIdGenerator.java index 70330d48f0c..729dbc5f150 100644 --- a/core/src/test/java/org/apache/gravitino/storage/kv/TestTransactionIdGenerator.java +++ b/core/src/test/java/org/apache/gravitino/storage/kv/TestTransactionIdGenerator.java @@ -42,7 +42,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@Disabled("Gravitino will not support KV entity store since 0.6.0, so we disable this test.") +@Disabled( + "Gravitino will not support KV entity store since 0.6.0-incubating, so we disable this test.") public class TestTransactionIdGenerator { private static final Logger LOGGER = LoggerFactory.getLogger(TestTransactionalKvBackend.class); diff --git a/core/src/test/java/org/apache/gravitino/storage/kv/TestTransactionalKvBackend.java b/core/src/test/java/org/apache/gravitino/storage/kv/TestTransactionalKvBackend.java index 2528c78ec96..f5001767809 100644 --- a/core/src/test/java/org/apache/gravitino/storage/kv/TestTransactionalKvBackend.java +++ 
b/core/src/test/java/org/apache/gravitino/storage/kv/TestTransactionalKvBackend.java @@ -54,7 +54,8 @@ import org.slf4j.LoggerFactory; @SuppressWarnings("DefaultCharset") -@Disabled("Gravitino will not support KV entity store since 0.6.0, so we disable this test.") +@Disabled( + "Gravitino will not support KV entity store since 0.6.0-incubating, so we disable this test.") class TestTransactionalKvBackend { private static final Logger LOGGER = LoggerFactory.getLogger(TestTransactionalKvBackend.class); diff --git a/docs/apache-hive-catalog.md b/docs/apache-hive-catalog.md index 8dd6ed09467..2d4f98d0faf 100644 --- a/docs/apache-hive-catalog.md +++ b/docs/apache-hive-catalog.md @@ -125,7 +125,7 @@ The following table lists the data types mapped from the Hive catalog to Graviti | `uniontype` | `uniontype` | 0.2.0 | :::info -Since 0.6.0, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type from the Hive catalog. +Since 0.6.0-incubating, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type from the Hive catalog. ::: ### Table properties diff --git a/docs/gravitino-server-config.md b/docs/gravitino-server-config.md index 4cc1dfe3c2b..f096a6801f8 100644 --- a/docs/gravitino-server-config.md +++ b/docs/gravitino-server-config.md @@ -23,20 +23,20 @@ The `gravitino.conf` file lists the configuration items in the following table. 
### Apache Gravitino HTTP Server configuration -| Configuration item | Description | Default value | Required | Since version | -|------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------|----------|---------------| -| `gravitino.server.webserver.host` | The host of the Gravitino server. | `0.0.0.0` | No | 0.1.0 | -| `gravitino.server.webserver.httpPort` | The port on which the Gravitino server listens for incoming connections. | `8090` | No | 0.1.0 | -| `gravitino.server.webserver.minThreads` | The minimum number of threads in the thread pool used by the Jetty webserver. `minThreads` is 8 if the value is less than 8. | `Math.max(Math.min(Runtime.getRuntime().availableProcessors() * 2, 100), 8)` | No | 0.2.0 | -| `gravitino.server.webserver.maxThreads` | The maximum number of threads in the thread pool used by the Jetty webserver. `maxThreads` is 8 if the value is less than 8, and `maxThreads` must be great or equal to `minThreads`. | `Math.max(Runtime.getRuntime().availableProcessors() * 4, 400)` | No | 0.1.0 | -| `gravitino.server.webserver.threadPoolWorkQueueSize` | The size of the queue in the thread pool used by the Jetty webserver. | `100` | No | 0.1.0 | -| `gravitino.server.webserver.stopTimeout` | Time in milliseconds to gracefully shut down the Jetty webserver, for more, please see `org.eclipse.jetty.server.Server#setStopTimeout`. | `30000` | No | 0.2.0 | -| `gravitino.server.webserver.idleTimeout` | The timeout in milliseconds of idle connections. | `30000` | No | 0.2.0 | -| `gravitino.server.webserver.requestHeaderSize` | Maximum size of HTTP requests. | `131072` | No | 0.1.0 | -| `gravitino.server.webserver.responseHeaderSize` | Maximum size of HTTP responses. 
| `131072` | No | 0.1.0 | -| `gravitino.server.shutdown.timeout` | Time in milliseconds to gracefully shut down of the Gravitino webserver. | `3000` | No | 0.2.0 | -| `gravitino.server.webserver.customFilters` | Comma-separated list of filter class names to apply to the API. | (none) | No | 0.4.0 | -| `gravitino.server.rest.extensionPackages` | Comma-separated list of REST API packages to expand | (none) | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since version | +|------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------|----------|------------------| +| `gravitino.server.webserver.host` | The host of the Gravitino server. | `0.0.0.0` | No | 0.1.0 | +| `gravitino.server.webserver.httpPort` | The port on which the Gravitino server listens for incoming connections. | `8090` | No | 0.1.0 | +| `gravitino.server.webserver.minThreads` | The minimum number of threads in the thread pool used by the Jetty webserver. `minThreads` is 8 if the value is less than 8. | `Math.max(Math.min(Runtime.getRuntime().availableProcessors() * 2, 100), 8)` | No | 0.2.0 | +| `gravitino.server.webserver.maxThreads` | The maximum number of threads in the thread pool used by the Jetty webserver. `maxThreads` is 8 if the value is less than 8, and `maxThreads` must be great or equal to `minThreads`. | `Math.max(Runtime.getRuntime().availableProcessors() * 4, 400)` | No | 0.1.0 | +| `gravitino.server.webserver.threadPoolWorkQueueSize` | The size of the queue in the thread pool used by the Jetty webserver. 
| `100` | No | 0.1.0 | +| `gravitino.server.webserver.stopTimeout` | Time in milliseconds to gracefully shut down the Jetty webserver, for more, please see `org.eclipse.jetty.server.Server#setStopTimeout`. | `30000` | No | 0.2.0 | +| `gravitino.server.webserver.idleTimeout` | The timeout in milliseconds of idle connections. | `30000` | No | 0.2.0 | +| `gravitino.server.webserver.requestHeaderSize` | Maximum size of HTTP requests. | `131072` | No | 0.1.0 | +| `gravitino.server.webserver.responseHeaderSize` | Maximum size of HTTP responses. | `131072` | No | 0.1.0 | +| `gravitino.server.shutdown.timeout` | Time in milliseconds to gracefully shut down of the Gravitino webserver. | `3000` | No | 0.2.0 | +| `gravitino.server.webserver.customFilters` | Comma-separated list of filter class names to apply to the API. | (none) | No | 0.4.0 | +| `gravitino.server.rest.extensionPackages` | Comma-separated list of REST API packages to expand | (none) | No | 0.6.0-incubating | The filter in the customFilters should be a standard javax servlet filter. You can also specify filter parameters by setting configuration entries of the form `gravitino.server.webserver..param.=`. @@ -50,20 +50,20 @@ If you are going to use H2 in the production environment, Gravitino will not gua The following table lists the storage configuration items: -| Configuration item | Description | Default value | Required | Since version | -|---------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------|--------------------------------------------------|---------------| -| `gravitino.entity.store` | Which entity storage implementation to use. Only`relational` storage is currently supported. 
| `relational` | No | 0.1.0 | -| `gravitino.entity.serde` | The serialization/deserialization class used to support entity storage. `proto' is currently supported. | `proto` | No | 0.1.0 | -| `gravitino.entity.store.maxTransactionSkewTimeMs` | The maximum skew time of transactions in milliseconds. | `2000` | No | 0.3.0 | -| `gravitino.entity.store.kv.deleteAfterTimeMs` | It is deprecated since Gravitino 0.5.0. Please use `gravitino.entity.store.deleteAfterTimeMs` instead. | `604800000`(7 days) | No | 0.3.0 | -| `gravitino.entity.store.deleteAfterTimeMs` | The maximum time in milliseconds that deleted and old-version data is kept. Set to at least 10 minutes and no longer than 30 days. | `604800000`(7 days) | No | 0.5.0 | -| `gravitino.entity.store.versionRetentionCount` | The Count of versions allowed to be retained, including the current version, used to delete old versions data. Set to at least 1 and no greater than 10. | `1` | No | 0.5.0 | -| `gravitino.entity.store.relational` | Detailed implementation of Relational storage. `H2`, `MySQL` and `PostgreSQL` is currently supported, and the implementation is `JDBCBackend`. | `JDBCBackend` | No | 0.5.0 | -| `gravitino.entity.store.relational.jdbcUrl` | The database url that the `JDBCBackend` needs to connect to. If you use `MySQL` or `PostgreSQL`, you should firstly initialize the database tables yourself by executing the ddl scripts in the `${GRAVITINO_HOME}/scripts/{DATABASE_TYPE}/` directory. | `jdbc:h2` | No | 0.5.0 | -| `gravitino.entity.store.relational.jdbcDriver` | The jdbc driver name that the `JDBCBackend` needs to use. You should place the driver Jar package in the `${GRAVITINO_HOME}/libs/` directory. | `org.h2.Driver` | Yes if the jdbc connection url is not `jdbc:h2` | 0.5.0 | -| `gravitino.entity.store.relational.jdbcUser` | The username that the `JDBCBackend` needs to use when connecting the database. It is required for `MySQL`. 
| `gravitino` | Yes if the jdbc connection url is not `jdbc:h2` | 0.5.0 | -| `gravitino.entity.store.relational.jdbcPassword` | The password that the `JDBCBackend` needs to use when connecting the database. It is required for `MySQL`. | `gravitino` | Yes if the jdbc connection url is not `jdbc:h2` | 0.5.0 | -| `gravitino.entity.store.relational.storagePath` | The storage path for embedded JDBC storage implementation. It supports both absolute and relative path, if the value is a relative path, the final path is `${GRAVITINO_HOME}/${PATH_YOU_HAVA_SET}`, default value is `${GRAVITINO_HOME}/data/jdbc` | `${GRAVITINO_HOME}/data/jdbc` | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since version | +|---------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------|-------------------------------------------------|------------------| +| `gravitino.entity.store` | Which entity storage implementation to use. Only `relational` storage is currently supported. | `relational` | No | 0.1.0 | +| `gravitino.entity.serde` | The serialization/deserialization class used to support entity storage. `proto` is currently supported. | `proto` | No | 0.1.0 | +| `gravitino.entity.store.maxTransactionSkewTimeMs` | The maximum skew time of transactions in milliseconds. | `2000` | No | 0.3.0 | +| `gravitino.entity.store.kv.deleteAfterTimeMs` | It is deprecated since Gravitino 0.5.0. Please use `gravitino.entity.store.deleteAfterTimeMs` instead. | `604800000`(7 days) | No | 0.3.0 | +| `gravitino.entity.store.deleteAfterTimeMs` | The maximum time in milliseconds that deleted and old-version data is kept. Set to at least 10 minutes and no longer than 30 days. 
| `604800000`(7 days) | No | 0.5.0 | +| `gravitino.entity.store.versionRetentionCount` | The Count of versions allowed to be retained, including the current version, used to delete old versions data. Set to at least 1 and no greater than 10. | `1` | No | 0.5.0 | +| `gravitino.entity.store.relational` | Detailed implementation of Relational storage. `H2`, `MySQL` and `PostgreSQL` is currently supported, and the implementation is `JDBCBackend`. | `JDBCBackend` | No | 0.5.0 | +| `gravitino.entity.store.relational.jdbcUrl` | The database url that the `JDBCBackend` needs to connect to. If you use `MySQL` or `PostgreSQL`, you should firstly initialize the database tables yourself by executing the ddl scripts in the `${GRAVITINO_HOME}/scripts/{DATABASE_TYPE}/` directory. | `jdbc:h2` | No | 0.5.0 | +| `gravitino.entity.store.relational.jdbcDriver` | The jdbc driver name that the `JDBCBackend` needs to use. You should place the driver Jar package in the `${GRAVITINO_HOME}/libs/` directory. | `org.h2.Driver` | Yes if the jdbc connection url is not `jdbc:h2` | 0.5.0 | +| `gravitino.entity.store.relational.jdbcUser` | The username that the `JDBCBackend` needs to use when connecting the database. It is required for `MySQL`. | `gravitino` | Yes if the jdbc connection url is not `jdbc:h2` | 0.5.0 | +| `gravitino.entity.store.relational.jdbcPassword` | The password that the `JDBCBackend` needs to use when connecting the database. It is required for `MySQL`. | `gravitino` | Yes if the jdbc connection url is not `jdbc:h2` | 0.5.0 | +| `gravitino.entity.store.relational.storagePath` | The storage path for embedded JDBC storage implementation. 
It supports both absolute and relative path, if the value is a relative path, the final path is `${GRAVITINO_HOME}/${PATH_YOU_HAVE_SET}`, default value is `${GRAVITINO_HOME}/data/jdbc` | `${GRAVITINO_HOME}/data/jdbc` | No | 0.6.0-incubating | :::caution @@ -171,11 +171,11 @@ These rules only apply to the catalog properties and don't affect the schema or Below is a list of catalog properties that will be used by all Gravitino catalogs: -| Configuration item | Description | Default value | Required | Since version | -|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `package` | The path of the catalog package, Gravitino leverages this path to load the related catalog libs and configurations. The package should consist two folders, `conf` (for catalog related configurations) and `libs` (for catalog related dependencies/jars) | (none) | No | 0.5.0 | -| `cloud.name` | The property to specify the cloud that the catalog is running on. The valid values are `aws`, `azure`, `gcp`, `on_premise` and `other`. | (none) | No | 0.6.0 | -| `cloud.region-code` | The property to specify the region code of the cloud that the catalog is running on. | (none) | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since version | +|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `package` | The path of the catalog package, Gravitino leverages this path to load the related catalog libs and configurations. 
The package should consist two folders, `conf` (for catalog related configurations) and `libs` (for catalog related dependencies/jars) | (none) | No | 0.5.0 | +| `cloud.name` | The property to specify the cloud that the catalog is running on. The valid values are `aws`, `azure`, `gcp`, `on_premise` and `other`. | (none) | No | 0.6.0-incubating | +| `cloud.region-code` | The property to specify the region code of the cloud that the catalog is running on. | (none) | No | 0.6.0-incubating | The following table lists the catalog specific properties and their default paths: diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index ca552091c7e..d6706ff3e1b 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -59,13 +59,13 @@ The Hadoop catalog supports creating, updating, deleting, and listing schema. ### Schema properties -| Property name | Description | Default value | Required | Since Version | -|----------------------------------------------------|----------------------------------------------------------------------------------------------------------------|---------------------------|----------|-----------------| -| `location` | The storage location managed by Hadoop schema. | (none) | No | 0.5.0 | -| `authentication.impersonation-enable` | Whether to enable impersonation for this schema of the Hadoop catalog. | The parent(catalog) value | No | 0.6.0 | -| `authentication.type` | The type of authentication for this schema of Hadoop catalog , currently we only support `kerberos`, `simple`. | The parent(catalog) value | No | 0.6.0 | -| `authentication.kerberos.principal` | The principal of the Kerberos authentication for this schema. | The parent(catalog) value | No | 0.6.0 | -| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication for this scheam. 
| The parent(catalog) value | No | 0.6.0 | +| Property name | Description | Default value | Required | Since Version | +|---------------------------------------|----------------------------------------------------------------------------------------------------------------|---------------------------|----------|------------------| +| `location` | The storage location managed by Hadoop schema. | (none) | No | 0.5.0 | +| `authentication.impersonation-enable` | Whether to enable impersonation for this schema of the Hadoop catalog. | The parent(catalog) value | No | 0.6.0-incubating | +| `authentication.type` | The type of authentication for this schema of Hadoop catalog, currently we only support `kerberos`, `simple`. | The parent(catalog) value | No | 0.6.0-incubating | +| `authentication.kerberos.principal` | The principal of the Kerberos authentication for this schema. | The parent(catalog) value | No | 0.6.0-incubating | +| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication for this schema. | The parent(catalog) value | No | 0.6.0-incubating | ### Schema operations diff --git a/docs/how-to-use-relational-backend-storage.md b/docs/how-to-use-relational-backend-storage.md index f3be053e711..e934fea4f25 100644 --- a/docs/how-to-use-relational-backend-storage.md +++ b/docs/how-to-use-relational-backend-storage.md @@ -6,8 +6,8 @@ license: "This software is licensed under the Apache License version 2." ## Introduction -Before the version `0.6.0`, Apache Gravitino supports KV and Relational backend storage to store metadata. -Since 0.6.0, Gravitino only supports using RDBMS as relational backend storage to store metadata. This doc will guide you on how to use the +Before the version `0.6.0-incubating`, Apache Gravitino supported KV and Relational backend storage to store metadata. +Since 0.6.0-incubating, Gravitino only supports using RDBMS as relational backend storage to store metadata. 
This doc will guide you on how to use the relational backend storage in Gravitino. Relational backend storage mainly aims to the users who are accustomed to using RDBMS to @@ -45,7 +45,7 @@ ${GRAVITINO_HOME}/scripts/mysql/ ``` The script name is like `schema-{version}-mysql.sql`, and the `version` depends on your Gravitino version. -For example, if your Gravitino version is `0.6.0`, then you can choose the **latest version** script. +For example, if your Gravitino version is `0.6.0-incubating`, then you can choose the **latest version** script. If you used a legacy script, you can use `upgrade-{old version}-to-{new version}-mysql.sql` to upgrade the schema. ### Step 2: Initialize the database diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md index a5760118cc1..f6b42a80d75 100644 --- a/docs/iceberg-rest-service.md +++ b/docs/iceberg-rest-service.md @@ -49,7 +49,7 @@ For detailed instructions on how to build and install the Gravitino server packa There are distinct configuration files for standalone and auxiliary server: `gravitino-iceberg-rest-server.conf` is used for the standalone server, while `gravitino.conf` is for the auxiliary server. Although the configuration files differ, the configuration items remain the same. -Starting with version `0.6.0`, the prefix `gravitino.auxService.iceberg-rest.` for auxiliary server configurations has been deprecated. If both `gravitino.auxService.iceberg-rest.key` and `gravitino.iceberg-rest.key` are present, the latter will take precedence. The configurations listed below use the `gravitino.iceberg-rest.` prefix. +Starting with version `0.6.0-incubating`, the prefix `gravitino.auxService.iceberg-rest.` for auxiliary server configurations has been deprecated. If both `gravitino.auxService.iceberg-rest.key` and `gravitino.iceberg-rest.key` are present, the latter will take precedence. The configurations listed below use the `gravitino.iceberg-rest.` prefix. 
### Configuration to enable Iceberg REST service in Gravitino server. @@ -88,14 +88,14 @@ Gravitino Iceberg REST server supports OAuth2 and HTTPS, please refer to [Securi For JDBC backend, you can use the `gravitino.iceberg-rest.jdbc.user` and `gravitino.iceberg-rest.jdbc.password` to authenticate the JDBC connection. For Hive backend, you can use the `gravitino.iceberg-rest.authentication.type` to specify the authentication type, and use the `gravitino.iceberg-rest.authentication.kerberos.principal` and `gravitino.iceberg-rest.authentication.kerberos.keytab-uri` to authenticate the Kerberos connection. The detailed configuration items are as follows: -| Configuration item | Description | Default value | Required | Since Version | -|---------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------------------------------------------------------------|---------------| -| `gravitino.iceberg-rest.authentication.type` | The type of authentication for Iceberg rest catalog backend. This configuration only applicable for for Hive backend, and only supports `Kerberos`, `simple` currently. As for JDBC backend, only username/password authentication was supported now. | `simple` | No | 0.6.0 | -| `gravitino.iceberg-rest.authentication.impersonation-enable` | Whether to enable impersonation for the Iceberg catalog | `false` | No | 0.6.0 | -| `gravitino.iceberg-rest.authentication.kerberos.principal` | The principal of the Kerberos authentication | (none) | required if the value of `gravitino.iceberg-rest.authentication.type` is Kerberos. | 0.6.0 | -| `gravitino.iceberg-rest.authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. 
| (none) | required if the value of `gravitino.iceberg-rest.authentication.type` is Kerberos. | 0.6.0 | -| `gravitino.iceberg-rest.authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Iceberg catalog. | 60 | No | 0.6.0 | -| `gravitino.iceberg-rest.authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since Version | +|---------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------------------------------------------------------------|------------------| +| `gravitino.iceberg-rest.authentication.type` | The type of authentication for Iceberg rest catalog backend. This configuration is only applicable for the Hive backend, and only supports `Kerberos`, `simple` currently. As for JDBC backend, only username/password authentication is supported now. | `simple` | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.authentication.impersonation-enable` | Whether to enable impersonation for the Iceberg catalog | `false` | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.authentication.kerberos.principal` | The principal of the Kerberos authentication | (none) | required if the value of `gravitino.iceberg-rest.authentication.type` is Kerberos. | 0.6.0-incubating | +| `gravitino.iceberg-rest.authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `gravitino.iceberg-rest.authentication.type` is Kerberos. 
| 0.6.0-incubating | +| `gravitino.iceberg-rest.authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Iceberg catalog. | 60 | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0-incubating | ### Storage diff --git a/docs/jdbc-doris-catalog.md b/docs/jdbc-doris-catalog.md index 560f0baaead..872ca904b41 100644 --- a/docs/jdbc-doris-catalog.md +++ b/docs/jdbc-doris-catalog.md @@ -41,16 +41,16 @@ more details. Besides the [common catalog properties](./gravitino-server-config.md#gravitino-catalog-properties-configuration), the Doris catalog has the following properties: -| Configuration item | Description | Default value | Required | Since Version | -|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `jdbc-url` | JDBC URL for connecting to the database. For example, `jdbc:mysql://localhost:9030` | (none) | Yes | 0.5.0 | -| `jdbc-driver` | The driver of the JDBC connection. For example, `com.mysql.jdbc.Driver`. | (none) | Yes | 0.5.0 | -| `jdbc-user` | The JDBC user name. | (none) | Yes | 0.5.0 | -| `jdbc-password` | The JDBC password. | (none) | Yes | 0.5.0 | -| `jdbc.pool.min-size` | The minimum number of connections in the pool. `2` by default. | `2` | No | 0.5.0 | -| `jdbc.pool.max-size` | The maximum number of connections in the pool. `10` by default. | `10` | No | 0.5.0 | -| `jdbc.pool.max-size` | The maximum number of connections in the pool. 
`10` by default. | `10` | No | 0.5.0 | -| `replication_num` | The number of replications for the table. If not specified and the number of backend servers less than 3, then the default value is 1; If not specified and the number of backend servers greater or equals to 3, the default value (3) in Doris server will be used. For more, please see the [doc](https://doris.apache.org/docs/1.2/sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-TABLE/) | `1` or `3` | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since Version | +|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `jdbc-url` | JDBC URL for connecting to the database. For example, `jdbc:mysql://localhost:9030` | (none) | Yes | 0.5.0 | +| `jdbc-driver` | The driver of the JDBC connection. For example, `com.mysql.jdbc.Driver`. | (none) | Yes | 0.5.0 | +| `jdbc-user` | The JDBC user name. | (none) | Yes | 0.5.0 | +| `jdbc-password` | The JDBC password. | (none) | Yes | 0.5.0 | +| `jdbc.pool.min-size` | The minimum number of connections in the pool. `2` by default. | `2` | No | 0.5.0 | +| `jdbc.pool.max-size` | The maximum number of connections in the pool. `10` by default. | `10` | No | 0.5.0 | +| `jdbc.pool.max-size` | The maximum number of connections in the pool. `10` by default. | `10` | No | 0.5.0 | +| `replication_num` | The number of replications for the table. 
If not specified and the number of backend servers less than 3, then the default value is 1; If not specified and the number of backend servers greater or equals to 3, the default value (3) in Doris server will be used. For more, please see the [doc](https://doris.apache.org/docs/1.2/sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-TABLE/) | `1` or `3` | No | 0.6.0-incubating | Before using the Doris Catalog, you must download the corresponding JDBC driver to the `catalogs/jdbc-doris/libs` directory. Gravitino doesn't package the JDBC driver for Doris due to licensing issues. diff --git a/docs/jdbc-mysql-catalog.md b/docs/jdbc-mysql-catalog.md index f437f0c9fee..58042188c21 100644 --- a/docs/jdbc-mysql-catalog.md +++ b/docs/jdbc-mysql-catalog.md @@ -87,30 +87,30 @@ Refer to [Manage Relational Metadata Using Gravitino](./manage-relational-metada #### Table column types -| Gravitino Type | MySQL Type | -|------------------|---------------------| -| `Byte` | `Tinyint` | -| `Byte(false)` | `Tinyint Unsigned` | -| `Short` | `Smallint` | -| `Short(false)` | `Smallint Unsigned` | -| `Integer` | `Int` | -| `Integer(false)` | `Int Unsigned` | -| `Long` | `Bigint` | -| `Long(false)` | `Bigint Unsigned` | -| `Float` | `Float` | -| `Double` | `Double` | -| `String` | `Text` | -| `Date` | `Date` | -| `Time` | `Time` | -| `Timestamp` | `Timestamp` | -| `Decimal` | `Decimal` | -| `VarChar` | `VarChar` | -| `FixedChar` | `FixedChar` | -| `Binary` | `Binary` | +| Gravitino Type | MySQL Type | +|--------------------|---------------------| +| `Byte` | `Tinyint` | +| `Unsigned Byte` | `Tinyint Unsigned` | +| `Short` | `Smallint` | +| `Unsigned Short` | `Smallint Unsigned` | +| `Integer` | `Int` | +| `Unsigned Integer` | `Int Unsigned` | +| `Long` | `Bigint` | +| `Unsigned Long` | `Bigint Unsigned` | +| `Float` | `Float` | +| `Double` | `Double` | +| `String` | `Text` | +| `Date` | `Date` | +| `Time` | `Time` | +| `Timestamp` | `Timestamp` | +| `Decimal` | `Decimal` | 
+| `VarChar` | `VarChar` | +| `FixedChar` | `FixedChar` | +| `Binary` | `Binary` | :::info MySQL doesn't support Gravitino `Boolean` `Fixed` `Struct` `List` `Map` `Timestamp_tz` `IntervalDay` `IntervalYear` `Union` `UUID` type. -Meanwhile, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type since 0.6.0. +Meanwhile, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type since 0.6.0-incubating. ::: #### Table column auto-increment diff --git a/docs/jdbc-postgresql-catalog.md b/docs/jdbc-postgresql-catalog.md index 6954ae67379..6550a024271 100644 --- a/docs/jdbc-postgresql-catalog.md +++ b/docs/jdbc-postgresql-catalog.md @@ -111,7 +111,7 @@ Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational :::info PostgreSQL doesn't support Gravitino `Fixed` `Struct` `Map` `IntervalDay` `IntervalYear` `Union` `UUID` type. -Meanwhile, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type since 0.6.0. +Meanwhile, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type since 0.6.0-incubating. ::: #### Table column auto-increment diff --git a/docs/lakehouse-hudi-catalog.md b/docs/lakehouse-hudi-catalog.md new file mode 100644 index 00000000000..be6d328bfb4 --- /dev/null +++ b/docs/lakehouse-hudi-catalog.md @@ -0,0 +1,110 @@ +--- +title: "Hudi catalog" +slug: /lakehouse-hudi-catalog +keywords: + - lakehouse + - hudi + - metadata +license: "This software is licensed under the Apache License version 2." 
+--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Introduction + +Apache Gravitino provides the ability to manage Apache Hudi metadata. + +### Requirements and limitations + +:::info +Tested and verified with Apache Hudi `0.15.0`. +::: + +## Catalog + +### Catalog capabilities + +- Works as a catalog proxy, supporting `HMS` as catalog backend. +- Only supports read operations (list and load) for Hudi schemas and tables. +- Doesn't support timeline management operations now. + +### Catalog properties + +| Property name | Description | Default value | Required | Since Version | +|------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `catalog-backend` | Catalog backend of Gravitino Hudi catalog. Only supports `hms` now. | (none) | Yes | 0.7.0-incubating | +| `uri` | The URI associated with the backend. Such as `thrift://127.0.0.1:9083` for HMS backend. | (none) | Yes | 0.7.0-incubating | +| `client.pool-size` | For HMS backend. The maximum number of Hive metastore clients in the pool for Gravitino. | 1 | No | 0.7.0-incubating | +| `client.pool-cache.eviction-interval-ms` | For HMS backend. The cache pool eviction interval. | 300000 | No | 0.7.0-incubating | +| `gravitino.bypass.` | Properties with this prefix are passed down to the underlying backend client. For example, `gravitino.bypass.hive.metastore.failure.retries = 3` allows 3 retries upon failure of Thrift metastore calls for the HMS backend. | (none) | No | 0.7.0-incubating | + +### Catalog operations + +Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational-metadata-using-gravitino.md#catalog-operations) for more details. 
+ +## Schema + +### Schema capabilities + +- Only supports read operations: listSchema, loadSchema, and schemaExists. + +### Schema properties + +- The `Location` is an optional property that shows the storage path to the Hudi database. + +### Schema operations + +Only read operations are supported: listSchema, loadSchema, and schemaExists. +Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational-metadata-using-gravitino.md#schema-operations) for more details. + +## Table + +### Table capabilities + +- Only supports read operations: listTable, loadTable, and tableExists. + +### Table partitions + +- Supports loading Hudi partitioned tables (Hudi only supports identity partitioning). + +### Table sort orders + +- Doesn't support table sort orders. + +### Table distributions + +- Doesn't support table distributions. + +### Table indexes + +- Doesn't support table indexes. + +### Table properties + +- For the HMS backend, all table parameters stored in the HMS are returned as table properties. + +### Table column types + +The following table shows the mapping between Gravitino and [Apache Hudi column types](https://hudi.apache.org/docs/sql_ddl#supported-types): + +| Gravitino Type | Apache Hudi Type | +|----------------|------------------| +| `boolean` | `boolean` | +| `integer` | `int` | +| `long` | `long` | +| `date` | `date` | +| `timestamp` | `timestamp` | +| `float` | `float` | +| `double` | `double` | +| `string` | `string` | +| `decimal` | `decimal` | +| `binary` | `bytes` | +| `array` | `array` | +| `map` | `map` | +| `struct` | `struct` | + +### Table operations + +Only read operations are supported: listTable, loadTable, and tableExists. +Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational-metadata-using-gravitino.md#table-operations) for more details. 
diff --git a/docs/lakehouse-iceberg-catalog.md b/docs/lakehouse-iceberg-catalog.md index 8470da5b234..edece0d722c 100644 --- a/docs/lakehouse-iceberg-catalog.md +++ b/docs/lakehouse-iceberg-catalog.md @@ -72,13 +72,13 @@ You must download the corresponding JDBC driver and place it to the `catalogs/la Supports using static access-key-id and secret-access-key to access S3 data. -| Configuration item | Description | Default value | Required | Since Version | -|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `io-impl` | The io implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aws.s3.S3FileIO` for s3. | (none) | No | 0.6.0 | -| `s3-access-key-id` | The static access key ID used to access S3 data. | (none) | No | 0.6.0 | -| `s3-secret-access-key` | The static secret access key used to access S3 data. | (none) | No | 0.6.0 | -| `s3-endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | (none) | No | 0.6.0 | -| `s3-region` | The region of the S3 service, like `us-west-2`. | (none) | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since Version | +|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `io-impl` | The io implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aws.s3.S3FileIO` for s3. 
| (none) | No | 0.6.0-incubating | +| `s3-access-key-id` | The static access key ID used to access S3 data. | (none) | No | 0.6.0-incubating | +| `s3-secret-access-key` | The static secret access key used to access S3 data. | (none) | No | 0.6.0-incubating | +| `s3-endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | (none) | No | 0.6.0-incubating | +| `s3-region` | The region of the S3 service, like `us-west-2`. | (none) | No | 0.6.0-incubating | For other Iceberg s3 properties not managed by Gravitino like `s3.sse.type`, you could config it directly by `gravitino.bypass.s3.sse.type`. @@ -90,12 +90,12 @@ To configure the JDBC catalog backend, set the `warehouse` parameter to `s3://{b Gravitino Iceberg REST service supports using static access-key-id and secret-access-key to access OSS data. -| Configuration item | Description | Default value | Required | Since Version | -|-------------------------|-------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `io-impl` | The IO implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aliyun.oss.OSSFileIO` for OSS. | (none) | No | 0.6.0 | -| `oss-access-key-id` | The static access key ID used to access OSS data. | (none) | No | 0.7.0 | -| `oss-secret-access-key` | The static secret access key used to access OSS data. | (none) | No | 0.7.0 | -| `oss-endpoint` | The endpoint of Aliyun OSS service. 
| (none) | No | 0.7.0 | +| Configuration item | Description | Default value | Required | Since Version | +|-------------------------|-------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `io-impl` | The IO implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aliyun.oss.OSSFileIO` for OSS. | (none) | No | 0.6.0-incubating | +| `oss-access-key-id` | The static access key ID used to access OSS data. | (none) | No | 0.7.0-incubating | +| `oss-secret-access-key` | The static secret access key used to access OSS data. | (none) | No | 0.7.0-incubating | +| `oss-endpoint` | The endpoint of Aliyun OSS service. | (none) | No | 0.7.0-incubating | For other Iceberg OSS properties not managed by Gravitino like `client.security-token`, you could config it directly by `gravitino.bypass.client.security-token`. @@ -107,9 +107,9 @@ Please set the `warehouse` parameter to `oss://{bucket_name}/${prefix_name}`. Ad Supports using google credential file to access GCS data. -| Configuration item | Description | Default value | Required | Since Version | -|------------------------|----------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `io-impl` | The io implementation for `FileIO` in Iceberg, use `org.apache.iceberg.gcp.gcs.GCSFileIO` for GCS. | (none) | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since Version | +|--------------------|----------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `io-impl` | The io implementation for `FileIO` in Iceberg, use `org.apache.iceberg.gcp.gcs.GCSFileIO` for GCS. 
| (none) | No | 0.6.0-incubating | For other Iceberg GCS properties not managed by Gravitino like `gcs.project-id`, you could config it directly by `gravitino.bypass.gcs.project-id`. @@ -123,9 +123,9 @@ Please set `warehouse` to `gs://{bucket_name}/${prefix_name}`, and download [Ice For other storages that are not managed by Gravitino directly, you can manage them through custom catalog properties. -| Configuration item | Description | Default value | Required | Since Version | -|--------------------|-----------------------------------------------------------------------------------------|---------------|----------|---------------| -| `io-impl` | The IO implementation for `FileIO` in Iceberg, please use the full qualified classname. | (none) | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since Version | +|--------------------|-----------------------------------------------------------------------------------------|---------------|----------|------------------| +| `io-impl` | The IO implementation for `FileIO` in Iceberg, please use the full qualified classname. | (none) | No | 0.6.0-incubating | To pass custom properties such as `security-token` to your custom `FileIO`, you can directly configure it by `gravitino.bypass.security-token`. `security-token` will be included in the properties when the initialize method of `FileIO` is invoked. @@ -137,14 +137,14 @@ Please set the `warehouse` parameter to `{storage_prefix}://{bucket_name}/${pref Users can use the following properties to configure the security of the catalog backend if needed. For example, if you are using a Kerberos Hive catalog backend, you must set `authentication.type` to `Kerberos` and provide `authentication.kerberos.principal` and `authentication.kerberos.keytab-uri`. 
-| Property name | Description | Default value | Required | Since Version | -|----------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-------------------------------------------------------------|---------------| -| `authentication.type` | The type of authentication for Iceberg catalog backend. This configuration only applicable for for Hive backend, and only supports `Kerberos`, `simple` currently. As for JDBC backend, only username/password authentication was supported now. | `simple` | No | 0.6.0 | -| `authentication.impersonation-enable` | Whether to enable impersonation for the Iceberg catalog | `false` | No | 0.6.0 | -| `authentication.kerberos.principal` | The principal of the Kerberos authentication | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | -| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | -| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Iceberg catalog. | 60 | No | 0.6.0 | -| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. 
| 60 | No | 0.6.0 | +| Property name | Description | Default value | Required | Since Version | +|----------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------------|------------------| +| `authentication.type` | The type of authentication for Iceberg catalog backend. This configuration only applicable for for Hive backend, and only supports `Kerberos`, `simple` currently. As for JDBC backend, only username/password authentication was supported now. | `simple` | No | 0.6.0-incubating | +| `authentication.impersonation-enable` | Whether to enable impersonation for the Iceberg catalog | `false` | No | 0.6.0-incubating | +| `authentication.kerberos.principal` | The principal of the Kerberos authentication | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0-incubating | +| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0-incubating | +| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Iceberg catalog. | 60 | No | 0.6.0-incubating | +| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0-incubating | ### Catalog operations @@ -170,7 +170,7 @@ Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational - Doesn't support column default value. -#### Table partitions +### Table partitions Supports transforms: @@ -305,7 +305,7 @@ Apache Iceberg doesn't support Gravitino `EvenDistribution` type. 
:::info Apache Iceberg doesn't support Gravitino `Varchar` `Fixedchar` `Byte` `Short` `Union` type. -Meanwhile, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type since 0.6.0. +Meanwhile, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type since 0.6.0-incubating. ::: ### Table properties diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md index 6eabd3e8fcd..786f5544945 100644 --- a/docs/lakehouse-paimon-catalog.md +++ b/docs/lakehouse-paimon-catalog.md @@ -30,16 +30,16 @@ Builds with Apache Paimon `0.8.0`. ### Catalog properties -| Property name | Description | Default value | Required | Since Version | -|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------------------------------------------|---------------| -| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Only supports `filesystem` now. | (none) | Yes | 0.6.0 | -| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0 | -| `warehouse` | Warehouse directory of catalog. `file:///user/hive/warehouse-paimon/` for local fs or `hdfs://namespace/hdfs/path` for HDFS. 
| (none) | Yes | 0.6.0 | -| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0 | -| `authentication.kerberos.principal` | The principal of the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | -| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | -| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Paimon catalog. | 60 | No | 0.6.0 | -| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0 | +| Property name | Description | Default value | Required | Since Version | +|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------|------------------| +| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Only supports `filesystem` now. | (none) | Yes | 0.6.0-incubating | +| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0-incubating | +| `warehouse` | Warehouse directory of catalog. `file:///user/hive/warehouse-paimon/` for local fs or `hdfs://namespace/hdfs/path` for HDFS. 
| (none) | Yes | 0.6.0-incubating | +| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0-incubating | +| `authentication.kerberos.principal` | The principal of the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0-incubating | +| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0-incubating | +| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Paimon catalog. | 60 | No | 0.6.0-incubating | +| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0-incubating | Any properties not defined by Gravitino with `gravitino.bypass.` prefix will pass to Paimon catalog properties and HDFS configuration. For example, if specify `gravitino.bypass.table.type`, `table.type` will pass to Paimon catalog properties. @@ -85,7 +85,7 @@ Gravitino Paimon Catalog does not support dropTable, because the dropTable in Pa Paimon does not support auto increment column. ::: -#### Table changes +### Table changes - RenameTable - AddColumn @@ -99,7 +99,7 @@ Paimon does not support auto increment column. - SetProperty - RemoveProperty -#### Table partitions +### Table partitions - Only supports Identity partitions, such as `day`, `hour`, etc. diff --git a/docs/manage-relational-metadata-using-gravitino.md b/docs/manage-relational-metadata-using-gravitino.md index a5d4d44ece6..3f394f58386 100644 --- a/docs/manage-relational-metadata-using-gravitino.md +++ b/docs/manage-relational-metadata-using-gravitino.md @@ -24,6 +24,7 @@ For more details, please refer to the related doc. 
- [**Apache Doris**](./jdbc-doris-catalog.md) - [**Apache Iceberg**](./lakehouse-iceberg-catalog.md) - [**Apache Paimon**](./lakehouse-paimon-catalog.md) +- [**Apache Hudi**](./lakehouse-hudi-catalog.md) Assuming: @@ -88,14 +89,15 @@ Catalog catalog = gravitinoClient.createCatalog("catalog", Currently, Gravitino supports the following catalog providers: -| Catalog provider | Catalog property | -|---------------------|---------------------------------------------------------------------------------| -| `hive` | [Hive catalog property](./apache-hive-catalog.md#catalog-properties) | -| `lakehouse-iceberg` | [Iceberg catalog property](./lakehouse-iceberg-catalog.md#catalog-properties) | -| `lakehouse-paimon` | [Paimon catalog property](./lakehouse-paimon-catalog.md#catalog-properties) | -| `jdbc-mysql` | [MySQL catalog property](./jdbc-mysql-catalog.md#catalog-properties) | -| `jdbc-postgresql` | [PostgreSQL catalog property](./jdbc-postgresql-catalog.md#catalog-properties) | -| `jdbc-doris` | [Doris catalog property](./jdbc-doris-catalog.md#catalog-properties) | +| Catalog provider | Catalog property | +|---------------------|--------------------------------------------------------------------------------| +| `hive` | [Hive catalog property](./apache-hive-catalog.md#catalog-properties) | +| `lakehouse-iceberg` | [Iceberg catalog property](./lakehouse-iceberg-catalog.md#catalog-properties) | +| `lakehouse-paimon` | [Paimon catalog property](./lakehouse-paimon-catalog.md#catalog-properties) | +| `lakehouse-hudi` | [Hudi catalog property](./lakehouse-hudi-catalog.md#catalog-properties) | +| `jdbc-mysql` | [MySQL catalog property](./jdbc-mysql-catalog.md#catalog-properties) | +| `jdbc-postgresql` | [PostgreSQL catalog property](./jdbc-postgresql-catalog.md#catalog-properties) | +| `jdbc-doris` | [Doris catalog property](./jdbc-doris-catalog.md#catalog-properties) | ### Load a catalog @@ -326,6 +328,7 @@ Currently, Gravitino supports the following schema property: | `hive` 
| [Hive schema property](./apache-hive-catalog.md#schema-properties) | | `lakehouse-iceberg` | [Iceberg scheme property](./lakehouse-iceberg-catalog.md#schema-properties) | | `lakehouse-paimon` | [Paimon scheme property](./lakehouse-paimon-catalog.md#schema-properties) | +| `lakehouse-hudi` | [Hudi schema property](./lakehouse-hudi-catalog.md#schema-properties) | | `jdbc-mysql` | [MySQL schema property](./jdbc-mysql-catalog.md#schema-properties) | | `jdbc-postgresql` | [PostgreSQL schema property](./jdbc-postgresql-catalog.md#schema-properties) | | `jdbc-doris` | [Doris schema property](./jdbc-doris-catalog.md#schema-properties) | @@ -710,13 +713,13 @@ The following types that Gravitino supports: |---------------------------|--------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Boolean | `Types.BooleanType.get()` | `boolean` | Boolean type | | Byte | `Types.ByteType.get()` | `byte` | Byte type, indicates a numerical value of 1 byte | -| Byte(false) | `Types.ByteType.unsigned()` | `byte unsigned` | Unsigned Byte type, indicates a unsigned numerical value of 1 byte | +| Unsigned Byte | `Types.ByteType.unsigned()` | `byte unsigned` | Unsigned Byte type, indicates a unsigned numerical value of 1 byte | | Short | `Types.ShortType.get()` | `short` | Short type, indicates a numerical value of 2 bytes | -| Short(false) | `Types.ShortType.unsigned()` | `short unsigned` | Unsigned Short type, indicates a unsigned numerical value of 2 bytes | +| Unsigned Short | `Types.ShortType.unsigned()` | `short unsigned` | Unsigned Short type, indicates a unsigned numerical value of 2 bytes | | Integer | `Types.IntegerType.get()` | `integer` | Integer type, 
indicates a numerical value of 4 bytes | -| Integer(false) | `Types.IntegerType.unsigned()` | `integer unsigned` | Unsigned Integer type, indicates a unsigned numerical value of 4 bytes | +| Unsigned Integer | `Types.IntegerType.unsigned()` | `integer unsigned` | Unsigned Integer type, indicates a unsigned numerical value of 4 bytes | | Long | `Types.LongType.get()` | `long` | Long type, indicates a numerical value of 8 bytes | -| Long(false) | `Types.LongType.unsigned()` | `long unsigned` | Unsigned Long type, indicates a unsigned numerical value of 8 bytes | +| Unsigned Long | `Types.LongType.unsigned()` | `long unsigned` | Unsigned Long type, indicates a unsigned numerical value of 8 bytes | | Float | `Types.FloatType.get()` | `float` | Float type, indicates a single-precision floating point number | | Double | `Types.DoubleType.get()` | `double` | Double type, indicates a double-precision floating point number | | Decimal(precision, scale) | `Types.DecimalType.of(precision, scale)` | `decimal(p, s)` | Decimal type, indicates a fixed-precision decimal number with the constraint that the precision must be in range `[1, 38]` and the scala must be in range `[0, precision]` | @@ -807,6 +810,7 @@ The following is a table of the column default value that Gravitino supports for | `hive` | ✘ | | `lakehouse-iceberg` | ✘ | | `lakehouse-paimon` | ✘ | +| `lakehouse-hudi` | ✘ | | `jdbc-mysql` | ✔ | | `jdbc-postgresql` | ✔ | @@ -820,6 +824,7 @@ The following table shows the column auto-increment that Gravitino supports for | `hive` | ✘ | | `lakehouse-iceberg` | ✘ | | `lakehouse-paimon` | ✘ | +| `lakehouse-hudi` | ✘ | | `jdbc-mysql` | ✔([limitations](./jdbc-mysql-catalog.md#table-column-auto-increment)) | | `jdbc-postgresql` | ✔ | @@ -832,6 +837,7 @@ The following is the table property that Gravitino supports: | `hive` | [Hive table property](./apache-hive-catalog.md#table-properties) | [Hive type mapping](./apache-hive-catalog.md#table-column-types) | | `lakehouse-iceberg` | 
[Iceberg table property](./lakehouse-iceberg-catalog.md#table-properties) | [Iceberg type mapping](./lakehouse-iceberg-catalog.md#table-column-types) | | `lakehouse-paimon` | [Paimon table property](./lakehouse-paimon-catalog.md#table-properties) | [Paimon type mapping](./lakehouse-paimon-catalog.md#table-column-types) | +| `lakehouse-hudi` | [Hudi table property](./lakehouse-hudi-catalog.md#table-properties) | [Hudi type mapping](./lakehouse-hudi-catalog.md#table-column-types) | | `jdbc-mysql` | [MySQL table property](./jdbc-mysql-catalog.md#table-properties) | [MySQL type mapping](./jdbc-mysql-catalog.md#table-column-types) | | `jdbc-postgresql` | [PostgreSQL table property](./jdbc-postgresql-catalog.md#table-properties) | [PostgreSQL type mapping](./jdbc-postgresql-catalog.md#table-column-types) | | `doris` | [Doris table property](./jdbc-doris-catalog.md#table-properties) | [Doris type mapping](./jdbc-doris-catalog.md#table-column-types) |