From aaf94fa235fac4d2c9ba9e3b658efefb817d44b6 Mon Sep 17 00:00:00 2001 From: theoryxu Date: Fri, 10 May 2024 19:39:56 +0800 Subject: [PATCH] [#3295] fix(catalog-hive):The Hive Catalog Bug In Multiple Kerberized HMS (#3321) ### What changes were proposed in this pull request? remove PRINCIPAL -> METASTORE_KERBEROS_PRINCIPAL in the GRAVITINO_CONFIG_TO_HIVE ### Why are the changes needed? The hive.metastore.kerberos.principal is not the same as kerberos.principal functionally. Fix: #3295 ### Does this PR introduce _any_ user-facing change? yes, add the document ### How was this patch tested? existing test (TestHiveCatalogOperations) #### test in inner environment as follows: step 1 Install gravitino in host1, Install HMS1 in host1, and install HMS2 in host2 step 2 create catalog1 for HMS1: curl -L -X POST 'http://host1:8090/api/metalakes/mk1/catalogs' -H 'Content-Type: application/json' -H 'Accept: application/vnd.gravitino.v1+json' --data-raw '{ "name": "catalog1", "type": "relational", "provider": "hive", "properties": { "metastore.uris": "thrift://host1:7004", "kerberos.principal": "hadoop/host1@EXAMPLE.COM", "kerberos.keytab-uri": "/var/krb5kdc/emr.keytab", "gravitino.bypass.hadoop.security.authentication": "kerberos", "gravitino.bypass.hive.metastore.kerberos.principal": "hadoop/_HOST@EXAMPLE.COM", "gravitino.bypass.hive.metastore.sasl.enabled": true } }' step 3 create catalog2 for HMS2: curl -L -X POST 'http://host1:8090/api/metalakes/mk1/catalogs' -H 'Content-Type: application/json' -H 'Accept: application/vnd.gravitino.v1+json' --data-raw '{ "name": "catalog2", "type": "relational", "provider": "hive", "properties": { "metastore.uris": "thrift://host2:7004", "kerberos.principal": "hadoop/host1@EXAMPLE.COM", "kerberos.keytab-uri": "/var/krb5kdc/emr.keytab", "gravitino.bypass.hadoop.security.authentication": "kerberos", "gravitino.bypass.hive.metastore.kerberos.principal": 
"hadoop/_HOST@EXAMPLE.COM", "gravitino.bypass.hive.metastore.sasl.enabled": true } }' step 4 curl -L -X GET 'http://host1:8090/api/metalakes/mk1/catalogs/catalog1/schemas' success curl -L -X GET 'http://host1:8090/api/metalakes/mk1/catalogs/catalog2/schemas' success Co-authored-by: theoryxu --- .../catalog/hive/HiveCatalogOperations.java | 6 +---- docs/apache-hive-catalog.md | 22 +++++++++---------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java index 9a089647f31..3e419c788e0 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java @@ -104,11 +104,7 @@ public class HiveCatalogOperations implements CatalogOperations, SupportsSchemas // will only need to set the configuration 'METASTORE_URL' in Gravitino and Gravitino will change // it to `METASTOREURIS` automatically and pass it to Hive. public static final Map GRAVITINO_CONFIG_TO_HIVE = - ImmutableMap.of( - METASTORE_URIS, - ConfVars.METASTOREURIS.varname, - PRINCIPAL, - ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname); + ImmutableMap.of(METASTORE_URIS, ConfVars.METASTOREURIS.varname); /** * Initializes the Hive catalog operations with the provided configuration. 
diff --git a/docs/apache-hive-catalog.md b/docs/apache-hive-catalog.md index 6743dad5b93..ecf7a058e4b 100644 --- a/docs/apache-hive-catalog.md +++ b/docs/apache-hive-catalog.md @@ -28,17 +28,17 @@ The Hive catalog supports creating, updating, and deleting databases and tables ### Catalog properties -| Property Name | Description | Default Value | Required | Since Version | -|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------|---------------| -| `metastore.uris` | The Hive metastore service URIs, separate multiple addresses with commas. Such as `thrift://127.0.0.1:9083` | (none) | Yes | 0.2.0 | -| `client.pool-size` | The maximum number of Hive metastore clients in the pool for Gravitino. | 1 | No | 0.2.0 | -| `gravitino.bypass.` | Property name with this prefix passed down to the underlying HMS client for use. Such as `gravitino.bypass.hive.metastore.failure.retries = 3` indicate 3 times of retries upon failure of Thrift metastore calls | (none) | No | 0.2.0 | -| `client.pool-cache.eviction-interval-ms` | The cache pool eviction interval. | 300000 | No | 0.4.0 | -| `impersonation-enable` | Enable user impersonation for Hive catalog. | false | No | 0.4.0 | -| `kerberos.principal` | The Kerberos principal for the catalog. You should configure `gravitino.bypass.hadoop.security.authentication` and `gravitino.bypass.hive.metastore.sasl.enabled`if you want to use Kerberos. | (none) | required if you use kerberos | 0.4.0 | -| `kerberos.keytab-uri` | The uri of key tab for the catalog. Now supported protocols are `https`, `http`, `ftp`, `file`. 
| (none) | required if you use kerberos | 0.4.0 | -| `kerberos.check-interval-sec` | The interval to check validness of the principal | 60 | No | 0.4.0 | -| `kerberos.keytab-fetch-timeout-sec` | The timeout to fetch key tab | 60 | No | 0.4.0 | +| Property Name | Description | Default Value | Required | Since Version | +|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------|---------------| +| `metastore.uris` | The Hive metastore service URIs, separate multiple addresses with commas. Such as `thrift://127.0.0.1:9083` | (none) | Yes | 0.2.0 | +| `client.pool-size` | The maximum number of Hive metastore clients in the pool for Gravitino. | 1 | No | 0.2.0 | +| `gravitino.bypass.` | Property name with this prefix passed down to the underlying HMS client for use. Such as `gravitino.bypass.hive.metastore.failure.retries = 3` indicate 3 times of retries upon failure of Thrift metastore calls | (none) | No | 0.2.0 | +| `client.pool-cache.eviction-interval-ms` | The cache pool eviction interval. | 300000 | No | 0.4.0 | +| `impersonation-enable` | Enable user impersonation for Hive catalog. | false | No | 0.4.0 | +| `kerberos.principal` | The Kerberos principal for the catalog. You should configure `gravitino.bypass.hadoop.security.authentication`, `gravitino.bypass.hive.metastore.kerberos.principal` and `gravitino.bypass.hive.metastore.sasl.enabled` if you want to use Kerberos. | (none) | required if you use kerberos | 0.4.0 | +| `kerberos.keytab-uri` | The uri of key tab for the catalog. Now supported protocols are `https`, `http`, `ftp`, `file`. 
| (none) | required if you use kerberos | 0.4.0 | +| `kerberos.check-interval-sec` | The interval to check validness of the principal | 60 | No | 0.4.0 | +| `kerberos.keytab-fetch-timeout-sec` | The timeout to fetch key tab | 60 | No | 0.4.0 | When you use the Gravitino with Trino. You can pass the Trino Hive connector configuration using prefix `trino.bypass.`. For example, using `trino.bypass.hive.config.resources` to pass the `hive.config.resources` to the Gravitino Hive catalog in Trino runtime.