From 5f8bdf6cdcf75c809561d44018bf37f77ed69065 Mon Sep 17 00:00:00 2001 From: mygrsun Date: Wed, 5 Jun 2024 22:21:05 +0800 Subject: [PATCH] [#3403] fix(hive-catalog): add hive catalog property list-all-tables (#3703) ### What changes were proposed in this pull request? Add a Hive catalog property "list-all-tables". Use this property to control whether Iceberg tables are displayed in the Hive table list. ### Why are the changes needed? The bug is that a schema in the Hive catalog lists Iceberg tables. Fix: #3403 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? 1. Create a Hive catalog with the "list-all-tables" property. 2. Create a database and an Iceberg table in the catalog using Hive Beeline. 3. Check whether the table is displayed in the catalog. --------- Co-authored-by: ericqin --- .../catalog/hive/HiveCatalogOperations.java | 39 ++++++++++++++++++- .../hive/HiveCatalogPropertiesMeta.java | 14 +++++++ .../gravitino/catalog/hive/HiveTable.java | 2 + .../hive/TestHiveCatalogOperations.java | 4 +- docs/apache-hive-catalog.md | 23 +++++------ 5 files changed, 69 insertions(+), 13 deletions(-) diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java index 5f8dd3eae79..09b885f88f2 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java @@ -6,9 +6,12 @@ import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS; import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.CLIENT_POOL_SIZE; +import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.LIST_ALL_TABLES; import static
com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.METASTORE_URIS; import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.PRINCIPAL; +import static com.datastrato.gravitino.catalog.hive.HiveTable.ICEBERG_TABLE_TYPE_VALUE; import static com.datastrato.gravitino.catalog.hive.HiveTable.SUPPORT_TABLE_TYPES; +import static com.datastrato.gravitino.catalog.hive.HiveTable.TABLE_TYPE_PROP; import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.COMMENT; import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE; import static com.datastrato.gravitino.connector.BaseCatalog.CATALOG_BYPASS_PREFIX; @@ -97,6 +100,7 @@ public class HiveCatalogOperations implements CatalogOperations, SupportsSchemas private ScheduledThreadPoolExecutor checkTgtExecutor; private String kerberosRealm; private ProxyPlugin proxyPlugin; + boolean listAllTables = true; // Map that maintains the mapping of keys in Gravitino to that in Hive, for example, users // will only need to set the configuration 'METASTORE_URL' in Gravitino and Gravitino will change @@ -148,6 +152,8 @@ public void initialize( this.clientPool = new CachedClientPool(getClientPoolSize(conf), hiveConf, getCacheEvictionInterval(conf)); + + this.listAllTables = enableListAllTables(conf); } private void initKerberosIfNecessary(Map conf, Configuration hadoopConf) { @@ -266,6 +272,10 @@ long getCacheEvictionInterval(Map conf) { .getOrDefault(conf, CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS); } + boolean enableListAllTables(Map conf) { + return (boolean) + propertiesMetadata.catalogPropertiesMetadata().getOrDefault(conf, LIST_ALL_TABLES); + } /** Closes the Hive catalog and releases the associated client pool. 
*/ @Override public void close() { @@ -521,7 +531,18 @@ public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaExcep return clientPool.run( c -> c.getTableObjectsByName(schemaIdent.name(), allTables).stream() - .filter(tb -> SUPPORT_TABLE_TYPES.contains(tb.getTableType())) + .filter( + tb -> { + boolean isSupportTable = SUPPORT_TABLE_TYPES.contains(tb.getTableType()); + if (!isSupportTable) { + return false; + } + if (!listAllTables) { + Map parameters = tb.getParameters(); + return isHiveTable(parameters); + } + return true; + }) .map(tb -> NameIdentifier.of(namespace, tb.getTableName())) .toArray(NameIdentifier[]::new)); } catch (UnknownDBException e) { @@ -537,6 +558,22 @@ public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaExcep } } + boolean isHiveTable(Map tableParameters) { + if (isIcebergTable(tableParameters)) return false; + return true; + } + + boolean isIcebergTable(Map tableParameters) { + if (tableParameters != null) { + boolean isIcebergTable = + ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(tableParameters.get(TABLE_TYPE_PROP)); + if (isIcebergTable) { + return true; + } + } + return false; + } + /** * Loads a table from the Hive Metastore. 
* diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogPropertiesMeta.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogPropertiesMeta.java index 29cb01b1266..a1d4baac8ac 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogPropertiesMeta.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogPropertiesMeta.java @@ -36,6 +36,10 @@ public class HiveCatalogPropertiesMeta extends BaseCatalogPropertiesMetadata { public static final String FETCH_TIMEOUT_SEC = "kerberos.keytab-fetch-timeout-sec"; + public static final String LIST_ALL_TABLES = "list-all-tables"; + + public static final boolean DEFAULT_LIST_ALL_TABLES = false; + private static final Map> HIVE_CATALOG_PROPERTY_ENTRIES = ImmutableMap.>builder() .put( @@ -88,6 +92,16 @@ public class HiveCatalogPropertiesMeta extends BaseCatalogPropertiesMetadata { FETCH_TIMEOUT_SEC, PropertyEntry.integerOptionalPropertyEntry( FETCH_TIMEOUT_SEC, "The timeout to fetch key tab", true, 60, false)) + .put( + LIST_ALL_TABLES, + PropertyEntry.booleanPropertyEntry( + LIST_ALL_TABLES, + "Lists all tables in a database, including non-Hive tables, such as Iceberg, etc.", + false, + false, + DEFAULT_LIST_ALL_TABLES, + false, + false)) .putAll(BASIC_CATALOG_PROPERTY_ENTRIES) .build(); diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTable.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTable.java index f33ec12d4b0..267b8265eda 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTable.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTable.java @@ -62,6 +62,8 @@ public class HiveTable extends BaseTable { // A set of supported Hive table types. 
public static final Set SUPPORT_TABLE_TYPES = Sets.newHashSet(MANAGED_TABLE.name(), EXTERNAL_TABLE.name()); + public static final String ICEBERG_TABLE_TYPE_VALUE = "ICEBERG"; + public static final String TABLE_TYPE_PROP = "table_type"; private String schemaName; private CachedClientPool clientPool; private StorageDescriptor sd; diff --git a/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/TestHiveCatalogOperations.java b/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/TestHiveCatalogOperations.java index 27aae03327a..ed85b7819be 100644 --- a/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/TestHiveCatalogOperations.java +++ b/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/TestHiveCatalogOperations.java @@ -11,6 +11,7 @@ import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.FETCH_TIMEOUT_SEC; import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.IMPERSONATION_ENABLE; import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.KEY_TAB_URI; +import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.LIST_ALL_TABLES; import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.METASTORE_URIS; import static com.datastrato.gravitino.catalog.hive.HiveCatalogPropertiesMeta.PRINCIPAL; import static com.datastrato.gravitino.catalog.hive.TestHiveCatalog.HIVE_PROPERTIES_METADATA; @@ -67,12 +68,13 @@ void testPropertyMeta() { Map> propertyEntryMap = HIVE_PROPERTIES_METADATA.catalogPropertiesMetadata().propertyEntries(); - Assertions.assertEquals(11, propertyEntryMap.size()); + Assertions.assertEquals(12, propertyEntryMap.size()); Assertions.assertTrue(propertyEntryMap.containsKey(METASTORE_URIS)); Assertions.assertTrue(propertyEntryMap.containsKey(Catalog.PROPERTY_PACKAGE)); Assertions.assertTrue(propertyEntryMap.containsKey(BaseCatalog.CATALOG_OPERATION_IMPL)); 
Assertions.assertTrue(propertyEntryMap.containsKey(CLIENT_POOL_SIZE)); Assertions.assertTrue(propertyEntryMap.containsKey(IMPERSONATION_ENABLE)); + Assertions.assertTrue(propertyEntryMap.containsKey(LIST_ALL_TABLES)); Assertions.assertTrue(propertyEntryMap.get(METASTORE_URIS).isRequired()); Assertions.assertFalse(propertyEntryMap.get(Catalog.PROPERTY_PACKAGE).isRequired()); diff --git a/docs/apache-hive-catalog.md b/docs/apache-hive-catalog.md index ee05aba5bbc..d1f8e95a0f1 100644 --- a/docs/apache-hive-catalog.md +++ b/docs/apache-hive-catalog.md @@ -28,17 +28,18 @@ The Hive catalog supports creating, updating, and deleting databases and tables ### Catalog properties -| Property Name | Description | Default Value | Required | Since Version | -|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------|---------------| -| `metastore.uris` | The Hive metastore service URIs, separate multiple addresses with commas. Such as `thrift://127.0.0.1:9083` | (none) | Yes | 0.2.0 | -| `client.pool-size` | The maximum number of Hive metastore clients in the pool for Gravitino. | 1 | No | 0.2.0 | -| `gravitino.bypass.` | Property name with this prefix passed down to the underlying HMS client for use. Such as `gravitino.bypass.hive.metastore.failure.retries = 3` indicate 3 times of retries upon failure of Thrift metastore calls | (none) | No | 0.2.0 | -| `client.pool-cache.eviction-interval-ms` | The cache pool eviction interval. | 300000 | No | 0.4.0 | -| `impersonation-enable` | Enable user impersonation for Hive catalog. | false | No | 0.4.0 | -| `kerberos.principal` | The Kerberos principal for the catalog. 
You should configure `gravitino.bypass.hadoop.security.authentication`, `gravitino.bypass.hive.metastore.kerberos.principal` and `gravitino.bypass.hive.metastore.sasl.enabled`if you want to use Kerberos. | (none) | required if you use kerberos | 0.4.0 | -| `kerberos.keytab-uri` | The uri of key tab for the catalog. Now supported protocols are `https`, `http`, `ftp`, `file`. | (none) | required if you use kerberos | 0.4.0 | -| `kerberos.check-interval-sec` | The interval to check validness of the principal | 60 | No | 0.4.0 | -| `kerberos.keytab-fetch-timeout-sec` | The timeout to fetch key tab | 60 | No | 0.4.0 | +| Property Name | Description | Default Value | Required | Since Version | +|------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|------------------------------|---------------| +| `metastore.uris` | The Hive metastore service URIs, separate multiple addresses with commas. Such as `thrift://127.0.0.1:9083` | (none) | Yes | 0.2.0 | +| `client.pool-size` | The maximum number of Hive metastore clients in the pool for Gravitino. | 1 | No | 0.2.0 | +| `gravitino.bypass.` | Property name with this prefix passed down to the underlying HMS client for use. Such as `gravitino.bypass.hive.metastore.failure.retries = 3` indicate 3 times of retries upon failure of Thrift metastore calls | (none) | No | 0.2.0 | +| `client.pool-cache.eviction-interval-ms` | The cache pool eviction interval. | 300000 | No | 0.4.0 | +| `impersonation-enable` | Enable user impersonation for Hive catalog. | false | No | 0.4.0 | +| `kerberos.principal` | The Kerberos principal for the catalog. 
You should configure `gravitino.bypass.hadoop.security.authentication`, `gravitino.bypass.hive.metastore.kerberos.principal` and `gravitino.bypass.hive.metastore.sasl.enabled` if you want to use Kerberos. | (none) | required if you use kerberos | 0.4.0 | +| `kerberos.keytab-uri` | The uri of key tab for the catalog. Now supported protocols are `https`, `http`, `ftp`, `file`. | (none) | required if you use kerberos | 0.4.0 | +| `kerberos.check-interval-sec` | The interval to check validness of the principal | 60 | No | 0.4.0 | +| `kerberos.keytab-fetch-timeout-sec` | The timeout to fetch key tab | 60 | No | 0.4.0 | +| `list-all-tables` | Lists all tables in a database, including non-Hive tables, such as Iceberg, etc. | false | No | 0.5.1 | When you use the Gravitino with Trino. You can pass the Trino Hive connector configuration using prefix `trino.bypass.`. For example, using `trino.bypass.hive.config.resources` to pass the `hive.config.resources` to the Gravitino Hive catalog in Trino runtime.