[KYUUBI #4171] Support skip retrieving table's properties to speed up GetTables operation

### _Why are the changes needed?_

The `GetTables` operation is slow because it retrieves detailed table information one table at a time, yet only the table comment is used to construct each result row, so that lookup can be made optional.
This PR adds an optional config to control the behavior. By default, `GetTables` still retrieves the full table details; when the config is enabled, `GetTables` returns only table identifiers.
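
For reference, a minimal sketch of enabling the new switch server-wide. The key name and its `false` default come from the `KyuubiConf` change in this commit; the `kyuubi-defaults.conf` placement is the usual Kyuubi convention and is shown only as an illustration:

```properties
# conf/kyuubi-defaults.conf (illustrative placement)
# Skip the per-table property lookup so GetTables returns identifiers only;
# the REMARKS/comment column of the result set comes back empty.
kyuubi.operation.getTables.ignoreTableProperties=true
```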

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request

Closes #4444 from liaoyt/master.

Closes #4171

af5e60e [yeatsliao] rename config
0c9985e [yeatsliao] add doc
5e8687c [yeatsliao] Supports ignore table comment when list all tables.

Authored-by: yeatsliao <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
liaoyt authored and pan3793 committed Mar 6, 2023
1 parent 3d65f27 commit b40fea2
Showing 6 changed files with 49 additions and 24 deletions.
35 changes: 18 additions & 17 deletions docs/deployment/settings.md

Large diffs are not rendered by default.

```diff
@@ -19,6 +19,7 @@ package org.apache.kyuubi.engine.spark.operation
 
 import org.apache.spark.sql.types.StructType
 
+import org.apache.kyuubi.config.KyuubiConf.OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES
 import org.apache.kyuubi.engine.spark.shim.SparkCatalogShim
 import org.apache.kyuubi.operation.IterableFetchIterator
 import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._
@@ -32,6 +33,12 @@ class GetTables(
     tableTypes: Set[String])
   extends SparkOperation(session) {
 
+  protected val ignoreTableProperties =
+    spark.conf.getOption(OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES.key) match {
+      case Some(s) => s.toBoolean
+      case _ => session.sessionManager.getConf.get(OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES)
+    }
+
   override def statement: String = {
     super.statement +
       s" [catalog: $catalog," +
@@ -68,7 +75,13 @@ class GetTables(
     val tablePattern = toJavaRegex(tableName)
     val sparkShim = SparkCatalogShim()
     val catalogTablesAndViews =
-      sparkShim.getCatalogTablesOrViews(spark, catalog, schemaPattern, tablePattern, tableTypes)
+      sparkShim.getCatalogTablesOrViews(
+        spark,
+        catalog,
+        schemaPattern,
+        tablePattern,
+        tableTypes,
+        ignoreTableProperties)
 
     val allTableAndViews =
       if (tableTypes.exists("VIEW".equalsIgnoreCase)) {
```
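
The `ignoreTableProperties` resolution above prefers a session-level Spark conf value and falls back to the engine-level Kyuubi default. A minimal, self-contained sketch of that lookup order; the helper below is hypothetical and not part of the Kyuubi API:

```scala
object GetTablesConfSketch {
  // Hypothetical helper: a value set on the Spark session conf wins;
  // otherwise the server-side default applies.
  def resolveIgnoreTableProperties(
      sessionConf: Map[String, String], // stand-in for spark.conf
      serverDefault: Boolean): Boolean =
    sessionConf
      .get("kyuubi.operation.getTables.ignoreTableProperties")
      .map(_.toBoolean)
      .getOrElse(serverDefault)

  def main(args: Array[String]): Unit = {
    // Falls back to the server default when the session does not set the key.
    assert(!resolveIgnoreTableProperties(Map.empty, serverDefault = false))
    // A session-level "true" overrides a server default of "false".
    assert(resolveIgnoreTableProperties(
      Map("kyuubi.operation.getTables.ignoreTableProperties" -> "true"),
      serverDefault = false))
  }
}
```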
```diff
@@ -64,7 +64,8 @@ class CatalogShim_v2_4 extends SparkCatalogShim {
       catalogName: String,
       schemaPattern: String,
       tablePattern: String,
-      tableTypes: Set[String]): Seq[Row] = {
+      tableTypes: Set[String],
+      ignoreTableProperties: Boolean): Seq[Row] = {
     val catalog = spark.sessionState.catalog
     val databases = catalog.listDatabases(schemaPattern)
 
```
```diff
@@ -150,7 +150,8 @@ class CatalogShim_v3_0 extends CatalogShim_v2_4 {
       catalogName: String,
       schemaPattern: String,
       tablePattern: String,
-      tableTypes: Set[String]): Seq[Row] = {
+      tableTypes: Set[String],
+      ignoreTableProperties: Boolean = false): Seq[Row] = {
     val catalog = getCatalog(spark, catalogName)
     val namespaces = listNamespacesWithPattern(catalog, schemaPattern)
     catalog match {
@@ -160,16 +161,17 @@ class CatalogShim_v3_0 extends CatalogShim_v2_4 {
           SESSION_CATALOG,
           schemaPattern,
           tablePattern,
-          tableTypes)
+          tableTypes,
+          ignoreTableProperties)
       case tc: TableCatalog =>
         val tp = tablePattern.r.pattern
         val identifiers = namespaces.flatMap { ns =>
           tc.listTables(ns).filter(i => tp.matcher(quoteIfNeeded(i.name())).matches())
         }
         identifiers.map { ident =>
-          val table = tc.loadTable(ident)
           // TODO: restore view type for session catalog
-          val comment = table.properties().getOrDefault(TableCatalog.PROP_COMMENT, "")
+          val comment = if (ignoreTableProperties) ""
+          else tc.loadTable(ident).properties().getOrDefault(TableCatalog.PROP_COMMENT, "")
           val schema = ident.namespace().map(quoteIfNeeded).mkString(".")
           val tableName = quoteIfNeeded(ident.name())
           Row(catalog.name(), schema, tableName, "TABLE", comment, null, null, null, null, null)
```
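
The speedup in `CatalogShim_v3_0` comes from skipping `loadTable`, which is otherwise called once per table identifier just to read the comment. A rough sketch of that cost model with a hypothetical catalog interface (not the Spark `TableCatalog` API):

```scala
object GetTablesCostSketch {
  // Hypothetical mini-catalog used only to illustrate the N+1 pattern being avoided.
  trait MiniCatalog {
    def listTables(): Seq[String]          // one metastore round trip
    def loadComment(name: String): String  // one extra round trip per table
  }

  // With ignoreTableProperties = true only listTables() is called and the comment
  // column is left empty; with false, loadComment runs once for every table.
  def listTableRows(
      catalog: MiniCatalog,
      ignoreTableProperties: Boolean): Seq[(String, String)] =
    catalog.listTables().map { name =>
      val comment = if (ignoreTableProperties) "" else catalog.loadComment(name)
      (name, comment)
    }
}
```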
```diff
@@ -69,7 +69,8 @@ trait SparkCatalogShim extends Logging {
       catalogName: String,
       schemaPattern: String,
       tablePattern: String,
-      tableTypes: Set[String]): Seq[Row]
+      tableTypes: Set[String],
+      ignoreTableProperties: Boolean): Seq[Row]
 
   def getTempViews(
       spark: SparkSession,
```
```diff
@@ -2713,4 +2713,11 @@ object KyuubiConf {
       .version("1.7.0")
       .timeConf
       .createWithDefault(Duration.ofSeconds(60).toMillis)
+
+  val OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES: ConfigEntry[Boolean] =
+    buildConf("kyuubi.operation.getTables.ignoreTableProperties")
+      .doc("Speed up the `GetTables` operation by returning table identities only.")
+      .version("1.8.0")
+      .booleanConf
+      .createWithDefault(false)
 }
```
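
A hedged sketch of reading and overriding the new entry through `KyuubiConf`; the accessor calls follow the existing `get`/`set` pattern in `KyuubiConf`, and the snippet is illustrative rather than a verified test:

```scala
import org.apache.kyuubi.config.KyuubiConf

object IgnoreTablePropertiesConfSketch {
  def main(args: Array[String]): Unit = {
    val conf = KyuubiConf()
    // Defaults to false: GetTables keeps retrieving per-table properties.
    assert(!conf.get(KyuubiConf.OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES))
    // Opt in to the fast path that returns table identifiers only.
    conf.set(KyuubiConf.OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES, true)
    assert(conf.get(KyuubiConf.OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES))
  }
}
```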
