From 4c07ce68173b0d204b3b4a957d81c3579442af06 Mon Sep 17 00:00:00 2001 From: Jibing Li Date: Wed, 9 Aug 2023 12:04:06 +0800 Subject: [PATCH] Fix external stats collection bugs. Support show cached table stats Support alter column stats. --- docs/en/docs/lakehouse/external-statistics.md | 7 +++++- .../docs/lakehouse/external-statistics.md | 7 +++++- fe/fe-core/src/main/cup/sql_parser.cup | 4 ++-- .../doris/analysis/AlterColumnStatsStmt.java | 11 +++------ .../apache/doris/analysis/AnalyzeTblStmt.java | 7 ++++-- .../doris/analysis/ShowTableStatsStmt.java | 8 ++++++- .../org/apache/doris/qe/ShowExecutor.java | 15 +++++++++++- .../doris/statistics/AnalysisManager.java | 7 +++--- .../doris/statistics/HMSAnalysisTask.java | 10 ++++++++ .../doris/statistics/StatisticsCache.java | 8 +++++++ .../TableStatisticsCacheLoader.java | 4 +++- .../hive/test_hive_statistic.groovy | 23 +++++++++++++++---- 12 files changed, 86 insertions(+), 25 deletions(-) diff --git a/docs/en/docs/lakehouse/external-statistics.md b/docs/en/docs/lakehouse/external-statistics.md index 6c469961a8bd42..33724fc2388354 100644 --- a/docs/en/docs/lakehouse/external-statistics.md +++ b/docs/en/docs/lakehouse/external-statistics.md @@ -191,6 +191,11 @@ DROP ANALYZE JOB [JOB_ID] Show statistics includes show table statistics (number of rows) and column statistics. Please refer to View statistics in [Internal Table Statistics](../query-acceleration/statistics.md) #### Table statistics +``` +SHOW TABLE [cached] stats TABLE_NAME; +``` + +View row count of the given table. If the cached parameter is specified, the row count of the specified table that has been loaded into the cache is displayed. ``` mysql> SHOW TABLE STATS hive.tpch100.orders; @@ -203,7 +208,7 @@ mysql> SHOW TABLE STATS hive.tpch100.orders; #### Column statistics ``` -SHOW COLUMN [cached] stats hive.tpch100.orders; +SHOW COLUMN [cached] stats TABLE_NAME; ``` View the column statistics of a table. 
If the cached parameter is specified, the column information of the specified table that has been loaded into the cache is displayed. diff --git a/docs/zh-CN/docs/lakehouse/external-statistics.md b/docs/zh-CN/docs/lakehouse/external-statistics.md index 0b47ed53295145..f4f331b2870bd2 100644 --- a/docs/zh-CN/docs/lakehouse/external-statistics.md +++ b/docs/zh-CN/docs/lakehouse/external-statistics.md @@ -191,6 +191,11 @@ DROP ANALYZE JOB [JOB_ID] 信息的查看包括表的统计信息(表的行数)查看和列统计信息查看,请参考[内表统计信息](../query-acceleration/statistics.md)查看统计信息部分。 #### 表统计信息 +``` +SHOW TABLE [cached] stats TABLE_NAME; +``` + +查看statistics表中指定table的行数,如果指定cached参数,则展示的是指定表已加载到缓存中的行数信息。 ``` mysql> SHOW TABLE STATS hive.tpch100.orders; @@ -203,7 +208,7 @@ mysql> SHOW TABLE STATS hive.tpch100.orders; #### 列统计信息 ``` -SHOW COLUMN [cached] stats hive.tpch100.orders; +SHOW COLUMN [cached] stats TABLE_NAME; ``` 查看statistics表中指定table的列统计信息,如果指定cached参数,则展示的是指定表已加载到缓存中的列信息。 diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 7b41cc31900dd8..9376a8f3d5120b 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -4176,9 +4176,9 @@ show_param ::= RESULT = new ShowSyncJobStmt(dbName); :} /* show table stats */ - | KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames + | KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames {: - RESULT = new ShowTableStatsStmt(tbl, partitionNames); + RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached); :} /* show column stats */ | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java index 0e7892dcd109f1..58b81212671fa3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java @@ -22,7 +22,6 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionType; -import org.apache.doris.catalog.Table; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; @@ -148,17 +147,13 @@ private void checkPartitionAndColumn() throws AnalysisException { DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb()); TableIf table = db.getTableOrAnalysisException(tableName.getTbl()); - if (table.getType() != Table.TableType.OLAP) { - throw new AnalysisException("Only OLAP table statistics are supported"); - } - - OlapTable olapTable = (OlapTable) table; - if (olapTable.getColumn(columnName) == null) { + if (table.getColumn(columnName) == null) { ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, columnName, FeNameFormat.getColumnNameRegex()); } - if (optPartitionNames != null) { + if (optPartitionNames != null && table instanceof OlapTable) { + OlapTable olapTable = (OlapTable) table; if (olapTable.getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) { throw new AnalysisException("Not a partitioned table: " + olapTable.getName()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java index 527f802748dcd2..fb4c3bb39a5dac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java @@ -142,8 +142,11 @@ public void check() throws AnalysisException { } checkAnalyzePriv(tableName.getDb(), tableName.getTbl()); if (columnNames == null) { - columnNames = table.getBaseSchema(false) - .stream().map(Column::getName).collect(Collectors.toList()); + // Filter unsupported type columns. 
+ columnNames = table.getBaseSchema(false).stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map(Column::getName) + .collect(Collectors.toList()); } table.readLock(); try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index e462c8585ca984..da10d5c492b1fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -53,12 +53,14 @@ public class ShowTableStatsStmt extends ShowStmt { private final TableName tableName; private final PartitionNames partitionNames; + private final boolean cached; private TableIf table; - public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames) { + public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) { this.tableName = tableName; this.partitionNames = partitionNames; + this.cached = cached; } public TableName getTableName() { @@ -133,4 +135,8 @@ public ShowResultSet constructResultSet(TableStatistic tableStatistic) { result.add(row); return new ShowResultSet(getMetaData(), result); } + + public boolean isCached() { + return cached; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 7b0d6c4d8a7638..4bfc6c61b1993b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -138,6 +138,7 @@ import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.TabletMeta; import org.apache.doris.catalog.View; +import org.apache.doris.catalog.external.ExternalTable; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.clone.DynamicPartitionScheduler; import org.apache.doris.cluster.ClusterNamespace; 
@@ -240,6 +241,7 @@ import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Predicate; @@ -2411,8 +2413,19 @@ private void handleShowTableStats() { ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt; TableIf tableIf = showTableStatsStmt.getTable(); long partitionId = showTableStatsStmt.getPartitionId(); + boolean showCache = showTableStatsStmt.isCached(); try { - if (partitionId > 0) { + if (tableIf instanceof ExternalTable && showCache) { + Optional tableStatistics = Env.getCurrentEnv().getStatisticsCache().getTableStatistics( + tableIf.getDatabase().getCatalog().getId(), + tableIf.getDatabase().getId(), + tableIf.getId()); + if (tableStatistics.isPresent()) { + resultSet = showTableStatsStmt.constructResultSet(tableStatistics.get()); + } else { + resultSet = showTableStatsStmt.constructResultSet(TableStatistic.UNKNOWN); + } + } else if (partitionId > 0) { TableStatistic partStats = StatisticsRepository.fetchTableLevelOfPartStats(partitionId); resultSet = showTableStatsStmt.constructResultSet(partStats); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index cb4d9eb034c1f8..014c18e251ae62 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -276,10 +276,9 @@ public List buildAnalysisInfosForDB(DatabaseIf db, Analyz TableName tableName = new TableName(db.getCatalog().getName(), db.getFullName(), table.getName()); // columnNames null means to add all visitable columns. 
+ // Will get all the visible columns in analyzeTblStmt.check() AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(analyzeProperties, tableName, - table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())).map( - Column::getName).collect( - Collectors.toList()), db.getId(), table); + null, db.getId(), table); try { analyzeTblStmt.check(); } catch (AnalysisException analysisException) { @@ -808,6 +807,8 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException { } if (dropStatsStmt.dropTableRowCount()) { StatisticsRepository.dropExternalTableStatistics(tblId); + // Table cache key doesn't care about catalog id and db id, because the table id is globally unique. + Env.getCurrentEnv().getStatisticsCache().invalidateTableStats(-1, -1, tblId); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index 119368d91d78ef..d569cd79bd4aa0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.FeConstants; import org.apache.doris.common.util.TimeUtils; @@ -291,4 +292,13 @@ private void setParameterData(Map parameters, Map doLoad(StatisticsCacheKey key) { try { TableStatistic tableStatistic = StatisticsRepository.fetchTableLevelStats(key.tableId); - return Optional.of(tableStatistic); + if (tableStatistic != TableStatistic.UNKNOWN) { + return Optional.of(tableStatistic); + } } catch (DdlException e) { LOG.debug("Fail to get table line number from table_statistics table. 
" + "Will try to get from data source.", e); diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy index 90da0738a9f369..0d783c13ad56ce 100644 --- a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy +++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy @@ -221,15 +221,28 @@ suite("test_hive_statistic", "p2,external,hive,external_remote,external_remote_h assertTrue(result[0][6] == "'AIR'") assertTrue(result[0][7] == "'TRUCK'") - // sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')""" - // result = sql "show column stats `statistics` (lo_shipmode)" - // assertTrue(result.size() == 1) - // assertTrue(result[0][0] == "lo_shipmode") - // assertTrue(result[0][1] == "6001215.0") + sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')""" + result = sql "show column stats `statistics` (lo_shipmode)" + assertTrue(result.size() == 1) + assertTrue(result[0][0] == "lo_shipmode") + assertTrue(result[0][1] == "6001215.0") sql """drop stats statistics""" result = sql """show column stats statistics""" assertTrue(result.size() == 0) + + sql """analyze database `statistics` with sync""" + result = sql """show table stats statistics""" + assertTrue(result.size() == 1) + assertTrue(result[0][0] == "100") + + result = sql """show table cached stats statistics""" + assertTrue(result.size() == 1) + assertTrue(result[0][0] == "100") + + sql """drop stats statistics""" + result = sql """show column cached stats statistics""" + assertTrue(result.size() == 0) } }