Skip to content

Commit

Permalink
Fix external stats collection bugs.
Browse files Browse the repository at this point in the history
Support show cached table stats
Support alter column stats.
  • Loading branch information
Jibing-Li committed Aug 10, 2023
1 parent 5147c09 commit e5d0261
Show file tree
Hide file tree
Showing 12 changed files with 78 additions and 25 deletions.
7 changes: 6 additions & 1 deletion docs/en/docs/lakehouse/external-statistics.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,11 @@ DROP ANALYZE JOB [JOB_ID]
Show statistics includes show table statistics (number of rows) and column statistics. Please refer to View statistics in [Internal Table Statistics](../query-acceleration/statistics.md)

#### Table statistics
```
SHOW TABLE [cached] stats TABLE_NAME;
```

View row count of the given table. If the cached parameter is specified, the row count of the specified table that has been loaded into the cache is displayed.

```
mysql> SHOW TABLE STATS hive.tpch100.orders;
Expand All @@ -203,7 +208,7 @@ mysql> SHOW TABLE STATS hive.tpch100.orders;

#### Column statistics
```
SHOW COLUMN [cached] stats hive.tpch100.orders;
SHOW COLUMN [cached] stats TABLE_NAME;
```

View the column statistics of a table. If the cached parameter is specified, the column information of the specified table that has been loaded into the cache is displayed.
Expand Down
7 changes: 6 additions & 1 deletion docs/zh-CN/docs/lakehouse/external-statistics.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,11 @@ DROP ANALYZE JOB [JOB_ID]
信息的查看包括表的统计信息(表的行数)查看和列统计信息查看,请参考[内表统计信息](../query-acceleration/statistics.md)查看统计信息部分。

#### 表统计信息
```
SHOW TALBE [cached] stats TABLE_NAME;
```

查看statistics表中指定table的行数,如果指定cached参数,则展示的是指定表已加载到缓存中的行数信息。

```
mysql> SHOW TABLE STATS hive.tpch100.orders;
Expand All @@ -203,7 +208,7 @@ mysql> SHOW TABLE STATS hive.tpch100.orders;

#### 列统计信息
```
SHOW COLUMN [cached] stats hive.tpch100.orders;
SHOW COLUMN [cached] stats TABLE_NAME;
```

查看statistics表中指定table的列统计信息,如果指定cached参数,则展示的是指定表已加载到缓存中的列信息。
Expand Down
4 changes: 2 additions & 2 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4176,9 +4176,9 @@ show_param ::=
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show table stats */
| KW_TABLE KW_STATS table_name:tbl opt_partition_names:partitionNames
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames
{:
RESULT = new ShowTableStatsStmt(tbl, partitionNames);
RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
Expand Down Expand Up @@ -148,17 +147,13 @@ private void checkPartitionAndColumn() throws AnalysisException {
DatabaseIf db = catalog.getDbOrAnalysisException(tableName.getDb());
TableIf table = db.getTableOrAnalysisException(tableName.getTbl());

if (table.getType() != Table.TableType.OLAP) {
throw new AnalysisException("Only OLAP table statistics are supported");
}

OlapTable olapTable = (OlapTable) table;
if (olapTable.getColumn(columnName) == null) {
if (table.getColumn(columnName) == null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME,
columnName, FeNameFormat.getColumnNameRegex());
}

if (optPartitionNames != null) {
if (optPartitionNames != null && table instanceof OlapTable) {
OlapTable olapTable = (OlapTable) table;
if (olapTable.getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) {
throw new AnalysisException("Not a partitioned table: " + olapTable.getName());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,11 @@ public void check() throws AnalysisException {
}
checkAnalyzePriv(tableName.getDb(), tableName.getTbl());
if (columnNames == null) {
columnNames = table.getBaseSchema(false)
.stream().map(Column::getName).collect(Collectors.toList());
// Filter unsupported type columns.
columnNames = table.getBaseSchema(false).stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName)
.collect(Collectors.toList());
}
table.readLock();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ public class ShowTableStatsStmt extends ShowStmt {
private final TableName tableName;

private final PartitionNames partitionNames;
private final boolean cached;

private TableIf table;

public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames) {
public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) {
this.tableName = tableName;
this.partitionNames = partitionNames;
this.cached = cached;
}

public TableName getTableName() {
Expand Down Expand Up @@ -133,4 +135,8 @@ public ShowResultSet constructResultSet(TableStatistic tableStatistic) {
result.add(row);
return new ShowResultSet(getMetaData(), result);
}

public boolean isCached() {
return cached;
}
}
15 changes: 14 additions & 1 deletion fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@
import org.apache.doris.catalog.TabletInvertedIndex;
import org.apache.doris.catalog.TabletMeta;
import org.apache.doris.catalog.View;
import org.apache.doris.catalog.external.ExternalTable;
import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.clone.DynamicPartitionScheduler;
import org.apache.doris.cluster.ClusterNamespace;
Expand Down Expand Up @@ -240,6 +241,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
Expand Down Expand Up @@ -2411,8 +2413,19 @@ private void handleShowTableStats() {
ShowTableStatsStmt showTableStatsStmt = (ShowTableStatsStmt) stmt;
TableIf tableIf = showTableStatsStmt.getTable();
long partitionId = showTableStatsStmt.getPartitionId();
boolean showCache = showTableStatsStmt.isCached();
try {
if (partitionId > 0) {
if (tableIf instanceof ExternalTable && showCache) {
Optional<TableStatistic> tableStatistics = Env.getCurrentEnv().getStatisticsCache().getTableStatistics(
tableIf.getDatabase().getCatalog().getId(),
tableIf.getDatabase().getId(),
tableIf.getId());
if (tableStatistics.isPresent()) {
resultSet = showTableStatsStmt.constructResultSet(tableStatistics.get());
} else {
resultSet = showTableStatsStmt.constructResultSet(TableStatistic.UNKNOWN);
}
} else if (partitionId > 0) {
TableStatistic partStats = StatisticsRepository.fetchTableLevelOfPartStats(partitionId);
resultSet = showTableStatsStmt.constructResultSet(partStats);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,9 @@ public List<AnalysisInfo> buildAnalysisInfosForDB(DatabaseIf<TableIf> db, Analyz
TableName tableName = new TableName(db.getCatalog().getName(), db.getFullName(),
table.getName());
// columnNames null means to add all visitable columns.
// Will get all the visible columns in analyzeTblStmt.check()
AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(analyzeProperties, tableName,
table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())).map(
Column::getName).collect(
Collectors.toList()), db.getId(), table);
null, db.getId(), table);
try {
analyzeTblStmt.check();
} catch (AnalysisException analysisException) {
Expand Down Expand Up @@ -808,6 +807,8 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException {
}
if (dropStatsStmt.dropTableRowCount()) {
StatisticsRepository.dropExternalTableStatistics(tblId);
// Table cache key doesn't care about catalog id and db id, because the table id is globally unique.
Env.getCurrentEnv().getStatisticsCache().invalidateTableStats(-1, -1, tblId);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.statistics;

import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.external.HMSExternalTable;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.util.TimeUtils;
Expand Down Expand Up @@ -148,6 +149,7 @@ private void getTableStats() throws Exception {
String rowCount = columnResult.get(0).getColumnValue("rowCount");
params.put("rowCount", rowCount);
StatisticsRepository.persistTableStats(params);
Env.getCurrentEnv().getStatisticsCache().refreshTableStatsSync(catalog.getId(), db.getId(), tbl.getId());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,14 @@ public void refreshColStatsSync(long catalogId, long dbId, long tblId, long idxI
columnStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId, idxId, colName));
}

public void invalidateTableStats(long catalogId, long dbId, long tblId) {
tableStatisticsCache.synchronous().invalidate(new StatisticsCacheKey(catalogId, dbId, tblId));
}

public void refreshTableStatsSync(long catalogId, long dbId, long tblId) {
tableStatisticsCache.synchronous().refresh(new StatisticsCacheKey(catalogId, dbId, tblId));
}

public void refreshHistogramSync(long tblId, long idxId, String colName) {
histogramCache.synchronous().refresh(new StatisticsCacheKey(tblId, idxId, colName));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ public class TableStatisticsCacheLoader extends StatisticsCacheLoader<Optional<T
protected Optional<TableStatistic> doLoad(StatisticsCacheKey key) {
try {
TableStatistic tableStatistic = StatisticsRepository.fetchTableLevelStats(key.tableId);
return Optional.of(tableStatistic);
if (tableStatistic != TableStatistic.UNKNOWN) {
return Optional.of(tableStatistic);
}
} catch (DdlException e) {
LOG.debug("Fail to get table line number from table_statistics table. "
+ "Will try to get from data source.", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,28 @@ suite("test_hive_statistic", "p2,external,hive,external_remote,external_remote_h
assertTrue(result[0][6] == "'AIR'")
assertTrue(result[0][7] == "'TRUCK'")

// sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')"""
// result = sql "show column stats `statistics` (lo_shipmode)"
// assertTrue(result.size() == 1)
// assertTrue(result[0][0] == "lo_shipmode")
// assertTrue(result[0][1] == "6001215.0")
sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')"""
result = sql "show column stats `statistics` (lo_shipmode)"
assertTrue(result.size() == 1)
assertTrue(result[0][0] == "lo_shipmode")
assertTrue(result[0][1] == "6001215.0")

sql """drop stats statistics"""
result = sql """show column stats statistics"""
assertTrue(result.size() == 0)

sql """analyze database `statistics` with sync"""
result = sql """show table stats statistics"""
assertTrue(result.size() == 1)
assertTrue(result[0][0] == "100")

result = sql """show table cached stats statistics"""
assertTrue(result.size() == 1)
assertTrue(result[0][0] == "100")

sql """drop stats statistics"""
result = sql """show column cached stats statistics"""
assertTrue(result.size() == 0)
}
}

0 comments on commit e5d0261

Please sign in to comment.