Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[improvement](statistics)Support show column partition update rows info. #37124

Merged
merged 1 commit into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4597,9 +4597,9 @@ show_param ::=
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show table stats */
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames opt_col_list:cols
{:
RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached);
RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSet;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.statistics.ColStatsMeta;
import org.apache.doris.statistics.TableStatsMeta;

import com.google.common.collect.ImmutableList;
Expand All @@ -42,7 +44,10 @@
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ShowTableStatsStmt extends ShowStmt {

Expand All @@ -65,15 +70,25 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("row_count")
.build();

private final TableName tableName;
// Result-set column titles used when the statement names both columns and partitions;
// each row built for that case carries the index name, column name, partition name
// and the updated-rows counter, in this order.
private static final ImmutableList<String> COLUMN_PARTITION_TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("index_name")
.add("column_name")
.add("partition_name")
.add("updated_rows")
.build();

private final TableName tableName;
private final List<String> columnNames;
private final PartitionNames partitionNames;
private final boolean cached;

private TableIf table;

public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) {
public ShowTableStatsStmt(
        TableName tableName,
        List<String> columnNames,
        PartitionNames partitionNames,
        boolean cached) {
    // Target table of the SHOW TABLE STATS statement.
    this.tableName = tableName;
    // Optional clauses: either may be null when the statement omits it.
    this.partitionNames = partitionNames;
    this.columnNames = columnNames;
    // Value of the optional "cached" modifier parsed from the statement.
    this.cached = cached;
}
Expand All @@ -89,6 +104,9 @@ public void analyze(Analyzer analyzer) throws UserException {
if (partitionNames != null) {
partitionNames.analyze(analyzer);
}
if (columnNames != null && partitionNames == null) {
ErrorReport.reportAnalysisException(String.format("Must specify partitions when columns are specified."));
}
CatalogIf<DatabaseIf> catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(tableName.getCtl());
if (catalog == null) {
ErrorReport.reportAnalysisException(String.format("Catalog: %s not exists", tableName.getCtl()));
Expand Down Expand Up @@ -122,7 +140,15 @@ public void analyze(Analyzer analyzer) throws UserException {
public ShowResultSetMetaData getMetaData() {
ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();

ImmutableList<String> titles = partitionNames == null ? TABLE_TITLE_NAMES : PARTITION_TITLE_NAMES;
ImmutableList<String> titles;
// If columnNames != null, partitionNames is also not null. Guaranteed in analyze()
if (columnNames != null) {
titles = COLUMN_PARTITION_TITLE_NAMES;
} else if (partitionNames != null) {
titles = PARTITION_TITLE_NAMES;
} else {
titles = TABLE_TITLE_NAMES;
}
for (String title : titles) {
builder.addColumn(new Column(title, ScalarType.createVarchar(30)));
}
Expand All @@ -136,8 +162,11 @@ public TableIf getTable() {
/**
 * Picks the result-set builder that matches the clauses given in the statement.
 *
 * <ul>
 *   <li>no partition clause: table-level result;</li>
 *   <li>partition clause only: per-partition result;</li>
 *   <li>partition clause and column list: per-column, per-partition result.</li>
 * </ul>
 *
 * @param tableStatistic statistics metadata handed through to the chosen builder
 * @return the result set produced by the selected builder
 */
public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
    if (partitionNames == null) {
        return constructTableResultSet(tableStatistic);
    }
    return columnNames != null
            ? constructColumnPartitionResultSet(tableStatistic)
            : constructPartitionResultSet(tableStatistic);
}

Expand Down Expand Up @@ -209,6 +238,43 @@ public ShowResultSet constructPartitionResultSet(TableStatsMeta tableStatistic)
return new ShowResultSet(getMetaData(), result);
}

/**
 * Builds the result set listing, for every requested (index, column) pair, the
 * updated-rows counter recorded per partition.
 *
 * <p>Returns an empty result set when the table is not an {@code OlapTable}. Entries whose
 * partition can no longer be resolved are still listed, with a "Not Exist" placeholder
 * in the partition-name column; entries for existing partitions are listed only when the
 * partition was requested.
 *
 * @param tableStatistic statistics metadata used to look up per-column stats
 * @return one row per (index, column, partition) entry: index name, column name,
 *         partition name and updated-rows value
 * @throws RuntimeException if more than 100 partitions are requested
 */
public ShowResultSet constructColumnPartitionResultSet(TableStatsMeta tableStatistic) {
    List<List<String>> rows = Lists.newArrayList();
    if (!(table instanceof OlapTable)) {
        return new ShowResultSet(getMetaData(), rows);
    }
    OlapTable olap = (OlapTable) table;

    // "partition(*)" expands to every partition of the table.
    Collection<String> wantedPartitions;
    if (partitionNames.isStar()) {
        wantedPartitions = table.getPartitionNames();
    } else {
        wantedPartitions = partitionNames.getPartitionNames();
    }
    if (wantedPartitions.size() > 100) {
        throw new RuntimeException("Too many partitions, show at most 100 partitions each time.");
    }

    for (Pair<String, String> indexAndColumn : olap.getColumnIndexPairs(new HashSet<>(columnNames))) {
        ColStatsMeta meta = tableStatistic.findColumnStatsMeta(indexAndColumn.first, indexAndColumn.second);
        if (meta == null || meta.partitionUpdateRows == null) {
            continue;
        }
        for (Map.Entry<Long, Long> updated : meta.partitionUpdateRows.entrySet()) {
            Partition partition = olap.getPartition(updated.getKey());
            String partitionLabel;
            if (partition == null) {
                // The entry's key no longer resolves to a partition; keep the row but flag it.
                partitionLabel = "Partition " + updated.getKey() + " Not Exist";
            } else if (wantedPartitions.contains(partition.getName())) {
                partitionLabel = partition.getName();
            } else {
                // Existing partition that was not requested.
                continue;
            }
            rows.add(Lists.newArrayList(
                    indexAndColumn.first,
                    indexAndColumn.second,
                    partitionLabel,
                    String.valueOf(updated.getValue())));
        }
    }
    return new ShowResultSet(getMetaData(), rows);
}

/**
 * @return the {@code cached} flag this statement was constructed with
 */
public boolean isCached() {
    return this.cached;
}
Expand Down
45 changes: 44 additions & 1 deletion regression-test/suites/statistics/test_partition_stats.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ suite("test_partition_stats") {
result = sql """select * from internal.__internal_schema.partition_statistics where tbl_id = ${tblIdPart1}"""
assertEquals(0, result.size())

// Test analyze table after drop partition
// Test analyze table after drop partition, test show table column stats
sql """drop database if exists test_partition_stats"""
sql """create database test_partition_stats"""
sql """use test_partition_stats"""
Expand Down Expand Up @@ -234,9 +234,52 @@ suite("test_partition_stats") {
"replication_allocation" = "tag.location.default: 1"
)
"""
result = sql """show table stats part2 partition(*) (id)"""
assertEquals(0, result.size())
result = sql """show table stats part2 partition(*) (colint, coltinyint, colsmallint, colbigint, collargeint, colfloat, coldouble, coldecimal)"""
assertEquals(0, result.size())
sql """analyze table part2 with sync;"""
result = sql """show table stats part2 partition(*) (id)"""
assertEquals(3, result.size())
result = sql """show table stats part2 partition(*) (colint, coltinyint, colsmallint, colbigint, collargeint, colfloat, coldouble, coldecimal)"""
assertEquals(24, result.size())
result = sql """show table stats part2 partition(p1, p2) (id, colint)"""
assertEquals(4, result.size())
result = sql """show table stats part2 partition(p1) (id)"""
assertEquals(1, result.size())
assertEquals("part2", result[0][0])
assertEquals("id", result[0][1])
assertEquals("p1", result[0][2])
assertEquals("0", result[0][3])

sql """Insert into part2 values (1, 1, 1, 1, 1, 1, 1.1, 1.1, 1.1), (2, 2, 2, 2, 2, 2, 2.2, 2.2, 2.2), (3, 3, 3, 3, 3, 3, 3.3, 3.3, 3.3),(4, 4, 4, 4, 4, 4, 4.4, 4.4, 4.4),(5, 5, 5, 5, 5, 5, 5.5, 5.5, 5.5),(6, 6, 6, 6, 6, 6, 6.6, 6.6, 6.6),(10001, 10001, 10001, 10001, 10001, 10001, 10001.10001, 10001.10001, 10001.10001),(10002, 10002, 10002, 10002, 10002, 10002, 10002.10002, 10002.10002, 10002.10002),(10003, 10003, 10003, 10003, 10003, 10003, 10003.10003, 10003.10003, 10003.10003),(10004, 10004, 10004, 10004, 10004, 10004, 10004.10004, 10004.10004, 10004.10004),(10005, 10005, 10005, 10005, 10005, 10005, 10005.10005, 10005.10005, 10005.10005),(10006, 10006, 10006, 10006, 10006, 10006, 10006.10006, 10006.10006, 10006.10006),(20001, 20001, 20001, 20001, 20001, 20001, 20001.20001, 20001.20001, 20001.20001),(20002, 20002, 20002, 20002, 20002, 20002, 20002.20002, 20002.20002, 20002.20002),(20003, 20003, 20003, 20003, 20003, 20003, 20003.20003, 20003.20003, 20003.20003),(20004, 20004, 20004, 20004, 20004, 20004, 20004.20004, 20004.20004, 20004.20004),(20005, 20005, 20005, 20005, 20005, 20005, 20005.20005, 20005.20005, 20005.20005),(20006, 20006, 20006, 20006, 20006, 20006, 20006.20006, 20006.20006, 20006.20006)"""
result = sql """show table stats part2 partition(*) (id)"""
assertEquals(3, result.size())
result = sql """show table stats part2 partition(*) (colint, coltinyint, colsmallint, colbigint, collargeint, colfloat, coldouble, coldecimal)"""
assertEquals(24, result.size())
result = sql """show table stats part2 partition(p1, p2) (id, colint)"""
assertEquals(4, result.size())
result = sql """show table stats part2 partition(p1) (id)"""
assertEquals(1, result.size())
assertEquals("part2", result[0][0])
assertEquals("id", result[0][1])
assertEquals("p1", result[0][2])
assertEquals("0", result[0][3])

sql """analyze table part2 with sync;"""
result = sql """show table stats part2 partition(*) (id)"""
assertEquals(3, result.size())
result = sql """show table stats part2 partition(*) (colint, coltinyint, colsmallint, colbigint, collargeint, colfloat, coldouble, coldecimal)"""
assertEquals(24, result.size())
result = sql """show table stats part2 partition(p1, p2) (id, colint)"""
assertEquals(4, result.size())
result = sql """show table stats part2 partition(p1) (id)"""
assertEquals(1, result.size())
assertEquals("part2", result[0][0])
assertEquals("id", result[0][1])
assertEquals("p1", result[0][2])
assertEquals("6", result[0][3])

result = sql """show column stats part2"""
assertEquals(9, result.size())
assertEquals("18.0", result[0][2])
Expand Down
Loading