From 79ab0743d4ffa0dbb1335b266d948398164dce2c Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Sat, 6 Jan 2024 17:15:40 +0800 Subject: [PATCH] Do not collect min max for agg table value columns while doing sample analyze. (#29483) --- .../doris/statistics/OlapAnalysisTask.java | 13 ++++++++-- .../suites/statistics/analyze_stats.groovy | 25 ++++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 81348c1f948c29b..f9ac408825d8bb4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -104,8 +104,10 @@ protected void doSample() throws Exception { // Get basic stats, including min and max. ResultRow basicStats = collectBasicStat(r); long rowCount = tbl.getRowCount(); - String min = StatisticsUtil.escapeSQL(basicStats.get(0)); - String max = StatisticsUtil.escapeSQL(basicStats.get(1)); + String min = StatisticsUtil.escapeSQL(basicStats != null && basicStats.getValues().size() > 0 + ? basicStats.get(0) : null); + String max = StatisticsUtil.escapeSQL(basicStats != null && basicStats.getValues().size() > 1 + ? basicStats.get(1) : null); boolean limitFlag = false; long rowsToSample = pair.second; @@ -166,6 +168,13 @@ protected void doSample() throws Exception { } protected ResultRow collectBasicStat(AutoCloseConnectContext context) { + // Agg table value columns have no zone map. + // For these columns, skip collecting min and max value to avoid scanning the whole table.
+ if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) && !col.isKey()) { + LOG.info("Aggregation table {} column {} is not a key column, skip collecting min and max.", + tbl.getName(), col.getName()); + return null; + } Map params = new HashMap<>(); params.put("dbName", db.getFullName()); params.put("colName", info.colName); diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 7bacd4c833f1610..67c5705b62fe65e 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -2612,6 +2612,30 @@ PARTITION `p599` VALUES IN (599) partition_result = sql """show table stats partition_test""" assertEquals(partition_result[0][6], "false") + // Test sample agg table value column + sql """ + CREATE TABLE `agg_table_test` ( + `id` BIGINT NOT NULL, + `name` VARCHAR(10) REPLACE NULL + ) ENGINE=OLAP + AGGREGATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 32 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """insert into agg_table_test values (1,'name1'), (2, 'name2')""" + Thread.sleep(1000 * 90) + sql """analyze table agg_table_test with sample rows 100 with sync""" + def agg_result = sql """show column stats agg_table_test (name)""" + assertEquals(agg_result[0][6], "N/A") + assertEquals(agg_result[0][7], "N/A") + agg_result = sql """show column stats agg_table_test (id)""" + assertEquals(agg_result[0][6], "1") + assertEquals(agg_result[0][7], "2") + sql """DROP DATABASE IF EXISTS AggTableTest""" + // Test trigger type. sql """DROP DATABASE IF EXISTS trigger""" sql """CREATE DATABASE IF NOT EXISTS trigger""" @@ -2650,5 +2674,4 @@ PARTITION `p599` VALUES IN (599) assertEquals(result[1][10], "MANUAL") } sql """DROP DATABASE IF EXISTS trigger""" - }