Skip to content

Commit

Permalink
Do not collect min max for agg table value columns while doing sample…
Browse files Browse the repository at this point in the history
… analyze. (apache#29483)
  • Loading branch information
Jibing-Li committed Jan 6, 2024
1 parent 17905a8 commit ca169f8
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,10 @@ protected void doSample() throws Exception {
// Get basic stats, including min and max.
ResultRow basicStats = collectBasicStat(r);
long rowCount = tbl.getRowCount();
String min = StatisticsUtil.escapeSQL(basicStats.get(0));
String max = StatisticsUtil.escapeSQL(basicStats.get(1));
String min = StatisticsUtil.escapeSQL(basicStats != null && basicStats.getValues().size() > 0
? basicStats.get(0) : null);
String max = StatisticsUtil.escapeSQL(basicStats != null && basicStats.getValues().size() > 1
? basicStats.get(1) : null);

boolean limitFlag = false;
long rowsToSample = pair.second;
Expand Down Expand Up @@ -166,6 +168,13 @@ protected void doSample() throws Exception {
}

protected ResultRow collectBasicStat(AutoCloseConnectContext context) {
// Value columns of an aggregate-key table have no zone map.
// For these columns, skip collecting min and max values to avoid scanning the whole table.
if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) && !col.isKey()) {
LOG.info("Aggregation table {} column {} is not a key column, skip collecting min and max.",
tbl.getName(), col.getName());
return null;
}
Map<String, String> params = new HashMap<>();
params.put("dbName", db.getFullName());
params.put("colName", info.colName);
Expand Down
25 changes: 24 additions & 1 deletion regression-test/suites/statistics/analyze_stats.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -2610,6 +2610,30 @@ PARTITION `p599` VALUES IN (599)
partition_result = sql """show table stats partition_test"""
assertEquals(partition_result[0][6], "false")

// Test sample analyze on an agg table: the value column reports N/A for min/max, the key column reports real values.
sql """
CREATE TABLE `agg_table_test` (
`id` BIGINT NOT NULL,
`name` VARCHAR(10) REPLACE NULL
) ENGINE=OLAP
AGGREGATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 32
PROPERTIES (
"replication_num" = "1"
);
"""
sql """insert into agg_table_test values (1,'name1'), (2, 'name2')"""
Thread.sleep(1000 * 90)
sql """analyze table agg_table_test with sample rows 100 with sync"""
def agg_result = sql """show column stats agg_table_test (name)"""
assertEquals(agg_result[0][6], "N/A")
assertEquals(agg_result[0][7], "N/A")
agg_result = sql """show column stats agg_table_test (id)"""
assertEquals(agg_result[0][6], "1")
assertEquals(agg_result[0][7], "2")
sql """DROP DATABASE IF EXISTS AggTableTest"""

// Test trigger type.
sql """DROP DATABASE IF EXISTS trigger"""
sql """CREATE DATABASE IF NOT EXISTS trigger"""
Expand Down Expand Up @@ -2648,5 +2672,4 @@ PARTITION `p599` VALUES IN (599)
assertEquals(result[1][10], "MANUAL")
}
sql """DROP DATABASE IF EXISTS trigger"""

}

0 comments on commit ca169f8

Please sign in to comment.