From 3b1bcaf9c2f0b1c73425bf5288cce09848ee9edd Mon Sep 17 00:00:00 2001 From: meiyi Date: Mon, 2 Dec 2024 20:16:22 +0800 Subject: [PATCH] [fix](cluster key) fix cluster key in topn --- be/src/olap/tablet_reader.cpp | 33 ++- .../data/compaction/test_full_compaction.out | 4 + .../compaction/test_full_compaction_ck.out | 47 +++++ .../test_generated_column_nereids.out | 12 +- .../data/unique_with_mow_c_p0/test_select.out | 9 + .../org/apache/doris/regression/Config.groovy | 9 +- .../compaction/test_full_compaction.groovy | 7 +- .../compaction/test_full_compaction_ck.groovy | 189 ++++++++++++++++++ .../test_generated_column_nereids.groovy | 22 +- .../unique_with_mow_c_p0/test_select.groovy | 70 +++++++ 10 files changed, 378 insertions(+), 24 deletions(-) create mode 100644 regression-test/data/compaction/test_full_compaction_ck.out create mode 100644 regression-test/data/unique_with_mow_c_p0/test_select.out create mode 100644 regression-test/suites/compaction/test_full_compaction_ck.groovy create mode 100644 regression-test/suites/unique_with_mow_c_p0/test_select.groovy diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index 7410b70f4aa471..c758b47f360d4b 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -464,13 +464,32 @@ Status TabletReader::_init_orderby_keys_param(const ReaderParams& read_params) { // UNIQUE_KEYS will compare all keys as before if (_tablet_schema->keys_type() == DUP_KEYS || (_tablet_schema->keys_type() == UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write())) { - // find index in vector _return_columns - // for the read_orderby_key_num_prefix_columns orderby keys - for (uint32_t i = 0; i < read_params.read_orderby_key_num_prefix_columns; i++) { - for (uint32_t idx = 0; idx < _return_columns.size(); idx++) { - if (_return_columns[idx] == i) { - _orderby_key_columns.push_back(idx); - break; + if (!_tablet_schema->cluster_key_idxes().empty()) { + for (uint32_t i = 0; i < 
read_params.read_orderby_key_num_prefix_columns; i++) { + auto cid = _tablet_schema->cluster_key_idxes()[i]; + auto index = _tablet_schema->field_index(cid); + if (index < 0) { + return Status::Error( + "could not find cluster key column with unique_id=" + + std::to_string(cid) + " in tablet schema, tablet_id=" + + std::to_string(_tablet->tablet_id())); + } + for (uint32_t idx = 0; idx < _return_columns.size(); idx++) { + if (_return_columns[idx] == index) { + _orderby_key_columns.push_back(idx); + break; + } + } + } + } else { + // find index in vector _return_columns + // for the read_orderby_key_num_prefix_columns orderby keys + for (uint32_t i = 0; i < read_params.read_orderby_key_num_prefix_columns; i++) { + for (uint32_t idx = 0; idx < _return_columns.size(); idx++) { + if (_return_columns[idx] == i) { + _orderby_key_columns.push_back(idx); + break; + } } } } diff --git a/regression-test/data/compaction/test_full_compaction.out b/regression-test/data/compaction/test_full_compaction.out index b25fdad93145cc..aaab47b4c3678b 100644 --- a/regression-test/data/compaction/test_full_compaction.out +++ b/regression-test/data/compaction/test_full_compaction.out @@ -41,3 +41,7 @@ 2 200 3 0 +-- !select_final2 -- +1 100 +2 200 + diff --git a/regression-test/data/compaction/test_full_compaction_ck.out b/regression-test/data/compaction/test_full_compaction_ck.out new file mode 100644 index 00000000000000..f76314e79a7579 --- /dev/null +++ b/regression-test/data/compaction/test_full_compaction_ck.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !1 -- +1 1 +2 2 + +-- !2 -- +1 10 +2 20 + +-- !3 -- +1 100 +2 200 + +-- !4 -- +1 100 +2 200 +3 300 + +-- !5 -- +1 100 +2 200 +3 100 + +-- !6 -- +1 100 +2 200 + +-- !skip_delete -- +1 1 +1 10 +1 100 +2 2 +2 20 +2 200 +3 100 +3 100 +3 300 + +-- !select_final -- +1 100 +2 200 +3 100 + +-- !select_final2 -- +1 100 +2 200 + diff --git a/regression-test/data/ddl_p0/test_create_table_generated_column/test_generated_column_nereids.out b/regression-test/data/ddl_p0/test_create_table_generated_column/test_generated_column_nereids.out index b7757222ab4991..24f2f19c5769a5 100644 --- a/regression-test/data/ddl_p0/test_create_table_generated_column/test_generated_column_nereids.out +++ b/regression-test/data/ddl_p0/test_create_table_generated_column/test_generated_column_nereids.out @@ -170,12 +170,6 @@ c double No false \N NONE,STORED GENERATED b int Yes false \N NONE d int Yes false \N NONE,STORED GENERATED --- !test_update -- -1 - --- !test_update_generated_column -- -1 20 21 - -- !gen_col_unique_key -- 0 @@ -221,3 +215,9 @@ d int Yes false \N NONE,STORED GENERATED -- !agg_replace_null -- 1 2 3 4 13 +-- !test_update -- +1 + +-- !test_update_generated_column -- +1 20 21 + diff --git a/regression-test/data/unique_with_mow_c_p0/test_select.out b/regression-test/data/unique_with_mow_c_p0/test_select.out new file mode 100644 index 00000000000000..11c66ecf4dbc47 --- /dev/null +++ b/regression-test/data/unique_with_mow_c_p0/test_select.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql0 -- +0 +can + +-- !sql1 -- +0 +can + diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy index 5aa6d7ce7e7218..71aaefb4c90e75 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy @@ -1025,8 +1025,15 @@ class Config { if (isCKEnabled) { excludeDirectorySet.add("unique_with_mow_p0/partial_update") excludeDirectorySet.add("unique_with_mow_p0/flexible") + excludeDirectorySet.add("schema_change_p0/unique_ck") excludeDirectorySet.add("doc") - List excludeCases = ["test_table_properties", "test_default_hll", "test_default_pi", "test_full_compaction", "test_full_compaction_by_table_id", "test_create_table", "txn_insert", "test_update_mow", "test_new_update", "test_update_unique", "test_partial_update_generated_column", "nereids_partial_update_native_insert_stmt", "partial_update", "nereids_update_on_current_timestamp", "update_on_current_timestamp", "test_default_bitmap_empty", "nereids_delete_mow_partial_update", "delete_mow_partial_update", "partial_update_seq_col", "nereids_partial_update_native_insert_stmt_complex", "regression_test_variant_delete_and_update", "test_unique_table_auto_inc_partial_update_correct_stream_load", "test_unique_table_auto_inc", "test_unique_table_auto_inc_partial_update_correct_insert", "test_update_schema_change"] + List excludeCases = ["test_table_properties", "test_create_table" + , "test_default_hll", "test_default_pi", "test_default_bitmap_empty" + // partial update + , "txn_insert", "test_update_schema_change", "test_generated_column_update", "test_nested_type_with_rowstore", "test_partial_update_generated_column", "nereids_partial_update_native_insert_stmt" + , "partial_update", 
"nereids_update_on_current_timestamp", "update_on_current_timestamp", "nereids_delete_mow_partial_update", "delete_mow_partial_update", "test_unique_table_auto_inc" + , "test_unique_table_auto_inc_partial_update_correct_insert", "partial_update_seq_col", "nereids_partial_update_native_insert_stmt_complex", "regression_test_variant_delete_and_update" + , "test_unique_table_auto_inc_partial_update_correct_stream_load", "test_update_mow", "test_new_update", "test_update_unique", "nereids_partial_update_native_insert_seq_col"] for (def excludeCase in excludeCases) { excludeSuiteWildcard.add(excludeCase) } diff --git a/regression-test/suites/compaction/test_full_compaction.groovy b/regression-test/suites/compaction/test_full_compaction.groovy index 60f52f6f5a55a0..2a1f0c01ce7a68 100644 --- a/regression-test/suites/compaction/test_full_compaction.groovy +++ b/regression-test/suites/compaction/test_full_compaction.groovy @@ -177,7 +177,12 @@ suite("test_full_compaction") { // make sure all hidden data has been deleted // (1,100)(2,200) qt_select_final """select * from ${tableName} order by user_id""" + + sql "SET skip_delete_predicate = false" + sql "SET skip_delete_sign = false" + sql "SET skip_delete_bitmap = false" + qt_select_final2 """select * from ${tableName} order by user_id""" } finally { - try_sql("DROP TABLE IF EXISTS ${tableName}") + // try_sql("DROP TABLE IF EXISTS ${tableName}") } } diff --git a/regression-test/suites/compaction/test_full_compaction_ck.groovy b/regression-test/suites/compaction/test_full_compaction_ck.groovy new file mode 100644 index 00000000000000..8e2f86c596de65 --- /dev/null +++ b/regression-test/suites/compaction/test_full_compaction_ck.groovy @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_full_compaction_ck") { + def tableName = "test_full_compaction_ck" + + try { + String backend_id; + + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + backend_id = backendId_to_backendIP.keySet()[0] + def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id)) + logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def configList = parseJson(out.trim()) + assert configList instanceof List + + boolean disableAutoCompaction = true + for (Object ele in (List) configList) { + assert ele instanceof List + if (((List) ele)[0] == "disable_auto_compaction") { + disableAutoCompaction = Boolean.parseBoolean(((List) ele)[2]) + } + } + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE ${tableName} ( + `user_id` INT NOT NULL, `value` INT NOT NULL) + UNIQUE KEY(`user_id`) + CLUSTER BY(`value`) + DISTRIBUTED BY HASH(`user_id`) + BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_mow_light_delete" = "false", + "enable_unique_key_merge_on_write" = "true");""" + + // version1 (1,1)(2,2) + sql """ INSERT INTO ${tableName} 
VALUES + (1,1),(2,2) + """ + qt_1 """select * from ${tableName} order by user_id""" + + + // version2 (1,10)(2,20) + sql """ INSERT INTO ${tableName} VALUES + (1,10),(2,20) + """ + qt_2 """select * from ${tableName} order by user_id""" + + + // version3 (1,100)(2,200) + sql """ INSERT INTO ${tableName} VALUES + (1,100),(2,200) + """ + qt_3 """select * from ${tableName} order by user_id""" + + + // version4 (1,100)(2,200)(3,300) + sql """ INSERT INTO ${tableName} VALUES + (3,300) + """ + qt_4 """select * from ${tableName} order by user_id""" + + + // version5 (1,100)(2,200)(3,100) + sql """update ${tableName} set value = 100 where user_id = 3""" + qt_5 """select * from ${tableName} order by user_id""" + + + // version6 (1,100)(2,200) + sql """delete from ${tableName} where user_id = 3""" + qt_6 """select * from ${tableName} order by user_id""" + + sql "SET skip_delete_predicate = true" + sql "SET skip_delete_sign = true" + sql "SET skip_delete_bitmap = true" + // show all hidden data + // (1,1)(1,10)(1,100)(2,2)(2,20)(2,200)(3,100)(3,100)(3,300) + qt_skip_delete """select * from ${tableName} order by user_id, value""" + + //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus + def tablets = sql_return_maparray """ show tablets from ${tableName}; """ + + def replicaNum = get_table_replica_num(tableName) + logger.info("get table replica num: " + replicaNum) + // before full compaction, there are 7 rowsets. 
+ int rowsetCount = 0 + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + (code, out, err) = curl("GET", tablet.CompactionStatus) + logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List) tabletJson.rowsets).size() + } + assert (rowsetCount == 7 * replicaNum) + + // trigger full compactions for all tablets in ${tableName} + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + backend_id = tablet.BackendId + times = 1 + + do{ + (code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + ++times + sleep(2000) + } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10) + + def compactJson = parseJson(out.trim()) + if (compactJson.status.toLowerCase() == "fail") { + assertEquals(disableAutoCompaction, false) + logger.info("Compaction was done automatically!") + } + if (disableAutoCompaction) { + assertEquals("success", compactJson.status.toLowerCase()) + } + } + + // wait for full compaction done + for (def tablet in tablets) { + boolean running = true + do { + Thread.sleep(1000) + String tablet_id = tablet.TabletId + backend_id = tablet.BackendId + (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + // after full compaction, there is only 1 rowset. 
+ + rowsetCount = 0 + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + (code, out, err) = curl("GET", tablet.CompactionStatus) + logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List) tabletJson.rowsets).size() + } + def cloudMode = isCloudMode() + if (cloudMode) { + assert (rowsetCount == 2) + } else { + assert (rowsetCount == 1 * replicaNum) + } + + // old versions are compacted away; skip_delete_* is still on for this query + // (1,100)(2,200)(3,100) + qt_select_final """select * from ${tableName} order by user_id""" + + sql "SET skip_delete_predicate = false" + sql "SET skip_delete_sign = false" + sql "SET skip_delete_bitmap = false" + qt_select_final2 """select * from ${tableName} order by user_id""" + } finally { + // try_sql("DROP TABLE IF EXISTS ${tableName}") + } +} diff --git a/regression-test/suites/ddl_p0/test_create_table_generated_column/test_generated_column_nereids.groovy b/regression-test/suites/ddl_p0/test_create_table_generated_column/test_generated_column_nereids.groovy index 36fbf9d80aa663..b22b7d82ae34c8 100644 --- a/regression-test/suites/ddl_p0/test_create_table_generated_column/test_generated_column_nereids.groovy +++ b/regression-test/suites/ddl_p0/test_create_table_generated_column/test_generated_column_nereids.groovy @@ -134,15 +134,6 @@ suite("test_generated_column") { qt_describe "describe gencol_refer_gencol" - //test update - sql "drop table if exists test_gen_col_update" - sql """create table test_gen_col_update (a int, b int, c int as (a+b)) - unique key(a) - distributed by hash(a) properties("replication_num"="1")""" - sql "insert into test_gen_col_update values(1,3,default)" - qt_test_update "update test_gen_col_update set b=20" - qt_test_update_generated_column "select * from test_gen_col_update" - // test unique table, generated column is not key sql "drop table if exists 
test_gen_col_unique_key" qt_gen_col_unique_key """create table test_gen_col_unique_key(a int,b int,c int generated always as (abs(a+b)) not null) @@ -233,4 +224,17 @@ suite("test_generated_column") { PROPERTIES("replication_num" = "1");""" exception "The generated columns can be key columns, or value columns of replace and replace_if_not_null aggregation type." } + + //test update + sql "drop table if exists test_gen_col_update" + sql """create table test_gen_col_update (a int, b int, c int as (a+b)) + unique key(a) + distributed by hash(a) properties("replication_num"="1")""" + sql "insert into test_gen_col_update values(1,3,default)" + if (!isClusterKeyEnabled()) { + qt_test_update "update test_gen_col_update set b=20" + qt_test_update_generated_column "select * from test_gen_col_update" + } else { + // errCode = 2, detailMessage = The value specified for generated column 'c' in table 'test_gen_col_update' is not allowed + } } \ No newline at end of file diff --git a/regression-test/suites/unique_with_mow_c_p0/test_select.groovy b/regression-test/suites/unique_with_mow_c_p0/test_select.groovy new file mode 100644 index 00000000000000..8cf169e6272dd0 --- /dev/null +++ b/regression-test/suites/unique_with_mow_c_p0/test_select.groovy @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_select") { + sql """ DROP TABLE IF EXISTS test_select0 """ + sql """ DROP TABLE IF EXISTS test_select1 """ + + sql """ + create table test_select0 ( + pk int, + v1 char(255) null, + v2 varchar(255) not null, + v3 varchar(1000) not null + ) engine=olap + UNIQUE KEY(pk) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """ + create table test_select1 ( + pk int, + v1 char(255) null, + v2 varchar(255) not null, + v3 varchar(1000) not null + ) engine=olap + UNIQUE KEY(pk) + cluster by(v2, v3) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """ + insert into test_select0 values + (0,null,'a','1'), + (1,'r','0','9999-12-31 23:59:59'), + (2,'n','she','i'), + (3,'but','can','2024-08-03 13:08:30'); + """ + + sql """ + insert into test_select1 values + (0,null,'a','1'), + (1,'r','0','9999-12-31 23:59:59'), + (2,'n','she','i'), + (3,'but','can','2024-08-03 13:08:30'); + """ + + order_qt_sql0 """ + select v2 from test_select0 where v1 is not null ORDER BY v2 LIMIT 2 ; + """ + + order_qt_sql1 """ + select v2 from test_select1 where v1 is not null ORDER BY v2 LIMIT 2 ; + """ +}