From ffa6fef9e0f542632abf7eaaf748504b0c56f475 Mon Sep 17 00:00:00 2001 From: abmdocrt Date: Wed, 30 Oct 2024 17:48:23 +0800 Subject: [PATCH 1/2] [Enhancement](MS) Add fix tablet data size api for meta service (#41782) --- be/src/cloud/cloud_full_compaction.cpp | 4 + cloud/src/meta-service/meta_service.cpp | 50 ++++ cloud/src/meta-service/meta_service.h | 7 + cloud/src/meta-service/meta_service_http.cpp | 14 + .../meta_service_tablet_stats.cpp | 245 ++++++++++++++++++ .../meta-service/meta_service_tablet_stats.h | 15 ++ .../test_fix_tablet_stat_fault_injection.out | 13 + .../doris/regression/suite/Suite.groovy | 21 ++ ...est_fix_tablet_stat_fault_injection.groovy | 159 ++++++++++++ 9 files changed, 528 insertions(+) create mode 100644 regression-test/data/fault_injection_p0/test_fix_tablet_stat_fault_injection.out create mode 100644 regression-test/suites/fault_injection_p0/test_fix_tablet_stat_fault_injection.groovy diff --git a/be/src/cloud/cloud_full_compaction.cpp b/be/src/cloud/cloud_full_compaction.cpp index 2e11891045c250..f22c449223c448 100644 --- a/be/src/cloud/cloud_full_compaction.cpp +++ b/be/src/cloud/cloud_full_compaction.cpp @@ -195,6 +195,10 @@ Status CloudFullCompaction::modify_rowsets() { compaction_job->set_num_output_rows(_output_rowset->num_rows()); compaction_job->set_size_input_rowsets(_input_rowsets_size); compaction_job->set_size_output_rowsets(_output_rowset->data_disk_size()); + DBUG_EXECUTE_IF("CloudFullCompaction::modify_rowsets.wrong_compaction_data_size", { + compaction_job->set_size_input_rowsets(1); + compaction_job->set_size_output_rowsets(10000001); + }) compaction_job->set_num_input_segments(_input_segments); compaction_job->set_num_output_segments(_output_rowset->num_segments()); compaction_job->set_num_input_rowsets(_input_rowsets.size()); diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 107ca9c0447c13..5b8b1397a686a0 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -2188,4 +2188,54 @@ std::pair MetaServiceImpl::get_instance_info( return {code, std::move(msg)}; } +std::pair init_key_pair(std::string instance_id, int64_t table_id) { + std::string begin_key = stats_tablet_key({instance_id, table_id, 0, 0, 0}); + std::string end_key = stats_tablet_key({instance_id, table_id + 1, 0, 0, 0}); + return std::make_pair(begin_key, end_key); +} + +MetaServiceResponseStatus MetaServiceImpl::fix_tablet_stats(std::string cloud_unique_id_str, + std::string table_id_str) { + // parse params + int64_t table_id; + std::string instance_id; + MetaServiceResponseStatus st = parse_fix_tablet_stats_param( + resource_mgr_, table_id_str, cloud_unique_id_str, table_id, instance_id); + if (st.code() != MetaServiceCode::OK) { + return st; + } + + std::pair key_pair = init_key_pair(instance_id, table_id); + std::string old_begin_key; + while (old_begin_key < key_pair.first) { + // get tablet stats + std::vector> tablet_stat_shared_ptr_vec_batch; + old_begin_key = key_pair.first; + + // fix tablet stats + size_t retry = 0; + do { + st = fix_tablet_stats_internal(txn_kv_, key_pair, tablet_stat_shared_ptr_vec_batch, + instance_id); + if (st.code() != MetaServiceCode::OK) { + LOG_WARNING("failed to fix tablet stats") + .tag("err", st.msg()) + .tag("table id", table_id) + .tag("retry time", retry); + } + retry++; + } while (st.code() != MetaServiceCode::OK && retry < 3); + if (st.code() != MetaServiceCode::OK) { + return st; + } + + // Check tablet stats + st = check_new_tablet_stats(txn_kv_, instance_id, tablet_stat_shared_ptr_vec_batch); + if (st.code() != MetaServiceCode::OK) { + return st; + } + } + return st; +} + } // namespace doris::cloud diff --git a/cloud/src/meta-service/meta_service.h b/cloud/src/meta-service/meta_service.h index f60d795949b366..edc2c97a3eaca8 100644 --- a/cloud/src/meta-service/meta_service.h +++ b/cloud/src/meta-service/meta_service.h @@ -40,6 +40,10 @@ class Transaction; constexpr std::string_view BUILT_IN_STORAGE_VAULT_NAME = "built_in_storage_vault"; +void internal_get_rowset(Transaction* txn, int64_t start, int64_t end, + const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code, + std::string& msg, GetRowsetResponse* response); + class MetaServiceImpl : public cloud::MetaService { public: MetaServiceImpl(std::shared_ptr txn_kv, std::shared_ptr resource_mgr, @@ -298,6 +302,9 @@ class MetaServiceImpl : public cloud::MetaService { const std::string& cloud_unique_id, InstanceInfoPB* instance); + MetaServiceResponseStatus fix_tablet_stats(std::string cloud_unique_id_str, + std::string table_id_str); + private: std::pair alter_instance( const AlterInstanceRequest* request, diff --git a/cloud/src/meta-service/meta_service_http.cpp b/cloud/src/meta-service/meta_service_http.cpp index 9a9f6de97cc4dd..97d57e458ba299 100644 --- a/cloud/src/meta-service/meta_service_http.cpp +++ b/cloud/src/meta-service/meta_service_http.cpp @@ -468,6 +468,16 @@ static HttpResponse process_get_tablet_stats(MetaServiceImpl* service, brpc::Con return http_text_reply(resp.status(), body); } +static HttpResponse process_fix_tablet_stats(MetaServiceImpl* service, brpc::Controller* ctrl) { + auto& uri = ctrl->http_request().uri(); + std::string_view cloud_unique_id = http_query(uri, "cloud_unique_id"); + std::string_view table_id = http_query(uri, "table_id"); + + MetaServiceResponseStatus st = + service->fix_tablet_stats(std::string(cloud_unique_id), std::string(table_id)); + return http_text_reply(st, st.DebugString()); +} + static HttpResponse process_get_stage(MetaServiceImpl* service, brpc::Controller* ctrl) { GetStageRequest req; PARSE_MESSAGE_OR_RETURN(ctrl, req); @@ -575,6 +585,8 @@ void MetaServiceImpl::http(::google::protobuf::RpcController* controller, {"get_value", process_get_value}, {"show_meta_ranges", process_show_meta_ranges}, {"txn_lazy_commit", process_txn_lazy_commit}, + {"injection_point", process_injection_point}, + {"fix_tablet_stats", process_fix_tablet_stats}, {"v1/decode_key", process_decode_key}, {"v1/encode_key", process_encode_key}, {"v1/get_value", process_get_value}, @@ -582,6 +594,8 @@ void MetaServiceImpl::http(::google::protobuf::RpcController* controller, {"v1/txn_lazy_commit", process_txn_lazy_commit}, // for get {"get_instance", process_get_instance_info}, + // for get + {"get_instance", process_get_instance_info}, {"get_obj_store_info", process_get_obj_store_info}, {"get_cluster", process_get_cluster}, {"get_tablet_stats", process_get_tablet_stats}, diff --git a/cloud/src/meta-service/meta_service_tablet_stats.cpp b/cloud/src/meta-service/meta_service_tablet_stats.cpp index 501cecbab76d52..cecccbd67673ad 100644 --- a/cloud/src/meta-service/meta_service_tablet_stats.cpp +++ b/cloud/src/meta-service/meta_service_tablet_stats.cpp @@ -17,13 +17,22 @@ #include "meta-service/meta_service_tablet_stats.h" +#include #include +#include + +#include +#include +#include +#include #include "common/logging.h" #include "common/util.h" #include "meta-service/keys.h" +#include "meta-service/meta_service.h" #include "meta-service/meta_service_helper.h" #include "meta-service/txn_kv.h" +#include "meta-service/txn_kv_error.h" namespace doris::cloud { @@ -156,4 +165,240 @@ void internal_get_tablet_stats(MetaServiceCode& code, std::string& msg, Transact merge_tablet_stats(stats, detached_stats); } +MetaServiceResponseStatus parse_fix_tablet_stats_param( + std::shared_ptr resource_mgr, const std::string& table_id_str, + const std::string& cloud_unique_id_str, int64_t& table_id, std::string& instance_id) { + MetaServiceCode code = MetaServiceCode::OK; + std::string msg; + MetaServiceResponseStatus st; + st.set_code(MetaServiceCode::OK); + + // parse params + try { + table_id = std::stoll(table_id_str); + } catch (...) { + st.set_code(MetaServiceCode::INVALID_ARGUMENT); + st.set_msg("Invalid table_id, table_id: " + table_id_str); + return st; + } + + instance_id = get_instance_id(resource_mgr, cloud_unique_id_str); + if (instance_id.empty()) { + code = MetaServiceCode::INVALID_ARGUMENT; + msg = "empty instance_id"; + LOG(INFO) << msg << ", cloud_unique_id=" << cloud_unique_id_str; + st.set_code(code); + st.set_msg(msg); + return st; + } + return st; +} + +MetaServiceResponseStatus fix_tablet_stats_internal( + std::shared_ptr txn_kv, std::pair& key_pair, + std::vector>& tablet_stat_shared_ptr_vec_batch, + const std::string& instance_id, size_t batch_size) { + std::unique_ptr txn; + MetaServiceResponseStatus st; + st.set_code(MetaServiceCode::OK); + MetaServiceCode code = MetaServiceCode::OK; + std::unique_ptr it; + std::vector> tmp_tablet_stat_vec; + + TxnErrorCode err = txn_kv->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + st.set_code(cast_as(err)); + st.set_msg("failed to create txn"); + return st; + } + + // read tablet stats + err = txn->get(key_pair.first, key_pair.second, &it, true); + if (err != TxnErrorCode::TXN_OK) { + st.set_code(cast_as(err)); + st.set_msg(fmt::format("failed to get tablet stats, err={} ", err)); + return st; + } + + size_t tablet_cnt = 0; + while (it->has_next() && tablet_cnt < batch_size) { + auto [k, v] = it->next(); + key_pair.first = k; + auto k1 = k; + k1.remove_prefix(1); + std::vector, int, int>> out; + decode_key(&k1, &out); + + // 0x01 "stats" ${instance_id} "tablet" ${table_id} ${index_id} ${partition_id} ${tablet_id} -> TabletStatsPB + if (out.size() == 7) { + tablet_cnt++; + TabletStatsPB tablet_stat; + tablet_stat.ParseFromArray(v.data(), v.size()); + tmp_tablet_stat_vec.emplace_back(std::make_shared(tablet_stat)); + } + } + if (it->has_next()) { + key_pair.first = it->next().first; + } + + for (const auto& tablet_stat_ptr : tmp_tablet_stat_vec) { + GetRowsetResponse resp; + std::string msg; + // get rowsets in tablet and accumulate disk size + internal_get_rowset(txn.get(), 0, std::numeric_limits::max() - 1, instance_id, + tablet_stat_ptr->idx().tablet_id(), code, msg, &resp); + if (code != MetaServiceCode::OK) { + st.set_code(code); + st.set_msg(msg); + return st; + } + int64_t total_disk_size = 0; + for (const auto& rs_meta : resp.rowset_meta()) { + total_disk_size += rs_meta.total_disk_size(); + } + + // set new disk size to tabletPB and write it back + TabletStatsPB tablet_stat; + tablet_stat.CopyFrom(*tablet_stat_ptr); + tablet_stat.set_data_size(total_disk_size); + // record tablet stats batch + tablet_stat_shared_ptr_vec_batch.emplace_back(std::make_shared(tablet_stat)); + std::string tablet_stat_key; + std::string tablet_stat_value; + tablet_stat_key = stats_tablet_key( + {instance_id, tablet_stat.idx().table_id(), tablet_stat.idx().index_id(), + tablet_stat.idx().partition_id(), tablet_stat.idx().tablet_id()}); + if (!tablet_stat.SerializeToString(&tablet_stat_value)) { + st.set_code(MetaServiceCode::PROTOBUF_SERIALIZE_ERR); + st.set_msg("failed to serialize tablet stat"); + return st; + } + txn->put(tablet_stat_key, tablet_stat_value); + + // read num segs + // 0x01 "stats" ${instance_id} "tablet" ${table_id} ${index_id} ${partition_id} ${tablet_id} "num_segs" -> int64 + std::string tablet_stat_num_segs_key; + stats_tablet_num_segs_key( + {instance_id, tablet_stat_ptr->idx().table_id(), tablet_stat_ptr->idx().index_id(), + tablet_stat_ptr->idx().partition_id(), tablet_stat_ptr->idx().tablet_id()}, + &tablet_stat_num_segs_key); + int64_t tablet_stat_num_segs = 0; + std::string tablet_stat_num_segs_value(sizeof(tablet_stat_num_segs), '\0'); + err = txn->get(tablet_stat_num_segs_key, &tablet_stat_num_segs_value); + if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { + st.set_code(cast_as(err)); + } + if (tablet_stat_num_segs_value.size() != sizeof(tablet_stat_num_segs)) [[unlikely]] { + LOG(WARNING) << " malformed tablet stats value v.size=" + << tablet_stat_num_segs_value.size() + << " value=" << hex(tablet_stat_num_segs_value); + } + std::memcpy(&tablet_stat_num_segs, tablet_stat_num_segs_value.data(), + sizeof(tablet_stat_num_segs)); + if constexpr (std::endian::native == std::endian::big) { + tablet_stat_num_segs = bswap_64(tablet_stat_num_segs); + } + + if (tablet_stat_num_segs > 0) { + // set tablet stats data size = 0 + // 0x01 "stats" ${instance_id} "tablet" ${table_id} ${index_id} ${partition_id} ${tablet_id} "data_size" -> int64 + std::string tablet_stat_data_size_key; + stats_tablet_data_size_key( + {instance_id, tablet_stat.idx().table_id(), tablet_stat.idx().index_id(), + tablet_stat.idx().partition_id(), tablet_stat.idx().tablet_id()}, + &tablet_stat_data_size_key); + int64_t tablet_stat_data_size = 0; + std::string tablet_stat_data_size_value(sizeof(tablet_stat_data_size), '\0'); + memcpy(tablet_stat_data_size_value.data(), &tablet_stat_data_size, + sizeof(tablet_stat_data_size)); + txn->put(tablet_stat_data_size_key, tablet_stat_data_size_value); + } + } + + err = txn->commit(); + if (err != TxnErrorCode::TXN_OK) { + st.set_code(cast_as(err)); + st.set_msg("failed to commit txn"); + return st; + } + return st; +} + +MetaServiceResponseStatus check_new_tablet_stats( + std::shared_ptr txn_kv, const std::string& instance_id, + const std::vector>& tablet_stat_shared_ptr_vec_batch) { + std::unique_ptr txn; + MetaServiceResponseStatus st; + st.set_code(MetaServiceCode::OK); + + TxnErrorCode err = txn_kv->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + st.set_code(cast_as(err)); + st.set_msg("failed to create txn"); + return st; + } + + for (const auto& tablet_stat_ptr : tablet_stat_shared_ptr_vec_batch) { + // check tablet stats + std::string tablet_stat_key; + std::string tablet_stat_value; + tablet_stat_key = stats_tablet_key( + {instance_id, tablet_stat_ptr->idx().table_id(), tablet_stat_ptr->idx().index_id(), + tablet_stat_ptr->idx().partition_id(), tablet_stat_ptr->idx().tablet_id()}); + err = txn->get(tablet_stat_key, &tablet_stat_value); + if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { + st.set_code(cast_as(err)); + return st; + } + TabletStatsPB tablet_stat_check; + tablet_stat_check.ParseFromArray(tablet_stat_value.data(), tablet_stat_value.size()); + if (tablet_stat_check.DebugString() != tablet_stat_ptr->DebugString() && + // If anyone data size of tablet_stat_check and tablet_stat_ptr is twice bigger than another, + // we need to rewrite it this tablet_stat. + (tablet_stat_check.data_size() > 2 * tablet_stat_ptr->data_size() || + tablet_stat_ptr->data_size() > 2 * tablet_stat_check.data_size())) { + LOG_WARNING("[fix tablet stats]:tablet stats check failed") + .tag("tablet stat", tablet_stat_ptr->DebugString()) + .tag("check tabelt stat", tablet_stat_check.DebugString()); + } + + // check data size + std::string tablet_stat_data_size_key; + stats_tablet_data_size_key( + {instance_id, tablet_stat_ptr->idx().table_id(), tablet_stat_ptr->idx().index_id(), + tablet_stat_ptr->idx().partition_id(), tablet_stat_ptr->idx().tablet_id()}, + &tablet_stat_data_size_key); + int64_t tablet_stat_data_size = 0; + std::string tablet_stat_data_size_value(sizeof(tablet_stat_data_size), '\0'); + err = txn->get(tablet_stat_data_size_key, &tablet_stat_data_size_value); + if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { + st.set_code(cast_as(err)); + return st; + } + int64_t tablet_stat_data_size_check; + + if (tablet_stat_data_size_value.size() != sizeof(tablet_stat_data_size_check)) + [[unlikely]] { + LOG(WARNING) << " malformed tablet stats value v.size=" + << tablet_stat_data_size_value.size() + << " value=" << hex(tablet_stat_data_size_value); + } + std::memcpy(&tablet_stat_data_size_check, tablet_stat_data_size_value.data(), + sizeof(tablet_stat_data_size_check)); + if constexpr (std::endian::native == std::endian::big) { + tablet_stat_data_size_check = bswap_64(tablet_stat_data_size_check); + } + if (tablet_stat_data_size_check != tablet_stat_data_size && + // ditto + (tablet_stat_data_size_check > 2 * tablet_stat_data_size || + tablet_stat_data_size > 2 * tablet_stat_data_size_check)) { + LOG_WARNING("[fix tablet stats]:data size check failed") + .tag("data size", tablet_stat_data_size) + .tag("check data size", tablet_stat_data_size_check); + } + } + + return st; +} + } // namespace doris::cloud diff --git a/cloud/src/meta-service/meta_service_tablet_stats.h b/cloud/src/meta-service/meta_service_tablet_stats.h index 5726cf50b76652..a7aea5885a8e1a 100644 --- a/cloud/src/meta-service/meta_service_tablet_stats.h +++ b/cloud/src/meta-service/meta_service_tablet_stats.h @@ -19,6 +19,8 @@ #include +#include "resource-manager/resource_manager.h" + namespace doris::cloud { class Transaction; class RangeGetIterator; @@ -66,4 +68,17 @@ void internal_get_tablet_stats(MetaServiceCode& code, std::string& msg, Transact TabletStats& detached_stats); // clang-format on +MetaServiceResponseStatus parse_fix_tablet_stats_param( + std::shared_ptr resource_mgr, const std::string& table_id_str, + const std::string& cloud_unique_id_str, int64_t& table_id, std::string& instance_id); + +MetaServiceResponseStatus fix_tablet_stats_internal( + std::shared_ptr txn_kv, std::pair& key_pair, + std::vector>& tablet_stat_shared_ptr_vec_batch, + const std::string& instance_id, size_t batch_size = 20); + +MetaServiceResponseStatus check_new_tablet_stats( + std::shared_ptr txn_kv, const std::string& instance_id, + const std::vector>& tablet_stat_shared_ptr_vec_batch); + } // namespace doris::cloud diff --git a/regression-test/data/fault_injection_p0/test_fix_tablet_stat_fault_injection.out b/regression-test/data/fault_injection_p0/test_fix_tablet_stat_fault_injection.out new file mode 100644 index 00000000000000..a9db9fa716ed0d --- /dev/null +++ b/regression-test/data/fault_injection_p0/test_fix_tablet_stat_fault_injection.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_1 -- +test_fix_tablet_stat_fault_injection test_fix_tablet_stat_fault_injection 518.911 KB 1000 500 0.000 + Total 518.911 KB 1000 0.000 + +-- !select_2 -- +test_fix_tablet_stat_fault_injection test_fix_tablet_stat_fault_injection 9.314 GB 1000 100 0.000 + Total 9.314 GB 1000 0.000 + +-- !select_3 -- +test_fix_tablet_stat_fault_injection test_fix_tablet_stat_fault_injection 114.974 KB 1000 100 0.000 + Total 114.974 KB 1000 0.000 + diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index affbd19a388ee7..997c6c7c66fc4b 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -2413,6 +2413,27 @@ class Suite implements GroovyInterceptable { } } + def fix_tablet_stats = { table_id -> + def jsonOutput = new JsonOutput() + def map = [] + def js = jsonOutput.toJson(map) + log.info("fix tablet stat req: /MetaService/http/fix_tablet_stats?token=${token}&cloud_unique_id=${instance_id}&table_id=${table_id} ".toString()) + + def fix_tablet_stats_api = { request_body, check_func -> + httpTest { + endpoint context.config.metaServiceHttpAddress + uri "/MetaService/http/fix_tablet_stats?token=${token}&cloud_unique_id=${instance_id}&table_id=${table_id}" + body request_body + check check_func + } + } + + fix_tablet_stats_api.call(js) { + respCode, body -> + log.info("fix tablet stats resp: ${body} ${respCode}".toString()) + } + } + public void resetConnection() { context.resetConnection() } diff --git a/regression-test/suites/fault_injection_p0/test_fix_tablet_stat_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_fix_tablet_stat_fault_injection.groovy new file mode 100644 index 00000000000000..d96f6f0ec48cd3 --- /dev/null +++ b/regression-test/suites/fault_injection_p0/test_fix_tablet_stat_fault_injection.groovy @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods +import org.apache.doris.regression.util.Http + +suite("test_fix_tablet_stat_fault_injection", "nonConcurrent") { + if(isCloudMode()){ + def tableName = "test_fix_tablet_stat_fault_injection" + def bucketSize = 10 + def partitionSize = 100 + def maxPartition = partitionSize + 1 + def create_table_sql = """ + CREATE TABLE IF NOT EXISTS ${tableName} + ( + `k1` INT NULL, + `v1` INT NULL, + `v2` INT NULL + ) + UNIQUE KEY (k1) + PARTITION BY RANGE(`k1`) + ( + FROM (1) TO (${maxPartition}) INTERVAL 1 + ) + DISTRIBUTED BY HASH(`k1`) BUCKETS ${bucketSize} + PROPERTIES ( + "replication_num" = "1", + "disable_auto_compaction" = "true" + ); + """ + def insertData = { + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + try { + // enable debug point + GetDebugPoint().enableDebugPointForAllBEs("CloudFullCompaction::modify_rowsets.wrong_compaction_data_size") + // insert data + sql """ DROP TABLE IF EXISTS ${tableName} """ + + sql "${create_table_sql}" + (1..partitionSize).each { i -> + sql "insert into ${tableName} values (${i},1,1);" + sql "insert into ${tableName} values (${i},2,2);" + sql "insert into ${tableName} values (${i},3,3);" + sql "insert into ${tableName} values (${i},4,4);" + sql "insert into ${tableName} values (${i},5,5);" + } + + sql "select count(*) from ${tableName};" + sleep(60000) + qt_select_1 "show data from ${tableName};" + + // check rowsets num + def tablets = sql_return_maparray """ show tablets from ${tableName}; """ + // before full compaction, there are 6 rowsets. + int rowsetCount = 0 + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + (code, out, err) = curl("GET", tablet.CompactionStatus) + logger.info("Show tablets status after insert data: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List) tabletJson.rowsets).size() + } + assert (rowsetCount == 6 * bucketSize * partitionSize) + + // trigger full compactions for all tablets in ${tableName} + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + backend_id = tablet.BackendId + times = 1 + + do{ + (code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + ++times + } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10) + + def compactJson = parseJson(out.trim()) + assertEquals("success", compactJson.status.toLowerCase()) + } + + // wait for full compaction done + for (def tablet in tablets) { + boolean running = true + do { + String tablet_id = tablet.TabletId + backend_id = tablet.BackendId + (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + sleep(60000) + // after full compaction, there are 2 rowsets. + rowsetCount = 0 + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + (code, out, err) = curl("GET", tablet.CompactionStatus) + logger.info("Show tablets status after full compaction: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List) tabletJson.rowsets).size() + } + // assert (rowsetCount == 2 * bucketSize * partitionSize) + + // data size should be very large + sql "select count(*) from ${tableName};" + qt_select_2 "show data from ${tableName};" + + + fix_tablet_stats(getTableId(tableName)) + + sleep(60000) + // after fix, there are 2 rowsets. + rowsetCount = 0 + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + (code, out, err) = curl("GET", tablet.CompactionStatus) + //logger.info("Show tablets status after fix stats: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List) tabletJson.rowsets).size() + } + // assert (rowsetCount == 2 * bucketSize * partitionSize) + // after fix table stats, data size should be normal + sql "select count(*) from ${tableName};" + qt_select_3 "show data from ${tableName};" + } finally { + //try_sql("DROP TABLE IF EXISTS ${tableName}") + GetDebugPoint().disableDebugPointForAllBEs("CloudFullCompaction::modify_rowsets.wrong_compaction_data_size") + } + } + insertData() + } +} + From d43d6122634dcf3c2660ebe279b531601aee5547 Mon Sep 17 00:00:00 2001 From: Yukang-Lian Date: Thu, 7 Nov 2024 23:03:17 +0800 Subject: [PATCH 2/2] 2 --- cloud/src/meta-service/meta_service_http.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cloud/src/meta-service/meta_service_http.cpp b/cloud/src/meta-service/meta_service_http.cpp index 97d57e458ba299..95ed5d614a35c3 100644 --- a/cloud/src/meta-service/meta_service_http.cpp +++ b/cloud/src/meta-service/meta_service_http.cpp @@ -585,7 +585,6 @@ void MetaServiceImpl::http(::google::protobuf::RpcController* controller, {"get_value", process_get_value}, {"show_meta_ranges", process_show_meta_ranges}, {"txn_lazy_commit", process_txn_lazy_commit}, - {"injection_point", process_injection_point}, {"fix_tablet_stats", process_fix_tablet_stats}, {"v1/decode_key", process_decode_key}, {"v1/encode_key", process_encode_key},