From 08da3bae5e928e4f0355017cef895c9567feb089 Mon Sep 17 00:00:00 2001
From: Lchangliang <915311741@qq.com>
Date: Thu, 1 Aug 2024 11:15:56 +0800
Subject: [PATCH 01/15] tmp

---
 be/src/cloud/cloud_base_compaction.cpp        |  28 ++++-
 be/src/cloud/cloud_cumulative_compaction.cpp  |  41 +++++--
 be/src/cloud/cloud_schema_change_job.cpp      |  84 +++++++-------
 be/src/cloud/cloud_schema_change_job.h        |   2 +-
 be/src/cloud/cloud_tablet.cpp                 |   3 +-
 be/src/cloud/cloud_tablet.h                   |   6 +
 cloud/src/meta-service/meta_service_job.cpp   | 109 ++++++++++++++----
 .../apache/doris/alter/CloudRollupJobV2.java  |   9 ++
 .../doris/alter/CloudSchemaChangeJobV2.java   |  33 +++++-
 .../org/apache/doris/alter/RollupJobV2.java   |   2 +-
 .../apache/doris/alter/SchemaChangeJobV2.java |   8 +-
 .../datasource/CloudInternalCatalog.java      |  43 +++++++
 .../doris/cloud/rpc/MetaServiceClient.java    |  11 ++
 .../doris/cloud/rpc/MetaServiceProxy.java     |  10 ++
 gensrc/proto/cloud.proto                      |   4 +
 15 files changed, 311 insertions(+), 82 deletions(-)

diff --git a/be/src/cloud/cloud_base_compaction.cpp b/be/src/cloud/cloud_base_compaction.cpp
index 81c1d47b7461e5..1ff21d3e8466a2 100644
--- a/be/src/cloud/cloud_base_compaction.cpp
+++ b/be/src/cloud/cloud_base_compaction.cpp
@@ -29,6 +29,7 @@
 #include "service/backend_options.h"
 #include "util/thread.h"
 #include "util/uuid_generator.h"
+#include "vec/runtime/vdatetime_value.h"
 
 namespace doris {
 using namespace ErrorCode;
@@ -82,13 +83,14 @@ Status CloudBaseCompaction::prepare_compact() {
     compaction_job->set_type(cloud::TabletCompactionJobPB::BASE);
     compaction_job->set_base_compaction_cnt(_base_compaction_cnt);
     compaction_job->set_cumulative_compaction_cnt(_cumulative_compaction_cnt);
+    compaction_job->add_input_versions(_input_rowsets.front()->start_version());
+    compaction_job->add_input_versions(_input_rowsets.back()->end_version());
     using namespace std::chrono;
     int64_t now = duration_cast<seconds>(system_clock::now().time_since_epoch()).count();
     _expiration = now + config::compaction_timeout_seconds;
     compaction_job->set_expiration(_expiration);
     compaction_job->set_lease(now + config::lease_compaction_interval_seconds * 4);
     cloud::StartTabletJobResponse resp;
-    //auto st = cloud::meta_mgr()->prepare_tablet_job(job, &resp);
     auto st = _engine.meta_mgr().prepare_tablet_job(job, &resp);
     if (!st.ok()) {
         if (resp.status().code() == cloud::STALE_TABLET_CACHE) {
@@ -97,6 +99,18 @@
         } else if (resp.status().code() == cloud::TABLET_NOT_FOUND) {
             // tablet not found
             cloud_tablet()->clear_cache();
+        } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) {
+            (dynamic_cast<CloudTablet*>(_tablet.get()))->set_alter_version(resp.alter_version());
+            std::stringstream ss;
+            ss << "failed to prepare base compaction. Check compaction input versions "
+                  "failed in schema change. "
+                  "input_version_start="
+               << compaction_job->input_versions(0)
+               << " input_version_end=" << compaction_job->input_versions(1)
+               << " schema_change_alter_version=" << resp.alter_version();
+            std::string msg = ss.str();
+            LOG(WARNING) << msg;
+            return Status::InternalError(msg);
         }
         return st;
     }
@@ -314,6 +328,18 @@ Status CloudBaseCompaction::modify_rowsets() {
     if (!st.ok()) {
         if (resp.status().code() == cloud::TABLET_NOT_FOUND) {
             cloud_tablet()->clear_cache();
+        } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) {
+            (dynamic_cast<CloudTablet*>(_tablet.get()))->set_alter_version(resp.alter_version());
+            std::stringstream ss;
+            ss << "failed to commit base compaction. 
Check compaction input versions " + "failed in schema change. " + "input_version_start=" + << compaction_job->input_versions(0) + << " input_version_end=" << compaction_job->input_versions(1) + << " schema_change_alter_version=" << resp.alter_version(); + std::string msg = ss.str(); + LOG(WARNING) << msg; + return Status::InternalError(msg); } return st; } diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index cc84dce1b58840..93c00f53c93627 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -48,7 +48,7 @@ CloudCumulativeCompaction::CloudCumulativeCompaction(CloudStorageEngine& engine, CloudCumulativeCompaction::~CloudCumulativeCompaction() = default; Status CloudCumulativeCompaction::prepare_compact() { - if (_tablet->tablet_state() != TABLET_RUNNING) { + if (_tablet->tablet_state() != TABLET_RUNNING && dynamic_cast(_tablet.get())->alter_version() == -1) { return Status::InternalError("invalid tablet state. tablet_id={}", _tablet->tablet_id()); } @@ -110,11 +110,11 @@ Status CloudCumulativeCompaction::prepare_compact() { _expiration = now + config::compaction_timeout_seconds; compaction_job->set_expiration(_expiration); compaction_job->set_lease(now + config::lease_compaction_interval_seconds * 4); - if (config::enable_parallel_cumu_compaction) { - // Set input version range to let meta-service judge version range conflict - compaction_job->add_input_versions(_input_rowsets.front()->start_version()); - compaction_job->add_input_versions(_input_rowsets.back()->end_version()); - } + + compaction_job->add_input_versions(_input_rowsets.front()->start_version()); + compaction_job->add_input_versions(_input_rowsets.back()->end_version()); + // Set input version range to let meta-service judge version range conflict + compaction_job->set_judge_input_versions_range(config::enable_parallel_cumu_compaction); cloud::StartTabletJobResponse resp; st = _engine.meta_mgr().prepare_tablet_job(job, &resp); if (!st.ok()) { @@ -141,6 +141,18 @@ Status CloudCumulativeCompaction::prepare_compact() { .tag("msg", resp.status().msg()); return Status::Error("no suitable versions"); } + } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) { + (dynamic_cast(_tablet.get()))->set_alter_version(resp.alter_version()); + std::stringstream ss; + ss << "failed to prepare cumu compaction. Check compaction input versions " + "failed in schema change. " + "input_version_start=" + << compaction_job->input_versions(0) + << " input_version_end=" << compaction_job->input_versions(1) + << " schema_change_alter_version=" << resp.alter_version(); + std::string msg = ss.str(); + LOG(WARNING) << msg; + return Status::InternalError(msg); } return st; } @@ -259,6 +271,18 @@ Status CloudCumulativeCompaction::modify_rowsets() { if (!st.ok()) { if (resp.status().code() == cloud::TABLET_NOT_FOUND) { cloud_tablet()->clear_cache(); + } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) { + (dynamic_cast(_tablet.get()))->set_alter_version(resp.alter_version()); + std::stringstream ss; + ss << "failed to prepare cumu compaction. Check compaction input versions " + "failed in schema change. 
" + "input_version_start=" + << compaction_job->input_versions(0) + << " input_version_end=" << compaction_job->input_versions(1) + << " schema_change_alter_version=" << resp.alter_version(); + std::string msg = ss.str(); + LOG(WARNING) << msg; + return Status::InternalError(msg); } return st; } @@ -344,8 +368,9 @@ Status CloudCumulativeCompaction::pick_rowsets_to_compact() { std::shared_lock rlock(_tablet->get_header_lock()); _base_compaction_cnt = cloud_tablet()->base_compaction_cnt(); _cumulative_compaction_cnt = cloud_tablet()->cumulative_compaction_cnt(); - int64_t candidate_version = - std::max(cloud_tablet()->cumulative_layer_point(), _max_conflict_version + 1); + int64_t candidate_version = std::max( + std::max(cloud_tablet()->cumulative_layer_point(), _max_conflict_version + 1), + cloud_tablet()->alter_version()); // Get all rowsets whose version >= `candidate_version` as candidate rowsets cloud_tablet()->traverse_rowsets( [&candidate_rowsets, candidate_version](const RowsetSharedPtr& rs) { diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index ed3e5f9433fcfd..53d04bc2641941 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -86,13 +86,47 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque RETURN_IF_ERROR(_base_tablet->sync_rowsets(request.alter_version)); // ATTN: Only convert rowsets of version larger than 1, MUST let the new tablet cache have rowset [0-1] _output_cumulative_point = _base_tablet->cumulative_layer_point(); - std::vector rs_splits; int64_t base_max_version = _base_tablet->max_version_unlocked(); + cloud::TabletJobInfoPB job; + auto* idx = job.mutable_idx(); + idx->set_tablet_id(_base_tablet->tablet_id()); + idx->set_table_id(_base_tablet->table_id()); + idx->set_index_id(_base_tablet->index_id()); + idx->set_partition_id(_base_tablet->partition_id()); + auto* sc_job = job.mutable_schema_change(); + sc_job->set_id(_job_id); + sc_job->set_initiator(BackendOptions::get_localhost() + ':' + + std::to_string(config::heartbeat_service_port)); + sc_job->set_alter_version(request.alter_version); + auto* new_tablet_idx = sc_job->mutable_new_tablet_idx(); + new_tablet_idx->set_tablet_id(_new_tablet->tablet_id()); + new_tablet_idx->set_table_id(_new_tablet->table_id()); + new_tablet_idx->set_index_id(_new_tablet->index_id()); + new_tablet_idx->set_partition_id(_new_tablet->partition_id()); + cloud::StartTabletJobResponse start_resp; + auto st = _cloud_storage_engine.meta_mgr().prepare_tablet_job(job, &start_resp); + if (!st.ok()) { + if (start_resp.status().code() == cloud::JOB_ALREADY_SUCCESS) { + st = _new_tablet->sync_rowsets(); + if (!st.ok()) { + LOG_WARNING("failed to sync new tablet") + .tag("tablet_id", _new_tablet->tablet_id()) + .error(st); + } + return Status::OK(); + } + return st; + } + LOG(INFO) << "lightman 0704 " << "start " << request.alter_version << " end " << start_resp.alter_version(); if (request.alter_version > 1) { // [0-1] is a placeholder rowset, no need to convert - RETURN_IF_ERROR(_base_tablet->capture_rs_readers({2, base_max_version}, &rs_splits, false)); + RETURN_IF_ERROR(_base_tablet->capture_rs_readers({2, start_resp.alter_version()}, + &rs_splits, false)); } + _new_tablet->set_alter_version(start_resp.alter_version()); + _base_tablet->set_alter_version(start_resp.alter_version()); + sc_job->set_alter_version(start_resp.alter_version()); // FIXME(cyx): Should trigger compaction on base_tablet if there are too many 
rowsets to convert. // Create a new tablet schema, should merge with dropped columns in light weight schema change @@ -156,7 +190,7 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque } sc_params.vault_id = request.storage_vault_id; if (!request.__isset.materialized_view_params) { - return _convert_historical_rowsets(sc_params); + return _convert_historical_rowsets(sc_params, job); } for (auto item : request.materialized_view_params) { AlterMaterializedViewParam mv_param; @@ -176,10 +210,11 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque std::make_pair(to_lower(item.column_name), mv_param)); } sc_params.enable_unique_key_merge_on_write = _new_tablet->enable_unique_key_merge_on_write(); - return _convert_historical_rowsets(sc_params); + return _convert_historical_rowsets(sc_params, job); } -Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParams& sc_params) { +Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParams& sc_params, + cloud::TabletJobInfoPB& job) { LOG(INFO) << "Begin to convert historical rowsets for new_tablet from base_tablet. base_tablet=" << _base_tablet->tablet_id() << ", new_tablet=" << _new_tablet->tablet_id() << ", job_id=" << _job_id; @@ -210,36 +245,6 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam changer, sc_sorting, _cloud_storage_engine.memory_limitation_bytes_per_thread_for_schema_change()); - cloud::TabletJobInfoPB job; - auto* idx = job.mutable_idx(); - idx->set_tablet_id(_base_tablet->tablet_id()); - idx->set_table_id(_base_tablet->table_id()); - idx->set_index_id(_base_tablet->index_id()); - idx->set_partition_id(_base_tablet->partition_id()); - auto* sc_job = job.mutable_schema_change(); - sc_job->set_id(_job_id); - sc_job->set_initiator(BackendOptions::get_localhost() + ':' + - std::to_string(config::heartbeat_service_port)); - auto* new_tablet_idx = sc_job->mutable_new_tablet_idx(); - new_tablet_idx->set_tablet_id(_new_tablet->tablet_id()); - new_tablet_idx->set_table_id(_new_tablet->table_id()); - new_tablet_idx->set_index_id(_new_tablet->index_id()); - new_tablet_idx->set_partition_id(_new_tablet->partition_id()); - cloud::StartTabletJobResponse start_resp; - auto st = _cloud_storage_engine.meta_mgr().prepare_tablet_job(job, &start_resp); - if (!st.ok()) { - if (start_resp.status().code() == cloud::JOB_ALREADY_SUCCESS) { - st = _new_tablet->sync_rowsets(); - if (!st.ok()) { - LOG_WARNING("failed to sync new tablet") - .tag("tablet_id", _new_tablet->tablet_id()) - .error(st); - } - return Status::OK(); - } - return st; - } - // 3. 
Convert historical data bool already_exist_any_version = false; for (const auto& rs_reader : sc_params.ref_rowset_readers) { @@ -317,10 +322,8 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam VLOG_TRACE << "Successfully convert a history version " << rs_reader->version(); } - - if (sc_params.ref_rowset_readers.empty()) { - sc_job->set_alter_version(1); // no rowset to convert implies alter_version == 1 - } else { + auto* sc_job = job.mutable_schema_change(); + if (!sc_params.ref_rowset_readers.empty()) { int64_t num_output_rows = 0; int64_t size_output_rowsets = 0; int64_t num_output_segments = 0; @@ -335,7 +338,6 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam sc_job->set_size_output_rowsets(size_output_rowsets); sc_job->set_num_output_segments(num_output_segments); sc_job->set_num_output_rowsets(_output_rowsets.size()); - sc_job->set_alter_version(_output_rowsets.back()->end_version()); } _output_cumulative_point = std::min(_output_cumulative_point, sc_job->alter_version() + 1); sc_job->set_output_cumulative_point(_output_cumulative_point); @@ -354,7 +356,7 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam } cloud::FinishTabletJobResponse finish_resp; - st = _cloud_storage_engine.meta_mgr().commit_tablet_job(job, &finish_resp); + auto st = _cloud_storage_engine.meta_mgr().commit_tablet_job(job, &finish_resp); if (!st.ok()) { if (finish_resp.status().code() == cloud::JOB_ALREADY_SUCCESS) { st = _new_tablet->sync_rowsets(); diff --git a/be/src/cloud/cloud_schema_change_job.h b/be/src/cloud/cloud_schema_change_job.h index d587111df717a3..9cedc2bcfdaf35 100644 --- a/be/src/cloud/cloud_schema_change_job.h +++ b/be/src/cloud/cloud_schema_change_job.h @@ -37,7 +37,7 @@ class CloudSchemaChangeJob { Status process_alter_tablet(const TAlterTabletReqV2& request); private: - Status _convert_historical_rowsets(const SchemaChangeParams& sc_params); + Status _convert_historical_rowsets(const SchemaChangeParams& sc_params, cloud::TabletJobInfoPB& job); Status _process_delete_bitmap(int64_t alter_version, int64_t start_calc_delete_bitmap_version, int64_t initiator); diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 17ec1fe22b0d85..71721d33509379 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -592,7 +592,8 @@ std::vector CloudTablet::pick_candidate_rowsets_to_base_compact { std::shared_lock rlock(_meta_lock); for (const auto& [version, rs] : _rs_version_map) { - if (version.first != 0 && version.first < _cumulative_point) { + if (version.first != 0 && version.first < _cumulative_point && + (_alter_version == -1 || version.first < _alter_version)) { candidate_rowsets.push_back(rs); } } diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h index 10ff1835e6c830..0ee92d7b78d4e1 100644 --- a/be/src/cloud/cloud_tablet.h +++ b/be/src/cloud/cloud_tablet.h @@ -145,6 +145,11 @@ class CloudTablet final : public BaseTablet { _last_base_compaction_schedule_millis = millis; } + int64_t alter_version() const { return _alter_version; } + void set_alter_version(int64_t alter_version) { + _alter_version = alter_version; + } + std::vector pick_candidate_rowsets_to_base_compaction(); inline Version max_version() const { @@ -238,6 +243,7 @@ class CloudTablet final : public BaseTablet { int64_t _cumulative_compaction_cnt = 0; int64_t _max_version = -1; int64_t _base_size = 0; + int64_t _alter_version = -1; std::mutex _base_compaction_lock; 
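+    // Assumed semantics in this series, noted here for the members above:
+    // _alter_version == -1 means no schema change is registered for this
+    // tablet; otherwise it is the conversion boundary returned by the
+    // meta-service, and no compaction may produce a rowset whose
+    // [start_version, end_version] spans that boundary.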
std::mutex _cumulative_compaction_lock; diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index 1886d4bdf53537..23687d580c3104 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ -49,6 +49,18 @@ namespace doris::cloud { static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1; static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2; +bool check_compaction_input_verions(const TabletCompactionJobPB& compaction, + const TabletJobInfoPB& job_pb) { + DCHECK_EQ(compaction.input_versions_size(), 2) << proto_to_json(compaction); + DCHECK_LE(compaction.input_versions(0), compaction.input_versions(1)) + << proto_to_json(compaction); + int64_t alter_version = job_pb.schema_change().alter_version(); + return (compaction.type() == TabletCompactionJobPB_CompactionType_BASE && + compaction.input_versions(1) < alter_version) || + (compaction.type() == TabletCompactionJobPB_CompactionType_CUMULATIVE && + compaction.input_versions(0) > alter_version); +} + void start_compaction_job(MetaServiceCode& code, std::string& msg, std::stringstream& ss, std::unique_ptr& txn, const StartTabletJobRequest* request, StartTabletJobResponse* response, std::string& instance_id, @@ -124,6 +136,17 @@ void start_compaction_job(MetaServiceCode& code, std::string& msg, std::stringst } while (err == TxnErrorCode::TXN_OK) { job_pb.ParseFromString(job_val); + if (job_pb.has_schema_change() && !check_compaction_input_verions(compaction, job_pb)) { + SS << "Check compaction input versions failed in schema change. input_version_start=" + << compaction.input_versions(0) + << " input_version_end=" << compaction.input_versions(1) + << " schema_change_alter_version=" << job_pb.schema_change().alter_version(); + msg = ss.str(); + INSTANCE_LOG(INFO) << msg; + code = MetaServiceCode::JOB_CHECK_ALTER_VERSION_FAIL; + response->set_alter_version(job_pb.schema_change().alter_version()); + return; + } if (job_pb.compaction().empty()) { break; } @@ -153,7 +176,8 @@ void start_compaction_job(MetaServiceCode& code, std::string& msg, std::stringst // for MOW table, so priority should be given to performing full // compaction operations and canceling other types of compaction. 
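+            // A note on the branch below (wording assumed from this series):
+            // every compaction now fills input_versions, so an empty range can
+            // no longer mean "range unknown"; the explicit
+            // judge_input_versions_range flag states whether the meta-service
+            // may use the range for conflict judgement. When the flag is
+            // missing or false, parallel compaction of the same type is refused.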
compactions.Clear(); - } else if (compaction.input_versions().empty()) { + } else if (!compaction.has_judge_input_versions_range() || + !compaction.judge_input_versions_range()) { // Unknown input version range, doesn't support parallel compaction of same type for (auto& c : compactions) { if (c.type() != compaction.type() && c.type() != TabletCompactionJobPB::FULL) @@ -214,8 +238,8 @@ void start_compaction_job(MetaServiceCode& code, std::string& msg, std::stringst void start_schema_change_job(MetaServiceCode& code, std::string& msg, std::stringstream& ss, std::unique_ptr& txn, - const StartTabletJobRequest* request, std::string& instance_id, - bool& need_commit) { + const StartTabletJobRequest* request, StartTabletJobResponse* response, + std::string& instance_id, bool& need_commit) { auto& schema_change = request->job().schema_change(); if (!schema_change.has_id() || schema_change.id().empty()) { code = MetaServiceCode::INVALID_ARGUMENT; @@ -290,6 +314,13 @@ void start_schema_change_job(MetaServiceCode& code, std::string& msg, std::strin std::string job_val; TabletJobInfoPB job_pb; err = txn->get(job_key, &job_val); + if (err == TxnErrorCode::TXN_OK) { + job_pb.ParseFromString(job_val); + if (job_pb.has_schema_change() && job_pb.schema_change().id() == schema_change.id()) { + response->set_alter_version(job_pb.schema_change().alter_version()); + return; + } + } if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { SS << "failed to get tablet job, instance_id=" << instance_id << " tablet_id=" << tablet_id << " key=" << hex(job_key) << " err=" << err; @@ -314,6 +345,11 @@ void start_schema_change_job(MetaServiceCode& code, std::string& msg, std::strin } INSTANCE_LOG(INFO) << "schema_change job to save job=" << proto_to_json(schema_change); txn->put(job_key, job_val); + auto new_tablet_job_key = + job_tablet_key({instance_id, new_tablet_idx.table_id(), new_tablet_idx.index_id(), + new_tablet_idx.partition_id(), new_tablet_id}); + txn->put(new_tablet_job_key, job_val); + response->set_alter_version(job_pb.schema_change().alter_version()); need_commit = true; } @@ -386,7 +422,7 @@ void MetaServiceImpl::start_tablet_job(::google::protobuf::RpcController* contro } if (request->job().has_schema_change()) { - start_schema_change_job(code, msg, ss, txn, request, instance_id, need_commit); + start_schema_change_job(code, msg, ss, txn, request, response, instance_id, need_commit); return; } } @@ -558,30 +594,23 @@ void process_compaction_job(MetaServiceCode& code, std::string& msg, std::string return; } - //========================================================================== - // Lease - //========================================================================== - if (request->action() == FinishTabletJobRequest::LEASE) { - if (compaction.lease() <= 0 || recorded_compaction->lease() > compaction.lease()) { - ss << "invalid lease. recoreded_lease=" << recorded_compaction->lease() - << " req_lease=" << compaction.lease(); - msg = ss.str(); - code = MetaServiceCode::INVALID_ARGUMENT; - return; - } - recorded_compaction->set_lease(compaction.lease()); - auto job_val = recorded_job.SerializeAsString(); - txn->put(job_key, job_val); - INSTANCE_LOG(INFO) << "lease tablet compaction job, tablet_id=" << tablet_id - << " key=" << hex(job_key); - need_commit = true; - return; + bool abort_compaction = false; + if (recorded_job.has_schema_change() && + !check_compaction_input_verions(compaction, recorded_job)) { + SS << "Check compaction input versions failed in schema change. 
input_version_start=" + << compaction.input_versions(0) << " input_version_end=" << compaction.input_versions(1) + << " schema_change_alter_version=" << recorded_job.schema_change().alter_version(); + msg = ss.str(); + INSTANCE_LOG(INFO) << msg; + abort_compaction = true; + response->set_alter_version(recorded_job.schema_change().alter_version()); + code = MetaServiceCode::JOB_CHECK_ALTER_VERSION_FAIL; } //========================================================================== // Abort //========================================================================== - if (request->action() == FinishTabletJobRequest::ABORT) { + if (request->action() == FinishTabletJobRequest::ABORT || abort_compaction) { // TODO(gavin): mv tmp rowsets to recycle or remove them directly recorded_job.mutable_compaction()->erase(recorded_compaction); auto job_val = recorded_job.SerializeAsString(); @@ -597,6 +626,26 @@ void process_compaction_job(MetaServiceCode& code, std::string& msg, std::string return; } + //========================================================================== + // Lease + //========================================================================== + if (request->action() == FinishTabletJobRequest::LEASE) { + if (compaction.lease() <= 0 || recorded_compaction->lease() > compaction.lease()) { + ss << "invalid lease. recoreded_lease=" << recorded_compaction->lease() + << " req_lease=" << compaction.lease(); + msg = ss.str(); + code = MetaServiceCode::INVALID_ARGUMENT; + return; + } + recorded_compaction->set_lease(compaction.lease()); + auto job_val = recorded_job.SerializeAsString(); + txn->put(job_key, job_val); + INSTANCE_LOG(INFO) << "lease tablet compaction job, tablet_id=" << tablet_id + << " key=" << hex(job_key); + need_commit = true; + return; + } + //========================================================================== // Commit //========================================================================== @@ -891,6 +940,8 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str auto new_tablet_key = meta_tablet_key( {instance_id, new_table_id, new_index_id, new_partition_id, new_tablet_id}); + auto new_tablet_job_key = job_tablet_key( + {instance_id, new_table_id, new_index_id, new_partition_id, new_tablet_id}); std::string new_tablet_val; doris::TabletMetaCloudPB new_tablet_meta; TxnErrorCode err = txn->get(new_tablet_key, &new_tablet_val); @@ -966,6 +1017,15 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str //========================================================================== if (request->action() == FinishTabletJobRequest::ABORT) { // TODO(cyx) + // remove schema change + recorded_job.clear_schema_change(); + auto job_val = recorded_job.SerializeAsString(); + txn->put(job_key, job_val); + txn->remove(new_tablet_job_key); + INSTANCE_LOG(INFO) << "remove schema_change job tablet_id=" << tablet_id + << " key=" << hex(job_key); + + need_commit = true; return; } @@ -977,7 +1037,7 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str // 2. move rowsets [2-alter_version] in new_tablet to recycle // 3. update new_tablet stats // 4. change tmp rowset to formal rowset - // 5. remove schema_change job (unnecessary) + // 5. 
remove schema_change job // //========================================================================== // update tablet meta @@ -1126,6 +1186,7 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str recorded_job.clear_schema_change(); auto job_val = recorded_job.SerializeAsString(); txn->put(job_key, job_val); + txn->remove(new_tablet_job_key); INSTANCE_LOG(INFO) << "remove schema_change job tablet_id=" << tablet_id << " key=" << hex(job_key); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java index 1c31d74d98630b..c1982daa44d61d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java @@ -124,6 +124,15 @@ protected void onCancel() { try { ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) .dropMaterializedIndex(tableId, rollupIndexList, false); + for (Map.Entry> partitionEntry : partitionIdToBaseRollupTabletIdMap.entrySet()) { + Long partitionId = partitionEntry.getKey(); + Map rollupTabletIdToBaseTabletId = partitionEntry.getValue(); + for (Map.Entry tabletEntry : rollupTabletIdToBaseTabletId.entrySet()) { + Long baseTabletId = tabletEntry.getValue(); + ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) + .removeSchemaChangeJob(dbId, tableId, baseIndexId, partitionId, baseTabletId); + } + } break; } catch (Exception e) { LOG.warn("tryTimes:{}, onCancel exception:", tryTimes, e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java index a8bcc546de33e6..775f5b7894013e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java @@ -32,6 +32,7 @@ import org.apache.doris.cloud.proto.Cloud; import org.apache.doris.cloud.system.CloudSystemInfoService; import org.apache.doris.common.Config; +import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.proto.OlapFile; import org.apache.doris.qe.ConnectContext; @@ -41,6 +42,9 @@ import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.common.collect.HashBasedTable; +import com.google.common.collect.Table; +import com.google.common.collect.Table.Cell; import com.google.gson.annotations.SerializedName; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -49,8 +53,10 @@ import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; public class CloudSchemaChangeJobV2 extends SchemaChangeJobV2 { @@ -106,7 +112,7 @@ protected void commitShadowIndex() throws AlterCancelException { } @Override - protected void postProcessShadowIndex() { + protected void onCancel() { if (Config.enable_check_compatibility_mode) { LOG.info("skip drop shadown indexes in checking compatibility mode"); return; @@ -114,6 +120,31 @@ protected void postProcessShadowIndex() { List shadowIdxList = indexIdMap.keySet().stream().collect(Collectors.toList()); dropIndex(shadowIdxList); + + long tryTimes = 1; + while (true) { + try { + Set>> tableSet = partitionIndexTabletMap.cellSet(); + Iterator>> it = tableSet.iterator(); + 
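// Best-effort cleanup (behaviour assumed from this series): for each
+                // (partition, shadow index) cell, map every shadow tablet back to
+                // its origin tablet and ask the meta-service to abort the recorded
+                // schema-change job, so compaction on the origin tablet is no
+                // longer fenced by a stale alter_version.
+                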
while (it.hasNext()) { + Table.Cell> data = it.next(); + Long partitionId = data.getRowKey(); + Long shadowIndexId = data.getColumnKey(); + Long originIndexId = indexIdMap.get(shadowIndexId); + Map shadowTabletIdToOriginTabletId = data.getValue(); + for (Map.Entry entry : shadowTabletIdToOriginTabletId.entrySet()) { + Long originTabletId = entry.getValue(); + ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) + .removeSchemaChangeJob(dbId, tableId, originIndexId, partitionId, originTabletId); + } + } + break; + } catch (Exception e) { + LOG.warn("tryTimes:{}, onCancel exception:", tryTimes, e); + } + sleepSeveralSeconds(); + tryTimes++; + } } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java index e38c91d296fed4..d22b1bff9fc30e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -104,7 +104,7 @@ public class RollupJobV2 extends AlterJobV2 implements GsonPostProcessable { // partition id -> (rollup tablet id -> base tablet id) @SerializedName(value = "partitionIdToBaseRollupTabletIdMap") - private Map> partitionIdToBaseRollupTabletIdMap = Maps.newHashMap(); + protected Map> partitionIdToBaseRollupTabletIdMap = Maps.newHashMap(); @SerializedName(value = "partitionIdToRollupIndex") protected Map partitionIdToRollupIndex = Maps.newHashMap(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java index 527c8620c6cf77..94dd2421f843ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java @@ -94,7 +94,7 @@ public class SchemaChangeJobV2 extends AlterJobV2 { // partition id -> (shadow index id -> (shadow tablet id -> origin tablet id)) @SerializedName(value = "partitionIndexTabletMap") - private Table> partitionIndexTabletMap = HashBasedTable.create(); + protected Table> partitionIndexTabletMap = HashBasedTable.create(); // partition id -> (shadow index id -> shadow index)) @SerializedName(value = "partitionIndexMap") protected Table partitionIndexMap = HashBasedTable.create(); @@ -755,7 +755,7 @@ protected synchronized boolean cancelImpl(String errMsg) { changeTableState(dbId, tableId, OlapTableState.NORMAL); LOG.info("set table's state to NORMAL when cancel, table id: {}, job id: {}", tableId, jobId); - postProcessShadowIndex(); + onCancel(); return true; } @@ -903,7 +903,7 @@ private void replayRunningJob(SchemaChangeJobV2 replayedJob) { private void replayCancelled(SchemaChangeJobV2 replayedJob) { cancelInternal(); // try best to drop shadow index - postProcessShadowIndex(); + onCancel(); this.jobState = JobState.CANCELLED; this.finishedTimeMs = replayedJob.finishedTimeMs; this.errMsg = replayedJob.errMsg; @@ -1006,7 +1006,7 @@ private void changeTableState(long dbId, long tableId, OlapTableState olapTableS protected void commitShadowIndex() throws AlterCancelException {} // try best to drop shadow index, when job is cancelled in cloud mode - protected void postProcessShadowIndex() {} + protected void onCancel() {} // try best to drop origin index in cloud mode protected void postProcessOriginIndex() {} diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java index e9d4e3c33acedc..0badce2b9674fa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java @@ -845,6 +845,49 @@ private void dropCloudPartition(long dbId, long tableId, List partitionIds } } + public void removeSchemaChangeJob(long dbId, long tableId, long indexId, long partitionId, long tabletId) + throws DdlException { + Cloud.FinishTabletJobRequest.Builder finishTabletJobRequestBuilder = Cloud.FinishTabletJobRequest.newBuilder(); + finishTabletJobRequestBuilder.setCloudUniqueId(Config.cloud_unique_id); + finishTabletJobRequestBuilder.setAction(Cloud.FinishTabletJobRequest.Action.ABORT); + Cloud.TabletJobInfoPB.Builder tabletJobInfoPBBuilder = Cloud.TabletJobInfoPB.newBuilder(); + + Cloud.TabletIndexPB.Builder tabletIndexPBBuilder = Cloud.TabletIndexPB.newBuilder(); + tabletIndexPBBuilder.setDbId(dbId); + tabletIndexPBBuilder.setTableId(tableId); + tabletIndexPBBuilder.setIndexId(indexId); + tabletIndexPBBuilder.setPartitionId(partitionId); + tabletIndexPBBuilder.setTabletId(tabletId); + final Cloud.TabletIndexPB tabletIndex = tabletIndexPBBuilder.build(); + tabletJobInfoPBBuilder.setIdx(tabletIndex); + final Cloud.TabletJobInfoPB tabletJobInfoPB = tabletJobInfoPBBuilder.build(); + finishTabletJobRequestBuilder.setJob(tabletJobInfoPB); + + final Cloud.FinishTabletJobRequest request = finishTabletJobRequestBuilder.build(); + + Cloud.FinishTabletJobResponse response = null; + int tryTimes = 0; + while (tryTimes++ < Config.metaServiceRpcRetryTimes()) { + try { + response = MetaServiceProxy.getInstance().finishTabletJob(request); + if (response.getStatus().getCode() != Cloud.MetaServiceCode.KV_TXN_CONFLICT) { + break; + } + } catch (RpcException e) { + LOG.warn("tryTimes:{}, dropIndex RpcException", tryTimes, e); + if (tryTimes + 1 >= Config.metaServiceRpcRetryTimes()) { + throw new DdlException(e.getMessage()); + } + } + sleepSeveralMs(); + } + + if (response.getStatus().getCode() != Cloud.MetaServiceCode.OK) { + LOG.warn("dropIndex response: {} ", response); + throw new DdlException(response.getStatus().getMsg()); + } + } + public void dropMaterializedIndex(long tableId, List indexIds, boolean dropTable) throws DdlException { if (Config.enable_check_compatibility_mode) { LOG.info("skip dropping materialized index in compatibility checking mode"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceClient.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceClient.java index 5fd42a31f92459..1216d0a684692b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceClient.java @@ -382,4 +382,15 @@ public Cloud.GetInstanceResponse getInstance(Cloud.GetInstanceRequest request) { } return blockingStub.abortTxnWithCoordinator(request); } + + public Cloud.FinishTabletJobResponse + finishTabletJob(Cloud.FinishTabletJobRequest request) { + if (!request.hasCloudUniqueId()) { + Cloud.FinishTabletJobRequest.Builder builder = + Cloud.FinishTabletJobRequest.newBuilder(); + builder.mergeFrom(request); + return blockingStub.finishTabletJob(builder.setCloudUniqueId(Config.cloud_unique_id).build()); + } + return blockingStub.finishTabletJob(request); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java index 5f17692180b353..9f944f0ddf9cda 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java @@ -497,6 +497,16 @@ public Cloud.AlterObjStoreInfoResponse alterStorageVault(Cloud.AlterObjStoreInfo } } + public Cloud.FinishTabletJobResponse finishTabletJob(Cloud.FinishTabletJobRequest request) + throws RpcException { + try { + final MetaServiceClient client = getProxy(); + return client.finishTabletJob(request); + } catch (Exception e) { + throw new RpcException("", e.getMessage(), e); + } + } + public Cloud.GetRLTaskCommitAttachResponse getRLTaskCommitAttach(Cloud.GetRLTaskCommitAttachRequest request) throws RpcException { diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index 7d9c9c19169f40..2d1dc3bdadc9b0 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -536,6 +536,7 @@ message TabletCompactionJobPB { optional int64 lease = 23; // prepare optional int64 delete_bitmap_lock_initiator = 24; optional int64 full_compaction_cnt = 25; // prepare + optional bool judge_input_versions_range = 26; } message TabletSchemaChangeJobPB { @@ -1182,6 +1183,7 @@ message StartTabletJobRequest { message StartTabletJobResponse { optional MetaServiceResponseStatus status = 1; repeated int64 version_in_compaction = 2; + optional int64 alter_version = 3; } message FinishTabletJobRequest { @@ -1199,6 +1201,7 @@ message FinishTabletJobRequest { message FinishTabletJobResponse { optional MetaServiceResponseStatus status = 1; optional TabletStatsPB stats = 2; + optional int64 alter_version = 3; } message BeginCopyRequest { @@ -1337,6 +1340,7 @@ enum MetaServiceCode { JOB_ALREADY_SUCCESS = 5002; ROUTINE_LOAD_DATA_INCONSISTENT = 5003; ROUTINE_LOAD_PROGRESS_NOT_FOUND = 5004; + JOB_CHECK_ALTER_VERSION_FAIL = 5005; // Rate limit MAX_QPS_LIMIT = 6001; From 70873ccc43ec57e062b88be5cff52ae900a9e437 Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Thu, 4 Jul 2024 16:54:05 +0800 Subject: [PATCH 02/15] tmp --- be/src/cloud/cloud_schema_change_job.cpp | 3 ++- be/src/cloud/cloud_schema_change_job.h | 3 ++- be/src/cloud/cloud_tablet.h | 4 +--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index 53d04bc2641941..5761b820357b52 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -118,7 +118,8 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque } return st; } - LOG(INFO) << "lightman 0704 " << "start " << request.alter_version << " end " << start_resp.alter_version(); + LOG(INFO) << "lightman 0704 " + << "start " << request.alter_version << " end " << start_resp.alter_version(); if (request.alter_version > 1) { // [0-1] is a placeholder rowset, no need to convert RETURN_IF_ERROR(_base_tablet->capture_rs_readers({2, start_resp.alter_version()}, diff --git a/be/src/cloud/cloud_schema_change_job.h b/be/src/cloud/cloud_schema_change_job.h index 9cedc2bcfdaf35..c77aae4857049d 100644 --- a/be/src/cloud/cloud_schema_change_job.h +++ b/be/src/cloud/cloud_schema_change_job.h @@ -37,7 +37,8 @@ class CloudSchemaChangeJob { Status process_alter_tablet(const TAlterTabletReqV2& request); private: - Status _convert_historical_rowsets(const SchemaChangeParams& sc_params, cloud::TabletJobInfoPB& job); + Status 
_convert_historical_rowsets(const SchemaChangeParams& sc_params, + cloud::TabletJobInfoPB& job); Status _process_delete_bitmap(int64_t alter_version, int64_t start_calc_delete_bitmap_version, int64_t initiator); diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h index 0ee92d7b78d4e1..2bd1ce475028ab 100644 --- a/be/src/cloud/cloud_tablet.h +++ b/be/src/cloud/cloud_tablet.h @@ -146,9 +146,7 @@ class CloudTablet final : public BaseTablet { } int64_t alter_version() const { return _alter_version; } - void set_alter_version(int64_t alter_version) { - _alter_version = alter_version; - } + void set_alter_version(int64_t alter_version) { _alter_version = alter_version; } std::vector pick_candidate_rowsets_to_base_compaction(); From 96a9d9e0b0a055a4124828f8ff42892db126719d Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Thu, 4 Jul 2024 16:56:22 +0800 Subject: [PATCH 03/15] tmp --- be/src/cloud/cloud_cumulative_compaction.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index 93c00f53c93627..cb6a4877425b4b 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -48,7 +48,8 @@ CloudCumulativeCompaction::CloudCumulativeCompaction(CloudStorageEngine& engine, CloudCumulativeCompaction::~CloudCumulativeCompaction() = default; Status CloudCumulativeCompaction::prepare_compact() { - if (_tablet->tablet_state() != TABLET_RUNNING && dynamic_cast(_tablet.get())->alter_version() == -1) { + if (_tablet->tablet_state() != TABLET_RUNNING && + dynamic_cast(_tablet.get())->alter_version() == -1) { return Status::InternalError("invalid tablet state. tablet_id={}", _tablet->tablet_id()); } From 4fb89c46c069a3d6981288ce5b87affb7118e444 Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Thu, 4 Jul 2024 19:24:55 +0800 Subject: [PATCH 04/15] tmp --- be/src/cloud/cloud_schema_change_job.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index 5761b820357b52..35eb08d216c31e 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -118,8 +118,6 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque } return st; } - LOG(INFO) << "lightman 0704 " - << "start " << request.alter_version << " end " << start_resp.alter_version(); if (request.alter_version > 1) { // [0-1] is a placeholder rowset, no need to convert RETURN_IF_ERROR(_base_tablet->capture_rs_readers({2, start_resp.alter_version()}, From 8b44116e199ef2d07ee7ff4bf63f0f48737acdf7 Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Thu, 4 Jul 2024 20:18:24 +0800 Subject: [PATCH 05/15] tmp --- cloud/src/meta-service/meta_service_job.cpp | 3 ++- cloud/test/meta_service_job_test.cpp | 1 + .../java/org/apache/doris/alter/CloudSchemaChangeJobV2.java | 2 -- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index 23687d580c3104..5dbcf4d6b929db 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ -316,7 +316,8 @@ void start_schema_change_job(MetaServiceCode& code, std::string& msg, std::strin err = txn->get(job_key, &job_val); if (err == TxnErrorCode::TXN_OK) { job_pb.ParseFromString(job_val); - if 
(job_pb.has_schema_change() && job_pb.schema_change().id() == schema_change.id()) { + if (job_pb.has_schema_change() && job_pb.schema_change().id() == schema_change.id() && + job_pb.schema_change().initiator() == schema_change.initiator()) { response->set_alter_version(job_pb.schema_change().alter_version()); return; } diff --git a/cloud/test/meta_service_job_test.cpp b/cloud/test/meta_service_job_test.cpp index 250cf43ea98684..4f6213ab8b47b6 100644 --- a/cloud/test/meta_service_job_test.cpp +++ b/cloud/test/meta_service_job_test.cpp @@ -63,6 +63,7 @@ void start_compaction_job(MetaService* meta_service, int64_t tablet_id, const st if (input_version.first > 0 && input_version.second > 0) { compaction->add_input_versions(input_version.first); compaction->add_input_versions(input_version.second); + compaction->set_judge_input_versions_range(true); } meta_service->start_tablet_job(&cntl, &req, &res, nullptr); }; diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java index 775f5b7894013e..7023f35d3b03f4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java @@ -32,7 +32,6 @@ import org.apache.doris.cloud.proto.Cloud; import org.apache.doris.cloud.system.CloudSystemInfoService; import org.apache.doris.common.Config; -import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.proto.OlapFile; import org.apache.doris.qe.ConnectContext; @@ -42,7 +41,6 @@ import com.google.common.base.Preconditions; import com.google.common.base.Strings; -import com.google.common.collect.HashBasedTable; import com.google.common.collect.Table; import com.google.common.collect.Table.Cell; import com.google.gson.annotations.SerializedName; From c00ff70c286c2705e2100bfe404f262f60888cea Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Fri, 12 Jul 2024 11:37:09 +0800 Subject: [PATCH 06/15] tmp --- be/src/cloud/cloud_cumulative_compaction.cpp | 5 +++-- be/src/cloud/cloud_schema_change_job.cpp | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index cb6a4877425b4b..4d65598183b755 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -173,7 +173,8 @@ Status CloudCumulativeCompaction::prepare_compact() { .tag("tablet_max_version", cloud_tablet()->max_version_unlocked()) .tag("cumulative_point", cloud_tablet()->cumulative_layer_point()) .tag("num_rowsets", cloud_tablet()->fetch_add_approximate_num_rowsets(0)) - .tag("cumu_num_rowsets", cloud_tablet()->fetch_add_approximate_cumu_num_rowsets(0)); + .tag("cumu_num_rowsets", cloud_tablet()->fetch_add_approximate_cumu_num_rowsets(0)) + .tag("lightman 0711 alter_version", cloud_tablet()->alter_version()); return st; } @@ -371,7 +372,7 @@ Status CloudCumulativeCompaction::pick_rowsets_to_compact() { _cumulative_compaction_cnt = cloud_tablet()->cumulative_compaction_cnt(); int64_t candidate_version = std::max( std::max(cloud_tablet()->cumulative_layer_point(), _max_conflict_version + 1), - cloud_tablet()->alter_version()); + cloud_tablet()->alter_version() + 1); // Get all rowsets whose version >= `candidate_version` as candidate rowsets cloud_tablet()->traverse_rowsets( [&candidate_rowsets, 
candidate_version](const RowsetSharedPtr& rs) { diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index 35eb08d216c31e..89a3276bee5eba 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -19,7 +19,9 @@ #include +#include #include +#include #include "cloud/cloud_meta_mgr.h" #include "cloud/cloud_tablet_mgr.h" @@ -34,6 +36,7 @@ #include "olap/tablet_fwd.h" #include "olap/tablet_meta.h" #include "service/backend_options.h" +#include "util/debug_points.h" namespace doris { using namespace ErrorCode; @@ -59,10 +62,9 @@ CloudSchemaChangeJob::CloudSchemaChangeJob(CloudStorageEngine& cloud_storage_eng CloudSchemaChangeJob::~CloudSchemaChangeJob() = default; Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& request) { - LOG(INFO) << "Begin to alter tablet. base_tablet_id=" << request.base_tablet_id - << ", new_tablet_id=" << request.new_tablet_id - << ", alter_version=" << request.alter_version << ", job_id=" << _job_id; + DBUG_EXECUTE_IF("SchemaChangeJob.process_alter_tablet.sleep", + { std::this_thread::sleep_for(std::chrono::seconds(600)); }); // new tablet has to exist _new_tablet = DORIS_TRY(_cloud_storage_engine.tablet_mgr().get_tablet(request.new_tablet_id)); if (_new_tablet->tablet_state() == TABLET_RUNNING) { @@ -123,8 +125,15 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque RETURN_IF_ERROR(_base_tablet->capture_rs_readers({2, start_resp.alter_version()}, &rs_splits, false)); } + Defer defer {[&]() { + _new_tablet->set_alter_version(-1); + _base_tablet->set_alter_version(-1); + }}; _new_tablet->set_alter_version(start_resp.alter_version()); _base_tablet->set_alter_version(start_resp.alter_version()); + LOG(INFO) << "Begin to alter tablet. base_tablet_id=" << request.base_tablet_id + << ", new_tablet_id=" << request.new_tablet_id + << ", alter_version=" << start_resp.alter_version() << ", job_id=" << _job_id; sc_job->set_alter_version(start_resp.alter_version()); // FIXME(cyx): Should trigger compaction on base_tablet if there are too many rowsets to convert. 
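
Taken together, the BE-side and meta-service-side checks in this series enforce
a single invariant: no compaction output rowset may span the schema-change
boundary, because versions [2, alter_version] reach the new tablet through
conversion while versions above alter_version reach it through incremental
load. A minimal standalone sketch of the predicate (the function name is
hypothetical; the <= and > bounds are the ones patch 08 below settles on in
meta_service_job.cpp):

    // Returns true when a compaction over [start, end] is compatible with a
    // registered schema change whose boundary is alter_version.
    bool compaction_allowed(int64_t start, int64_t end, bool is_base,
                            int64_t alter_version) {
        // Base compaction must stay entirely at or below the boundary ...
        if (is_base) return end <= alter_version;
        // ... and cumulative compaction must stay strictly above it.
        return start > alter_version;
    }
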
From 99c6ef9c02fa5e72c9e93e545680781aa5f9ecef Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Fri, 12 Jul 2024 11:37:48 +0800 Subject: [PATCH 07/15] tmp --- be/src/olap/schema_change.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 599d9c1d1423ca..76a12a1b5871ec 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include From 95644c34f27d6e7faf644611a2d25281e6ac23e5 Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Mon, 15 Jul 2024 19:26:56 +0800 Subject: [PATCH 08/15] tmp --- be/src/cloud/cloud_cumulative_compaction.cpp | 3 +-- be/src/cloud/cloud_schema_change_job.cpp | 11 ++++++----- be/src/cloud/cloud_storage_engine.cpp | 14 +++++++------- be/src/cloud/cloud_tablet.cpp | 3 +-- be/src/cloud/cloud_tablet.h | 2 -- cloud/src/meta-service/meta_service_job.cpp | 2 +- 6 files changed, 16 insertions(+), 19 deletions(-) diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index 4d65598183b755..2f092c27323a9e 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -173,8 +173,7 @@ Status CloudCumulativeCompaction::prepare_compact() { .tag("tablet_max_version", cloud_tablet()->max_version_unlocked()) .tag("cumulative_point", cloud_tablet()->cumulative_layer_point()) .tag("num_rowsets", cloud_tablet()->fetch_add_approximate_num_rowsets(0)) - .tag("cumu_num_rowsets", cloud_tablet()->fetch_add_approximate_cumu_num_rowsets(0)) - .tag("lightman 0711 alter_version", cloud_tablet()->alter_version()); + .tag("cumu_num_rowsets", cloud_tablet()->fetch_add_approximate_cumu_num_rowsets(0)); return st; } diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index 89a3276bee5eba..80428123cae8a9 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -63,8 +63,6 @@ CloudSchemaChangeJob::~CloudSchemaChangeJob() = default; Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& request) { - DBUG_EXECUTE_IF("SchemaChangeJob.process_alter_tablet.sleep", - { std::this_thread::sleep_for(std::chrono::seconds(600)); }); // new tablet has to exist _new_tablet = DORIS_TRY(_cloud_storage_engine.tablet_mgr().get_tablet(request.new_tablet_id)); if (_new_tablet->tablet_state() == TABLET_RUNNING) { @@ -100,7 +98,7 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque sc_job->set_id(_job_id); sc_job->set_initiator(BackendOptions::get_localhost() + ':' + std::to_string(config::heartbeat_service_port)); - sc_job->set_alter_version(request.alter_version); + sc_job->set_alter_version(base_max_version); auto* new_tablet_idx = sc_job->mutable_new_tablet_idx(); new_tablet_idx->set_tablet_id(_new_tablet->tablet_id()); new_tablet_idx->set_table_id(_new_tablet->table_id()); @@ -135,6 +133,9 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque << ", new_tablet_id=" << request.new_tablet_id << ", alter_version=" << start_resp.alter_version() << ", job_id=" << _job_id; sc_job->set_alter_version(start_resp.alter_version()); + + DBUG_EXECUTE_IF("SchemaChangeJob.process_alter_tablet.sleep", + { std::this_thread::sleep_for(std::chrono::seconds(120)); }); // FIXME(cyx): Should trigger compaction on base_tablet if there are too many rowsets to convert. 
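+    // The conversion ceiling now comes from the meta-service reply instead of
+    // the local base_max_version: prepare_tablet_job returns the alter_version
+    // recorded for this job, so a retried job converts exactly the same
+    // version range. The delete handler and rowset readers below are capped
+    // to start_resp.alter_version() for the same reason.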
// Create a new tablet schema, should merge with dropped columns in light weight schema change @@ -152,7 +153,7 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque delete_predicates.push_back(rs_meta); } } - RETURN_IF_ERROR(delete_handler.init(_base_tablet_schema, delete_predicates, base_max_version)); + RETURN_IF_ERROR(delete_handler.init(_base_tablet_schema, delete_predicates, start_resp.alter_version())); std::vector return_columns; return_columns.resize(_base_tablet_schema->num_columns()); @@ -169,7 +170,7 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque reader_context.is_unique = _base_tablet->keys_type() == UNIQUE_KEYS; reader_context.batch_size = ALTER_TABLE_BATCH_SIZE; reader_context.delete_bitmap = &_base_tablet->tablet_meta()->delete_bitmap(); - reader_context.version = Version(0, base_max_version); + reader_context.version = Version(0, start_resp.alter_version()); for (auto& split : rs_splits) { RETURN_IF_ERROR(split.rs_reader->init(&reader_context)); diff --git a/be/src/cloud/cloud_storage_engine.cpp b/be/src/cloud/cloud_storage_engine.cpp index de4bbac7b3ef6c..2c3dc87fb81c5e 100644 --- a/be/src/cloud/cloud_storage_engine.cpp +++ b/be/src/cloud/cloud_storage_engine.cpp @@ -550,21 +550,21 @@ std::vector CloudStorageEngine::_generate_cloud_compaction_task std::function filter_out; if (compaction_type == CompactionType::BASE_COMPACTION) { filter_out = [&submitted_base_compactions, &submitted_full_compactions](CloudTablet* t) { - return !!submitted_base_compactions.count(t->tablet_id()) || - !!submitted_full_compactions.count(t->tablet_id()) || + return submitted_base_compactions.contains(t->tablet_id()) || + submitted_full_compactions.contains(t->tablet_id()) || t->tablet_state() != TABLET_RUNNING; }; } else if (config::enable_parallel_cumu_compaction) { filter_out = [&tablet_preparing_cumu_compaction](CloudTablet* t) { - return !!tablet_preparing_cumu_compaction.count(t->tablet_id()) || - t->tablet_state() != TABLET_RUNNING; + return tablet_preparing_cumu_compaction.contains(t->tablet_id()) || + (t->tablet_state() != TABLET_RUNNING && t->alter_version() == -1); }; } else { filter_out = [&tablet_preparing_cumu_compaction, &submitted_cumu_compactions](CloudTablet* t) { - return !!tablet_preparing_cumu_compaction.count(t->tablet_id()) || - !!submitted_cumu_compactions.count(t->tablet_id()) || - t->tablet_state() != TABLET_RUNNING; + return tablet_preparing_cumu_compaction.contains(t->tablet_id()) || + submitted_cumu_compactions.contains(t->tablet_id()) || + (t->tablet_state() != TABLET_RUNNING && t->alter_version() == -1); }; } diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 71721d33509379..1c2bb30f183bf9 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -110,7 +110,6 @@ Status CloudTablet::capture_rs_readers(const Version& spec_version, // There are only two tablet_states RUNNING and NOT_READY in cloud mode // This function will erase the tablet from `CloudTabletMgr` when it can't find this tablet in MS. 
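+// Presumably why the sync_if_not_running() guard disappears below: with this
+// series a tablet can legitimately stay NOT_READY while a schema change is in
+// flight and still needs to sync rowsets (e.g. for the cumulative compaction
+// that may now run on it), so the helper and its declaration in cloud_tablet.h
+// are removed as well.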
Status CloudTablet::sync_rowsets(int64_t query_version, bool warmup_delta_data) { - RETURN_IF_ERROR(sync_if_not_running()); if (query_version > 0) { std::shared_lock rlock(_meta_lock); @@ -593,7 +592,7 @@ std::vector CloudTablet::pick_candidate_rowsets_to_base_compact std::shared_lock rlock(_meta_lock); for (const auto& [version, rs] : _rs_version_map) { if (version.first != 0 && version.first < _cumulative_point && - (_alter_version == -1 || version.first < _alter_version)) { + (_alter_version == -1 || version.second <= _alter_version)) { candidate_rowsets.push_back(rs); } } diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h index 2bd1ce475028ab..43f2a3adafe721 100644 --- a/be/src/cloud/cloud_tablet.h +++ b/be/src/cloud/cloud_tablet.h @@ -206,8 +206,6 @@ class CloudTablet final : public BaseTablet { static void recycle_cached_data(const std::vector& rowsets); - Status sync_if_not_running(); - CloudStorageEngine& _engine; // this mutex MUST ONLY be used when sync meta diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index 5dbcf4d6b929db..9ddca143e96215 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ -56,7 +56,7 @@ bool check_compaction_input_verions(const TabletCompactionJobPB& compaction, << proto_to_json(compaction); int64_t alter_version = job_pb.schema_change().alter_version(); return (compaction.type() == TabletCompactionJobPB_CompactionType_BASE && - compaction.input_versions(1) < alter_version) || + compaction.input_versions(1) <= alter_version) || (compaction.type() == TabletCompactionJobPB_CompactionType_CUMULATIVE && compaction.input_versions(0) > alter_version); } From 87d429d9024a1ca31e110194b88ca31875cf8d87 Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Mon, 22 Jul 2024 16:16:27 +0800 Subject: [PATCH 09/15] tmp --- be/src/cloud/cloud_schema_change_job.cpp | 3 +- cloud/src/meta-service/meta_service_job.cpp | 2 +- .../datasource/CloudInternalCatalog.java | 1 - .../plugins/plugin_curl_requester.groovy | 4 + ...test_schema_change_with_compaction1.groovy | 257 +++++++++++++++++ ...test_schema_change_with_compaction3.groovy | 214 ++++++++++++++ ...test_schema_change_with_compaction5.groovy | 194 +++++++++++++ ...test_schema_change_with_compaction6.groovy | 194 +++++++++++++ ...test_schema_change_with_compaction7.groovy | 259 +++++++++++++++++ ...test_schema_change_with_compaction9.groovy | 262 ++++++++++++++++++ 10 files changed, 1386 insertions(+), 4 deletions(-) create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction1/test_schema_change_with_compaction1.groovy create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction2/test_schema_change_with_compaction3.groovy create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction3/test_schema_change_with_compaction5.groovy create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction4/test_schema_change_with_compaction6.groovy create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction7.groovy create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction9.groovy diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index 80428123cae8a9..0134ac24affa14 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -134,8 +134,6 @@ Status 
CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque << ", alter_version=" << start_resp.alter_version() << ", job_id=" << _job_id; sc_job->set_alter_version(start_resp.alter_version()); - DBUG_EXECUTE_IF("SchemaChangeJob.process_alter_tablet.sleep", - { std::this_thread::sleep_for(std::chrono::seconds(120)); }); // FIXME(cyx): Should trigger compaction on base_tablet if there are too many rowsets to convert. // Create a new tablet schema, should merge with dropped columns in light weight schema change @@ -351,6 +349,7 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam _output_cumulative_point = std::min(_output_cumulative_point, sc_job->alter_version() + 1); sc_job->set_output_cumulative_point(_output_cumulative_point); + DBUG_EXECUTE_IF("CloudSchemaChangeJob.process_alter_tablet.sleep", DBUG_BLOCK); // process delete bitmap if the table is MOW if (_new_tablet->enable_unique_key_merge_on_write()) { int64_t initiator = boost::uuids::hash_value(UUIDGenerator::instance()->next_uuid()) & diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index 9ddca143e96215..76e74b162b29d1 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ -596,7 +596,7 @@ void process_compaction_job(MetaServiceCode& code, std::string& msg, std::string } bool abort_compaction = false; - if (recorded_job.has_schema_change() && + if (recorded_job.has_schema_change() && request->action() == FinishTabletJobRequest::COMMIT && !check_compaction_input_verions(compaction, recorded_job)) { SS << "Check compaction input versions failed in schema change. input_version_start=" << compaction.input_versions(0) << " input_version_end=" << compaction.input_versions(1) diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java index 0badce2b9674fa..8563d1aafdff6a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java @@ -884,7 +884,6 @@ public void removeSchemaChangeJob(long dbId, long tableId, long indexId, long pa if (response.getStatus().getCode() != Cloud.MetaServiceCode.OK) { LOG.warn("dropIndex response: {} ", response); - throw new DdlException(response.getStatus().getMsg()); } } diff --git a/regression-test/plugins/plugin_curl_requester.groovy b/regression-test/plugins/plugin_curl_requester.groovy index 1e097a045a76d5..4be7f4b2e0ff6a 100644 --- a/regression-test/plugins/plugin_curl_requester.groovy +++ b/regression-test/plugins/plugin_curl_requester.groovy @@ -173,6 +173,10 @@ Suite.metaClass.be_get_overall_compaction_status{ String ip, String port /* par return curl("GET", String.format("http://%s:%s/api/compaction/run_status", ip, port)) } +Suite.metaClass.be_show_tablet_status{ String ip, String port, String tablet_id /* param */-> + return curl("GET", String.format("http://%s:%s/api/compaction/show?tablet_id=%s", ip, port, tablet_id)) +} + logger.info("Added 'be_get_compaction_status' function to Suite") Suite.metaClass._be_run_compaction = { String ip, String port, String tablet_id, String compact_type -> diff --git a/regression-test/suites/cloud_p0/schema_change/compaction1/test_schema_change_with_compaction1.groovy 
b/regression-test/suites/cloud_p0/schema_change/compaction1/test_schema_change_with_compaction1.groovy new file mode 100644 index 00000000000000..93bb6254303301 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction1/test_schema_change_with_compaction1.groovy @@ -0,0 +1,257 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Most of the cases are copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases +// and modified by Doris. + +// Note: To filter out tables from sql files, use the following one-liner comamnd +// sed -nr 's/.*tables: (.*)$/\1/gp' /path/to/*.sql | sed -nr 's/,/\n/gp' | sort | uniq + +import org.apache.doris.regression.util.DebugPoint + +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction1', 'nonConcurrent') { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new File("""${context.file.parent}/../ddl/date_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = 
backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + load_date_once("date"); + + sleep(1000) + + DebugPoint.enableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + + // cu compaction + for (int i = 0; i < array.size(); i++) { + tabletId = array[i].TabletId + logger.info("run compaction:" + tabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + } + + for (int i = 0; i < array.size(); i++) { + running = true + do { + Thread.sleep(100) + tabletId = array[i].TabletId + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + } finally { + if (injectBe != null) { + DebugPoint.disableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED" || result == "CANCELLED") { + sleep(3000) + break + } else { + sleep(100) + if 
(max_try_time < 1){ + assertEquals(1,2) + } + } + } + assertEquals(result, "FINISHED"); + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 23004); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-2]")) + assertTrue(out.contains("[7-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + // base compaction + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + for (int i = 0; i < 3; i++) { + load_date_once("date"); + } + + sql """ select count(*) from date """ + + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + // wait for all compactions done + running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-16]")) + } + +} \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/compaction2/test_schema_change_with_compaction3.groovy b/regression-test/suites/cloud_p0/schema_change/compaction2/test_schema_change_with_compaction3.groovy new file mode 100644 index 00000000000000..c8ca8a54109824 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction2/test_schema_change_with_compaction3.groovy @@ -0,0 +1,214 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Most of the cases are copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases +// and modified by Doris. + +// Note: To filter out tables from sql files, use the following one-liner comamnd +// sed -nr 's/.*tables: (.*)$/\1/gp' /path/to/*.sql | sed -nr 's/,/\n/gp' | sort | uniq + +import org.apache.doris.regression.util.DebugPoint + +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction2', 'nonConcurrent') { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new File("""${context.file.parent}/../ddl/date_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + 
originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + + sleep(1000) + + DebugPoint.enableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + } finally { + if (injectBe != null) { + DebugPoint.disableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED") { + sleep(3000) + break + } else { + sleep(100) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 20448); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-2]")) + assertTrue(out.contains("[7-7]")) + + // base compaction + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, 
injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-12]")) + } + +} \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/compaction3/test_schema_change_with_compaction5.groovy b/regression-test/suites/cloud_p0/schema_change/compaction3/test_schema_change_with_compaction5.groovy new file mode 100644 index 00000000000000..b2aab9f2dc7c84 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction3/test_schema_change_with_compaction5.groovy @@ -0,0 +1,194 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Most of the cases are copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases +// and modified by Doris. 
+ +// Note: To filter out tables from sql files, use the following one-liner comamnd +// sed -nr 's/.*tables: (.*)$/\1/gp' /path/to/*.sql | sed -nr 's/,/\n/gp' | sort | uniq + +import org.apache.doris.regression.util.DebugPoint + +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction3', 'nonConcurrent') { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new File("""${context.file.parent}/../ddl/date_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + load_date_once("date"); + + sleep(1000) + + DebugPoint.enableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM 
date") + + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + + // cu compaction + for (int i = 0; i < array.size(); i++) { + tabletId = array[i].TabletId + logger.info("run compaction:" + tabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + } + + for (int i = 0; i < array.size(); i++) { + running = true + do { + Thread.sleep(100) + tabletId = array[i].TabletId + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + } finally { + sql """ CANCEL ALTER TABLE COLUMN FROM date """ + if (injectBe != null) { + DebugPoint.disableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED" || result == "CANCELLED") { + sleep(3000) + break + } else { + sleep(100) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + assertEquals(result, "CANCELLED"); + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 23004); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + } + +} \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/compaction4/test_schema_change_with_compaction6.groovy b/regression-test/suites/cloud_p0/schema_change/compaction4/test_schema_change_with_compaction6.groovy new file mode 100644 index 00000000000000..4b53dbdd998104 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction4/test_schema_change_with_compaction6.groovy @@ -0,0 +1,194 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Most of the cases are copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases +// and modified by Doris. + +// Note: To filter out tables from sql files, use the following one-liner comamnd +// sed -nr 's/.*tables: (.*)$/\1/gp' /path/to/*.sql | sed -nr 's/,/\n/gp' | sort | uniq + +import org.apache.doris.regression.util.DebugPoint + +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction4', 'nonConcurrent') { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE MATERIALIZED VIEW WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][8] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new File("""${context.file.parent}/../ddl/date_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean 
running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + load_date_once("date"); + + sleep(1000) + + DebugPoint.enableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + sql " CREATE MATERIALIZED VIEW date_view as select d_datekey, sum(d_daynuminweek) from date group by d_datekey;" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + + // cu compaction + for (int i = 0; i < array.size(); i++) { + tabletId = array[i].TabletId + logger.info("run compaction:" + tabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + } + + for (int i = 0; i < array.size(); i++) { + running = true + do { + Thread.sleep(100) + tabletId = array[i].TabletId + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + } finally { + sql """ CANCEL ALTER TABLE MATERIALIZED VIEW FROM date """ + if (injectBe != null) { + DebugPoint.disableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED" || result == "CANCELLED") { + sleep(3000) + break + } else { + sleep(100) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + assertEquals(result, "CANCELLED"); + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 23004); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) 
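+        // Expected layout on the base tablet: [0-1] the initial rowset, [2-7] the three
+        // compacted load+delete rounds, [8-8] the load done before the alter, and [9-13]
+        // the five loads that ran while the job was held by the debug point.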
+ assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + } + +} \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction7.groovy b/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction7.groovy new file mode 100644 index 00000000000000..c338dac907b245 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction7.groovy @@ -0,0 +1,259 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.doris.regression.suite.ClusterOptions +import org.apache.http.NoHttpResponseException +import org.apache.doris.regression.util.DebugPoint +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction5', 'nonConcurrent') { + def options = new ClusterOptions() + options.cloudMode = true + options.enableDebugPoints() + options.beConfigs += [ "enable_java_support=false" ] + options.beConfigs += [ "disable_auto_compaction=true" ] + options.beNum = 1 + docker(options) { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new File("""${context.file.parent}/../ddl/date_create.sql""").text + def injectName = 
'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + load_date_once("date"); + + sleep(1000) + GetDebugPoint().enableDebugPointForAllBEs(injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + + // cu compaction + for (int i = 0; i < array.size(); i++) { + tabletId = array[i].TabletId + logger.info("run compaction:" + tabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + } + + for (int i = 0; i < array.size(); i++) { + running = true + do { + Thread.sleep(100) + tabletId = array[i].TabletId + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + cluster.restartFrontends() + sleep(30000) + context.reconnectFe() + } finally { + if (injectBe != null) { + 
GetDebugPoint().disableDebugPointForAllBEs(injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED" || result == "CANCELLED") { + sleep(3000) + break + } else { + sleep(100) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + assertEquals(result, "FINISHED"); + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 23004); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-2]")) + assertTrue(out.contains("[7-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + // base compaction + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + for (int i = 0; i < 3; i++) { + load_date_once("date"); + } + + sql """ select count(*) from date """ + + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + // wait for all compactions done + running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-16]")) + } + } +} \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction9.groovy 
b/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction9.groovy new file mode 100644 index 00000000000000..245dbe46b714c6 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction9.groovy @@ -0,0 +1,262 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.doris.regression.suite.ClusterOptions +import org.apache.http.NoHttpResponseException +import org.apache.doris.regression.util.DebugPoint +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction6', 'nonConcurrent') { + def options = new ClusterOptions() + options.cloudMode = true + options.enableDebugPoints() + options.beConfigs += [ "enable_java_support=false" ] + options.beConfigs += [ "disable_auto_compaction=true" ] + options.beNum = 1 + docker(options) { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new File("""${context.file.parent}/../ddl/date_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = 
backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + load_date_once("date"); + + sleep(1000) + GetDebugPoint().enableDebugPointForAllBEs(injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + + cluster.restartBackends() + GetDebugPoint().enableDebugPointForAllBEs(injectName) + sleep(30000) + + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + + // cu compaction + for (int i = 0; i < array.size(); i++) { + tabletId = array[i].TabletId + logger.info("run compaction:" + tabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + } + + for (int i = 0; i < array.size(); i++) { + running = true + do { + Thread.sleep(100) + tabletId = array[i].TabletId + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + } finally { + if (injectBe != null) { + GetDebugPoint().disableDebugPointForAllBEs(injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED" || result == "CANCELLED") { + sleep(3000) + break + } else { + sleep(100) 
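+                    // Poll every 100 ms; after 3000 failed polls (about five minutes) give up and fail the suite.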
+ if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + assertEquals(result, "FINISHED"); + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 23004); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-2]")) + assertTrue(out.contains("[7-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + // base compaction + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + for (int i = 0; i < 3; i++) { + load_date_once("date"); + } + + sql """ select count(*) from date """ + + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + // wait for all compactions done + running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-16]")) + } + } +} \ No newline at end of file From 7e53248b2ab37a06d2fce82b825ed8ae750992cc Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Mon, 22 Jul 2024 16:22:49 +0800 Subject: [PATCH 10/15] tmp --- be/src/cloud/cloud_schema_change_job.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index 0134ac24affa14..9a9ce5ae8f0c20 
100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -62,7 +62,6 @@ CloudSchemaChangeJob::CloudSchemaChangeJob(CloudStorageEngine& cloud_storage_eng CloudSchemaChangeJob::~CloudSchemaChangeJob() = default; Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& request) { - // new tablet has to exist _new_tablet = DORIS_TRY(_cloud_storage_engine.tablet_mgr().get_tablet(request.new_tablet_id)); if (_new_tablet->tablet_state() == TABLET_RUNNING) { @@ -151,7 +150,8 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque delete_predicates.push_back(rs_meta); } } - RETURN_IF_ERROR(delete_handler.init(_base_tablet_schema, delete_predicates, start_resp.alter_version())); + RETURN_IF_ERROR(delete_handler.init(_base_tablet_schema, delete_predicates, + start_resp.alter_version())); std::vector return_columns; return_columns.resize(_base_tablet_schema->num_columns()); From 681a627545b038f1a303ab1ff42bebc2e1cfd75c Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Mon, 22 Jul 2024 16:24:43 +0800 Subject: [PATCH 11/15] tmp --- be/src/cloud/cloud_tablet.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 1c2bb30f183bf9..9aab40f013ead8 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -110,7 +110,6 @@ Status CloudTablet::capture_rs_readers(const Version& spec_version, // There are only two tablet_states RUNNING and NOT_READY in cloud mode // This function will erase the tablet from `CloudTabletMgr` when it can't find this tablet in MS. Status CloudTablet::sync_rowsets(int64_t query_version, bool warmup_delta_data) { - if (query_version > 0) { std::shared_lock rlock(_meta_lock); if (_max_version >= query_version) { From 6a608c1ea83aa70c6d064e06de23ca83ecf867a4 Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Wed, 24 Jul 2024 20:32:45 +0800 Subject: [PATCH 12/15] tmp --- be/src/cloud/cloud_tablet.cpp | 52 ----------------------------------- 1 file changed, 52 deletions(-) diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 9aab40f013ead8..213cfa4d927470 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -146,58 +146,6 @@ TabletSchemaSPtr CloudTablet::merged_tablet_schema() const { return target_schema; } -// Sync tablet meta and all rowset meta if not running. -// This could happen when BE didn't finish schema change job and another BE committed this schema change job. -// It should be a quite rare situation. -Status CloudTablet::sync_if_not_running() { - if (tablet_state() == TABLET_RUNNING) { - return Status::OK(); - } - - // Serially execute sync to reduce unnecessary network overhead - std::lock_guard lock(_sync_meta_lock); - - { - std::shared_lock rlock(_meta_lock); - if (tablet_state() == TABLET_RUNNING) { - return Status::OK(); - } - } - - TabletMetaSharedPtr tablet_meta; - auto st = _engine.meta_mgr().get_tablet_meta(tablet_id(), &tablet_meta); - if (!st.ok()) { - if (st.is()) { - clear_cache(); - } - return st; - } - - if (tablet_meta->tablet_state() != TABLET_RUNNING) [[unlikely]] { - // MoW may go to here when load while schema change - return Status::Error("invalid tablet state {}. 
tablet_id={}", - tablet_meta->tablet_state(), tablet_id()); - } - - TimestampedVersionTracker empty_tracker; - { - std::lock_guard wlock(_meta_lock); - RETURN_IF_ERROR(set_tablet_state(TABLET_RUNNING)); - _rs_version_map.clear(); - _stale_rs_version_map.clear(); - std::swap(_timestamped_version_tracker, empty_tracker); - _tablet_meta->clear_rowsets(); - _tablet_meta->clear_stale_rowset(); - _max_version = -1; - } - - st = _engine.meta_mgr().sync_tablet_rowsets(this); - if (st.is()) { - clear_cache(); - } - return st; -} - void CloudTablet::add_rowsets(std::vector to_add, bool version_overlap, std::unique_lock& meta_lock, bool warmup_delta_data) { From e17b4178d686276bdc3c9e0843a4a86126bc73fb Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Sun, 28 Jul 2024 18:08:56 +0800 Subject: [PATCH 13/15] tmp --- be/src/cloud/cloud_base_compaction.cpp | 16 +- be/src/cloud/cloud_cumulative_compaction.cpp | 6 +- .../cloud_engine_calc_delete_bitmap_task.cpp | 18 +- be/src/cloud/cloud_meta_mgr.cpp | 3 +- be/src/cloud/cloud_tablet.cpp | 53 ++++ be/src/cloud/cloud_tablet.h | 2 + be/src/cloud/config.cpp | 2 + be/src/cloud/config.h | 1 + be/src/olap/base_tablet.cpp | 2 +- be/src/olap/schema_change.cpp | 1 - cloud/src/meta-service/meta_service_job.cpp | 84 ++++-- cloud/test/meta_service_job_test.cpp | 261 +++++++++++++++-- .../apache/doris/alter/CloudRollupJobV2.java | 7 +- .../doris/alter/CloudSchemaChangeJobV2.java | 7 +- .../datasource/CloudInternalCatalog.java | 25 +- gensrc/proto/cloud.proto | 2 +- ...est_schema_change_with_compaction10.groovy | 262 ++++++++++++++++++ ...est_schema_change_with_compaction2.groovy} | 0 ...est_schema_change_with_compaction3.groovy} | 0 ...est_schema_change_with_compaction4.groovy} | 0 ...est_schema_change_with_compaction5.groovy} | 0 ...est_schema_change_with_compaction6.groovy} | 0 ...test_schema_change_with_compaction7.groovy | 256 +++++++++++++++++ ...test_schema_change_with_compaction8.groovy | 214 ++++++++++++++ ...test_schema_change_with_compaction9.groovy | 259 +++++++++++++++++ .../schema_change/ddl/date_create.sql | 23 ++ .../schema_change/ddl/date_delete.sql | 1 + .../cloud_p0/schema_change/ddl/date_load.sql | 6 + .../schema_change/ddl/date_unique_create.sql | 26 ++ 29 files changed, 1472 insertions(+), 65 deletions(-) create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy rename regression-test/suites/cloud_p0/schema_change/compaction2/{test_schema_change_with_compaction3.groovy => test_schema_change_with_compaction2.groovy} (100%) rename regression-test/suites/cloud_p0/schema_change/compaction3/{test_schema_change_with_compaction5.groovy => test_schema_change_with_compaction3.groovy} (100%) rename regression-test/suites/cloud_p0/schema_change/compaction4/{test_schema_change_with_compaction6.groovy => test_schema_change_with_compaction4.groovy} (100%) rename regression-test/suites/cloud_p0/schema_change/compaction5/{test_schema_change_with_compaction7.groovy => test_schema_change_with_compaction5.groovy} (100%) rename regression-test/suites/cloud_p0/schema_change/compaction6/{test_schema_change_with_compaction9.groovy => test_schema_change_with_compaction6.groovy} (100%) create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction7/test_schema_change_with_compaction7.groovy create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction8/test_schema_change_with_compaction8.groovy create mode 100644 
regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy
 create mode 100644 regression-test/suites/cloud_p0/schema_change/ddl/date_create.sql
 create mode 100644 regression-test/suites/cloud_p0/schema_change/ddl/date_delete.sql
 create mode 100644 regression-test/suites/cloud_p0/schema_change/ddl/date_load.sql
 create mode 100644 regression-test/suites/cloud_p0/schema_change/ddl/date_unique_create.sql

diff --git a/be/src/cloud/cloud_base_compaction.cpp b/be/src/cloud/cloud_base_compaction.cpp
index 1ff21d3e8466a2..76a0a18d8020ad 100644
--- a/be/src/cloud/cloud_base_compaction.cpp
+++ b/be/src/cloud/cloud_base_compaction.cpp
@@ -100,16 +100,20 @@ Status CloudBaseCompaction::prepare_compact() {
             // tablet not found
             cloud_tablet()->clear_cache();
         } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) {
-            (dynamic_cast<CloudTablet*>(_tablet.get()))->set_alter_version(resp.alter_version());
+            auto* cloud_tablet = static_cast<CloudTablet*>(_tablet.get());
             std::stringstream ss;
             ss << "failed to prepare cumu compaction. Check compaction input versions "
-                  "failed in schema change. "
+                  "failed in schema change. The input version end must be "
+                  "less than or equal to alter_version. "
+                  "The current alter version in BE is not correct. "
                   "input_version_start="
                << compaction_job->input_versions(0)
                << " input_version_end=" << compaction_job->input_versions(1)
+               << " current alter_version=" << cloud_tablet->alter_version()
                << " schema_change_alter_version=" << resp.alter_version();
             std::string msg = ss.str();
             LOG(WARNING) << msg;
+            cloud_tablet->set_alter_version(resp.alter_version());
             return Status::InternalError(msg);
         }
         return st;
@@ -329,16 +333,20 @@ Status CloudBaseCompaction::modify_rowsets() {
         if (resp.status().code() == cloud::TABLET_NOT_FOUND) {
             cloud_tablet()->clear_cache();
         } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) {
-            (dynamic_cast<CloudTablet*>(_tablet.get()))->set_alter_version(resp.alter_version());
+            auto* cloud_tablet = static_cast<CloudTablet*>(_tablet.get());
             std::stringstream ss;
             ss << "failed to prepare cumu compaction. Check compaction input versions "
-                  "failed in schema change. "
+                  "failed in schema change. The input version end must be "
+                  "less than or equal to alter_version. "
+                  "The current alter version in BE is not correct. "
                   "input_version_start="
                << compaction_job->input_versions(0)
                << " input_version_end=" << compaction_job->input_versions(1)
+               << " current alter_version=" << cloud_tablet->alter_version()
                << " schema_change_alter_version=" << resp.alter_version();
             std::string msg = ss.str();
             LOG(WARNING) << msg;
+            cloud_tablet->set_alter_version(resp.alter_version());
             return Status::InternalError(msg);
         }
         return st;
diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp
index 2f092c27323a9e..0ca8a504a887f1 100644
--- a/be/src/cloud/cloud_cumulative_compaction.cpp
+++ b/be/src/cloud/cloud_cumulative_compaction.cpp
@@ -48,7 +48,7 @@ CloudCumulativeCompaction::CloudCumulativeCompaction(CloudStorageEngine& engine,
 CloudCumulativeCompaction::~CloudCumulativeCompaction() = default;

 Status CloudCumulativeCompaction::prepare_compact() {
-    if (_tablet->tablet_state() != TABLET_RUNNING &&
+    if (_tablet->tablet_state() != TABLET_RUNNING && config::enable_new_tablet_do_compaction &&
         dynamic_cast<CloudTablet*>(_tablet.get())->alter_version() == -1) {
         return Status::InternalError("invalid tablet state. 
tablet_id={}", _tablet->tablet_id()); } @@ -114,8 +114,8 @@ Status CloudCumulativeCompaction::prepare_compact() { compaction_job->add_input_versions(_input_rowsets.front()->start_version()); compaction_job->add_input_versions(_input_rowsets.back()->end_version()); - // Set input version range to let meta-service judge version range conflict - compaction_job->set_judge_input_versions_range(config::enable_parallel_cumu_compaction); + // Set input version range to let meta-service check version range conflict + compaction_job->set_check_input_versions_range(config::enable_parallel_cumu_compaction); cloud::StartTabletJobResponse resp; st = _engine.meta_mgr().prepare_tablet_job(job, &resp); if (!st.ok()) { diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp index 22f6689ff23782..b8acdb6bd38b9e 100644 --- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp +++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp @@ -154,20 +154,22 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const { }; if (_version != max_version + 1 || should_sync_rowsets_produced_by_compaction()) { auto sync_st = tablet->sync_rowsets(); - if (sync_st.is()) [[unlikely]] { - _engine_calc_delete_bitmap_task->add_succ_tablet_id(_tablet_id); - LOG(INFO) << "tablet is under alter process, delete bitmap will be calculated later, " - "tablet_id: " - << _tablet_id << " txn_id: " << _transaction_id - << ", request_version=" << _version; - return sync_st; - } if (!sync_st.ok()) { LOG(WARNING) << "failed to sync rowsets. tablet_id=" << _tablet_id << ", txn_id=" << _transaction_id << ", status=" << sync_st; _engine_calc_delete_bitmap_task->add_error_tablet_id(_tablet_id, sync_st); return sync_st; } + if (tablet->tablet_state() != TABLET_RUNNING) [[unlikely]] { + _engine_calc_delete_bitmap_task->add_succ_tablet_id(_tablet_id); + LOG(INFO) << "tablet is under alter process, delete bitmap will be calculated later, " + "tablet_id: " + << _tablet_id << " txn_id: " << _transaction_id + << ", request_version=" << _version; + return Status::Error( + "invalid tablet state {}. tablet_id={}", tablet->tablet_state(), + tablet->tablet_id()); + } } auto sync_rowset_time_us = MonotonicMicros() - t2; max_version = tablet->max_version_unlocked(); diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index e743ea9b12c8ce..ad1487917b109a 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -448,7 +448,8 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_ int64_t now = duration_cast(system_clock::now().time_since_epoch()).count(); tablet->last_sync_time_s = now; - if (tablet->enable_unique_key_merge_on_write()) { + if (tablet->enable_unique_key_merge_on_write() && + tablet->tablet_state() == TABLET_RUNNING) { DeleteBitmap delete_bitmap(tablet_id); int64_t old_max_version = req.start_version() - 1; auto st = sync_tablet_delete_bitmap(tablet, old_max_version, resp.rowset_meta(), diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 213cfa4d927470..67fd9dd0a8df5c 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -110,6 +110,8 @@ Status CloudTablet::capture_rs_readers(const Version& spec_version, // There are only two tablet_states RUNNING and NOT_READY in cloud mode // This function will erase the tablet from `CloudTabletMgr` when it can't find this tablet in MS. 
Status CloudTablet::sync_rowsets(int64_t query_version, bool warmup_delta_data) { + RETURN_IF_ERROR(sync_if_not_running()); + if (query_version > 0) { std::shared_lock rlock(_meta_lock); if (_max_version >= query_version) { @@ -133,6 +135,57 @@ Status CloudTablet::sync_rowsets(int64_t query_version, bool warmup_delta_data) return st; } +// Sync tablet meta and all rowset meta if not running. +// This could happen when BE didn't finish schema change job and another BE committed this schema change job. +// It should be a quite rare situation. +Status CloudTablet::sync_if_not_running() { + if (tablet_state() == TABLET_RUNNING) { + return Status::OK(); + } + + // Serially execute sync to reduce unnecessary network overhead + std::lock_guard lock(_sync_meta_lock); + + { + std::shared_lock rlock(_meta_lock); + if (tablet_state() == TABLET_RUNNING) { + return Status::OK(); + } + } + + TabletMetaSharedPtr tablet_meta; + auto st = _engine.meta_mgr().get_tablet_meta(tablet_id(), &tablet_meta); + if (!st.ok()) { + if (st.is()) { + clear_cache(); + } + return st; + } + + if (tablet_meta->tablet_state() != TABLET_RUNNING) [[unlikely]] { + // MoW may go to here when load while schema change + return Status::OK(); + } + + TimestampedVersionTracker empty_tracker; + { + std::lock_guard wlock(_meta_lock); + RETURN_IF_ERROR(set_tablet_state(TABLET_RUNNING)); + _rs_version_map.clear(); + _stale_rs_version_map.clear(); + std::swap(_timestamped_version_tracker, empty_tracker); + _tablet_meta->clear_rowsets(); + _tablet_meta->clear_stale_rowset(); + _max_version = -1; + } + + st = _engine.meta_mgr().sync_tablet_rowsets(this); + if (st.is()) { + clear_cache(); + } + return st; +} + TabletSchemaSPtr CloudTablet::merged_tablet_schema() const { std::shared_lock rdlock(_meta_lock); TabletSchemaSPtr target_schema; diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h index 43f2a3adafe721..2bd1ce475028ab 100644 --- a/be/src/cloud/cloud_tablet.h +++ b/be/src/cloud/cloud_tablet.h @@ -206,6 +206,8 @@ class CloudTablet final : public BaseTablet { static void recycle_cached_data(const std::vector& rowsets); + Status sync_if_not_running(); + CloudStorageEngine& _engine; // this mutex MUST ONLY be used when sync meta diff --git a/be/src/cloud/config.cpp b/be/src/cloud/config.cpp index 80522759b84b44..67897f527032a8 100644 --- a/be/src/cloud/config.cpp +++ b/be/src/cloud/config.cpp @@ -59,4 +59,6 @@ DEFINE_mBool(save_load_error_log_to_s3, "false"); DEFINE_mInt32(sync_load_for_tablets_thread, "32"); +DEFINE_mBool(enable_new_tablet_do_compaction, "true"); + } // namespace doris::config diff --git a/be/src/cloud/config.h b/be/src/cloud/config.h index bf041ba0fa6fc5..4a5b3e0e16a208 100644 --- a/be/src/cloud/config.h +++ b/be/src/cloud/config.h @@ -65,6 +65,7 @@ DECLARE_mInt32(tablet_sync_interval_s); // Cloud compaction config DECLARE_mInt64(min_compaction_failure_interval_ms); +DECLARE_mBool(enable_new_tablet_do_compaction); // For cloud read/write separate mode DECLARE_mInt64(base_compaction_freeze_interval_s); DECLARE_mInt64(cu_compaction_freeze_interval_s); diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 141e302af8c420..efc6db6debf3b2 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -1471,7 +1471,7 @@ Status BaseTablet::update_delete_bitmap_without_lock( << ", rnd:" << rnd << ", percent: " << percent; } }); - int64_t cur_version = rowset->end_version(); + int64_t cur_version = rowset->start_version(); std::vector segments; 
RETURN_IF_ERROR(std::dynamic_pointer_cast<BetaRowset>(rowset)->load_segments(&segments));
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 76a12a1b5871ec..599d9c1d1423ca 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -23,7 +23,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp
index 76e74b162b29d1..e702ec2283a5bf 100644
--- a/cloud/src/meta-service/meta_service_job.cpp
+++ b/cloud/src/meta-service/meta_service_job.cpp
@@ -49,11 +49,22 @@ namespace doris::cloud {

 static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1;
 static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2;

+// Check that compaction input_versions are valid during schema change.
+// If the schema change job does not have an alter version, there is no need to
+// check, because the job comes from an old-version BE.
+// We check this in both prepare compaction and commit compaction.
+// 1. For base compaction, the end version must be
+//    less than or equal to alter_version.
+// 2. For cumulative compaction, the start version must be
+//    greater than alter_version.
 bool check_compaction_input_verions(const TabletCompactionJobPB& compaction,
                                     const TabletJobInfoPB& job_pb) {
+    if (!job_pb.has_schema_change() || !job_pb.schema_change().has_alter_version()) return true;
+    // compaction needs to know [start_version, end_version]
     DCHECK_EQ(compaction.input_versions_size(), 2) << proto_to_json(compaction);
     DCHECK_LE(compaction.input_versions(0), compaction.input_versions(1))
             << proto_to_json(compaction);
+
     int64_t alter_version = job_pb.schema_change().alter_version();
     return (compaction.type() == TabletCompactionJobPB_CompactionType_BASE &&
             compaction.input_versions(1) <= alter_version) ||
@@ -136,7 +147,7 @@ void start_compaction_job(MetaServiceCode& code, std::string& msg, std::stringst
     }
     while (err == TxnErrorCode::TXN_OK) {
         job_pb.ParseFromString(job_val);
-        if (job_pb.has_schema_change() && !check_compaction_input_verions(compaction, job_pb)) {
+        if (!check_compaction_input_verions(compaction, job_pb)) {
             SS << "Check compaction input versions failed in schema change. input_version_start="
                << compaction.input_versions(0)
                << " input_version_end=" << compaction.input_versions(1)
@@ -176,8 +187,10 @@ void start_compaction_job(MetaServiceCode& code, std::string& msg, std::stringst
                 // for MOW table, so priority should be given to performing full
                 // compaction operations and canceling other types of compaction.
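                // In other words, clearing the recorded compactions gives the
                // incoming full compaction exclusive ownership of compaction on
                // this tablet.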
compactions.Clear(); - } else if (!compaction.has_judge_input_versions_range() || - !compaction.judge_input_versions_range()) { + } else if ((!compaction.has_check_input_versions_range() && + compaction.input_versions().empty()) || + (compaction.has_check_input_versions_range() && + !compaction.check_input_versions_range())) { // Unknown input version range, doesn't support parallel compaction of same type for (auto& c : compactions) { if (c.type() != compaction.type() && c.type() != TabletCompactionJobPB::FULL) @@ -316,8 +329,10 @@ void start_schema_change_job(MetaServiceCode& code, std::string& msg, std::strin err = txn->get(job_key, &job_val); if (err == TxnErrorCode::TXN_OK) { job_pb.ParseFromString(job_val); - if (job_pb.has_schema_change() && job_pb.schema_change().id() == schema_change.id() && + if (job_pb.has_schema_change() && job_pb.schema_change().has_alter_version() && + job_pb.schema_change().id() == schema_change.id() && job_pb.schema_change().initiator() == schema_change.initiator()) { + TEST_SYNC_POINT_CALLBACK("restart_compaction_job"); response->set_alter_version(job_pb.schema_change().alter_version()); return; } @@ -596,7 +611,7 @@ void process_compaction_job(MetaServiceCode& code, std::string& msg, std::string } bool abort_compaction = false; - if (recorded_job.has_schema_change() && request->action() == FinishTabletJobRequest::COMMIT && + if (request->action() == FinishTabletJobRequest::COMMIT && !check_compaction_input_verions(compaction, recorded_job)) { SS << "Check compaction input versions failed in schema change. input_version_start=" << compaction.input_versions(0) << " input_version_end=" << compaction.input_versions(1) @@ -941,8 +956,6 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str auto new_tablet_key = meta_tablet_key( {instance_id, new_table_id, new_index_id, new_partition_id, new_tablet_id}); - auto new_tablet_job_key = job_tablet_key( - {instance_id, new_table_id, new_index_id, new_partition_id, new_tablet_id}); std::string new_tablet_val; doris::TabletMetaCloudPB new_tablet_meta; TxnErrorCode err = txn->get(new_tablet_key, &new_tablet_val); @@ -994,8 +1007,9 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str } // MUST check initiator to let the retried BE commit this schema_change job. - if (schema_change.id() != recorded_schema_change.id() || - schema_change.initiator() != recorded_schema_change.initiator()) { + if (request->action() == FinishTabletJobRequest::COMMIT && + (schema_change.id() != recorded_schema_change.id() || + schema_change.initiator() != recorded_schema_change.initiator())) { SS << "unmatched job id or initiator, recorded_id=" << recorded_schema_change.id() << " given_id=" << schema_change.id() << " recorded_job=" << proto_to_json(recorded_schema_change) @@ -1013,20 +1027,48 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str return; } + auto new_tablet_job_key = job_tablet_key( + {instance_id, new_table_id, new_index_id, new_partition_id, new_tablet_id}); + + std::string new_tablet_job_val; + err = txn->get(new_tablet_job_key, &new_tablet_job_val); + if (err != TxnErrorCode::TXN_OK) { + SS << (err == TxnErrorCode::TXN_KEY_NOT_FOUND ? "job not found," : "internal error,") + << " instance_id=" << instance_id << " tablet_id=" << new_tablet_id + << " job=" << proto_to_json(request->job()) << " err=" << err; + msg = ss.str(); + code = err == TxnErrorCode::TXN_KEY_NOT_FOUND ? 
MetaServiceCode::INVALID_ARGUMENT + : cast_as(err); + return; + } + TabletJobInfoPB new_recorded_job; + if (!new_recorded_job.ParseFromString(new_tablet_job_val)) { + code = MetaServiceCode::PROTOBUF_PARSE_ERR; + msg = "malformed new tablet recorded job"; + return; + } + //========================================================================== // Abort //========================================================================== if (request->action() == FinishTabletJobRequest::ABORT) { - // TODO(cyx) - // remove schema change - recorded_job.clear_schema_change(); - auto job_val = recorded_job.SerializeAsString(); - txn->put(job_key, job_val); - txn->remove(new_tablet_job_key); - INSTANCE_LOG(INFO) << "remove schema_change job tablet_id=" << tablet_id - << " key=" << hex(job_key); - - need_commit = true; + if (schema_change.new_tablet_idx().index_id() == + recorded_schema_change.new_tablet_idx().index_id() && + schema_change.new_tablet_idx().tablet_id() == + recorded_schema_change.new_tablet_idx().tablet_id()) { + // TODO(cyx) + // remove schema change + recorded_job.clear_schema_change(); + new_recorded_job.clear_schema_change(); + auto job_val = recorded_job.SerializeAsString(); + new_tablet_job_val = new_recorded_job.SerializeAsString(); + txn->put(job_key, job_val); + txn->put(new_tablet_job_key, new_tablet_job_val); + INSTANCE_LOG(INFO) << "remove schema_change job tablet_id=" << tablet_id + << " key=" << hex(job_key); + + need_commit = true; + } return; } @@ -1185,9 +1227,11 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str // remove schema_change job //========================================================================== recorded_job.clear_schema_change(); + new_recorded_job.clear_schema_change(); auto job_val = recorded_job.SerializeAsString(); txn->put(job_key, job_val); - txn->remove(new_tablet_job_key); + new_tablet_job_val = new_recorded_job.SerializeAsString(); + txn->put(new_tablet_job_key, new_tablet_job_val); INSTANCE_LOG(INFO) << "remove schema_change job tablet_id=" << tablet_id << " key=" << hex(job_key); diff --git a/cloud/test/meta_service_job_test.cpp b/cloud/test/meta_service_job_test.cpp index 4f6213ab8b47b6..def9fb11ed8fec 100644 --- a/cloud/test/meta_service_job_test.cpp +++ b/cloud/test/meta_service_job_test.cpp @@ -60,10 +60,10 @@ void start_compaction_job(MetaService* meta_service, int64_t tablet_id, const st long now = time(nullptr); compaction->set_expiration(now + 12); compaction->set_lease(now + 3); - if (input_version.first > 0 && input_version.second > 0) { + if (input_version.second > 0) { compaction->add_input_versions(input_version.first); compaction->add_input_versions(input_version.second); - compaction->set_judge_input_versions_range(true); + compaction->set_check_input_versions_range(true); } meta_service->start_tablet_job(&cntl, &req, &res, nullptr); }; @@ -192,15 +192,18 @@ void create_tablet(MetaService* meta_service, int64_t table_id, int64_t index_id void start_schema_change_job(MetaServiceProxy* meta_service, int64_t table_id, int64_t index_id, int64_t partition_id, int64_t tablet_id, int64_t new_tablet_id, - const std::string& job_id, const std::string& initiator) { + const std::string& job_id, const std::string& initiator, + StartTabletJobResponse& res, int64_t alter_version = -1) { brpc::Controller cntl; StartTabletJobRequest req; - StartTabletJobResponse res; req.mutable_job()->mutable_idx()->set_tablet_id(tablet_id); auto sc = req.mutable_job()->mutable_schema_change(); sc->set_id(job_id); 
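     // id and initiator together identify the job: the MS uses them to recognize
     // a retried start and to validate the final commit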
sc->set_initiator(initiator); sc->mutable_new_tablet_idx()->set_tablet_id(new_tablet_id); + if (alter_version != -1) { + sc->set_alter_version(alter_version); + } long now = time(nullptr); sc->set_expiration(now + 12); meta_service->start_tablet_job(&cntl, &req, &res, nullptr); @@ -218,13 +221,14 @@ void start_schema_change_job(MetaServiceProxy* meta_service, int64_t table_id, i EXPECT_EQ(job_pb.schema_change().id(), job_id) << ' ' << initiator; }; -void finish_schema_change_job(MetaService* meta_service, int64_t tablet_id, int64_t new_tablet_id, - const std::string& job_id, const std::string& initiator, - const std::vector& output_rowsets, - FinishTabletJobResponse& res) { +void finish_schema_change_job( + MetaService* meta_service, int64_t tablet_id, int64_t new_tablet_id, + const std::string& job_id, const std::string& initiator, + const std::vector& output_rowsets, FinishTabletJobResponse& res, + FinishTabletJobRequest_Action action = FinishTabletJobRequest::COMMIT) { brpc::Controller cntl; FinishTabletJobRequest req; - req.set_action(FinishTabletJobRequest::COMMIT); + req.set_action(action); req.mutable_job()->mutable_idx()->set_tablet_id(tablet_id); auto sc = req.mutable_job()->mutable_schema_change(); sc->mutable_new_tablet_idx()->set_tablet_id(new_tablet_id); @@ -1333,9 +1337,10 @@ TEST(MetaServiceJobTest, SchemaChangeJobTest) { int64_t new_tablet_id = 14; ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, new_tablet_id, false, true)); + StartTabletJobResponse sc_res; ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, - "job1", "be1")); + "job1", "be1", sc_res)); FinishTabletJobResponse res; finish_schema_change_job(meta_service.get(), tablet_id, new_tablet_id, "job2", "be1", {}, res); @@ -1361,9 +1366,10 @@ TEST(MetaServiceJobTest, SchemaChangeJobTest) { int64_t new_tablet_id = 24; ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, new_tablet_id, false, true)); + StartTabletJobResponse sc_res; ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, - "job2", "be1")); + "job2", "be1", sc_res)); std::vector output_rowsets; for (int64_t i = 0; i < 5; ++i) { @@ -1424,9 +1430,10 @@ TEST(MetaServiceJobTest, SchemaChangeJobTest) { int64_t new_tablet_id = 34; ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, new_tablet_id, false, true)); + StartTabletJobResponse sc_res; ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, - "job3", "be1")); + "job3", "be1", sc_res)); // provide existed rowsets std::vector existed_rowsets; for (int i = 0; i < 5; ++i) { @@ -1544,9 +1551,10 @@ TEST(MetaServiceJobTest, RetrySchemaChangeJobTest) { // start "job1" on BE1 ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, new_tablet_id, false, true)); + StartTabletJobResponse sc_res; ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, "job1", - "be1")); + "be1", sc_res)); // provide existed rowsets std::vector existed_rowsets; for (int i = 0; i < 5; ++i) { @@ -1554,15 +1562,16 @@ TEST(MetaServiceJobTest, RetrySchemaChangeJobTest) { } ASSERT_NO_FATAL_FAILURE(insert_rowsets(meta_service->txn_kv().get(), table_id, index_id, partition_id, new_tablet_id, 
existed_rowsets)); - + sc_res.Clear(); // FE canceled "job1" and starts "job2" on BE1, should preempt previous "job1" ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, "job2", - "be1")); + "be1", sc_res)); + sc_res.Clear(); // retry "job2" on BE1 ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, "job2", - "be1")); + "be1", sc_res)); // BE1 output_versions=[2-8][9-9][10-10][11-11] std::vector be1_output_rowsets; be1_output_rowsets.push_back(create_rowset(new_tablet_id, 2, 8)); @@ -1574,11 +1583,11 @@ TEST(MetaServiceJobTest, RetrySchemaChangeJobTest) { commit_rowset(meta_service.get(), rs, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK) << rs.end_version(); } - + sc_res.Clear(); // FE thinks BE1 is not alive and retries "job2" on BE2, should preempt "job2" created by BE1 ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, "job2", - "be2")); + "be2", sc_res)); // BE2 output_versions=[2-8][9-12][13-13] std::vector be2_output_rowsets; { @@ -1704,9 +1713,10 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { int64_t new_tablet_id = 14; ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, new_tablet_id, true, true)); + StartTabletJobResponse sc_res; ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, - "job1", "be1")); + "job1", "be1", sc_res)); std::vector output_rowsets; for (int64_t i = 0; i < 5; ++i) { output_rowsets.push_back(create_rowset(new_tablet_id, i + 2, i + 2)); @@ -1751,9 +1761,10 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { int64_t new_tablet_id = 15; ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, new_tablet_id, true, true)); + StartTabletJobResponse sc_res; ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, - "job2", "be1")); + "job2", "be1", sc_res)); std::vector output_rowsets; for (int64_t i = 0; i < 5; ++i) { output_rowsets.push_back(create_rowset(new_tablet_id, i + 2, i + 2)); @@ -2273,9 +2284,10 @@ TEST(MetaServiceJobTest, SchemaChangeJobPersistTest) { int64_t new_tablet_id = 11; ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, new_tablet_id, false, true)); + StartTabletJobResponse sc_res; ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, partition_id, tablet_id, new_tablet_id, "job2", - "BE1")); + "BE1", sc_res)); long now = time(nullptr); FinishTabletJobRequest req; @@ -2295,4 +2307,211 @@ TEST(MetaServiceJobTest, SchemaChangeJobPersistTest) { ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } +TEST(MetaServiceJobTest, DoCompactionWhenSC) { + auto meta_service = get_meta_service(); + + auto* sp = SyncPoint::get_instance(); + std::unique_ptr> defer( + (int*)0x01, [](int*) { SyncPoint::get_instance()->clear_all_call_backs(); }); + sp->set_call_back("get_instance_id", [&](auto&& args) { + auto* ret = try_any_cast_ret(args); + ret->first = instance_id; + ret->second = true; + }); + sp->enable_processing(); + + brpc::Controller cntl; + + int64_t table_id = 5421; + int64_t index_id = 5422; + int64_t partition_id = 5423; + int64_t tablet_id = 5424; + int64_t new_tablet_id = 54211; + + 
ASSERT_NO_FATAL_FAILURE( + create_tablet(meta_service.get(), table_id, index_id, partition_id, tablet_id, false)); + + ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, + new_tablet_id, false, true)); + + StartTabletJobResponse sc_res; + ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, + partition_id, tablet_id, new_tablet_id, + "job_sc", "BE1", sc_res, 8)); + + StartTabletJobResponse res; + start_compaction_job(meta_service.get(), tablet_id, "job1", "BE1", 0, 7, + TabletCompactionJobPB::CUMULATIVE, res, {7, 10}); + ASSERT_EQ(res.status().code(), MetaServiceCode::JOB_CHECK_ALTER_VERSION_FAIL); + res.Clear(); + + start_compaction_job(meta_service.get(), tablet_id, "job1", "BE1", 0, 7, + TabletCompactionJobPB::BASE, res, {0, 10}); + ASSERT_EQ(res.status().code(), MetaServiceCode::JOB_CHECK_ALTER_VERSION_FAIL); + res.Clear(); + + start_compaction_job(meta_service.get(), tablet_id, "job1", "BE1", 0, 7, + TabletCompactionJobPB::BASE, res, {0, 7}); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); + + start_compaction_job(meta_service.get(), new_tablet_id, "job2", "BE1", 0, 7, + TabletCompactionJobPB::CUMULATIVE, res, {9, 10}); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); + + std::unique_ptr txn; + ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK); + std::string job_key = + job_tablet_key({instance_id, table_id, index_id, partition_id, tablet_id}); + std::string job_val; + TabletJobInfoPB job_pb; + ASSERT_EQ(txn->get(job_key, &job_val), TxnErrorCode::TXN_OK); + ASSERT_TRUE(job_pb.ParseFromString(job_val)); + ASSERT_EQ(job_pb.compaction_size(), 1); + ASSERT_EQ(job_pb.compaction(0).id(), "job1"); + ASSERT_EQ(job_pb.compaction(0).initiator(), "BE1"); + ASSERT_EQ(job_pb.compaction(0).input_versions(0), 0); + ASSERT_EQ(job_pb.compaction(0).input_versions(1), 7); + ASSERT_EQ(job_pb.schema_change().alter_version(), 8); + + std::string new_job_key = + job_tablet_key({instance_id, table_id, index_id, partition_id, new_tablet_id}); + std::string new_job_val; + TabletJobInfoPB new_job_pb; + ASSERT_EQ(txn->get(new_job_key, &new_job_val), TxnErrorCode::TXN_OK); + ASSERT_TRUE(new_job_pb.ParseFromString(new_job_val)); + ASSERT_EQ(new_job_pb.compaction_size(), 1); + ASSERT_EQ(new_job_pb.compaction(0).id(), "job2"); + ASSERT_EQ(new_job_pb.compaction(0).initiator(), "BE1"); + ASSERT_EQ(new_job_pb.compaction(0).input_versions(0), 9); + ASSERT_EQ(new_job_pb.compaction(0).input_versions(1), 10); + ASSERT_EQ(new_job_pb.schema_change().alter_version(), 8); + + FinishTabletJobResponse finish_res; + finish_schema_change_job(meta_service.get(), tablet_id, new_tablet_id, "job_sc", "BE1", {}, + finish_res); + ASSERT_EQ(finish_res.status().code(), MetaServiceCode::OK); +} + +TEST(MetaServiceJobTest, ReStartSC) { + auto meta_service = get_meta_service(); + + auto* sp = SyncPoint::get_instance(); + std::unique_ptr> defer( + (int*)0x01, [](int*) { SyncPoint::get_instance()->clear_all_call_backs(); }); + sp->set_call_back("get_instance_id", [&](auto&& args) { + auto* ret = try_any_cast_ret(args); + ret->first = instance_id; + ret->second = true; + }); + bool use_origin_job = false; + sp->set_call_back("restart_compaction_job", [&](auto&&) { use_origin_job = true; }); + sp->enable_processing(); + + brpc::Controller cntl; + + int64_t table_id = 5331; + int64_t index_id = 5332; + int64_t partition_id = 5333; + int64_t tablet_id = 5334; + int64_t new_tablet_id = 53311; + + 
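+    // Start the same schema change job twice: the second start must hit the
+    // "restart_compaction_job" sync point and return the recorded alter_version
+    // instead of registering a new job.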
ASSERT_NO_FATAL_FAILURE( + create_tablet(meta_service.get(), table_id, index_id, partition_id, tablet_id, false)); + + ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, + new_tablet_id, false, true)); + + StartTabletJobResponse sc_res; + ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, + partition_id, tablet_id, new_tablet_id, + "job_sc", "BE1", sc_res, 8)); + sc_res.Clear(); + ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, + partition_id, tablet_id, new_tablet_id, + "job_sc", "BE1", sc_res, 8)); + ASSERT_TRUE(use_origin_job); + ASSERT_EQ(sc_res.alter_version(), 8); + FinishTabletJobResponse finish_res; + finish_schema_change_job(meta_service.get(), tablet_id, new_tablet_id, "job_sc", "BE1", {}, + finish_res); + ASSERT_EQ(finish_res.status().code(), MetaServiceCode::OK); +} + +TEST(MetaServiceJobTest, CancelSC) { + auto meta_service = get_meta_service(); + + auto* sp = SyncPoint::get_instance(); + std::unique_ptr> defer( + (int*)0x01, [](int*) { SyncPoint::get_instance()->clear_all_call_backs(); }); + sp->set_call_back("get_instance_id", [&](auto&& args) { + auto* ret = try_any_cast_ret(args); + ret->first = instance_id; + ret->second = true; + }); + sp->enable_processing(); + + brpc::Controller cntl; + + int64_t table_id = 5731; + int64_t index_id = 5732; + int64_t partition_id = 5733; + int64_t tablet_id = 5734; + int64_t new_tablet_id = 57311; + int64_t new_tablet_id1 = 57322; + + ASSERT_NO_FATAL_FAILURE( + create_tablet(meta_service.get(), table_id, index_id, partition_id, tablet_id, false)); + + ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, + new_tablet_id, false, true)); + + ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, + new_tablet_id1, false, true)); + + StartTabletJobResponse sc_res; + ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, + partition_id, tablet_id, new_tablet_id, + "job_sc", "BE1", sc_res, 8)); + { + FinishTabletJobResponse finish_res; + finish_schema_change_job(meta_service.get(), tablet_id, new_tablet_id, "job_sc", "BE1", {}, + finish_res, FinishTabletJobRequest::ABORT); + ASSERT_EQ(finish_res.status().code(), MetaServiceCode::OK); + } + { + std::unique_ptr txn; + ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK); + std::string job_key = + job_tablet_key({instance_id, table_id, index_id, partition_id, tablet_id}); + std::string job_val; + TabletJobInfoPB job_pb; + ASSERT_EQ(txn->get(job_key, &job_val), TxnErrorCode::TXN_OK); + ASSERT_TRUE(job_pb.ParseFromString(job_val)); + ASSERT_FALSE(job_pb.has_schema_change()); + } + sc_res.Clear(); + ASSERT_NO_FATAL_FAILURE(start_schema_change_job(meta_service.get(), table_id, index_id, + partition_id, tablet_id, new_tablet_id1, + "job_sc1", "BE1", sc_res, 8)); + { + FinishTabletJobResponse finish_res; + finish_schema_change_job(meta_service.get(), tablet_id, new_tablet_id, "job_sc", "BE1", {}, + finish_res, FinishTabletJobRequest::ABORT); + ASSERT_EQ(finish_res.status().code(), MetaServiceCode::OK); + } + { + std::unique_ptr txn; + ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK); + std::string job_key = + job_tablet_key({instance_id, table_id, index_id, partition_id, tablet_id}); + std::string job_val; + TabletJobInfoPB job_pb; + ASSERT_EQ(txn->get(job_key, &job_val), TxnErrorCode::TXN_OK); + 
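+        // this abort carried the old new_tablet_id, which no longer matches the
+        // recorded job ("job_sc1" on new_tablet_id1), so the schema change must
+        // survive the abort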
ASSERT_TRUE(job_pb.ParseFromString(job_val)); + ASSERT_TRUE(job_pb.has_schema_change()); + } +} + } // namespace doris::cloud diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java index c1982daa44d61d..f36d9b5f370006 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java @@ -128,10 +128,15 @@ protected void onCancel() { Long partitionId = partitionEntry.getKey(); Map rollupTabletIdToBaseTabletId = partitionEntry.getValue(); for (Map.Entry tabletEntry : rollupTabletIdToBaseTabletId.entrySet()) { + Long rollupTabletId = tabletEntry.getKey(); Long baseTabletId = tabletEntry.getValue(); ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) - .removeSchemaChangeJob(dbId, tableId, baseIndexId, partitionId, baseTabletId); + .removeSchemaChangeJob(dbId, tableId, baseIndexId, rollupIndexId, + partitionId, baseTabletId, rollupTabletId); } + LOG.info("Cancel RollupJob. Remove SchemaChangeJob in ms." + + "dbId:{}, tableId:{}, rollupIndexId: {} partitionId:{}. tabletSize:{}", + dbId, tableId, rollupIndexId, partitionId, rollupTabletIdToBaseTabletId.size()); } break; } catch (Exception e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java index 7023f35d3b03f4..ac80812e5b8420 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java @@ -131,10 +131,15 @@ protected void onCancel() { Long originIndexId = indexIdMap.get(shadowIndexId); Map shadowTabletIdToOriginTabletId = data.getValue(); for (Map.Entry entry : shadowTabletIdToOriginTabletId.entrySet()) { + Long shadowTabletId = entry.getKey(); Long originTabletId = entry.getValue(); ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) - .removeSchemaChangeJob(dbId, tableId, originIndexId, partitionId, originTabletId); + .removeSchemaChangeJob(dbId, tableId, originIndexId, shadowIndexId, + partitionId, originTabletId, shadowTabletId); } + LOG.info("Cancel SchemaChange. Remove SchemaChangeJob in ms." + + "dbId:{}, tableId:{}, originIndexId:{}, partitionId:{}. 
tabletSize:{}", + dbId, tableId, originIndexId, partitionId, shadowTabletIdToOriginTabletId.size()); } break; } catch (Exception e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java index 8563d1aafdff6a..c3243630376b4c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java @@ -845,13 +845,15 @@ private void dropCloudPartition(long dbId, long tableId, List partitionIds } } - public void removeSchemaChangeJob(long dbId, long tableId, long indexId, long partitionId, long tabletId) + public void removeSchemaChangeJob(long dbId, long tableId, long indexId, long newIndexId, + long partitionId, long tabletId, long newTabletId) throws DdlException { Cloud.FinishTabletJobRequest.Builder finishTabletJobRequestBuilder = Cloud.FinishTabletJobRequest.newBuilder(); finishTabletJobRequestBuilder.setCloudUniqueId(Config.cloud_unique_id); finishTabletJobRequestBuilder.setAction(Cloud.FinishTabletJobRequest.Action.ABORT); Cloud.TabletJobInfoPB.Builder tabletJobInfoPBBuilder = Cloud.TabletJobInfoPB.newBuilder(); + // set origin tablet Cloud.TabletIndexPB.Builder tabletIndexPBBuilder = Cloud.TabletIndexPB.newBuilder(); tabletIndexPBBuilder.setDbId(dbId); tabletIndexPBBuilder.setTableId(tableId); @@ -860,6 +862,23 @@ public void removeSchemaChangeJob(long dbId, long tableId, long indexId, long pa tabletIndexPBBuilder.setTabletId(tabletId); final Cloud.TabletIndexPB tabletIndex = tabletIndexPBBuilder.build(); tabletJobInfoPBBuilder.setIdx(tabletIndex); + + // set new tablet + Cloud.TabletSchemaChangeJobPB.Builder schemaChangeJobPBBuilder = + Cloud.TabletSchemaChangeJobPB.newBuilder(); + Cloud.TabletIndexPB.Builder newtabletIndexPBBuilder = Cloud.TabletIndexPB.newBuilder(); + newtabletIndexPBBuilder.setDbId(dbId); + newtabletIndexPBBuilder.setTableId(tableId); + newtabletIndexPBBuilder.setIndexId(newIndexId); + newtabletIndexPBBuilder.setPartitionId(partitionId); + newtabletIndexPBBuilder.setTabletId(newTabletId); + final Cloud.TabletIndexPB newtabletIndex = newtabletIndexPBBuilder.build(); + schemaChangeJobPBBuilder.setNewTabletIdx(newtabletIndex); + final Cloud.TabletSchemaChangeJobPB tabletSchemaChangeJobPb = + schemaChangeJobPBBuilder.build(); + + tabletJobInfoPBBuilder.setSchemaChange(tabletSchemaChangeJobPb); + final Cloud.TabletJobInfoPB tabletJobInfoPB = tabletJobInfoPBBuilder.build(); finishTabletJobRequestBuilder.setJob(tabletJobInfoPB); @@ -874,7 +893,7 @@ public void removeSchemaChangeJob(long dbId, long tableId, long indexId, long pa break; } } catch (RpcException e) { - LOG.warn("tryTimes:{}, dropIndex RpcException", tryTimes, e); + LOG.warn("tryTimes:{}, finishTabletJob RpcException", tryTimes, e); if (tryTimes + 1 >= Config.metaServiceRpcRetryTimes()) { throw new DdlException(e.getMessage()); } @@ -883,7 +902,7 @@ public void removeSchemaChangeJob(long dbId, long tableId, long indexId, long pa } if (response.getStatus().getCode() != Cloud.MetaServiceCode.OK) { - LOG.warn("dropIndex response: {} ", response); + LOG.warn("finishTabletJob response: {} ", response); } } diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index 2d1dc3bdadc9b0..06850f7db3aeaf 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -536,7 +536,7 @@ message TabletCompactionJobPB { optional int64 lease = 23; // prepare optional 
int64 delete_bitmap_lock_initiator = 24; optional int64 full_compaction_cnt = 25; // prepare - optional bool judge_input_versions_range = 26; + optional bool check_input_versions_range = 26; } message TabletSchemaChangeJobPB { diff --git a/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy b/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy new file mode 100644 index 00000000000000..6fc8003527dc02 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy @@ -0,0 +1,262 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.doris.regression.suite.ClusterOptions +import org.apache.http.NoHttpResponseException +import org.apache.doris.regression.util.DebugPoint +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction10') { + def options = new ClusterOptions() + options.cloudMode = true + options.enableDebugPoints() + options.beConfigs += [ "enable_java_support=false" ] + options.beConfigs += [ "disable_auto_compaction=true" ] + options.beNum = 1 + docker(options) { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new 
File("""${context.file.parent}/../ddl/date_unique_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + load_date_once("date"); + + sleep(1000) + GetDebugPoint().enableDebugPointForAllBEs(injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + + cluster.restartBackends() + GetDebugPoint().enableDebugPointForAllBEs(injectName) + sleep(30000) + + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + + // cu compaction + for (int i = 0; i < array.size(); i++) { + tabletId = array[i].TabletId + logger.info("run compaction:" + tabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + } + + for (int i = 0; i < array.size(); i++) { + running = true + do { + Thread.sleep(100) + tabletId = array[i].TabletId + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", 
compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + } finally { + if (injectBe != null) { + GetDebugPoint().disableDebugPointForAllBEs(injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED" || result == "CANCELLED") { + sleep(3000) + break + } else { + sleep(100) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + assertEquals(result, "FINISHED"); + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 2556); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-2]")) + assertTrue(out.contains("[7-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + // base compaction + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + for (int i = 0; i < 3; i++) { + load_date_once("date"); + } + + sql """ select count(*) from date """ + + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + // wait for all compactions done + running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-16]")) + } + } +} \ No newline at end of file diff --git 
a/regression-test/suites/cloud_p0/schema_change/compaction2/test_schema_change_with_compaction3.groovy b/regression-test/suites/cloud_p0/schema_change/compaction2/test_schema_change_with_compaction2.groovy similarity index 100% rename from regression-test/suites/cloud_p0/schema_change/compaction2/test_schema_change_with_compaction3.groovy rename to regression-test/suites/cloud_p0/schema_change/compaction2/test_schema_change_with_compaction2.groovy diff --git a/regression-test/suites/cloud_p0/schema_change/compaction3/test_schema_change_with_compaction5.groovy b/regression-test/suites/cloud_p0/schema_change/compaction3/test_schema_change_with_compaction3.groovy similarity index 100% rename from regression-test/suites/cloud_p0/schema_change/compaction3/test_schema_change_with_compaction5.groovy rename to regression-test/suites/cloud_p0/schema_change/compaction3/test_schema_change_with_compaction3.groovy diff --git a/regression-test/suites/cloud_p0/schema_change/compaction4/test_schema_change_with_compaction6.groovy b/regression-test/suites/cloud_p0/schema_change/compaction4/test_schema_change_with_compaction4.groovy similarity index 100% rename from regression-test/suites/cloud_p0/schema_change/compaction4/test_schema_change_with_compaction6.groovy rename to regression-test/suites/cloud_p0/schema_change/compaction4/test_schema_change_with_compaction4.groovy diff --git a/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction7.groovy b/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy similarity index 100% rename from regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction7.groovy rename to regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy diff --git a/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction9.groovy b/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy similarity index 100% rename from regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction9.groovy rename to regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy diff --git a/regression-test/suites/cloud_p0/schema_change/compaction7/test_schema_change_with_compaction7.groovy b/regression-test/suites/cloud_p0/schema_change/compaction7/test_schema_change_with_compaction7.groovy new file mode 100644 index 00000000000000..7291ea3a341e44 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction7/test_schema_change_with_compaction7.groovy @@ -0,0 +1,256 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
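+ +// This suite holds the BE-side schema change job with a debug point and drives base and cumulative +// compactions on both the origin tablet and the new (shadow) tablet while the alter job is pending.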
+ +// Most of the cases are copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases +// and modified by Doris. + +// Note: To filter out tables from sql files, use the following one-liner comamnd +// sed -nr 's/.*tables: (.*)$/\1/gp' /path/to/*.sql | sed -nr 's/,/\n/gp' | sort | uniq + +import org.apache.doris.regression.util.DebugPoint + +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction7', 'nonConcurrent') { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new File("""${context.file.parent}/../ddl/date_unique_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + load_date_once("date"); + + sleep(1000) + + 
DebugPoint.enableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(15000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + + // cu compaction + for (int i = 0; i < array.size(); i++) { + tabletId = array[i].TabletId + logger.info("run compaction:" + tabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + } + + for (int i = 0; i < array.size(); i++) { + running = true + do { + Thread.sleep(100) + tabletId = array[i].TabletId + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + } finally { + if (injectBe != null) { + DebugPoint.disableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED" || result == "CANCELLED") { + sleep(3000) + break + } else { + sleep(100) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + assertEquals(result, "FINISHED"); + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 2556); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-2]")) + assertTrue(out.contains("[7-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + // base compaction + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, 
newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + for (int i = 0; i < 3; i++) { + load_date_once("date"); + } + + sql """ select count(*) from date """ + + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + // wait for all compactions done + running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-16]")) + } + +} \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/compaction8/test_schema_change_with_compaction8.groovy b/regression-test/suites/cloud_p0/schema_change/compaction8/test_schema_change_with_compaction8.groovy new file mode 100644 index 00000000000000..1017e1d50f235f --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction8/test_schema_change_with_compaction8.groovy @@ -0,0 +1,214 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Most of the cases are copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases +// and modified by Doris. 
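+ +// This variant holds the alter job with the same debug point and checks that base compaction on the +// new (shadow) tablet is rejected with "invalid tablet state." while the origin tablet compacts normally.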
+ +// Note: To filter out tables from sql files, use the following one-liner command +// sed -nr 's/.*tables: (.*)$/\1/gp' /path/to/*.sql | sed -nr 's/,/\n/gp' | sort | uniq + +import org.apache.doris.regression.util.DebugPoint + +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction8', 'nonConcurrent') { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + sql new File("""${context.file.parent}/../ddl/date_unique_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + + sleep(1000) + + DebugPoint.enableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + + // base 
compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + } finally { + if (injectBe != null) { + DebugPoint.disableDebugPoint(injectBe.Host, injectBe.HttpPort.toInteger(), NodeType.BE, injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED") { + sleep(3000) + break + } else { + sleep(100) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 2556); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-2]")) + assertTrue(out.contains("[7-7]")) + + // base compaction + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = 
be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-12]")) + } + +} \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy b/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy new file mode 100644 index 00000000000000..6cb47e01f4b62c --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy @@ -0,0 +1,259 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.doris.regression.suite.ClusterOptions +import org.apache.http.NoHttpResponseException +import org.apache.doris.regression.util.DebugPoint +import org.apache.doris.regression.util.NodeType + +suite('test_schema_change_with_compaction9') { + def options = new ClusterOptions() + options.cloudMode = true + options.enableDebugPoints() + options.beConfigs += [ "enable_java_support=false" ] + options.beConfigs += [ "disable_auto_compaction=true" ] + options.beNum = 1 + docker(options) { + def getJobState = { tableName -> + def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + return jobStateResult[0][9] + } + + def s3BucketName = getS3BucketName() + def s3WithProperties = """WITH S3 ( + |"AWS_ACCESS_KEY" = "${getS3AK()}", + |"AWS_SECRET_KEY" = "${getS3SK()}", + |"AWS_ENDPOINT" = "${getS3Endpoint()}", + |"AWS_REGION" = "${getS3Region()}", + |"provider" = "${getS3Provider()}") + |PROPERTIES( + |"exec_mem_limit" = "8589934592", + |"load_parallelism" = "3")""".stripMargin() + + // set fe configuration + sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')" + sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text + def load_date_once = { String table -> + def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString() + def loadLabel = table + "_" + uniqueID + // load data from cos + def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName) + loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties + sql loadSql + + // check load state + while (true) { + def stateResult = sql "show load where Label = '${loadLabel}'" + def loadState = stateResult[stateResult.size() - 1][2].toString() + if ("CANCELLED".equalsIgnoreCase(loadState)) { + throw new IllegalStateException("load ${loadLabel} failed.") + } else if ("FINISHED".equalsIgnoreCase(loadState)) { + break + } + sleep(5000) + } + } + + 
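// Create the MoW unique-key "date" table that the loads, deletes, and compactions below operate on. +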
sql new File("""${context.file.parent}/../ddl/date_unique_create.sql""").text + def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep' + def injectBe = null + def backends = sql_return_maparray('show backends') + def array = sql_return_maparray("SHOW TABLETS FROM date") + def injectBeId = array[0].BackendId + def originTabletId = array[0].TabletId + injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null) + assertNotNull(injectBe) + + def load_delete_compaction = { + load_date_once("date"); + sql "delete from date where d_datekey < 19900000" + sql "select count(*) from date" + // cu compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + boolean running = true + do { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + try { + load_delete_compaction() + load_delete_compaction() + load_delete_compaction() + + load_date_once("date"); + + sleep(1000) + GetDebugPoint().enableDebugPointForAllBEs(injectName) + sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)" + sleep(5000) + array = sql_return_maparray("SHOW TABLETS FROM date") + + for (int i = 0; i < 5; i++) { + load_date_once("date"); + } + // base compaction + logger.info("run compaction:" + originTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + def newTabletId = array[1].TabletId + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("invalid tablet state.")) + + + // cu compaction + for (int i = 0; i < array.size(); i++) { + tabletId = array[i].TabletId + logger.info("run compaction:" + tabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + } + + for (int i = 0; i < array.size(); i++) { + running = true + do { + Thread.sleep(100) + tabletId = array[i].TabletId + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + 
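// Restart the FE while the schema change is still held by the debug point; after reconnect the pending alter job should be recovered from the meta service and finish once the inject point is cleared. +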
cluster.restartFrontends() + sleep(30000) + context.reconnectFe() + } finally { + if (injectBe != null) { + GetDebugPoint().disableDebugPointForAllBEs(injectName) + } + int max_try_time = 3000 + while (max_try_time--){ + result = getJobState("date") + if (result == "FINISHED" || result == "CANCELLED") { + sleep(3000) + break + } else { + sleep(100) + if (max_try_time < 1){ + assertEquals(1,2) + } + } + } + assertEquals(result, "FINISHED"); + def count = sql """ select count(*) from date; """ + assertEquals(count[0][0], 2556); + // check rowsets + logger.info("run show:" + originTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-2]")) + assertTrue(out.contains("[7-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + // base compaction + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + + // wait for all compactions done + boolean running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-8]")) + assertTrue(out.contains("[9-13]")) + + for (int i = 0; i < 3; i++) { + load_date_once("date"); + } + + sql """ select count(*) from date """ + + logger.info("run compaction:" + newTabletId) + (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + + // wait for all compactions done + running = true + while (running) { + Thread.sleep(100) + (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } + + logger.info("run show:" + newTabletId) + (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId) + logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err) + assertTrue(out.contains("[0-1]")) + assertTrue(out.contains("[2-7]")) + assertTrue(out.contains("[8-16]")) + } + } +} \ No newline at end of file diff --git 
a/regression-test/suites/cloud_p0/schema_change/ddl/date_create.sql b/regression-test/suites/cloud_p0/schema_change/ddl/date_create.sql new file mode 100644 index 00000000000000..8486d7178bbe5d --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/ddl/date_create.sql @@ -0,0 +1,23 @@ +CREATE TABLE IF NOT EXISTS `date` ( + `d_datekey` int(11) NOT NULL COMMENT "", + `d_date` varchar(20) NOT NULL COMMENT "", + `d_dayofweek` varchar(10) NOT NULL COMMENT "", + `d_month` varchar(11) NOT NULL COMMENT "", + `d_year` int(11) NOT NULL COMMENT "", + `d_yearmonthnum` int(11) NOT NULL COMMENT "", + `d_yearmonth` varchar(9) NOT NULL COMMENT "", + `d_daynuminweek` int(11) NOT NULL COMMENT "", + `d_daynuminmonth` int(11) NOT NULL COMMENT "", + `d_daynuminyear` int(11) NOT NULL COMMENT "", + `d_monthnuminyear` int(11) NOT NULL COMMENT "", + `d_weeknuminyear` int(11) NOT NULL COMMENT "", + `d_sellingseason` varchar(14) NOT NULL COMMENT "", + `d_lastdayinweekfl` int(11) NOT NULL COMMENT "", + `d_lastdayinmonthfl` int(11) NOT NULL COMMENT "", + `d_holidayfl` int(11) NOT NULL COMMENT "", + `d_weekdayfl` int(11) NOT NULL COMMENT "" +) +DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +); \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/ddl/date_delete.sql b/regression-test/suites/cloud_p0/schema_change/ddl/date_delete.sql new file mode 100644 index 00000000000000..41702d336d7e7f --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/ddl/date_delete.sql @@ -0,0 +1 @@ +drop table if exists `date` FORCE; \ No newline at end of file diff --git a/regression-test/suites/cloud_p0/schema_change/ddl/date_load.sql b/regression-test/suites/cloud_p0/schema_change/ddl/date_load.sql new file mode 100644 index 00000000000000..3e1511ca69a67a --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/ddl/date_load.sql @@ -0,0 +1,6 @@ +LOAD LABEL ${loadLabel} ( + DATA INFILE("s3://${s3BucketName}/regression/ssb/sf100/date.tbl.gz") + INTO TABLE date + COLUMNS TERMINATED BY "|" + (d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth, d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear, d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,temp) +) diff --git a/regression-test/suites/cloud_p0/schema_change/ddl/date_unique_create.sql b/regression-test/suites/cloud_p0/schema_change/ddl/date_unique_create.sql new file mode 100644 index 00000000000000..0c3005c6e03f77 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/ddl/date_unique_create.sql @@ -0,0 +1,26 @@ +CREATE TABLE IF NOT EXISTS `date` ( + `d_datekey` int(11) NOT NULL COMMENT "", + `d_date` varchar(20) NOT NULL COMMENT "", + `d_dayofweek` varchar(10) NOT NULL COMMENT "", + `d_month` varchar(11) NOT NULL COMMENT "", + `d_year` int(11) NOT NULL COMMENT "", + `d_yearmonthnum` int(11) NOT NULL COMMENT "", + `d_yearmonth` varchar(9) NOT NULL COMMENT "", + `d_daynuminweek` int(11) NOT NULL COMMENT "", + `d_daynuminmonth` int(11) NOT NULL COMMENT "", + `d_daynuminyear` int(11) NOT NULL COMMENT "", + `d_monthnuminyear` int(11) NOT NULL COMMENT "", + `d_weeknuminyear` int(11) NOT NULL COMMENT "", + `d_sellingseason` varchar(14) NOT NULL COMMENT "", + `d_lastdayinweekfl` int(11) NOT NULL COMMENT "", + `d_lastdayinmonthfl` int(11) NOT NULL COMMENT "", + `d_holidayfl` int(11) NOT NULL COMMENT "", + `d_weekdayfl` int(11) NOT NULL COMMENT "" +) +UNIQUE KEY (`d_datekey`) +DISTRIBUTED BY HASH(`d_datekey`) 
BUCKETS 1 +PROPERTIES ( +"replication_num" = "1", +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" +); \ No newline at end of file From 84c862ff6935532ce572d39f933e9d8c4c45d899 Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Thu, 1 Aug 2024 11:14:28 +0800 Subject: [PATCH 14/15] tmp --- be/src/cloud/cloud_base_compaction.cpp | 4 +-- be/src/cloud/cloud_cumulative_compaction.cpp | 11 ++++--- be/src/cloud/cloud_meta_mgr.cpp | 2 ++ be/src/cloud/config.cpp | 2 +- cloud/src/meta-service/meta_service_job.cpp | 33 ++++++++++---------- 5 files changed, 27 insertions(+), 25 deletions(-) diff --git a/be/src/cloud/cloud_base_compaction.cpp b/be/src/cloud/cloud_base_compaction.cpp index 76a0a18d8020ad..044a64ff95786f 100644 --- a/be/src/cloud/cloud_base_compaction.cpp +++ b/be/src/cloud/cloud_base_compaction.cpp @@ -99,7 +99,7 @@ Status CloudBaseCompaction::prepare_compact() { } else if (resp.status().code() == cloud::TABLET_NOT_FOUND) { // tablet not found cloud_tablet()->clear_cache(); - } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) { + } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION) { auto* cloud_tablet = (static_cast(_tablet.get())); std::stringstream ss; ss << "failed to prepare cumu compaction. Check compaction input versions " @@ -332,7 +332,7 @@ Status CloudBaseCompaction::modify_rowsets() { if (!st.ok()) { if (resp.status().code() == cloud::TABLET_NOT_FOUND) { cloud_tablet()->clear_cache(); - } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) { + } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION) { auto* cloud_tablet = (static_cast(_tablet.get())); std::stringstream ss; ss << "failed to prepare cumu compaction. Check compaction input versions " diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index 0ca8a504a887f1..0d01a4e5f58b48 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -48,8 +48,9 @@ CloudCumulativeCompaction::CloudCumulativeCompaction(CloudStorageEngine& engine, CloudCumulativeCompaction::~CloudCumulativeCompaction() = default; Status CloudCumulativeCompaction::prepare_compact() { - if (_tablet->tablet_state() != TABLET_RUNNING && config::enable_new_tablet_do_compaction && - dynamic_cast(_tablet.get())->alter_version() == -1) { + if (_tablet->tablet_state() != TABLET_RUNNING && + (!config::enable_new_tablet_do_compaction || + static_cast(_tablet.get())->alter_version() == -1)) { return Status::InternalError("invalid tablet state. tablet_id={}", _tablet->tablet_id()); } @@ -142,8 +143,8 @@ Status CloudCumulativeCompaction::prepare_compact() { .tag("msg", resp.status().msg()); return Status::Error("no suitable versions"); } - } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) { - (dynamic_cast(_tablet.get()))->set_alter_version(resp.alter_version()); + } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION) { + (static_cast(_tablet.get()))->set_alter_version(resp.alter_version()); std::stringstream ss; ss << "failed to prepare cumu compaction. Check compaction input versions " "failed in schema change. 
" @@ -272,7 +273,7 @@ Status CloudCumulativeCompaction::modify_rowsets() { if (!st.ok()) { if (resp.status().code() == cloud::TABLET_NOT_FOUND) { cloud_tablet()->clear_cache(); - } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION_FAIL) { + } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION) { (dynamic_cast(_tablet.get()))->set_alter_version(resp.alter_version()); std::stringstream ss; ss << "failed to prepare cumu compaction. Check compaction input versions " diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index ad1487917b109a..d59a8b98764ba5 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -448,6 +448,8 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_ int64_t now = duration_cast(system_clock::now().time_since_epoch()).count(); tablet->last_sync_time_s = now; + // If is mow, the tablet has no delete bitmap in base rowsets. + // So dont need to sync it. if (tablet->enable_unique_key_merge_on_write() && tablet->tablet_state() == TABLET_RUNNING) { DeleteBitmap delete_bitmap(tablet_id); diff --git a/be/src/cloud/config.cpp b/be/src/cloud/config.cpp index 67897f527032a8..cf45df4dbc757e 100644 --- a/be/src/cloud/config.cpp +++ b/be/src/cloud/config.cpp @@ -59,6 +59,6 @@ DEFINE_mBool(save_load_error_log_to_s3, "false"); DEFINE_mInt32(sync_load_for_tablets_thread, "32"); -DEFINE_mBool(enable_new_tablet_do_compaction, "true"); +DEFINE_mBool(enable_new_tablet_do_compaction, "false"); } // namespace doris::config diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index e702ec2283a5bf..ba56d5c5a0b93e 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ -60,10 +60,13 @@ static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2; bool check_compaction_input_verions(const TabletCompactionJobPB& compaction, const TabletJobInfoPB& job_pb) { if (!job_pb.has_schema_change() || !job_pb.schema_change().has_alter_version()) return true; - // compaction need to know [start_version, end_version] - DCHECK_EQ(compaction.input_versions_size(), 2) << proto_to_json(compaction); - DCHECK_LE(compaction.input_versions(0), compaction.input_versions(1)) - << proto_to_json(compaction); + if (compaction.input_versions_size() != 2 || + compaction.input_versions(0) > compaction.input_versions(1)) { + LOG(WARNING) << "The compaction need to know [start_version, end_version], and \ + the start_version should LE end_version. 
\n" + << proto_to_json(compaction); + return false; + } int64_t alter_version = job_pb.schema_change().alter_version(); return (compaction.type() == TabletCompactionJobPB_CompactionType_BASE && @@ -154,7 +157,7 @@ void start_compaction_job(MetaServiceCode& code, std::string& msg, std::stringst << " schema_change_alter_version=" << job_pb.schema_change().alter_version(); msg = ss.str(); INSTANCE_LOG(INFO) << msg; - code = MetaServiceCode::JOB_CHECK_ALTER_VERSION_FAIL; + code = MetaServiceCode::JOB_CHECK_ALTER_VERSION; response->set_alter_version(job_pb.schema_change().alter_version()); return; } @@ -327,16 +330,6 @@ void start_schema_change_job(MetaServiceCode& code, std::string& msg, std::strin std::string job_val; TabletJobInfoPB job_pb; err = txn->get(job_key, &job_val); - if (err == TxnErrorCode::TXN_OK) { - job_pb.ParseFromString(job_val); - if (job_pb.has_schema_change() && job_pb.schema_change().has_alter_version() && - job_pb.schema_change().id() == schema_change.id() && - job_pb.schema_change().initiator() == schema_change.initiator()) { - TEST_SYNC_POINT_CALLBACK("restart_compaction_job"); - response->set_alter_version(job_pb.schema_change().alter_version()); - return; - } - } if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { SS << "failed to get tablet job, instance_id=" << instance_id << " tablet_id=" << tablet_id << " key=" << hex(job_key) << " err=" << err; @@ -349,6 +342,13 @@ void start_schema_change_job(MetaServiceCode& code, std::string& msg, std::strin msg = "pb deserialization failed"; return; } + if (job_pb.has_schema_change() && job_pb.schema_change().has_alter_version() && + job_pb.schema_change().id() == schema_change.id() && + job_pb.schema_change().initiator() == schema_change.initiator()) { + TEST_SYNC_POINT_CALLBACK("restart_compaction_job"); + response->set_alter_version(job_pb.schema_change().alter_version()); + return; + } job_pb.mutable_idx()->CopyFrom(request->job().idx()); // FE can ensure that a tablet does not have more than one schema_change job at the same time, // so we can directly preempt previous schema_change job. 
@@ -620,7 +620,7 @@ void process_compaction_job(MetaServiceCode& code, std::string& msg, std::string INSTANCE_LOG(INFO) << msg; abort_compaction = true; response->set_alter_version(recorded_job.schema_change().alter_version()); - code = MetaServiceCode::JOB_CHECK_ALTER_VERSION_FAIL; + code = MetaServiceCode::JOB_CHECK_ALTER_VERSION; } //========================================================================== @@ -1056,7 +1056,6 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str recorded_schema_change.new_tablet_idx().index_id() && schema_change.new_tablet_idx().tablet_id() == recorded_schema_change.new_tablet_idx().tablet_id()) { - // TODO(cyx) // remove schema change recorded_job.clear_schema_change(); new_recorded_job.clear_schema_change(); From 65ae146ba1653ba96593821faf3b237766abdf8e Mon Sep 17 00:00:00 2001 From: Lchangliang <915311741@qq.com> Date: Thu, 1 Aug 2024 15:14:52 +0800 Subject: [PATCH 15/15] tmp --- be/src/cloud/cloud_base_compaction.cpp | 4 +- be/src/cloud/cloud_cumulative_compaction.cpp | 5 +- cloud/src/meta-service/meta_service_job.cpp | 43 +-- cloud/test/meta_service_job_test.cpp | 10 +- .../apache/doris/alter/CloudRollupJobV2.java | 6 +- .../doris/alter/CloudSchemaChangeJobV2.java | 6 +- .../datasource/CloudInternalCatalog.java | 4 +- gensrc/proto/cloud.proto | 2 +- .../pipeline/cloud_p0/conf/be_custom.conf | 1 + ...est_schema_change_with_compaction10.groovy | 1 + ...est_schema_change_with_compaction11.groovy | 280 ++++++++++++++++++ ...test_schema_change_with_compaction5.groovy | 1 + ...test_schema_change_with_compaction6.groovy | 1 + ...test_schema_change_with_compaction9.groovy | 1 + .../schema_change/ddl/date_create.sql | 5 +- .../schema_change/ddl/date_unique_create.sql | 5 +- 16 files changed, 339 insertions(+), 36 deletions(-) create mode 100644 regression-test/suites/cloud_p0/schema_change/compaction11/test_schema_change_with_compaction11.groovy diff --git a/be/src/cloud/cloud_base_compaction.cpp b/be/src/cloud/cloud_base_compaction.cpp index 044a64ff95786f..23d4850d0002e8 100644 --- a/be/src/cloud/cloud_base_compaction.cpp +++ b/be/src/cloud/cloud_base_compaction.cpp @@ -92,6 +92,9 @@ Status CloudBaseCompaction::prepare_compact() { compaction_job->set_lease(now + config::lease_compaction_interval_seconds * 4); cloud::StartTabletJobResponse resp; auto st = _engine.meta_mgr().prepare_tablet_job(job, &resp); + if (resp.has_alter_version()) { + (static_cast(_tablet.get()))->set_alter_version(resp.alter_version()); + } if (!st.ok()) { if (resp.status().code() == cloud::STALE_TABLET_CACHE) { // set last_sync_time to 0 to force sync tablet next time @@ -113,7 +116,6 @@ Status CloudBaseCompaction::prepare_compact() { << " schema_change_alter_version=" << resp.alter_version(); std::string msg = ss.str(); LOG(WARNING) << msg; - cloud_tablet->set_alter_version(resp.alter_version()); return Status::InternalError(msg); } return st; diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index 0d01a4e5f58b48..ea6062309f28c7 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -270,11 +270,13 @@ Status CloudCumulativeCompaction::modify_rowsets() { cloud::FinishTabletJobResponse resp; auto st = _engine.meta_mgr().commit_tablet_job(job, &resp); + if (resp.has_alter_version()) { + (static_cast(_tablet.get()))->set_alter_version(resp.alter_version()); + } if (!st.ok()) { if (resp.status().code() == cloud::TABLET_NOT_FOUND) { 
cloud_tablet()->clear_cache(); } else if (resp.status().code() == cloud::JOB_CHECK_ALTER_VERSION) { - (dynamic_cast(_tablet.get()))->set_alter_version(resp.alter_version()); std::stringstream ss; ss << "failed to prepare cumu compaction. Check compaction input versions " "failed in schema change. " @@ -288,6 +290,7 @@ Status CloudCumulativeCompaction::modify_rowsets() { } return st; } + auto& stats = resp.stats(); LOG(INFO) << "tablet stats=" << stats.ShortDebugString(); { diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index ba56d5c5a0b93e..b2b9ec2531b3fb 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ -919,7 +919,10 @@ void process_compaction_job(MetaServiceCode& code, std::string& msg, std::string txn->put(job_key, job_val); INSTANCE_LOG(INFO) << "remove compaction job tabelt_id=" << tablet_id << " key=" << hex(job_key); - + response->set_alter_version(recorded_job.has_schema_change() && + recorded_job.schema_change().has_alter_version() + ? recorded_job.schema_change().alter_version() + : -1); need_commit = true; } @@ -1007,9 +1010,8 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str } // MUST check initiator to let the retried BE commit this schema_change job. - if (request->action() == FinishTabletJobRequest::COMMIT && - (schema_change.id() != recorded_schema_change.id() || - schema_change.initiator() != recorded_schema_change.initiator())) { + if (schema_change.id() != recorded_schema_change.id() || + schema_change.initiator() != recorded_schema_change.initiator()) { SS << "unmatched job id or initiator, recorded_id=" << recorded_schema_change.id() << " given_id=" << schema_change.id() << " recorded_job=" << proto_to_json(recorded_schema_change) @@ -1031,21 +1033,22 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str {instance_id, new_table_id, new_index_id, new_partition_id, new_tablet_id}); std::string new_tablet_job_val; + TabletJobInfoPB new_recorded_job; err = txn->get(new_tablet_job_key, &new_tablet_job_val); - if (err != TxnErrorCode::TXN_OK) { - SS << (err == TxnErrorCode::TXN_KEY_NOT_FOUND ? "job not found," : "internal error,") + if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { + SS << "internal error," << " instance_id=" << instance_id << " tablet_id=" << new_tablet_id << " job=" << proto_to_json(request->job()) << " err=" << err; msg = ss.str(); code = err == TxnErrorCode::TXN_KEY_NOT_FOUND ? 
MetaServiceCode::INVALID_ARGUMENT : cast_as(err); return; - } - TabletJobInfoPB new_recorded_job; - if (!new_recorded_job.ParseFromString(new_tablet_job_val)) { - code = MetaServiceCode::PROTOBUF_PARSE_ERR; - msg = "malformed new tablet recorded job"; - return; + } else if (err == TxnErrorCode::TXN_OK) { + if (!new_recorded_job.ParseFromString(new_tablet_job_val)) { + code = MetaServiceCode::PROTOBUF_PARSE_ERR; + msg = "malformed new tablet recorded job"; + return; + } } //========================================================================== @@ -1058,11 +1061,13 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str recorded_schema_change.new_tablet_idx().tablet_id()) { // remove schema change recorded_job.clear_schema_change(); - new_recorded_job.clear_schema_change(); auto job_val = recorded_job.SerializeAsString(); - new_tablet_job_val = new_recorded_job.SerializeAsString(); txn->put(job_key, job_val); - txn->put(new_tablet_job_key, new_tablet_job_val); + if (!new_tablet_job_val.empty()) { + new_recorded_job.clear_schema_change(); + new_tablet_job_val = new_recorded_job.SerializeAsString(); + txn->put(new_tablet_job_key, new_tablet_job_val); + } INSTANCE_LOG(INFO) << "remove schema_change job tablet_id=" << tablet_id << " key=" << hex(job_key); @@ -1226,11 +1231,13 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str // remove schema_change job //========================================================================== recorded_job.clear_schema_change(); - new_recorded_job.clear_schema_change(); auto job_val = recorded_job.SerializeAsString(); txn->put(job_key, job_val); - new_tablet_job_val = new_recorded_job.SerializeAsString(); - txn->put(new_tablet_job_key, new_tablet_job_val); + if (!new_tablet_job_val.empty()) { + new_recorded_job.clear_schema_change(); + new_tablet_job_val = new_recorded_job.SerializeAsString(); + txn->put(new_tablet_job_key, new_tablet_job_val); + } INSTANCE_LOG(INFO) << "remove schema_change job tablet_id=" << tablet_id << " key=" << hex(job_key); diff --git a/cloud/test/meta_service_job_test.cpp b/cloud/test/meta_service_job_test.cpp index def9fb11ed8fec..f0323eebb790be 100644 --- a/cloud/test/meta_service_job_test.cpp +++ b/cloud/test/meta_service_job_test.cpp @@ -687,8 +687,11 @@ TEST(MetaServiceJobTest, ProcessSchemaChangeArguments) { recorded_sc->set_id("sc1"); recorded_sc->set_initiator("BE1"); job_val = recorded_job.SerializeAsString(); + auto new_job_key = + job_tablet_key({instance_id, table_id, new_index_id, partition_id, new_tablet_id}); ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK); txn->put(job_key, job_val); + txn->put(new_job_key, job_val); ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK); meta_service->finish_tablet_job(&cntl, &req, &res, nullptr); ASSERT_EQ(res.status().code(), MetaServiceCode::INVALID_ARGUMENT) << res.status().msg(); @@ -2342,12 +2345,12 @@ TEST(MetaServiceJobTest, DoCompactionWhenSC) { StartTabletJobResponse res; start_compaction_job(meta_service.get(), tablet_id, "job1", "BE1", 0, 7, TabletCompactionJobPB::CUMULATIVE, res, {7, 10}); - ASSERT_EQ(res.status().code(), MetaServiceCode::JOB_CHECK_ALTER_VERSION_FAIL); + ASSERT_EQ(res.status().code(), MetaServiceCode::JOB_CHECK_ALTER_VERSION); res.Clear(); start_compaction_job(meta_service.get(), tablet_id, "job1", "BE1", 0, 7, TabletCompactionJobPB::BASE, res, {0, 10}); - ASSERT_EQ(res.status().code(), MetaServiceCode::JOB_CHECK_ALTER_VERSION_FAIL); + ASSERT_EQ(res.status().code(), 
MetaServiceCode::JOB_CHECK_ALTER_VERSION); res.Clear(); start_compaction_job(meta_service.get(), tablet_id, "job1", "BE1", 0, 7, @@ -2499,7 +2502,8 @@ TEST(MetaServiceJobTest, CancelSC) { FinishTabletJobResponse finish_res; finish_schema_change_job(meta_service.get(), tablet_id, new_tablet_id, "job_sc", "BE1", {}, finish_res, FinishTabletJobRequest::ABORT); - ASSERT_EQ(finish_res.status().code(), MetaServiceCode::OK); + ASSERT_NE(finish_res.status().msg().find("unmatched job id or initiator"), + std::string::npos); } { std::unique_ptr txn; diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java index f36d9b5f370006..5c5dd1972ed323 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java @@ -131,11 +131,11 @@ protected void onCancel() { Long rollupTabletId = tabletEntry.getKey(); Long baseTabletId = tabletEntry.getValue(); ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) - .removeSchemaChangeJob(dbId, tableId, baseIndexId, rollupIndexId, + .removeSchemaChangeJob(dbId, tableId, baseIndexId, rollupIndexId, partitionId, baseTabletId, rollupTabletId); } - LOG.info("Cancel RollupJob. Remove SchemaChangeJob in ms." + - "dbId:{}, tableId:{}, rollupIndexId: {} partitionId:{}. tabletSize:{}", + LOG.info("Cancel RollupJob. Remove SchemaChangeJob in ms." + + "dbId:{}, tableId:{}, rollupIndexId: {} partitionId:{}. tabletSize:{}", dbId, tableId, rollupIndexId, partitionId, rollupTabletIdToBaseTabletId.size()); } break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java index ac80812e5b8420..3d79863addb8ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java @@ -134,11 +134,11 @@ protected void onCancel() { Long shadowTabletId = entry.getKey(); Long originTabletId = entry.getValue(); ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) - .removeSchemaChangeJob(dbId, tableId, originIndexId, shadowIndexId, + .removeSchemaChangeJob(dbId, tableId, originIndexId, shadowIndexId, partitionId, originTabletId, shadowTabletId); } - LOG.info("Cancel SchemaChange. Remove SchemaChangeJob in ms." + - "dbId:{}, tableId:{}, originIndexId:{}, partitionId:{}. tabletSize:{}", + LOG.info("Cancel SchemaChange. Remove SchemaChangeJob in ms." + + "dbId:{}, tableId:{}, originIndexId:{}, partitionId:{}. 
tabletSize:{}", dbId, tableId, originIndexId, partitionId, shadowTabletIdToOriginTabletId.size()); } break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java index c3243630376b4c..f0c9278562d437 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java @@ -874,8 +874,8 @@ public void removeSchemaChangeJob(long dbId, long tableId, long indexId, long ne newtabletIndexPBBuilder.setTabletId(newTabletId); final Cloud.TabletIndexPB newtabletIndex = newtabletIndexPBBuilder.build(); schemaChangeJobPBBuilder.setNewTabletIdx(newtabletIndex); - final Cloud.TabletSchemaChangeJobPB tabletSchemaChangeJobPb = - schemaChangeJobPBBuilder.build(); + final Cloud.TabletSchemaChangeJobPB tabletSchemaChangeJobPb = + schemaChangeJobPBBuilder.build(); tabletJobInfoPBBuilder.setSchemaChange(tabletSchemaChangeJobPb); diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index 06850f7db3aeaf..f8acd97d05fb38 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -1340,7 +1340,7 @@ enum MetaServiceCode { JOB_ALREADY_SUCCESS = 5002; ROUTINE_LOAD_DATA_INCONSISTENT = 5003; ROUTINE_LOAD_PROGRESS_NOT_FOUND = 5004; - JOB_CHECK_ALTER_VERSION_FAIL = 5005; + JOB_CHECK_ALTER_VERSION = 5005; // Rate limit MAX_QPS_LIMIT = 6001; diff --git a/regression-test/pipeline/cloud_p0/conf/be_custom.conf b/regression-test/pipeline/cloud_p0/conf/be_custom.conf index 9f2967b1972c11..1da9c9992d5f93 100644 --- a/regression-test/pipeline/cloud_p0/conf/be_custom.conf +++ b/regression-test/pipeline/cloud_p0/conf/be_custom.conf @@ -33,3 +33,4 @@ save_load_error_log_to_s3 = true enable_stream_load_record = true stream_load_record_batch_size = 500 webserver_num_workers = 128 +enable_new_tablet_do_compaction = true diff --git a/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy b/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy index 6fc8003527dc02..b393979d44218a 100644 --- a/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy +++ b/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy @@ -25,6 +25,7 @@ suite('test_schema_change_with_compaction10') { options.cloudMode = true options.enableDebugPoints() options.beConfigs += [ "enable_java_support=false" ] + options.beConfigs += [ "enable_new_tablet_do_compaction=true" ] options.beConfigs += [ "disable_auto_compaction=true" ] options.beNum = 1 docker(options) { diff --git a/regression-test/suites/cloud_p0/schema_change/compaction11/test_schema_change_with_compaction11.groovy b/regression-test/suites/cloud_p0/schema_change/compaction11/test_schema_change_with_compaction11.groovy new file mode 100644 index 00000000000000..fd257fcb7ea950 --- /dev/null +++ b/regression-test/suites/cloud_p0/schema_change/compaction11/test_schema_change_with_compaction11.groovy @@ -0,0 +1,280 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.doris.regression.suite.ClusterOptions
+import org.apache.http.NoHttpResponseException
+import org.apache.doris.regression.util.DebugPoint
+import org.apache.doris.regression.util.NodeType
+
+suite('test_schema_change_with_compaction11') {
+    def options = new ClusterOptions()
+    options.cloudMode = true
+    options.enableDebugPoints()
+    options.beConfigs += [ "enable_java_support=false" ]
+    options.beConfigs += [ "enable_new_tablet_do_compaction=false" ]
+    options.beConfigs += [ "disable_auto_compaction=true" ]
+    options.beNum = 1
+    docker(options) {
+        def getJobState = { tableName ->
+            def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1 """
+            return jobStateResult[0][9]
+        }
+
+        def s3BucketName = getS3BucketName()
+        def s3WithProperties = """WITH S3 (
+            |"AWS_ACCESS_KEY" = "${getS3AK()}",
+            |"AWS_SECRET_KEY" = "${getS3SK()}",
+            |"AWS_ENDPOINT" = "${getS3Endpoint()}",
+            |"AWS_REGION" = "${getS3Region()}",
+            |"provider" = "${getS3Provider()}")
+            |PROPERTIES(
+            |"exec_mem_limit" = "8589934592",
+            |"load_parallelism" = "3")""".stripMargin()
+
+        // set fe configuration
+        sql "ADMIN SET FRONTEND CONFIG ('max_bytes_per_broker_scanner' = '161061273600')"
+        sql new File("""${context.file.parent}/../ddl/date_delete.sql""").text
+        def load_date_once = { String table ->
+            def uniqueID = Math.abs(UUID.randomUUID().hashCode()).toString()
+            def loadLabel = table + "_" + uniqueID
+            // load data from cos
+            def loadSql = new File("""${context.file.parent}/../ddl/${table}_load.sql""").text.replaceAll("\\\$\\{s3BucketName\\}", s3BucketName)
+            loadSql = loadSql.replaceAll("\\\$\\{loadLabel\\}", loadLabel) + s3WithProperties
+            sql loadSql
+
+            // check load state
+            while (true) {
+                def stateResult = sql "show load where Label = '${loadLabel}'"
+                def loadState = stateResult[stateResult.size() - 1][2].toString()
+                if ("CANCELLED".equalsIgnoreCase(loadState)) {
+                    throw new IllegalStateException("load ${loadLabel} failed.")
+                } else if ("FINISHED".equalsIgnoreCase(loadState)) {
+                    break
+                }
+                sleep(5000)
+            }
+        }
+
+        sql new File("""${context.file.parent}/../ddl/date_unique_create.sql""").text
+        def injectName = 'CloudSchemaChangeJob.process_alter_tablet.sleep'
+        def injectBe = null
+        def backends = sql_return_maparray('show backends')
+        def array = sql_return_maparray("SHOW TABLETS FROM date")
+        def injectBeId = array[0].BackendId
+        def originTabletId = array[0].TabletId
+        injectBe = backends.stream().filter(be -> be.BackendId == injectBeId).findFirst().orElse(null)
+        assertNotNull(injectBe)
+
+        def load_delete_compaction = {
+            load_date_once("date");
+            sql "delete from date where d_datekey < 19900000"
+            sql "select count(*) from date"
+            // cu compaction
+            logger.info("run compaction:" + originTabletId)
+            (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, originTabletId)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+            boolean running = true
+            do {
+                Thread.sleep(100)
+                (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId)
+                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            } while (running)
+        }
+
+        try {
+            load_delete_compaction()
+            load_delete_compaction()
+            load_delete_compaction()
+
+            load_date_once("date");
+
+            sleep(1000)
+            GetDebugPoint().enableDebugPointForAllBEs(injectName)
+            sql "ALTER TABLE date MODIFY COLUMN d_holidayfl bigint(11)"
+            sleep(5000)
+            array = sql_return_maparray("SHOW TABLETS FROM date")
+
+            for (int i = 0; i < 5; i++) {
+                load_date_once("date");
+            }
+
+            // base compaction
+            logger.info("run compaction:" + originTabletId)
+            (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, originTabletId)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+
+            // wait for all compactions done
+            boolean running = true
+            while (running) {
+                Thread.sleep(100)
+                (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, originTabletId)
+                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            }
+            newTabletId = array[1].TabletId
+            logger.info("run compaction:" + newTabletId)
+            (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+            assertTrue(out.contains("invalid tablet state."))
+
+            // cu compaction
+            tabletId = array[0].TabletId
+            logger.info("run compaction:" + tabletId)
+            (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+
+            running = true
+            do {
+                Thread.sleep(100)
+                tabletId = array[0].TabletId
+                (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, tabletId)
+                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            } while (running)
+
+            // new tablet cannot do cu compaction
+            tabletId = array[1].TabletId
+            logger.info("run compaction:" + tabletId)
+            (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, tabletId)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+            assertTrue(out.contains("invalid tablet state."))
+
+        } finally {
+            if (injectBe != null) {
+                GetDebugPoint().disableDebugPointForAllBEs(injectName)
+            }
+            int max_try_time = 3000
+            while (max_try_time--) {
+                result = getJobState("date")
+                if (result == "FINISHED" || result == "CANCELLED") {
+                    sleep(3000)
+                    break
+                } else {
+                    sleep(100)
+                    if (max_try_time < 1) {
+                        assertEquals(1, 2)
+                    }
+                }
+            }
+            assertEquals(result, "FINISHED");
+            def count = sql """ select count(*) from date; """
+            assertEquals(count[0][0], 2556);
+            // check rowsets
+            logger.info("run show:" + originTabletId)
+            (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, originTabletId)
+            logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err)
+            assertTrue(out.contains("[0-1]"))
+            assertTrue(out.contains("[2-7]"))
+            assertTrue(out.contains("[8-8]"))
+            assertTrue(out.contains("[9-13]"))
+
+            logger.info("run show:" + newTabletId)
+            (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId)
+            logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err)
+            assertTrue(out.contains("[0-1]"))
+            assertTrue(out.contains("[2-2]"))
+            assertTrue(out.contains("[7-7]"))
+            assertTrue(out.contains("[8-8]"))
+            assertTrue(out.contains("[9-9]"))
+            assertTrue(out.contains("[13-13]"))
+
+            // base compaction
+            logger.info("run compaction:" + newTabletId)
+            (code, out, err) = be_run_base_compaction(injectBe.Host, injectBe.HttpPort, newTabletId)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+
+            // wait for all compactions done
+            boolean running = true
+            while (running) {
+                Thread.sleep(100)
+                (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId)
+                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            }
+
+            // cu compaction
+            logger.info("run compaction:" + newTabletId)
+            (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+
+            // wait for all compactions done
+            running = true
+            while (running) {
+                Thread.sleep(100)
+                (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId)
+                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            }
+
+            logger.info("run show:" + newTabletId)
+            (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId)
+            logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err)
+            assertTrue(out.contains("[0-1]"))
+            assertTrue(out.contains("[2-7]"))
+            assertTrue(out.contains("[8-13]"))
+
+            for (int i = 0; i < 4; i++) {
+                load_date_once("date");
+            }
+
+            sql """ select count(*) from date """
+
+            logger.info("run compaction:" + newTabletId)
+            (code, out, err) = be_run_cumulative_compaction(injectBe.Host, injectBe.HttpPort, newTabletId)
+            logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+
+            // wait for all compactions done
+            running = true
+            while (running) {
+                Thread.sleep(100)
+                (code, out, err) = be_get_compaction_status(injectBe.Host, injectBe.HttpPort, newTabletId)
+                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            }
+
+            logger.info("run show:" + newTabletId)
+            (code, out, err) = be_show_tablet_status(injectBe.Host, injectBe.HttpPort, newTabletId)
+            logger.info("Run show: code=" + code + ", out=" + out + ", err=" + err)
+            assertTrue(out.contains("[0-1]"))
+            assertTrue(out.contains("[2-7]"))
+            assertTrue(out.contains("[8-17]"))
+        }
+    }
+}
diff --git a/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy b/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy
index c338dac907b245..f5028ff9e818c3 100644
--- a/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy
+++ b/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy
@@ -26,6 +26,7 @@ suite('test_schema_change_with_compaction5', 'nonConcurrent') {
     options.enableDebugPoints()
     options.beConfigs += [ "enable_java_support=false" ]
     options.beConfigs += [ "disable_auto_compaction=true" ]
+    options.beConfigs += [ "enable_new_tablet_do_compaction=true" ]
     options.beNum = 1
     docker(options) {
         def getJobState = { tableName ->
diff --git a/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy b/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy
index 245dbe46b714c6..951535433d1362 100644
--- a/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy
+++ b/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy
@@ -26,6 +26,7 @@ suite('test_schema_change_with_compaction6', 'nonConcurrent') {
     options.enableDebugPoints()
     options.beConfigs += [ "enable_java_support=false" ]
     options.beConfigs += [ "disable_auto_compaction=true" ]
+    options.beConfigs += [ "enable_new_tablet_do_compaction=true" ]
     options.beNum = 1
     docker(options) {
         def getJobState = { tableName ->
diff --git a/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy b/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy
index 6cb47e01f4b62c..83c549eefc5abd 100644
--- a/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy
+++ b/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy
@@ -26,6 +26,7 @@ suite('test_schema_change_with_compaction9') {
     options.enableDebugPoints()
     options.beConfigs += [ "enable_java_support=false" ]
     options.beConfigs += [ "disable_auto_compaction=true" ]
+    options.beConfigs += [ "enable_new_tablet_do_compaction=true" ]
     options.beNum = 1
     docker(options) {
         def getJobState = { tableName ->
diff --git a/regression-test/suites/cloud_p0/schema_change/ddl/date_create.sql b/regression-test/suites/cloud_p0/schema_change/ddl/date_create.sql
index 8486d7178bbe5d..99c85399c123b2 100644
--- a/regression-test/suites/cloud_p0/schema_change/ddl/date_create.sql
+++ b/regression-test/suites/cloud_p0/schema_change/ddl/date_create.sql
@@ -19,5 +19,6 @@ CREATE TABLE IF NOT EXISTS `date` (
 )
 DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1
 PROPERTIES (
-"replication_num" = "1"
-);
\ No newline at end of file
+"replication_num" = "1",
+"disable_auto_compaction" = "true"
+);
diff --git a/regression-test/suites/cloud_p0/schema_change/ddl/date_unique_create.sql b/regression-test/suites/cloud_p0/schema_change/ddl/date_unique_create.sql
index 0c3005c6e03f77..6138cb213a2063 100644
--- a/regression-test/suites/cloud_p0/schema_change/ddl/date_unique_create.sql
+++ b/regression-test/suites/cloud_p0/schema_change/ddl/date_unique_create.sql
@@ -22,5 +22,6 @@ DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1
 PROPERTIES (
 "replication_num" = "1",
 "enable_unique_key_merge_on_write" = "true",
-"enable_mow_light_delete" = "true" -); \ No newline at end of file +"enable_mow_light_delete" = "true", +"disable_auto_compaction" = "true" +);