From db1761ec684d667a5cb8d3ba8946372001f04b6c Mon Sep 17 00:00:00 2001
From: Yingchun Lai
Date: Wed, 26 Oct 2022 11:18:28 +0800
Subject: [PATCH] refactor(log): adjust log level and rename for d*_replica/dup/rocksdb macros (#1202)

---
 src/meta/duplication/duplication_info.cpp | 11 +-
 src/meta/duplication/duplication_info.h | 16 +-
 .../duplication/meta_duplication_service.cpp | 21 +-
 src/replica/backup/replica_backup_manager.cpp | 16 +-
 src/replica/bulk_load/replica_bulk_loader.cpp | 266 +++++------
 .../duplication/load_from_private_log.cpp | 29 +-
 src/replica/duplication/mutation_batch.cpp | 14 +-
 .../duplication/replica_duplicator.cpp | 38 +-
 .../replica_duplicator_manager.cpp | 10 +-
 .../duplication/replica_duplicator_manager.h | 4 +-
 src/replica/duplication/replica_follower.cpp | 64 +--
 src/replica/replica.cpp | 12 +-
 src/replica/replica_2pc.cpp | 29 +-
 src/replica/replica_backup.cpp | 6 +-
 src/replica/replica_base.h | 2 +
 src/replica/replica_check.cpp | 14 +-
 src/replica/replica_chkpt.cpp | 37 +-
 src/replica/replica_config.cpp | 26 +-
 src/replica/replica_disk_migrator.cpp | 128 +++---
 src/replica/replica_init.cpp | 2 +-
 src/replica/replica_learn.cpp | 97 ++--
 src/replica/replica_restore.cpp | 33 +-
 src/replica/replica_throttle.cpp | 10 +-
 src/replica/replication_app_base.cpp | 10 +-
 src/replica/split/replica_split_manager.cpp | 387 ++++++++--------
 src/replica/split/replica_split_manager.h | 2 +-
 src/server/hotkey_collector.cpp | 16 +-
 src/server/logging_utils.h | 14 +-
 src/server/meta_store.cpp | 2 +-
 src/server/pegasus_event_listener.cpp | 4 +-
 src/server/pegasus_manual_compact_service.cpp | 42 +-
 src/server/pegasus_mutation_duplicator.cpp | 16 +-
 src/server/pegasus_server_impl.cpp | 413 +++++++++---------
 src/server/pegasus_server_impl_init.cpp | 36 +-
 src/server/pegasus_server_write.cpp | 24 +-
 src/server/pegasus_write_service_impl.h | 106 ++---
 src/server/rocksdb_wrapper.cpp | 66 +--
 src/utils/fmt_logging.h | 14 +-
 38 files changed, 1023 insertions(+), 1014 deletions(-)

diff --git a/src/meta/duplication/duplication_info.cpp b/src/meta/duplication/duplication_info.cpp
index ad8e7dcfc3..0a6e62155b 100644
--- a/src/meta/duplication/duplication_info.cpp
+++ b/src/meta/duplication/duplication_info.cpp
@@ -162,14 +162,13 @@ void duplication_info::persist_status()
     zauto_write_lock l(_lock);
 
     if (!_is_altering) {
-        derror_dup(this, "callers never write a duplication that is not altering to meta store");
+        LOG_ERROR_PREFIX("callers never write a duplication that is not altering to meta store");
         return;
     }
-    ddebug_dup(this,
-               "change duplication status from {} to {} successfully [app_id: {}]",
-               duplication_status_to_string(_status),
-               duplication_status_to_string(_next_status),
-               app_id);
+    LOG_INFO_PREFIX("change duplication status from {} to {} successfully [app_id: {}]",
+                    duplication_status_to_string(_status),
+                    duplication_status_to_string(_next_status),
+                    app_id);
 
     _is_altering = false;
     _status = _next_status;
diff --git a/src/meta/duplication/duplication_info.h b/src/meta/duplication/duplication_info.h
index dbf007e8ce..e4d2450366 100644
--- a/src/meta/duplication/duplication_info.h
+++ b/src/meta/duplication/duplication_info.h
@@ -62,7 +62,8 @@ class duplication_info
           follower_cluster_name(std::move(follower_cluster_name)),
           follower_cluster_metas(std::move(follower_cluster_metas)),
           store_path(std::move(meta_store_path)),
-          create_timestamp_ms(create_now_ms)
+          create_timestamp_ms(create_now_ms),
+          prefix_for_log(fmt::format("a{}d{}", app_id, id))
     {
        for (int i = 0; i < partition_count; i++) {
            _progress[i] = {};
@@ -185,6 +186,8 @@ class duplication_info
     // To json encoded string.
     std::string to_string() const;
 
+    const char *log_prefix() const { return prefix_for_log.c_str(); }
+
 private:
     friend class duplication_info_test;
     friend class meta_duplication_service_test;
@@ -237,6 +240,7 @@ class duplication_info
     const std::vector follower_cluster_metas;
     const std::string store_path; // store path on meta service = get_duplication_path(app, dupid)
     const uint64_t create_timestamp_ms{0}; // the time when this dup is created.
+    const std::string prefix_for_log;
 };
 
 extern void json_encode(dsn::json::JsonWriter &out, const duplication_status::type &s);
@@ -247,15 +251,7 @@ extern void json_encode(dsn::json::JsonWriter &out, const duplication_fail_mode:
 
 extern bool json_decode(const dsn::json::JsonObject &in, duplication_fail_mode::type &s);
 
-// Macros for writing log message prefixed by appid and dupid.
-#define ddebug_dup(_dup_, ...) \
-    LOG_INFO_F("[a{}d{}] {}", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));
-#define dwarn_dup(_dup_, ...) \
-    LOG_WARNING_F("[a{}d{}] {}", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));
-#define derror_dup(_dup_, ...) \
-    LOG_ERROR_F("[a{}d{}] {}", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));
-#define dfatal_dup(_dup_, ...) \
-    LOG_FATAL_F("[a{}d{}] {}", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));
+// TODO(yingchun): remember to update it when refactor dassert_f
 #define dassert_dup(_pred_, _dup_, ...) \
     dassert_f(_pred_, "[a{}d{}] {}", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));
 
diff --git a/src/meta/duplication/meta_duplication_service.cpp b/src/meta/duplication/meta_duplication_service.cpp
index 54b0215808..b59d5e21d0 100644
--- a/src/meta/duplication/meta_duplication_service.cpp
+++ b/src/meta/duplication/meta_duplication_service.cpp
@@ -199,8 +199,8 @@ void meta_duplication_service::do_add_duplication(std::shared_ptr &ap
     std::queue nodes({get_duplication_path(*app), std::to_string(dup->id)});
     _meta_svc->get_meta_storage()->create_node_recursively(
         std::move(nodes), std::move(value), [app, this, dup, rpc]() mutable {
-            ddebug_dup(dup,
-                       "add duplication successfully [app_name: {}, follower: {}]",
+            LOG_INFO_F("[{}] add duplication successfully [app_name: {}, follower: {}]",
+                       dup->log_prefix(),
                        app->app_name,
                        dup->follower_cluster_name);
 
@@ -590,19 +590,20 @@ void meta_duplication_service::do_restore_duplication_progress(
 
             int64_t confirmed_decree = invalid_decree;
             if (!buf2int64(value, confirmed_decree)) {
-                derror_dup(dup,
-                           "invalid confirmed_decree {} on partition_idx {}",
-                           value.to_string(),
-                           partition_idx);
+                LOG_ERROR_F("[{}] invalid confirmed_decree {} on partition_idx {}",
+                            dup->log_prefix(),
+                            value.to_string(),
+                            partition_idx);
                 return; // fail fast
             }
 
             dup->init_progress(partition_idx, confirmed_decree);
 
-            ddebug_dup(dup,
-                       "initialize progress from metastore [partition_idx: {}, confirmed: {}]",
-                       partition_idx,
-                       confirmed_decree);
+            LOG_INFO_F(
+                "[{}] initialize progress from metastore [partition_idx: {}, confirmed: {}]",
+                dup->log_prefix(),
+                partition_idx,
+                confirmed_decree);
         });
     }
 }
diff --git a/src/replica/backup/replica_backup_manager.cpp b/src/replica/backup/replica_backup_manager.cpp
index 1573975ced..b2767a5974 100644
--- a/src/replica/backup/replica_backup_manager.cpp
+++ b/src/replica/backup/replica_backup_manager.cpp
@@ -76,7 +76,7 @@ void replica_backup_manager::on_clear_cold_backup(const backup_clear_request &re
     if (find != _replica->_cold_backup_contexts.end()) {
cold_backup_context_ptr backup_context = find->second; if (backup_context->is_checkpointing()) { - ddebug_replica( + LOG_INFO_PREFIX( "{}: delay clearing obsoleted cold backup context, cause backup_status == " "ColdBackupCheckpointing", backup_context->name); @@ -143,9 +143,9 @@ void replica_backup_manager::collect_backup_info() void replica_backup_manager::background_clear_backup_checkpoint(const std::string &policy_name) { - ddebug_replica("schedule to clear all checkpoint dirs of policy({}) after {} minutes", - policy_name, - _replica->options()->cold_backup_checkpoint_reserve_minutes); + LOG_INFO_PREFIX("schedule to clear all checkpoint dirs of policy({}) after {} minutes", + policy_name, + _replica->options()->cold_backup_checkpoint_reserve_minutes); tasking::enqueue( LPC_BACKGROUND_COLD_BACKUP, &_replica->_tracker, @@ -157,7 +157,7 @@ void replica_backup_manager::background_clear_backup_checkpoint(const std::strin // clear all checkpoint dirs of the policy void replica_backup_manager::clear_backup_checkpoint(const std::string &policy_name) { - ddebug_replica("clear all checkpoint dirs of policy({})", policy_name); + LOG_INFO_PREFIX("clear all checkpoint dirs of policy({})", policy_name); auto backup_dir = _replica->_app->backup_dir(); if (!utils::filesystem::directory_exists(backup_dir)) { return; @@ -166,7 +166,7 @@ void replica_backup_manager::clear_backup_checkpoint(const std::string &policy_n // Find the corresponding checkpoint dirs with policy name std::vector chkpt_dirs; if (!get_policy_checkpoint_dirs(backup_dir, policy_name, chkpt_dirs)) { - dwarn_replica("get checkpoint dirs in backup dir({}) failed", backup_dir); + LOG_WARNING_PREFIX("get checkpoint dirs in backup dir({}) failed", backup_dir); return; } @@ -174,9 +174,9 @@ void replica_backup_manager::clear_backup_checkpoint(const std::string &policy_n for (const std::string &dirname : chkpt_dirs) { std::string full_path = utils::filesystem::path_combine(backup_dir, dirname); if (utils::filesystem::remove_path(full_path)) { - ddebug_replica("remove backup checkpoint dir({}) succeed", full_path); + LOG_INFO_PREFIX("remove backup checkpoint dir({}) succeed", full_path); } else { - dwarn_replica("remove backup checkpoint dir({}) failed", full_path); + LOG_WARNING_PREFIX("remove backup checkpoint dir({}) failed", full_path); } } } diff --git a/src/replica/bulk_load/replica_bulk_loader.cpp b/src/replica/bulk_load/replica_bulk_loader.cpp index 9b16f54bf4..beb8d6fa33 100644 --- a/src/replica/bulk_load/replica_bulk_loader.cpp +++ b/src/replica/bulk_load/replica_bulk_loader.cpp @@ -45,13 +45,14 @@ void replica_bulk_loader::on_bulk_load(const bulk_load_request &request, response.err = ERR_OK; if (status() != partition_status::PS_PRIMARY) { - dwarn_replica("receive bulk load request with wrong status {}", enum_to_string(status())); + LOG_WARNING_PREFIX("receive bulk load request with wrong status {}", + enum_to_string(status())); response.err = ERR_INVALID_STATE; return; } if (request.ballot != get_ballot()) { - dwarn_replica( + LOG_WARNING_PREFIX( "receive bulk load request with wrong version, remote ballot={}, local ballot={}", request.ballot, get_ballot()); @@ -59,15 +60,15 @@ void replica_bulk_loader::on_bulk_load(const bulk_load_request &request, return; } - ddebug_replica("receive bulk load request, remote provider = {}, remote_root_path = {}, " - "cluster_name = {}, app_name = {}, " - "meta_bulk_load_status = {}, local bulk_load_status = {}", - request.remote_provider_name, - request.remote_root_path, - request.cluster_name, - 
request.app_name, - enum_to_string(request.meta_bulk_load_status), - enum_to_string(_status)); + LOG_INFO_PREFIX("receive bulk load request, remote provider = {}, remote_root_path = {}, " + "cluster_name = {}, app_name = {}, " + "meta_bulk_load_status = {}, local bulk_load_status = {}", + request.remote_provider_name, + request.remote_root_path, + request.cluster_name, + request.app_name, + enum_to_string(request.meta_bulk_load_status), + enum_to_string(_status)); error_code ec = do_bulk_load(request.app_name, request.meta_bulk_load_status, @@ -93,20 +94,20 @@ void replica_bulk_loader::on_bulk_load(const bulk_load_request &request, void replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &meta_req) { if (!_replica->_primary_states.learners.empty()) { - dwarn_replica("has learners, skip broadcast group bulk load request"); + LOG_WARNING_PREFIX("has learners, skip broadcast group bulk load request"); return; } if (!_replica->_primary_states.group_bulk_load_pending_replies.empty()) { - dwarn_replica("{} group bulk_load replies are still pending, cancel it firstly", - _replica->_primary_states.group_bulk_load_pending_replies.size()); + LOG_WARNING_PREFIX("{} group bulk_load replies are still pending, cancel it firstly", + _replica->_primary_states.group_bulk_load_pending_replies.size()); for (auto &kv : _replica->_primary_states.group_bulk_load_pending_replies) { CLEANUP_TASK_ALWAYS(kv.second); } _replica->_primary_states.group_bulk_load_pending_replies.clear(); } - ddebug_replica("start to broadcast group bulk load"); + LOG_INFO_PREFIX("start to broadcast group bulk load"); for (const auto &addr : _replica->_primary_states.membership.secondaries) { if (addr == _stub->_primary_address) @@ -122,7 +123,7 @@ void replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &met request->meta_bulk_load_status = meta_req.meta_bulk_load_status; request->remote_root_path = meta_req.remote_root_path; - ddebug_replica("send group_bulk_load_request to {}", addr.to_string()); + LOG_INFO_PREFIX("send group_bulk_load_request to {}", addr.to_string()); group_bulk_load_rpc rpc( std::move(request), RPC_GROUP_BULK_LOAD, 0_ms, 0, get_gpid().thread_hash()); @@ -143,7 +144,7 @@ void replica_bulk_loader::on_group_bulk_load(const group_bulk_load_request &requ if (request.config.ballot < get_ballot()) { response.err = ERR_VERSION_OUTDATED; - dwarn_replica( + LOG_WARNING_PREFIX( "receive outdated group_bulk_load request, request ballot({}) VS local ballot({})", request.config.ballot, get_ballot()); @@ -151,26 +152,26 @@ void replica_bulk_loader::on_group_bulk_load(const group_bulk_load_request &requ } if (request.config.ballot > get_ballot()) { response.err = ERR_INVALID_STATE; - dwarn_replica("receive group_bulk_load request, local ballot is outdated, request " - "ballot({}) VS local ballot({})", - request.config.ballot, - get_ballot()); + LOG_WARNING_PREFIX("receive group_bulk_load request, local ballot is outdated, request " + "ballot({}) VS local ballot({})", + request.config.ballot, + get_ballot()); return; } if (status() != request.config.status) { response.err = ERR_INVALID_STATE; - dwarn_replica("status changed, status should be {}, but {}", - enum_to_string(request.config.status), - enum_to_string(status())); + LOG_WARNING_PREFIX("status changed, status should be {}, but {}", + enum_to_string(request.config.status), + enum_to_string(status())); return; } - ddebug_replica("receive group_bulk_load request, primary address = {}, ballot = {}, " - "meta bulk_load_status = {}, local 
bulk_load_status = {}", - request.config.primary.to_string(), - request.config.ballot, - enum_to_string(request.meta_bulk_load_status), - enum_to_string(_status)); + LOG_INFO_PREFIX("receive group_bulk_load request, primary address = {}, ballot = {}, " + "meta bulk_load_status = {}, local bulk_load_status = {}", + request.config.primary.to_string(), + request.config.ballot, + enum_to_string(request.meta_bulk_load_status), + enum_to_string(_status)); error_code ec = do_bulk_load(request.app_name, request.meta_bulk_load_status, @@ -194,36 +195,36 @@ void replica_bulk_loader::on_group_bulk_load_reply(error_code err, _replica->_checker.only_one_thread_access(); if (partition_status::PS_PRIMARY != status()) { - derror_replica("replica status={}, should be {}", - enum_to_string(status()), - enum_to_string(partition_status::PS_PRIMARY)); + LOG_ERROR_PREFIX("replica status={}, should be {}", + enum_to_string(status()), + enum_to_string(partition_status::PS_PRIMARY)); return; } _replica->_primary_states.group_bulk_load_pending_replies.erase(req.target_address); if (err != ERR_OK) { - derror_replica("failed to receive group_bulk_load_reply from {}, error = {}", - req.target_address.to_string(), - err.to_string()); + LOG_ERROR_PREFIX("failed to receive group_bulk_load_reply from {}, error = {}", + req.target_address.to_string(), + err.to_string()); _replica->_primary_states.reset_node_bulk_load_states(req.target_address); return; } if (resp.err != ERR_OK) { - derror_replica("receive group_bulk_load response from {} failed, error = {}", - req.target_address.to_string(), - resp.err.to_string()); + LOG_ERROR_PREFIX("receive group_bulk_load response from {} failed, error = {}", + req.target_address.to_string(), + resp.err.to_string()); _replica->_primary_states.reset_node_bulk_load_states(req.target_address); return; } if (req.config.ballot != get_ballot()) { - derror_replica("recevied wrong group_bulk_load response from {}, request ballot = {}, " - "current ballot = {}", - req.target_address.to_string(), - req.config.ballot, - get_ballot()); + LOG_ERROR_PREFIX("recevied wrong group_bulk_load response from {}, request ballot = {}, " + "current ballot = {}", + req.target_address.to_string(), + req.config.ballot, + get_ballot()); _replica->_primary_states.reset_node_bulk_load_states(req.target_address); return; } @@ -245,9 +246,9 @@ error_code replica_bulk_loader::do_bulk_load(const std::string &app_name, bulk_load_status::type local_status = _status; error_code ec = validate_status(meta_status, local_status); if (ec != ERR_OK) { - derror_replica("invalid bulk load status, remote = {}, local = {}", - enum_to_string(meta_status), - enum_to_string(local_status)); + LOG_ERROR_PREFIX("invalid bulk load status, remote = {}, local = {}", + enum_to_string(meta_status), + enum_to_string(local_status)); return ec; } @@ -352,9 +353,9 @@ error_code replica_bulk_loader::start_download(const std::string &remote_dir, { if (_stub->_bulk_load_downloading_count.load() >= _stub->_max_concurrent_bulk_load_downloading_count) { - dwarn_replica("node[{}] already has {} replica downloading, wait for next round", - _stub->_primary_address_str, - _stub->_bulk_load_downloading_count.load()); + LOG_WARNING_PREFIX("node[{}] already has {} replica downloading, wait for next round", + _stub->_primary_address_str, + _stub->_bulk_load_downloading_count.load()); return ERR_BUSY; } @@ -371,22 +372,22 @@ error_code replica_bulk_loader::start_download(const std::string &remote_dir, _status = bulk_load_status::BLS_DOWNLOADING; 
++_stub->_bulk_load_downloading_count; - ddebug_replica("node[{}] has {} replica executing downloading", - _stub->_primary_address_str, - _stub->_bulk_load_downloading_count.load()); + LOG_INFO_PREFIX("node[{}] has {} replica executing downloading", + _stub->_primary_address_str, + _stub->_bulk_load_downloading_count.load()); _bulk_load_start_time_ms = dsn_now_ms(); _stub->_counter_bulk_load_downloading_count->increment(); // create local bulk load dir if (!utils::filesystem::directory_exists(_replica->_dir)) { - derror_replica("_dir({}) is not existed", _replica->_dir); + LOG_ERROR_PREFIX("_dir({}) is not existed", _replica->_dir); return ERR_FILE_OPERATION_FAILED; } const std::string local_dir = utils::filesystem::path_combine( _replica->_dir, bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR); if (!utils::filesystem::directory_exists(local_dir) && !utils::filesystem::create_directory(local_dir)) { - derror_replica("create bulk_load_dir({}) failed", local_dir); + LOG_ERROR_PREFIX("create bulk_load_dir({}) failed", local_dir); return ERR_FILE_OPERATION_FAILED; } @@ -407,7 +408,7 @@ void replica_bulk_loader::download_files(const std::string &provider_name, { FAIL_POINT_INJECT_F("replica_bulk_loader_download_files", [](string_view) {}); - ddebug_replica("start to download files"); + LOG_INFO_PREFIX("start to download files"); dist::block_service::block_filesystem *fs = _stub->_block_service_manager.get_or_create_block_filesystem(provider_name); @@ -420,7 +421,8 @@ void replica_bulk_loader::download_files(const std::string &provider_name, if (err != ERR_OK && err != ERR_PATH_ALREADY_EXIST) { try_decrease_bulk_load_download_count(); _download_status.store(err); - derror_replica("download bulk load metadata file failed, error = {}", err.to_string()); + LOG_ERROR_PREFIX("download bulk load metadata file failed, error = {}", + err.to_string()); return; } @@ -431,7 +433,7 @@ void replica_bulk_loader::download_files(const std::string &provider_name, if (err != ERR_OK) { try_decrease_bulk_load_download_count(); _download_status.store(err); - derror_replica("parse bulk load metadata failed, error = {}", err.to_string()); + LOG_ERROR_PREFIX("parse bulk load metadata failed, error = {}", err.to_string()); return; } } @@ -469,11 +471,11 @@ void replica_bulk_loader::download_sst_file(const std::string &remote_dir, f_size = f_meta.size; verified = true; } else { - derror_replica("file({}) exists, but not verified, try to remove local file " - "and redownload it", - file_name); + LOG_ERROR_PREFIX("file({}) exists, but not verified, try to remove local file " + "and redownload it", + file_name); if (!utils::filesystem::remove_path(file_name)) { - derror_replica("failed to remove file({})", file_name); + LOG_ERROR_PREFIX("failed to remove file({})", file_name); ec = ERR_FILE_OPERATION_FAILED; } else { ec = _stub->_block_service_manager.download_file( @@ -496,7 +498,7 @@ void replica_bulk_loader::download_sst_file(const std::string &remote_dir, try_decrease_bulk_load_download_count(); _download_status.store(ec); } - derror_replica("failed to download file({}), error = {}", f_meta.name, ec.to_string()); + LOG_ERROR_PREFIX("failed to download file({}), error = {}", f_meta.name, ec.to_string()); _stub->_counter_bulk_load_download_file_fail_count->increment(); return; } @@ -527,19 +529,19 @@ error_code replica_bulk_loader::parse_bulk_load_metadata(const std::string &fnam std::string buf; error_code ec = utils::filesystem::read_file(fname, buf); if (ec != ERR_OK) { - derror_replica("read file {} failed, error = {}", 
fname, ec); + LOG_ERROR_PREFIX("read file {} failed, error = {}", fname, ec); return ec; } blob bb = blob::create_from_bytes(std::move(buf)); if (!json::json_forwarder::decode(bb, _metadata)) { - derror_replica("file({}) is damaged", fname); + LOG_ERROR_PREFIX("file({}) is damaged", fname); return ERR_CORRUPTION; } if (_metadata.file_total_size <= 0) { - derror_replica("bulk_load_metadata has invalid file_total_size({})", - _metadata.file_total_size); + LOG_ERROR_PREFIX("bulk_load_metadata has invalid file_total_size({})", + _metadata.file_total_size); return ERR_CORRUPTION; } @@ -553,24 +555,24 @@ void replica_bulk_loader::update_bulk_load_download_progress(uint64_t file_size, { zauto_write_lock l(_lock); if (_metadata.file_total_size <= 0) { - derror_replica("update downloading file({}) progress failed, metadata has invalid " - "file_total_size({}), current status = {}", - file_name, - _metadata.file_total_size, - enum_to_string(_status)); + LOG_ERROR_PREFIX("update downloading file({}) progress failed, metadata has invalid " + "file_total_size({}), current status = {}", + file_name, + _metadata.file_total_size, + enum_to_string(_status)); return; } - ddebug_replica("update progress after downloading file({})", file_name); + LOG_INFO_PREFIX("update progress after downloading file({})", file_name); _cur_downloaded_size.fetch_add(file_size); auto total_size = static_cast(_metadata.file_total_size); auto cur_downloaded_size = static_cast(_cur_downloaded_size.load()); auto cur_progress = static_cast((cur_downloaded_size / total_size) * 100); _download_progress.store(cur_progress); - ddebug_replica("total_size = {}, cur_downloaded_size = {}, progress = {}", - total_size, - cur_downloaded_size, - cur_progress); + LOG_INFO_PREFIX("total_size = {}, cur_downloaded_size = {}, progress = {}", + total_size, + cur_downloaded_size, + cur_progress); } tasking::enqueue(LPC_REPLICATION_COMMON, @@ -588,9 +590,9 @@ void replica_bulk_loader::try_decrease_bulk_load_download_count() } --_stub->_bulk_load_downloading_count; _is_downloading.store(false); - ddebug_replica("node[{}] has {} replica executing downloading", - _stub->_primary_address_str, - _stub->_bulk_load_downloading_count.load()); + LOG_INFO_PREFIX("node[{}] has {} replica executing downloading", + _stub->_primary_address_str, + _stub->_bulk_load_downloading_count.load()); } // ThreadPool: THREAD_POOL_REPLICATION @@ -598,7 +600,7 @@ void replica_bulk_loader::check_download_finish() { if (_download_progress.load() == bulk_load_constant::PROGRESS_FINISHED && _status == bulk_load_status::BLS_DOWNLOADING) { - ddebug_replica("download all files succeed"); + LOG_INFO_PREFIX("download all files succeed"); _status = bulk_load_status::BLS_DOWNLOADED; { zauto_write_lock l(_lock); @@ -659,7 +661,7 @@ void replica_bulk_loader::handle_bulk_load_succeed() void replica_bulk_loader::handle_bulk_load_finish(bulk_load_status::type new_status) { if (is_cleaned_up()) { - ddebug_replica("bulk load states have been cleaned up"); + LOG_INFO_PREFIX("bulk load states have been cleaned up"); return; } @@ -669,9 +671,9 @@ void replica_bulk_loader::handle_bulk_load_finish(bulk_load_status::type new_sta } } - ddebug_replica("bulk load finished, old_status = {}, new_status = {}", - enum_to_string(_status), - enum_to_string(new_status)); + LOG_INFO_PREFIX("bulk load finished, old_status = {}, new_status = {}", + enum_to_string(_status), + enum_to_string(new_status)); // remove local bulk load dir std::string bulk_load_dir = utils::filesystem::path_combine( @@ -695,15 +697,15 
@@ void replica_bulk_loader::remove_local_bulk_load_dir(const std::string &bulk_loa std::to_string(dsn_now_ms()), kFolderSuffixGar); if (!utils::filesystem::rename_path(bulk_load_dir, garbage_dir)) { - derror_replica("rename bulk_load dir({}) failed.", bulk_load_dir); + LOG_ERROR_PREFIX("rename bulk_load dir({}) failed.", bulk_load_dir); return; } if (!utils::filesystem::remove_path(garbage_dir)) { - derror_replica( + LOG_ERROR_PREFIX( "remove bulk_load gar dir({}) failed, disk cleaner would retry to remove it.", garbage_dir); } - ddebug_replica("remove bulk_load dir({}) succeed.", garbage_dir); + LOG_INFO_PREFIX("remove bulk_load dir({}) succeed.", garbage_dir); } // ThreadPool: THREAD_POOL_REPLICATION @@ -782,7 +784,7 @@ bool replica_bulk_loader::is_cleaned_up() void replica_bulk_loader::pause_bulk_load() { if (_status == bulk_load_status::BLS_PAUSED) { - ddebug_replica("bulk load has been paused"); + LOG_INFO_PREFIX("bulk load has been paused"); return; } if (_status == bulk_load_status::BLS_DOWNLOADING) { @@ -791,7 +793,7 @@ void replica_bulk_loader::pause_bulk_load() try_decrease_bulk_load_download_count(); } _status = bulk_load_status::BLS_PAUSED; - ddebug_replica("bulk load is paused"); + LOG_INFO_PREFIX("bulk load is paused"); } // ThreadPool: THREAD_POOL_REPLICATION @@ -838,9 +840,9 @@ void replica_bulk_loader::report_bulk_load_states_to_meta(bulk_load_status::type void replica_bulk_loader::report_group_download_progress(/*out*/ bulk_load_response &response) { if (status() != partition_status::PS_PRIMARY) { - dwarn_replica("replica status={}, should be {}", - enum_to_string(status()), - enum_to_string(partition_status::PS_PRIMARY)); + LOG_WARNING_PREFIX("replica status={}, should be {}", + enum_to_string(status()), + enum_to_string(partition_status::PS_PRIMARY)); response.err = ERR_INVALID_STATE; return; } @@ -852,10 +854,10 @@ void replica_bulk_loader::report_group_download_progress(/*out*/ bulk_load_respo primary_state.__set_download_status(_download_status.load()); } response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - ddebug_replica("primary = {}, download progress = {}%, status = {}", - _replica->_primary_states.membership.primary.to_string(), - primary_state.download_progress, - primary_state.download_status); + LOG_INFO_PREFIX("primary = {}, download progress = {}%, status = {}", + _replica->_primary_states.membership.primary.to_string(), + primary_state.download_progress, + primary_state.download_status); int32_t total_progress = primary_state.download_progress; for (const auto &target_address : _replica->_primary_states.membership.secondaries) { @@ -865,16 +867,16 @@ void replica_bulk_loader::report_group_download_progress(/*out*/ bulk_load_respo secondary_state.__isset.download_progress ? secondary_state.download_progress : 0; error_code s_status = secondary_state.__isset.download_status ? 
secondary_state.download_status : ERR_OK; - ddebug_replica("secondary = {}, download progress = {}%, status={}", - target_address.to_string(), - s_progress, - s_status); + LOG_INFO_PREFIX("secondary = {}, download progress = {}%, status={}", + target_address.to_string(), + s_progress, + s_status); response.group_bulk_load_state[target_address] = secondary_state; total_progress += s_progress; } total_progress /= _replica->_primary_states.membership.max_replica_count; - ddebug_replica("total download progress = {}%", total_progress); + LOG_INFO_PREFIX("total download progress = {}%", total_progress); response.__set_total_download_progress(total_progress); } @@ -882,9 +884,9 @@ void replica_bulk_loader::report_group_download_progress(/*out*/ bulk_load_respo void replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_response &response) { if (status() != partition_status::PS_PRIMARY) { - dwarn_replica("replica status={}, should be {}", - enum_to_string(status()), - enum_to_string(partition_status::PS_PRIMARY)); + LOG_WARNING_PREFIX("replica status={}, should be {}", + enum_to_string(status()), + enum_to_string(partition_status::PS_PRIMARY)); response.err = ERR_INVALID_STATE; return; } @@ -892,9 +894,9 @@ void replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_respon partition_bulk_load_state primary_state; primary_state.__set_ingest_status(_replica->_app->get_ingestion_status()); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - ddebug_replica("primary = {}, ingestion status = {}", - _replica->_primary_states.membership.primary.to_string(), - enum_to_string(primary_state.ingest_status)); + LOG_INFO_PREFIX("primary = {}, ingestion status = {}", + _replica->_primary_states.membership.primary.to_string(), + enum_to_string(primary_state.ingest_status)); bool is_group_ingestion_finish = (primary_state.ingest_status == ingestion_status::IS_SUCCEED) && @@ -906,9 +908,9 @@ void replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_respon ingestion_status::type ingest_status = secondary_state.__isset.ingest_status ? 
secondary_state.ingest_status : ingestion_status::IS_INVALID; - ddebug_replica("secondary = {}, ingestion status={}", - target_address.to_string(), - enum_to_string(ingest_status)); + LOG_INFO_PREFIX("secondary = {}, ingestion status={}", + target_address.to_string(), + enum_to_string(ingest_status)); response.group_bulk_load_state[target_address] = secondary_state; is_group_ingestion_finish &= (ingest_status == ingestion_status::IS_SUCCEED); } @@ -916,7 +918,7 @@ void replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_respon // if group ingestion finish, recover wirte immediately if (is_group_ingestion_finish) { - ddebug_replica("finish ingestion, recover write"); + LOG_INFO_PREFIX("finish ingestion, recover write"); _replica->_is_bulk_load_ingestion = false; _replica->_bulk_load_ingestion_start_time_ms = 0; } @@ -926,9 +928,9 @@ void replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_respon void replica_bulk_loader::report_group_cleaned_up(bulk_load_response &response) { if (status() != partition_status::PS_PRIMARY) { - dwarn_replica("replica status={}, should be {}", - enum_to_string(status()), - enum_to_string(partition_status::PS_PRIMARY)); + LOG_WARNING_PREFIX("replica status={}, should be {}", + enum_to_string(status()), + enum_to_string(partition_status::PS_PRIMARY)); response.err = ERR_INVALID_STATE; return; } @@ -936,9 +938,9 @@ void replica_bulk_loader::report_group_cleaned_up(bulk_load_response &response) partition_bulk_load_state primary_state; primary_state.__set_is_cleaned_up(is_cleaned_up()); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - ddebug_replica("primary = {}, bulk load states cleaned_up = {}", - _replica->_primary_states.membership.primary.to_string(), - primary_state.is_cleaned_up); + LOG_INFO_PREFIX("primary = {}, bulk load states cleaned_up = {}", + _replica->_primary_states.membership.primary.to_string(), + primary_state.is_cleaned_up); bool group_flag = (primary_state.is_cleaned_up) && (_replica->_primary_states.membership.secondaries.size() + 1 == @@ -948,13 +950,13 @@ void replica_bulk_loader::report_group_cleaned_up(bulk_load_response &response) _replica->_primary_states.secondary_bulk_load_states[target_address]; bool is_cleaned_up = secondary_state.__isset.is_cleaned_up ? 
secondary_state.is_cleaned_up : false; - ddebug_replica("secondary = {}, bulk load states cleaned_up = {}", - target_address.to_string(), - is_cleaned_up); + LOG_INFO_PREFIX("secondary = {}, bulk load states cleaned_up = {}", + target_address.to_string(), + is_cleaned_up); response.group_bulk_load_state[target_address] = secondary_state; group_flag &= is_cleaned_up; } - ddebug_replica("group bulk load states cleaned_up = {}", group_flag); + LOG_INFO_PREFIX("group bulk load states cleaned_up = {}", group_flag); response.__set_is_group_bulk_load_context_cleaned_up(group_flag); } @@ -962,9 +964,9 @@ void replica_bulk_loader::report_group_cleaned_up(bulk_load_response &response) void replica_bulk_loader::report_group_is_paused(bulk_load_response &response) { if (status() != partition_status::PS_PRIMARY) { - dwarn_replica("replica status={}, should be {}", - enum_to_string(status()), - enum_to_string(partition_status::PS_PRIMARY)); + LOG_WARNING_PREFIX("replica status={}, should be {}", + enum_to_string(status()), + enum_to_string(partition_status::PS_PRIMARY)); response.err = ERR_INVALID_STATE; return; } @@ -972,9 +974,9 @@ void replica_bulk_loader::report_group_is_paused(bulk_load_response &response) partition_bulk_load_state primary_state; primary_state.__set_is_paused(_status == bulk_load_status::BLS_PAUSED); response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state; - ddebug_replica("primary = {}, bulk_load is_paused = {}", - _replica->_primary_states.membership.primary.to_string(), - primary_state.is_paused); + LOG_INFO_PREFIX("primary = {}, bulk_load is_paused = {}", + _replica->_primary_states.membership.primary.to_string(), + primary_state.is_paused); bool group_is_paused = primary_state.is_paused && (_replica->_primary_states.membership.secondaries.size() + 1 == @@ -983,12 +985,12 @@ void replica_bulk_loader::report_group_is_paused(bulk_load_response &response) partition_bulk_load_state secondary_state = _replica->_primary_states.secondary_bulk_load_states[target_address]; bool is_paused = secondary_state.__isset.is_paused ? 
secondary_state.is_paused : false; - ddebug_replica( + LOG_INFO_PREFIX( "secondary = {}, bulk_load is_paused = {}", target_address.to_string(), is_paused); response.group_bulk_load_state[target_address] = secondary_state; group_is_paused &= is_paused; } - ddebug_replica("group bulk load is_paused = {}", group_is_paused); + LOG_INFO_PREFIX("group bulk load is_paused = {}", group_is_paused); response.__set_is_group_bulk_load_paused(group_is_paused); } @@ -1041,8 +1043,8 @@ void replica_bulk_loader::clear_bulk_load_states_if_needed(partition_status::typ _status == bulk_load_status::BLS_FAILED || _status == bulk_load_status::BLS_INVALID) { return; } - ddebug_replica("prepare to clear bulk load states, current status = {}", - enum_to_string(_status)); + LOG_INFO_PREFIX("prepare to clear bulk load states, current status = {}", + enum_to_string(_status)); clear_bulk_load_states(); } } diff --git a/src/replica/duplication/load_from_private_log.cpp b/src/replica/duplication/load_from_private_log.cpp index cd66b41fc4..1512b2de02 100644 --- a/src/replica/duplication/load_from_private_log.cpp +++ b/src/replica/duplication/load_from_private_log.cpp @@ -52,7 +52,7 @@ bool load_from_private_log::switch_to_next_log_file() log_file_ptr file; error_s es = log_utils::open_read(next_file_it->second->path(), file); if (!es.is_ok()) { - derror_replica("{}", es); + LOG_ERROR_PREFIX("{}", es); _current = nullptr; return false; } @@ -76,11 +76,12 @@ void load_from_private_log::run() // valid status if (_mutation_batch.last_decree() == invalid_decree) { if (_duplicator->progress().confirmed_decree == invalid_decree) { - dwarn_replica("duplication status hasn't been sync completed, try next for delay 1s, " - "last_commit_decree={}, " - "confirmed_decree={}", - _duplicator->progress().last_decree, - _duplicator->progress().confirmed_decree); + LOG_WARNING_PREFIX( + "duplication status hasn't been sync completed, try next for delay 1s, " + "last_commit_decree={}, " + "confirmed_decree={}", + _duplicator->progress().last_decree, + _duplicator->progress().confirmed_decree); repeat(1_s); FAIL_POINT_INJECT_NOT_RETURN_F("duplication_sync_complete", [&](string_view s) -> void { @@ -99,7 +100,7 @@ void load_from_private_log::run() if (_current == nullptr) { find_log_file_to_start(); if (_current == nullptr) { - ddebug_replica("no private log file is currently available"); + LOG_INFO_PREFIX("no private log file is currently available"); repeat(_repeat_delay); return; } @@ -120,7 +121,7 @@ void load_from_private_log::find_log_file_to_start() log_file_ptr file; error_s es = log_utils::open_read(pr.second->path(), file); if (!es.is_ok()) { - derror_replica("{}", es); + LOG_ERROR_PREFIX("{}", es); return; } new_file_map.emplace(pr.first, file); @@ -133,7 +134,7 @@ void load_from_private_log::find_log_file_to_start(std::map l { _current = nullptr; if (dsn_unlikely(log_file_map.empty())) { - derror_replica("unable to start duplication since no log file is available"); + LOG_ERROR_PREFIX("unable to start duplication since no log file is available"); return; } @@ -179,7 +180,7 @@ void load_from_private_log::replay_log_block() // 2. fail-slow, retry reading this file until human interference. 
_err_block_repeats_num++; if (_err_block_repeats_num >= MAX_ALLOWED_BLOCK_REPEATS) { - derror_replica( + LOG_ERROR_PREFIX( "loading mutation logs failed for {} times: [err: {}, file: {}, start_offset: {}]", _err_block_repeats_num, err, @@ -189,9 +190,9 @@ void load_from_private_log::replay_log_block() _err_file_repeats_num++; if (dsn_unlikely(will_fail_skip())) { // skip this file - derror_replica("failed loading for {} times, abandon file {} and try next", - _err_file_repeats_num, - _current->path()); + LOG_ERROR_PREFIX("failed loading for {} times, abandon file {} and try next", + _err_file_repeats_num, + _current->path()); _err_file_repeats_num = 0; auto prev_offset = _current_global_end_offset; @@ -271,7 +272,7 @@ void load_from_private_log::set_start_decree(decree start_decree) void load_from_private_log::start_from_log_file(log_file_ptr f) { - ddebug_replica("start loading from log file {}", f->path()); + LOG_INFO_PREFIX("start loading from log file {}", f->path()); _current = std::move(f); _start_offset = 0; diff --git a/src/replica/duplication/mutation_batch.cpp b/src/replica/duplication/mutation_batch.cpp index a9df9e49a8..4a85a18b53 100644 --- a/src/replica/duplication/mutation_batch.cpp +++ b/src/replica/duplication/mutation_batch.cpp @@ -64,13 +64,13 @@ void mutation_buffer::commit(decree d, commit_type ct) // duplication // or ignore the loss. if (next_committed_mutation == nullptr || !next_committed_mutation->is_logged()) { - derror_replica("mutation[{}] is lost in prepare_list: " - "prepare_last_committed_decree={}, prepare_min_decree={}, " - "prepare_max_decree={}", - d0, - last_committed_decree(), - min_decree(), - max_decree()); + LOG_ERROR_PREFIX("mutation[{}] is lost in prepare_list: " + "prepare_last_committed_decree={}, prepare_min_decree={}, " + "prepare_max_decree={}", + d0, + last_committed_decree(), + min_decree(), + max_decree()); _counter_dulication_mutation_loss_count->set(min_decree() - last_committed_decree()); // if next_commit_mutation loss, let last_commit_decree catch up with min_decree, and // the next loop will commit from min_decree diff --git a/src/replica/duplication/replica_duplicator.cpp b/src/replica/duplication/replica_duplicator.cpp index d0b1eea67c..ca8fe819d9 100644 --- a/src/replica/duplication/replica_duplicator.cpp +++ b/src/replica/duplication/replica_duplicator.cpp @@ -45,10 +45,10 @@ replica_duplicator::replica_duplicator(const duplication_entry &ent, replica *r) } else { _progress.last_decree = _progress.confirmed_decree = it->second; } - ddebug_replica("initialize replica_duplicator[{}] [dupid:{}, meta_confirmed_decree:{}]", - duplication_status_to_string(_status), - id(), - it->second); + LOG_INFO_PREFIX("initialize replica_duplicator[{}] [dupid:{}, meta_confirmed_decree:{}]", + duplication_status_to_string(_status), + id(), + it->second); thread_pool(LPC_REPLICATION_LOW).task_tracker(tracker()).thread_hash(get_gpid().thread_hash()); if (_status == duplication_status::DS_PREPARE) { @@ -60,11 +60,11 @@ replica_duplicator::replica_duplicator(const duplication_entry &ent, replica *r) void replica_duplicator::prepare_dup() { - ddebug_replica("start prepare checkpoint to catch up with latest durable decree: " - "start_point_decree({}) < last_durable_decree({}) = {}", - _start_point_decree, - _replica->last_durable_decree(), - _start_point_decree < _replica->last_durable_decree()); + LOG_INFO_PREFIX("start prepare checkpoint to catch up with latest durable decree: " + "start_point_decree({}) < last_durable_decree({}) = {}", + 
_start_point_decree, + _replica->last_durable_decree(), + _start_point_decree < _replica->last_durable_decree()); tasking::enqueue( LPC_REPLICATION_COMMON, @@ -75,10 +75,10 @@ void replica_duplicator::prepare_dup() void replica_duplicator::start_dup_log() { - ddebug_replica("starting duplication {} [last_decree: {}, confirmed_decree: {}]", - to_string(), - _progress.last_decree, - _progress.confirmed_decree); + LOG_INFO_PREFIX("starting duplication {} [last_decree: {}, confirmed_decree: {}]", + to_string(), + _progress.last_decree, + _progress.confirmed_decree); /// ===== pipeline declaration ===== /// @@ -95,7 +95,7 @@ void replica_duplicator::start_dup_log() void replica_duplicator::pause_dup_log() { - ddebug_replica("pausing duplication: {}", to_string()); + LOG_INFO_PREFIX("pausing duplication: {}", to_string()); pause(); cancel_all(); @@ -104,7 +104,7 @@ void replica_duplicator::pause_dup_log() _ship.reset(); _load_private.reset(); - ddebug_replica("duplication paused: {}", to_string()); + LOG_INFO_PREFIX("duplication paused: {}", to_string()); } std::string replica_duplicator::to_string() const @@ -131,8 +131,8 @@ std::string replica_duplicator::to_string() const void replica_duplicator::update_status_if_needed(duplication_status::type next_status) { if (is_duplication_status_invalid(next_status)) { - derror_replica("unexpected duplication status ({})", - duplication_status_to_string(next_status)); + LOG_ERROR_PREFIX("unexpected duplication status ({})", + duplication_status_to_string(next_status)); return; } @@ -142,7 +142,7 @@ void replica_duplicator::update_status_if_needed(duplication_status::type next_s return; } - ddebug_replica( + LOG_INFO_PREFIX( "update duplication status: {}=>{}[start_point={}, last_commit={}, last_durable={}]", duplication_status_to_string(_status), duplication_status_to_string(next_status), @@ -177,7 +177,7 @@ replica_duplicator::~replica_duplicator() { pause(); cancel_all(); - ddebug_replica("closing duplication {}", to_string()); + LOG_INFO_PREFIX("closing duplication {}", to_string()); } error_s replica_duplicator::update_progress(const duplication_progress &p) diff --git a/src/replica/duplication/replica_duplicator_manager.cpp b/src/replica/duplication/replica_duplicator_manager.cpp index 5310dc7a72..c261cb0848 100644 --- a/src/replica/duplication/replica_duplicator_manager.cpp +++ b/src/replica/duplication/replica_duplicator_manager.cpp @@ -35,9 +35,9 @@ replica_duplicator_manager::get_duplication_confirms_to_update() const if (p.last_decree != p.confirmed_decree || (kv.second->status() == duplication_status::DS_PREPARE && p.checkpoint_has_prepared)) { if (p.last_decree < p.confirmed_decree) { - derror_replica("invalid decree state: p.last_decree({}) < p.confirmed_decree({})", - p.last_decree, - p.confirmed_decree); + LOG_ERROR_PREFIX("invalid decree state: p.last_decree({}) < p.confirmed_decree({})", + p.last_decree, + p.confirmed_decree); continue; } duplication_confirm_entry entry; @@ -69,8 +69,8 @@ void replica_duplicator_manager::sync_duplication(const duplication_entry &ent) if (!is_duplication_status_invalid(next_status)) { dup = make_unique(ent, _replica); } else { - derror_replica("illegal duplication status: {}", - duplication_status_to_string(next_status)); + LOG_ERROR_PREFIX("illegal duplication status: {}", + duplication_status_to_string(next_status)); } } else { // update progress diff --git a/src/replica/duplication/replica_duplicator_manager.h b/src/replica/duplication/replica_duplicator_manager.h index b47bf8f8e5..13de3f671a 100644 
--- a/src/replica/duplication/replica_duplicator_manager.h +++ b/src/replica/duplication/replica_duplicator_manager.h @@ -100,8 +100,8 @@ class replica_duplicator_manager : public replica_base if (_duplications.empty()) return; - dwarn_replica("remove all duplication, replica status = {}", - enum_to_string(_replica->status())); + LOG_WARNING_PREFIX("remove all duplication, replica status = {}", + enum_to_string(_replica->status())); _duplications.clear(); } diff --git a/src/replica/duplication/replica_follower.cpp b/src/replica/duplication/replica_follower.cpp index aa9c30738e..cedc842e01 100644 --- a/src/replica/duplication/replica_follower.cpp +++ b/src/replica/duplication/replica_follower.cpp @@ -68,11 +68,11 @@ error_code replica_follower::duplicate_checkpoint() { zauto_lock l(_lock); if (_duplicating_checkpoint) { - dwarn_replica("duplicate master[{}] checkpoint is running", master_replica_name()); + LOG_WARNING_PREFIX("duplicate master[{}] checkpoint is running", master_replica_name()); return ERR_BUSY; } - ddebug_replica("start duplicate master[{}] checkpoint", master_replica_name()); + LOG_INFO_PREFIX("start duplicate master[{}] checkpoint", master_replica_name()); _duplicating_checkpoint = true; tasking::enqueue(LPC_DUPLICATE_CHECKPOINT, &_tracker, [=]() mutable { async_duplicate_checkpoint_from_master_replica(); @@ -98,7 +98,7 @@ void replica_follower::async_duplicate_checkpoint_from_master_replica() // just fetch the same partition config meta_config_request.partition_indices = {get_gpid().get_partition_index()}; - ddebug_replica("query master[{}] replica configuration", master_replica_name()); + LOG_INFO_PREFIX("query master[{}] replica configuration", master_replica_name()); dsn::message_ex *msg = dsn::message_ex::create_request( RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, 0, get_gpid().thread_hash()); dsn::marshall(msg, meta_config_request); @@ -126,43 +126,43 @@ replica_follower::update_master_replica_config(error_code err, { error_code err_code = err != ERR_OK ? 
err : resp.err; if (dsn_unlikely(err_code != ERR_OK)) { - derror_replica( + LOG_ERROR_PREFIX( "query master[{}] config failed: {}", master_replica_name(), err_code.to_string()); return err_code; } if (dsn_unlikely(resp.partition_count != _replica->get_app_info()->partition_count)) { - derror_replica("master[{}] partition count is inconsistent: local = {} vs master = {}", - master_replica_name(), - _replica->get_app_info()->partition_count, - resp.partition_count); + LOG_ERROR_PREFIX("master[{}] partition count is inconsistent: local = {} vs master = {}", + master_replica_name(), + _replica->get_app_info()->partition_count, + resp.partition_count); return ERR_INCONSISTENT_STATE; } if (dsn_unlikely(resp.partitions.size() != 1)) { - derror_replica("master[{}] config size must be single, but actually is {}", - master_replica_name(), - resp.partitions.size()); + LOG_ERROR_PREFIX("master[{}] config size must be single, but actually is {}", + master_replica_name(), + resp.partitions.size()); return ERR_INVALID_DATA; } if (dsn_unlikely(resp.partitions[0].pid.get_partition_index() != get_gpid().get_partition_index())) { - derror_replica("master[{}] partition index is inconsistent: local = {} vs master = {}", - master_replica_name(), - get_gpid().get_partition_index(), - resp.partitions[0].pid.get_partition_index()); + LOG_ERROR_PREFIX("master[{}] partition index is inconsistent: local = {} vs master = {}", + master_replica_name(), + get_gpid().get_partition_index(), + resp.partitions[0].pid.get_partition_index()); return ERR_INCONSISTENT_STATE; } if (dsn_unlikely(resp.partitions[0].primary == rpc_address::s_invalid_address)) { - derror_replica("master[{}] partition address is invalid", master_replica_name()); + LOG_ERROR_PREFIX("master[{}] partition address is invalid", master_replica_name()); return ERR_INVALID_STATE; } // since the request just specify one partition, the result size is single _master_replica_config = resp.partitions[0]; - ddebug_replica( + LOG_INFO_PREFIX( "query master[{}] config successfully and update local config: remote={}, gpid={}", master_replica_name(), _master_replica_config.primary.to_string(), @@ -173,8 +173,8 @@ replica_follower::update_master_replica_config(error_code err, // ThreadPool: THREAD_POOL_DEFAULT void replica_follower::copy_master_replica_checkpoint() { - ddebug_replica("query master[{}] replica checkpoint info and start use nfs copy the data", - master_replica_name()); + LOG_INFO_PREFIX("query master[{}] replica checkpoint info and start use nfs copy the data", + master_replica_name()); learn_request request; request.pid = _master_replica_config.pid; dsn::message_ex *msg = dsn::message_ex::create_request( @@ -193,16 +193,16 @@ error_code replica_follower::nfs_copy_checkpoint(error_code err, learn_response { error_code err_code = err != ERR_OK ? 
err : resp.err; if (dsn_unlikely(err_code != ERR_OK)) { - derror_replica("query master[{}] replica checkpoint info failed, err = {}", - master_replica_name(), - err_code.to_string()); + LOG_ERROR_PREFIX("query master[{}] replica checkpoint info failed, err = {}", + master_replica_name(), + err_code.to_string()); return err_code; } std::string dest = utils::filesystem::path_combine( _replica->dir(), duplication_constants::kDuplicationCheckpointRootDir); if (!utils::filesystem::remove_path(dest)) { - derror_replica( + LOG_ERROR_PREFIX( "clear master[{}] replica checkpoint dest dir {} failed", master_replica_name(), dest); return ERR_FILE_OPERATION_FAILED; } @@ -219,7 +219,7 @@ void replica_follower::nfs_copy_remote_files(const rpc_address &remote_node, std::vector &file_list, const std::string &dest_dir) { - ddebug_replica( + LOG_INFO_PREFIX( "nfs start copy master[{}] replica checkpoint: {}", master_replica_name(), remote_dir); std::shared_ptr request = std::make_shared(); request->source = remote_node; @@ -240,16 +240,16 @@ void replica_follower::nfs_copy_remote_files(const rpc_address &remote_node, [&](string_view s) -> void { err = ERR_OK; }); if (dsn_unlikely(err != ERR_OK)) { - derror_replica("nfs copy master[{}] checkpoint failed: checkpoint = {}, err = {}", - master_replica_name(), - remote_dir, - err.to_string()); + LOG_ERROR_PREFIX("nfs copy master[{}] checkpoint failed: checkpoint = {}, err = {}", + master_replica_name(), + remote_dir, + err.to_string()); return; } - ddebug_replica("nfs copy master[{}] checkpoint completed: checkpoint = {}, size = {}", - master_replica_name(), - remote_dir, - size); + LOG_INFO_PREFIX("nfs copy master[{}] checkpoint completed: checkpoint = {}, size = {}", + master_replica_name(), + remote_dir, + size); _tracker.set_tasks_success(); }); } diff --git a/src/replica/replica.cpp b/src/replica/replica.cpp index 9d44cdbc96..6545f38244 100644 --- a/src/replica/replica.cpp +++ b/src/replica/replica.cpp @@ -241,10 +241,10 @@ void replica::on_client_read(dsn::message_ex *request, bool ignore_throttling) // a small window where the state is not the latest yet if (last_committed_decree() < _primary_states.last_prepare_decree_on_new_primary) { - derror_replica("last_committed_decree(%" PRId64 - ") < last_prepare_decree_on_new_primary(%" PRId64 ")", - last_committed_decree(), - _primary_states.last_prepare_decree_on_new_primary); + LOG_ERROR_PREFIX("last_committed_decree(%" PRId64 + ") < last_prepare_decree_on_new_primary(%" PRId64 ")", + last_committed_decree(), + _primary_states.last_prepare_decree_on_new_primary); response_client_read(request, ERR_INVALID_STATE); return; } @@ -566,7 +566,7 @@ void replica::init_disk_tag() { dsn::error_code err = _stub->_fs_manager.get_disk_tag(dir(), _disk_tag); if (dsn::ERR_OK != err) { - derror_replica("get disk tag of {} failed: {}, init it to empty ", dir(), err); + LOG_ERROR_PREFIX("get disk tag of {} failed: {}, init it to empty ", dir(), err); } } @@ -576,7 +576,7 @@ error_code replica::store_app_info(app_info &info, const std::string &path) const auto &info_path = path.empty() ? 
utils::filesystem::path_combine(_dir, kAppInfo) : path; auto err = new_info.store(info_path); if (dsn_unlikely(err != ERR_OK)) { - derror_replica("failed to save app_info to {}, error = {}", info_path, err); + LOG_ERROR_PREFIX("failed to save app_info to {}, error = {}", info_path, err); } return err; } diff --git a/src/replica/replica_2pc.cpp b/src/replica/replica_2pc.cpp index cb22edefa9..9e22148dbd 100644 --- a/src/replica/replica_2pc.cpp +++ b/src/replica/replica_2pc.cpp @@ -68,13 +68,14 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling) if (dsn_unlikely(_stub->_max_allowed_write_size && request->body_size() > _stub->_max_allowed_write_size)) { std::string request_info = _app->dump_write_request(request); - dwarn_replica("client from {} write request body size exceed threshold, request = [{}], " - "request_body_size " - "= {}, max_allowed_write_size = {}, it will be rejected!", - request->header->from_address.to_string(), - request_info, - request->body_size(), - _stub->_max_allowed_write_size); + LOG_WARNING_PREFIX( + "client from {} write request body size exceed threshold, request = [{}], " + "request_body_size " + "= {}, max_allowed_write_size = {}, it will be rejected!", + request->header->from_address.to_string(), + request_info, + request->body_size(), + _stub->_max_allowed_write_size); _stub->_counter_recent_write_size_exceed_threshold_count->increment(); response_client_write(request, ERR_INVALID_DATA); return; @@ -126,12 +127,12 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling) auto cur_bulk_load_status = _bulk_loader->get_bulk_load_status(); if (cur_bulk_load_status != bulk_load_status::BLS_DOWNLOADED && cur_bulk_load_status != bulk_load_status::BLS_INGESTING) { - derror_replica("receive bulk load ingestion request with wrong status({})", - enum_to_string(cur_bulk_load_status)); + LOG_ERROR_PREFIX("receive bulk load ingestion request with wrong status({})", + enum_to_string(cur_bulk_load_status)); response_client_write(request, ERR_INVALID_STATE); return; } - ddebug_replica("receive bulk load ingestion request"); + LOG_INFO_PREFIX("receive bulk load ingestion request"); // bulk load ingestion request requires that all secondaries should be alive if (static_cast(_primary_states.membership.secondaries.size()) + 1 < @@ -208,7 +209,7 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c if (update.code != dsn::apps::RPC_RRDB_RRDB_BULK_LOAD) { break; } - ddebug_replica("try to prepare bulk load mutation({})", mu->name()); + LOG_INFO_PREFIX("try to prepare bulk load mutation({})", mu->name()); if (static_cast(_primary_states.membership.secondaries.size()) + 1 < _primary_states.membership.max_replica_count) { err = ERR_NOT_ENOUGH_MEMBER; @@ -787,7 +788,7 @@ void replica::ack_prepare_message(error_code err, mutation_ptr &mu) if (err == ERR_OK) { if (mu->is_child_acked()) { - dinfo_replica("mutation {} ack_prepare_message, err = {}", mu->name(), err); + LOG_DEBUG_PREFIX("mutation {} ack_prepare_message, err = {}", mu->name(), err); for (auto &request : prepare_requests) { reply(request, resp); } @@ -796,11 +797,11 @@ void replica::ack_prepare_message(error_code err, mutation_ptr &mu) } // only happened when prepare failed during partition split child copy mutation synchronously if (mu->is_error_acked()) { - dwarn_replica("mutation {} has been ack_prepare_message, err = {}", mu->name(), err); + LOG_WARNING_PREFIX("mutation {} has been ack_prepare_message, err = {}", mu->name(), err); return; 
} - dwarn_replica("mutation {} ack_prepare_message, err = {}", mu->name(), err); + LOG_WARNING_PREFIX("mutation {} ack_prepare_message, err = {}", mu->name(), err); if (mu->is_sync_to_child()) { mu->set_error_acked(); } diff --git a/src/replica/replica_backup.cpp b/src/replica/replica_backup.cpp index 0a4e21c6bf..cbb40c0915 100644 --- a/src/replica/replica_backup.cpp +++ b/src/replica/replica_backup.cpp @@ -47,9 +47,9 @@ void replica::on_cold_backup(const backup_request &request, /*out*/ backup_respo cold_backup_context_ptr new_context( new cold_backup_context(this, request, FLAGS_max_concurrent_uploading_file_count)); - ddebug_replica("{}: received cold backup request, partition_status = {}", - new_context->name, - enum_to_string(status())); + LOG_INFO_PREFIX("{}: received cold backup request, partition_status = {}", + new_context->name, + enum_to_string(status())); if (status() == partition_status::type::PS_PRIMARY || status() == partition_status::type::PS_SECONDARY) { diff --git a/src/replica/replica_base.h b/src/replica/replica_base.h index 07d1814855..64f294e87c 100644 --- a/src/replica/replica_base.h +++ b/src/replica/replica_base.h @@ -51,6 +51,8 @@ struct replica_base const char *app_name() const { return _app_name.c_str(); } + const char *log_prefix() const { return _name.c_str(); } + private: const gpid _gpid; const std::string _name; diff --git a/src/replica/replica_check.cpp b/src/replica/replica_check.cpp index 443f759e02..20068e1478 100644 --- a/src/replica/replica_check.cpp +++ b/src/replica/replica_check.cpp @@ -154,13 +154,13 @@ void replica::on_group_check(const group_check_request &request, { _checker.only_one_thread_access(); - ddebug_replica("process group check, primary = {}, ballot = {}, status = {}, " - "last_committed_decree = {}, confirmed_decree = {}", - request.config.primary.to_string(), - request.config.ballot, - enum_to_string(request.config.status), - request.last_committed_decree, - request.__isset.confirmed_decree ? request.confirmed_decree : invalid_decree); + LOG_INFO_PREFIX("process group check, primary = {}, ballot = {}, status = {}, " + "last_committed_decree = {}, confirmed_decree = {}", + request.config.primary.to_string(), + request.config.ballot, + enum_to_string(request.config.status), + request.last_committed_decree, + request.__isset.confirmed_decree ? request.confirmed_decree : invalid_decree); if (request.config.ballot < get_ballot()) { response.err = ERR_VERSION_OUTDATED; diff --git a/src/replica/replica_chkpt.cpp b/src/replica/replica_chkpt.cpp index 6ce5cc1e43..805ebc705d 100644 --- a/src/replica/replica_chkpt.cpp +++ b/src/replica/replica_chkpt.cpp @@ -89,22 +89,22 @@ void replica::on_checkpoint_timer() // cleanable_decree is the only GC trigger. 
valid_start_offset = 0; if (min_confirmed_decree < last_durable_decree) { - ddebug_replica("gc_private {}: delay gc for duplication: min_confirmed_decree({}) " - "last_durable_decree({})", - enum_to_string(status()), - min_confirmed_decree, - last_durable_decree); + LOG_INFO_PREFIX("gc_private {}: delay gc for duplication: min_confirmed_decree({}) " + "last_durable_decree({})", + enum_to_string(status()), + min_confirmed_decree, + last_durable_decree); cleanable_decree = min_confirmed_decree; } else { - ddebug_replica("gc_private {}: min_confirmed_decree({}) last_durable_decree({})", - enum_to_string(status()), - min_confirmed_decree, - last_durable_decree); + LOG_INFO_PREFIX("gc_private {}: min_confirmed_decree({}) last_durable_decree({})", + enum_to_string(status()), + min_confirmed_decree, + last_durable_decree); } } else if (is_duplication_master()) { // unsure if the logs can be dropped, because min_confirmed_decree // is currently unavailable - ddebug_replica( + LOG_INFO_PREFIX( "gc_private {}: skip gc because confirmed duplication progress is unknown", enum_to_string(status())); return; @@ -137,14 +137,14 @@ error_code replica::trigger_manual_emergency_checkpoint(decree old_decree) _checker.only_one_thread_access(); if (_app == nullptr) { - derror_replica("app hasn't been init or has been released"); + LOG_ERROR_PREFIX("app hasn't been init or has been released"); return ERR_LOCAL_APP_FAILURE; } if (old_decree <= _app->last_durable_decree()) { - ddebug_replica("checkpoint has been completed: old = {} vs latest = {}", - old_decree, - _app->last_durable_decree()); + LOG_INFO_PREFIX("checkpoint has been completed: old = {} vs latest = {}", + old_decree, + _app->last_durable_decree()); _is_manual_emergency_checkpointing = false; _stub->_manual_emergency_checkpointing_count == 0 ? 
0 @@ -153,15 +153,16 @@ error_code replica::trigger_manual_emergency_checkpoint(decree old_decree) } if (_is_manual_emergency_checkpointing) { - dwarn_replica("replica is checkpointing, last_durable_decree = {}", - _app->last_durable_decree()); + LOG_WARNING_PREFIX("replica is checkpointing, last_durable_decree = {}", + _app->last_durable_decree()); return ERR_BUSY; } if (++_stub->_manual_emergency_checkpointing_count > FLAGS_max_concurrent_manual_emergency_checkpointing_count) { - dwarn_replica("please try again later because checkpointing exceed max running count[{}]", - FLAGS_max_concurrent_manual_emergency_checkpointing_count); + LOG_WARNING_PREFIX( + "please try again later because checkpointing exceed max running count[{}]", + FLAGS_max_concurrent_manual_emergency_checkpointing_count); --_stub->_manual_emergency_checkpointing_count; return ERR_TRY_AGAIN; } diff --git a/src/replica/replica_config.cpp b/src/replica/replica_config.cpp index 3ad135f441..6efcbb8dec 100644 --- a/src/replica/replica_config.cpp +++ b/src/replica/replica_config.cpp @@ -586,11 +586,11 @@ void replica::update_bool_envs(const std::map &envs, { bool new_value = false; if (!get_bool_envs(envs, name, new_value)) { - dwarn_replica("invalid value of env {}", name); + LOG_WARNING_PREFIX("invalid value of env {}", name); return; } if (new_value != value) { - ddebug_replica("switch env[{}] from {} to {}", name, value, new_value); + LOG_INFO_PREFIX("switch env[{}] from {} to {}", name, value, new_value); value = new_value; } } @@ -618,10 +618,10 @@ void replica::update_allow_ingest_behind(const std::map bool { auto old_status = status(); _config = config; - ddebug_replica( + LOG_INFO_PREFIX( "update status from {} to {}", enum_to_string(old_status), enum_to_string(status())); return true; }); @@ -794,11 +794,11 @@ bool replica::update_local_configuration(const replica_configuration &config, break; case partition_status::PS_PARTITION_SPLIT: if (config.status == partition_status::PS_INACTIVE) { - dwarn_replica("status change from {} @ {} to {} @ {} is not allowed", - enum_to_string(old_status), - old_ballot, - enum_to_string(config.status), - config.ballot); + LOG_WARNING_PREFIX("status change from {} @ {} to {} @ {} is not allowed", + enum_to_string(old_status), + old_ballot, + enum_to_string(config.status), + config.ballot); return false; } break; @@ -1097,7 +1097,7 @@ void replica::on_config_sync(const app_info &info, const partition_configuration &config, split_status::type meta_split_status) { - dinfo_replica("configuration sync"); + LOG_DEBUG_PREFIX("configuration sync"); // no outdated update if (config.ballot < get_ballot()) return; diff --git a/src/replica/replica_disk_migrator.cpp b/src/replica/replica_disk_migrator.cpp index 21a5ba23bb..b6d1366f6c 100644 --- a/src/replica/replica_disk_migrator.cpp +++ b/src/replica/replica_disk_migrator.cpp @@ -50,7 +50,7 @@ void replica_disk_migrator::on_migrate_replica(replica_disk_migrate_rpc rpc) } _status = disk_migration_status::MOVING; - ddebug_replica( + LOG_INFO_PREFIX( "received replica disk migrate request(origin={}, target={}), update status " "from {}=>{}", rpc.request().origin_disk, @@ -78,10 +78,10 @@ bool replica_disk_migrator::check_migration_args(replica_disk_migrate_rpc rpc) if (status() != disk_migration_status::IDLE) { std::string err_msg = fmt::format("Existed migrate task({}) is running", enum_to_string(status())); - derror_replica("received replica disk migrate request(origin={}, target={}), err = {}", - req.origin_disk, - req.target_disk, - err_msg); + 
LOG_ERROR_PREFIX("received replica disk migrate request(origin={}, target={}), err = {}", + req.origin_disk, + req.target_disk, + err_msg); resp.err = ERR_BUSY; resp.__set_hint(err_msg); return false; @@ -90,10 +90,10 @@ bool replica_disk_migrator::check_migration_args(replica_disk_migrate_rpc rpc) if (_replica->status() != partition_status::type::PS_SECONDARY) { std::string err_msg = fmt::format("Invalid partition status({})", enum_to_string(_replica->status())); - derror_replica("received replica disk migrate request(origin={}, target={}), err = {}", - req.origin_disk, - req.target_disk, - err_msg); + LOG_ERROR_PREFIX("received replica disk migrate request(origin={}, target={}), err = {}", + req.origin_disk, + req.target_disk, + err_msg); resp.err = ERR_INVALID_STATE; resp.__set_hint(err_msg); return false; @@ -102,10 +102,10 @@ bool replica_disk_migrator::check_migration_args(replica_disk_migrate_rpc rpc) if (req.origin_disk == req.target_disk) { std::string err_msg = fmt::format( "Invalid disk tag(origin({}) equal target({}))", req.origin_disk, req.target_disk); - derror_replica("received replica disk migrate request(origin={}, target={}), err = {}", - req.origin_disk, - req.target_disk, - err_msg); + LOG_ERROR_PREFIX("received replica disk migrate request(origin={}, target={}), err = {}", + req.origin_disk, + req.target_disk, + err_msg); resp.err = ERR_INVALID_PARAMETERS; resp.__set_hint(err_msg); return false; @@ -122,7 +122,7 @@ bool replica_disk_migrator::check_migration_args(replica_disk_migrate_rpc rpc) fmt::format("Invalid replica(replica({}) doesn't exist on origin disk({}))", req.pid, req.origin_disk); - derror_replica( + LOG_ERROR_PREFIX( "received replica disk migrate request(origin={}, target={}), err = {}", req.origin_disk, req.target_disk, @@ -140,7 +140,7 @@ bool replica_disk_migrator::check_migration_args(replica_disk_migrate_rpc rpc) fmt::format("Invalid replica(replica({}) has existed on target disk({}))", req.pid, req.target_disk); - derror_replica( + LOG_ERROR_PREFIX( "received replica disk migrate request(origin={}, target={}), err = {}", req.origin_disk, req.target_disk, @@ -155,10 +155,10 @@ bool replica_disk_migrator::check_migration_args(replica_disk_migrate_rpc rpc) if (!valid_origin_disk || !valid_target_disk) { std::string invalid_disk_tag = !valid_origin_disk ? 
req.origin_disk : req.target_disk; std::string err_msg = fmt::format("Invalid disk tag({} doesn't exist)", invalid_disk_tag); - derror_replica("received replica disk migrate request(origin={}, target={}), err = {}", - req.origin_disk, - req.target_disk, - err_msg); + LOG_ERROR_PREFIX("received replica disk migrate request(origin={}, target={}), err = {}", + req.origin_disk, + req.target_disk, + err_msg); resp.err = ERR_OBJECT_NOT_FOUND; resp.__set_hint(err_msg); return false; @@ -179,14 +179,14 @@ void replica_disk_migrator::migrate_replica(const replica_disk_migrate_request & if (init_target_dir(req) && migrate_replica_checkpoint(req) && migrate_replica_app_info(req)) { _status = disk_migration_status::MOVED; - ddebug_replica("disk migration(origin={}, target={}) copy data complete, update status " - "from {}=>{}, ready to " - "close origin replica({})", - req.origin_disk, - req.target_disk, - enum_to_string(disk_migration_status::MOVING), - enum_to_string(status()), - _replica->dir()); + LOG_INFO_PREFIX("disk migration(origin={}, target={}) copy data complete, update status " + "from {}=>{}, ready to " + "close origin replica({})", + req.origin_disk, + req.target_disk, + enum_to_string(disk_migration_status::MOVING), + enum_to_string(status()), + _replica->dir()); close_current_replica(req); } @@ -204,7 +204,7 @@ bool replica_disk_migrator::init_target_dir(const replica_disk_migrate_request & // using origin dir to init new dir boost::replace_first(replica_dir, req.origin_disk, req.target_disk); if (utils::filesystem::directory_exists(replica_dir)) { - derror_replica("migration target replica dir({}) has existed", replica_dir); + LOG_ERROR_PREFIX("migration target replica dir({}) has existed", replica_dir); reset_status(); return false; } @@ -213,11 +213,12 @@ bool replica_disk_migrator::init_target_dir(const replica_disk_migrate_request & // /root/target_disk_tag/gpid.app_type in replica_disk_migrator::update_replica_dir finally _target_replica_dir = fmt::format("{}{}", replica_dir, kReplicaDirTempSuffix); if (utils::filesystem::directory_exists(_target_replica_dir)) { - dwarn_replica("disk migration(origin={}, target={}) target replica dir({}) has existed, " - "delete it now", - req.origin_disk, - req.target_disk, - _target_replica_dir); + LOG_WARNING_PREFIX( + "disk migration(origin={}, target={}) target replica dir({}) has existed, " + "delete it now", + req.origin_disk, + req.target_disk, + _target_replica_dir); utils::filesystem::remove_path(_target_replica_dir); } @@ -225,7 +226,7 @@ bool replica_disk_migrator::init_target_dir(const replica_disk_migrate_request & // /root/target/gpid.app_type/data/rdb in replica_disk_migrator::update_replica_dir finally _target_data_dir = utils::filesystem::path_combine(_target_replica_dir, kDataDirFolder); if (!utils::filesystem::create_directory(_target_data_dir)) { - derror_replica( + LOG_ERROR_PREFIX( "disk migration(origin={}, target={}) create target temp data dir({}) failed", req.origin_disk, req.target_disk, @@ -247,10 +248,10 @@ bool replica_disk_migrator::migrate_replica_checkpoint(const replica_disk_migrat const auto &sync_checkpoint_err = _replica->get_app()->sync_checkpoint(); if (sync_checkpoint_err != ERR_OK) { - derror_replica("disk migration(origin={}, target={}) sync_checkpoint failed({})", - req.origin_disk, - req.target_disk, - sync_checkpoint_err.to_string()); + LOG_ERROR_PREFIX("disk migration(origin={}, target={}) sync_checkpoint failed({})", + req.origin_disk, + req.target_disk, + sync_checkpoint_err.to_string()); 
reset_status(); return false; } @@ -258,13 +259,13 @@ bool replica_disk_migrator::migrate_replica_checkpoint(const replica_disk_migrat const auto &copy_checkpoint_err = _replica->get_app()->copy_checkpoint_to_dir(_target_data_dir.c_str(), 0 /*last_decree*/); if (copy_checkpoint_err != ERR_OK) { - derror_replica("disk migration(origin={}, target={}) copy checkpoint to dir({}) " - "failed(error={}), the dir({}) will be deleted", - req.origin_disk, - req.target_disk, - _target_data_dir, - copy_checkpoint_err.to_string(), - _target_replica_dir); + LOG_ERROR_PREFIX("disk migration(origin={}, target={}) copy checkpoint to dir({}) " + "failed(error={}), the dir({}) will be deleted", + req.origin_disk, + req.target_disk, + _target_data_dir, + copy_checkpoint_err.to_string(), + _target_replica_dir); reset_status(); utils::filesystem::remove_path(_target_replica_dir); return false; @@ -283,10 +284,10 @@ bool replica_disk_migrator::migrate_replica_app_info(const replica_disk_migrate_ replica_init_info init_info = _replica->get_app()->init_info(); const auto &store_init_info_err = init_info.store(_target_replica_dir); if (store_init_info_err != ERR_OK) { - derror_replica("disk migration(origin={}, target={}) stores app init info failed({})", - req.origin_disk, - req.target_disk, - store_init_info_err.to_string()); + LOG_ERROR_PREFIX("disk migration(origin={}, target={}) stores app init info failed({})", + req.origin_disk, + req.target_disk, + store_init_info_err.to_string()); reset_status(); return false; } @@ -295,10 +296,10 @@ bool replica_disk_migrator::migrate_replica_app_info(const replica_disk_migrate_ _replica->_app_info, utils::filesystem::path_combine(_target_replica_dir, replica::kAppInfo)); if (store_info_err != ERR_OK) { - derror_replica("disk migration(origin={}, target={}) stores app info failed({})", - req.origin_disk, - req.target_disk, - store_info_err.to_string()); + LOG_ERROR_PREFIX("disk migration(origin={}, target={}) stores app info failed({})", + req.origin_disk, + req.target_disk, + store_info_err.to_string()); reset_status(); return false; } @@ -310,10 +311,10 @@ bool replica_disk_migrator::migrate_replica_app_info(const replica_disk_migrate_ dsn::task_ptr replica_disk_migrator::close_current_replica(const replica_disk_migrate_request &req) { if (_replica->status() != partition_status::type::PS_SECONDARY) { - derror_replica("migrate request(origin={}, target={}), err = Invalid partition status({})", - req.origin_disk, - req.target_disk, - enum_to_string(_replica->status())); + LOG_ERROR_PREFIX( + "migrate request(origin={}, target={}), err = Invalid partition status({})", + req.origin_disk, + req.target_disk, + enum_to_string(_replica->status())); reset_status(); utils::filesystem::remove_path(_target_replica_dir); return nullptr; @@ -350,12 +352,12 @@ void replica_disk_migrator::update_replica_dir() _replica->get_replica_stub()->on_disk_stat(); _status = disk_migration_status::CLOSED; - ddebug_replica("disk replica migration move data from origin dir({}) to new dir({}) " - "succeed, update status from {}=>{}", - _replica->dir(), - _target_replica_dir, - enum_to_string(disk_migration_status::MOVED), - enum_to_string(status())); + LOG_INFO_PREFIX("disk replica migration move data from origin dir({}) to new dir({}) " + "succeed, update status from {}=>{}", + _replica->dir(), + _target_replica_dir, + enum_to_string(disk_migration_status::MOVED), + enum_to_string(status())); } } // namespace replication } // namespace dsn diff --git a/src/replica/replica_init.cpp
b/src/replica/replica_init.cpp index f9b57d9161..d510cd86d6 100644 --- a/src/replica/replica_init.cpp +++ b/src/replica/replica_init.cpp @@ -212,7 +212,7 @@ error_code replica::initialize_on_load() decree replica::get_replay_start_decree() { decree replay_start_decree = _app->last_committed_decree(); - ddebug_replica("start to replay private log [replay_start_decree: {}]", replay_start_decree); + LOG_INFO_PREFIX("start to replay private log [replay_start_decree: {}]", replay_start_decree); return replay_start_decree; } diff --git a/src/replica/replica_learn.cpp b/src/replica/replica_learn.cpp index 7623e9f0f2..cfe08efc79 100644 --- a/src/replica/replica_learn.cpp +++ b/src/replica/replica_learn.cpp @@ -299,7 +299,7 @@ decree replica::get_learn_start_decree(const learn_request &request) // on prima decree local_gced = max_gced_decree_no_lock(); if (local_gced == invalid_decree) { // abnormal case - dwarn_replica("no plog to be learned for duplication, continue as normal"); + LOG_WARNING_PREFIX("no plog to be learned for duplication, continue as normal"); } else { learn_start_decree_for_dup = local_gced + 1; } @@ -312,11 +312,11 @@ decree replica::get_learn_start_decree(const learn_request &request) // on prima // see replica::get_max_gced_decree_for_learn for details. if (learn_start_decree_for_dup < learn_start_decree_no_dup) { learn_start_decree = learn_start_decree_for_dup; - ddebug_replica("learn_start_decree steps back to {} to ensure learner having enough " - "logs for duplication [confirmed_decree={}, learner_gced_decree={}]", - learn_start_decree, - min_confirmed_decree, - request.max_gced_decree); + LOG_INFO_PREFIX("learn_start_decree steps back to {} to ensure learner having enough " + "logs for duplication [confirmed_decree={}, learner_gced_decree={}]", + learn_start_decree, + min_confirmed_decree, + request.max_gced_decree); } } dcheck_le_replica(learn_start_decree, local_committed_decree + 1); @@ -571,7 +571,7 @@ void replica::on_learn_reply(error_code err, learn_request &&req, learn_response return; } - ddebug_replica( + LOG_INFO_PREFIX( "on_learn_reply_start[{}]: learnee = {}, learn_duration ={} ms, response_err = " "{}, remote_committed_decree = {}, prepare_start_decree = {}, learn_type = {} " "learned_buffer_size = {}, learned_file_count = {},to_decree_included = " @@ -1483,16 +1483,16 @@ void replica::on_learn_completion_notification_reply(error_code err, void replica::on_add_learner(const group_check_request &request) { - ddebug_replica("process add learner, primary = {}, ballot ={}, status ={}, " - "last_committed_decree = {}, duplicating = {}", - request.config.primary.to_string(), - request.config.ballot, - enum_to_string(request.config.status), - request.last_committed_decree, - request.app.duplicating); + LOG_INFO_PREFIX("process add learner, primary = {}, ballot ={}, status ={}, " + "last_committed_decree = {}, duplicating = {}", + request.config.primary.to_string(), + request.config.ballot, + enum_to_string(request.config.status), + request.last_committed_decree, + request.app.duplicating); if (request.config.ballot < get_ballot()) { - dwarn_replica("on_add_learner ballot is old, skipped"); + LOG_WARNING_PREFIX("on_add_learner ballot is old, skipped"); return; } @@ -1534,10 +1534,10 @@ error_code replica::apply_learned_state_from_private_log(learn_state &state) // after applied: [---------------log----------------] if (duplicating && state.__isset.learn_start_decree && state.learn_start_decree < _app->last_committed_decree() + 1) { - 
ddebug_replica("learn_start_decree({}) < _app->last_committed_decree() + 1({}), learn " - "must stepped back to include all the unconfirmed ", - state.learn_start_decree, - _app->last_committed_decree() + 1); + LOG_INFO_PREFIX("learn_start_decree({}) < _app->last_committed_decree() + 1({}), learn " + "must stepped back to include all the unconfirmed ", + state.learn_start_decree, + _app->last_committed_decree() + 1); // move the `learn/` dir to working dir (`plog/`) to replace current log files to replay error_code err = _private_log->reset_from( @@ -1550,7 +1550,7 @@ error_code replica::apply_learned_state_from_private_log(learn_state &state) get_gpid().thread_hash()); }); if (err != ERR_OK) { - derror_replica("failed to reset this private log with logs in learn/ dir: {}", err); + LOG_ERROR_PREFIX("failed to reset this private log with logs in learn/ dir: {}", err); return err; } @@ -1619,19 +1619,20 @@ error_code replica::apply_learned_state_from_private_log(learn_state &state) _potential_secondary_states.first_learn_start_decree = state.learn_start_decree; } - ddebug_replica("apply_learned_state_from_private_log[{}]: duplicating={}, step_back={}, " - "learnee = {}, learn_duration = {} ms, apply private log files done, file_count " - "={}, first_learn_start_decree ={}, learn_start_decree = {}, " - "app_committed_decree = {}", - _potential_secondary_states.learning_version, - duplicating, - step_back, - _config.primary.to_string(), - _potential_secondary_states.duration_ms(), - state.files.size(), - _potential_secondary_states.first_learn_start_decree, - state.learn_start_decree, - _app->last_committed_decree()); + LOG_INFO_PREFIX( + "apply_learned_state_from_private_log[{}]: duplicating={}, step_back={}, " + "learnee = {}, learn_duration = {} ms, apply private log files done, file_count " + "={}, first_learn_start_decree ={}, learn_start_decree = {}, " + "app_committed_decree = {}", + _potential_secondary_states.learning_version, + duplicating, + step_back, + _config.primary.to_string(), + _potential_secondary_states.duration_ms(), + state.files.size(), + _potential_secondary_states.first_learn_start_decree, + state.learn_start_decree, + _app->last_committed_decree()); // apply in-buffer private logs if (err == ERR_OK) { @@ -1653,24 +1654,24 @@ error_code replica::apply_learned_state_from_private_log(learn_state &state) } if (state.to_decree_included > last_committed_decree()) { - ddebug_replica("apply_learned_state_from_private_log[{}]: learnee ={}, " - "learned_to_decree_included({}) > last_committed_decree({}), commit to " - "to_decree_included", - _potential_secondary_states.learning_version, - _config.primary.to_string(), - state.to_decree_included, - last_committed_decree()); + LOG_INFO_PREFIX("apply_learned_state_from_private_log[{}]: learnee ={}, " + "learned_to_decree_included({}) > last_committed_decree({}), commit to " + "to_decree_included", + _potential_secondary_states.learning_version, + _config.primary.to_string(), + state.to_decree_included, + last_committed_decree()); plist.commit(state.to_decree_included, COMMIT_TO_DECREE_SOFT); } - ddebug_replica(" apply_learned_state_from_private_log[{}]: learnee ={}, " - "learn_duration ={} ms, apply in-buffer private logs done, " - "replay_count ={}, app_committed_decree = {}", - _potential_secondary_states.learning_version, - _config.primary.to_string(), - _potential_secondary_states.duration_ms(), - replay_count, - _app->last_committed_decree()); + LOG_INFO_PREFIX(" apply_learned_state_from_private_log[{}]: learnee ={}, " + 
"learn_duration ={} ms, apply in-buffer private logs done, " + "replay_count ={}, app_committed_decree = {}", + _potential_secondary_states.learning_version, + _config.primary.to_string(), + _potential_secondary_states.duration_ms(), + replay_count, + _app->last_committed_decree()); } // awaits for unfinished mutation writes. diff --git a/src/replica/replica_restore.cpp b/src/replica/replica_restore.cpp index a34973686d..5f4d0af2c4 100644 --- a/src/replica/replica_restore.cpp +++ b/src/replica/replica_restore.cpp @@ -147,7 +147,7 @@ error_code replica::download_checkpoint(const configuration_restore_request &req } if (download_err != ERR_OK) { - derror_replica( + LOG_ERROR_PREFIX( "failed to download file({}), error = {}", f_meta.name, download_err); // ERR_CORRUPTION means we should rollback restore, so we can't change err if it // is ERR_CORRUPTION now, otherwise it will be overridden by other errors @@ -189,10 +189,10 @@ error_code replica::get_backup_metadata(block_filesystem *fs, fs, download_file_size); if (err != ERR_OK && err != ERR_PATH_ALREADY_EXIST) { - derror_replica("download backup_metadata failed, file({}), reason({})", - utils::filesystem::path_combine(remote_chkpt_dir, - cold_backup_constant::BACKUP_METADATA), - err); + LOG_ERROR_PREFIX("download backup_metadata failed, file({}), reason({})", + utils::filesystem::path_combine(remote_chkpt_dir, + cold_backup_constant::BACKUP_METADATA), + err); return err; } @@ -200,12 +200,13 @@ error_code replica::get_backup_metadata(block_filesystem *fs, const std::string local_backup_metada_file = utils::filesystem::path_combine(local_chkpt_dir, cold_backup_constant::BACKUP_METADATA); if (!read_cold_backup_metadata(local_backup_metada_file, backup_metadata)) { - derror_replica("read cold_backup_metadata from file({}) failed", local_backup_metada_file); + LOG_ERROR_PREFIX("read cold_backup_metadata from file({}) failed", + local_backup_metada_file); return ERR_FILE_OPERATION_FAILED; } _chkpt_total_size = backup_metadata.checkpoint_total_size; - ddebug_replica( + LOG_INFO_PREFIX( "recover cold_backup_metadata from file({}) succeed, total checkpoint size({}), file " "count({})", local_backup_metada_file, @@ -218,17 +219,17 @@ void replica::clear_restore_useless_files(const std::string &local_chkpt_dir, const cold_backup_metadata &metadata) { if (!remove_useless_file_under_chkpt(local_chkpt_dir, metadata)) { - dwarn_replica("remove useless file failed, chkpt = {}", local_chkpt_dir); + LOG_WARNING_PREFIX("remove useless file failed, chkpt = {}", local_chkpt_dir); } else { - ddebug_replica("remove useless file succeed, chkpt = {}", local_chkpt_dir); + LOG_INFO_PREFIX("remove useless file succeed, chkpt = {}", local_chkpt_dir); } const std::string metadata_file = utils::filesystem::path_combine(local_chkpt_dir, cold_backup_constant::BACKUP_METADATA); if (!utils::filesystem::remove_path(metadata_file)) { - dwarn_replica("remove backup_metadata failed, file = {}", metadata_file); + LOG_WARNING_PREFIX("remove backup_metadata failed, file = {}", metadata_file); } else { - ddebug_replica("remove backup_metadata succeed, file = {}", metadata_file); + LOG_INFO_PREFIX("remove backup_metadata succeed, file = {}", metadata_file); } } @@ -476,7 +477,7 @@ void replica::report_restore_status_to_meta() void replica::update_restore_progress(uint64_t f_size) { if (_chkpt_total_size <= 0) { - derror_replica("cold_backup_metadata has invalid file_total_size({})", _chkpt_total_size); + LOG_ERROR_PREFIX("cold_backup_metadata has invalid file_total_size({})", 
_chkpt_total_size); return; } @@ -485,10 +486,10 @@ void replica::update_restore_progress(uint64_t f_size) auto cur_download_size = static_cast(_cur_download_size.load()); auto cur_porgress = static_cast((cur_download_size / total_size) * 1000); _restore_progress.store(cur_porgress); - ddebug_replica("total_size = {}, cur_downloaded_size = {}, progress = {}", - total_size, - cur_download_size, - cur_porgress); + LOG_INFO_PREFIX("total_size = {}, cur_downloaded_size = {}, progress = {}", + total_size, + cur_download_size, + cur_porgress); } } } diff --git a/src/replica/replica_throttle.cpp b/src/replica/replica_throttle.cpp index 301cba73f0..253453c92d 100644 --- a/src/replica/replica_throttle.cpp +++ b/src/replica/replica_throttle.cpp @@ -120,10 +120,10 @@ void replica::update_throttle_env_internal(const std::mapsecond, - parse_error); + LOG_WARNING_PREFIX("parse env failed, key = \"{}\", value = \"{}\", error = \"{}\"", + key, + find->second, + parse_error); // reset if parse failed cntl.reset(throttling_changed, old_throttling); } @@ -132,7 +132,7 @@ void replica::update_throttle_env_internal(const std::mapheader->rpc_name); + LOG_ERROR_PREFIX("got storage error when handler request({})", + requests[i]->header->rpc_name); storage_error = e; } } @@ -387,7 +387,7 @@ error_code replication_app_base::apply_mutation(const mutation *mu) for (int i = 0; i < request_count; i++) { const mutation_update &update = mu->data.updates[i]; message_ex *req = mu->client_requests[i]; - dinfo_replica("mutation {} #{}: dispatch rpc call {}", mu->name(), i, update.code); + LOG_DEBUG_PREFIX("mutation {} #{}: dispatch rpc call {}", mu->name(), i, update.code); if (update.code != RPC_REPLICATION_WRITE_EMPTY) { if (req == nullptr) { req = message_ex::create_received_request( @@ -414,7 +414,7 @@ error_code replication_app_base::apply_mutation(const mutation *mu) } if (perror != 0) { - derror_replica("mutation {}: get internal error {}", mu->name(), perror); + LOG_ERROR_PREFIX("mutation {}: get internal error {}", mu->name(), perror); // for normal write requests, if got rocksdb error, this replica will be set error and evoke // learn for ingestion requests, should not do as normal write requests, there are two // reasons: @@ -451,7 +451,7 @@ error_code replication_app_base::apply_mutation(const mutation *mu) dassert_replica(false, "status = {}", enum_to_string(status)); __builtin_unreachable(); } - ddebug_replica( + LOG_INFO_PREFIX( "mutation {} committed on {}, batched_count = {}", mu->name(), str, batched_count); } diff --git a/src/replica/split/replica_split_manager.cpp b/src/replica/split/replica_split_manager.cpp index 22d925d476..93a4d4d776 100644 --- a/src/replica/split/replica_split_manager.cpp +++ b/src/replica/split/replica_split_manager.cpp @@ -42,13 +42,13 @@ void replica_split_manager::parent_start_split( { if (status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY && (status() != partition_status::PS_INACTIVE || !_replica->_inactive_is_transient)) { - dwarn_replica("receive add child request with wrong status({}), ignore this request", - enum_to_string(status())); + LOG_WARNING_PREFIX("receive add child request with wrong status({}), ignore this request", + enum_to_string(status())); return; } if (request.config.ballot != get_ballot()) { - dwarn_replica( + LOG_WARNING_PREFIX( "receive add child request with different ballot, local ballot({}) VS request " "ballot({}), ignore this request", get_ballot(), @@ -57,13 +57,13 @@ void replica_split_manager::parent_start_split( 
} if (_split_status == split_status::SPLITTING) { - dwarn_replica("partition is already splitting, ignore this request"); + LOG_WARNING_PREFIX("partition is already splitting, ignore this request"); return; } gpid child_gpid = request.child_gpid; if (child_gpid.get_partition_index() < _replica->_app_info.partition_count) { - dwarn_replica( + LOG_WARNING_PREFIX( "receive old add child request, child_gpid={}, partition_count={}, ignore this request", child_gpid, _replica->_app_info.partition_count); @@ -79,11 +79,11 @@ void replica_split_manager::parent_start_split( _child_gpid = child_gpid; _child_init_ballot = get_ballot(); - ddebug_replica("start to add child({}), init_ballot={}, status={}, primary_address={}", - _child_gpid, - _child_init_ballot, - enum_to_string(status()), - request.config.primary.to_string()); + LOG_INFO_PREFIX("start to add child({}), init_ballot={}, status={}, primary_address={}", + _child_gpid, + _child_init_ballot, + enum_to_string(status()), + request.config.primary.to_string()); tasking::enqueue(LPC_CREATE_CHILD, tracker(), @@ -106,7 +106,7 @@ void replica_split_manager::child_init_replica(gpid parent_gpid, FAIL_POINT_INJECT_F("replica_child_init_replica", [](dsn::string_view) {}); if (status() != partition_status::PS_INACTIVE) { - dwarn_replica("wrong status({})", enum_to_string(status())); + LOG_WARNING_PREFIX("wrong status({})", enum_to_string(status())); _stub->split_replica_error_handler( parent_gpid, std::bind(&replica_split_manager::parent_cleanup_split_context, std::placeholders::_1)); @@ -132,7 +132,7 @@ void replica_split_manager::child_init_replica(gpid parent_gpid, _replica->_split_states.splitting_start_ts_ns = dsn_now_ns(); _stub->_counter_replicas_splitting_recent_start_count->increment(); - ddebug_replica( + LOG_INFO_PREFIX( "child initialize succeed, init_ballot={}, parent_gpid={}", init_ballot, parent_gpid); error_code ec = @@ -152,7 +152,7 @@ void replica_split_manager::child_check_split_context() // on child partition FAIL_POINT_INJECT_F("replica_child_check_split_context", [](dsn::string_view) {}); if (status() != partition_status::PS_PARTITION_SPLIT) { - derror_replica("wrong status({})", enum_to_string(status())); + LOG_ERROR_PREFIX("wrong status({})", enum_to_string(status())); _replica->_split_states.check_state_task = nullptr; return; } @@ -183,14 +183,14 @@ bool replica_split_manager::parent_check_states() // on parent partition _child_gpid.get_app_id() == 0 || (status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY && (status() != partition_status::PS_INACTIVE || !_replica->_inactive_is_transient))) { - dwarn_replica("parent wrong states: status({}), split_status({}), init_ballot({}) VS " - "current_ballot({}), " - "child_gpid({})", - enum_to_string(status()), - enum_to_string(_split_status), - _child_init_ballot, - get_ballot(), - _child_gpid); + LOG_WARNING_PREFIX("parent wrong states: status({}), split_status({}), init_ballot({}) VS " + "current_ballot({}), " + "child_gpid({})", + enum_to_string(status()), + enum_to_string(_split_status), + _child_init_ballot, + get_ballot(), + _child_gpid); parent_handle_split_error("wrong parent states when execute parent_check_states", false); return false; } @@ -209,15 +209,15 @@ void replica_split_manager::parent_prepare_states(const std::string &dir) // on // generate checkpoint error_code ec = _replica->_app->copy_checkpoint_to_dir(dir.c_str(), &checkpoint_decree, true); if (ec == ERR_OK) { - ddebug_replica("prepare checkpoint succeed: checkpoint dir = {}, 
checkpoint decree = {}", - dir, - checkpoint_decree); + LOG_INFO_PREFIX("prepare checkpoint succeed: checkpoint dir = {}, checkpoint decree = {}", + dir, + checkpoint_decree); parent_states.to_decree_included = checkpoint_decree; // learn_state.files[0] will be used to get learn dir in function 'storage_apply_checkpoint' // so we add a fake file name here, this file won't appear on disk parent_states.files.push_back(dsn::utils::filesystem::path_combine(dir, "file_name")); } else { - dwarn_replica("prepare checkpoint failed, error={}, please wait and retry", ec); + LOG_WARNING_PREFIX("prepare checkpoint failed, error={}, please wait and retry", ec); tasking::enqueue(LPC_PARTITION_SPLIT, tracker(), std::bind(&replica_split_manager::parent_prepare_states, this, dir), @@ -241,12 +241,12 @@ void replica_split_manager::parent_prepare_states(const std::string &dir) // on dcheck_eq(last_committed_decree(), checkpoint_decree); dcheck_ge(mutation_list.size(), 0); dcheck_ge(files.size(), 0); - ddebug_replica("prepare state succeed: {} mutations, {} private log files, total file size = " - "{}, last_committed_decree = {}", - mutation_list.size(), - files.size(), - total_file_size, - last_committed_decree()); + LOG_INFO_PREFIX("prepare state succeed: {} mutations, {} private log files, total file size = " + "{}, last_committed_decree = {}", + mutation_list.size(), + files.size(), + total_file_size, + last_committed_decree()); ec = _stub->split_replica_exec(LPC_PARTITION_SPLIT, _child_gpid, @@ -271,7 +271,7 @@ void replica_split_manager::child_copy_prepare_list( std::shared_ptr plist) // on child partition { if (status() != partition_status::PS_PARTITION_SPLIT) { - derror_replica("wrong status({})", enum_to_string(status())); + LOG_ERROR_PREFIX("wrong status({})", enum_to_string(status())); return; } @@ -289,11 +289,11 @@ void replica_split_manager::child_copy_prepare_list( total_file_size, last_committed_decree)); - ddebug_replica("start to copy parent prepare list, last_committed_decree={}, prepare list min " - "decree={}, max decree={}", - last_committed_decree, - plist->min_decree(), - plist->max_decree()); + LOG_INFO_PREFIX("start to copy parent prepare list, last_committed_decree={}, prepare list min " + "decree={}, max decree={}", + last_committed_decree, + plist->min_decree(), + plist->max_decree()); // copy parent prepare list plist->set_committer(std::bind(&replica::execute_mutation, _replica, std::placeholders::_1)); @@ -321,19 +321,19 @@ void replica_split_manager::child_learn_states(learn_state lstate, FAIL_POINT_INJECT_F("replica_child_learn_states", [](dsn::string_view) {}); if (status() != partition_status::PS_PARTITION_SPLIT) { - derror_replica("wrong status({})", enum_to_string(status())); + LOG_ERROR_PREFIX("wrong status({})", enum_to_string(status())); child_handle_async_learn_error(); return; } - ddebug_replica("start to learn states asynchronously, prepare_list last_committed_decree={}, " - "checkpoint decree range=({},{}], private log files count={}, in-memory " - "mutation count={}", - last_committed_decree, - lstate.from_decree_excluded, - lstate.to_decree_included, - plog_files.size(), - mutation_list.size()); + LOG_INFO_PREFIX("start to learn states asynchronously, prepare_list last_committed_decree={}, " + "checkpoint decree range=({},{}], private log files count={}, in-memory " + "mutation count={}", + last_committed_decree, + lstate.from_decree_excluded, + lstate.to_decree_included, + plog_files.size(), + mutation_list.size()); error_code err; auto cleanup = defer([this, 
&err]() { @@ -345,7 +345,7 @@ void replica_split_manager::child_learn_states(learn_state lstate, // apply parent checkpoint err = _replica->_app->apply_checkpoint(replication_app_base::chkpt_apply_mode::learn, lstate); if (err != ERR_OK) { - derror_replica("failed to apply checkpoint, error={}", err); + LOG_ERROR_PREFIX("failed to apply checkpoint, error={}", err); return; } @@ -353,24 +353,24 @@ void replica_split_manager::child_learn_states(learn_state lstate, err = child_apply_private_logs(plog_files, mutation_list, total_file_size, last_committed_decree); if (err != ERR_OK) { - derror_replica("failed to replay private log, error={}", err); + LOG_ERROR_PREFIX("failed to replay private log, error={}", err); return; } // generate a checkpoint synchronously err = _replica->_app->sync_checkpoint(); if (err != ERR_OK) { - derror_replica("failed to generate checkpoint synchrounously, error={}", err); + LOG_ERROR_PREFIX("failed to generate checkpoint synchrounously, error={}", err); return; } err = _replica->update_init_info_ballot_and_decree(); if (err != ERR_OK) { - derror_replica("update_init_info_ballot_and_decree failed, error={}", err); + LOG_ERROR_PREFIX("update_init_info_ballot_and_decree failed, error={}", err); return; } - ddebug_replica("learn parent states asynchronously succeed"); + LOG_INFO_PREFIX("learn parent states asynchronously succeed"); tasking::enqueue(LPC_PARTITION_SPLIT, tracker(), @@ -391,7 +391,7 @@ replica_split_manager::child_apply_private_logs(std::vector plog_fi }); if (status() != partition_status::PS_PARTITION_SPLIT) { - derror_replica("wrong status({})", enum_to_string(status())); + LOG_ERROR_PREFIX("wrong status({})", enum_to_string(status())); return ERR_INVALID_STATE; } @@ -425,7 +425,7 @@ replica_split_manager::child_apply_private_logs(std::vector plog_fi }, offset); if (ec != ERR_OK) { - derror_replica( + LOG_ERROR_PREFIX( "replay private_log files failed, file count={}, app last_committed_decree={}", plog_files.size(), _replica->_app->last_committed_decree()); @@ -437,9 +437,9 @@ replica_split_manager::child_apply_private_logs(std::vector plog_fi _stub->_counter_replicas_splitting_recent_copy_file_count->add(plog_files.size()); _stub->_counter_replicas_splitting_recent_copy_file_size->add(total_file_size); - ddebug_replica("replay private_log files succeed, file count={}, app last_committed_decree={}", - plog_files.size(), - _replica->_app->last_committed_decree()); + LOG_INFO_PREFIX("replay private_log files succeed, file count={}, app last_committed_decree={}", + plog_files.size(), + _replica->_app->last_committed_decree()); // apply in-memory mutations if replay private logs succeed int count = 0; @@ -461,7 +461,7 @@ replica_split_manager::child_apply_private_logs(std::vector plog_fi _replica->_split_states.splitting_copy_mutation_count += count; _stub->_counter_replicas_splitting_recent_copy_mutation_count->add(count); plist.commit(last_committed_decree, COMMIT_TO_DECREE_HARD); - ddebug_replica( + LOG_INFO_PREFIX( "apply in-memory mutations succeed, mutation count={}, app last_committed_decree={}", count, _replica->_app->last_committed_decree()); @@ -475,7 +475,7 @@ void replica_split_manager::child_catch_up_states() // on child partition FAIL_POINT_INJECT_F("replica_child_catch_up_states", [](dsn::string_view) {}); if (status() != partition_status::PS_PARTITION_SPLIT) { - derror_replica("wrong status, status is {}", enum_to_string(status())); + LOG_ERROR_PREFIX("wrong status, status is {}", enum_to_string(status())); return; } @@ -491,12 +491,12 @@ 
void replica_split_manager::child_catch_up_states() // on child partition if (local_decree < goal_decree) { if (local_decree >= _replica->_prepare_list->min_decree()) { // all missing mutations are all in prepare list - dwarn_replica("there are some in-memory mutations should be learned, app " - "last_committed_decree={}, " - "goal decree={}, prepare_list min_decree={}", - local_decree, - goal_decree, - _replica->_prepare_list->min_decree()); + LOG_WARNING_PREFIX("there are some in-memory mutations should be learned, app " + "last_committed_decree={}, " + "goal decree={}, prepare_list min_decree={}", + local_decree, + goal_decree, + _replica->_prepare_list->min_decree()); for (decree d = local_decree + 1; d <= goal_decree; ++d) { auto mu = _replica->_prepare_list->get_mutation_by_decree(d); dassert(mu != nullptr, ""); @@ -509,7 +509,7 @@ void replica_split_manager::child_catch_up_states() // on child partition } else { // some missing mutations have already in private log // should call `catch_up_with_private_logs` to catch up all missing mutations - dwarn_replica( + LOG_WARNING_PREFIX( "there are some private logs should be learned, app last_committed_decree=" "{}, prepare_list min_decree={}, please wait", local_decree, @@ -526,9 +526,9 @@ void replica_split_manager::child_catch_up_states() // on child partition } } - ddebug_replica("child catch up parent states, goal decree={}, local decree={}", - _replica->_prepare_list->last_committed_decree(), - _replica->_app->last_committed_decree()); + LOG_INFO_PREFIX("child catch up parent states, goal decree={}, local decree={}", + _replica->_prepare_list->last_committed_decree(), + _replica->_app->last_committed_decree()); _replica->_split_states.is_caught_up = true; child_notify_catch_up(); @@ -545,10 +545,10 @@ void replica_split_manager::child_notify_catch_up() // on child partition request->child_ballot = get_ballot(); request->child_address = _stub->_primary_address; - ddebug_replica("send notification to primary parent[{}@{}], ballot={}", - _replica->_split_states.parent_gpid, - _replica->_config.primary.to_string(), - get_ballot()); + LOG_INFO_PREFIX("send notification to primary parent[{}@{}], ballot={}", + _replica->_split_states.parent_gpid, + _replica->_config.primary.to_string(), + get_ballot()); notify_catch_up_rpc rpc(std::move(request), RPC_SPLIT_NOTIFY_CATCH_UP, @@ -558,7 +558,7 @@ void replica_split_manager::child_notify_catch_up() // on child partition rpc.call(_replica->_config.primary, tracker(), [this, rpc](error_code ec) mutable { auto response = rpc.response(); if (ec == ERR_TIMEOUT) { - dwarn_replica("notify primary catch up timeout, please wait and retry"); + LOG_WARNING_PREFIX("notify primary catch up timeout, please wait and retry"); tasking::enqueue(LPC_PARTITION_SPLIT, tracker(), std::bind(&replica_split_manager::child_notify_catch_up, this), @@ -568,7 +568,7 @@ void replica_split_manager::child_notify_catch_up() // on child partition } if (ec != ERR_OK || response.err != ERR_OK) { error_code err = (ec == ERR_OK) ? 
response.err : ec; - derror_replica("failed to notify primary catch up, error={}", err); + LOG_ERROR_PREFIX("failed to notify primary catch up, error={}", err); _stub->split_replica_error_handler( _replica->_split_states.parent_gpid, std::bind(&replica_split_manager::parent_cleanup_split_context, @@ -576,9 +576,9 @@ void replica_split_manager::child_notify_catch_up() // on child partition child_handle_split_error("notify_primary_split_catch_up failed"); return; } - ddebug_replica("notify primary parent[{}@{}] catch up succeed", - _replica->_split_states.parent_gpid, - _replica->_config.primary.to_string()); + LOG_INFO_PREFIX("notify primary parent[{}@{}] catch up succeed", + _replica->_split_states.parent_gpid, + _replica->_config.primary.to_string()); }); } @@ -588,7 +588,7 @@ void replica_split_manager::parent_handle_child_catch_up( notify_cacth_up_response &response) // on primary parent { if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) { - derror_replica( + LOG_ERROR_PREFIX( "wrong partition status or wrong split status, partition_status={}, split_status={}", enum_to_string(status()), enum_to_string(_split_status)); @@ -598,21 +598,22 @@ void replica_split_manager::parent_handle_child_catch_up( } if (request.child_ballot != get_ballot() || request.child_gpid != _child_gpid) { - derror_replica("receive out-date request, request ballot ({}) VS local ballot({}), request " - "child_gpid({}) VS local child_gpid({})", - request.child_ballot, - get_ballot(), - request.child_gpid, - _child_gpid); + LOG_ERROR_PREFIX( + "receive out-date request, request ballot ({}) VS local ballot({}), request " + "child_gpid({}) VS local child_gpid({})", + request.child_ballot, + get_ballot(), + request.child_gpid, + _child_gpid); response.err = ERR_INVALID_STATE; return; } response.err = ERR_OK; - ddebug_replica("receive catch_up request from {}@{}, current ballot={}", - request.child_gpid, - request.child_address.to_string(), - request.child_ballot); + LOG_INFO_PREFIX("receive catch_up request from {}@{}, current ballot={}", + request.child_gpid, + request.child_address.to_string(), + request.child_ballot); _replica->_primary_states.caught_up_children.insert(request.child_address); // _primary_states.statuses is a map structure: rpc address -> partition_status @@ -625,7 +626,7 @@ void replica_split_manager::parent_handle_child_catch_up( } } - ddebug_replica("all child partitions catch up"); + LOG_INFO_PREFIX("all child partitions catch up"); _replica->_primary_states.caught_up_children.clear(); _replica->_primary_states.sync_send_write_request = true; @@ -657,18 +658,18 @@ void replica_split_manager::parent_check_sync_point_commit(decree sync_point) // { FAIL_POINT_INJECT_F("replica_parent_check_sync_point_commit", [](dsn::string_view) {}); if (status() != partition_status::PS_PRIMARY) { - derror_replica("wrong status({})", enum_to_string(status())); + LOG_ERROR_PREFIX("wrong status({})", enum_to_string(status())); parent_handle_split_error("check_sync_point_commit failed, primary changed", false); return; } - ddebug_replica("sync_point = {}, app last_committed_decree = {}", - sync_point, - _replica->_app->last_committed_decree()); + LOG_INFO_PREFIX("sync_point = {}, app last_committed_decree = {}", + sync_point, + _replica->_app->last_committed_decree()); if (_replica->_app->last_committed_decree() >= sync_point) { update_child_group_partition_count(_replica->_app_info.partition_count * 2); } else { - dwarn_replica("sync_point has not been committed, please wait and 
retry"); + LOG_WARNING_PREFIX("sync_point has not been committed, please wait and retry"); tasking::enqueue( LPC_PARTITION_SPLIT, tracker(), @@ -683,7 +684,7 @@ void replica_split_manager::update_child_group_partition_count( int new_partition_count) // on primary parent { if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) { - derror_replica( + LOG_ERROR_PREFIX( "wrong partition status or wrong split status, partition_status={}, split_status={}", enum_to_string(status()), enum_to_string(_split_status)); @@ -696,9 +697,9 @@ void replica_split_manager::update_child_group_partition_count( if (!_replica->_primary_states.learners.empty() || _replica->_primary_states.membership.secondaries.size() + 1 < _replica->_primary_states.membership.max_replica_count) { - derror_replica("there are {} learners or not have enough secondaries(count is {})", - _replica->_primary_states.learners.size(), - _replica->_primary_states.membership.secondaries.size()); + LOG_ERROR_PREFIX("there are {} learners or not have enough secondaries(count is {})", + _replica->_primary_states.learners.size(), + _replica->_primary_states.membership.secondaries.size()); parent_handle_split_error( "update_child_group_partition_count failed, have learner or lack of secondary", true); return; @@ -731,7 +732,7 @@ void replica_split_manager::parent_send_update_partition_count_request( request->child_pid = _child_gpid; request->ballot = get_ballot(); - ddebug_replica( + LOG_INFO_PREFIX( "send update child group partition count request to node({}), new partition_count = {}", address.to_string(), new_partition_count); @@ -752,7 +753,7 @@ void replica_split_manager::on_update_child_group_partition_count( update_child_group_partition_count_response &response) // on child partition { if (request.ballot != get_ballot() || !_replica->_split_states.is_caught_up) { - derror_replica( + LOG_ERROR_PREFIX( "receive outdated update child group_partition_count_request, request ballot={}, " "local ballot={}, is_caught_up={}", request.ballot, @@ -764,9 +765,9 @@ void replica_split_manager::on_update_child_group_partition_count( if (_replica->_app_info.partition_count == request.new_partition_count && _partition_version.load() == request.new_partition_count - 1) { - dwarn_replica("receive repeated update child group_partition_count_request, " - "partition_count = {}, ignore it", - request.new_partition_count); + LOG_WARNING_PREFIX("receive repeated update child group_partition_count_request, " + "partition_count = {}, ignore it", + request.new_partition_count); response.err = ERR_OK; return; } @@ -798,9 +799,9 @@ void replica_split_manager::update_local_partition_count( } _replica->_app_info = info; - ddebug_replica("update partition_count from {} to {}", - old_partition_count, - _replica->_app_info.partition_count); + LOG_INFO_PREFIX("update partition_count from {} to {}", + old_partition_count, + _replica->_app_info.partition_count); _replica->_app->set_partition_version(_replica->_app_info.partition_count - 1); _partition_version.store(_replica->_app_info.partition_count - 1); @@ -816,7 +817,7 @@ void replica_split_manager::on_update_child_group_partition_count_reply( _replica->_checker.only_one_thread_access(); if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) { - derror_replica( + LOG_ERROR_PREFIX( "wrong partition status or wrong split status, partition_status={}, split_status={}", enum_to_string(status()), enum_to_string(_split_status)); @@ -827,7 +828,7 @@ void 
replica_split_manager::on_update_child_group_partition_count_reply( } if (request.ballot != get_ballot()) { - derror_replica( + LOG_ERROR_PREFIX( "ballot changed, request ballot = {}, local ballot = {}", request.ballot, get_ballot()); parent_handle_split_error( "on_update_child_group_partition_count_reply failed, ballot changed", true); @@ -836,9 +837,10 @@ void replica_split_manager::on_update_child_group_partition_count_reply( error_code error = (ec == ERR_OK) ? response.err : ec; if (error == ERR_TIMEOUT) { - dwarn_replica("failed to update child node({}) partition_count, error = {}, wait and retry", - request.target_address.to_string(), - error); + LOG_WARNING_PREFIX( + "failed to update child node({}) partition_count, error = {}, wait and retry", + request.target_address.to_string(), + error); tasking::enqueue( LPC_PARTITION_SPLIT, tracker(), @@ -853,29 +855,29 @@ void replica_split_manager::on_update_child_group_partition_count_reply( } if (error != ERR_OK) { - derror_replica("failed to update child node({}) partition_count({}), error = {}", - request.target_address.to_string(), - request.new_partition_count, - error); + LOG_ERROR_PREFIX("failed to update child node({}) partition_count({}), error = {}", + request.target_address.to_string(), + request.new_partition_count, + error); parent_handle_split_error("on_update_child_group_partition_count_reply error", true); return; } - ddebug_replica("update node({}) child({}) partition_count({}) succeed", - request.target_address.to_string(), - request.child_pid, - request.new_partition_count); + LOG_INFO_PREFIX("update node({}) child({}) partition_count({}) succeed", + request.target_address.to_string(), + request.child_pid, + request.new_partition_count); // update group partition_count succeed not_replied_addresses->erase(request.target_address); if (not_replied_addresses->empty()) { - ddebug_replica("update child({}) group partition_count, new_partition_count = {}", - request.child_pid, - request.new_partition_count); + LOG_INFO_PREFIX("update child({}) group partition_count, new_partition_count = {}", + request.child_pid, + request.new_partition_count); register_child_on_meta(get_ballot()); } else { - ddebug_replica("there are still {} replica not update partition count in child group", - not_replied_addresses->size()); + LOG_INFO_PREFIX("there are still {} replica not update partition count in child group", + not_replied_addresses->size()); } } @@ -885,7 +887,7 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren FAIL_POINT_INJECT_F("replica_register_child_on_meta", [](dsn::string_view) {}); if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) { - derror_replica( + LOG_ERROR_PREFIX( "wrong partition status or wrong split status, partition_status={}, split_status={}", enum_to_string(status()), enum_to_string(_split_status)); @@ -895,7 +897,7 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren } if (_replica->_primary_states.reconfiguration_task != nullptr) { - dwarn_replica("under reconfiguration, delay and retry to register child"); + LOG_WARNING_PREFIX("under reconfiguration, delay and retry to register child"); _replica->_primary_states.register_child_task = tasking::enqueue(LPC_PARTITION_SPLIT, tracker(), @@ -922,7 +924,7 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren _replica->update_local_configuration_with_no_ballot_change(partition_status::PS_INACTIVE); 
_replica->set_inactive_state_transient(true); int32_t old_partition_version = _partition_version.exchange(-1); - ddebug_replica("update partition version from {} to {}", old_partition_version, -1); + LOG_INFO_PREFIX("update partition version from {} to {}", old_partition_version, -1); parent_send_register_request(request); } @@ -934,7 +936,7 @@ void replica_split_manager::parent_send_register_request( FAIL_POINT_INJECT_F("replica_parent_send_register_request", [](dsn::string_view) {}); dcheck_eq_replica(status(), partition_status::PS_INACTIVE); - ddebug_replica( + LOG_INFO_PREFIX( "send register child({}) request to meta_server, current ballot = {}, child ballot = {}", request.child_config.pid, request.parent_config.ballot, @@ -966,8 +968,8 @@ void replica_split_manager::on_register_child_on_meta_reply( // primary parent is under reconfiguration, whose status should be PS_INACTIVE if (partition_status::PS_INACTIVE != status() || !_stub->is_connected()) { - derror_replica("status wrong or stub is not connected, status = {}", - enum_to_string(status())); + LOG_ERROR_PREFIX("status wrong or stub is not connected, status = {}", + enum_to_string(status())); _replica->_primary_states.register_child_task = nullptr; return; } @@ -975,14 +977,14 @@ void replica_split_manager::on_register_child_on_meta_reply( error_code err = ec == ERR_OK ? response.err : ec; if (err == ERR_INVALID_STATE || err == ERR_INVALID_VERSION || err == ERR_CHILD_REGISTERED) { if (err == ERR_CHILD_REGISTERED) { - derror_replica( + LOG_ERROR_PREFIX( "register child({}) failed, error = {}, child has already been registered", request.child_config.pid, err); } else { - derror_replica("register child({}) failed, error = {}, request is out-of-dated", - request.child_config.pid, - err); + LOG_ERROR_PREFIX("register child({}) failed, error = {}, request is out-of-dated", + request.child_config.pid, + err); _stub->split_replica_error_handler( request.child_config.pid, std::bind(&replica_split_manager::child_handle_split_error, @@ -993,16 +995,16 @@ void replica_split_manager::on_register_child_on_meta_reply( _replica->_primary_states.register_child_task = nullptr; _replica->_primary_states.sync_send_write_request = false; if (response.parent_config.ballot >= get_ballot()) { - ddebug_replica("response ballot = {}, local ballot = {}, should update configuration", - response.parent_config.ballot, - get_ballot()); + LOG_INFO_PREFIX("response ballot = {}, local ballot = {}, should update configuration", + response.parent_config.ballot, + get_ballot()); _replica->update_configuration(response.parent_config); } return; } if (err != ERR_OK) { - dwarn_replica( + LOG_WARNING_PREFIX( "register child({}) failed, error = {}, wait and retry", request.child_config.pid, err); _replica->_primary_states.register_child_task = tasking::enqueue( LPC_PARTITION_SPLIT, @@ -1014,7 +1016,7 @@ void replica_split_manager::on_register_child_on_meta_reply( } if (response.parent_config.ballot < get_ballot()) { - dwarn_replica( + LOG_WARNING_PREFIX( "register child({}) failed, parent ballot from response is {}, local ballot is {}", request.child_config.pid, response.parent_config.ballot, @@ -1028,12 +1030,12 @@ void replica_split_manager::on_register_child_on_meta_reply( return; } - ddebug_replica("register child({}) succeed, response parent ballot = {}, local ballot = " - "{}, local status = {}", - request.child_config.pid, - response.parent_config.ballot, - get_ballot(), - enum_to_string(status())); + LOG_INFO_PREFIX("register child({}) succeed, response parent 
ballot = {}, local ballot = " + "{}, local status = {}", + request.child_config.pid, + response.parent_config.ballot, + get_ballot(), + enum_to_string(status())); dcheck_ge_replica(response.parent_config.ballot, get_ballot()); dcheck_eq_replica(_replica->_app_info.partition_count * 2, response.app.partition_count); @@ -1062,7 +1064,7 @@ void replica_split_manager::child_partition_active( const partition_configuration &config) // on child { if (status() != partition_status::PS_PARTITION_SPLIT) { - dwarn_replica("child partition has been active, status={}", enum_to_string(status())); + LOG_WARNING_PREFIX("child partition has been active, status={}", enum_to_string(status())); return; } @@ -1071,7 +1073,7 @@ void replica_split_manager::child_partition_active( _replica->_prepare_list->max_decree(); _replica->update_configuration(config); _stub->_counter_replicas_splitting_recent_split_succ_count->increment(); - ddebug_replica("child partition is active, status={}", enum_to_string(status())); + LOG_INFO_PREFIX("child partition is active, status={}", enum_to_string(status())); } // ThreadPool: THREAD_POOL_REPLICATION @@ -1087,12 +1089,12 @@ void replica_split_manager::child_handle_split_error( const std::string &error_msg) // on child partition { if (status() != partition_status::PS_ERROR) { - derror_replica("child partition split failed because {}, parent = {}, split_duration = " - "{}ms, async_learn_duration = {}ms", - error_msg, - _replica->_split_states.parent_gpid, - _replica->_split_states.total_ms(), - _replica->_split_states.async_learn_ms()); + LOG_ERROR_PREFIX("child partition split failed because {}, parent = {}, split_duration = " + "{}ms, async_learn_duration = {}ms", + error_msg, + _replica->_split_states.parent_gpid, + _replica->_split_states.total_ms(), + _replica->_split_states.async_learn_ms()); _stub->_counter_replicas_splitting_recent_split_fail_count->increment(); _replica->update_local_configuration_with_no_ballot_change(partition_status::PS_ERROR); } @@ -1129,20 +1131,21 @@ void replica_split_manager::trigger_primary_parent_split( { dcheck_eq_replica(status(), partition_status::PS_PRIMARY); dcheck_eq_replica(_replica->_app_info.partition_count * 2, meta_partition_count); - ddebug_replica("app({}) partition count changed, local({}) VS meta({}), split_status local({}) " - "VS meta({})", - _replica->_app_info.app_name, - _replica->_app_info.partition_count, - meta_partition_count, - enum_to_string(_split_status), - enum_to_string(meta_split_status)); + LOG_INFO_PREFIX( + "app({}) partition count changed, local({}) VS meta({}), split_status local({}) " + "VS meta({})", + _replica->_app_info.app_name, + _replica->_app_info.partition_count, + meta_partition_count, + enum_to_string(_split_status), + enum_to_string(meta_split_status)); _meta_split_status = meta_split_status; if (meta_split_status == split_status::SPLITTING) { if (!_replica->_primary_states.learners.empty() || _replica->_primary_states.membership.secondaries.size() + 1 < _replica->_primary_states.membership.max_replica_count) { - dwarn_replica( + LOG_WARNING_PREFIX( "there are {} learners or not have enough secondaries(count is {}), wait for " "next round", _replica->_primary_states.learners.size(), @@ -1170,7 +1173,7 @@ void replica_split_manager::trigger_primary_parent_split( } if (meta_split_status == split_status::PAUSED) { - dwarn_replica("split has been paused, ignore it"); + LOG_WARNING_PREFIX("split has been paused, ignore it"); return; } @@ -1239,7 +1242,7 @@ void 
replica_split_manager::copy_mutation(mutation_ptr &mu) // on parent partiti void replica_split_manager::on_copy_mutation(mutation_ptr &mu) // on child partition { if (status() != partition_status::PS_PARTITION_SPLIT) { - derror_replica( + LOG_ERROR_PREFIX( "wrong status({}), ignore this mutation({})", enum_to_string(status()), mu->name()); _stub->split_replica_error_handler( _replica->_split_states.parent_gpid, [mu](replica_split_manager *split_mgr) { @@ -1257,11 +1260,12 @@ void replica_split_manager::on_copy_mutation(mutation_ptr &mu) // on child parti } if (mu->data.header.ballot > get_ballot()) { - derror_replica("ballot changed, mutation ballot({}) vs local ballot({}), ignore copy this " - "mutation({})", - mu->data.header.ballot, - get_ballot(), - mu->name()); + LOG_ERROR_PREFIX( + "ballot changed, mutation ballot({}) vs local ballot({}), ignore copy this " + "mutation({})", + mu->data.header.ballot, + get_ballot(), + mu->name()); _stub->split_replica_error_handler( _replica->_split_states.parent_gpid, [mu](replica_split_manager *split_mgr) { split_mgr->parent_cleanup_split_context(); @@ -1315,14 +1319,14 @@ void replica_split_manager::on_copy_mutation_reply(error_code ec, auto mu = _replica->_prepare_list->get_mutation_by_decree(d); if (mu == nullptr) { - derror_replica("failed to get mutation in prepare list, decree = {}", d); + LOG_ERROR_PREFIX("failed to get mutation in prepare list, decree = {}", d); return; } if (mu->data.header.ballot != b) { - derror_replica("ballot not match, mutation ballot({}) vs child mutation ballot({})", - mu->data.header.ballot, - b); + LOG_ERROR_PREFIX("ballot not match, mutation ballot({}) vs child mutation ballot({})", + mu->data.header.ballot, + b); return; } @@ -1330,12 +1334,12 @@ void replica_split_manager::on_copy_mutation_reply(error_code ec, if (ec == ERR_OK) { mu->child_acked(); } else { - derror_replica("child({}) copy mutation({}) failed, ballot={}, decree={}, error={}", - _child_gpid, - mu->name(), - b, - d, - ec); + LOG_ERROR_PREFIX("child({}) copy mutation({}) failed, ballot={}, decree={}, error={}", + _child_gpid, + mu->name(), + b, + d, + ec); } // handle child ack @@ -1387,7 +1391,7 @@ void replica_split_manager::parent_stop_split( _replica->_primary_states.sync_send_write_request = false; _replica->broadcast_group_check(); } - ddebug_replica( + LOG_INFO_PREFIX( "{} split succeed, status = {}, old split_status = {}, child partition_index = {}", meta_split_status == split_status::PAUSING ? "pause" : "cancel", enum_to_string(status()), @@ -1439,9 +1443,9 @@ void replica_split_manager::parent_send_notify_stop_request( req->meta_split_status = meta_split_status; req->partition_count = _replica->_app_info.partition_count; - ddebug_replica("group {} split succeed, send notify_stop_request to meta server({})", - meta_split_status == split_status::PAUSING ? "pause" : "cancel", - meta_address.to_string()); + LOG_INFO_PREFIX("group {} split succeed, send notify_stop_request to meta server({})", + meta_split_status == split_status::PAUSING ? "pause" : "cancel", + meta_address.to_string()); notify_stop_split_rpc rpc( std::move(req), RPC_CM_NOTIFY_STOP_SPLIT, 0_ms, 0, get_gpid().thread_hash()); rpc.call(meta_address, tracker(), [this, rpc](error_code ec) mutable { @@ -1449,7 +1453,8 @@ void replica_split_manager::parent_send_notify_stop_request( const std::string type = rpc.request().meta_split_status == split_status::PAUSING ? 
"pause" : "cancel"; if (err != ERR_OK) { - dwarn_replica("notify {} split failed, error = {}, wait for next round", type, err); + LOG_WARNING_PREFIX( + "notify {} split failed, error = {}, wait for next round", type, err); } }); } @@ -1463,8 +1468,8 @@ void replica_split_manager::query_child_state() // on primary parent request->partition_count = _replica->_app_info.partition_count; rpc_address meta_address(_stub->_failure_detector->get_servers()); - ddebug_replica("send query child partition state request to meta server({})", - meta_address.to_string()); + LOG_INFO_PREFIX("send query child partition state request to meta server({})", + meta_address.to_string()); query_child_state_rpc rpc( std::move(request), RPC_CM_QUERY_CHILD_STATE, 0_ms, 0, get_gpid().thread_hash()); _replica->_primary_states.query_child_task = @@ -1482,7 +1487,7 @@ void replica_split_manager::on_query_child_state_reply( _replica->_checker.only_one_thread_access(); if (ec != ERR_OK) { - dwarn_replica("query child partition state failed, error = {}, retry it later", ec); + LOG_WARNING_PREFIX("query child partition state failed, error = {}, retry it later", ec); _replica->_primary_states.query_child_task = tasking::enqueue(LPC_PARTITION_SPLIT, tracker(), @@ -1493,14 +1498,14 @@ void replica_split_manager::on_query_child_state_reply( } if (response.err != ERR_OK) { - dwarn_replica("app({}) partition({}) split has been canceled, ignore it", - request.app_name, - request.pid); + LOG_WARNING_PREFIX("app({}) partition({}) split has been canceled, ignore it", + request.app_name, + request.pid); return; } - ddebug_replica("query child partition succeed, child partition[{}] has already been ready", - response.child_config.pid); + LOG_INFO_PREFIX("query child partition succeed, child partition[{}] has already been ready", + response.child_config.pid); // make child partition active _stub->split_replica_exec(LPC_PARTITION_SPLIT, response.child_config.pid, diff --git a/src/replica/split/replica_split_manager.h b/src/replica/split/replica_split_manager.h index a6e56bfd8c..ef51a8e362 100644 --- a/src/replica/split/replica_split_manager.h +++ b/src/replica/split/replica_split_manager.h @@ -185,7 +185,7 @@ class replica_split_manager : replica_base { auto target_pidx = get_partition_version() & partition_hash; if (dsn_unlikely(target_pidx != get_gpid().get_partition_index())) { - derror_replica( + LOG_ERROR_PREFIX( "receive {} request with wrong partition_hash({}), partition_version = {}, " "target_pidx = {}", op, diff --git a/src/server/hotkey_collector.cpp b/src/server/hotkey_collector.cpp index 28dd1fe183..996565ef8f 100644 --- a/src/server/hotkey_collector.cpp +++ b/src/server/hotkey_collector.cpp @@ -186,8 +186,8 @@ inline void hotkey_collector::change_state_by_result() case hotkey_collector_state::FINE_DETECTING: if (!_result.hot_hash_key.empty()) { change_state_to_finished(); - derror_replica("Find the hotkey: {}", - pegasus::utils::c_escape_string(_result.hot_hash_key)); + LOG_ERROR_PREFIX("Find the hotkey: {}", + pegasus::utils::c_escape_string(_result.hot_hash_key)); } break; default: @@ -212,7 +212,7 @@ void hotkey_collector::handle_rpc(const dsn::replication::detect_hotkey_request std::string hint = fmt::format("{}: can't find this detect action", req.action); resp.err = dsn::ERR_INVALID_STATE; resp.__set_err_hint(hint); - derror_replica(hint); + LOG_ERROR_PREFIX(hint); } } @@ -279,11 +279,11 @@ void hotkey_collector::on_start_detect(dsn::replication::detect_hotkey_response hint = "invalid collector state"; resp.err = 
dsn::ERR_INVALID_STATE; resp.__set_err_hint(hint); - derror_replica(hint); + LOG_ERROR_PREFIX(hint); dassert(false, "invalid collector state"); } resp.__set_err_hint(hint); - dwarn_replica(hint); + LOG_WARNING_PREFIX(hint); } void hotkey_collector::on_stop_detect(dsn::replication::detect_hotkey_response &resp) @@ -292,7 +292,7 @@ void hotkey_collector::on_stop_detect(dsn::replication::detect_hotkey_response & resp.err = dsn::ERR_OK; std::string hint = fmt::format("{} hotkey stopped, cache cleared", dsn::enum_to_string(_hotkey_type)); - ddebug_replica(hint); + LOG_INFO_PREFIX(hint); } void hotkey_collector::query_result(dsn::replication::detect_hotkey_response &resp) @@ -302,7 +302,7 @@ void hotkey_collector::query_result(dsn::replication::detect_hotkey_response &re std::string hint = fmt::format("Can't get hotkey now, now state: {}", enum_to_string(_state.load())); resp.__set_err_hint(hint); - ddebug_replica(hint); + LOG_INFO_PREFIX(hint); } else { resp.err = dsn::ERR_OK; resp.__set_hotkey_result(pegasus::utils::c_escape_string(_result.hot_hash_key)); @@ -312,7 +312,7 @@ void hotkey_collector::query_result(dsn::replication::detect_hotkey_response &re bool hotkey_collector::terminate_if_timeout() { if (dsn_now_s() >= _collector_start_time_second.load() + FLAGS_max_seconds_to_detect_hotkey) { - ddebug_replica("hotkey collector work time is exhausted but no hotkey has been found"); + LOG_INFO_PREFIX("hotkey collector work time is exhausted but no hotkey has been found"); change_state_to_stopped(); return true; } diff --git a/src/server/logging_utils.h b/src/server/logging_utils.h index c9a379eceb..9fa5095b33 100644 --- a/src/server/logging_utils.h +++ b/src/server/logging_utils.h @@ -23,15 +23,7 @@ /// Utilities for logging the operation on rocksdb. -#define derror_rocksdb(op, error, ...) \ - LOG_ERROR_F("{}: rocksdb {} failed: error = {} [{}]", \ - replica_name(), \ - op, \ - error, \ - fmt::format(__VA_ARGS__)) +#define LOG_ERROR_ROCKSDB(op, error, ...) \ + LOG_ERROR_PREFIX("rocksdb {} failed: error = {} [{}]", op, error, fmt::format(__VA_ARGS__)) -#define ddebug_rocksdb(op, ...) \ - LOG_INFO_F("{}: rocksdb {}: [{}]", replica_name(), op, fmt::format(__VA_ARGS__)) - -#define dwarn_rocksdb(op, ...) \ - LOG_WARNING_F("{}: rocksdb {}: [{}]", replica_name(), op, fmt::format(__VA_ARGS__)) +#define LOG_INFO_ROCKSDB(op, ...) LOG_INFO_PREFIX("rocksdb {}: [{}]", op, fmt::format(__VA_ARGS__)) diff --git a/src/server/meta_store.cpp b/src/server/meta_store.cpp index 0359cfde99..d65f1c004e 100644 --- a/src/server/meta_store.cpp +++ b/src/server/meta_store.cpp @@ -152,7 +152,7 @@ ::dsn::error_code meta_store::set_string_value_to_meta_cf(const std::string &key { auto status = _db->Put(_wt_opts, _meta_cf, key, value); if (!status.ok()) { - derror_replica( + LOG_ERROR_PREFIX( "Put {}={} to meta column family failed, status {}", key, value, status.ToString()); // TODO(yingchun): add a rocksdb io error. 
return ::dsn::ERR_LOCAL_APP_FAILURE; diff --git a/src/server/pegasus_event_listener.cpp b/src/server/pegasus_event_listener.cpp index 6957444c59..891e35cf20 100644 --- a/src/server/pegasus_event_listener.cpp +++ b/src/server/pegasus_event_listener.cpp @@ -96,10 +96,10 @@ void pegasus_event_listener::OnCompactionCompleted(rocksdb::DB *db, void pegasus_event_listener::OnStallConditionsChanged(const rocksdb::WriteStallInfo &info) { if (info.condition.cur == rocksdb::WriteStallCondition::kDelayed) { - derror_replica("rocksdb write delayed"); + LOG_ERROR_PREFIX("rocksdb write delayed"); _pfc_recent_write_change_delayed_count->increment(); } else if (info.condition.cur == rocksdb::WriteStallCondition::kStopped) { - derror_replica("rocksdb write stopped"); + LOG_ERROR_PREFIX("rocksdb write stopped"); _pfc_recent_write_change_stopped_count->increment(); } } diff --git a/src/server/pegasus_manual_compact_service.cpp b/src/server/pegasus_manual_compact_service.cpp index f69b61ad50..2429e85c2e 100644 --- a/src/server/pegasus_manual_compact_service.cpp +++ b/src/server/pegasus_manual_compact_service.cpp @@ -70,12 +70,12 @@ void pegasus_manual_compact_service::start_manual_compact_if_needed( const std::map &envs) { if (check_compact_disabled(envs)) { - ddebug_replica("ignored compact because disabled"); + LOG_INFO_PREFIX("ignored compact because disabled"); return; } if (check_compact_max_concurrent_running_count(envs) <= 0) { - ddebug_replica("ignored compact because max_concurrent_running_count <= 0"); + LOG_INFO_PREFIX("ignored compact because max_concurrent_running_count <= 0"); return; } @@ -102,7 +102,7 @@ void pegasus_manual_compact_service::start_manual_compact_if_needed( manual_compact(options); }); } else { - ddebug_replica("ignored compact because last one is on going or just finished"); + LOG_INFO_PREFIX("ignored compact because last one is on going or just finished"); } } @@ -119,10 +119,10 @@ bool pegasus_manual_compact_service::check_compact_disabled( if (new_disabled != old_disabled) { // flag changed if (new_disabled) { - ddebug_replica("manual compact is set to disabled now"); + LOG_INFO_PREFIX("manual compact is set to disabled now"); _disabled.store(true); } else { - ddebug_replica("manual compact is set to enabled now"); + LOG_INFO_PREFIX("manual compact is set to enabled now"); _disabled.store(false); } } @@ -136,13 +136,13 @@ int pegasus_manual_compact_service::check_compact_max_concurrent_running_count( int new_count = INT_MAX; auto find = envs.find(MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT_KEY); if (find != envs.end() && !dsn::buf2int32(find->second, new_count)) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); } int old_count = _max_concurrent_running_count.load(); if (new_count != old_count) { // count changed - ddebug_replica("max_concurrent_running_count changed from {} to {}", old_count, new_count); + LOG_INFO_PREFIX("max_concurrent_running_count changed from {} to {}", old_count, new_count); _max_concurrent_running_count.store(new_count); } @@ -159,7 +159,7 @@ bool pegasus_manual_compact_service::check_once_compact( int64_t trigger_time = 0; if (!dsn::buf2int64(find->second, trigger_time) || trigger_time <= 0) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return false; } @@ -177,7 +177,7 @@ bool pegasus_manual_compact_service::check_periodic_compact( std::list trigger_time_strs; 
dsn::utils::split_args(find->second.c_str(), trigger_time_strs, ','); if (trigger_time_strs.empty()) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return false; } @@ -189,7 +189,7 @@ bool pegasus_manual_compact_service::check_periodic_compact( } } if (trigger_time.empty()) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return false; } @@ -207,7 +207,7 @@ bool pegasus_manual_compact_service::check_periodic_compact( uint64_t pegasus_manual_compact_service::now_timestamp() { #ifdef PEGASUS_UNIT_TEST - ddebug_replica("_mock_now_timestamp={}", _mock_now_timestamp); + LOG_INFO_PREFIX("_mock_now_timestamp={}", _mock_now_timestamp); return _mock_now_timestamp == 0 ? dsn_now_ms() : _mock_now_timestamp; #else return dsn_now_ms(); @@ -230,10 +230,10 @@ void pegasus_manual_compact_service::extract_manual_compact_opts( (target_level >= 1 && target_level <= _app->_data_cf_opts.num_levels))) { options.target_level = target_level; } else { - dwarn_replica("{}={} is invalid, use default value {}", - find->first, - find->second, - options.target_level); + LOG_WARNING_PREFIX("{}={} is invalid, use default value {}", + find->first, + find->second, + options.target_level); } } @@ -246,7 +246,7 @@ void pegasus_manual_compact_service::extract_manual_compact_opts( } else if (argv == MANUAL_COMPACT_BOTTOMMOST_LEVEL_COMPACTION_SKIP) { options.bottommost_level_compaction = rocksdb::BottommostLevelCompaction::kSkip; } else { - dwarn_replica( + LOG_WARNING_PREFIX( "{}={} is invalid, use default value {}", find->first, find->second, @@ -279,7 +279,7 @@ void pegasus_manual_compact_service::manual_compact(const rocksdb::CompactRangeO // if we find manual compaction is disabled when transfer from queue to running, // it would not to be started. 
if (_disabled.load()) { - ddebug_replica("ignored compact because disabled"); + LOG_INFO_PREFIX("ignored compact because disabled"); _manual_compact_enqueue_time_ms.store(0); return; } @@ -288,8 +288,8 @@ void pegasus_manual_compact_service::manual_compact(const rocksdb::CompactRangeO _pfc_manual_compact_running_count->increment(); if (_pfc_manual_compact_running_count->get_integer_value() > _max_concurrent_running_count) { _pfc_manual_compact_running_count->decrement(); - ddebug_replica("ignored compact because exceed max_concurrent_running_count({})", - _max_concurrent_running_count.load()); + LOG_INFO_PREFIX("ignored compact because exceed max_concurrent_running_count({})", + _max_concurrent_running_count.load()); _manual_compact_enqueue_time_ms.store(0); return; } @@ -303,7 +303,7 @@ void pegasus_manual_compact_service::manual_compact(const rocksdb::CompactRangeO uint64_t pegasus_manual_compact_service::begin_manual_compact() { - ddebug_replica("start to execute manual compaction"); + LOG_INFO_PREFIX("start to execute manual compaction"); uint64_t start = now_timestamp(); _manual_compact_start_running_time_ms.store(start); return start; @@ -311,7 +311,7 @@ uint64_t pegasus_manual_compact_service::begin_manual_compact() void pegasus_manual_compact_service::end_manual_compact(uint64_t start, uint64_t finish) { - ddebug_replica("finish to execute manual compaction, time_used = {}ms", finish - start); + LOG_INFO_PREFIX("finish to execute manual compaction, time_used = {}ms", finish - start); _manual_compact_last_finish_time_ms.store(finish); _manual_compact_last_time_used_ms.store(finish - start); _manual_compact_enqueue_time_ms.store(0); diff --git a/src/server/pegasus_mutation_duplicator.cpp b/src/server/pegasus_mutation_duplicator.cpp index e2ed76eb32..7e996e4ca4 100644 --- a/src/server/pegasus_mutation_duplicator.cpp +++ b/src/server/pegasus_mutation_duplicator.cpp @@ -87,11 +87,11 @@ pegasus_mutation_duplicator::pegasus_mutation_duplicator(dsn::replication::repli ret.get_error()); _remote_cluster_id = static_cast(ret.get_value()); - ddebug_replica("initialize mutation duplicator for local cluster [id:{}], " - "remote cluster [id:{}, addr:{}]", - get_current_cluster_id(), - _remote_cluster_id, - remote_cluster); + LOG_INFO_PREFIX("initialize mutation duplicator for local cluster [id:{}], " + "remote cluster [id:{}, addr:{}]", + get_current_cluster_id(), + _remote_cluster_id, + remote_cluster); // never possible to duplicate data to itself dassert_replica(get_current_cluster_id() != _remote_cluster_id, @@ -145,9 +145,9 @@ void pegasus_mutation_duplicator::on_duplicate_reply(uint64_t hash, // errors are acceptable. // TODO(wutao1): print the entire request for future debugging. if (dsn::rand::next_double01() <= 0.01) { - derror_replica("duplicate_rpc failed: {} [size:{}]", - err == dsn::ERR_OK ? _client->get_error_string(perr) : err.to_string(), - rpc.request().entries.size()); + LOG_ERROR_PREFIX("duplicate_rpc failed: {} [size:{}]", + err == dsn::ERR_OK ? _client->get_error_string(perr) : err.to_string(), + rpc.request().entries.size()); } // duplicating an illegal write to server is unacceptable, fail fast. 
dassert_replica(perr != PERR_INVALID_ARGUMENT, rpc.response().error_hint); diff --git a/src/server/pegasus_server_impl.cpp b/src/server/pegasus_server_impl.cpp index 5255fa7044..35be069605 100644 --- a/src/server/pegasus_server_impl.cpp +++ b/src/server/pegasus_server_impl.cpp @@ -327,15 +327,15 @@ void pegasus_server_impl::on_get(get_rpc rpc) if (is_get_abnormal(time_used, value.size())) { ::dsn::blob hash_key, sort_key; pegasus_restore_key(key, hash_key, sort_key); - dwarn_replica("rocksdb abnormal get from {}: " - "hash_key = {}, sort_key = {}, return = {}, " - "value_size = {}, time_used = {} ns", - rpc.remote_address().to_string(), - ::pegasus::utils::c_escape_string(hash_key), - ::pegasus::utils::c_escape_string(sort_key), - status.ToString(), - value.size(), - time_used); + LOG_WARNING_PREFIX("rocksdb abnormal get from {}: " + "hash_key = {}, sort_key = {}, return = {}, " + "value_size = {}, time_used = {} ns", + rpc.remote_address().to_string(), + ::pegasus::utils::c_escape_string(hash_key), + ::pegasus::utils::c_escape_string(sort_key), + status.ToString(), + value.size(), + time_used); _pfc_recent_abnormal_count->increment(); } @@ -642,7 +642,7 @@ void pegasus_server_impl::on_multi_get(multi_get_rpc rpc) // scan not completed resp.error = rocksdb::Status::kIncomplete; if (limiter->exceed_limit()) { - dwarn_replica( + LOG_WARNING_PREFIX( "rocksdb abnormal scan from {}: time_used({}ns) VS time_threshold({}ns)", rpc.remote_address().to_string(), limiter->duration_time(), @@ -740,7 +740,7 @@ void pegasus_server_impl::on_multi_get(multi_get_rpc rpc) uint64_t time_used = dsn_now_ns() - start_time; if (is_multi_get_abnormal(time_used, size, iteration_count)) { - dwarn_replica( + LOG_WARNING_PREFIX( "rocksdb abnormal multi_get from {}: hash_key = {}, " "start_sort_key = {} ({}), stop_sort_key = {} ({}), " "sort_key_filter_type = {}, sort_key_filter_pattern = {}, " @@ -798,8 +798,8 @@ void pegasus_server_impl::on_batch_get(batch_get_rpc rpc) const auto &request = rpc.request(); if (request.keys.empty()) { response.error = rocksdb::Status::kInvalidArgument; - derror_replica("Invalid argument for batch_get from {}: 'keys' field in request is empty", - rpc.remote_address().to_string()); + LOG_ERROR_PREFIX("Invalid argument for batch_get from {}: 'keys' field in request is empty", + rpc.remote_address().to_string()); _cu_calculator->add_batch_get_cu(rpc.dsn_request(), response.error, response.data); _pfc_batch_get_latency->set(dsn_now_ns() - start_time); return; @@ -836,7 +836,7 @@ void pegasus_server_impl::on_batch_get(batch_get_rpc rpc) if (dsn_likely(status.ok())) { if (check_if_record_expired(epoch_now, value)) { if (_verbose_log) { - derror_replica( + LOG_ERROR_PREFIX( "rocksdb data expired for batch_get from {}, hash_key = {}, sort_key = {}", rpc.remote_address().to_string(), pegasus::utils::c_escape_string(hash_key), @@ -855,15 +855,15 @@ void pegasus_server_impl::on_batch_get(batch_get_rpc rpc) response.data.emplace_back(std::move(current_data)); } else { if (_verbose_log) { - derror_replica( + LOG_ERROR_PREFIX( "rocksdb get failed for batch_get from {}: error = {}, key size = {}", rpc.remote_address().to_string(), status.ToString(), request.keys.size()); } else { - derror_replica("rocksdb get failed for batch_get from {}: error = {}", - rpc.remote_address().to_string(), - status.ToString()); + LOG_ERROR_PREFIX("rocksdb get failed for batch_get from {}: error = {}", + rpc.remote_address().to_string(), + status.ToString()); } error_occurred = true; @@ -881,12 +881,13 @@ void 
pegasus_server_impl::on_batch_get(batch_get_rpc rpc) int64_t time_used = dsn_now_ns() - start_time; if (is_batch_get_abnormal(time_used, total_data_size, request.keys.size())) { - dwarn_replica("rocksdb abnormal batch_get from {}: total data size = {}, row count = {}, " - "time_used = {} us", - rpc.remote_address().to_string(), - total_data_size, - request.keys.size(), - time_used / 1000); + LOG_WARNING_PREFIX( + "rocksdb abnormal batch_get from {}: total data size = {}, row count = {}, " + "time_used = {} us", + rpc.remote_address().to_string(), + total_data_size, + request.keys.size(), + time_used / 1000); _pfc_recent_abnormal_count->increment(); } @@ -967,10 +968,10 @@ void pegasus_server_impl::on_sortkey_count(sortkey_count_rpc rpc) } resp.count = 0; } else if (limiter->exceed_limit()) { - dwarn_replica("rocksdb abnormal scan from {}: time_used({}ns) VS time_threshold({}ns)", - rpc.remote_address().to_string(), - limiter->duration_time(), - limiter->max_duration_time()); + LOG_WARNING_PREFIX("rocksdb abnormal scan from {}: time_used({}ns) VS time_threshold({}ns)", + rpc.remote_address().to_string(), + limiter->duration_time(), + limiter->max_duration_time()); resp.count = -1; } @@ -1260,12 +1261,12 @@ void pegasus_server_impl::on_get_scanner(get_scanner_rpc rpc) } else if (limiter->exceed_limit()) { // scan exceed limit time resp.error = rocksdb::Status::kIncomplete; - dwarn_replica("rocksdb abnormal scan from {}: batch_count={}, time_used_ns({}) VS " - "time_threshold_ns({})", - rpc.remote_address().to_string(), - batch_count, - limiter->duration_time(), - limiter->max_duration_time()); + LOG_WARNING_PREFIX("rocksdb abnormal scan from {}: batch_count={}, time_used_ns({}) VS " + "time_threshold_ns({})", + rpc.remote_address().to_string(), + batch_count, + limiter->duration_time(), + limiter->max_duration_time()); } else if (it->Valid() && !complete) { // scan not completed std::unique_ptr context(new pegasus_scan_context( @@ -1432,12 +1433,12 @@ void pegasus_server_impl::on_scan(scan_rpc rpc) } else if (limiter->exceed_limit()) { // scan exceed limit time resp.error = rocksdb::Status::kIncomplete; - dwarn_replica("rocksdb abnormal scan from {}: batch_count={}, time_used({}ns) VS " - "time_threshold({}ns)", - rpc.remote_address().to_string(), - batch_count, - limiter->duration_time(), - limiter->max_duration_time()); + LOG_WARNING_PREFIX("rocksdb abnormal scan from {}: batch_count={}, time_used({}ns) VS " + "time_threshold({}ns)", + rpc.remote_address().to_string(), + batch_count, + limiter->duration_time(), + limiter->max_duration_time()); } else if (it->Valid() && !complete) { // scan not completed int64_t handle = _context_cache.put(std::move(context)); @@ -1471,18 +1472,18 @@ void pegasus_server_impl::on_clear_scanner(const int64_t &args) { _context_cache dsn::error_code pegasus_server_impl::start(int argc, char **argv) { dassert_replica(!_is_open, "replica is already opened."); - ddebug_replica("start to open app {}", data_dir()); + LOG_INFO_PREFIX("start to open app {}", data_dir()); // parse envs for parameters // envs is compounded in replication_app_base::open() function std::map envs; if (argc > 0) { if ((argc - 1) % 2 != 0) { - derror_replica("parse envs failed, invalid argc = {}", argc); + LOG_ERROR_PREFIX("parse envs failed, invalid argc = {}", argc); return dsn::ERR_INVALID_PARAMETERS; } if (argv == nullptr) { - derror_replica("parse envs failed, invalid argv = nullptr"); + LOG_ERROR_PREFIX("parse envs failed, invalid argv = nullptr"); return dsn::ERR_INVALID_PARAMETERS; } 
int idx = 1; @@ -1515,12 +1516,12 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) auto duplication_path = duplication_dir(); if (dsn::utils::filesystem::path_exists(rdb_path)) { // only case 1 - ddebug_replica("rdb is already exist, path = {}", rdb_path); + LOG_INFO_PREFIX("rdb is already exist, path = {}", rdb_path); } else { // case 2 if (dsn::utils::filesystem::path_exists(duplication_path) && is_duplication_follower()) { if (!dsn::utils::filesystem::rename_path(duplication_path, rdb_path)) { - derror_replica( + LOG_ERROR_PREFIX( "load duplication data from {} to {} failed", duplication_path, rdb_path); return dsn::ERR_FILE_OPERATION_FAILED; } @@ -1531,34 +1532,34 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) if (restore_dir.empty()) { // case 3 if (force_restore) { - derror_replica("try to restore, but we can't combine restore_dir from envs"); + LOG_ERROR_PREFIX("try to restore, but we can't combine restore_dir from envs"); return dsn::ERR_FILE_OPERATION_FAILED; } else { db_exist = false; - dinfo_replica("open a new db, path = {}", rdb_path); + LOG_DEBUG_PREFIX("open a new db, path = {}", rdb_path); } } else { // case 4 - ddebug_replica("try to restore from restore_dir = {}", restore_dir); + LOG_INFO_PREFIX("try to restore from restore_dir = {}", restore_dir); if (dsn::utils::filesystem::directory_exists(restore_dir)) { // here, we just rename restore_dir to rdb, then continue the normal process if (dsn::utils::filesystem::rename_path(restore_dir, rdb_path)) { - ddebug_replica( + LOG_INFO_PREFIX( "rename restore_dir({}) to rdb({}) succeed", restore_dir, rdb_path); } else { - derror_replica( + LOG_ERROR_PREFIX( "rename restore_dir({}) to rdb({}) failed", restore_dir, rdb_path); return dsn::ERR_FILE_OPERATION_FAILED; } } else { if (force_restore) { - derror_replica( + LOG_ERROR_PREFIX( "try to restore, but restore_dir isn't exist, restore_dir = {}", restore_dir); return dsn::ERR_FILE_OPERATION_FAILED; } else { db_exist = false; - dwarn_replica( + LOG_WARNING_PREFIX( "try to restore and restore_dir({}) isn't exist, but we don't force " "it, the role of this replica must not primary, so we open a new db on " "the " @@ -1571,7 +1572,7 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) } } - ddebug_replica("start to open rocksDB's rdb({})", rdb_path); + LOG_INFO_PREFIX("start to open rocksDB's rdb({})", rdb_path); // Here we create a `_table_data_cf_opts` because we don't want to modify `_data_cf_opts`, which // will be used elsewhere. @@ -1583,7 +1584,7 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) bool missing_meta_cf = true; bool missing_data_cf = true; if (check_column_families(rdb_path, &missing_meta_cf, &missing_data_cf) != dsn::ERR_OK) { - derror_replica("check column families failed"); + LOG_ERROR_PREFIX("check column families failed"); return dsn::ERR_LOCAL_APP_FAILURE; } dassert_replica(!missing_meta_cf, "You must upgrade Pegasus server from 2.0"); @@ -1604,13 +1605,14 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) // `pegasus_data` options, which were used in old version rocksdbs (before 2.1.0). 
if (status.code() != rocksdb::Status::kInvalidArgument || status.ToString().find("pegasus_data") == std::string::npos) { - derror_replica("load latest option file failed: {}.", status.ToString()); + LOG_ERROR_PREFIX("load latest option file failed: {}.", status.ToString()); return dsn::ERR_LOCAL_APP_FAILURE; } has_incompatible_db_options = true; - dwarn_replica("The latest option file has incompatible db options: {}, use default " - "options to open db.", - status.ToString()); + LOG_WARNING_PREFIX( + "The latest option file has incompatible db options: {}, use default " + "options to open db.", + status.ToString()); } if (!has_incompatible_db_options) { @@ -1641,13 +1643,13 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) column_families, /*ignore_unknown_options=*/true); if (!s.ok() && !s.IsNotFound() && !has_incompatible_db_options) { - derror_replica("rocksdb::CheckOptionsCompatibility failed, error = {}", s.ToString()); + LOG_ERROR_PREFIX("rocksdb::CheckOptionsCompatibility failed, error = {}", s.ToString()); return dsn::ERR_LOCAL_APP_FAILURE; } std::vector handles_opened; auto status = rocksdb::DB::Open(_db_opts, rdb_path, column_families, &handles_opened, &_db); if (!status.ok()) { - derror_replica("rocksdb::DB::Open failed, error = {}", status.ToString()); + LOG_ERROR_PREFIX("rocksdb::DB::Open failed, error = {}", status.ToString()); return dsn::ERR_LOCAL_APP_FAILURE; } dcheck_eq_replica(2, handles_opened.size()); @@ -1666,7 +1668,7 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) uint64_t last_manual_compact_finish_time = _meta_store->get_last_manual_compact_finish_time(); if (_pegasus_data_version > PEGASUS_DATA_VERSION_MAX) { - derror_replica("open app failed, unsupported data version {}", _pegasus_data_version); + LOG_ERROR_PREFIX("open app failed, unsupported data version {}", _pegasus_data_version); release_db(); return dsn::ERR_LOCAL_APP_FAILURE; } @@ -1693,22 +1695,22 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) // only need async checkpoint because we sure that memtable is empty now. 
int64_t last_flushed = static_cast(_last_committed_decree); if (last_flushed != last_durable_decree()) { - ddebug_replica( + LOG_INFO_PREFIX( "start to do async checkpoint, last_durable_decree = {}, last_flushed_decree = {}", last_durable_decree(), last_flushed); auto err = async_checkpoint(false); if (err != dsn::ERR_OK) { - derror_replica("create checkpoint failed, error = {}", err.to_string()); + LOG_ERROR_PREFIX("create checkpoint failed, error = {}", err.to_string()); release_db(); return err; } dcheck_eq_replica(last_flushed, last_durable_decree()); } - ddebug_replica("open app succeed, pegasus_data_version = {}, last_durable_decree = {}", - _pegasus_data_version, - last_durable_decree()); + LOG_INFO_PREFIX("open app succeed, pegasus_data_version = {}, last_durable_decree = {}", + _pegasus_data_version, + last_durable_decree()); _is_open = true; @@ -1717,7 +1719,7 @@ dsn::error_code pegasus_server_impl::start(int argc, char **argv) update_usage_scenario(envs); } - dinfo_replica("start the update replica-level rocksdb statistics timer task"); + LOG_DEBUG_PREFIX("start the update replica-level rocksdb statistics timer task"); _update_replica_rdb_stat = dsn::tasking::enqueue_timer(LPC_REPLICATION_LONG_COMMON, &_tracker, @@ -1863,7 +1865,7 @@ ::dsn::error_code pegasus_server_impl::sync_checkpoint() // case 1: last_durable == last_commit // no need to do checkpoint if (last_durable == last_commit) { - ddebug_replica( + LOG_INFO_PREFIX( "no need to do checkpoint because last_durable_decree = last_committed_decree = {}", last_durable); return ::dsn::ERR_OK; @@ -1874,7 +1876,7 @@ ::dsn::error_code pegasus_server_impl::sync_checkpoint() rocksdb::Checkpoint *chkpt_raw = nullptr; auto status = rocksdb::Checkpoint::Create(_db, &chkpt_raw); if (!status.ok()) { - derror_replica("create Checkpoint object failed, error = {}", status.ToString()); + LOG_ERROR_PREFIX("create Checkpoint object failed, error = {}", status.ToString()); return ::dsn::ERR_LOCAL_APP_FAILURE; } std::unique_ptr chkpt(chkpt_raw); @@ -1882,10 +1884,10 @@ ::dsn::error_code pegasus_server_impl::sync_checkpoint() auto dir = chkpt_get_dir_name(last_commit); auto checkpoint_dir = ::dsn::utils::filesystem::path_combine(data_dir(), dir); if (::dsn::utils::filesystem::directory_exists(checkpoint_dir)) { - ddebug_replica("checkpoint directory {} is already existed, remove it first", - checkpoint_dir); + LOG_INFO_PREFIX("checkpoint directory {} is already existed, remove it first", + checkpoint_dir); if (!::dsn::utils::filesystem::remove_path(checkpoint_dir)) { - derror_replica("remove checkpoint directory {} failed", checkpoint_dir); + LOG_ERROR_PREFIX("remove checkpoint directory {} failed", checkpoint_dir); return ::dsn::ERR_FILE_OPERATION_FAILED; } } @@ -1894,15 +1896,15 @@ ::dsn::error_code pegasus_server_impl::sync_checkpoint() status = chkpt->CreateCheckpoint(checkpoint_dir, 0 /* log_size_for_flush */); if (!status.ok()) { // sometimes checkpoint may fail, and try again will succeed - derror_replica("CreateCheckpoint failed, error = {}, try again", status.ToString()); + LOG_ERROR_PREFIX("CreateCheckpoint failed, error = {}, try again", status.ToString()); // TODO(yingchun): fail and return status = chkpt->CreateCheckpoint(checkpoint_dir, 0); } if (!status.ok()) { - derror_replica("CreateCheckpoint failed, error = {}", status.ToString()); + LOG_ERROR_PREFIX("CreateCheckpoint failed, error = {}", status.ToString()); if (!::dsn::utils::filesystem::remove_path(checkpoint_dir)) { - derror_replica("remove checkpoint directory {} failed", 
checkpoint_dir); + LOG_ERROR_PREFIX("remove checkpoint directory {} failed", checkpoint_dir); } return ::dsn::ERR_LOCAL_APP_FAILURE; } @@ -1919,8 +1921,8 @@ ::dsn::error_code pegasus_server_impl::sync_checkpoint() set_last_durable_decree(_checkpoints.back()); } - ddebug_replica("sync create checkpoint succeed, last_durable_decree = {}", - last_durable_decree()); + LOG_INFO_PREFIX("sync create checkpoint succeed, last_durable_decree = {}", + last_durable_decree()); gc_checkpoints(); @@ -1946,7 +1948,7 @@ ::dsn::error_code pegasus_server_impl::async_checkpoint(bool flush_memtable) if (last_durable == last_commit) { dcheck_eq_replica(last_durable, last_flushed); dcheck_eq_replica(last_flushed, last_commit); - ddebug_replica( + LOG_INFO_PREFIX( "no need to checkpoint because last_durable_decree = last_committed_decree = {}", last_durable); return ::dsn::ERR_OK; @@ -1963,10 +1965,10 @@ ::dsn::error_code pegasus_server_impl::async_checkpoint(bool flush_memtable) // flush required, but not wait if (::dsn::ERR_OK == flush_all_family_columns(false)) { - ddebug_replica("trigger flushing memtable succeed"); + LOG_INFO_PREFIX("trigger flushing memtable succeed"); return ::dsn::ERR_TRY_AGAIN; } else { - derror_replica("trigger flushing memtable failed"); + LOG_ERROR_PREFIX("trigger flushing memtable failed"); return ::dsn::ERR_LOCAL_APP_FAILURE; } } @@ -1978,10 +1980,10 @@ ::dsn::error_code pegasus_server_impl::async_checkpoint(bool flush_memtable) std::string tmp_dir = ::dsn::utils::filesystem::path_combine( data_dir(), std::string("checkpoint.tmp.") + std::to_string(dsn_now_us())); if (::dsn::utils::filesystem::directory_exists(tmp_dir)) { - ddebug_replica("temporary checkpoint directory {} is already existed, remove it first", - tmp_dir); + LOG_INFO_PREFIX("temporary checkpoint directory {} is already existed, remove it first", + tmp_dir); if (!::dsn::utils::filesystem::remove_path(tmp_dir)) { - derror_replica("remove temporary checkpoint directory {} failed", tmp_dir); + LOG_ERROR_PREFIX("remove temporary checkpoint directory {} failed", tmp_dir); return ::dsn::ERR_FILE_OPERATION_FAILED; } } @@ -1989,28 +1991,29 @@ ::dsn::error_code pegasus_server_impl::async_checkpoint(bool flush_memtable) int64_t checkpoint_decree = 0; ::dsn::error_code err = copy_checkpoint_to_dir_unsafe(tmp_dir.c_str(), &checkpoint_decree); if (err != ::dsn::ERR_OK) { - derror_replica("copy_checkpoint_to_dir_unsafe failed with err = {}", err.to_string()); + LOG_ERROR_PREFIX("copy_checkpoint_to_dir_unsafe failed with err = {}", err.to_string()); return ::dsn::ERR_LOCAL_APP_FAILURE; } auto checkpoint_dir = ::dsn::utils::filesystem::path_combine(data_dir(), chkpt_get_dir_name(checkpoint_decree)); if (::dsn::utils::filesystem::directory_exists(checkpoint_dir)) { - ddebug_replica("checkpoint directory {} is already existed, remove it first", - checkpoint_dir); + LOG_INFO_PREFIX("checkpoint directory {} is already existed, remove it first", + checkpoint_dir); if (!::dsn::utils::filesystem::remove_path(checkpoint_dir)) { - derror_replica("remove old checkpoint directory {} failed", checkpoint_dir); + LOG_ERROR_PREFIX("remove old checkpoint directory {} failed", checkpoint_dir); if (!::dsn::utils::filesystem::remove_path(tmp_dir)) { - derror_replica("remove temporary checkpoint directory {} failed", tmp_dir); + LOG_ERROR_PREFIX("remove temporary checkpoint directory {} failed", tmp_dir); } return ::dsn::ERR_FILE_OPERATION_FAILED; } } if (!::dsn::utils::filesystem::rename_path(tmp_dir, checkpoint_dir)) { - derror_replica("rename 
checkpoint directory from {} to {} failed", tmp_dir, checkpoint_dir); + LOG_ERROR_PREFIX( + "rename checkpoint directory from {} to {} failed", tmp_dir, checkpoint_dir); if (!::dsn::utils::filesystem::remove_path(tmp_dir)) { - derror_replica("remove temporary checkpoint directory {} failed", tmp_dir); + LOG_ERROR_PREFIX("remove temporary checkpoint directory {} failed", tmp_dir); } return ::dsn::ERR_FILE_OPERATION_FAILED; } @@ -2025,8 +2028,8 @@ ::dsn::error_code pegasus_server_impl::async_checkpoint(bool flush_memtable) set_last_durable_decree(_checkpoints.back()); } - ddebug_replica("async create checkpoint succeed, last_durable_decree = {}", - last_durable_decree()); + LOG_INFO_PREFIX("async create checkpoint succeed, last_durable_decree = {}", + last_durable_decree()); gc_checkpoints(); @@ -2054,16 +2057,16 @@ ::dsn::error_code pegasus_server_impl::copy_checkpoint_to_dir_unsafe(const char rocksdb::Checkpoint *chkpt_raw = nullptr; auto status = rocksdb::Checkpoint::Create(_db, &chkpt_raw); if (!status.ok()) { - derror_replica("create Checkpoint object failed, error = {}", status.ToString()); + LOG_ERROR_PREFIX("create Checkpoint object failed, error = {}", status.ToString()); return ::dsn::ERR_LOCAL_APP_FAILURE; } std::unique_ptr chkpt(chkpt_raw); if (::dsn::utils::filesystem::directory_exists(checkpoint_dir)) { - ddebug_replica("checkpoint directory {} is already existed, remove it first", - checkpoint_dir); + LOG_INFO_PREFIX("checkpoint directory {} is already existed, remove it first", + checkpoint_dir); if (!::dsn::utils::filesystem::remove_path(checkpoint_dir)) { - derror_replica("remove checkpoint directory {} failed", checkpoint_dir); + LOG_ERROR_PREFIX("remove checkpoint directory {} failed", checkpoint_dir); return ::dsn::ERR_FILE_OPERATION_FAILED; } } @@ -2072,20 +2075,20 @@ ::dsn::error_code pegasus_server_impl::copy_checkpoint_to_dir_unsafe(const char status = chkpt->CreateCheckpoint(checkpoint_dir, flush_memtable ? 
0 : std::numeric_limits::max()); if (!status.ok()) { - derror_replica("CreateCheckpoint failed, error = {}", status.ToString()); + LOG_ERROR_PREFIX("CreateCheckpoint failed, error = {}", status.ToString()); if (!::dsn::utils::filesystem::remove_path(checkpoint_dir)) { - derror_replica("remove checkpoint directory {} failed", checkpoint_dir); + LOG_ERROR_PREFIX("remove checkpoint directory {} failed", checkpoint_dir); } return ::dsn::ERR_LOCAL_APP_FAILURE; } - ddebug_replica("copy checkpoint to dir({}) succeed", checkpoint_dir); + LOG_INFO_PREFIX("copy checkpoint to dir({}) succeed", checkpoint_dir); if (checkpoint_decree != nullptr) { rocksdb::DB *snapshot_db = nullptr; std::vector handles_opened; auto cleanup = [&](bool remove_checkpoint) { if (remove_checkpoint && !::dsn::utils::filesystem::remove_path(checkpoint_dir)) { - derror_replica("remove checkpoint directory {} failed", checkpoint_dir); + LOG_ERROR_PREFIX("remove checkpoint directory {} failed", checkpoint_dir); } if (snapshot_db) { for (auto handle : handles_opened) { @@ -2107,7 +2110,7 @@ ::dsn::error_code pegasus_server_impl::copy_checkpoint_to_dir_unsafe(const char status = rocksdb::DB::OpenForReadOnly( rocksdb::DBOptions(), checkpoint_dir, column_families, &handles_opened, &snapshot_db); if (!status.ok()) { - derror_replica( + LOG_ERROR_PREFIX( "OpenForReadOnly from {} failed, error = {}", checkpoint_dir, status.ToString()); snapshot_db = nullptr; cleanup(true); @@ -2299,7 +2302,7 @@ range_iteration_state pegasus_server_impl::validate_key_value_for_scan( if (_partition_version < 0 || _gpid.get_partition_index() > _partition_version || !check_pegasus_key_hash(key, _gpid.get_partition_index(), _partition_version)) { if (_verbose_log) { - derror_replica("not serve hash key while scan"); + LOG_ERROR_PREFIX("not serve hash key while scan"); } return range_iteration_state::kHashInvalid; } @@ -2417,14 +2420,14 @@ void pegasus_server_impl::update_replica_rocksdb_statistics() } } _pfc_rdb_sst_count->set(val); - dinfo_replica("_pfc_rdb_sst_count: {}", val); + LOG_DEBUG_PREFIX("_pfc_rdb_sst_count: {}", val); // Update _pfc_rdb_sst_size if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kTotalSstFilesSize, &str_val) && dsn::buf2uint64(str_val, val)) { static uint64_t bytes_per_mb = 1U << 20U; _pfc_rdb_sst_size->set(val / bytes_per_mb); - dinfo_replica("_pfc_rdb_sst_size: {} bytes", val); + LOG_DEBUG_PREFIX("_pfc_rdb_sst_size: {} bytes", val); } // Update _pfc_rdb_write_amplification @@ -2435,21 +2438,21 @@ void pegasus_server_impl::update_replica_rocksdb_statistics() ? 
1 : std::stod(write_amplification_iter->second); _pfc_rdb_write_amplification->set(write_amplification); - dinfo_replica("_pfc_rdb_write_amplification: {}", write_amplification); + LOG_DEBUG_PREFIX("_pfc_rdb_write_amplification: {}", write_amplification); } // Update _pfc_rdb_index_and_filter_blocks_mem_usage if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kEstimateTableReadersMem, &str_val) && dsn::buf2uint64(str_val, val)) { _pfc_rdb_index_and_filter_blocks_mem_usage->set(val); - dinfo_replica("_pfc_rdb_index_and_filter_blocks_mem_usage: {} bytes", val); + LOG_DEBUG_PREFIX("_pfc_rdb_index_and_filter_blocks_mem_usage: {} bytes", val); } // Update _pfc_rdb_memtable_mem_usage if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kCurSizeAllMemTables, &str_val) && dsn::buf2uint64(str_val, val)) { _pfc_rdb_memtable_mem_usage->set(val); - dinfo_replica("_pfc_rdb_memtable_mem_usage: {} bytes", val); + LOG_DEBUG_PREFIX("_pfc_rdb_memtable_mem_usage: {} bytes", val); } // Update _pfc_rdb_estimate_num_keys @@ -2458,7 +2461,7 @@ void pegasus_server_impl::update_replica_rocksdb_statistics() if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kEstimateNumKeys, &str_val) && dsn::buf2uint64(str_val, val)) { _pfc_rdb_estimate_num_keys->set(val); - dinfo_replica("_pfc_rdb_estimate_num_keys: {}", val); + LOG_DEBUG_PREFIX("_pfc_rdb_estimate_num_keys: {}", val); } // the follow stats is related to `read`, so only primary need update it,ignore @@ -2476,67 +2479,67 @@ void pegasus_server_impl::update_replica_rocksdb_statistics() _statistics->getTickerCount(rocksdb::READ_AMP_TOTAL_READ_BYTES) / estimate_useful_bytes; _pfc_rdb_read_amplification->set(read_amplification); - dinfo_replica("_pfc_rdb_read_amplification: {}", read_amplification); + LOG_DEBUG_PREFIX("_pfc_rdb_read_amplification: {}", read_amplification); } } // Update _pfc_rdb_bf_seek_negatives auto bf_seek_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_USEFUL); _pfc_rdb_bf_seek_negatives->set(bf_seek_negatives); - dinfo_replica("_pfc_rdb_bf_seek_negatives: {}", bf_seek_negatives); + LOG_DEBUG_PREFIX("_pfc_rdb_bf_seek_negatives: {}", bf_seek_negatives); // Update _pfc_rdb_bf_seek_total auto bf_seek_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_CHECKED); _pfc_rdb_bf_seek_total->set(bf_seek_total); - dinfo_replica("_pfc_rdb_bf_seek_total: {}", bf_seek_total); + LOG_DEBUG_PREFIX("_pfc_rdb_bf_seek_total: {}", bf_seek_total); // Update _pfc_rdb_bf_point_positive_true auto bf_point_positive_true = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_TRUE_POSITIVE); _pfc_rdb_bf_point_positive_true->set(bf_point_positive_true); - dinfo_replica("_pfc_rdb_bf_point_positive_true: {}", bf_point_positive_true); + LOG_DEBUG_PREFIX("_pfc_rdb_bf_point_positive_true: {}", bf_point_positive_true); // Update _pfc_rdb_bf_point_positive_total auto bf_point_positive_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_POSITIVE); _pfc_rdb_bf_point_positive_total->set(bf_point_positive_total); - dinfo_replica("_pfc_rdb_bf_point_positive_total: {}", bf_point_positive_total); + LOG_DEBUG_PREFIX("_pfc_rdb_bf_point_positive_total: {}", bf_point_positive_total); // Update _pfc_rdb_bf_point_negatives auto bf_point_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_USEFUL); _pfc_rdb_bf_point_negatives->set(bf_point_negatives); - dinfo_replica("_pfc_rdb_bf_point_negatives: {}", bf_point_negatives); + LOG_DEBUG_PREFIX("_pfc_rdb_bf_point_negatives: {}", bf_point_negatives); // Update 
_pfc_rdb_block_cache_hit_count and _pfc_rdb_block_cache_total_count auto block_cache_hit = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_HIT); _pfc_rdb_block_cache_hit_count->set(block_cache_hit); - dinfo_replica("_pfc_rdb_block_cache_hit_count: {}", block_cache_hit); + LOG_DEBUG_PREFIX("_pfc_rdb_block_cache_hit_count: {}", block_cache_hit); auto block_cache_miss = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_MISS); auto block_cache_total = block_cache_hit + block_cache_miss; _pfc_rdb_block_cache_total_count->set(block_cache_total); - dinfo_replica("_pfc_rdb_block_cache_total_count: {}", block_cache_total); + LOG_DEBUG_PREFIX("_pfc_rdb_block_cache_total_count: {}", block_cache_total); // update block memtable/l0/l1/l2andup hit rate under block cache up level auto memtable_hit_count = _statistics->getTickerCount(rocksdb::MEMTABLE_HIT); _pfc_rdb_memtable_hit_count->set(memtable_hit_count); - dinfo_replica("_pfc_rdb_memtable_hit_count: {}", memtable_hit_count); + LOG_DEBUG_PREFIX("_pfc_rdb_memtable_hit_count: {}", memtable_hit_count); auto memtable_miss_count = _statistics->getTickerCount(rocksdb::MEMTABLE_MISS); auto memtable_total = memtable_hit_count + memtable_miss_count; _pfc_rdb_memtable_total_count->set(memtable_total); - dinfo_replica("_pfc_rdb_memtable_total_count: {}", memtable_total); + LOG_DEBUG_PREFIX("_pfc_rdb_memtable_total_count: {}", memtable_total); auto l0_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L0); _pfc_rdb_l0_hit_count->set(l0_hit_count); - dinfo_replica("_pfc_rdb_l0_hit_count: {}", l0_hit_count); + LOG_DEBUG_PREFIX("_pfc_rdb_l0_hit_count: {}", l0_hit_count); auto l1_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L1); _pfc_rdb_l1_hit_count->set(l1_hit_count); - dinfo_replica("_pfc_rdb_l1_hit_count: {}", l1_hit_count); + LOG_DEBUG_PREFIX("_pfc_rdb_l1_hit_count: {}", l1_hit_count); auto l2andup_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L2_AND_UP); _pfc_rdb_l2andup_hit_count->set(l2andup_hit_count); - dinfo_replica("_pfc_rdb_l2andup_hit_count: {}", l2andup_hit_count); + LOG_DEBUG_PREFIX("_pfc_rdb_l2andup_hit_count: {}", l2andup_hit_count); } void pegasus_server_impl::update_server_rocksdb_statistics() @@ -2645,15 +2648,15 @@ void pegasus_server_impl::update_usage_scenario(const std::mapsecond, ttl) || ttl < 0) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return; } _server_write->set_default_ttl(static_cast(ttl)); @@ -2684,30 +2687,30 @@ void pegasus_server_impl::update_checkpoint_reserve(const std::mapsecond, count) || count <= 0) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return; } } find = envs.find(ROCKDB_CHECKPOINT_RESERVE_TIME_SECONDS); if (find != envs.end()) { if (!dsn::buf2int32(find->second, time) || time < 0) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return; } } if (count != _checkpoint_reserve_min_count) { - ddebug_replica("update app env[{}] from \"{}\" to \"{}\" succeed", - ROCKDB_CHECKPOINT_RESERVE_MIN_COUNT, - _checkpoint_reserve_min_count, - count); + LOG_INFO_PREFIX("update app env[{}] from \"{}\" to \"{}\" succeed", + ROCKDB_CHECKPOINT_RESERVE_MIN_COUNT, + _checkpoint_reserve_min_count, + count); _checkpoint_reserve_min_count = count; } if (time != _checkpoint_reserve_time_seconds) { - ddebug_replica("update app env[{}] from \"{}\" to 
\"{}\" succeed", - ROCKDB_CHECKPOINT_RESERVE_TIME_SECONDS, - _checkpoint_reserve_time_seconds, - time); + LOG_INFO_PREFIX("update app env[{}] from \"{}\" to \"{}\" succeed", + ROCKDB_CHECKPOINT_RESERVE_TIME_SECONDS, + _checkpoint_reserve_time_seconds, + time); _checkpoint_reserve_time_seconds = time; } } @@ -2725,10 +2728,10 @@ void pegasus_server_impl::update_throttling_controller( parse_error, throttling_changed, old_throttling)) { - dwarn_replica("parse env failed, key = \"{}\", value = \"{}\", error = \"{}\"", - READ_SIZE_THROTTLING, - find->second, - parse_error); + LOG_WARNING_PREFIX("parse env failed, key = \"{}\", value = \"{}\", error = \"{}\"", + READ_SIZE_THROTTLING, + find->second, + parse_error); // reset if parse failed _read_size_throttling_controller->reset(throttling_changed, old_throttling); } @@ -2737,10 +2740,10 @@ void pegasus_server_impl::update_throttling_controller( _read_size_throttling_controller->reset(throttling_changed, old_throttling); } if (throttling_changed) { - ddebug_replica("switch {} from \"{}\" to \"{}\"", - READ_SIZE_THROTTLING, - old_throttling, - _read_size_throttling_controller->env_value()); + LOG_INFO_PREFIX("switch {} from \"{}\" to \"{}\"", + READ_SIZE_THROTTLING, + old_throttling, + _read_size_throttling_controller->env_value()); } } @@ -2753,7 +2756,7 @@ void pegasus_server_impl::update_slow_query_threshold( // get slow query from env(the unit of slow query from env is ms) uint64_t threshold_ms; if (!dsn::buf2uint64(find->second, threshold_ms) || threshold_ms <= 0) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return; } threshold_ns = threshold_ms * 1e6; @@ -2761,10 +2764,10 @@ void pegasus_server_impl::update_slow_query_threshold( // check if they are changed if (_slow_query_threshold_ns != threshold_ns) { - ddebug_replica("update app env[{}] from \"{}\" to \"{}\" succeed", - ROCKSDB_ENV_SLOW_QUERY_THRESHOLD, - _slow_query_threshold_ns, - threshold_ns); + LOG_INFO_PREFIX("update app env[{}] from \"{}\" to \"{}\" succeed", + ROCKSDB_ENV_SLOW_QUERY_THRESHOLD, + _slow_query_threshold_ns, + threshold_ns); _slow_query_threshold_ns = threshold_ns; } } @@ -2777,16 +2780,16 @@ void pegasus_server_impl::update_rocksdb_iteration_threshold( if (find != envs.end()) { // the unit of iteration threshold from env is ms if (!dsn::buf2uint64(find->second, threshold_ms) || threshold_ms < 0) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return; } } if (_rng_rd_opts.rocksdb_iteration_threshold_time_ms != threshold_ms) { - ddebug_replica("update app env[{}] from \"{}\" to \"{}\" succeed", - ROCKSDB_ITERATION_THRESHOLD_TIME_MS, - _rng_rd_opts.rocksdb_iteration_threshold_time_ms, - threshold_ms); + LOG_INFO_PREFIX("update app env[{}] from \"{}\" to \"{}\" succeed", + ROCKSDB_ITERATION_THRESHOLD_TIME_MS, + _rng_rd_opts.rocksdb_iteration_threshold_time_ms, + threshold_ms); _rng_rd_opts.rocksdb_iteration_threshold_time_ms = threshold_ms; } } @@ -2799,16 +2802,16 @@ void pegasus_server_impl::update_rocksdb_block_cache_enabled( auto find = envs.find(ROCKSDB_BLOCK_CACHE_ENABLED); if (find != envs.end()) { if (!dsn::buf2bool(find->second, cache_enabled)) { - derror_replica("{}={} is invalid.", find->first, find->second); + LOG_ERROR_PREFIX("{}={} is invalid.", find->first, find->second); return; } } if (_data_cf_rd_opts.fill_cache != cache_enabled) { - ddebug_replica("update app env[{}] from 
\"{}\" to \"{}\" succeed", - ROCKSDB_BLOCK_CACHE_ENABLED, - _data_cf_rd_opts.fill_cache, - cache_enabled); + LOG_INFO_PREFIX("update app env[{}] from \"{}\" to \"{}\" succeed", + ROCKSDB_BLOCK_CACHE_ENABLED, + _data_cf_rd_opts.fill_cache, + cache_enabled); _data_cf_rd_opts.fill_cache = cache_enabled; } } @@ -2820,12 +2823,12 @@ void pegasus_server_impl::update_validate_partition_hash( auto iter = envs.find(SPLIT_VALIDATE_PARTITION_HASH); if (iter != envs.end()) { if (!dsn::buf2bool(iter->second, new_value)) { - derror_replica("{}={} is invalid.", iter->first, iter->second); + LOG_ERROR_PREFIX("{}={} is invalid.", iter->first, iter->second); return; } } if (new_value != _validate_partition_hash) { - ddebug_replica( + LOG_INFO_PREFIX( "update '_validate_partition_hash' from {} to {}", _validate_partition_hash, new_value); _validate_partition_hash = new_value; _key_ttl_compaction_filter_factory->SetValidatePartitionHash(_validate_partition_hash); @@ -2837,13 +2840,13 @@ void pegasus_server_impl::update_user_specified_compaction( { auto iter = envs.find(USER_SPECIFIED_COMPACTION); if (dsn_unlikely(iter == envs.end() && _user_specified_compaction != "")) { - ddebug_replica("clear user specified compaction coz it was deleted"); + LOG_INFO_PREFIX("clear user specified compaction coz it was deleted"); _key_ttl_compaction_filter_factory->clear_user_specified_ops(); _user_specified_compaction = ""; return; } if (dsn_unlikely(iter != envs.end() && iter->second != _user_specified_compaction)) { - ddebug_replica("update user specified compaction coz it was changed"); + LOG_INFO_PREFIX("update user specified compaction coz it was changed"); _key_ttl_compaction_filter_factory->extract_user_specified_ops(iter->second); _user_specified_compaction = iter->second; return; @@ -2858,11 +2861,11 @@ bool pegasus_server_impl::parse_allow_ingest_behind(const std::mapsecond, allow_ingest_behind)) { - dwarn_replica( + LOG_WARNING_PREFIX( "{}={} is invalid, set allow_ingest_behind = false", iter->first, iter->second); return false; } - ddebug_replica("update allow_ingest_behind = {}", allow_ingest_behind); + LOG_INFO_PREFIX("update allow_ingest_behind = {}", allow_ingest_behind); return allow_ingest_behind; } @@ -2921,7 +2924,7 @@ bool pegasus_server_impl::compression_str_to_type(const std::string &compression } else if (compression_str == "zstd") { type = rocksdb::kZSTD; } else { - derror_replica("Unsupported compression type: {}.", compression_str); + LOG_ERROR_PREFIX("Unsupported compression type: {}.", compression_str); return false; } return true; @@ -2939,7 +2942,7 @@ std::string pegasus_server_impl::compression_type_to_str(rocksdb::CompressionTyp case rocksdb::kZSTD: return "zstd"; default: - derror_replica("Unsupported compression type: {}.", type); + LOG_ERROR_PREFIX("Unsupported compression type: {}.", type); return ""; } } @@ -3005,11 +3008,11 @@ bool pegasus_server_impl::set_usage_scenario(const std::string &usage_scenario) if (set_options(new_options)) { _meta_store->set_usage_scenario(usage_scenario); _usage_scenario = usage_scenario; - ddebug_replica( + LOG_INFO_PREFIX( "set usage scenario from \"{}\" to \"{}\" succeed", old_usage_scenario, usage_scenario); return true; } else { - derror_replica( + LOG_ERROR_PREFIX( "set usage scenario from \"{}\" to \"{}\" failed", old_usage_scenario, usage_scenario); return false; } @@ -3113,7 +3116,7 @@ void pegasus_server_impl::recalculate_data_cf_options( } if (new_options.size() > 0) { if (set_options(new_options)) { - ddebug_replica( + LOG_INFO_PREFIX( "{}: 
recalculate the value of the options related to usage scenario \"{}\"", replica_name(), _usage_scenario); @@ -3129,7 +3132,7 @@ bool pegasus_server_impl::set_options( const std::unordered_map &new_options) { if (!_is_open) { - dwarn_replica("set_options failed, db is not open"); + LOG_WARNING_PREFIX("set_options failed, db is not open"); return false; } @@ -3166,7 +3169,7 @@ ::dsn::error_code pegasus_server_impl::check_column_families(const std::string & std::vector column_families; auto s = rocksdb::DB::ListColumnFamilies(rocksdb::DBOptions(), path, &column_families); if (!s.ok()) { - derror_replica("rocksdb::DB::ListColumnFamilies failed, error = {}", s.ToString()); + LOG_ERROR_PREFIX("rocksdb::DB::ListColumnFamilies failed, error = {}", s.ToString()); return ::dsn::ERR_LOCAL_APP_FAILURE; } @@ -3176,7 +3179,7 @@ ::dsn::error_code pegasus_server_impl::check_column_families(const std::string & } else if (column_family == DATA_COLUMN_FAMILY_NAME) { *missing_data_cf = false; } else { - derror_replica("unknown column family name: {}", column_family); + LOG_ERROR_PREFIX("unknown column family name: {}", column_family); return ::dsn::ERR_LOCAL_APP_FAILURE; } } @@ -3188,20 +3191,21 @@ uint64_t pegasus_server_impl::do_manual_compact(const rocksdb::CompactRangeOptio // wait flush before compact to make all data compacted. uint64_t start_time = dsn_now_ms(); flush_all_family_columns(true); - ddebug_replica("finish flush_all_family_columns, time_used = {} ms", dsn_now_ms() - start_time); + LOG_INFO_PREFIX("finish flush_all_family_columns, time_used = {} ms", + dsn_now_ms() - start_time); // do compact - ddebug_replica("start CompactRange, target_level = {}, bottommost_level_compaction = {}", - options.target_level, - options.bottommost_level_compaction == rocksdb::BottommostLevelCompaction::kForce - ? "force" - : "skip"); + LOG_INFO_PREFIX( + "start CompactRange, target_level = {}, bottommost_level_compaction = {}", + options.target_level, + options.bottommost_level_compaction == rocksdb::BottommostLevelCompaction::kForce ? "force" + : "skip"); start_time = dsn_now_ms(); auto status = _db->CompactRange(options, _data_cf, nullptr, nullptr); auto end_time = dsn_now_ms(); - ddebug_replica("finish CompactRange, status = {}, time_used = {}ms", - status.ToString(), - end_time - start_time); + LOG_INFO_PREFIX("finish CompactRange, status = {}, time_used = {}ms", + status.ToString(), + end_time - start_time); _meta_store->set_last_manual_compact_finish_time(end_time); // generate new checkpoint and remove old checkpoints, in order to release storage asap if (!release_storage_after_manual_compact()) { @@ -3209,11 +3213,11 @@ uint64_t pegasus_server_impl::do_manual_compact(const rocksdb::CompactRangeOptio // written into rocksdb when doing manual compact. // we will try to generate it again, and it will probably succeed because at least some // empty data is written into rocksdb by periodic group check. 
- ddebug_replica("release storage failed after manual compact, will retry after 5 minutes"); + LOG_INFO_PREFIX("release storage failed after manual compact, will retry after 5 minutes"); ::dsn::tasking::enqueue(LPC_PEGASUS_SERVER_DELAY, &_tracker, [this]() { - ddebug_replica("retry release storage after manual compact"); + LOG_INFO_PREFIX("retry release storage after manual compact"); release_storage_after_manual_compact(); }, 0, @@ -3233,30 +3237,31 @@ bool pegasus_server_impl::release_storage_after_manual_compact() // wait flush before async checkpoint to make all data compacted uint64_t start_time = dsn_now_ms(); flush_all_family_columns(true); - ddebug_replica("finish flush_all_family_columns, time_used = {} ms", dsn_now_ms() - start_time); + LOG_INFO_PREFIX("finish flush_all_family_columns, time_used = {} ms", + dsn_now_ms() - start_time); // async checkpoint - ddebug_replica("start async_checkpoint"); + LOG_INFO_PREFIX("start async_checkpoint"); start_time = dsn_now_ms(); ::dsn::error_code err = async_checkpoint(false); - ddebug_replica("finish async_checkpoint, return = {}, time_used = {}ms", - err.to_string(), - dsn_now_ms() - start_time); + LOG_INFO_PREFIX("finish async_checkpoint, return = {}, time_used = {}ms", + err.to_string(), + dsn_now_ms() - start_time); // gc checkpoints - ddebug_replica("start gc_checkpoints"); + LOG_INFO_PREFIX("start gc_checkpoints"); start_time = dsn_now_ms(); gc_checkpoints(true); - ddebug_replica("finish gc_checkpoints, time_used = {}ms", dsn_now_ms() - start_time); + LOG_INFO_PREFIX("finish gc_checkpoints, time_used = {}ms", dsn_now_ms() - start_time); int64_t new_last_durable = last_flushed_decree(); if (new_last_durable > old_last_durable) { - ddebug_replica("release storage succeed, last_durable_decree changed from {} to {}", - old_last_durable, - new_last_durable); + LOG_INFO_PREFIX("release storage succeed, last_durable_decree changed from {} to {}", + old_last_durable, + new_last_durable); return true; } else { - ddebug_replica("release storage failed, last_durable_decree remains {}", new_last_durable); + LOG_INFO_PREFIX("release storage failed, last_durable_decree remains {}", new_last_durable); return false; } } @@ -3269,7 +3274,7 @@ std::string pegasus_server_impl::query_compact_state() const void pegasus_server_impl::set_partition_version(int32_t partition_version) { int32_t old_partition_version = _partition_version.exchange(partition_version); - ddebug_replica( + LOG_INFO_PREFIX( "update partition version from {} to {}", old_partition_version, partition_version); _key_ttl_compaction_filter_factory->SetPartitionVersion(partition_version); } @@ -3280,7 +3285,7 @@ ::dsn::error_code pegasus_server_impl::flush_all_family_columns(bool wait) options.wait = wait; rocksdb::Status status = _db->Flush(options, {_meta_cf, _data_cf}); if (!status.ok()) { - derror_replica("flush failed, error = {}", status.ToString()); + LOG_ERROR_PREFIX("flush failed, error = {}", status.ToString()); return ::dsn::ERR_LOCAL_APP_FAILURE; } return ::dsn::ERR_OK; @@ -3339,9 +3344,9 @@ std::string pegasus_server_impl::dump_write_request(dsn::message_ex *request) void pegasus_server_impl::set_ingestion_status(dsn::replication::ingestion_status::type status) { - ddebug_replica("ingestion status from {} to {}", - dsn::enum_to_string(_ingestion_status), - dsn::enum_to_string(status)); + LOG_INFO_PREFIX("ingestion status from {} to {}", + dsn::enum_to_string(_ingestion_status), + dsn::enum_to_string(status)); _ingestion_status = status; } diff --git 
a/src/server/pegasus_server_impl_init.cpp b/src/server/pegasus_server_impl_init.cpp index d5171afbc1..3f0784055f 100644 --- a/src/server/pegasus_server_impl_init.cpp +++ b/src/server/pegasus_server_impl_init.cpp @@ -403,7 +403,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) false, "enable write buffer manager to limit total memory " "used by memtables and block caches across multiple replicas"); - ddebug_replica("rocksdb_enable_write_buffer_manager = {}", enable_write_buffer_manager); + LOG_INFO_PREFIX("rocksdb_enable_write_buffer_manager = {}", enable_write_buffer_manager); if (enable_write_buffer_manager) { // If write buffer manager is enabled, all replicas(one DB instance for each // replica) on this server will share the same write buffer manager object, @@ -420,8 +420,8 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) "rocksdb_total_size_across_write_buffer", 0, "total size limit used by memtables across multiple replicas"); - ddebug_replica("rocksdb_total_size_across_write_buffer = {}", - total_size_across_write_buffer); + LOG_INFO_PREFIX("rocksdb_total_size_across_write_buffer = {}", + total_size_across_write_buffer); _s_write_buffer_manager = std::make_shared( static_cast(total_size_across_write_buffer), tbl_opts.block_cache); }); @@ -434,16 +434,16 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) -1, /* always keep files opened, default by rocksdb */ "number of opened files that can be used by a replica(namely a DB instance)"); _db_opts.max_open_files = static_cast(max_open_files); - ddebug_replica("rocksdb_max_open_files = {}", _db_opts.max_open_files); + LOG_INFO_PREFIX("rocksdb_max_open_files = {}", _db_opts.max_open_files); _db_opts.max_log_file_size = static_cast(FLAGS_rocksdb_max_log_file_size); - ddebug_replica("rocksdb_max_log_file_size = {}", _db_opts.max_log_file_size); + LOG_INFO_PREFIX("rocksdb_max_log_file_size = {}", _db_opts.max_log_file_size); _db_opts.log_file_time_to_roll = static_cast(FLAGS_rocksdb_log_file_time_to_roll); - ddebug_replica("rocksdb_log_file_time_to_roll = {}", _db_opts.log_file_time_to_roll); + LOG_INFO_PREFIX("rocksdb_log_file_time_to_roll = {}", _db_opts.log_file_time_to_roll); _db_opts.keep_log_file_num = static_cast(FLAGS_rocksdb_keep_log_file_num); - ddebug_replica("rocksdb_keep_log_file_num = {}", _db_opts.keep_log_file_num); + LOG_INFO_PREFIX("rocksdb_keep_log_file_num = {}", _db_opts.keep_log_file_num); std::string index_type = dsn_config_get_value_string("pegasus.server", @@ -455,7 +455,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) "[pegasus.server]rocksdb_index_type should be one among binary_search, " "hash_search, two_level_index_search or binary_search_with_first_key."); tbl_opts.index_type = index_type_item->second; - ddebug_replica("rocksdb_index_type = {}", index_type.c_str()); + LOG_INFO_PREFIX("rocksdb_index_type = {}", index_type.c_str()); tbl_opts.partition_filters = dsn_config_get_value_bool( "pegasus.server", @@ -464,7 +464,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) "Note: currently this option requires two_level_index_search to be set as well. " "Use partitioned full filters for each SST file. 
This option is " "incompatibile with block-based filters."); - ddebug_replica("rocksdb_partition_filters = {}", tbl_opts.partition_filters); + LOG_INFO_PREFIX("rocksdb_partition_filters = {}", tbl_opts.partition_filters); tbl_opts.metadata_block_size = dsn_config_get_value_uint64( "pegasus.server", @@ -477,7 +477,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) "block size reaches the specified limit. " "Note: this limit is currently applied to only index blocks; a filter " "partition is cut right after an index block is cut"); - ddebug_replica("rocksdb_metadata_block_size = {}", tbl_opts.metadata_block_size); + LOG_INFO_PREFIX("rocksdb_metadata_block_size = {}", tbl_opts.metadata_block_size); tbl_opts.cache_index_and_filter_blocks = dsn_config_get_value_bool( "pegasus.server", @@ -486,8 +486,8 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) "Indicating if we'd put index/filter blocks to the block cache. " "If not specified, each \"table reader\" object will pre-load index/filter " "block during table initialization."); - ddebug_replica("rocksdb_cache_index_and_filter_blocks = {}", - tbl_opts.cache_index_and_filter_blocks); + LOG_INFO_PREFIX("rocksdb_cache_index_and_filter_blocks = {}", + tbl_opts.cache_index_and_filter_blocks); tbl_opts.pin_top_level_index_and_filter = dsn_config_get_value_bool( "pegasus.server", @@ -498,8 +498,8 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) "the cache, but a reference is held in the \"table reader\" object so the " "blocks are pinned and only evicted from cache when the table reader is " "freed. This is not limited to l0 in LSM tree."); - ddebug_replica("rocksdb_pin_top_level_index_and_filter = {}", - tbl_opts.pin_top_level_index_and_filter); + LOG_INFO_PREFIX("rocksdb_pin_top_level_index_and_filter = {}", + tbl_opts.pin_top_level_index_and_filter); tbl_opts.cache_index_and_filter_blocks_with_high_priority = dsn_config_get_value_bool( "pegasus.server", @@ -509,8 +509,8 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) "blocks with high priority. If set to true, depending on implementation of " "block cache, index and filter blocks may be less likely to be evicted " "than data blocks."); - ddebug_replica("rocksdb_cache_index_and_filter_blocks_with_high_priority = {}", - tbl_opts.cache_index_and_filter_blocks_with_high_priority); + LOG_INFO_PREFIX("rocksdb_cache_index_and_filter_blocks_with_high_priority = {}", + tbl_opts.cache_index_and_filter_blocks_with_high_priority); tbl_opts.pin_l0_filter_and_index_blocks_in_cache = dsn_config_get_value_bool( "pegasus.server", @@ -520,8 +520,8 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r) "filter and index blocks are stored in the cache, but a reference is " "held in the \"table reader\" object so the blocks are pinned and only " "evicted from cache when the table reader is freed."); - ddebug_replica("rocksdb_pin_l0_filter_and_index_blocks_in_cache = {}", - tbl_opts.pin_l0_filter_and_index_blocks_in_cache); + LOG_INFO_PREFIX("rocksdb_pin_l0_filter_and_index_blocks_in_cache = {}", + tbl_opts.pin_l0_filter_and_index_blocks_in_cache); // Bloom filter configurations. 
bool disable_bloom_filter = dsn_config_get_value_bool( diff --git a/src/server/pegasus_server_write.cpp b/src/server/pegasus_server_write.cpp index e156c3dcf0..f59b25593b 100644 --- a/src/server/pegasus_server_write.cpp +++ b/src/server/pegasus_server_write.cpp @@ -66,9 +66,9 @@ int pegasus_server_write::on_batched_write_requests(dsn::message_ex **requests, } } catch (TTransportException &ex) { _pfc_recent_corrupt_write_count->increment(); - derror_replica("pegasus not batch write handler failed, from = {}, exception = {}", - requests[0]->header->from_address.to_string(), - ex.what()); + LOG_ERROR_PREFIX("pegasus not batch write handler failed, from = {}, exception = {}", + requests[0]->header->from_address.to_string(), + ex.what()); return 0; } @@ -110,9 +110,9 @@ int pegasus_server_write::on_batched_writes(dsn::message_ex **requests, int coun } } catch (TTransportException &ex) { _pfc_recent_corrupt_write_count->increment(); - derror_replica("pegasus batch writes handler failed, from = {}, exception = {}", - requests[i]->header->from_address.to_string(), - ex.what()); + LOG_ERROR_PREFIX("pegasus batch writes handler failed, from = {}, exception = {}", + requests[i]->header->from_address.to_string(), + ex.what()); } if (!err && local_err) { @@ -150,12 +150,12 @@ void pegasus_server_write::request_key_check(int64_t decree, ::dsn::blob hash_key, sort_key; pegasus_restore_key(key, hash_key, sort_key); - ddebug_rocksdb("Write", - "decree: {}, code: {}, hash_key: {}, sort_key: {}", - decree, - msg->local_rpc_code.to_string(), - utils::c_escape_string(hash_key), - utils::c_escape_string(sort_key)); + LOG_INFO_ROCKSDB("Write", + "decree: {}, code: {}, hash_key: {}, sort_key: {}", + decree, + msg->local_rpc_code.to_string(), + utils::c_escape_string(hash_key), + utils::c_escape_string(sort_key)); } } diff --git a/src/server/pegasus_write_service_impl.h b/src/server/pegasus_write_service_impl.h index 4c668bac70..6d28f39508 100644 --- a/src/server/pegasus_write_service_impl.h +++ b/src/server/pegasus_write_service_impl.h @@ -118,9 +118,9 @@ class pegasus_write_service::impl : public dsn::replication::replica_base resp.server = _primary_address; if (update.kvs.empty()) { - derror_replica("invalid argument for multi_put: decree = {}, error = {}", - decree, - "request.kvs is empty"); + LOG_ERROR_PREFIX("invalid argument for multi_put: decree = {}, error = {}", + decree, + "request.kvs is empty"); resp.error = rocksdb::Status::kInvalidArgument; // we should write empty record to update rocksdb's last flushed decree return empty_put(decree); @@ -152,9 +152,9 @@ class pegasus_write_service::impl : public dsn::replication::replica_base resp.server = _primary_address; if (update.sort_keys.empty()) { - derror_replica("invalid argument for multi_remove: decree = {}, error = {}", - decree, - "request.sort_keys is empty"); + LOG_ERROR_PREFIX("invalid argument for multi_remove: decree = {}, error = {}", + decree, + "request.sort_keys is empty"); resp.error = rocksdb::Status::kInvalidArgument; // we should write empty record to update rocksdb's last flushed decree return empty_put(decree); @@ -211,10 +211,10 @@ class pegasus_write_service::impl : public dsn::replication::replica_base int64_t old_value_int; if (!dsn::buf2int64(old_value, old_value_int)) { // invalid old value - derror_replica("incr failed: decree = {}, error = " - "old value \"{}\" is not an integer or out of range", - decree, - utils::c_escape_string(old_value)); + LOG_ERROR_PREFIX("incr failed: decree = {}, error = " + "old value \"{}\" is not 
an integer or out of range", + decree, + utils::c_escape_string(old_value)); resp.error = rocksdb::Status::kInvalidArgument; // we should write empty record to update rocksdb's last flushed decree return empty_put(decree); @@ -223,11 +223,11 @@ class pegasus_write_service::impl : public dsn::replication::replica_base if ((update.increment > 0 && new_value < old_value_int) || (update.increment < 0 && new_value > old_value_int)) { // new value is out of range, return old value by 'new_value' - derror_replica("incr failed: decree = {}, error = " - "new value is out of range, old_value = {}, increment = {}", - decree, - old_value_int, - update.increment); + LOG_ERROR_PREFIX("incr failed: decree = {}, error = " + "new value is out of range, old_value = {}, increment = {}", + decree, + old_value_int, + update.increment); resp.error = rocksdb::Status::kInvalidArgument; resp.new_value = old_value_int; // we should write empty record to update rocksdb's last flushed decree @@ -268,10 +268,10 @@ class pegasus_write_service::impl : public dsn::replication::replica_base resp.server = _primary_address; if (!is_check_type_supported(update.check_type)) { - derror_replica("invalid argument for check_and_set: decree = {}, error = {}", - decree, - "check type {} not supported", - update.check_type); + LOG_ERROR_PREFIX("invalid argument for check_and_set: decree = {}, error = {}", + decree, + "check type {} not supported", + update.check_type); resp.error = rocksdb::Status::kInvalidArgument; // we should write empty record to update rocksdb's last flushed decree return empty_put(decree); @@ -285,11 +285,11 @@ class pegasus_write_service::impl : public dsn::replication::replica_base int err = _rocksdb_wrapper->get(check_raw_key, &get_context); if (err != 0) { // read check value failed - derror_rocksdb("Error to GetCheckValue for CheckAndSet decree: {}, hash_key: {}, " - "check_sort_key: {}", - decree, - utils::c_escape_string(update.hash_key), - utils::c_escape_string(update.check_sort_key)); + LOG_ERROR_ROCKSDB("Error to GetCheckValue for CheckAndSet decree: {}, hash_key: {}, " + "check_sort_key: {}", + decree, + utils::c_escape_string(update.hash_key), + utils::c_escape_string(update.check_sort_key)); resp.error = err; return resp.error; } @@ -365,9 +365,9 @@ class pegasus_write_service::impl : public dsn::replication::replica_base resp.server = _primary_address; if (update.mutate_list.empty()) { - derror_replica("invalid argument for check_and_mutate: decree = {}, error = {}", - decree, - "mutate list is empty"); + LOG_ERROR_PREFIX("invalid argument for check_and_mutate: decree = {}, error = {}", + decree, + "mutate list is empty"); resp.error = rocksdb::Status::kInvalidArgument; // we should write empty record to update rocksdb's last flushed decree return empty_put(decree); @@ -377,11 +377,11 @@ class pegasus_write_service::impl : public dsn::replication::replica_base auto &mu = update.mutate_list[i]; if (mu.operation != ::dsn::apps::mutate_operation::MO_PUT && mu.operation != ::dsn::apps::mutate_operation::MO_DELETE) { - derror_replica("invalid argument for check_and_mutate: decree = {}, error = " - "mutation[{}] uses invalid operation {}", - decree, - i, - mu.operation); + LOG_ERROR_PREFIX("invalid argument for check_and_mutate: decree = {}, error = " + "mutation[{}] uses invalid operation {}", + decree, + i, + mu.operation); resp.error = rocksdb::Status::kInvalidArgument; // we should write empty record to update rocksdb's last flushed decree return empty_put(decree); @@ -389,10 +389,10 @@ class 
pegasus_write_service::impl : public dsn::replication::replica_base } if (!is_check_type_supported(update.check_type)) { - derror_replica("invalid argument for check_and_mutate: decree = {}, error = {}", - decree, - "check type {} not supported", - update.check_type); + LOG_ERROR_PREFIX("invalid argument for check_and_mutate: decree = {}, error = {}", + decree, + "check type {} not supported", + update.check_type); resp.error = rocksdb::Status::kInvalidArgument; // we should write empty record to update rocksdb's last flushed decree return empty_put(decree); @@ -406,11 +406,11 @@ class pegasus_write_service::impl : public dsn::replication::replica_base int err = _rocksdb_wrapper->get(check_raw_key, &get_context); if (err != 0) { // read check value failed - derror_rocksdb("Error to GetCheckValue for CheckAndMutate decree: {}, hash_key: {}, " - "check_sort_key: {}", - decree, - utils::c_escape_string(update.hash_key), - utils::c_escape_string(update.check_sort_key)); + LOG_ERROR_ROCKSDB("Error to GetCheckValue for CheckAndMutate decree: {}, hash_key: {}, " + "check_sort_key: {}", + decree, + utils::c_escape_string(update.hash_key), + utils::c_escape_string(update.check_sort_key)); resp.error = err; return resp.error; } @@ -493,10 +493,10 @@ class pegasus_write_service::impl : public dsn::replication::replica_base // if ballot updated, ignore this request if (req_ballot < current_ballot) { - dwarn_replica("out-dated ingestion request, ballot changed, request({}) vs " - "current({}), ignore it", - req_ballot, - current_ballot); + LOG_WARNING_PREFIX("out-dated ingestion request, ballot changed, request({}) vs " + "current({}), ignore it", + req_ballot, + current_ballot); return dsn::ERR_INVALID_VERSION; } @@ -647,20 +647,20 @@ class pegasus_write_service::impl : public dsn::replication::replica_base int64_t check_value_int; if (!dsn::buf2int64(value, check_value_int)) { // invalid check value - derror_replica("check failed: decree = {}, error = " - "check value \"{}\" is not an integer or out of range", - decree, - utils::c_escape_string(value)); + LOG_ERROR_PREFIX("check failed: decree = {}, error = " + "check value \"{}\" is not an integer or out of range", + decree, + utils::c_escape_string(value)); invalid_argument = true; return false; } int64_t check_operand_int; if (!dsn::buf2int64(check_operand, check_operand_int)) { // invalid check operand - derror_replica("check failed: decree = {}, error = " - "check operand \"{}\" is not an integer or out of range", - decree, - utils::c_escape_string(check_operand)); + LOG_ERROR_PREFIX("check failed: decree = {}, error = " + "check operand \"{}\" is not an integer or out of range", + decree, + utils::c_escape_string(check_operand)); invalid_argument = true; return false; } diff --git a/src/server/rocksdb_wrapper.cpp b/src/server/rocksdb_wrapper.cpp index 33a18cf9f7..981eb6d7a1 100644 --- a/src/server/rocksdb_wrapper.cpp +++ b/src/server/rocksdb_wrapper.cpp @@ -66,11 +66,11 @@ int rocksdb_wrapper::get(dsn::string_view raw_key, /*out*/ db_get_context *ctx) dsn::blob hash_key, sort_key; pegasus_restore_key(dsn::blob(raw_key.data(), 0, raw_key.size()), hash_key, sort_key); - derror_rocksdb("Get", - s.ToString(), - "hash_key: {}, sort_key: {}", - utils::c_escape_string(hash_key), - utils::c_escape_string(sort_key)); + LOG_ERROR_ROCKSDB("Get", + s.ToString(), + "hash_key: {}, sort_key: {}", + utils::c_escape_string(hash_key), + utils::c_escape_string(sort_key)); return s.code(); } @@ -125,13 +125,13 @@ int rocksdb_wrapper::write_batch_put_ctx(const 
db_write_context &ctx, if (dsn_unlikely(!s.ok())) { ::dsn::blob hash_key, sort_key; pegasus_restore_key(::dsn::blob(raw_key.data(), 0, raw_key.size()), hash_key, sort_key); - derror_rocksdb("WriteBatchPut", - s.ToString(), - "decree: {}, hash_key: {}, sort_key: {}, expire_ts: {}", - ctx.decree, - utils::c_escape_string(hash_key), - utils::c_escape_string(sort_key), - expire_sec); + LOG_ERROR_ROCKSDB("WriteBatchPut", + s.ToString(), + "decree: {}, hash_key: {}, sort_key: {}, expire_ts: {}", + ctx.decree, + utils::c_escape_string(hash_key), + utils::c_escape_string(sort_key), + expire_sec); } return s.code(); } @@ -145,16 +145,16 @@ int rocksdb_wrapper::write(int64_t decree) rocksdb::Status status = _write_batch->Put(_meta_cf, meta_store::LAST_FLUSHED_DECREE, std::to_string(decree)); if (dsn_unlikely(!status.ok())) { - derror_rocksdb("Write", - status.ToString(), - "put decree of meta cf into batch error, decree: {}", - decree); + LOG_ERROR_ROCKSDB("Write", + status.ToString(), + "put decree of meta cf into batch error, decree: {}", + decree); return status.code(); } status = _db->Write(*_wt_opts, _write_batch.get()); if (dsn_unlikely(!status.ok())) { - derror_rocksdb("Write", status.ToString(), "write rocksdb error, decree: {}", decree); + LOG_ERROR_ROCKSDB("Write", status.ToString(), "write rocksdb error, decree: {}", decree); } return status.code(); } @@ -168,12 +168,12 @@ int rocksdb_wrapper::write_batch_delete(int64_t decree, dsn::string_view raw_key if (dsn_unlikely(!s.ok())) { dsn::blob hash_key, sort_key; pegasus_restore_key(dsn::blob(raw_key.data(), 0, raw_key.size()), hash_key, sort_key); - derror_rocksdb("write_batch_delete", - s.ToString(), - "decree: {}, hash_key: {}, sort_key: {}", - decree, - utils::c_escape_string(hash_key), - utils::c_escape_string(sort_key)); + LOG_ERROR_ROCKSDB("write_batch_delete", + s.ToString(), + "decree: {}, hash_key: {}, sort_key: {}", + decree, + utils::c_escape_string(hash_key), + utils::c_escape_string(sort_key)); } return s.code(); } @@ -189,16 +189,16 @@ int rocksdb_wrapper::ingest_files(int64_t decree, ifo.ingest_behind = ingest_behind; rocksdb::Status s = _db->IngestExternalFile(sst_file_list, ifo); if (dsn_unlikely(!s.ok())) { - derror_rocksdb("IngestExternalFile", - s.ToString(), - "decree = {}, ingest_behind = {}", - decree, - ingest_behind); + LOG_ERROR_ROCKSDB("IngestExternalFile", + s.ToString(), + "decree = {}, ingest_behind = {}", + decree, + ingest_behind); } else { - ddebug_rocksdb("IngestExternalFile", - "Ingest files succeed, decree = {}, ingest_behind = {}", - decree, - ingest_behind); + LOG_INFO_ROCKSDB("IngestExternalFile", + "Ingest files succeed, decree = {}, ingest_behind = {}", + decree, + ingest_behind); } return s.code(); } @@ -207,7 +207,7 @@ void rocksdb_wrapper::set_default_ttl(uint32_t ttl) { if (_default_ttl != ttl) { _default_ttl = ttl; - ddebug_replica("update _default_ttl to {}", ttl); + LOG_INFO_PREFIX("update _default_ttl to {}", ttl); } } diff --git a/src/utils/fmt_logging.h b/src/utils/fmt_logging.h index a9d6d26f1c..e5f0a1a290 100644 --- a/src/utils/fmt_logging.h +++ b/src/utils/fmt_logging.h @@ -46,13 +46,13 @@ } \ } while (false) -// Macros for writing log message prefixed by gpid and address. -#define dinfo_replica(...) LOG_DEBUG_F("[{}] {}", replica_name(), fmt::format(__VA_ARGS__)) -#define ddebug_replica(...) LOG_INFO_F("[{}] {}", replica_name(), fmt::format(__VA_ARGS__)) -#define dwarn_replica(...) LOG_WARNING_F("[{}] {}", replica_name(), fmt::format(__VA_ARGS__)) -#define derror_replica(...) 
LOG_ERROR_F("[{}] {}", replica_name(), fmt::format(__VA_ARGS__)) -#define dfatal_replica(...) LOG_FATAL_F("[{}] {}", replica_name(), fmt::format(__VA_ARGS__)) -#define dassert_replica(x, ...) dassert_f(x, "[{}] {}", replica_name(), fmt::format(__VA_ARGS__)) +// Macros for writing log message prefixed by log_prefix(). +#define LOG_DEBUG_PREFIX(...) LOG_DEBUG_F("[{}] {}", log_prefix(), fmt::format(__VA_ARGS__)) +#define LOG_INFO_PREFIX(...) LOG_INFO_F("[{}] {}", log_prefix(), fmt::format(__VA_ARGS__)) +#define LOG_WARNING_PREFIX(...) LOG_WARNING_F("[{}] {}", log_prefix(), fmt::format(__VA_ARGS__)) +#define LOG_ERROR_PREFIX(...) LOG_ERROR_F("[{}] {}", log_prefix(), fmt::format(__VA_ARGS__)) +#define LOG_FATAL_PREFIX(...) LOG_FATAL_F("[{}] {}", log_prefix(), fmt::format(__VA_ARGS__)) +#define dassert_replica(x, ...) dassert_f(x, "[{}] {}", log_prefix(), fmt::format(__VA_ARGS__)) // Macros to check expected condition. It will abort the application // and log a fatal message when the condition is not met.