From 4b0ea26506b40fdabf6c1753d7446ef032d3c658 Mon Sep 17 00:00:00 2001
From: Yingchun Lai
Date: Mon, 31 Jul 2023 16:37:33 +0800
Subject: [PATCH] fix(slog): add an option to exit the process when find slog error (#1574)

https://github.com/apache/incubator-pegasus/issues/1572

Add an option to make it possible to exit the process and leave the corrupted slog and replicas to be handled by the administrator when open slog failed.
---
Reviewer notes (commentary only; this text sits between the three-dash
separator and the first "diff --git" line, so it is not part of the commit
message and not part of the patch content):

* This copy of the patch had been collapsed onto a single line. Line breaks
  and indentation were restored from the hunk line counts and the wrapping of
  the string literals; the leading whitespace of context lines is therefore
  reconstructed. NOTE(review): verify against the target tree (or apply with
  whitespace-tolerant options) before relying on it.
* Hunk 1 adds a boolean flag `crash_on_slog_error` in the `replication`
  section with default value `false`; its help text is the description of the
  intended behavior.
* Hunk 2 changes the failure branch (`err != ERR_OK`) after shared-log replay
  in replica_stub::initialize(): when `FLAGS_crash_on_slog_error` is set,
  LOG_FATAL is invoked before the pre-existing LOG_ERROR statement.
* NOTE(review): the flag's help text says the process exits in this case, so
  LOG_FATAL presumably terminates the process; its definition is not part of
  this patch - confirm. If LOG_FATAL can return, execution falls through to
  the existing LOG_ERROR statement and whatever follows it.
* The fatal message reports only `err`; unlike the LOG_ERROR message it does
  not include the elapsed replay time.

 src/replica/replica_stub.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp
index f828585562..e9bbab7a4b 100644
--- a/src/replica/replica_stub.cpp
+++ b/src/replica/replica_stub.cpp
@@ -132,6 +132,14 @@ DSN_DEFINE_bool(replication,
                 verbose_commit_log_on_start,
                 false,
                 "whether to print verbose log when commit mutation when starting the server");
+DSN_DEFINE_bool(
+    replication,
+    crash_on_slog_error,
+    false,
+    "whether to exit the process while fail to open slog. If true, the process will exit and leave "
+    "the corrupted slog and replicas to be handled by the administrator. If false, the process "
+    "will continue, and remove the slog and move all the replicas to corresponding error "
+    "directories");
 DSN_DEFINE_uint32(replication,
                   max_concurrent_manual_emergency_checkpointing_count,
                   10,
@@ -708,6 +716,9 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
     if (err == ERR_OK) {
         LOG_INFO("replay shared log succeed, time_used = {} ms", finish_time - start_time);
     } else {
+        if (FLAGS_crash_on_slog_error) {
+            LOG_FATAL("replay shared log failed, err = {}, please check the error details", err);
+        }
         LOG_ERROR("replay shared log failed, err = {}, time_used = {} ms, clear all logs ...",
                   err,
                   finish_time - start_time);