Skip to content

Commit

Permalink
[BACKPORT 2.20][#23243] docdb: Fix tablet bootstrap stuck when replay…
Browse files Browse the repository at this point in the history
…ing truncate operation

Summary:
Original commit: c8cbcbf / D37152
**Issue:**
Tablet bootstrap can run into a deadlock if it needs to replay a bootstrap operation. Here are the sequence of events leading to the deadlock

| Thread 1 (Main bootstrap thread) | Thread 2 (Load Transactions) |
| 1. Begin tablet bootstrap | |
| 2. During OpenTablet, if transaction is enabled(ysql table or ycql table with transaction enabled), acquire the the `start_latch` by setting its value to 1, and another thread(Thread 2) is created for transaction load| |
|  | 3. Execute transaction load, acquires `pending_op_counter_blocking_rocksdb_shutdown_start_` to prevent rocksdb shutdown |
| 4. Replay tablet truncate operation, waiting for `pending_op_counter_blocking_rocksdb_shutdown_start_` to be released in order to shutdown rocksdb | |
| | 5. Transaction load completed |
| | 6. Call `LoadFinished` function, it starts waiting for `start_latch` to be 0|
| 7. Bootstrap complete, release the the `start_latch` by setting its value to 0 | |
|  | 8. release the `pending_op_counter_blocking_rocksdb_shutdown_start_` |
Thread 1 stucks at step 4, waiting for step 8 to be executed.
Thread 2 stucks at step 6, waiting for step 7 to be executed.

Result: Tablet bootstrap stuck at replaying truncation operation. This issue starts happening since D29000 (commit id: 5159eb3), the diff Introduced a change to only destroy executor instance(which holds the operation counter) after FinishLoad.

**Fix:**
Reset `pending_op_counter_blocking_rocksdb_shutdown_start_` before calling `loader_.FinishLoad(status)`. Also, reset both regular_iterator and intent_iterator as they hold refs to Rocksdb's SuperVersion. This is not clean fix, but it guarantees the safety, because FinishLoad doesn't need protection from the op counter as it acquires own op counter when processing the pending applies.

**Affected Version 2.20.1**
Starting from D29000 (commit id: 5159eb3), tablet bootstrap will get stuck when replaying truncate operation.
Jira: DB-12175

Test Plan: ./yb_build.sh --cxx-test pgwrapper_pg_single_tserver-test --gtest_filter PgSingleTServerTest.BootstrapReplayTruncate

Reviewers: bkolagani, timur, sergei, mbautin, rthallam

Reviewed By: rthallam

Subscribers: ybase, rthallam, slingam, yql, mbautin

Differential Revision: https://phorge.dev.yugabyte.com/D37653
  • Loading branch information
yusong-yan committed Oct 23, 2024
1 parent 59a5ccf commit 4267f5a
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 3 deletions.
9 changes: 9 additions & 0 deletions src/yb/tablet/operations/truncate_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@

#include "yb/util/trace.h"

DEFINE_test_flag(bool, skip_applying_truncate, false,
"If true, the test will skip applying tablet truncate operation."
"Note that other operations will still be applied.");

namespace yb {
namespace tablet {

Expand All @@ -43,6 +47,11 @@ Status TruncateOperation::DoAborted(const Status& status) {
Status TruncateOperation::DoReplicated(int64_t leader_term, Status* complete_status) {
TRACE("APPLY TRUNCATE: started");

if (FLAGS_TEST_skip_applying_truncate) {
TRACE("APPLY TRUNCATE: skipped");
return Status::OK();
}

RETURN_NOT_OK(VERIFY_RESULT(tablet_safe())->Truncate(this));

TRACE("APPLY TRUNCATE: finished");
Expand Down
3 changes: 3 additions & 0 deletions src/yb/tablet/tablet_bootstrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1739,6 +1739,9 @@ class TabletBootstrap {

TruncateOperation operation(tablet_, req);

operation.set_op_id(OpId::FromPB(replicate_msg->id()));
operation.set_hybrid_time(HybridTime::FromPB(replicate_msg->hybrid_time()));

Status s = tablet_->Truncate(&operation);

RETURN_NOT_OK_PREPEND(s, "Failed to Truncate:");
Expand Down
4 changes: 4 additions & 0 deletions src/yb/tablet/transaction_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ class TransactionLoader::Executor {
Status status;

auto se = ScopeExit([this, &status] {
regular_iterator_.Reset();
intents_iterator_.Reset();
scoped_pending_operation_.Reset();

loader_.FinishLoad(status);
// Destroy this executor object. Must be the last statement before we return from the Execute
// function.
Expand Down
10 changes: 8 additions & 2 deletions src/yb/tserver/ts_tablet_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -838,10 +838,16 @@ void TSTabletManager::CleanupSplitTablets() {
}
}

Status TSTabletManager::WaitForAllBootstrapsToFinish() {
Status TSTabletManager::WaitForAllBootstrapsToFinish(MonoDelta timeout) {
CHECK_EQ(state(), MANAGER_RUNNING);

open_tablet_pool_->Wait();
if (timeout.Initialized()) {
if (!open_tablet_pool_->WaitFor(timeout)) {
return STATUS(TimedOut, "Timeout waiting for all bootstraps to finish");
}
} else {
open_tablet_pool_->Wait();
}

Status s = Status::OK();

Expand Down
2 changes: 1 addition & 1 deletion src/yb/tserver/ts_tablet_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class TSTabletManager : public tserver::TabletPeerLookupIf, public tablet::Table
// Returns Status::OK if all tablets bootstrapped successfully. If
// the bootstrap of any tablet failed returns the failure reason for
// the first tablet whose bootstrap failed.
Status WaitForAllBootstrapsToFinish();
Status WaitForAllBootstrapsToFinish(MonoDelta timeout = MonoDelta());

// Starts shutdown process.
void StartShutdown();
Expand Down
34 changes: 34 additions & 0 deletions src/yb/yql/pgwrapper/pg_single_tserver-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//
#include "yb/consensus/log.h"

#include "yb/tablet/tablet.h"
#include "yb/tablet/tablet_peer.h"
#include "yb/tablet/transaction_participant.h"

#include "yb/tserver/mini_tablet_server.h"
#include "yb/tserver/tablet_server.h"
#include "yb/tserver/ts_tablet_manager.h"

#include "yb/util/countdown_latch.h"
#include "yb/util/hdr_histogram.h"
Expand All @@ -35,12 +38,15 @@ DECLARE_bool(ysql_enable_packed_row_for_colocated_table);
DECLARE_int64(global_memstore_size_mb_max);
DECLARE_int64(db_block_cache_size_bytes);
DECLARE_int32(rocksdb_max_write_buffer_number);
DECLARE_bool(TEST_skip_applying_truncate);

METRIC_DECLARE_histogram(handler_latency_yb_tserver_TabletServerService_Read);
METRIC_DECLARE_histogram(handler_latency_yb_tserver_TabletServerService_Write);

DEFINE_RUNTIME_int32(TEST_scan_reads, 3, "Number of reads in scan tests");

using namespace std::literals;

namespace yb::pgwrapper {

class PgSingleTServerTest : public PgMiniTestBase {
Expand Down Expand Up @@ -760,6 +766,34 @@ TEST_F(PgSingleTServerTest, RangeConflict) {
ASSERT_NOK(conn2.Execute("INSERT INTO t VALUES (0, 2), (1, 2)"));
}

TEST_F(PgSingleTServerTest, BootstrapReplayTruncate) {
auto conn = ASSERT_RESULT(Connect());
ASSERT_OK(conn.Execute(
"CREATE TABLE t(id INT PRIMARY KEY, s TEXT) SPLIT INTO 1 TABLETS;"));

ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_skip_applying_truncate) = true;
ASSERT_OK(conn.Execute("TRUNCATE TABLE t;"));

// Rollover and flush the WAL, so that the truncate will be replayed during next restart.
auto peers = ListTabletPeers(cluster_.get(), ListPeersFilter::kAll);
for (const auto& peer : peers) {
if (peer->tablet()->transaction_participant()) {
ASSERT_OK(peer->log()->AllocateSegmentAndRollOver());
}
}

ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_skip_applying_truncate) = false;

auto* ts = cluster_->mini_tablet_server(0);

ASSERT_OK(ts->Restart());

auto timeout = MonoDelta::FromSeconds(10);
if (!ts->server()->tablet_manager()->WaitForAllBootstrapsToFinish(timeout).ok()) {
LOG(FATAL) << "Tablet bootstrap didn't complete within within " << timeout.ToString();
}
}

TEST_F(PgSingleTServerTest, UpdateIndexWithHole) {
auto conn = ASSERT_RESULT(Connect());
ASSERT_OK(conn.Execute("CREATE TABLE t (id INT PRIMARY KEY, value INT)"));
Expand Down

0 comments on commit 4267f5a

Please sign in to comment.