Skip to content

Commit

Permalink
[#6632] ybase: Add new flag for how many servers to wait for before t…
Browse files Browse the repository at this point in the history
…he transactions table is

created

Summary:
Currently in the catalog manager, we wait for just RF number of TS before creating the
transaction status table. However, when we move to large clusters (> 24 nodes) then just the RF
number of TS might not be enough to guarantee one Txn status tablet leader on every node.

Support has been added to specify the number of TS to wait for before creating the transaction
table. User can specify the flag "txn_table_wait_ts_count" for this purpose. The default value of
this flag is RF.

Test Plan:
./bin/yb-ctl create --master_flags "txn_table_wait_ts_count=3" --tserver_flags "txn_table_wait_ts_count=3"
Verify that Transaction table hasn't been created despite RF being 1
./bin/yb-ctl add_node --tserver_flags "txn_table_wait_ts_count=3"
./bin/yb-ctl add_node --tserver_flags "txn_table_wait_ts_count=3"
Verify that Transaction table gets created

Also, added a unit test in create-table-itest.cc that simulates the above sequence

Reviewers: bogdan, rsami

Reviewed By: bogdan, rsami

Subscribers: zyu, ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D10258
  • Loading branch information
Sanket Kedia committed Jan 20, 2021
1 parent 0612327 commit 2382568
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 5 deletions.
35 changes: 35 additions & 0 deletions src/yb/integration-tests/create-table-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,16 @@
#include <gtest/gtest.h>

#include "yb/client/client-test-util.h"
#include "yb/client/client_fwd.h"
#include "yb/client/table.h"
#include "yb/client/table_creator.h"
#include "yb/common/common.pb.h"
#include "yb/common/wire_protocol-test-util.h"
#include "yb/integration-tests/external_mini_cluster-itest-base.h"
#include "yb/integration-tests/external_mini_cluster.h"
#include "yb/master/catalog_manager.h"
#include "yb/master/master_util.h"
#include "yb/master/sys_catalog_initialization.h"
#include "yb/util/metrics.h"
#include "yb/util/path_util.h"

Expand Down Expand Up @@ -524,4 +528,35 @@ TEST_F(CreateTableITest, TestIsRaftLeaderMetric) {
ASSERT_EQ(kNumRaftLeaders, kExpectedRaftLeaders);
}

TEST_F(CreateTableITest, TestTransactionStatusTableCreation) {
// Set up an RF 1.
// Tell the Master leader to wait for 3 TS to join before creating the
// transaction status table.
vector<string> master_flags = {
"--txn_table_wait_min_ts_count=3"
};
// We also need to enable ysql.
ASSERT_NO_FATALS(StartCluster({}, master_flags, 1, 1, true));

// Check that the transaction table hasn't been created yet.
YQLDatabase db = YQL_DATABASE_CQL;
YBTableName transaction_status_table(db, master::kSystemNamespaceId,
master::kSystemNamespaceName, kTransactionsTableName);
bool exists = ASSERT_RESULT(client_->TableExists(transaction_status_table));
ASSERT_FALSE(exists) << "Transaction table exists even though the "
"requirement for the minimum number of TS not met";

// Add two tservers.
ASSERT_OK(cluster_->AddTabletServer());
ASSERT_OK(cluster_->AddTabletServer());

auto tbl_exists = [&]() -> Result<bool> {
return client_->TableExists(transaction_status_table);
};

ASSERT_OK(WaitFor(tbl_exists, 30s * kTimeMultiplier,
"Transaction table doesn't exist even though the "
"requirement for the minimum number of TS met"));
}

} // namespace yb
7 changes: 5 additions & 2 deletions src/yb/integration-tests/external_mini_cluster-itest-base.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ class ExternalMiniClusterITestBase : public YBTest {
void StartCluster(const std::vector<std::string>& extra_ts_flags = std::vector<std::string>(),
const std::vector<std::string>& extra_master_flags = std::vector<std::string>(),
int num_tablet_servers = 3,
int num_masters = 1);
int num_masters = 1,
bool enable_ysql = false);

gscoped_ptr<ExternalMiniCluster> cluster_;
gscoped_ptr<itest::ExternalMiniClusterFsInspector> inspect_;
Expand All @@ -95,12 +96,14 @@ class ExternalMiniClusterITestBase : public YBTest {
void ExternalMiniClusterITestBase::StartCluster(const std::vector<std::string>& extra_ts_flags,
const std::vector<std::string>& extra_master_flags,
int num_tablet_servers,
int num_masters) {
int num_masters,
bool enable_ysql) {
ExternalMiniClusterOptions opts;
opts.num_masters = num_masters;
opts.num_tablet_servers = num_tablet_servers;
opts.extra_master_flags = extra_master_flags;
opts.extra_tserver_flags = extra_ts_flags;
opts.enable_ysql = enable_ysql;
SetUpCluster(&opts);

cluster_.reset(new ExternalMiniCluster(opts));
Expand Down
15 changes: 12 additions & 3 deletions src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,12 @@ DECLARE_CAPABILITY(TabletReportLimit);
DEFINE_int32(partitions_vtable_cache_refresh_secs, 30,
"Amount of time to wait before refreshing the system.partitions cached vtable.");

DEFINE_int32(txn_table_wait_min_ts_count, 1,
"Minimum Number of TS to wait for before creating the transaction status table."
" Default value is 1. We wait for atleast --replication_factor if this value"
" is smaller than that");
TAG_FLAG(txn_table_wait_min_ts_count, advanced);

namespace yb {
namespace master {

Expand Down Expand Up @@ -861,12 +867,15 @@ Status CatalogManager::VisitSysCatalog(int64_t term) {
permissions_manager_->BuildRecursiveRolesUnlocked();

if (FLAGS_enable_ysql) {
// Number of TS to wait for before creating the txn table.
auto wait_ts_count = std::max(FLAGS_txn_table_wait_min_ts_count, FLAGS_replication_factor);

LOG_WITH_PREFIX(INFO)
<< "YSQL is enabled, will create the transaction status table when "
<< FLAGS_replication_factor << " tablet servers are online";
master_->ts_manager()->SetTSCountCallback(FLAGS_replication_factor, [this] {
<< wait_ts_count << " tablet servers are online";
master_->ts_manager()->SetTSCountCallback(wait_ts_count, [this, wait_ts_count] {
LOG_WITH_PREFIX(INFO)
<< FLAGS_replication_factor
<< wait_ts_count
<< " tablet servers registered, creating the transaction status table";
// Retry table creation until it succeedes. It might fail initially because placement UUID
// of live replicas is set through an RPC from YugaWare, and we won't be able to calculate
Expand Down

0 comments on commit 2382568

Please sign in to comment.