Skip to content

Commit

Permalink
[#23492] DocDB: Upgrade and Rollback tests
Browse files Browse the repository at this point in the history
Summary:
Adding the framework to perform DB upgrades and rollbacks in unit tests.

- `builds.xml` stores the locations of the builds for the various OS, architecture, and build type combinations. We currently use `linux_debug_x86`, `linux_release_x86`, `darwin_debug_arm64` and `darwin_release_arm64`. Jenkins covers all Linux and Darwin release types. Darwin debug is for use on dev Mac machines.
- Currently supports `2.20.2.4` and `2024.1.0.1` as older versions. New major versions will be added manually as they become available. Minor versions can be added on demand if the need for one arises.
- 2.20 Linux builds are disabled since `post_install.sh` gets stuck.
- Builds are downloaded to the `/opt/yb-build/db-upgrade` directory if it does not already exist.
- We do not maintain builds for ASAN and TSAN so these build types are not run.
- `UpgradeTestBase` provides the framework to perform all Upgrade and Rollback actions.
- `BasicUpgradeTest` tests upgrade and rollback using a simple bank balance workload.
- D32492 changed the DocDB format for debug builds so that it is compatible with release builds. This change was not backported, so debug builds for version 2024.2 or older are generated after applying this change as a patch.

#23492
Jira: DB-12406

Test Plan:
BasicUpgradeTest, TestUpgradeFrom_2_20_2_4
BasicUpgradeTest, TestRollbackTo_2_20_2_4
BasicUpgradeTest, TestUpgradeFrom_2024_1_0_1
BasicUpgradeTest, TestRollbackTo_2024_1_0_1

Reviewers: asrivastava, tfoucher, slingam

Reviewed By: asrivastava

Subscribers: ybase

Differential Revision: https://phorge.dev.yugabyte.com/D37153
  • Loading branch information
hari90 committed Aug 17, 2024
1 parent 5f286f5 commit 68ac66e
Show file tree
Hide file tree
Showing 15 changed files with 1,066 additions and 93 deletions.
1 change: 1 addition & 0 deletions python/yugabyte/yb_dist_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def set_global_conf_from_dict(global_conf_dict: Dict[str, str]) -> GlobalTestCon
'linuxbrew_path.txt',
'thirdparty_path.txt',
'thirdparty_url.txt',
'upgrade_test_builds',
f'{POSTGRES_BUILD_SUBDIR}/contrib',
f'{POSTGRES_BUILD_SUBDIR}/src/test/regress',
f'{POSTGRES_BUILD_SUBDIR}/src/test/isolation',
Expand Down
14 changes: 13 additions & 1 deletion src/yb/integration-tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ file(COPY
add_custom_target(
xcluster_ddl_replication_sql DEPENDS ${CMAKE_BINARY_DIR}/test_xcluster_ddl_replication_sql)

configure_file(
${CMAKE_SOURCE_DIR}/src/yb/integration-tests/upgrade-tests/builds.xml
${CMAKE_BINARY_DIR}/upgrade_test_builds/builds.xml
COPYONLY)

add_custom_target(
upgrade_test_builds_tar DEPENDS ${CMAKE_BINARY_DIR}/upgrade_test_builds)

ADD_YB_TEST_LIBRARY(
cdc_test_util
SRCS cdc_test_util.cc
Expand All @@ -68,6 +76,7 @@ set(INTEGRATION_TESTS_SRCS
external_daemon.cc
external_yb_controller.cc
external_mini_cluster_fs_inspector.cc
external_mini_cluster-itest-base.cc
external_mini_cluster_validator.cc
load_balancer_test_util.cc
load_generator.cc
Expand All @@ -88,6 +97,7 @@ set(INTEGRATION_TESTS_SRCS
xcluster/xcluster_ysql_test_base.cc
xcluster/xcluster_ycql_test_base.cc
xcluster/xcluster_ddl_replication_test_base.cc
upgrade-tests/upgrade_test_base.cc
)

ADD_YB_TEST_LIBRARY(integration-tests SRCS ${INTEGRATION_TESTS_SRCS})
Expand Down Expand Up @@ -119,7 +129,8 @@ add_dependencies(integration-tests
yb-ts-cli
generate_test_certs
fips_install
xcluster_ddl_replication_sql)
xcluster_ddl_replication_sql
upgrade_test_builds_tar)

ADD_YB_TEST_LIBRARY(
cql_test_util
Expand Down Expand Up @@ -249,6 +260,7 @@ ADD_YB_TEST(xcluster/xcluster-tablet-split-itest)
ADD_YB_TEST(xcluster/xcluster-test)
ADD_YB_TEST(xcluster/xcluster_outbound_replication_group-itest)
ADD_YB_TEST(retryable_request-test)
ADD_YB_TEST(upgrade-tests/basic_upgrade-test)

set(YB_TEST_LINK_LIBS_SAVED ${YB_TEST_LINK_LIBS})
set(YB_TEST_LINK_LIBS ${YB_TEST_LINK_LIBS} cassandra)
Expand Down
16 changes: 4 additions & 12 deletions src/yb/integration-tests/auto_flags-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -815,11 +815,6 @@ class AutoFlagsExternalMiniClusterTest : public ExternalMiniClusterITestBase {
StartCluster(extra_ts_flags, extra_master_flags, kNumTServers, kNumMasterServers));
}

void SetUpCluster(ExternalMiniClusterOptions* opts) override {
ASSERT_NO_FATALS(ExternalMiniClusterITestBase::SetUpCluster(opts));
opts_ = *opts;
}

Status CheckFlagOnNode(
const string& flag_name, const string& expected_val, ExternalDaemon* daemon) {
auto value = VERIFY_RESULT(daemon->GetFlag(flag_name));
Expand Down Expand Up @@ -999,9 +994,6 @@ class AutoFlagsExternalMiniClusterTest : public ExternalMiniClusterITestBase {
CHECK_EQ(expected_config_version, config.config_version());
return config;
}

protected:
ExternalMiniClusterOptions opts_;
};

// Validate AutoFlags in new cluster and make sure it handles process restarts, and addition of
Expand All @@ -1023,7 +1015,7 @@ TEST_F(AutoFlagsExternalMiniClusterTest, NewCluster) {
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kTrue, new_master.get()));

ASSERT_OK(cluster_->AddTabletServer());
ASSERT_OK(cluster_->WaitForTabletServerCount(opts_.num_tablet_servers + 1, kTimeout));
ASSERT_OK(cluster_->WaitForTabletServerCount(cluster_->num_tablet_servers(), kTimeout));

ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsInitializedFlagName, kTrue));
ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsNewInstallFlagName, kTrue));
Expand Down Expand Up @@ -1064,7 +1056,7 @@ TEST_F(AutoFlagsExternalMiniClusterTest, UpgradeCluster) {
ASSERT_TRUE(Erase(disable_auto_flag_management, cluster_->mutable_extra_tserver_flags()));

ASSERT_OK(cluster_->AddTabletServer());
ASSERT_OK(cluster_->WaitForTabletServerCount(opts_.num_tablet_servers + 1, kTimeout));
ASSERT_OK(cluster_->WaitForTabletServerCount(cluster_->num_tablet_servers(), kTimeout));

// Add a new tserver
auto* new_tserver = cluster_->tablet_server(cluster_->num_tablet_servers() - 1);
Expand All @@ -1075,9 +1067,9 @@ TEST_F(AutoFlagsExternalMiniClusterTest, UpgradeCluster) {

// Restart the new tserver
new_tserver->Shutdown();
ASSERT_OK(cluster_->WaitForTabletServerCount(opts_.num_tablet_servers, kTimeout));
ASSERT_OK(cluster_->WaitForTabletServerCount(cluster_->num_tablet_servers() - 1, kTimeout));
ASSERT_OK(new_tserver->Restart());
ASSERT_OK(cluster_->WaitForTabletServerCount(opts_.num_tablet_servers + 1, kTimeout));
ASSERT_OK(cluster_->WaitForTabletServerCount(cluster_->num_tablet_servers(), kTimeout));
ASSERT_OK(CheckFlagOnNode(kDisableAutoFlagsManagementFlagName, kFalse, new_tserver));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsInitializedFlagName, kFalse, new_tserver));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kFalse, new_tserver));
Expand Down
8 changes: 4 additions & 4 deletions src/yb/integration-tests/cassandra_cpp_driver-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ class CppCassandraDriverTest : public ExternalMiniClusterITestBase {
ASSERT_OK(admin_client_->Init());
}

void SetUpCluster(ExternalMiniClusterOptions* opts) override {
ASSERT_NO_FATALS(ExternalMiniClusterITestBase::SetUpCluster(opts));
void SetUpOptions(ExternalMiniClusterOptions& opts) override {
ASSERT_NO_FATALS(ExternalMiniClusterITestBase::SetUpOptions(opts));

opts->bind_to_unique_loopback_addresses = true;
opts->use_same_ts_ports = true;
opts.bind_to_unique_loopback_addresses = true;
opts.use_same_ts_ports = true;
}

void TearDown() override {
Expand Down
3 changes: 2 additions & 1 deletion src/yb/integration-tests/external_daemon.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
std::vector<std::string> GetDataDirs() const { return data_dirs_; }

const std::string& exe() const { return exe_; }
void SetExe(const std::string& new_exe) { exe_ = new_exe; }

const std::string& GetRootDir() const { return root_dir_; }

Expand Down Expand Up @@ -275,7 +276,7 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
const std::string daemon_id_;
rpc::Messenger* messenger_;
rpc::ProxyCache* proxy_cache_;
const std::string exe_;
std::string exe_;
const std::string root_dir_;
std::vector<std::string> data_dirs_;
std::vector<std::string> extra_flags_;
Expand Down
87 changes: 87 additions & 0 deletions src/yb/integration-tests/external_mini_cluster-itest-base.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (c) YugabyteDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//

#include "yb/integration-tests/external_mini_cluster-itest-base.h"

namespace yb {

// Hook that adjusts the cluster options before the ExternalMiniCluster is created.
// The base implementation only appends the tserver flag below.
void ExternalMiniClusterITestBase::SetUpOptions(ExternalMiniClusterOptions& opts) {
  // Fsync causes flakiness on EC2.
  opts.extra_tserver_flags.emplace_back("--never_fsync");
}

void ExternalMiniClusterITestBase::StartCluster(
const std::vector<std::string>& extra_ts_flags,
const std::vector<std::string>& extra_master_flags, int num_tablet_servers, int num_masters,
bool enable_ysql) {
ExternalMiniClusterOptions opts;
opts.num_masters = num_masters;
opts.num_tablet_servers = num_tablet_servers;
opts.extra_master_flags = extra_master_flags;
opts.extra_tserver_flags = extra_ts_flags;
opts.enable_ysql = enable_ysql;

ASSERT_OK(StartCluster(opts));
}

// Creates and starts the external mini cluster described by `opts`, then populates the fixture
// members derived from it (inspect_, ts_map_ and client_).
//
// `opts` is taken by value, so the adjustments made by SetUpOptions() are applied to a private
// copy and are not visible to the caller. Returns the first non-OK status encountered; in that
// case the members that come after the failing step are left untouched.
Status ExternalMiniClusterITestBase::StartCluster(ExternalMiniClusterOptions opts) {
  SetUpOptions(opts);

  cluster_ = std::make_unique<ExternalMiniCluster>(opts);
  RETURN_NOT_OK(cluster_->Start());

  // Everything below is constructed from cluster_, and only once Start() has succeeded.
  inspect_ = std::make_unique<itest::ExternalMiniClusterFsInspector>(cluster_.get());
  ts_map_ = VERIFY_RESULT(itest::CreateTabletServerMap(cluster_.get()));
  client_ = VERIFY_RESULT(cluster_->CreateClient());

  return Status::OK();
}

// Releases the client, shuts the cluster down (if one was created) and then runs the base
// fixture teardown. When the test has already recorded a fatal failure, the stacks of every
// tablet server process that is still alive are dumped first.
void ExternalMiniClusterITestBase::TearDown() {
  client_.reset();

  // HasFatalFailure() is only consulted when a cluster exists, as there is nothing to dump
  // otherwise.
  const bool dump_stacks = cluster_ && HasFatalFailure();
  if (dump_stacks) {
    LOG(INFO) << "Found fatal failure";
    for (size_t idx = 0; idx < cluster_->num_tablet_servers(); ++idx) {
      auto* const tserver = cluster_->tablet_server(idx);
      if (tserver->IsProcessAlive()) {
        LOG(INFO) << "Attempting to dump stacks of TS " << idx << " with UUID "
                  << tserver->uuid() << " and pid " << tserver->pid();
        // Best effort: a failed dump is only logged as a warning.
        WARN_NOT_OK(PstackWatcher::DumpPidStacks(tserver->pid()), "Couldn't dump stacks");
      } else {
        LOG(INFO) << "Tablet server " << idx << " is not running. Cannot dump its stacks.";
      }
    }
  }

  if (cluster_) {
    cluster_->Shutdown();
  }

  YBTest::TearDown();
  ts_map_.clear();
}

// Returns the id of a tablet that belongs to `table_name`.
//
// Tablet servers are scanned in index order and the first tablet whose table name matches is
// returned. No check is made that the table has exactly one tablet, so for a multi-tablet table
// the result is simply the first match.
//
// Returns:
//   - the tablet id on success,
//   - the error from ExternalMiniCluster::GetTablets() if listing tablets on a server fails,
//   - NotFound if no tablet server reports a tablet for `table_name`.
Result<TabletId> ExternalMiniClusterITestBase::GetSingleTabletId(const TableName& table_name) {
  for (size_t i = 0; i < cluster_->num_tablet_servers(); ++i) {
    auto* const ts = cluster_->tablet_server(i);
    const auto tablets = VERIFY_RESULT(cluster_->GetTablets(ts));
    for (const auto& tablet : tablets) {
      if (tablet.table_name() == table_name) {
        return tablet.tablet_id();
      }
    }
  }
  return STATUS(NotFound, Format("No tablet found for table $0.", table_name));
}

} // namespace yb
66 changes: 6 additions & 60 deletions src/yb/integration-tests/external_mini_cluster-itest-base.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,79 +57,25 @@ namespace yb {
// setup routines useful for integration tests.
class ExternalMiniClusterITestBase : public YBTest {
public:
virtual void SetUpCluster(ExternalMiniClusterOptions* opts) {
// Fsync causes flakiness on EC2.
CHECK_NOTNULL(opts)->extra_tserver_flags.push_back("--never_fsync");
}
virtual void TearDown() override;

virtual void TearDown() override {
client_.reset();
if (cluster_) {
if (HasFatalFailure()) {
LOG(INFO) << "Found fatal failure";
for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
if (!cluster_->tablet_server(i)->IsProcessAlive()) {
LOG(INFO) << "Tablet server " << i << " is not running. Cannot dump its stacks.";
continue;
}
LOG(INFO) << "Attempting to dump stacks of TS " << i
<< " with UUID " << cluster_->tablet_server(i)->uuid()
<< " and pid " << cluster_->tablet_server(i)->pid();
WARN_NOT_OK(PstackWatcher::DumpPidStacks(cluster_->tablet_server(i)->pid()),
"Couldn't dump stacks");
}
}
cluster_->Shutdown();
}
YBTest::TearDown();
ts_map_.clear();
}

Result<TabletId> GetSingleTabletId(const TableName& table_name) {
TabletId tablet_id_to_split;
for (size_t i = 0; i < cluster_->num_tablet_servers(); ++i) {
const auto ts = cluster_->tablet_server(i);
const auto tablets = VERIFY_RESULT(cluster_->GetTablets(ts));
for (const auto& tablet : tablets) {
if (tablet.table_name() == table_name) {
return tablet.tablet_id();
}
}
}
return STATUS(NotFound, Format("No tablet found for table $0.", table_name));
}
Result<TabletId> GetSingleTabletId(const TableName& table_name);

protected:
virtual void SetUpOptions(ExternalMiniClusterOptions& opts);

void StartCluster(const std::vector<std::string>& extra_ts_flags = std::vector<std::string>(),
const std::vector<std::string>& extra_master_flags = std::vector<std::string>(),
int num_tablet_servers = 3,
int num_masters = 1,
bool enable_ysql = false);

Status StartCluster(ExternalMiniClusterOptions opts);

std::unique_ptr<ExternalMiniCluster> cluster_;
std::unique_ptr<itest::ExternalMiniClusterFsInspector> inspect_;
std::unique_ptr<client::YBClient> client_;
itest::TabletServerMap ts_map_;
};

void ExternalMiniClusterITestBase::StartCluster(const std::vector<std::string>& extra_ts_flags,
const std::vector<std::string>& extra_master_flags,
int num_tablet_servers,
int num_masters,
bool enable_ysql) {
ExternalMiniClusterOptions opts;
opts.num_masters = num_masters;
opts.num_tablet_servers = num_tablet_servers;
opts.extra_master_flags = extra_master_flags;
opts.extra_tserver_flags = extra_ts_flags;
opts.enable_ysql = enable_ysql;
SetUpCluster(&opts);

cluster_.reset(new ExternalMiniCluster(opts));
ASSERT_OK(cluster_->Start());
inspect_.reset(new itest::ExternalMiniClusterFsInspector(cluster_.get()));
ts_map_ = ASSERT_RESULT(itest::CreateTabletServerMap(cluster_.get()));
client_ = ASSERT_RESULT(cluster_->CreateClient());
}

} // namespace yb
12 changes: 12 additions & 0 deletions src/yb/integration-tests/external_mini_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,10 @@ Status ExternalMiniCluster::DeduceBinRoot(std::string* ret) {
return Status::OK();
}

// Replaces the stored daemon binary path (daemon_bin_path_) with `bin_path`.
// NOTE(review): presumably this only affects daemons launched after the call, and daemons
// that are already running keep their binary. Confirm against the callers.
void ExternalMiniCluster::SetDaemonBinPath(const std::string& bin_path) {
daemon_bin_path_ = bin_path;
}

std::string ExternalMiniCluster::GetClusterDataDirName() const {
if (opts_.cluster_id == "") {
return "minicluster-data";
Expand Down Expand Up @@ -483,6 +487,14 @@ string ExternalMiniCluster::GetDataPath(const string& daemon_id) const {
return JoinPathSegments(data_root_, daemon_id);
}

// Returns the result of GetBinaryPath() for the master binary name (GetMasterBinaryName()).
std::string ExternalMiniCluster::GetMasterBinaryPath() const {
return GetBinaryPath(GetMasterBinaryName());
}

// Returns the result of GetBinaryPath() for the tablet server binary name
// (GetTServerBinaryName()).
std::string ExternalMiniCluster::GetTServerBinaryPath() const {
return GetBinaryPath(GetTServerBinaryName());
}

namespace {
vector<string> SubstituteInFlags(const vector<string>& orig_flags, size_t index) {
string str_index = std::to_string(index);
Expand Down
Loading

0 comments on commit 68ac66e

Please sign in to comment.