From 011d3e622aa145c338d3566031e28f4791cf057b Mon Sep 17 00:00:00 2001 From: Sandeep L <99765181+lingamsandeep@users.noreply.github.com> Date: Wed, 17 Aug 2022 11:44:33 -0700 Subject: [PATCH] [#13600] DocDB: Fix Flakiness in the XClusterAlterUniverse* tests Summary: The tests XClusterAlterUniverseAdminCliTest.TestAlterUniverseReplication* were failing because they were performing an AlterUniverseReplication using yb-admin which internally calls "IsSetupReplicationDone" on the XCluster universe with the .ALTER suffix. At the end of "AlterUniverseReplication", the universe with .ALTER is deleted and there is a code path which does not set the replication_error field in the PB correctly before returning. This results in the field retaining its default value and the yb-admin command failing with the following error: Bad status: Runtime error (yb/util/subprocess.cc:615): Subprocess '/Users/sandeep/code/yugabyte-db/build/debug-clang-dynamic-arm64-ninja/tests-tools/../bin/yb-admin' terminated with non-zero exit status 256 The internal error is: Runtime error (yb/common/wire_protocol.cc:253): (999 unknown) which comes the default value of AppStatusPB error code of 999. Fixed this by setting the replicationerror correctly Test Plan: ybd --cxx-test yb-admin-test_ent --gtest_filter XClusterAlterUniverseAdminCliTest.TestAlterUniverseReplication -n 10 Reviewers: nicolas, rahuldesirazu, hsunder Reviewed By: hsunder Subscribers: ybase, bogdan Differential Revision: https://phabricator.dev.yugabyte.com/D18999 --- ent/src/yb/master/catalog_manager_ent.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ent/src/yb/master/catalog_manager_ent.cc b/ent/src/yb/master/catalog_manager_ent.cc index 3c672bf7d539..8c33f0941f87 100644 --- a/ent/src/yb/master/catalog_manager_ent.cc +++ b/ent/src/yb/master/catalog_manager_ent.cc @@ -6145,7 +6145,7 @@ Status CatalogManager::GetUniverseReplication(const GetUniverseReplicationReques * Checks if the universe replication setup has completed. * Returns Status::OK() if this call succeeds, and uses resp->done() to determine if the setup has * completed (either failed or succeeded). If the setup has failed, then resp->replication_error() - * is also set. + * is also set. If it succeeds, replication_error() gets set to OK. */ Status CatalogManager::IsSetupUniverseReplicationDone( const IsSetupUniverseReplicationDoneRequestPB* req, @@ -6168,7 +6168,11 @@ Status CatalogManager::IsSetupUniverseReplicationDone( // If the universe was deleted, we're done. This is normal with ALTER tmp files. if (s.IsNotFound()) { resp->set_done(true); - return isAlterRequest ? Status::OK() : s; + if (isAlterRequest) { + s = Status::OK(); + StatusToPB(s, resp->mutable_replication_error()); + } + return s; } RETURN_NOT_OK(s); if (universe_resp.has_error()) {