Skip to content

Commit

Permalink
[yugabyte#3979] YSQL Create Namespace Failure Handling
Browse files Browse the repository at this point in the history
Summary:
Working on integration between YSQL and DocDB for failure scenarios.  I ran a number of
create/delete commands with regular Sys Catalog failures.  A number of issues arose:

  1. Ensuring that we end the YBClient on a failure scenario where the 'done' flag is set.
  2. Allowing YSQL to control name uniqueness instead of DOCDB to handle failure scenarios.
  3. Proper logging of errors. An invalid CHECK would crash the server instead of logging an error.

Test Plan:
./build/latest/bin/yb-ts-cli --server_address=127.0.0.1:7100 set_flag -force TEST_sys_catalog_write_rejection_percentage 33
ysqlsh
  CREATE DATABASE d; // looped
  CREATE TABLE t; // looped

Reviewers: mihnea, bogdan, rahuldesirazu, hector

Reviewed By: hector

Subscribers: yql, ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D8619
  • Loading branch information
nspiegelberg authored and deeps1991 committed Jul 22, 2020
1 parent a53573b commit 7d7f423
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 34 deletions.
66 changes: 66 additions & 0 deletions java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgDropDatabase.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,18 @@
//
package org.yb.pgsql;

import com.google.common.net.HostAndPort;
import org.apache.commons.lang3.RandomUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.postgresql.core.TransactionState;
import org.postgresql.util.PSQLException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yb.AssertionWrappers;
import org.yb.client.AsyncYBClient;
import org.yb.client.TestUtils;
import org.yb.client.YBClient;
import org.yb.minicluster.MiniYBClusterBuilder;
import org.yb.util.SanitizerUtil;
import org.yb.util.YBTestRunnerNonTsanOnly;
Expand Down Expand Up @@ -123,6 +128,67 @@ public void testAsyncDropDatabase() throws Exception {
}
}

private void runProcess(String... args) throws Exception {
assertEquals(0, new ProcessBuilder(args).start().waitFor());
}

private void setWriteRejection(HostAndPort server, int percentage) throws Exception {
runProcess(TestUtils.findBinary("yb-ts-cli"),
"--server_address",
server.toString(),
"set_flag",
"-force",
"TEST_sys_catalog_write_rejection_percentage",
Integer.toString(percentage));
}

@Test
public void testCreateDatabaseWithFailures() throws Exception {
String dbname = "basic_db";

// Toggle a GFLAG on the Master at runtime to cause periodic low-level IO failures.
AsyncYBClient client = new AsyncYBClient.AsyncYBClientBuilder(masterAddresses).build();
YBClient syncClient = new YBClient(client);
HostAndPort leaderMaster = syncClient.getLeaderMasterHostAndPort();
// Don't set to 100% to test a couple write locations in the control path.
setWriteRejection(leaderMaster, 50);

// Try to create a database, expecting failures.
Connection connection0 = createConnection(0);
int failures = 0;
int tries = 0;
do {
try (Statement statement0 = connection0.createStatement()) {
statement0.execute(String.format("CREATE DATABASE %s", dbname));
} catch (PSQLException ex) {
LOG.info("Expected error: " + ex.getMessage());
++failures;
}
if (failures == 0) {
LOG.warn("No failure occurred (uncommon). Cleaning up for the next loop.");
setWriteRejection(leaderMaster, 0);
try (Statement statement0 = connection0.createStatement()) {
statement0.execute(String.format("DROP DATABASE %s", dbname));
}
setWriteRejection(leaderMaster, 100); // Guarantee failure on the next loop.
}
} while (Math.max(tries++, failures) == 0);
assertNotEquals(0, failures);

// Toggle a GFLAG on the Master at runtime to disable low-level IO failures.
setWriteRejection(leaderMaster, 0);

// Create the same database name. NOW expecting success.
failures = 0;
try (Statement statement0 = connection0.createStatement()) {
statement0.execute(String.format("CREATE DATABASE %s", dbname));
} catch (PSQLException ex) {
LOG.info("Unexpected error: " + ex.getMessage());
++failures;
}
assertEquals(0, failures);
}

@Test
public void testRecreateDatabase() throws Exception {
String dbname = "recreate_db";
Expand Down
27 changes: 21 additions & 6 deletions src/yb/client/client-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ Status YBClient::Data::IsCreateNamespaceInProgress(
YBClient* client,
const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
CoarseTimePoint deadline,
bool *create_in_progress) {
DCHECK_ONLY_NOTNULL(create_in_progress);
Expand All @@ -766,7 +767,13 @@ Status YBClient::Data::IsCreateNamespaceInProgress(
if (database_type) {
req.mutable_namespace_()->set_database_type(*database_type);
}
if (!namespace_id.empty()) {
req.mutable_namespace_()->set_id(namespace_id);
}

// RETURN_NOT_OK macro can't take templated function call as param,
// and SyncLeaderMasterRpc must be explicitly instantiated, else the
// compiler complains.
const Status s =
SyncLeaderMasterRpc<IsCreateNamespaceDoneRequestPB, IsCreateNamespaceDoneResponsePB>(
deadline,
Expand All @@ -775,34 +782,38 @@ Status YBClient::Data::IsCreateNamespaceInProgress(
nullptr /* num_attempts */,
"IsCreateNamespaceDone",
&MasterServiceProxy::IsCreateNamespaceDone);
// RETURN_NOT_OK macro can't take templated function call as param,
// and SyncLeaderMasterRpc must be explicitly instantiated, else the
// compiler complains.

// IsCreate could return a terminal/done state as FAILED. This would result in an error'd Status.
if (resp.has_done()) {
*create_in_progress = !resp.done();
}

RETURN_NOT_OK(s);
if (resp.has_error()) {
return StatusFromPB(resp.error().status());
}

*create_in_progress = !resp.done();
return Status::OK();
}

Status YBClient::Data::WaitForCreateNamespaceToFinish(
YBClient* client,
const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
CoarseTimePoint deadline) {
return RetryFunc(
deadline,
"Waiting on Create Namespace to be completed",
"Timed out waiting for Namespace Creation",
std::bind(&YBClient::Data::IsCreateNamespaceInProgress, this, client,
namespace_name, database_type, _1, _2));
namespace_name, database_type, namespace_id, _1, _2));
}

Status YBClient::Data::IsDeleteNamespaceInProgress(YBClient* client,
const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
CoarseTimePoint deadline,
bool* delete_in_progress) {
DCHECK_ONLY_NOTNULL(delete_in_progress);
Expand All @@ -813,6 +824,9 @@ Status YBClient::Data::IsDeleteNamespaceInProgress(YBClient* client,
if (database_type) {
req.mutable_namespace_()->set_database_type(*database_type);
}
if (!namespace_id.empty()) {
req.mutable_namespace_()->set_id(namespace_id);
}

const Status s =
SyncLeaderMasterRpc<IsDeleteNamespaceDoneRequestPB, IsDeleteNamespaceDoneResponsePB>(
Expand Down Expand Up @@ -841,13 +855,14 @@ Status YBClient::Data::IsDeleteNamespaceInProgress(YBClient* client,
Status YBClient::Data::WaitForDeleteNamespaceToFinish(YBClient* client,
const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
CoarseTimePoint deadline) {
return RetryFunc(
deadline,
"Waiting on Delete Namespace to be completed",
"Timed out waiting for Namespace Deletion",
std::bind(&YBClient::Data::IsDeleteNamespaceInProgress, this,
client, namespace_name, database_type, _1, _2));
client, namespace_name, database_type, namespace_id, _1, _2));
}

Status YBClient::Data::AlterTable(YBClient* client,
Expand Down
4 changes: 4 additions & 0 deletions src/yb/client/client-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,23 +92,27 @@ class YBClient::Data {
CHECKED_STATUS IsCreateNamespaceInProgress(YBClient* client,
const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
CoarseTimePoint deadline,
bool *create_in_progress);

CHECKED_STATUS WaitForCreateNamespaceToFinish(YBClient* client,
const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
CoarseTimePoint deadline);

CHECKED_STATUS IsDeleteNamespaceInProgress(YBClient* client,
const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
CoarseTimePoint deadline,
bool *delete_in_progress);

CHECKED_STATUS WaitForDeleteNamespaceToFinish(YBClient* client,
const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
CoarseTimePoint deadline);

CHECKED_STATUS CreateTable(YBClient* client,
Expand Down
17 changes: 10 additions & 7 deletions src/yb/client/client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -641,10 +641,11 @@ Status YBClient::CreateNamespace(const std::string& namespace_name,
s = StatusFromPB(resp.error().status());
}
RETURN_NOT_OK(s);
std::string cur_id = resp.has_id() ? resp.id() : namespace_id;

// Verify that the namespace we found is running so that, once this request returns,
// the client can send operations without receiving a "namespace not found" error.
RETURN_NOT_OK(data_->WaitForCreateNamespaceToFinish(this, namespace_name, database_type,
RETURN_NOT_OK(data_->WaitForCreateNamespaceToFinish(this, namespace_name, database_type, cur_id,
CoarseMonoClock::Now() + default_admin_operation_timeout()));

return Status::OK();
Expand All @@ -662,7 +663,7 @@ Status YBClient::CreateNamespaceIfNotExists(const std::string& namespace_name,
if (VERIFY_RESULT(namespace_exists)) {
// Verify that the namespace we found is running so that, once this request returns,
// the client can send operations without receiving a "namespace not found" error.
return data_->WaitForCreateNamespaceToFinish(this, namespace_name, database_type,
return data_->WaitForCreateNamespaceToFinish(this, namespace_name, database_type, namespace_id,
CoarseMonoClock::Now() + default_admin_operation_timeout());
}

Expand All @@ -676,10 +677,11 @@ Status YBClient::CreateNamespaceIfNotExists(const std::string& namespace_name,

Status YBClient::IsCreateNamespaceInProgress(const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
bool *create_in_progress) {
auto deadline = CoarseMonoClock::Now() + default_admin_operation_timeout();
return data_->IsCreateNamespaceInProgress(this, namespace_name, database_type, deadline,
create_in_progress);
return data_->IsCreateNamespaceInProgress(this, namespace_name, database_type, namespace_id,
deadline, create_in_progress);
}

Status YBClient::DeleteNamespace(const std::string& namespace_name,
Expand All @@ -705,17 +707,18 @@ Status YBClient::DeleteNamespace(const std::string& namespace_name,

// Verify that, once this request returns, the namespace has been successfully marked as deleted.
RETURN_NOT_OK(data_->WaitForDeleteNamespaceToFinish(this, namespace_name, database_type,
CoarseMonoClock::Now() + default_admin_operation_timeout()));
namespace_id, CoarseMonoClock::Now() + default_admin_operation_timeout()));

return Status::OK();
}

Status YBClient::IsDeleteNamespaceInProgress(const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
bool *delete_in_progress) {
auto deadline = CoarseMonoClock::Now() + default_admin_operation_timeout();
return data_->IsDeleteNamespaceInProgress(this, namespace_name, database_type, deadline,
delete_in_progress);
return data_->IsDeleteNamespaceInProgress(this, namespace_name, database_type, namespace_id,
deadline, delete_in_progress);
}

YBNamespaceAlterer* YBClient::NewNamespaceAlterer(
Expand Down
2 changes: 2 additions & 0 deletions src/yb/client/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ class YBClient {
// Set 'create_in_progress' to true if a CreateNamespace operation is in-progress.
CHECKED_STATUS IsCreateNamespaceInProgress(const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
bool *create_in_progress);

// Delete namespace with the given name.
Expand All @@ -371,6 +372,7 @@ class YBClient {
// Set 'delete_in_progress' to true if a DeleteNamespace operation is in-progress.
CHECKED_STATUS IsDeleteNamespaceInProgress(const std::string& namespace_name,
const boost::optional<YQLDatabase>& database_type,
const std::string& namespace_id,
bool *delete_in_progress);

YBNamespaceAlterer* NewNamespaceAlterer(const string& namespace_name,
Expand Down
32 changes: 19 additions & 13 deletions src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1194,8 +1194,9 @@ bool CatalogManager::IsYcqlTable(const TableInfo& table) {
Status CatalogManager::PrepareNamespace(
YQLDatabase db_type, const NamespaceName& name, const NamespaceId& id, int64_t term) {

if (FindPtrOrNull(namespace_names_mapper_[db_type], name) != nullptr) {
LOG(INFO) << "Keyspace " << name << " already created, skipping initialization";
scoped_refptr<NamespaceInfo> ns = FindPtrOrNull(namespace_ids_map_, id);
if (ns != nullptr) {
LOG(INFO) << "Keyspace " << ns->ToString() << " already created, skipping initialization";
return Status::OK();
}

Expand All @@ -1206,7 +1207,7 @@ Status CatalogManager::PrepareNamespace(
ns_entry.set_state(SysNamespaceEntryPB::RUNNING);

// Create in memory object.
scoped_refptr<NamespaceInfo> ns = new NamespaceInfo(id);
ns = new NamespaceInfo(id);

// Prepare write.
auto l = ns->LockForWrite();
Expand Down Expand Up @@ -1417,8 +1418,7 @@ Status CatalogManager::AbortTableCreation(TableInfo* table,
}

auto table_ids_map_checkout = table_ids_map_.CheckOut();
CHECK_EQ(table_names_map_.erase({table_namespace_id, table_name}), 1)
<< "Unable to erase table named " << table_name << " from table names map.";
table_names_map_.erase({table_namespace_id, table_name}); // Not present if PGSQL table.
CHECK_EQ(table_ids_map_checkout->erase(table_id), 1)
<< "Unable to erase table with id " << table_id << " from table ids map.";

Expand Down Expand Up @@ -4567,8 +4567,12 @@ Status CatalogManager::CreateNamespace(const CreateNamespaceRequestPB* req,

// Validate the user request.

// Verify that the namespace does not exist (no matter what state).
ns = FindPtrOrNull(namespace_names_mapper_[db_type], req->name());
// Verify that the namespace does not already exist.
ns = FindPtrOrNull(namespace_ids_map_, req->namespace_id()); // Same ID.
if (ns == nullptr && db_type != YQL_DATABASE_PGSQL) {
// PGSQL databases have name uniqueness handled at a different layer, so ignore overlaps.
ns = FindPtrOrNull(namespace_names_mapper_[db_type], req->name());
}
if (ns != nullptr) {
resp->set_id(ns->id());
return_status = STATUS_SUBSTITUTE(AlreadyPresent, "Keyspace '$0' already exists",
Expand Down Expand Up @@ -4782,7 +4786,7 @@ void CatalogManager::ProcessPendingNamespace(
auto s = sys_catalog_->UpdateItem(ns.get(), leader_ready_term());
if (s.ok()) {
TRACE("Done processing keyspace");
LOG(INFO) << "Activated keyspace: " << ns->ToString();
LOG(INFO) << (success ? "Processed" : "Failed") << " keyspace: " << ns->ToString();
} else {
metadata.set_state(SysNamespaceEntryPB::FAILED);
if (s.IsIllegalState() || s.IsAborted()) {
Expand Down Expand Up @@ -4853,6 +4857,9 @@ Status CatalogManager::IsCreateNamespaceDone(const IsCreateNamespaceDoneRequestP
break;
// Failure cases. Done, but we need to give the user an error message.
case SysNamespaceEntryPB::FAILED:
resp->set_done(true);
return SetupError(resp->mutable_error(), MasterErrorPB::UNKNOWN_ERROR, STATUS(InternalError,
"Namespace Create Failed: not onlined."));
default:
Status s = STATUS_SUBSTITUTE(IllegalState,
"IsCreateNamespaceDone failure: state=$0", metadata.state());
Expand Down Expand Up @@ -4958,10 +4965,9 @@ Status CatalogManager::DeleteNamespace(const DeleteNamespaceRequestPB* req,
// Remove the namespace from all CatalogManager mappings.
{
std::lock_guard<LockType> l_map(lock_);
if (namespace_names_mapper_[ns->database_type()].erase(ns->name()) < 1 ||
namespace_ids_map_.erase(ns->id()) < 1) {
LOG(WARNING) << Format("Could not remove namespace from maps, name=$0, id=$1",
ns->name(), ns->id());
namespace_names_mapper_[ns->database_type()].erase(ns->name());
if (namespace_ids_map_.erase(ns->id()) < 1) {
LOG(WARNING) << Format("Could not remove namespace from maps, id=$1", ns->id());
}
}

Expand Down Expand Up @@ -5250,7 +5256,7 @@ Status CatalogManager::AlterNamespace(const AlterNamespaceRequestPB* req,
ns->database_type() == req->namespace_().database_type()) {
Status s = STATUS_SUBSTITUTE(AlreadyPresent,
"Keyspace '$0' already exists", ns->name());
LOG(WARNING) << "Found keyspace: " << ns->id() << ". Failed alterring keyspace with error: "
LOG(WARNING) << "Found keyspace: " << ns->id() << ". Failed altering keyspace with error: "
<< s.ToString() << " Request:\n" << req->DebugString();
return SetupError(resp->mutable_error(), MasterErrorPB::OBJECT_ALREADY_PRESENT, s);
}
Expand Down
4 changes: 2 additions & 2 deletions src/yb/master/master-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1219,7 +1219,7 @@ TEST_F(MasterTest, TestNamespaceCreateStates) {

// Finish Namespace create.
SetAtomicFlag(false, &FLAGS_TEST_hang_on_namespace_transition);
CreateNamespaceWait(test_name, YQLDatabase::YQL_DATABASE_PGSQL);
CreateNamespaceWait(nsid, YQLDatabase::YQL_DATABASE_PGSQL);

// Verify that Basic Access to a Namespace is now available.
// 1. Create a Table within the Schema.
Expand Down Expand Up @@ -1258,7 +1258,7 @@ TEST_F(MasterTest, TestNamespaceCreateStates) {

// We should be able to create a namespace with the same NAME at this time.
ASSERT_OK(CreateNamespaceAsync("new_" + test_name, YQLDatabase::YQL_DATABASE_PGSQL, &resp));
CreateNamespaceWait("new_" + test_name, YQLDatabase::YQL_DATABASE_PGSQL);
CreateNamespaceWait(resp.id(), YQLDatabase::YQL_DATABASE_PGSQL);
}
}

Expand Down
Loading

0 comments on commit 7d7f423

Please sign in to comment.