Skip to content

Commit

Permalink
[Backport 2.8] [#11799] YSQL/Docdb: Update all tablets when a remote …
Browse files Browse the repository at this point in the history
…server is unreachable

Summary:
For network errors YBClient/Metacache should not only update the specific tablet but should
also MarkTSFailed() to help share the knowledge with other tablets.

This can improve the recovery time, esp for cases with a lot of tablets.

Also introducing a new Gflag `update_all_tablets_upon_network_failure` (defaults to `true`) which can be used to disable this feature.

Original Revision/Commit: https://phabricator.dev.yugabyte.com/D16073 1b8f992

Test Plan:
Jenkins: rebase 2.8

Jenkins + repro manually

1) Create a dev-cluster with a lot of tablets
`bin/yb-ctl restart --tserver_flags 'fail_whole_ts_upon_network_failure=true,txn_slow_op_threshold_ms=3000,enable_tracing=true,tracing_level=2,rpc_connection_timeout_ms=15000' --replication_factor 3 --ysql_num_shards_per_tserver 24`

2) Run yb-sample apps with 16 readers and 16 writers
```
java -jar yb-sample-apps.jar \
                        --workload SqlSecondaryIndex  \
                        --nodes $HOSTS \
                        --verbose true  --drop_table_name postgresqlkeyvalue --num_threads_read $NUM_READERS --num_threads_write $NUM_WRITERS \
                        --num_reads 15000000 --num_writes 75000000 \
```
3) Cause a network partition using `iptables drop` to isolate 127.0.0.3 and compare recovery times with and without the feature.

without this change, the recovery takes over 5 mins.
With the change, the operations recover in about 30-40sec.

Reviewers: timur, sergei, bogdan

Reviewed By: bogdan

Subscribers: kannan, ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D16185
  • Loading branch information
amitanandaiyer committed Mar 25, 2022
1 parent ac03db9 commit 29ecc1d
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 2 deletions.
15 changes: 14 additions & 1 deletion src/yb/client/tablet_rpc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@

DEFINE_test_flag(bool, assert_local_op, false,
"When set, we crash if we received an operation that cannot be served locally.");
DEFINE_bool(update_all_tablets_upon_network_failure, true, "If this is enabled, then "
"upon receiving a network error, we mark the remote server as being unreachable for "
"all tablets in metacache, instead of the single tablet which issued the rpc.");
TAG_FLAG(update_all_tablets_upon_network_failure, runtime);
DEFINE_int32(force_lookup_cache_refresh_secs, 0, "When non-zero, specifies how often we send a "
"GetTabletLocations request to the master leader to update the tablet replicas cache. "
"This request is only sent if we are processing a ConsistentPrefix read.");
Expand Down Expand Up @@ -262,6 +266,7 @@ bool TabletInvoker::ShouldUseNodeLocalForwardProxy() {

Status TabletInvoker::FailToNewReplica(const Status& reason,
const tserver::TabletServerErrorPB* error_code) {
TRACE_TO(trace_, "FailToNewReplica($0)", reason.ToString());
if (ErrorCode(error_code) == tserver::TabletServerErrorPB::STALE_FOLLOWER) {
VLOG(1) << "Stale follower for " << command_->ToString() << " just retry";
} else if (ErrorCode(error_code) == tserver::TabletServerErrorPB::NOT_THE_LEADER) {
Expand All @@ -284,6 +289,13 @@ Status TabletInvoker::FailToNewReplica(const Status& reason,
VLOG(1) << "Failing " << command_->ToString() << " to a new replica: " << reason
<< ", old replica: " << yb::ToString(current_ts_);

if (GetAtomicFlag(&FLAGS_update_all_tablets_upon_network_failure) &&
reason.IsHostUnreachable()) {
YB_LOG_EVERY_N_SECS(WARNING, 1) << "Marking TServer " << current_ts_->ToString()
<< " as unreachable due to " << reason.ToString();
client_->data_->meta_cache_->MarkTSFailed(current_ts_, reason);
}

bool found = !tablet_ || tablet_->MarkReplicaFailed(current_ts_, reason);
if (!found) {
// Its possible that current_ts_ is not part of replicas if RemoteTablet.Refresh() is invoked
Expand Down Expand Up @@ -315,6 +327,7 @@ bool TabletInvoker::Done(Status* status) {
*status = STATUS(Aborted, "Retrier finished");
}
}
TRACE_TO(trace_, "Done($0)", status->ToString(false));
return true;
}

Expand All @@ -327,7 +340,7 @@ bool TabletInvoker::Done(Status* status) {
//
// TODO: This is probably too harsh; some network failures should be
// retried on the current replica.
if (status->IsNetworkError()) {
if (status->IsHostUnreachable() || status->IsNetworkError()) {
// The whole operation is completed if we can't schedule a retry.
return !FailToNewReplica(*status).ok();
}
Expand Down
1 change: 1 addition & 0 deletions src/yb/common/wire_protocol.proto
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ message AppStatusPB {
MERGE_IN_PROGRESS = 28;
COMBINED_ERROR = 29;
SNAPSHOT_TOO_OLD = 30;
HOST_UNREACHABLE = 31;
}

required ErrorCode code = 1;
Expand Down
3 changes: 2 additions & 1 deletion src/yb/rpc/connection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,8 @@ void Connection::HandleTimeout(ev::timer& watcher, int revents) { // NOLINT
auto passed = reactor_->cur_time() - last_activity_time_;
reactor_->DestroyConnection(
this,
STATUS_FORMAT(NetworkError, "Connect timeout, passed: $0, timeout: $1", passed, timeout));
STATUS_FORMAT(
HostUnreachable, "Connect timeout, passed: $0, timeout: $1", passed, timeout));
return;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/yb/util/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ namespace yb {
((MergeInProgress, MERGE_IN_PROGRESS, 28, "Merge in progress")) \
((Combined, COMBINED_ERROR, 29, "Combined status representing multiple status failures.")) \
((SnapshotTooOld, SNAPSHOT_TOO_OLD, 30, "Snapshot too old")) \
((HostUnreachable, HOST_UNREACHABLE, 31, "Host unreachable")) \
/**/

#define YB_STATUS_CODE_DECLARE(name, pb_name, value, message) \
Expand Down

0 comments on commit 29ecc1d

Please sign in to comment.