Skip to content

Commit

Permalink
[yugabyte#6785] Platform: Make wait for server timeout configurable
Browse files Browse the repository at this point in the history
Summary: It seems that server startup time can vary with the amount of data in a universe, and some cases seem to exceed the hardcoded timeout of 2 minutes. Reading the timeout from the app config instead makes it modifiable with a simple platform restart. Once the in-progress changes for yugabyte#5975 land, this timeout will be runtime configurable.

Test Plan: Create a universe and ensure there isn't any regression.

Reviewers: wesley, arnav, sb-yb, spotachev, sanketh

Reviewed By: sanketh

Subscribers: jenkins-bot, yugaware

Differential Revision: https://phabricator.dev.yugabyte.com/D10261
  • Loading branch information
daniel-yb committed Jan 6, 2021
1 parent aad4771 commit 9bb89e5
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import com.fasterxml.jackson.databind.JsonNode;

import java.time.Duration;
import java.util.*;
import java.util.Map.Entry;

Expand All @@ -14,6 +15,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.typesafe.config.Config;
import org.apache.commons.lang3.StringUtils;
import org.yb.Common;
import org.yb.client.YBClient;
Expand Down Expand Up @@ -43,6 +45,8 @@ public abstract class UniverseTaskBase extends AbstractTaskBase {
// Flag to indicate if we have locked the universe.
private boolean universeLocked = false;

protected Config config;

// The task params.
@Override
protected UniverseTaskParams taskParams() {
Expand Down Expand Up @@ -182,6 +186,7 @@ public void initialize(ITaskParams params) {
super.initialize(params);
// Create the threadpool for the subtasks to use.
createThreadpool();
this.config = Play.current().injector().instanceOf(Config.class);
}

@Override
Expand Down Expand Up @@ -519,29 +524,33 @@ public SubTaskGroup createDeleteTableFromUniverseTask(DeleteTableFromUniverse.Pa
return subTaskGroup;
}

/**
 * Create a task list to ping all servers until they are up, using the timeout stored under
 * the "yb.wait_for_server_timeout" key of the app config.
 *
 * @param nodes : a collection of nodes that need to be pinged.
 * @param type  : Master or tserver type server running on these nodes.
 */
public SubTaskGroup createWaitForServersTasks(Collection<NodeDetails> nodes, ServerType type) {
  // Default timeout, read from the config on every call.
  final Duration defaultTimeout = config.getDuration("yb.wait_for_server_timeout");
  return createWaitForServersTasks(nodes, type, defaultTimeout);
}

/**
* Create a task list to ping all servers until they are up.
*
* @param nodes : a collection of nodes that need to be pinged.
* @param type : Master or tserver type server running on these nodes.
* @param timeoutMillis : time to wait for each rpc call to the server, in millisec.
* @param timeout : time to wait for each rpc call to the server.
*/
public SubTaskGroup createWaitForServersTasks(Collection<NodeDetails> nodes, ServerType type) {
return createWaitForServersTasks(nodes, type, -1 /* default timeout */);
}

public SubTaskGroup createWaitForServersTasks(Collection<NodeDetails> nodes,
ServerType type,
long timeoutMillis) {
public SubTaskGroup createWaitForServersTasks(
Collection<NodeDetails> nodes,
ServerType type,
Duration timeout
) {
SubTaskGroup subTaskGroup = new SubTaskGroup("WaitForServer", executor);
for (NodeDetails node : nodes) {
WaitForServer.Params params = new WaitForServer.Params();
params.universeUUID = taskParams().universeUUID;
params.nodeName = node.nodeName;
params.serverType = type;
if (timeoutMillis > 0) {
params.serverWaitTimeoutMs = timeoutMillis;
}
params.serverWaitTimeoutMs = timeout.toMillis();
WaitForServer task = new WaitForServer();
task.initialize(params);
subTaskGroup.addTask(task);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

package com.yugabyte.yw.commissioner.tasks.subtasks;

import com.typesafe.config.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yb.client.YBClient;
Expand All @@ -29,11 +30,9 @@ public class WaitForMasterLeader extends AbstractTaskBase {
// The YB client.
public YBClientService ybService = null;

// Timeout for failing to respond to pings.
private static final long TIMEOUT_SERVER_WAIT_MS = 120000;
public Config config;

public static class Params extends UniverseTaskParams {
}
public static class Params extends UniverseTaskParams {}

@Override
protected Params taskParams() {
Expand All @@ -44,6 +43,7 @@ protected Params taskParams() {
/**
 * Initializes the task with the given params, then obtains the collaborators it uses from
 * the Play injector: the YB client service and the app config.
 *
 * @param params the task params, forwarded to the superclass initializer.
 */
public void initialize(ITaskParams params) {
  super.initialize(params);
  // Both instances are looked up via Play.current().injector().
  this.ybService = Play.current().injector().instanceOf(YBClientService.class);
  this.config = Play.current().injector().instanceOf(Config.class);
}

@Override
Expand All @@ -60,7 +60,7 @@ public void run() {
try {
LOG.info("Running {}: hostPorts={}.", getName(), hostPorts);
client = ybService.getClient(hostPorts, certificate);
client.waitForMasterLeader(TIMEOUT_SERVER_WAIT_MS);
client.waitForMasterLeader(config.getDuration("yb.wait_for_server_timeout").toMillis());
} catch (Exception e) {
LOG.error("{} hit error : {}", getName(), e.getMessage());
throw new RuntimeException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,13 @@

import com.google.common.net.HostAndPort;
import com.yugabyte.yw.commissioner.tasks.params.ServerSubTaskParams;
import com.yugabyte.yw.commissioner.tasks.subtasks.ServerSubTaskBase;

import play.api.Play;

public class WaitForServer extends ServerSubTaskBase {
public static final Logger LOG = LoggerFactory.getLogger(WaitForServer.class);

// Timeout for failing to respond to pings.
private static final long TIMEOUT_SERVER_WAIT_MS = 120000;

public static class Params extends ServerSubTaskParams {
// Timeout for the RPC call.
public long serverWaitTimeoutMs = TIMEOUT_SERVER_WAIT_MS;
public long serverWaitTimeoutMs;
}

@Override
Expand All @@ -41,7 +35,7 @@ public void run() {

checkParams();

boolean ret = false;
boolean ret;
YBClient client = null;
long startMs = System.currentTimeMillis();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

package com.yugabyte.yw.controllers;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
Expand Down Expand Up @@ -51,6 +50,7 @@
import com.yugabyte.yw.models.helpers.PlacementInfo.PlacementRegion;
import com.yugabyte.yw.models.helpers.PlacementInfo.PlacementAZ;

import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
Expand All @@ -72,15 +72,11 @@
import org.yb.util.ServerInfo;


import play.api.Play;
import play.Configuration;
import play.data.Form;
import play.data.FormFactory;
import play.libs.Json;
import play.mvc.*;

import javax.persistence.PersistenceException;

public class ImportController extends AuthenticatedController {
public static final Logger LOG = LoggerFactory.getLogger(ImportController.class);

Expand All @@ -95,7 +91,7 @@ public class ImportController extends AuthenticatedController {
private static final long THREAD_ALIVE_TIME = 60L;

// The RPC timeouts.
private static final long RPC_TIMEOUT_MS = 5000L;
private static final Duration RPC_TIMEOUT_MS = Duration.ofMillis(5000L);

// Expected string for node exporter http request.
private static final String NODE_EXPORTER_RESP = "Node Exporter";
Expand Down
1 change: 1 addition & 0 deletions managed/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ yb {
metrics.url = "http://"${yb.metrics.host}":9090/api/v1"
storage.path="/opt/yugabyte"
platform_backup_frequency = -1 minutes
wait_for_server_timeout = 120000 ms
}

0 comments on commit 9bb89e5

Please sign in to comment.