Skip to content

Commit

Permalink
[PLAT-10879] Update StopNodeInUniverse task to new lock and freeze task changes
Browse files Browse the repository at this point in the history

Summary: Refactor StopNodeInUniverse task to accommodate new task freezing changes.

Test Plan:
Create a 3 node rf3 universe. Perform StopNodeInUniverse task.

Make sure that this succeeds. Also try to simulate a validateParams failure on StopNodeInUniverse (for example, set the nodeDetails state to a different state, such as "Removed"). Check that we do not generate the task.

Also tried to simulate under-replicated tablets and then tried to stop a node. The task should fail but should not freeze the universe, i.e., we should be able to perform other tasks afterwards.

Reviewers: sanketh, nsingh

Reviewed By: nsingh

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D30242
  • Loading branch information
charleswang234 committed Dec 4, 2023
1 parent c74f258 commit 5fbfa76
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 116 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import com.yugabyte.yw.common.DnsManager;
import com.yugabyte.yw.common.config.GlobalConfKeys;
import com.yugabyte.yw.common.config.RuntimeConfGetter;
import com.yugabyte.yw.common.config.UniverseConfKeys;
import com.yugabyte.yw.forms.NodeActionFormData;
import com.yugabyte.yw.models.Universe;
import com.yugabyte.yw.models.helpers.NodeDetails;
Expand All @@ -33,8 +32,6 @@
@Retryable
public class StopNodeInUniverse extends UniverseDefinitionTaskBase {

protected boolean isBlacklistLeaders;
protected int leaderBacklistWaitTimeMs;
@Inject private RuntimeConfGetter confGetter;

@Inject
Expand All @@ -56,125 +53,119 @@ protected NodeDetails findNewMasterIfApplicable(Universe universe, NodeDetails c
}

@Override
public void run() {
public void validateParams(boolean isFirstTry) {
super.validateParams(isFirstTry);
Universe universe = getUniverse();
NodeDetails currentNode = universe.getNode(taskParams().nodeName);
if (currentNode == null) {
String msg = "No node " + taskParams().nodeName + " found in universe " + universe.getName();
log.error(msg);
throw new RuntimeException(msg);
}
}

try {
checkUniverseVersion();

// Set the 'updateInProgress' flag to prevent other updates from happening.
Universe universe =
lockUniverseForUpdate(
taskParams().expectedUniverseVersion,
u -> {
if (isFirstTry()) {
NodeDetails node = u.getNode(taskParams().nodeName);
if (node == null) {
String msg =
"No node " + taskParams().nodeName + " found in universe " + u.getName();
log.error(msg);
throw new RuntimeException(msg);
}
if (node.isMaster) {
NodeDetails newMasterNode = findNewMasterIfApplicable(u, node);
if (newMasterNode != null && newMasterNode.masterState == null) {
newMasterNode.masterState = MasterState.ToStart;
}
node.masterState = MasterState.ToStop;
}
}
});

log.info(
"Stop Node with name {} from universe {} ({})",
taskParams().nodeName,
taskParams().getUniverseUUID(),
universe.getName());

isBlacklistLeaders =
confGetter.getConfForScope(universe, UniverseConfKeys.ybUpgradeBlacklistLeaders);
leaderBacklistWaitTimeMs =
confGetter.getConfForScope(universe, UniverseConfKeys.ybUpgradeBlacklistLeaderWaitTimeMs);

NodeDetails currentNode = universe.getNode(taskParams().nodeName);
if (currentNode == null) {
String msg =
"No node " + taskParams().nodeName + " found in universe " + universe.getName();
log.error(msg);
throw new RuntimeException(msg);
}
preTaskActions();
List<NodeDetails> nodeList = Collections.singletonList(currentNode);
@Override
protected void createPrecheckTasks(Universe universe) {
NodeDetails currentNode = universe.getNode(taskParams().nodeName);
if (currentNode.isTserver) {
createNodePrecheckTasks(
currentNode,
EnumSet.of(ServerType.TSERVER),
SubTaskGroupType.StoppingNodeProcesses,
null);
}
}

if (currentNode.isTserver) {
clearLeaderBlacklistIfAvailable(SubTaskGroupType.StoppingNodeProcesses);
@Override
protected void freezeUniverseInTxn(Universe universe) {
NodeDetails node = universe.getNode(taskParams().nodeName);
if (node == null) {
String msg = "No node " + taskParams().nodeName + " found in universe " + universe.getName();
log.error(msg);
throw new RuntimeException(msg);
}
if (node.isMaster) {
NodeDetails newMasterNode = findNewMasterIfApplicable(universe, node);
if (newMasterNode != null && newMasterNode.masterState == null) {
newMasterNode.masterState = MasterState.ToStart;
}
node.masterState = MasterState.ToStop;
}
}

// Update Node State to Stopping
createSetNodeStateTask(currentNode, NodeState.Stopping)
.setSubTaskGroupType(SubTaskGroupType.StoppingNodeProcesses);
@Override
public void run() {
super.runUpdateTasks(this::runTask);
}

private void runTask() {
log.info(
"Stop Node with name {} from universe uuid={}",
taskParams().nodeName,
taskParams().getUniverseUUID());

Universe universe = getUniverse();
NodeDetails currentNode = universe.getNode(taskParams().nodeName);

preTaskActions();
List<NodeDetails> nodeList = Collections.singletonList(currentNode);

if (currentNode.isTserver) {
clearLeaderBlacklistIfAvailable(SubTaskGroupType.StoppingNodeProcesses);
}

// Update Node State to Stopping
createSetNodeStateTask(currentNode, NodeState.Stopping)
.setSubTaskGroupType(SubTaskGroupType.StoppingNodeProcesses);

taskParams().azUuid = currentNode.azUuid;
taskParams().placementUuid = currentNode.placementUuid;
boolean instanceExists = instanceExists(taskParams());
if (instanceExists) {
if (currentNode.isTserver) {
createNodePrecheckTasks(
stopProcessesOnNode(
currentNode,
EnumSet.of(ServerType.TSERVER),
SubTaskGroupType.StoppingNodeProcesses,
null);
true,
false,
SubTaskGroupType.StoppingNodeProcesses);
// Remove leader blacklist.
removeFromLeaderBlackListIfAvailable(nodeList, SubTaskGroupType.StoppingNodeProcesses);
}

taskParams().azUuid = currentNode.azUuid;
taskParams().placementUuid = currentNode.placementUuid;
boolean instanceExists = instanceExists(taskParams());
if (instanceExists) {
if (currentNode.isTserver) {
stopProcessesOnNode(
currentNode,
EnumSet.of(ServerType.TSERVER),
true,
false,
SubTaskGroupType.StoppingNodeProcesses);
// Remove leader blacklist.
removeFromLeaderBlackListIfAvailable(nodeList, SubTaskGroupType.StoppingNodeProcesses);
}

// Stop Yb-controller on this node.
if (universe.isYbcEnabled()) {
createStopYbControllerTasks(nodeList)
.setSubTaskGroupType(SubTaskGroupType.StoppingNodeProcesses);
}
}
if (currentNode.isTserver) {
// Update the per process state in YW DB.
createUpdateNodeProcessTask(taskParams().nodeName, ServerType.TSERVER, false)
// Stop Yb-controller on this node.
if (universe.isYbcEnabled()) {
createStopYbControllerTasks(nodeList)
.setSubTaskGroupType(SubTaskGroupType.StoppingNodeProcesses);
}
}
if (currentNode.isTserver) {
// Update the per process state in YW DB.
createUpdateNodeProcessTask(taskParams().nodeName, ServerType.TSERVER, false)
.setSubTaskGroupType(SubTaskGroupType.StoppingNodeProcesses);
}

createMasterReplacementTasks(
universe,
currentNode,
() -> findNewMasterIfApplicable(universe, currentNode),
instanceExists);
createMasterReplacementTasks(
universe,
currentNode,
() -> findNewMasterIfApplicable(universe, currentNode),
instanceExists);

// Update Node State to Stopped
createSetNodeStateTask(currentNode, NodeState.Stopped)
.setSubTaskGroupType(SubTaskGroupType.StoppingNode);
// Update Node State to Stopped
createSetNodeStateTask(currentNode, NodeState.Stopped)
.setSubTaskGroupType(SubTaskGroupType.StoppingNode);

// Update the swamper target file.
createSwamperTargetUpdateTask(false /* removeFile */);
// Update the swamper target file.
createSwamperTargetUpdateTask(false /* removeFile */);

// Update the DNS entry for this universe.
createDnsManipulationTask(DnsManager.DnsCommandType.Edit, false, universe)
.setSubTaskGroupType(SubTaskGroupType.StoppingNode);
// Update the DNS entry for this universe.
createDnsManipulationTask(DnsManager.DnsCommandType.Edit, false, universe)
.setSubTaskGroupType(SubTaskGroupType.StoppingNode);

// Mark universe task state to success
createMarkUniverseUpdateSuccessTasks().setSubTaskGroupType(SubTaskGroupType.StoppingNode);
// Mark universe task state to success
createMarkUniverseUpdateSuccessTasks().setSubTaskGroupType(SubTaskGroupType.StoppingNode);

getRunnableTask().runSubTasks();
} catch (Throwable t) {
log.error("Error executing task {}, error='{}'", getName(), t.getMessage(), t);
throw t;
} finally {
unlockUniverseForUpdate();
}
getRunnableTask().runSubTasks();

log.info("Finished {} task.", getName());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

import static com.yugabyte.yw.common.AssertHelper.assertJsonEqual;
import static com.yugabyte.yw.common.ModelFactory.createUniverse;
import static com.yugabyte.yw.models.TaskInfo.State.Failure;
import static com.yugabyte.yw.models.TaskInfo.State.Success;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
Expand All @@ -28,6 +28,7 @@
import com.yugabyte.yw.common.ModelFactory;
import com.yugabyte.yw.common.NodeManager;
import com.yugabyte.yw.common.PlacementInfoUtil;
import com.yugabyte.yw.common.PlatformServiceException;
import com.yugabyte.yw.common.ShellResponse;
import com.yugabyte.yw.controllers.UniverseControllerRequestBinder;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams;
Expand Down Expand Up @@ -154,9 +155,10 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {

private static final List<TaskType> STOP_NODE_TASK_SEQUENCE =
ImmutableList.of(
TaskType.CheckUnderReplicatedTablets,
TaskType.FreezeUniverse,
TaskType.ModifyBlackList,
TaskType.SetNodeState,
TaskType.CheckUnderReplicatedTablets,
TaskType.ModifyBlackList,
TaskType.WaitForLeaderBlacklistCompletion,
TaskType.AnsibleClusterServerCtl,
Expand All @@ -169,9 +171,10 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {
private static final List<JsonNode> STOP_NODE_TASK_EXPECTED_RESULTS =
ImmutableList.of(
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("process", "tserver", "command", "stop")),
Json.toJson(ImmutableMap.of()),
Expand All @@ -182,9 +185,10 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {

private static final List<TaskType> STOP_NODE_WITH_YBC_TASK_SEQUENCE =
ImmutableList.of(
TaskType.CheckUnderReplicatedTablets,
TaskType.FreezeUniverse,
TaskType.ModifyBlackList,
TaskType.SetNodeState,
TaskType.CheckUnderReplicatedTablets,
TaskType.ModifyBlackList,
TaskType.WaitForLeaderBlacklistCompletion,
TaskType.AnsibleClusterServerCtl,
Expand All @@ -198,9 +202,10 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {
private static final List<JsonNode> STOP_NODE_WITH_YBC_TASK_EXPECTED_RESULTS =
ImmutableList.of(
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("process", "tserver", "command", "stop")),
Json.toJson(ImmutableMap.of()),
Expand All @@ -212,9 +217,10 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {

private static final List<TaskType> STOP_NODE_TASK_SEQUENCE_MASTER =
ImmutableList.of(
TaskType.CheckUnderReplicatedTablets,
TaskType.FreezeUniverse,
TaskType.ModifyBlackList,
TaskType.SetNodeState,
TaskType.CheckUnderReplicatedTablets,
TaskType.ModifyBlackList,
TaskType.WaitForLeaderBlacklistCompletion,
TaskType.AnsibleClusterServerCtl,
Expand All @@ -236,9 +242,10 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {
private static final List<JsonNode> STOP_NODE_TASK_SEQUENCE_MASTER_RESULTS =
ImmutableList.of(
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("process", "tserver", "command", "stop")),
Json.toJson(ImmutableMap.of()),
Expand All @@ -258,9 +265,10 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {

private static final List<TaskType> STOP_NODE_WITH_YBC_TASK_SEQUENCE_MASTER =
ImmutableList.of(
TaskType.CheckUnderReplicatedTablets,
TaskType.FreezeUniverse,
TaskType.ModifyBlackList,
TaskType.SetNodeState,
TaskType.CheckUnderReplicatedTablets,
TaskType.ModifyBlackList,
TaskType.WaitForLeaderBlacklistCompletion,
TaskType.AnsibleClusterServerCtl,
Expand All @@ -283,9 +291,10 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {
private static final List<JsonNode> STOP_NODE_WITH_YBC_TASK_SEQUENCE_MASTER_RESULTS =
ImmutableList.of(
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("process", "tserver", "command", "stop")),
Json.toJson(ImmutableMap.of()),
Expand All @@ -306,6 +315,7 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {

private static final List<TaskType> STOP_NODE_TASK_SEQUENCE_DEDICATED_MASTER =
ImmutableList.of(
TaskType.FreezeUniverse,
TaskType.SetNodeState,
TaskType.ChangeMasterConfig,
TaskType.AnsibleClusterServerCtl,
Expand All @@ -322,6 +332,7 @@ private TaskInfo submitTask(NodeTaskParams taskParams, String nodeName) {

private static final List<JsonNode> STOP_NODE_DEDICATED_MASTER_EXPECTED_RESULTS =
ImmutableList.of(
Json.toJson(ImmutableMap.of()),
Json.toJson(ImmutableMap.of("state", "Stopping")),
Json.toJson(ImmutableMap.of("opType", "RemoveMaster")),
Json.toJson(ImmutableMap.of("process", "master", "command", "stop")),
Expand Down Expand Up @@ -557,9 +568,8 @@ public void testStopUnknownNode() {
UniverseControllerRequestBinder.deepCopy(
defaultUniverse.getUniverseDetails(), NodeTaskParams.class);
taskParams.setUniverseUUID(defaultUniverse.getUniverseUUID());
TaskInfo taskInfo = submitTask(taskParams, "host-n9");
verify(mockNodeManager, times(0)).nodeCommand(any(), any());
assertEquals(Failure, taskInfo.getTaskState());
// Throws at validateParams check.
assertThrows(PlatformServiceException.class, () -> submitTask(taskParams, "host-n9"));
}

@Test
Expand Down

0 comments on commit 5fbfa76

Please sign in to comment.