Skip to content

Commit

Permalink
[PLAT-13282] Support manually decommissioning a node from the on-prem…
Browse files Browse the repository at this point in the history
… provider

Summary:
Allow a user to manually move a node instance from the `FREE` state to the `DECOMMISSIONED` state. This lets the user prevent the node from being picked up when a new universe is created. A node decommissioned this way skips the node cleanup when the user later recommissions it.

Endpoint call is as follows:

```
curl --location --request PUT  '<YBA_base_url>:9000/api/v1/customers/<customer_uuid>/providers/<provider_uuid>/instances/<node_instance_ip>/state' \
--header 'X-AUTH-YW-API-TOKEN: <api-token>' \
--header 'Content-Type: application/json' \
--data '{
    "state": "DECOMMISSIONED"

}'
```

Also, add an improvement to prevent a race condition in which a submitted task could finish before its customer task object is saved to the YBA db.

Test Plan:
Added UTs for all below scenarios:

1. User has a node in the `FREE` state and calls the endpoint to set the node instance to the `DECOMMISSIONED` state. Validate that the `manually_decommissioned` bit is set to true. If the user then calls the same endpoint with the state set to `FREE`, the node instance should move to the `FREE` state with the `manually_decommissioned` bit in the YBA db set to false. Also, validate that no cleanup is done.

2. Validate the original behavior: a node is placed in the `DECOMMISSIONED` state if node cleanup fails, and in that case the `manually_decommissioned` bit is set to false. Also, if we call the endpoint to set that node to the `FREE` state, it will run the cleanup.

Reviewers: nsingh, sanketh, hzare, rmadhavan

Reviewed By: nsingh

Subscribers: sanketh, yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D39747
  • Loading branch information
charleswang234 committed Nov 19, 2024
1 parent e8c9e7d commit ae0af56
Show file tree
Hide file tree
Showing 14 changed files with 356 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import com.yugabyte.yw.models.TaskInfo;
import com.yugabyte.yw.models.Universe;
import com.yugabyte.yw.models.helpers.TaskType;
import io.ebean.annotation.Transactional;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
Expand All @@ -44,6 +45,7 @@
import java.util.concurrent.ThreadFactory;
import java.util.function.Consumer;
import java.util.function.Predicate;
import javax.annotation.Nullable;
import lombok.extern.slf4j.Slf4j;
import org.slf4j.MDC;
import play.inject.ApplicationLifecycle;
Expand Down Expand Up @@ -124,6 +126,22 @@ public UUID submit(TaskType taskType, ITaskParams taskParams) {
* @param taskUUID the task UUID
*/
public UUID submit(TaskType taskType, ITaskParams taskParams, UUID taskUUID) {
// Delegate to the four-argument overload, passing null for the optional pre-submit callback.
return submit(taskType, taskParams, taskUUID, null);
}

/**
* Creates a new task runnable to run the required task, and submits it to the TaskExecutor.
*
* @param taskType the task type.
* @param taskParams the task parameters.
* @param taskUUID the task UUID
* @param preTaskSubmitWork function to run before task submission
*/
public UUID submit(
TaskType taskType,
ITaskParams taskParams,
UUID taskUUID,
@Nullable Consumer<RunnableTask> preTaskSubmitWork) {
RunnableTask taskRunnable = null;
try {
if (runtimeConfGetter.getGlobalConf(
Expand All @@ -135,7 +153,7 @@ public UUID submit(TaskType taskType, ITaskParams taskParams, UUID taskUUID) {
"Executing TaskType {} with params {}", taskType.toString(), redactedJson.toString());
}
// Create the task runnable object based on the various parameters passed in.
taskRunnable = taskExecutor.createRunnableTask(taskType, taskParams, taskUUID);
taskRunnable = createRunnableTask(taskType, taskParams, taskUUID, preTaskSubmitWork);
// Add the consumer to handle before task if available.
taskRunnable.setTaskExecutionListener(getTaskExecutionListener());
onTaskCreated(taskRunnable, taskParams);
Expand All @@ -147,7 +165,7 @@ public UUID submit(TaskType taskType, ITaskParams taskParams, UUID taskUUID) {
TaskInfo taskInfo = taskRunnable.getTaskInfo();
if (taskInfo.getTaskState() != TaskInfo.State.Failure) {
taskInfo.setTaskState(TaskInfo.State.Failure);
taskInfo.save();
taskInfo.update();
}
}
String msg = "Error processing " + taskType + " task for " + taskParams.toString();
Expand All @@ -159,6 +177,20 @@ public UUID submit(TaskType taskType, ITaskParams taskParams, UUID taskUUID) {
}
}

/**
 * Builds the runnable task through the task executor and, when a hook is supplied, invokes the
 * hook with the freshly created task before handing it back to the caller.
 *
 * <p>NOTE(review): the method is annotated with Ebean's {@code @Transactional}, presumably so
 * that task creation and the hook's persistence work commit together; confirm that the
 * annotation is honored on this private method.
 *
 * @param taskType the task type.
 * @param taskParams the task parameters.
 * @param taskUUID the task UUID passed through to the task executor.
 * @param preTaskSubmitWork optional hook to run on the created task; may be {@code null}.
 * @return the runnable task created by the task executor.
 */
@Transactional
private RunnableTask createRunnableTask(
    TaskType taskType,
    ITaskParams taskParams,
    UUID taskUUID,
    @Nullable Consumer<RunnableTask> preTaskSubmitWork) {
  final RunnableTask created = taskExecutor.createRunnableTask(taskType, taskParams, taskUUID);
  if (preTaskSubmitWork == null) {
    // No hook was supplied; return the task untouched.
    return created;
  }
  preTaskSubmitWork.accept(created);
  return created;
}

private void onTaskCreated(RunnableTask taskRunnable, ITaskParams taskParams) {
providerEditRestrictionManager.onTaskCreated(
taskRunnable.getTaskUUID(), taskRunnable.getTask(), taskParams);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright (c) YugaByte, Inc.

package com.yugabyte.yw.commissioner.tasks;

import com.yugabyte.yw.commissioner.AbstractTaskBase;
import com.yugabyte.yw.commissioner.BaseTaskDependencies;
import com.yugabyte.yw.commissioner.tasks.params.DetachedNodeTaskParams;
import com.yugabyte.yw.models.NodeInstance;
import com.yugabyte.yw.models.NodeInstance.State;
import javax.inject.Inject;
import lombok.extern.slf4j.Slf4j;

/**
 * Task that moves a node instance from the {@code FREE} state to the {@code DECOMMISSIONED}
 * state and marks it as manually decommissioned.
 */
@Slf4j
public class DecommissionNodeInstance extends AbstractTaskBase {

  @Inject
  protected DecommissionNodeInstance(BaseTaskDependencies baseTaskDependencies) {
    super(baseTaskDependencies);
  }

  /** Returns the task parameters cast to {@link DetachedNodeTaskParams}. */
  @Override
  protected DetachedNodeTaskParams taskParams() {
    return (DetachedNodeTaskParams) taskParams;
  }

  /**
   * Looks up the node instance identified by the task parameters and, if it is currently {@code
   * FREE}, sets it to {@code DECOMMISSIONED} with the manually-decommissioned flag turned on.
   *
   * @throws RuntimeException if the node instance is not in the {@code FREE} state.
   */
  @Override
  public void run() {
    NodeInstance nodeInstance = NodeInstance.getOrBadRequest(taskParams().getNodeUuid());

    // Only a FREE node may be manually decommissioned; reject every other state.
    if (nodeInstance.getState() != State.FREE) {
      throw new RuntimeException(
          String.format(
              "Node instance %s in %s state cannot be manually decommissioned. Node instance must"
                  + " be in %s state to be manually decommissioned.",
              nodeInstance.getNodeUuid(), nodeInstance.getState(), State.FREE));
    }

    nodeInstance.setState(State.DECOMMISSIONED);
    // Record that this state was requested by the user.
    nodeInstance.setManuallyDecommissioned(true);
    nodeInstance.update();
    log.debug(
        "Successfully set node instance {} to {} state",
        nodeInstance.getNodeUuid(),
        State.DECOMMISSIONED);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,34 @@ protected DetachedNodeTaskParams taskParams() {
public void run() {
NodeInstance nodeInstance = NodeInstance.getOrBadRequest(taskParams().getNodeUuid());

try {
ShellResponse response =
nodeManager
.detachedNodeCommand(NodeManager.NodeCommandType.Destroy, taskParams())
.processErrors();
} catch (Exception e) {
log.error("Clean up failed for node instance: {}", nodeInstance.getNodeUuid(), e);
throw e;
if (nodeInstance.getState() != NodeInstance.State.DECOMMISSIONED) {
throw new RuntimeException(
String.format(
"Node instance %s in %s state cannot be recommissioned. Node instance must be in %s"
+ " state to be recommissioned.",
nodeInstance.getNodeUuid(),
nodeInstance.getState(),
NodeInstance.State.DECOMMISSIONED));
}

if (!nodeInstance.isManuallyDecommissioned()) {
log.debug("Cleaning up node instance {}", nodeInstance.getNodeUuid());
try {
ShellResponse response =
nodeManager
.detachedNodeCommand(NodeManager.NodeCommandType.Destroy, taskParams())
.processErrors();
} catch (Exception e) {
log.error("Clean up failed for node instance: {}", nodeInstance.getNodeUuid(), e);
throw e;
}
log.debug("Successfully cleaned up node instance: {}", nodeInstance.getNodeUuid());
} else {
log.debug(
"Skipping clean up node instance {} as node instance was manually decommissioned by user",
nodeInstance.getNodeUuid());
}

log.debug("Successfully cleaned up node instance: {}", nodeInstance.getNodeUuid());
nodeInstance.clearNodeDetails();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -592,15 +592,7 @@ public Result updateState(
throw new PlatformServiceException(
CONFLICT, "Node " + node.getNodeUuid() + " has incomplete tasks");
}
UUID taskUUID = nodeInstanceHandler.updateState(payload, node, provider);

CustomerTask.create(
c,
node.getNodeUuid(),
taskUUID,
CustomerTask.TargetType.Node,
CustomerTask.TaskType.Update,
node.getNodeName());
UUID taskUUID = nodeInstanceHandler.updateState(payload, node, provider, c);

auditService()
.createAuditEntryWithReqBody(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
import static play.mvc.Http.Status.BAD_REQUEST;

import com.yugabyte.yw.commissioner.Commissioner;
import com.yugabyte.yw.commissioner.TaskExecutor.RunnableTask;
import com.yugabyte.yw.commissioner.tasks.params.DetachedNodeTaskParams;
import com.yugabyte.yw.common.PlatformServiceException;
import com.yugabyte.yw.forms.NodeInstanceStateFormData;
import com.yugabyte.yw.models.Customer;
import com.yugabyte.yw.models.CustomerTask;
import com.yugabyte.yw.models.NodeInstance;
import com.yugabyte.yw.models.Provider;
import com.yugabyte.yw.models.helpers.TaskType;
import java.util.UUID;
import java.util.function.Consumer;
import javax.inject.Inject;
import javax.inject.Singleton;

Expand All @@ -26,8 +30,18 @@ public NodeInstanceHandler(Commissioner commissioner) {
}

public UUID updateState(
NodeInstanceStateFormData payload, NodeInstance nodeInstance, Provider provider) {
NodeInstanceStateFormData payload, NodeInstance nodeInstance, Provider provider, Customer c) {
NodeInstance.State nodeState = nodeInstance.getState();
Consumer<RunnableTask> customerTaskCreation =
runnableTask -> {
CustomerTask.create(
c,
nodeInstance.getNodeUuid(),
runnableTask.getTaskUUID(),
CustomerTask.TargetType.Node,
CustomerTask.TaskType.Update,
nodeInstance.getNodeName());
};

// Decommissioned -> Free.
if (nodeState == NodeInstance.State.DECOMMISSIONED
Expand All @@ -36,7 +50,16 @@ public UUID updateState(
taskParams.setNodeUuid(nodeInstance.getNodeUuid());
taskParams.setInstanceType(nodeInstance.getInstanceTypeCode());
taskParams.setAzUuid(nodeInstance.getZoneUuid());
return commissioner.submit(TaskType.RecommissionNodeInstance, taskParams);
return commissioner.submit(
TaskType.RecommissionNodeInstance, taskParams, null, customerTaskCreation);
} else if (nodeState == NodeInstance.State.FREE
&& payload.state == NodeInstance.State.DECOMMISSIONED) {
DetachedNodeTaskParams taskParams = new DetachedNodeTaskParams();
taskParams.setNodeUuid(nodeInstance.getNodeUuid());
taskParams.setInstanceType(nodeInstance.getInstanceTypeCode());
taskParams.setAzUuid(nodeInstance.getZoneUuid());
return commissioner.submit(
TaskType.DecommissionNodeInstance, taskParams, null, customerTaskCreation);
}

throw new PlatformServiceException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ public enum State {
@Enumerated(EnumType.STRING)
private State state;

@Column(nullable = false)
@ApiModelProperty(value = "Manually set to decommissioned state by user", accessMode = READ_ONLY)
private boolean manuallyDecommissioned;

@DbJson @JsonIgnore private UniverseMetadata universeMetadata;

@Getter(AccessLevel.NONE)
Expand Down Expand Up @@ -181,6 +185,7 @@ public void clearNodeDetails() {
this.setState(State.FREE);
this.setNodeName("");
this.universeMetadata = null;
this.setManuallyDecommissioned(false);
this.save();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,11 @@ public enum TaskType {
CustomerTask.TaskType.Update,
CustomerTask.TargetType.Node),

DecommissionNodeInstance(
com.yugabyte.yw.commissioner.tasks.DecommissionNodeInstance.class,
CustomerTask.TaskType.Update,
CustomerTask.TargetType.Node),

MasterFailover(
com.yugabyte.yw.commissioner.tasks.MasterFailover.class,
CustomerTask.TaskType.MasterFailover,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Copyright (c) YugaByte, Inc.

-- Add the manually_decommissioned flag to node_instance; it is non-nullable and defaults to false.
ALTER TABLE IF EXISTS node_instance
    ADD COLUMN IF NOT EXISTS manually_decommissioned BOOLEAN DEFAULT false NOT NULL;
Loading

0 comments on commit ae0af56

Please sign in to comment.