-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[BACKPORT 2.23.0.3690][PLAT-14846] Add pause/resume dr config along w…
…ith associated universes workflow Summary: Original commit: f410277 / D37598 Add 2 APIs to allow both pausing/resuming DR config + pausing/resuming underlying VMs. To pause DR replication + pause both source and target universes: ``` curl --location --request POST 'http://localhost:9000/api/v1/customers/{customer_uuid}/dr_configs/{dr_config_uuid}/pause_universes' \ --header 'X-AUTH-YW-API-TOKEN: fef7705f-950f-42ae-b8e2-10173157fe8b' ``` To resume DR replication + resume both source and target universes: ``` curl --location --request POST 'http://localhost:9000/api/v1/customers/f33e3c9b-75ab-4c30-80ad-cba85646ea39/dr_configs/1a4990cb-3ea3-4c8b-ae0d-2d637cfd5c75/resume_universes' \ --header 'X-AUTH-YW-API-TOKEN: fef7705f-950f-42ae-b8e2-10173157fe8b' ``` During `pause_universes` we will set the gflag for tserver `log_min_seconds_to_retain` to a very high value both in memory and written to conf file before pausing replication and pausing the universes. During `resume_universes` we will resume/start the universes, wait for replication drain, and then revert the `log_min_seconds_to_retain` gflag from the tserver to its original value both in memory and in the tserver conf file. Test Plan: Added both UTs + local provider test. Manually tested the following scenario: 1. Set up db scoped DR replication. Set the `log_min_seconds_to_retain` to a random value. 2. Run the `pause_universes` api endpoint. During the task run, check that the tserver conf file has the `log_min_seconds_to_retain` set to a high value and that replication is disabled. 3. Run `resume_universes`. Make sure replication is enabled again, replication drain subtask is generated, and the tserver conf file has `log_min_seconds_to_retain` set back to original override. Also check the tserver UI for gflags and make sure it is set to the original override also. 4. Run some inserts into tables on the source universe and make sure they are replicated on the target universe. 
Reviewers: hzare, sanketh, jmak, spothuraju, amindrov Reviewed By: sanketh Subscribers: svc_phabricator, yugaware, anijhawan Differential Revision: https://phorge.dev.yugabyte.com/D38840
- Loading branch information
1 parent
9b82e22
commit 78485cc
Showing
17 changed files
with
1,043 additions
and
244 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 73 additions & 0 deletions
73
managed/src/main/java/com/yugabyte/yw/commissioner/tasks/PauseXClusterUniverses.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package com.yugabyte.yw.commissioner.tasks; | ||
|
||
import com.yugabyte.yw.commissioner.BaseTaskDependencies; | ||
import com.yugabyte.yw.commissioner.UserTaskDetails.SubTaskGroupType; | ||
import com.yugabyte.yw.common.XClusterUniverseService; | ||
import com.yugabyte.yw.models.Customer; | ||
import com.yugabyte.yw.models.Universe; | ||
import com.yugabyte.yw.models.XClusterConfig; | ||
import javax.inject.Inject; | ||
import lombok.extern.slf4j.Slf4j; | ||
|
||
@Slf4j | ||
public class PauseXClusterUniverses extends XClusterConfigTaskBase { | ||
|
||
@Inject | ||
protected PauseXClusterUniverses( | ||
BaseTaskDependencies baseTaskDependencies, XClusterUniverseService xClusterUniverseService) { | ||
super(baseTaskDependencies, xClusterUniverseService); | ||
} | ||
|
||
@Override | ||
public void run() { | ||
log.info("Running {}", getName()); | ||
|
||
XClusterConfig xClusterConfig = getXClusterConfigFromTaskParams(); | ||
Universe sourceUniverse = Universe.getOrBadRequest(xClusterConfig.getSourceUniverseUUID()); | ||
Universe targetUniverse = Universe.getOrBadRequest(xClusterConfig.getTargetUniverseUUID()); | ||
|
||
try { | ||
// Lock the source universe. | ||
lockAndFreezeUniverseForUpdate( | ||
sourceUniverse.getUniverseUUID(), sourceUniverse.getVersion(), null /* Txn callback */); | ||
try { | ||
// Lock the target universe. | ||
lockAndFreezeUniverseForUpdate( | ||
targetUniverse.getUniverseUUID(), targetUniverse.getVersion(), null /* Txn callback */); | ||
|
||
// Used in createUpdateWalRetentionTasks. | ||
taskParams().setUniverseUUID(sourceUniverse.getUniverseUUID()); | ||
taskParams().clusters = sourceUniverse.getUniverseDetails().clusters; | ||
createUpdateWalRetentionTasks(sourceUniverse, XClusterUniverseAction.PAUSE); | ||
|
||
createSetReplicationPausedTask(xClusterConfig, true /* pause */); | ||
|
||
createPauseUniverseTasks( | ||
sourceUniverse, Customer.get(sourceUniverse.getCustomerId()).getUuid()); | ||
|
||
taskParams().setUniverseUUID(targetUniverse.getUniverseUUID()); | ||
createPauseUniverseTasks( | ||
targetUniverse, Customer.get(targetUniverse.getCustomerId()).getUuid()); | ||
|
||
createMarkUniverseUpdateSuccessTasks(sourceUniverse.getUniverseUUID()) | ||
.setSubTaskGroupType(SubTaskGroupType.PauseUniverse); | ||
|
||
createMarkUniverseUpdateSuccessTasks(targetUniverse.getUniverseUUID()) | ||
.setSubTaskGroupType(SubTaskGroupType.PauseUniverse); | ||
|
||
getRunnableTask().runSubTasks(); | ||
|
||
} finally { | ||
// Unlock the target universe. | ||
unlockUniverseForUpdate(targetUniverse.getUniverseUUID()); | ||
} | ||
} catch (Throwable t) { | ||
log.error("{} hit error : {}", getName(), t.getMessage()); | ||
throw t; | ||
} finally { | ||
// Unlock the source universe. | ||
unlockUniverseForUpdate(sourceUniverse.getUniverseUUID()); | ||
} | ||
log.info("Completed {}", getName()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
managed/src/main/java/com/yugabyte/yw/commissioner/tasks/ResumeXClusterUniverses.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
// Copyright (c) YugaByte, Inc. | ||
|
||
package com.yugabyte.yw.commissioner.tasks; | ||
|
||
import com.yugabyte.yw.commissioner.BaseTaskDependencies; | ||
import com.yugabyte.yw.commissioner.UserTaskDetails.SubTaskGroupType; | ||
import com.yugabyte.yw.common.XClusterUniverseService; | ||
import com.yugabyte.yw.models.Customer; | ||
import com.yugabyte.yw.models.Universe; | ||
import com.yugabyte.yw.models.XClusterConfig; | ||
import javax.inject.Inject; | ||
import lombok.extern.slf4j.Slf4j; | ||
|
||
@Slf4j | ||
public class ResumeXClusterUniverses extends XClusterConfigTaskBase { | ||
|
||
@Inject | ||
protected ResumeXClusterUniverses( | ||
BaseTaskDependencies baseTaskDependencies, XClusterUniverseService xClusterUniverseService) { | ||
super(baseTaskDependencies, xClusterUniverseService); | ||
} | ||
|
||
@Override | ||
public void run() { | ||
log.info("Running {}", getName()); | ||
|
||
XClusterConfig xClusterConfig = getXClusterConfigFromTaskParams(); | ||
Universe sourceUniverse = Universe.getOrBadRequest(xClusterConfig.getSourceUniverseUUID()); | ||
Universe targetUniverse = Universe.getOrBadRequest(xClusterConfig.getTargetUniverseUUID()); | ||
|
||
try { | ||
// Lock the source universe. | ||
lockAndFreezeUniverseForUpdate( | ||
sourceUniverse.getUniverseUUID(), sourceUniverse.getVersion(), null /* Txn callback */); | ||
try { | ||
// Lock the target universe. | ||
lockAndFreezeUniverseForUpdate( | ||
targetUniverse.getUniverseUUID(), targetUniverse.getVersion(), null /* Txn callback */); | ||
|
||
taskParams().setUniverseUUID(sourceUniverse.getUniverseUUID()); | ||
taskParams().clusters = sourceUniverse.getUniverseDetails().clusters; | ||
createResumeUniverseTasks( | ||
sourceUniverse, Customer.get(sourceUniverse.getCustomerId()).getUuid()); | ||
|
||
taskParams().setUniverseUUID(targetUniverse.getUniverseUUID()); | ||
taskParams().clusters = targetUniverse.getUniverseDetails().clusters; | ||
createResumeUniverseTasks( | ||
targetUniverse, Customer.get(targetUniverse.getCustomerId()).getUuid()); | ||
|
||
createSetReplicationPausedTask(xClusterConfig, false /* pause */); | ||
createWaitForReplicationDrainTask(xClusterConfig); | ||
|
||
// Used in createUpdateWalRetentionTasks. | ||
taskParams().setUniverseUUID(sourceUniverse.getUniverseUUID()); | ||
taskParams().clusters = sourceUniverse.getUniverseDetails().clusters; | ||
createUpdateWalRetentionTasks(sourceUniverse, XClusterUniverseAction.RESUME); | ||
|
||
createMarkUniverseUpdateSuccessTasks(targetUniverse.getUniverseUUID()) | ||
.setSubTaskGroupType(SubTaskGroupType.ResumeUniverse); | ||
|
||
createMarkUniverseUpdateSuccessTasks(sourceUniverse.getUniverseUUID()) | ||
.setSubTaskGroupType(SubTaskGroupType.ResumeUniverse); | ||
|
||
getRunnableTask().runSubTasks(); | ||
} finally { | ||
// Unlock the target universe. | ||
unlockUniverseForUpdate(targetUniverse.getUniverseUUID()); | ||
} | ||
} catch (Throwable t) { | ||
log.error("{} hit error : {}", getName(), t.getMessage()); | ||
throw t; | ||
} finally { | ||
// Unlock the source universe. | ||
unlockUniverseForUpdate(sourceUniverse.getUniverseUUID()); | ||
} | ||
log.info("Completed {}", getName()); | ||
} | ||
} |
Oops, something went wrong.