From 35d80878b36ba7226b9f395dcf3359a7eea89856 Mon Sep 17 00:00:00 2001 From: Rameez Sajwani Date: Thu, 23 Mar 2023 15:35:37 -0700 Subject: [PATCH] Fixing backup tests flakiness (#12655) * remove unnecessary vtorc process Signed-off-by: Rameez Sajwani * Adding vtorc disabling at right place Signed-off-by: Rameez Sajwani * Adding comments Signed-off-by: Rameez Sajwani --------- Signed-off-by: Rameez Sajwani --- .../backup/vtctlbackup/backup_utils.go | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/go/test/endtoend/backup/vtctlbackup/backup_utils.go b/go/test/endtoend/backup/vtctlbackup/backup_utils.go index 87e854b7d64..cf264c612cf 100644 --- a/go/test/endtoend/backup/vtctlbackup/backup_utils.go +++ b/go/test/endtoend/backup/vtctlbackup/backup_utils.go @@ -387,6 +387,14 @@ type restoreMethod func(t *testing.T, tablet *cluster.Vttablet) // 13. verify that don't have the data added after the first backup // 14. remove the backups func primaryBackup(t *testing.T) { + // Having the VTOrc in this test causes a lot of flakiness. For example when we delete the tablet `replica2` which + // is the current primary and then try to restore from backup the old primary (`primary.Alias`), but before that sometimes the VTOrc + // promotes the `replica1` to primary right after we delete the replica2 (current primary). + // This can result in unexpected behavior. Therefore, disabling the VTOrc in this test to remove flakiness. + localCluster.DisableVTOrcRecoveries(t) + defer func() { + localCluster.EnableVTOrcRecoveries(t) + }() verifyInitialReplication(t) output, err := localCluster.VtctlclientProcess.ExecuteCommandWithOutput("Backup", primary.Alias) @@ -770,15 +778,9 @@ func terminatedRestore(t *testing.T) { // Args: // tablet_type: 'replica' or 'rdonly'. func vtctlBackup(t *testing.T, tabletType string) { - // Start vtorc before running backups - vtorcProcess := localCluster.NewVTOrcProcess(cluster.VTOrcConfiguration{}) - err := vtorcProcess.Setup() - require.NoError(t, err) - localCluster.VTOrcProcesses = append(localCluster.VTOrcProcesses, vtorcProcess) - // StopReplication on replica1. We verify that the replication works fine later in // verifyInitialReplication. So this will also check that VTOrc is running. - err = localCluster.VtctlclientProcess.ExecuteCommand("StopReplication", replica1.Alias) + err := localCluster.VtctlclientProcess.ExecuteCommand("StopReplication", replica1.Alias) require.Nil(t, err) verifyInitialReplication(t) @@ -797,12 +799,6 @@ func vtctlBackup(t *testing.T, tabletType string) { cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 2) verifyAfterRemovingBackupNoBackupShouldBePresent(t, backups) - - // Stop VTOrc - err = localCluster.VTOrcProcesses[0].TearDown() - localCluster.VTOrcProcesses = nil - require.NoError(t, err) - err = replica2.VttabletProcess.TearDown() require.Nil(t, err)