Skip to content

Commit

Permalink
feat(orca/clouddriver): make waitOnJobCompletion retries configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
apoorv-mahajan authored and kirangodishala committed Sep 27, 2024
1 parent 1d1b42d commit 4cef464
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,11 @@ public class WaitOnJobCompletion implements CloudProviderAware, OverridableTimeo
InputStream jobStream
retrySupport.retry({
jobStream = katoRestService.collectJob(appName, account, location, name).body.in()
}, 6, 5000, false) // retry for 30 seconds
},
configProperties.getJobStatusRetry().maxAttempts,
Duration.ofMillis(configProperties.getJobStatusRetry().getBackOffInMs()),
configProperties.getJobStatusRetry().exponentialBackoffEnabled
)
Map job = objectMapper.readValue(jobStream, new TypeReference<Map>() {})
outputs.jobStatus = job

Expand All @@ -169,7 +173,11 @@ public class WaitOnJobCompletion implements CloudProviderAware, OverridableTimeo
try {
retrySupport.retry({
properties = katoRestService.getFileContents(appName, account, location, name, stage.context.propertyFile)
}, 6, 5000, false) // retry for 30 seconds
},
configProperties.getFileContentRetry().maxAttempts,
Duration.ofMillis(configProperties.getFileContentRetry().getBackOffInMs()),
configProperties.getFileContentRetry().exponentialBackoffEnabled
)
} catch (Exception e) {
if (status == ExecutionStatus.SUCCEEDED) {
throw new ConfigurationException("Property File: ${stage.context.propertyFile} contents could not be retrieved. Error: " + e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,22 @@ public static class WaitOnJobCompletionTaskConfig {
* Default or empty set means that no keys will be excluded.
*/
private Set<String> excludeKeysFromOutputs = Set.of();

private Retries jobStatusRetry = new Retries();

private Retries fileContentRetry = new Retries();

/**
 * Retry settings for a single remote call made while waiting on job completion.
 *
 * <p>One instance configures the job-status lookup and another the property-file lookup (see the
 * {@code jobStatusRetry} and {@code fileContentRetry} fields of the enclosing config). The
 * defaults below give 6 attempts, 5000 ms apart, without exponential backoff.
 *
 * <p>Fields are private; callers read and write them through the accessors that {@code @Data} is
 * expected to generate (e.g. {@code getMaxAttempts()} / {@code setMaxAttempts(int)}).
 */
@Data
public static class Retries {
  /** Total number of attempts. */
  private int maxAttempts = 6;

  /** Time in ms to wait before subsequent retry attempts. */
  private long backOffInMs = 5000;

  /** Flag to enable exponential backoff. */
  private boolean exponentialBackoffEnabled = false;
}
}

@Data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,22 @@ public final class WaitOnJobCompletionTest {
public void setup() {
objectMapper = new ObjectMapper();
RetrySupport retrySupport = new RetrySupport();
configProperties = new TaskConfigurationProperties();
configProperties
.getWaitOnJobCompletionTask()
.setExcludeKeysFromOutputs(Set.of("completionDetails"));
mockKatoRestService = mock(KatoRestService.class);
JobUtils mockJobUtils = mock(JobUtils.class);
mockExecutionRepository = mock(ExecutionRepository.class);
mockFront50Service = mock(Front50Service.class);

configProperties = new TaskConfigurationProperties();
TaskConfigurationProperties.WaitOnJobCompletionTaskConfig.Retries retries =
new TaskConfigurationProperties.WaitOnJobCompletionTaskConfig.Retries();
retries.setMaxAttempts(3);
retries.setBackOffInMs(1);
configProperties.getWaitOnJobCompletionTask().setFileContentRetry(retries);
configProperties.getWaitOnJobCompletionTask().setJobStatusRetry(retries);
configProperties
.getWaitOnJobCompletionTask()
.setExcludeKeysFromOutputs(Set.of("completionDetails"));

task =
new WaitOnJobCompletion(
mockKatoRestService,
Expand Down Expand Up @@ -289,8 +296,13 @@ void testPropertyFileContentsErrorHandlingForASuccessfulK8sRunJob() throws IOExc
verify(mockKatoRestService, times(1))
.collectJob(eq("test-app"), eq("test-account"), eq("test"), eq("job testrep"));

// since there are 6 tries made for this call if it fails
verify(mockKatoRestService, times(6))
verify(
mockKatoRestService,
times(
configProperties
.getWaitOnJobCompletionTask()
.getFileContentRetry()
.getMaxAttempts()))
.getFileContents(
eq("test-app"), eq("test-account"), eq("test"), eq("job testrep"), eq("testrep"));

Expand Down Expand Up @@ -448,10 +460,13 @@ void testPropertyFileContentsErrorHandlingForK8sRunJobFailures() throws IOExcept
verify(mockKatoRestService, times(1))
.collectJob(eq("test-app"), eq("test-account"), eq("test"), eq("job testrep"));

// since there are 6 tries made for this call if it fails - this is a slow call since retry
// config options are
// hard-coded
verify(mockKatoRestService, times(6))
verify(
mockKatoRestService,
times(
configProperties
.getWaitOnJobCompletionTask()
.getFileContentRetry()
.getMaxAttempts()))
.getFileContents(
eq("test-app"), eq("test-account"), eq("test"), eq("job testrep"), eq("testrep"));

Expand Down

0 comments on commit 4cef464

Please sign in to comment.