Skip to content

Commit

Permalink
Switch non interruptible @ unified retry behavior (#610)
Browse files Browse the repository at this point in the history
Signed-off-by: Dennis Keck <[email protected]>
  • Loading branch information
fellhorn authored Sep 14, 2023
1 parent c556d22 commit 9d253cd
Showing 1 changed file with 20 additions and 4 deletions.
24 changes: 20 additions & 4 deletions flytepropeller/pkg/controller/nodes/node_exec_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/io"
"github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/ioutils"
"github.com/flyteorg/flytepropeller/pkg/controller/config"

"github.com/flyteorg/flytepropeller/events"
eventsErr "github.com/flyteorg/flytepropeller/events/errors"
Expand Down Expand Up @@ -286,10 +287,25 @@ func (c *nodeExecutor) BuildNodeExecutionContext(ctx context.Context, executionC

s := nl.GetNodeExecutionStatus(ctx, currentNodeID)

// a node is not considered interruptible if the system failures have exceeded the configured threshold
if interruptible && s.GetSystemFailures() >= c.interruptibleFailureThreshold {
interruptible = false
c.metrics.InterruptedThresholdHit.Inc(ctx)
if config.GetConfig().NodeConfig.IgnoreRetryCause {
// For the unified retry behavior we execute the last interruptibleFailureThreshold attempts on a non
// interruptible machine
currentAttempt := s.GetAttempts() + 1 + s.GetSystemFailures()
maxAttempts := uint32(config.GetConfig().NodeConfig.DefaultMaxAttempts)
if n.GetRetryStrategy() != nil && n.GetRetryStrategy().MinAttempts != nil && *n.GetRetryStrategy().MinAttempts != 0 {
maxAttempts = uint32(*n.GetRetryStrategy().MinAttempts)
}

if interruptible && currentAttempt >= maxAttempts-c.interruptibleFailureThreshold {
interruptible = false
c.metrics.InterruptedThresholdHit.Inc(ctx)
}
} else {
// Else a node is not considered interruptible if the system failures have exceeded the configured threshold
if interruptible && s.GetSystemFailures() >= c.interruptibleFailureThreshold {
interruptible = false
c.metrics.InterruptedThresholdHit.Inc(ctx)
}
}

rawOutputPrefix := c.defaultDataSandbox
Expand Down

0 comments on commit 9d253cd

Please sign in to comment.