diff --git a/dkron/grpc.go b/dkron/grpc.go index f1e2e63b1..55ff64f4f 100644 --- a/dkron/grpc.go +++ b/dkron/grpc.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net" + "strings" "time" metrics "github.com/armon/go-metrics" @@ -205,8 +206,12 @@ func (grpcs *GRPCServer) ExecutionDone(ctx context.Context, execDoneReq *proto.E } // If the execution failed, retry it until retries limit (default: don't retry) + // Don't retry if the status is unknown execution := NewExecutionFromProto(&pbex) - if !execution.Success && uint(execution.Attempt) < job.Retries+1 { + if !execution.Success && + uint(execution.Attempt) < job.Retries+1 && + !strings.HasPrefix(execution.Output, ErrBrokenStream.Error()) { + // Increment the attempt counter execution.Attempt++ // Keep all execution properties intact except the last output diff --git a/dkron/grpc_client.go b/dkron/grpc_client.go index 17e4d23e4..2661b96c2 100644 --- a/dkron/grpc_client.go +++ b/dkron/grpc_client.go @@ -426,7 +426,7 @@ func (grpcc *GRPCClient) AgentRun(addr string, job *proto.Job, execution *proto. if err != nil { // At this point the execution status will be unknown, set the FinishedAt time and an explanatory message execution.FinishedAt = ptypes.TimestampNow() - execution.Output = []byte(err.Error()) + execution.Output = []byte(ErrBrokenStream.Error() + ": " + err.Error()) grpcc.logger.WithError(err).Error(ErrBrokenStream)