diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f5a1f09c23..79e4482e537 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ BUG FIXES:
  * core: Fixed a bug where new leader may take a long time until it can process requests [[GH-8036](https://github.com/hashicorp/nomad/issues/8036)]
  * core: Fixed a bug where an internal metadata, ClusterID, may not be initialized properly upon a slow server upgrade [[GH-8078](https://github.com/hashicorp/nomad/issues/8078)].
  * api: Fixed a bug where setting connect sidecar task resources could fail [[GH-7993](https://github.com/hashicorp/nomad/issues/7993)]
+ * core: Fixed a bug where stop_after_client_disconnect could cause the server to become unresponsive [[GH-8098](https://github.com/hashicorp/nomad/issues/8098)]
  * client: Fixed a bug where artifact downloads failed on redirects [[GH-7854](https://github.com/hashicorp/nomad/issues/7854)]
  * csi: Validate empty volume arguments in API. [[GH-8027](https://github.com/hashicorp/nomad/issues/8027)]
 
diff --git a/client/heartbeatstop.go b/client/heartbeatstop.go
index 7d9570e7d07..a3e57b10e1f 100644
--- a/client/heartbeatstop.go
+++ b/client/heartbeatstop.go
@@ -60,7 +60,7 @@ func (h *heartbeatStop) shouldStop(alloc *structs.Allocation) bool {
 func (h *heartbeatStop) shouldStopAfter(now time.Time, interval time.Duration) bool {
 	lastOk := h.getLastOk()
 	if lastOk.IsZero() {
-		return h.startupGrace.After(now)
+		return now.After(h.startupGrace)
 	}
 	return now.After(lastOk.Add(interval))
 }
diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go
index 9b35a39ba2f..53e2d142cf1 100644
--- a/nomad/plan_apply.go
+++ b/nomad/plan_apply.go
@@ -474,7 +474,7 @@ func evaluatePlanPlacements(pool *EvaluatePool, snap *state.StateSnapshot, plan
 		if !fit {
 			// Log the reason why the node's allocations could not be made
 			if reason != "" {
-				logger.Debug("plan for node rejected", "node_id", nodeID, "reason", reason)
+				logger.Debug("plan for node rejected", "node_id", nodeID, "reason", reason, "eval_id", plan.EvalID)
 			}
 			// Set that this is a partial commit
 			partialCommit = true
diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go
index 6e42c297ac9..d69c7211314 100644
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@@ -87,6 +87,8 @@ type GenericScheduler struct {
 	ctx        *EvalContext
 	stack      *GenericStack
 
+	// followUpEvals are evals with WaitUntil set, which are delayed until that time
+	// before being rescheduled
 	followUpEvals []*structs.Evaluation
 
 	deployment *structs.Deployment
@@ -258,11 +260,13 @@ func (s *GenericScheduler) process() (bool, error) {
 
 	// If there are failed allocations, we need to create a blocked evaluation
 	// to place the failed allocations when resources become available. If the
-	// current evaluation is already a blocked eval, we reuse it by submitting
-	// a new eval to the planner in createBlockedEval. If the current eval is
-	// pending with WaitUntil set, it's delayed rather than blocked.
+	// current evaluation is already a blocked eval, we reuse it. If not, submit
+	// a new eval to the planner in createBlockedEval. If rescheduling should
+	// be delayed, do that instead.
+	delayInstead := len(s.followUpEvals) > 0 && s.eval.WaitUntil.IsZero()
+
 	if s.eval.Status != structs.EvalStatusBlocked && len(s.failedTGAllocs) != 0 && s.blocked == nil &&
-		s.eval.WaitUntil.IsZero() {
+		!delayInstead {
 		if err := s.createBlockedEval(false); err != nil {
 			s.logger.Error("failed to make blocked eval", "error", err)
 			return false, err
@@ -276,8 +280,9 @@ func (s *GenericScheduler) process() (bool, error) {
 		return true, nil
 	}
 
-	// Create follow up evals for any delayed reschedule eligible allocations
-	if len(s.followUpEvals) > 0 {
+	// Create follow up evals for any delayed reschedule eligible allocations, except in
+	// the case that this evaluation was already delayed.
+	if delayInstead {
 		for _, eval := range s.followUpEvals {
 			eval.PreviousEval = s.eval.ID
 			// TODO(preetha) this should be batching evals before inserting them
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 779c3881918..95fc1a16394 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -2846,8 +2846,8 @@ func TestServiceSched_StopAfterClientDisconnect(t *testing.T) {
 	require.Equal(t, h.Evals[0].Status, structs.EvalStatusComplete)
 	require.Len(t, h.Plans, 1, "plan")
 
-	// Followup eval created
-	require.True(t, len(h.CreateEvals) > 0)
+	// One followup eval created, either delayed or blocked
+	require.Len(t, h.CreateEvals, 1)
 	e := h.CreateEvals[0]
 	require.Equal(t, eval.ID, e.PreviousEval)
 
diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index a3f79180606..45d5debf79d 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -355,18 +355,12 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 
 	// Find delays for any lost allocs that have stop_after_client_disconnect
 	lostLater := lost.delayByStopAfterClientDisconnect()
-	rescheduleLater = append(rescheduleLater, lostLater...)
+	a.handleDelayedLost(lostLater, all, tg.Name)
 
 	// Create batched follow up evaluations for allocations that are
 	// reschedulable later and mark the allocations for in place updating
 	a.handleDelayedReschedules(rescheduleLater, all, tg.Name)
 
-	// Allocs that are lost and delayed have an attributeUpdate that correctly links to
-	// the eval, but incorrectly has the current (running) status
-	for _, d := range lostLater {
-		a.result.attributeUpdates[d.allocID].SetStop(structs.AllocClientStatusLost, structs.AllocClientStatusLost)
-	}
-
 	// Create a structure for choosing names. Seed with the taken names which is
 	// the union of untainted and migrating nodes (includes canaries)
 	nameIndex := newAllocNameIndex(a.jobID, group, tg.Count, untainted.union(migrate, rescheduleNow))
@@ -423,7 +417,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 	// * The deployment is not paused or failed
 	// * Not placing any canaries
 	// * If there are any canaries that they have been promoted
-	// * There is no delayed stop_after_client_disconnect alloc
+	// * There is no delayed stop_after_client_disconnect alloc, which delays scheduling for the whole group
 	var place []allocPlaceResult
 	if len(lostLater) == 0 {
 		place = a.computePlacements(tg, nameIndex, untainted, migrate, rescheduleNow)
@@ -845,6 +839,17 @@ func (a *allocReconciler) computeUpdates(group *structs.TaskGroup, untainted all
 // handleDelayedReschedules creates batched followup evaluations with the WaitUntil field set
 // for allocations that are eligible to be rescheduled later
 func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) {
+	a.handleDelayedReschedulesImpl(rescheduleLater, all, tgName, true)
+}
+
+// handleDelayedLost creates batched followup evaluations with the WaitUntil field set for lost allocations
+func (a *allocReconciler) handleDelayedLost(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string) {
+	a.handleDelayedReschedulesImpl(rescheduleLater, all, tgName, false)
+}
+
+// handleDelayedReschedulesImpl creates batched followup evaluations with the WaitUntil field set
+func (a *allocReconciler) handleDelayedReschedulesImpl(rescheduleLater []*delayedRescheduleInfo, all allocSet, tgName string,
+	createUpdates bool) {
 	if len(rescheduleLater) == 0 {
 		return
 	}
@@ -905,10 +910,12 @@ func (a *allocReconciler) handleDelayedReschedules(rescheduleLater []*delayedRes
 	}
 
 	// Create in-place updates for every alloc ID that needs to be updated with its follow up eval ID
-	for allocID, evalID := range allocIDToFollowupEvalID {
-		existingAlloc := all[allocID]
-		updatedAlloc := existingAlloc.Copy()
-		updatedAlloc.FollowupEvalID = evalID
-		a.result.attributeUpdates[updatedAlloc.ID] = updatedAlloc
+	if createUpdates {
+		for allocID, evalID := range allocIDToFollowupEvalID {
+			existingAlloc := all[allocID]
+			updatedAlloc := existingAlloc.Copy()
+			updatedAlloc.FollowupEvalID = evalID
+			a.result.attributeUpdates[updatedAlloc.ID] = updatedAlloc
+		}
 	}
 }
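For reference, here is a minimal standalone sketch (not part of the diff) of the corrected startup-grace check from client/heartbeatstop.go: before any heartbeat has succeeded, allocations become stoppable only once the grace deadline has passed, whereas the old comparison was inverted and reported "stop" during the grace window. The heartbeatMonitor type, its lastOk field, and the timings in main are illustrative assumptions, not Nomad's actual heartbeatStop implementation.

```go
package main

import (
	"fmt"
	"time"
)

// heartbeatMonitor is a simplified stand-in for the client's heartbeatStop
// helper; the field names here are illustrative only.
type heartbeatMonitor struct {
	startupGrace time.Time // deadline for the first successful heartbeat
	lastOk       time.Time // time of the last successful heartbeat (zero if none yet)
}

// shouldStopAfter mirrors the corrected logic: with no heartbeat recorded,
// stop only once now has passed the startup grace deadline; otherwise stop
// once the configured interval has elapsed since the last good heartbeat.
func (h *heartbeatMonitor) shouldStopAfter(now time.Time, interval time.Duration) bool {
	if h.lastOk.IsZero() {
		return now.After(h.startupGrace)
	}
	return now.After(h.lastOk.Add(interval))
}

func main() {
	start := time.Now()
	h := &heartbeatMonitor{startupGrace: start.Add(30 * time.Second)}

	fmt.Println(h.shouldStopAfter(start.Add(10*time.Second), 5*time.Second)) // false: still inside the grace window
	fmt.Println(h.shouldStopAfter(start.Add(40*time.Second), 5*time.Second)) // true: grace expired with no heartbeat

	h.lastOk = start.Add(40 * time.Second)
	fmt.Println(h.shouldStopAfter(start.Add(43*time.Second), 5*time.Second)) // false: heartbeat seen within the interval
}
```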