diff --git a/CHANGELOG.md b/CHANGELOG.md index 580d7cce9..e7dd2f7a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ - (Feature) Parametrize Scheduling Graceful Duration - (Bugfix) Change Accepted Spec Propagation - (Bugfix) Pass SecurityContext Pod Settings for SELinux and Seccomp +- (Feature) Add ScheduleSpecChanged Condition ## [1.2.39](https://github.com/arangodb/kube-arangodb/tree/1.2.39) (2024-03-11) - (Feature) Extract Scheduler API diff --git a/pkg/apis/deployment/v1/conditions.go b/pkg/apis/deployment/v1/conditions.go index 724626541..f60865510 100644 --- a/pkg/apis/deployment/v1/conditions.go +++ b/pkg/apis/deployment/v1/conditions.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2023-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -43,6 +43,8 @@ const ( ConditionTypeReachable ConditionType = "Reachable" // ConditionTypeScheduled indicates that the member primary pod is scheduled. ConditionTypeScheduled ConditionType = "Scheduled" + // ConditionTypeScheduleSpecChanged indicates that the member schedule spec was changed. + ConditionTypeScheduleSpecChanged ConditionType = "ScheduleSpecChanged" // ConditionTypeServing indicates that the member core services are running. ConditionTypeServing ConditionType = "Serving" // ConditionTypeActive indicates that the member server container started. 
diff --git a/pkg/apis/deployment/v2alpha1/conditions.go b/pkg/apis/deployment/v2alpha1/conditions.go index 8a31c9662..33dcba8af 100644 --- a/pkg/apis/deployment/v2alpha1/conditions.go +++ b/pkg/apis/deployment/v2alpha1/conditions.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2023-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -43,6 +43,8 @@ const ( ConditionTypeReachable ConditionType = "Reachable" // ConditionTypeScheduled indicates that the member primary pod is scheduled. ConditionTypeScheduled ConditionType = "Scheduled" + // ConditionTypeScheduleSpecChanged indicates that the member schedule spec was changed. + ConditionTypeScheduleSpecChanged ConditionType = "ScheduleSpecChanged" // ConditionTypeServing indicates that the member core services are running. ConditionTypeServing ConditionType = "Serving" // ConditionTypeActive indicates that the member server container started. diff --git a/pkg/deployment/member/phase_updates.go b/pkg/deployment/member/phase_updates.go index 947c3c93f..7bde9f727 100644 --- a/pkg/deployment/member/phase_updates.go +++ b/pkg/deployment/member/phase_updates.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -85,6 +85,7 @@ func removeMemberConditionsMapFunc(m *api.MemberStatus) { m.Conditions.Remove(api.ConditionTypeActive) m.Conditions.Remove(api.ConditionTypeStarted) m.Conditions.Remove(api.ConditionTypeScheduled) + m.Conditions.Remove(api.ConditionTypeScheduleSpecChanged) m.Conditions.Remove(api.ConditionTypeReachable) m.Conditions.Remove(api.ConditionTypeServing) m.Conditions.Remove(api.ConditionTypeTerminated) diff --git a/pkg/deployment/reconcile/plan_builder_member_pod_scheduling_failure.go b/pkg/deployment/reconcile/plan_builder_member_pod_scheduling_failure.go index 86a802d57..49f699a1b 100644 --- a/pkg/deployment/reconcile/plan_builder_member_pod_scheduling_failure.go +++ b/pkg/deployment/reconcile/plan_builder_member_pod_scheduling_failure.go @@ -50,8 +50,10 @@ func (r *Reconciler) createMemberPodSchedulingFailurePlan(ctx context.Context, return p } + q := r.log.Str("step", "CreateMemberPodSchedulingFailurePlan") + for _, m := range status.Members.AsList() { - l := r.log.Str("id", m.Member.ID).Str("role", m.Group.AsRole()) + l := q.Str("id", m.Member.ID).Str("role", m.Group.AsRole()) if m.Member.Phase != api.MemberPhaseCreated || m.Member.Pod.GetName() == "" { // Act only when phase is created @@ -65,48 +67,54 @@ func (r *Reconciler) createMemberPodSchedulingFailurePlan(ctx context.Context, if c, ok := m.Member.Conditions.Get(api.ConditionTypeScheduled); !ok { // Action cant proceed if pod is not scheduled + l.Debug("Unable to find scheduled condition") continue } else if c.LastTransitionTime.IsZero() { // LastTransitionTime is not set + l.Debug("Scheduled condition LastTransitionTime is zero") continue } else { - if time.Since(c.LastTransitionTime.Time) <= globals.GetGlobalTimeouts().PodSchedulingGracePeriod().Get() { + if d := time.Since(c.LastTransitionTime.Time); d <= globals.GetGlobalTimeouts().PodSchedulingGracePeriod().Get() { // In grace period + l.Dur("since", d).Debug("Still in grace period") continue } } - imageInfo, imageFound := 
context.SelectImageForMember(spec, status, m.Member) - if !imageFound { - l.Warn("could not find image for already created member") - continue - } - - renderedPod, err := context.RenderPodForMember(ctx, context.ACS(), spec, status, m.Member.ID, imageInfo) - if err != nil { - l.Err(err).Warn("could not render pod for already created member") - continue - } - cache, ok := context.ACS().ClusterCache(m.Member.ClusterID) if !ok { + l.Warn("Unable to get cluster cache") continue } memberName := m.Member.ArangoMemberName(context.GetName(), m.Group) member, ok := cache.ArangoMember().V1().GetSimple(memberName) if !ok { + l.Warn("Unable to get ArangoMember") continue } - if template := member.Spec.Template; template != nil { - if pod := template.PodSpec; pod != nil { - if !r.schedulingParametersAreTheSame(renderedPod.Spec, pod.Spec) { - l.Info("Adding KillMemberPod action: scheduling failed and parameters already updated") - p = append(p, - actions.NewAction(api.ActionTypeKillMemberPod, m.Group, m.Member, "Scheduling failed"), - ) + if m.Member.Conditions.IsTrue(api.ConditionTypeScheduleSpecChanged) { + l.Info("Adding KillMemberPod action: scheduling failed and scheduling changed condition is present") + p = append(p, + actions.NewAction(api.ActionTypeKillMemberPod, m.Group, m.Member, "Scheduling failed"), + ) + } else { + if statusTemplate, specTemplate := member.Status.Template, member.Spec.Template; statusTemplate != nil && specTemplate != nil { + if statusTemplateSpec, specTemplateSpec := statusTemplate.PodSpec, specTemplate.PodSpec; statusTemplateSpec != nil && specTemplateSpec != nil { + if !r.schedulingParametersAreTheSame(specTemplateSpec.Spec, statusTemplateSpec.Spec) { + l.Info("Adding KillMemberPod action: scheduling failed and parameters already updated") + p = append(p, + actions.NewAction(api.ActionTypeKillMemberPod, m.Group, m.Member, "Scheduling failed"), + ) + } else { + l.Info("Scheduling parameters are not updated") + } + } else { + l.Warn("Pod TemplateSpec 
is nil") } + } else { + l.Warn("Pod Template is nil") } } } diff --git a/pkg/deployment/rotation/arangod.go b/pkg/deployment/rotation/arangod.go index cd0c35c68..a783ccba7 100644 --- a/pkg/deployment/rotation/arangod.go +++ b/pkg/deployment/rotation/arangod.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,6 +54,7 @@ func affinityCompare(_ api.DeploymentSpec, _ api.ServerGroup, spec, status *core e = err return } else if specC != statusC { + plan = append(plan, SchedulingChangeAction(builder)) mode = mode.And(compare.SilentRotation) status.Spec.Affinity = spec.Spec.Affinity.DeepCopy() return diff --git a/pkg/deployment/rotation/arangod_tolerations.go b/pkg/deployment/rotation/arangod_tolerations.go index 4034f123a..9cd879c30 100644 --- a/pkg/deployment/rotation/arangod_tolerations.go +++ b/pkg/deployment/rotation/arangod_tolerations.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -32,7 +32,10 @@ import ( func comparePodTolerations(_ api.DeploymentSpec, _ api.ServerGroup, spec, status *core.PodTemplateSpec) compare.Func { return func(builder api.ActionBuilder) (mode compare.Mode, plan api.Plan, err error) { if !reflect.DeepEqual(spec.Spec.Tolerations, status.Spec.Tolerations) { - plan = append(plan, builder.NewAction(api.ActionTypeRuntimeContainerSyncTolerations)) + plan = append(plan, + SchedulingChangeAction(builder), + builder.NewAction(api.ActionTypeRuntimeContainerSyncTolerations), + ) status.Spec.Tolerations = spec.Spec.Tolerations mode = mode.And(compare.InPlaceRotation) @@ -42,4 +45,5 @@ func comparePodTolerations(_ api.DeploymentSpec, _ api.ServerGroup, spec, status return } + } diff --git a/pkg/deployment/rotation/helper.go b/pkg/deployment/rotation/helper.go new file mode 100644 index 000000000..7e297e822 --- /dev/null +++ b/pkg/deployment/rotation/helper.go @@ -0,0 +1,32 @@ +// +// DISCLAIMER +// +// Copyright 2024 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package rotation + +import ( + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + sharedReconcile "github.com/arangodb/kube-arangodb/pkg/deployment/reconcile/shared" +) + +// SchedulingChangeAction returns an UpdateMemberConditionActionV2 action for the builder's group and member that +// updates the ScheduleSpecChanged condition (presumably setting it to true; see UpdateMemberConditionActionV2). +func SchedulingChangeAction(builder api.ActionBuilder) api.Action { + return sharedReconcile.UpdateMemberConditionActionV2("Scheduling Changed", api.ConditionTypeScheduleSpecChanged, builder.Group(), builder.MemberID(), true, "Scheduling Changed", "", "") +}