diff --git a/CHANGELOG.md b/CHANGELOG.md index c2d16c02d..010960310 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ - (Maintenance) Bump K8S Version to 1.24.16 - (Maintenance) Bump K8S Version to 1.25.12 - (Maintenance) Bump Go to 1.20.7 +- (Improvement) Remove PodSchedulingFailure condition instead of setting to false, restart pod if it could not be scheduled ## [1.2.31](https://github.com/arangodb/kube-arangodb/tree/1.2.31) (2023-07-14) - (Improvement) Block traffic on the services if there is more than 1 active leader in ActiveFailover mode diff --git a/docs/generated/actions.md b/docs/generated/actions.md index 06869f004..acf96d7a6 100644 --- a/docs/generated/actions.md +++ b/docs/generated/actions.md @@ -36,7 +36,7 @@ | JWTRefresh | no | 10m0s | no | Enterprise Only | Refresh current JWT secrets on the member | | JWTSetActive | no | 10m0s | no | Enterprise Only | Change active JWT key on the cluster | | JWTStatusUpdate | no | 10m0s | no | Enterprise Only | Update status of JWT propagation | -| KillMemberPod | no | 10m0s | no | Community & Enterprise | Execute Delete on Pod 9put pod in Terminating state) | +| KillMemberPod | no | 10m0s | no | Community & Enterprise | Execute Delete on Pod (put pod in Terminating state) | | LicenseSet | no | 10m0s | no | Community & Enterprise | Update Cluster license (3.9+) | | MarkToRemoveMember | no | 10m0s | no | Community & Enterprise | Marks member to be removed. Used when member Pod is annotated with replace annotation | | MemberPhaseUpdate | no | 10m0s | no | Community & Enterprise | Change member phase | diff --git a/internal/actions.yaml b/internal/actions.yaml index 08522fec5..095693e2e 100644 --- a/internal/actions.yaml +++ b/internal/actions.yaml @@ -30,7 +30,10 @@ actions: timeout: 30m optional: true KillMemberPod: - description: Execute Delete on Pod 9put pod in Terminating state) + description: Execute Delete on Pod (put pod in Terminating state) + scopes: + - Normal + - High RotateMember: description: Waits for Pod restart and recreation timeout: 15m diff --git a/pkg/apis/deployment/v1/actions.generated.go b/pkg/apis/deployment/v1/actions.generated.go index ad9fc3e44..3a44e4386 100644 --- a/pkg/apis/deployment/v1/actions.generated.go +++ b/pkg/apis/deployment/v1/actions.generated.go @@ -374,7 +374,7 @@ const ( // ActionTypeJWTStatusUpdate in scopes Normal. Update status of JWT propagation ActionTypeJWTStatusUpdate ActionType = "JWTStatusUpdate" - // ActionTypeKillMemberPod in scopes Normal. Execute Delete on Pod 9put pod in Terminating state) + // ActionTypeKillMemberPod in scopes High and Normal. Execute Delete on Pod (put pod in Terminating state) ActionTypeKillMemberPod ActionType = "KillMemberPod" // ActionTypeLicenseSet in scopes Normal. Update Cluster license (3.9+) @@ -776,7 +776,7 @@ func (a ActionType) Priority() ActionPriority { case ActionTypeJWTStatusUpdate: return ActionPriorityNormal case ActionTypeKillMemberPod: - return ActionPriorityNormal + return ActionPriorityHigh case ActionTypeLicenseSet: return ActionPriorityNormal case ActionTypeMarkToRemoveMember: diff --git a/pkg/apis/deployment/v2alpha1/actions.generated.go b/pkg/apis/deployment/v2alpha1/actions.generated.go index 83060011b..ce26dc52f 100644 --- a/pkg/apis/deployment/v2alpha1/actions.generated.go +++ b/pkg/apis/deployment/v2alpha1/actions.generated.go @@ -374,7 +374,7 @@ const ( // ActionTypeJWTStatusUpdate in scopes Normal. Update status of JWT propagation ActionTypeJWTStatusUpdate ActionType = "JWTStatusUpdate" - // ActionTypeKillMemberPod in scopes Normal. Execute Delete on Pod 9put pod in Terminating state) + // ActionTypeKillMemberPod in scopes Normal. Execute Delete on Pod (put pod in Terminating state) ActionTypeKillMemberPod ActionType = "KillMemberPod" // ActionTypeLicenseSet in scopes Normal. Update Cluster license (3.9+) diff --git a/pkg/deployment/reconcile/plan_builder_high.go b/pkg/deployment/reconcile/plan_builder_high.go index 589d22a1e..dc3ab2189 100644 --- a/pkg/deployment/reconcile/plan_builder_high.go +++ b/pkg/deployment/reconcile/plan_builder_high.go @@ -54,6 +54,7 @@ func (r *Reconciler) createHighPlan(ctx context.Context, apiObject k8sutil.APIOb ApplyIfEmpty(r.updateMemberUpdateConditionsPlan). ApplyIfEmpty(r.updateMemberRotationConditionsPlan). ApplyIfEmpty(r.createMemberRecreationConditionsPlan). + ApplyIfEmpty(r.createMemberPodSchedulingFailurePlan). ApplyIfEmpty(r.createRotateServerStoragePVCPendingResizeConditionPlan). ApplyIfEmpty(r.createChangeMemberArchPlan). ApplyIfEmpty(r.createRotateServerStorageResizePlanRuntime). diff --git a/pkg/deployment/reconcile/plan_builder_member_pod_scheduling_failure.go b/pkg/deployment/reconcile/plan_builder_member_pod_scheduling_failure.go new file mode 100644 index 000000000..a3334efe3 --- /dev/null +++ b/pkg/deployment/reconcile/plan_builder_member_pod_scheduling_failure.go @@ -0,0 +1,122 @@ +// +// DISCLAIMER +// +// Copyright 2023 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package reconcile + +import ( + "context" + "reflect" + + core "k8s.io/api/core/v1" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/arangodb/kube-arangodb/pkg/deployment/actions" + "github.com/arangodb/kube-arangodb/pkg/util" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" +) + +// createMemberPodSchedulingFailurePlan creates plan actions which are required when +// some pod has failed to schedule and scheduling parameters already changed +func (r *Reconciler) createMemberPodSchedulingFailurePlan(ctx context.Context, + _ k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, context PlanBuilderContext) api.Plan { + + var p api.Plan + if !status.Conditions.IsTrue(api.ConditionTypePodSchedulingFailure) { + return p + } + + for _, m := range status.Members.AsList() { + l := r.log.Str("id", m.Member.ID).Str("role", m.Group.AsRole()) + + if m.Member.Phase != api.MemberPhaseCreated || m.Member.Pod.GetName() == "" { + // Act only when phase is created + continue + } + + if m.Member.Conditions.IsTrue(api.ConditionTypeScheduled) || m.Member.Conditions.IsTrue(api.ConditionTypeTerminating) { + // Action is needed only for pods which are not scheduled yet + continue + } + + imageInfo, imageFound := context.SelectImageForMember(spec, status, m.Member) + if !imageFound { + l.Warn("could not find image for already created member") + continue + } + + renderedPod, err := context.RenderPodForMember(ctx, context.ACS(), spec, status, m.Member.ID, imageInfo) + if err != nil { + l.Err(err).Warn("could not render pod for already created member") + continue + } + + if r.isSchedulingParametersChanged(renderedPod.Spec, m.Member, context) { + l.Info("Adding KillMemberPod action: scheduling failed and parameters already updated") + p = append(p, + actions.NewAction(api.ActionTypeKillMemberPod, m.Group, m.Member, "Scheduling failed"), + ) + } + } + + return p +} + +// isSchedulingParametersChanged returns true if parameters related to pod scheduling has changed +func (r *Reconciler) isSchedulingParametersChanged(expectedSpec core.PodSpec, member api.MemberStatus, context PlanBuilderContext) bool { + cache, ok := context.ACS().ClusterCache(member.ClusterID) + if !ok { + return false + } + pod, ok := cache.Pod().V1().GetSimple(member.Pod.GetName()) + if !ok { + return false + } + if r.schedulingParametersAreTheSame(expectedSpec, pod.Spec) { + return false + } + return true +} + +func (r *Reconciler) schedulingParametersAreTheSame(expectedSpec, actualSpec core.PodSpec) bool { + if expectedSpec.PriorityClassName != actualSpec.PriorityClassName { + return false + } + + if !reflect.DeepEqual(expectedSpec.Tolerations, actualSpec.Tolerations) { + return false + } + + if !reflect.DeepEqual(expectedSpec.NodeSelector, actualSpec.NodeSelector) { + return false + } + + // we should use SHA256 here because DeepEqual might be unreliable for Affinity rules + if specC, err := util.SHA256FromJSON(expectedSpec.Affinity); err != nil { + return true + } else { + if statusC, err := util.SHA256FromJSON(actualSpec.Affinity); err != nil { + return true + } else if specC != statusC { + return false + } + } + + return true +} diff --git a/pkg/deployment/resources/pod_inspector.go b/pkg/deployment/resources/pod_inspector.go index 3b663fda3..0eebbde2f 100644 --- a/pkg/deployment/resources/pod_inspector.go +++ b/pkg/deployment/resources/pod_inspector.go @@ -527,9 +527,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter } } else if status.Conditions.IsTrue(api.ConditionTypePodSchedulingFailure) && len(unscheduledPodNames) == 0 { - if status.Conditions.Update(api.ConditionTypePodSchedulingFailure, false, - "Pods Scheduling Resolved", - "No pod reports a scheduling timeout") { + if status.Conditions.Remove(api.ConditionTypePodSchedulingFailure) { r.context.CreateEvent(k8sutil.NewPodsSchedulingResolvedEvent(r.context.GetAPIObject())) } }