diff --git a/controllers/podplacement/events.go b/controllers/podplacement/events.go
index 9b77abeec..fe18ae3c2 100644
--- a/controllers/podplacement/events.go
+++ b/controllers/podplacement/events.go
@@ -20,4 +20,5 @@ const (
 	ImageArchitectureInspectionErrorMsg = "Failed to retrieve the supported architectures: "
 	NoSupportedArchitecturesFoundMsg    = "Pod cannot be scheduled due to incompatible image architectures; container images have no supported architectures in common"
 	ArchitectureAwareGatedPodIgnoredMsg = "The gated pod has been modified and is no longer eligible for architecture-aware scheduling"
+	ImageInspectionErrorMaxRetriesMsg   = "Failed to retrieve the supported architectures after multiple retries"
 )
diff --git a/controllers/podplacement/pod_model.go b/controllers/podplacement/pod_model.go
index 41f815378..7f77f03f5 100644
--- a/controllers/podplacement/pod_model.go
+++ b/controllers/podplacement/pod_model.go
@@ -19,6 +19,7 @@ package podplacement
 import (
 	"context"
 	"fmt"
+	"strconv"
 	"strings"
 	"time"
 
@@ -37,6 +38,8 @@ var (
 	imageInspectionCache image.ICache = image.FacadeSingleton()
 )
+const MaxRetryCount = 5
+
 type containerImage struct {
 	imageName string
 	skipCache bool
 }
@@ -96,16 +99,12 @@ func (pod *Pod) RemoveSchedulingGate() {
 // It verifies first that no nodeSelector field is set for the kubernetes.io/arch label.
 // Then, it computes the intersection of the architectures supported by the images used by the pod via pod.getArchitecturePredicate.
 // Finally, it initializes the nodeAffinity for the pod and set it to the computed requirement via the pod.setArchNodeAffinity method.
-func (pod *Pod) SetNodeAffinityArchRequirement(pullSecretDataList [][]byte) {
-	log := ctrllog.FromContext(pod.ctx)
+func (pod *Pod) SetNodeAffinityArchRequirement(pullSecretDataList [][]byte) (bool, error) {
 	requirement, err := pod.getArchitecturePredicate(pullSecretDataList)
 	if err != nil {
-		pod.ensureLabel(utils.ImageInspectionErrorLabel, "")
-		metrics.FailedInspectionCounter.Inc()
-		pod.publishEvent(corev1.EventTypeWarning, ImageArchitectureInspectionError, ImageArchitectureInspectionErrorMsg+err.Error())
-		log.Error(err, "Error getting the architecture predicate. The pod will not have the nodeAffinity set.")
-		return
+		return false, err
 	}
+	pod.ensureNoLabel(utils.ImageInspectionErrorLabel)
 	if len(requirement.Values) == 0 {
 		pod.publishEvent(corev1.EventTypeNormal, NoSupportedArchitecturesFound, NoSupportedArchitecturesFoundMsg)
 	}
@@ -124,6 +123,7 @@
 	}
 
 	pod.setArchNodeAffinity(requirement)
+	return true, nil
 }
 
 // setArchNodeAffinity sets the node affinity for the pod to the given requirement based on the rules in
@@ -244,6 +244,50 @@
 	pod.Labels[label] = value
 }
 
+func (pod *Pod) ensureNoLabel(label string) {
+	if pod.Labels == nil {
+		return
+	}
+	delete(pod.Labels, label)
+}
+
+// ensureAnnotation ensures that the pod has the given annotation with the given value.
+func (pod *Pod) ensureAnnotation(annotation string, value string) {
+	if pod.Annotations == nil {
+		pod.Annotations = make(map[string]string)
+	}
+	pod.Annotations[annotation] = value
+}
+
+// ensureAndIncrementLabel ensures that the pod has the given label, initializing it to "1" if unset.
+// If the label is already set to a parsable integer, its value is incremented.
+func (pod *Pod) ensureAndIncrementLabel(label string) {
+	if pod.Labels == nil {
+		pod.Labels = make(map[string]string)
+	}
+	if _, ok := pod.Labels[label]; !ok {
+		pod.Labels[label] = "1"
+		return
+	}
+	cur, err := strconv.ParseInt(pod.Labels[label], 10, 32)
+	if err != nil {
+		pod.Labels[label] = "1"
+	} else {
+		pod.Labels[label] = fmt.Sprintf("%d", cur+1)
+	}
+}
+
+func (pod *Pod) maxRetries() bool {
+	if pod.Labels == nil {
+		return false
+	}
+	v, err := strconv.ParseInt(pod.Labels[utils.ImageInspectionErrorCountLabel], 10, 32)
+	if err != nil {
+		return true
+	}
+	return v >= MaxRetryCount
+}
+
 // ensureArchitectureLabels adds labels for the given requirement to the pod. Labels are added to indicate
 // the supported architectures and index pods by architecture or by whether they support more than one architecture.
 // In this case, single-architecture is meant as a pod that supports only one architecture: all the images in the pod
@@ -359,3 +403,16 @@ func (pod *Pod) publishIgnorePod() {
 	pod.ensureLabel(utils.NodeAffinityLabel, utils.LabelValueNotSet)
 	pod.publishEvent(corev1.EventTypeNormal, ArchitecturePredicatesConflict, ArchitecturePredicatesConflictMsg)
 }
+
+func (pod *Pod) handleError(err error, s string) {
+	if err == nil {
+		return
+	}
+	log := ctrllog.FromContext(pod.ctx)
+	metrics.FailedInspectionCounter.Inc()
+	pod.ensureLabel(utils.ImageInspectionErrorLabel, "")
+	pod.ensureAnnotation(utils.ImageInspectionErrorLabel, err.Error())
+	pod.ensureAndIncrementLabel(utils.ImageInspectionErrorCountLabel)
+	pod.publishEvent(corev1.EventTypeWarning, ImageArchitectureInspectionError, ImageArchitectureInspectionErrorMsg+err.Error())
+	log.Error(err, s)
+}
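The counter bookkeeping in `ensureAndIncrementLabel` and `maxRetries` is easiest to see in isolation. A minimal, self-contained sketch of the same semantics (not part of the patch; `maxRetryCount`, `incrementCount`, and `reachedMax` are stand-ins for the methods above):

```go
package main

import (
	"fmt"
	"strconv"
)

// maxRetryCount mirrors MaxRetryCount in pod_model.go.
const maxRetryCount = 5

// incrementCount reproduces ensureAndIncrementLabel on a plain map:
// a missing or unparsable value resets the counter to "1".
func incrementCount(labels map[string]string, key string) {
	cur, err := strconv.ParseInt(labels[key], 10, 32)
	if err != nil {
		labels[key] = "1"
		return
	}
	labels[key] = strconv.FormatInt(cur+1, 10)
}

// reachedMax mirrors maxRetries: an unparsable value counts as exhausted.
func reachedMax(labels map[string]string, key string) bool {
	v, err := strconv.ParseInt(labels[key], 10, 32)
	if err != nil {
		return true
	}
	return v >= maxRetryCount
}

func main() {
	labels := map[string]string{}
	for i := 0; i < 6; i++ {
		incrementCount(labels, "count")
		fmt.Printf("count=%s exhausted=%v\n", labels["count"], reachedMax(labels, "count"))
	}
}
```

Running this prints `exhausted=false` for counts 1 through 4 and `exhausted=true` from count 5 onward, which is the threshold the reconciler relies on below.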
diff --git a/controllers/podplacement/pod_model_test.go b/controllers/podplacement/pod_model_test.go
index b681334d8..06c299092 100644
--- a/controllers/podplacement/pod_model_test.go
+++ b/controllers/podplacement/pod_model_test.go
@@ -530,6 +530,7 @@ func TestPod_SetNodeAffinityArchRequirement(t *testing.T) {
 		pullSecretDataList [][]byte
 		pod                *v1.Pod
 		want               *v1.Pod
+		expectErr          bool
 	}{
 		{
 			name: "pod with no node selector terms",
@@ -685,9 +686,10 @@
 			}).Build(),
 		},
 		{
-			name: "should not modify the pod if unable to inspect the images",
-			pod:  NewPod().WithContainersImages(fake.MultiArchImage, "non-readable-image").Build(),
-			want: NewPod().WithContainersImages(fake.MultiArchImage, "non-readable-image").Build(),
+			name:      "should not modify the pod if unable to inspect the images",
+			pod:       NewPod().WithContainersImages(fake.MultiArchImage, "non-readable-image").Build(),
+			want:      NewPod().WithContainersImages(fake.MultiArchImage, "non-readable-image").Build(),
+			expectErr: true,
 		},
 		{
 			name: "should prevent the pod from being scheduled when no common architecture is found",
@@ -709,8 +711,13 @@
 				Pod: *tt.pod,
 				ctx: ctx,
 			}
-			pod.SetNodeAffinityArchRequirement(tt.pullSecretDataList)
+			_, err := pod.SetNodeAffinityArchRequirement(tt.pullSecretDataList)
 			g := NewGomegaWithT(t)
+			if tt.expectErr {
+				g.Expect(err).Should(HaveOccurred())
+			} else {
+				g.Expect(err).ShouldNot(HaveOccurred())
+			}
 			g.Expect(pod.Spec.Affinity).Should(Equal(tt.want.Spec.Affinity))
 			imageInspectionCache = mmoimage.FacadeSingleton()
 		})
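Beyond the end-to-end table test, the new counter helpers could also be covered directly. A possible in-package unit test, assuming the `NewPod` builder, Gomega, and the `utils` import used elsewhere in this file, plus `context` and `strconv`:

```go
// Hypothetical companion test for the retry-counter helpers. It relies on
// package-internal access to Pod's unexported ctx field and the existing
// NewPod test builder; it is a sketch, not part of the patch.
func TestPod_ensureAndIncrementLabel(t *testing.T) {
	g := NewGomegaWithT(t)
	pod := &Pod{Pod: *NewPod().Build(), ctx: context.TODO()}
	for i := 1; i <= MaxRetryCount; i++ {
		// Each call bumps the counter by one, starting at "1".
		pod.ensureAndIncrementLabel(utils.ImageInspectionErrorCountLabel)
		g.Expect(pod.Labels).To(HaveKeyWithValue(utils.ImageInspectionErrorCountLabel, strconv.Itoa(i)))
		// maxRetries flips to true exactly when the counter reaches MaxRetryCount.
		g.Expect(pod.maxRetries()).To(Equal(i >= MaxRetryCount))
	}
}
```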
diff --git a/controllers/podplacement/pod_reconciler.go b/controllers/podplacement/pod_reconciler.go
index 5439a09ef..be8a78186 100644
--- a/controllers/podplacement/pod_reconciler.go
+++ b/controllers/podplacement/pod_reconciler.go
@@ -18,6 +18,7 @@ package podplacement
 
 import (
 	"context"
+	"fmt"
 	runtime2 "runtime"
 	"time"
 
@@ -77,52 +78,65 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
 		log.V(2).Info("Unable to fetch pod", "error", err)
 		return ctrl.Result{}, client.IgnoreNotFound(err)
 	}
-
-	// verify whether the pod has the scheduling gate
+	// Pods without the scheduling gate should be ignored.
 	if !pod.HasSchedulingGate() {
 		log.V(2).Info("Pod does not have the scheduling gate. Ignoring...")
-		// if not, return
 		return ctrl.Result{}, nil
 	}
-
-	// The scheduling gate is found.
 	metrics.ProcessedPodsCtrl.Inc()
 	defer utils.HistogramObserve(now, metrics.TimeToProcessGatedPod)
-	log.V(1).Info("Processing pod")
+	r.processPod(ctx, pod)
+	err := r.Client.Update(ctx, &pod.Pod)
+	if err != nil {
+		log.Error(err, "Unable to update the pod")
+		pod.publishEvent(corev1.EventTypeWarning, ArchitectureAwareSchedulingGateRemovalFailure, SchedulingGateRemovalFailureMsg)
+		return ctrl.Result{}, err
+	}
+	if !pod.HasSchedulingGate() {
+		// Only publish the event if the scheduling gate has been removed and the pod has been updated successfully.
+		pod.publishEvent(corev1.EventTypeNormal, ArchitectureAwareSchedulingGateRemovalSuccess, SchedulingGateRemovalSuccessMsg)
+		metrics.GatedPodsGauge.Dec()
+	}
+	return ctrl.Result{}, nil
+}
 
-	if !pod.shouldIgnorePod() {
-		// Prepare the requirement for the node affinity.
-		psdl, err := r.pullSecretDataList(ctx, pod)
-		if err != nil {
-			log.Error(err, "Unable to retrieve the image pull secret data for the pod. "+
-				"The nodeAffinity for this pod will not be set.")
-			// we still need to remove the scheduling gate. Therefore, we do not return here.
-		} else {
-			pod.SetNodeAffinityArchRequirement(psdl)
-		}
-	} else {
+func (r *PodReconciler) processPod(ctx context.Context, pod *Pod) {
+	log := ctrllog.FromContext(ctx)
+	log.V(1).Info("Processing pod")
+	if pod.shouldIgnorePod() {
 		log.V(3).Info("A pod with the scheduling gate should be ignored. Ignoring...")
 		// We can reach this branch when:
 		// - The pod has been gated but not processed before the operator changed configuration such that the pod should be ignored.
 		// - The pod has got some other changes in the admission chain from another webhook that makes it not suitable for processing anymore
 		//   (for example another actor set the nodeAffinity already for the kubernetes.io/arch label).
 		// In both cases, we should just remove the scheduling gate.
-		r.Recorder.Event(&pod.Pod, corev1.EventTypeWarning, ArchitectureAwareGatedPodIgnored, ArchitectureAwareGatedPodIgnoredMsg)
+		log.V(1).Info("Removing the scheduling gate from pod.")
+		pod.RemoveSchedulingGate()
+		pod.publishEvent(corev1.EventTypeWarning, ArchitectureAwareGatedPodIgnored, ArchitectureAwareGatedPodIgnoredMsg)
+		return
 	}
-	// Remove the scheduling gate
-	log.V(1).Info("Removing the scheduling gate from pod.")
-	pod.RemoveSchedulingGate()
-
-	err := r.Client.Update(ctx, &pod.Pod)
-	if err != nil {
-		log.Error(err, "Unable to update the pod")
-		r.Recorder.Event(&pod.Pod, corev1.EventTypeWarning, ArchitectureAwareSchedulingGateRemovalFailure, SchedulingGateRemovalFailureMsg)
-		return ctrl.Result{}, err
+	// Prepare the requirement for the node affinity.
+	psdl, err := r.pullSecretDataList(ctx, pod)
+	pod.handleError(err, "Unable to retrieve the image pull secret data for the pod.")
+	// If no error occurred when retrieving the image pull secret data, set the node affinity.
+	if err == nil {
+		_, err = pod.SetNodeAffinityArchRequirement(psdl)
+		pod.handleError(err, "Unable to set the node affinity for the pod.")
+	}
+	if pod.maxRetries() && err != nil {
+		// The retry counter is incremented by handleError whenever the error is non-nil.
+		// If we reach this branch, the counter has just been incremented and has hit the maximum.
+		// The counter starts at 1 on the first error; therefore, once the reconciler has tried MaxRetryCount times,
+		// the counter equals MaxRetryCount and the pod should not be processed again.
+		// Publish the event and remove the scheduling gate.
+		log.Info("Max retries reached. The pod will not have the nodeAffinity set.")
+		pod.publishEvent(corev1.EventTypeWarning, ImageArchitectureInspectionError, fmt.Sprintf("%s: %s", ImageInspectionErrorMaxRetriesMsg, err.Error()))
+	}
+	// If the pod has been processed successfully or the max retries have been reached, remove the scheduling gate.
+	if err == nil || pod.maxRetries() {
+		log.V(1).Info("Removing the scheduling gate from pod.")
+		pod.RemoveSchedulingGate()
 	}
-	r.Recorder.Event(&pod.Pod, corev1.EventTypeNormal, ArchitectureAwareSchedulingGateRemovalSuccess, SchedulingGateRemovalSuccessMsg)
-	metrics.GatedPodsGauge.Dec()
-
-	return ctrl.Result{}, nil
 }
 
 // pullSecretDataList returns the list of secrets data for the given pod given its imagePullSecrets field
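Condensed view of the reworked control flow (a sketch of the intent, not code from the patch; `shouldRemoveGate` is a hypothetical name for the final branch):

```go
// Reconcile now performs a single in-memory pass (processPod) followed by one
// Update call that persists every mutation at once:
//
//	gated pod ─▶ processPod:
//	    ignored           ─▶ remove gate
//	    inspection OK     ─▶ set nodeAffinity, clear error label, remove gate
//	    inspection failed ─▶ handleError: error label/annotation, counter++
//	        counter == MaxRetryCount ─▶ give up and remove gate anyway
//	─▶ r.Client.Update(ctx, &pod.Pod)

// shouldRemoveGate condenses the final branch of processPod: the gate comes off
// when inspection succeeded, or when it failed MaxRetryCount times in a row.
func shouldRemoveGate(inspectionErr error, retriesExhausted bool) bool {
	return inspectionErr == nil || retriesExhausted
}
```

Because the gate removal and all labels, annotations, and the nodeAffinity are applied by a single `Update`, a pod is never persisted in a half-processed state; a failed `Update` simply leaves the gated pod for the next reconcile.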
diff --git a/controllers/podplacement/pod_reconciler_test.go b/controllers/podplacement/pod_reconciler_test.go
index bb73ff982..5af9794a7 100644
--- a/controllers/podplacement/pod_reconciler_test.go
+++ b/controllers/podplacement/pod_reconciler_test.go
@@ -2,6 +2,10 @@ package podplacement
 
 import (
 	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/openshift/multiarch-tuning-operator/pkg/e2e"
 
 	"k8s.io/apimachinery/pkg/util/sets"
 
@@ -70,7 +74,35 @@ var _ = Describe("Controllers/Podplacement/PodReconciler", func() {
 			Entry("Docker images", imgspecv1.MediaTypeImageManifest, utils.ArchitecturePpc64le),
 		)
 	})
-
+	Context("with an image that cannot be inspected", func() {
+		It("removes the scheduling gate after the max retries count is reached", func() {
+			pod := NewPod().
+				WithContainersImages("quay.io/non-existing/image:latest").
+				WithGenerateName("test-pod-").
+				WithNamespace("test-namespace").
+				Build()
+			err := k8sClient.Create(ctx, pod)
+			Expect(err).NotTo(HaveOccurred(), "failed to create pod", err)
+			// Test the removal of the scheduling gate. Since the pod is mutated and the reconciler
+			// removes the scheduling gate concurrently, we cannot ensure that the scheduling gate is added
+			// before the following Eventually starts polling a pod that still carries it.
+			// Waiting for the pod to be mutated is not enough either, as the pod could be mutated and the
+			// reconciler could have removed the scheduling gate before our check.
+			Eventually(func(g Gomega) {
+				// Get the pod from the API server.
+				err := k8sClient.Get(ctx, crclient.ObjectKeyFromObject(pod), pod)
+				g.Expect(err).NotTo(HaveOccurred(), "failed to get pod", err)
+				By(fmt.Sprintf("Error count is set to '%s'", pod.Labels[utils.ImageInspectionErrorCountLabel]))
+				g.Expect(pod.Spec.SchedulingGates).NotTo(ContainElement(corev1.PodSchedulingGate{
+					Name: utils.SchedulingGateName,
+				}), "scheduling gate not removed")
+				g.Expect(pod.Labels).To(HaveKeyWithValue(utils.SchedulingGateLabel, utils.SchedulingGateLabelValueRemoved),
+					"scheduling gate label not found")
+				g.Expect(pod.Labels).To(HaveKeyWithValue(utils.ImageInspectionErrorCountLabel, strconv.Itoa(MaxRetryCount)), "image inspection error count not found")
+			}).WithTimeout(e2e.WaitShort).WithPolling(time.Millisecond*250).Should(Succeed(), "failed to remove scheduling gate from pod")
+			// Polling is set to 250ms so that the error count is shown in the logs at each update.
+		})
+	})
 	Context("with different pull secrets", func() {
 		It("handles images with global pull secrets correctly", func() {
 			// TODO: Test logic for handling a Pod with one container and image using global pull secret
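Since `handleError` also records the last error message as an annotation under `utils.ImageInspectionErrorLabel`, the `Eventually` block above could additionally assert it. A possible extra check (an assumption on top of the test as written, not part of the patch):

```go
// Optional extra assertion for the Eventually block: handleError stores the
// last inspection error message as an annotation under the same key it uses
// for the error label.
g.Expect(pod.Annotations).To(HaveKey(utils.ImageInspectionErrorLabel),
	"expected the last inspection error to be recorded as an annotation")
```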
diff --git a/pkg/utils/const.go b/pkg/utils/const.go
index 70a31a529..c03e71ab0 100644
--- a/pkg/utils/const.go
+++ b/pkg/utils/const.go
@@ -29,6 +29,7 @@
 	MultiArchLabel                 = "multiarch.openshift.io/multi-arch"
 	NoSupportedArchLabel           = "multiarch.openshift.io/no-supported-arch"
 	ImageInspectionErrorLabel      = "multiarch.openshift.io/image-inspect-error"
+	ImageInspectionErrorCountLabel = "multiarch.openshift.io/image-inspect-error-count"
 	LabelGroup                     = "multiarch.openshift.io"
 )
 
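The new count label also makes retry-exhausted pods queryable. A hypothetical helper using a controller-runtime client (`listExhaustedPods` is not part of the patch; since the reconciler stops incrementing once the gate is removed, exhausted pods carry exactly the maximum value):

```go
package example

import (
	"context"
	"strconv"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/openshift/multiarch-tuning-operator/pkg/utils"
)

// listExhaustedPods returns the pods whose image-inspection retry counter
// has reached the maximum, selecting on the new count label.
func listExhaustedPods(ctx context.Context, c client.Client, maxRetryCount int) (*corev1.PodList, error) {
	podList := &corev1.PodList{}
	err := c.List(ctx, podList, client.MatchingLabels{
		utils.ImageInspectionErrorCountLabel: strconv.Itoa(maxRetryCount),
	})
	return podList, err
}
```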