Skip to content

Commit

Permalink
Pod-level circuit-breaking for image registry inspection errors
Browse files Browse the repository at this point in the history
  • Loading branch information
aleskandro committed Dec 3, 2024
1 parent 13958f1 commit 37079cc
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 43 deletions.
1 change: 1 addition & 0 deletions controllers/podplacement/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ const (
ImageArchitectureInspectionErrorMsg = "Failed to retrieve the supported architectures: "
NoSupportedArchitecturesFoundMsg = "Pod cannot be scheduled due to incompatible image architectures; container images have no supported architectures in common"
ArchitectureAwareGatedPodIgnoredMsg = "The gated pod has been modified and is no longer eligible for architecture-aware scheduling"
ImageInspectionErrorMaxRetriesMsg = "Failed to retrieve the supported architectures after multiple retries"
)
71 changes: 64 additions & 7 deletions controllers/podplacement/pod_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package podplacement
import (
"context"
"fmt"
"strconv"
"strings"
"time"

Expand All @@ -37,6 +38,8 @@ var (
imageInspectionCache image.ICache = image.FacadeSingleton()
)

const MaxRetryCount = 5

type containerImage struct {
imageName string
skipCache bool
Expand Down Expand Up @@ -96,16 +99,12 @@ func (pod *Pod) RemoveSchedulingGate() {
// It verifies first that no nodeSelector field is set for the kubernetes.io/arch label.
// Then, it computes the intersection of the architectures supported by the images used by the pod via pod.getArchitecturePredicate.
// Finally, it initializes the nodeAffinity for the pod and set it to the computed requirement via the pod.setArchNodeAffinity method.
func (pod *Pod) SetNodeAffinityArchRequirement(pullSecretDataList [][]byte) {
log := ctrllog.FromContext(pod.ctx)
func (pod *Pod) SetNodeAffinityArchRequirement(pullSecretDataList [][]byte) (bool, error) {
requirement, err := pod.getArchitecturePredicate(pullSecretDataList)
if err != nil {
pod.ensureLabel(utils.ImageInspectionErrorLabel, "")
metrics.FailedInspectionCounter.Inc()
pod.publishEvent(corev1.EventTypeWarning, ImageArchitectureInspectionError, ImageArchitectureInspectionErrorMsg+err.Error())
log.Error(err, "Error getting the architecture predicate. The pod will not have the nodeAffinity set.")
return
return false, err
}
pod.ensureNoLabel(utils.ImageInspectionErrorLabel)
if len(requirement.Values) == 0 {
pod.publishEvent(corev1.EventTypeNormal, NoSupportedArchitecturesFound, NoSupportedArchitecturesFoundMsg)
}
Expand All @@ -124,6 +123,7 @@ func (pod *Pod) SetNodeAffinityArchRequirement(pullSecretDataList [][]byte) {
}

pod.setArchNodeAffinity(requirement)
return true, nil
}

// setArchNodeAffinity sets the node affinity for the pod to the given requirement based on the rules in
Expand Down Expand Up @@ -244,6 +244,50 @@ func (pod *Pod) ensureLabel(label string, value string) {
pod.Labels[label] = value
}

func (pod *Pod) ensureNoLabel(label string) {
if pod.Labels == nil {
return
}
delete(pod.Labels, label)
}

// ensureLabel ensures that the pod has the given label with the given value.
func (pod *Pod) ensureAnnotation(annotation string, value string) {
if pod.Annotations == nil {
pod.Annotations = make(map[string]string)
}
pod.Annotations[annotation] = value
}

// ensureAndIncrementLabel ensures that the pod has the given label with the given value.
// If the label is already set, it increments the value.
func (pod *Pod) ensureAndIncrementLabel(label string) {
if pod.Labels == nil {
pod.Labels = make(map[string]string)
}
if _, ok := pod.Labels[label]; !ok {
pod.Labels[label] = "1"
return
}
cur, err := strconv.ParseInt(pod.Labels[label], 10, 32)
if err != nil {
pod.Labels[label] = "1"
} else {
pod.Labels[label] = fmt.Sprintf("%d", cur+1)
}
}

func (pod *Pod) maxRetries() bool {
if pod.Labels == nil {
return false
}
v, err := strconv.ParseInt(pod.Labels[utils.ImageInspectionErrorCountLabel], 10, 32)
if err != nil {
return true
}
return v >= MaxRetryCount
}

// ensureArchitectureLabels adds labels for the given requirement to the pod. Labels are added to indicate
// the supported architectures and index pods by architecture or by whether they support more than one architecture.
// In this case, single-architecture is meant as a pod that supports only one architecture: all the images in the pod
Expand Down Expand Up @@ -359,3 +403,16 @@ func (pod *Pod) publishIgnorePod() {
pod.ensureLabel(utils.NodeAffinityLabel, utils.LabelValueNotSet)
pod.publishEvent(corev1.EventTypeNormal, ArchitecturePredicatesConflict, ArchitecturePredicatesConflictMsg)
}

func (pod *Pod) handleError(err error, s string) {
if err == nil {
return
}
log := ctrllog.FromContext(pod.ctx)
metrics.FailedInspectionCounter.Inc()
pod.ensureLabel(utils.ImageInspectionErrorLabel, "")
pod.ensureAnnotation(utils.ImageInspectionErrorLabel, err.Error())
pod.ensureAndIncrementLabel(utils.ImageInspectionErrorCountLabel)
pod.publishEvent(corev1.EventTypeWarning, ImageArchitectureInspectionError, ImageArchitectureInspectionErrorMsg+err.Error())
log.Error(err, s)
}
15 changes: 11 additions & 4 deletions controllers/podplacement/pod_model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ func TestPod_SetNodeAffinityArchRequirement(t *testing.T) {
pullSecretDataList [][]byte
pod *v1.Pod
want *v1.Pod
expectErr bool
}{
{
name: "pod with no node selector terms",
Expand Down Expand Up @@ -685,9 +686,10 @@ func TestPod_SetNodeAffinityArchRequirement(t *testing.T) {
}).Build(),
},
{
name: "should not modify the pod if unable to inspect the images",
pod: NewPod().WithContainersImages(fake.MultiArchImage, "non-readable-image").Build(),
want: NewPod().WithContainersImages(fake.MultiArchImage, "non-readable-image").Build(),
name: "should not modify the pod if unable to inspect the images",
pod: NewPod().WithContainersImages(fake.MultiArchImage, "non-readable-image").Build(),
want: NewPod().WithContainersImages(fake.MultiArchImage, "non-readable-image").Build(),
expectErr: true,
},
{
name: "should prevent the pod from being scheduled when no common architecture is found",
Expand All @@ -709,8 +711,13 @@ func TestPod_SetNodeAffinityArchRequirement(t *testing.T) {
Pod: *tt.pod,
ctx: ctx,
}
pod.SetNodeAffinityArchRequirement(tt.pullSecretDataList)
_, err := pod.SetNodeAffinityArchRequirement(tt.pullSecretDataList)
g := NewGomegaWithT(t)
if tt.expectErr {
g.Expect(err).Should(HaveOccurred())
} else {
g.Expect(err).ShouldNot(HaveOccurred())
}
g.Expect(pod.Spec.Affinity).Should(Equal(tt.want.Spec.Affinity))
imageInspectionCache = mmoimage.FacadeSingleton()
})
Expand Down
76 changes: 45 additions & 31 deletions controllers/podplacement/pod_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package podplacement

import (
"context"
"fmt"
runtime2 "runtime"
"time"

Expand Down Expand Up @@ -77,52 +78,65 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
log.V(2).Info("Unable to fetch pod", "error", err)
return ctrl.Result{}, client.IgnoreNotFound(err)
}

// verify whether the pod has the scheduling gate
// Pods without the scheduling gate should be ignored.
if !pod.HasSchedulingGate() {
log.V(2).Info("Pod does not have the scheduling gate. Ignoring...")
// if not, return
return ctrl.Result{}, nil
}

// The scheduling gate is found.
metrics.ProcessedPodsCtrl.Inc()
defer utils.HistogramObserve(now, metrics.TimeToProcessGatedPod)
log.V(1).Info("Processing pod")
r.processPod(ctx, pod)
err := r.Client.Update(ctx, &pod.Pod)
if err != nil {
log.Error(err, "Unable to update the pod")
pod.publishEvent(corev1.EventTypeWarning, ArchitectureAwareSchedulingGateRemovalFailure, SchedulingGateRemovalFailureMsg)
return ctrl.Result{}, err
}
if !pod.HasSchedulingGate() {
// Only publish the event if the scheduling gate has been removed and the pod has been updated successfully.
pod.publishEvent(corev1.EventTypeNormal, ArchitectureAwareSchedulingGateRemovalSuccess, SchedulingGateRemovalSuccessMsg)
metrics.GatedPodsGauge.Dec()
}
return ctrl.Result{}, nil
}

if !pod.shouldIgnorePod() {
// Prepare the requirement for the node affinity.
psdl, err := r.pullSecretDataList(ctx, pod)
if err != nil {
log.Error(err, "Unable to retrieve the image pull secret data for the pod. "+
"The nodeAffinity for this pod will not be set.")
// we still need to remove the scheduling gate. Therefore, we do not return here.
} else {
pod.SetNodeAffinityArchRequirement(psdl)
}
} else {
func (r *PodReconciler) processPod(ctx context.Context, pod *Pod) {
log := ctrllog.FromContext(ctx)
log.V(1).Info("Processing pod")
if pod.shouldIgnorePod() {
log.V(3).Info("A pod with the scheduling gate should be ignored. Ignoring...")
// We can reach this branch when:
// - The pod has been gated but not processed before the operator changed configuration such that the pod should be ignored.
// - The pod has got some other changes in the admission chain from another webhook that makes it not suitable for processing anymore
// (for example another actor set the nodeAffinity already for the kubernetes.io/arch label).
// In both cases, we should just remove the scheduling gate.
r.Recorder.Event(&pod.Pod, corev1.EventTypeWarning, ArchitectureAwareGatedPodIgnored, ArchitectureAwareGatedPodIgnoredMsg)
log.V(1).Info("Removing the scheduling gate from pod.")
pod.RemoveSchedulingGate()
pod.publishEvent(corev1.EventTypeWarning, ArchitectureAwareGatedPodIgnored, ArchitectureAwareGatedPodIgnoredMsg)
return
}
// Remove the scheduling gate
log.V(1).Info("Removing the scheduling gate from pod.")
pod.RemoveSchedulingGate()

err := r.Client.Update(ctx, &pod.Pod)
if err != nil {
log.Error(err, "Unable to update the pod")
r.Recorder.Event(&pod.Pod, corev1.EventTypeWarning, ArchitectureAwareSchedulingGateRemovalFailure, SchedulingGateRemovalFailureMsg)
return ctrl.Result{}, err
// Prepare the requirement for the node affinity.
psdl, err := r.pullSecretDataList(ctx, pod)
pod.handleError(err, "Unable to retrieve the image pull secret data for the pod.")
// If no error occurred when retrieving the image pull secret data, set the node affinity.
if err == nil {
_, err = pod.SetNodeAffinityArchRequirement(psdl)
pod.handleError(err, "Unable to set the node affinity for the pod.")
}
if pod.maxRetries() && err != nil {
// the number of retries is incremented in the handleError function when the error is not nil.
// If we enter this branch, the retries counter has been incremented and reached the max retries.
// The counter starts at 1 when the first error occurs. Therefore, when the reconciler tries maxRetries times,
// the counter is equal to the maxRetries value and the pod should not be processed again.
// Publish this event and remove the scheduling gate.
log.Info("Max retries Reached. The pod will not have the nodeAffinity set.")
pod.publishEvent(corev1.EventTypeWarning, ImageArchitectureInspectionError, fmt.Sprintf("%s: %s", ImageInspectionErrorMaxRetriesMsg, err.Error()))
}
// If the pod has been processed successfully or the max retries have been reached, remove the scheduling gate.
if err == nil || pod.maxRetries() {
log.V(1).Info("Removing the scheduling gate from pod.")
pod.RemoveSchedulingGate()
}
r.Recorder.Event(&pod.Pod, corev1.EventTypeNormal, ArchitectureAwareSchedulingGateRemovalSuccess, SchedulingGateRemovalSuccessMsg)
metrics.GatedPodsGauge.Dec()

return ctrl.Result{}, nil
}

// pullSecretDataList returns the list of secrets data for the given pod given its imagePullSecrets field
Expand Down
33 changes: 32 additions & 1 deletion controllers/podplacement/pod_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ package podplacement

import (
"fmt"
"strconv"
"time"

"github.com/openshift/multiarch-tuning-operator/pkg/e2e"

"k8s.io/apimachinery/pkg/util/sets"

Expand Down Expand Up @@ -70,7 +74,34 @@ var _ = Describe("Controllers/Podplacement/PodReconciler", func() {
Entry("Docker images", imgspecv1.MediaTypeImageManifest, utils.ArchitecturePpc64le),
)
})

Context("with an image that cannot be inspected", func() {
It("remove the scheduling gate after the max retries count", func() {
pod := NewPod().
WithContainersImages("quay.io/non-existing/image:latest").
WithGenerateName("test-pod-").
WithNamespace("test-namespace").
Build()
err := k8sClient.Create(ctx, pod)
Expect(err).NotTo(HaveOccurred(), "failed to create pod", err)
// Test the removal of the scheduling gate. However, since the pod is mutated and the reconciler
// removes the scheduling gate concurrently, we cannot ensure that the scheduling gate is added
// and that the following Eventually works on a pod with the scheduling gate.
// Waiting for the pod to be mutated is not enough, as the pod could be mutated and the reconciler
// could have removed the scheduling gate before our check.
Eventually(func(g Gomega) {
// Get pod from the API server
err := k8sClient.Get(ctx, crclient.ObjectKeyFromObject(pod), pod)
g.Expect(err).NotTo(HaveOccurred(), "failed to get pod", err)
By(fmt.Sprintf("Error count is set to '%s'", pod.Labels[utils.ImageInspectionErrorCountLabel]))
g.Expect(pod.Spec.SchedulingGates).NotTo(ContainElement(corev1.PodSchedulingGate{
Name: utils.SchedulingGateName,
}), "scheduling gate not removed")
g.Expect(pod.Labels).To(HaveKeyWithValue(utils.SchedulingGateLabel, utils.SchedulingGateLabelValueRemoved),
"scheduling gate annotation not found")
g.Expect(pod.Labels).To(HaveKeyWithValue(utils.ImageInspectionErrorCountLabel, strconv.Itoa(MaxRetryCount)), "image inspection error count not found")
}).WithTimeout(e2e.WaitShort).WithPolling(time.Millisecond*250).Should(Succeed(), "failed to remove scheduling gate from pod")
})
})
Context("with different pull secrets", func() {
It("handles images with global pull secrets correctly", func() {
// TODO: Test logic for handling a Pod with one container and image using global pull secret
Expand Down
1 change: 1 addition & 0 deletions pkg/utils/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const (
MultiArchLabel = "multiarch.openshift.io/multi-arch"
NoSupportedArchLabel = "multiarch.openshift.io/no-supported-arch"
ImageInspectionErrorLabel = "multiarch.openshift.io/image-inspect-error"
ImageInspectionErrorCountLabel = "multiarch.openshift.io/image-inspect-error-count"
LabelGroup = "multiarch.openshift.io"
)

Expand Down

0 comments on commit 37079cc

Please sign in to comment.