From ed3f1204d5e2e1c03080aa2ab8dcd4a897458926 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 16:54:40 +0100 Subject: [PATCH 01/15] Write mutating webhook logic --- cmd/webhook/main.go | 29 +++ pkg/shared/shared.go | 79 +++++++++ .../shared_test.go} | 22 +-- pkg/webhook/webhook.go | 165 ++++++++++++++++++ 4 files changed, 284 insertions(+), 11 deletions(-) create mode 100644 cmd/webhook/main.go create mode 100644 pkg/shared/shared.go rename pkg/{informer/informer_test.go => shared/shared_test.go} (59%) create mode 100644 pkg/webhook/webhook.go diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go new file mode 100644 index 0000000..0fdb755 --- /dev/null +++ b/cmd/webhook/main.go @@ -0,0 +1,29 @@ +package main + +import ( + "flag" + + "github.com/norbjd/k8s-pod-cpu-booster/pkg/webhook" + "k8s.io/klog/v2" +) + +func main() { + klog.InitFlags(nil) + + var port uint + var pathToCertFile string + var pathToKeyFile string + + flag.UintVar(&port, "port", 8443, "listening port") + flag.StringVar(&pathToCertFile, "cert", "", "path to cert file") + flag.StringVar(&pathToKeyFile, "key", "", "path to key file") + + flag.Parse() + + // TODO: check pathToCertFile and pathToKeyFile are existing files + + err := webhook.Run(port, pathToCertFile, pathToKeyFile) + if err != nil { + klog.Fatal(err) + } +} diff --git a/pkg/shared/shared.go b/pkg/shared/shared.go new file mode 100644 index 0000000..147eb93 --- /dev/null +++ b/pkg/shared/shared.go @@ -0,0 +1,79 @@ +package shared + +import ( + "fmt" + "strconv" + + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" +) + +const ( + cpuBoostMultiplierLabel = "norbjd.github.io/k8s-pod-cpu-booster-multiplier" + cpuBoostDefaultMultiplier = uint64(10) + cpuBoostContainerNameLabel = "norbjd.github.io/k8s-pod-cpu-booster-container" +) + +type BoostInfo struct { + ContainerIndex int + ContainerName string + Multiplier uint64 +} + +func RetrieveBoostInfo(pod *corev1.Pod) (BoostInfo, error) { + containerIndex, containerName, err := 
getContainerToBoost(pod) + if err != nil { + return BoostInfo{}, err + } + + boostMultiplier := getBoostMultiplierFromLabels(pod) + + return BoostInfo{ + ContainerIndex: containerIndex, + ContainerName: containerName, + Multiplier: boostMultiplier, + }, nil +} + +func getBoostMultiplierFromLabels(pod *corev1.Pod) uint64 { + if boostMultiplierLabelValue, ok := pod.Labels[cpuBoostMultiplierLabel]; ok { + boostMultiplierLabelValueInt, err := strconv.ParseUint(boostMultiplierLabelValue, 10, 64) + if err != nil { + klog.Warningf("boost multiplier is not a valid value, will take the default %d instead: %s", + cpuBoostDefaultMultiplier, err.Error()) + return cpuBoostDefaultMultiplier + } + + return boostMultiplierLabelValueInt + } + + return cpuBoostDefaultMultiplier +} + +func getContainerToBoost(pod *corev1.Pod) (index int, name string, err error) { + containerNameToBoost := pod.Labels[cpuBoostContainerNameLabel] + containerIndex := -1 + + if containerNameToBoost == "" { + if len(pod.Spec.Containers) > 1 { + return 0, "", fmt.Errorf("pod %s/%s contains %d containers but label %s is unset", + pod.Namespace, pod.Name, len(pod.Spec.Containers), cpuBoostContainerNameLabel) + } else { + containerIndex = 0 + } + } else { + for i, container := range pod.Spec.Containers { + if container.Name == containerNameToBoost { + containerIndex = i + break + } + } + + if containerIndex == -1 { + return 0, "", fmt.Errorf("pod %s/%s contains no containers named %s (found in label %s)", + pod.Namespace, pod.Name, containerNameToBoost, cpuBoostContainerNameLabel) + } + } + + return containerIndex, containerNameToBoost, nil +} diff --git a/pkg/informer/informer_test.go b/pkg/shared/shared_test.go similarity index 59% rename from pkg/informer/informer_test.go rename to pkg/shared/shared_test.go index adac60e..e299765 100644 --- a/pkg/informer/informer_test.go +++ b/pkg/shared/shared_test.go @@ -1,4 +1,4 @@ -package informer +package shared import ( "fmt" @@ -10,24 +10,24 @@ import ( metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" ) -func Test_getBoostMultiplierFromAnnotations(t *testing.T) { - t.Run("should take the default value if no annotation is provided", func(t *testing.T) { - boostMultiplier := getBoostMultiplierFromAnnotations(&corev1.Pod{ +func Test_getBoostMultiplierFromLabels(t *testing.T) { + t.Run("should take the default value if no label is provided", func(t *testing.T) { + boostMultiplier := getBoostMultiplierFromLabels(&corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Annotations: nil, + Labels: nil, }, }) assert.Equal(t, cpuBoostDefaultMultiplier, boostMultiplier) }) - t.Run("should take the value if annotation is valid", func(t *testing.T) { + t.Run("should take the value if label is valid", func(t *testing.T) { notDefaultValue := uint64(5) notDefaultValueString := fmt.Sprintf("%d", notDefaultValue) require.NotEqual(t, cpuBoostDefaultMultiplier, notDefaultValueString, "must not use the default value in that test!") - boostMultiplier := getBoostMultiplierFromAnnotations(&corev1.Pod{ + boostMultiplier := getBoostMultiplierFromLabels(&corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Annotations: map[string]string{ + Labels: map[string]string{ "norbjd.github.io/k8s-pod-cpu-booster-multiplier": notDefaultValueString, }, }, @@ -35,10 +35,10 @@ func Test_getBoostMultiplierFromAnnotations(t *testing.T) { assert.Equal(t, notDefaultValue, boostMultiplier) }) - t.Run("should fail if annotation value is invalid", func(t *testing.T) { - boostMultiplier := getBoostMultiplierFromAnnotations(&corev1.Pod{ + t.Run("should fail if label value is invalid", func(t *testing.T) { + boostMultiplier := getBoostMultiplierFromLabels(&corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Annotations: map[string]string{ + Labels: map[string]string{ "norbjd.github.io/k8s-pod-cpu-booster-multiplier": "not-a-valid-value", }, }, diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go new file mode 100644 index 0000000..b45b3b6 --- /dev/null +++ b/pkg/webhook/webhook.go @@ -0,0 
+1,165 @@ +package webhook + +import ( + "crypto/tls" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/norbjd/k8s-pod-cpu-booster/pkg/shared" + admissionv1 "k8s.io/api/admission/v1" + v1 "k8s.io/api/admission/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/klog/v2" +) + +var deserializer = serializer.NewCodecFactory(runtime.NewScheme()).UniversalDeserializer() + +func admissionReviewFromRequest(r *http.Request, deserializer runtime.Decoder) (*admissionv1.AdmissionReview, error) { + if r.Header.Get("Content-Type") != "application/json" { + return nil, fmt.Errorf("expected application/json content-type") + } + + var body []byte + if r.Body != nil { + requestData, err := io.ReadAll(r.Body) + if err != nil { + return nil, err + } + body = requestData + } + + admissionReviewRequest := &admissionv1.AdmissionReview{} + if _, _, err := deserializer.Decode(body, nil, admissionReviewRequest); err != nil { + return nil, err + } + + return admissionReviewRequest, nil +} + +func mutatePod(w http.ResponseWriter, r *http.Request) { + klog.V(9).Infof("received message on mutate") + + admissionReviewRequest, err := admissionReviewFromRequest(r, deserializer) + if err != nil { + msg := "error getting admission review from request" + klog.ErrorS(err, msg) + w.WriteHeader(400) + w.Write([]byte(msg)) + return + } + + // Do server-side validation that we are only dealing with a pod resource. This + // should also be part of the MutatingWebhookConfiguration in the cluster, but + // we should verify here before continuing. 
+ // TODO: also check the label enabling boosting is set + podResource := metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} + if admissionReviewRequest.Request.Resource != podResource { + errNotAPod := fmt.Errorf("did not receive pod, got %s", admissionReviewRequest.Request.Resource.Resource) + klog.ErrorS(errNotAPod, "") + w.WriteHeader(400) + w.Write([]byte(errNotAPod.Error())) + return + } + + // Decode the pod from the AdmissionReview. + rawRequest := admissionReviewRequest.Request.Object.Raw + pod := corev1.Pod{} + if _, _, err := deserializer.Decode(rawRequest, nil, &pod); err != nil { + msg := "error decoding raw pod" + klog.ErrorS(err, msg) + w.WriteHeader(500) + w.Write([]byte(msg)) + return + } + + boostInfo, err := shared.RetrieveBoostInfo(&pod) + if err != nil { + klog.ErrorS(err, "cannot get boost info") + w.WriteHeader(400) + w.Write([]byte(err.Error())) + return + } + + currentCPURequest := pod.Spec.Containers[boostInfo.ContainerIndex].Resources.Requests.Cpu() + currentCPULimit := pod.Spec.Containers[boostInfo.ContainerIndex].Resources.Limits.Cpu() + + newCPURequest := resource.NewScaledQuantity(currentCPURequest.ScaledValue(resource.Nano)*int64(boostInfo.Multiplier), resource.Nano) + newCPULimit := resource.NewScaledQuantity(currentCPULimit.ScaledValue(resource.Nano)*int64(boostInfo.Multiplier), resource.Nano) + + admissionResponse := &admissionv1.AdmissionResponse{} + patchType := v1.PatchTypeJSONPatch + patch := fmt.Sprintf(` + [ + { + "op": "add", + "path": "/metadata/labels/norbjd.github.io~1k8s-pod-cpu-booster-progress", + "value": "boosting" + }, + { + "op": "replace", + "path": "/spec/containers/%d/resources/requests/cpu", + "value": "%s" + }, + { + "op": "replace", + "path": "/spec/containers/%d/resources/limits/cpu", + "value": "%s" + } + ] + `, boostInfo.ContainerIndex, newCPURequest.String(), boostInfo.ContainerIndex, newCPULimit.String()) + + // TODO: in case of a pod from a Deployment or a knative Service, pod.Name 
is empty, why? + klog.Infof("Current CPU request/limit for %s/%s (container 0) is %s/%s, will set new CPU limit to %s/%s (boost by %d)", + pod.Namespace, pod.Name, currentCPURequest, currentCPULimit, newCPURequest, newCPULimit, boostInfo.Multiplier) + + admissionResponse.Allowed = true + admissionResponse.PatchType = &patchType + admissionResponse.Patch = []byte(patch) + + // Construct the response, which is just another AdmissionReview. + var admissionReviewResponse admissionv1.AdmissionReview + admissionReviewResponse.Response = admissionResponse + admissionReviewResponse.SetGroupVersionKind(admissionReviewRequest.GroupVersionKind()) + admissionReviewResponse.Response.UID = admissionReviewRequest.Request.UID + + resp, err := json.Marshal(admissionReviewResponse) + if err != nil { + msg := "error marshalling response json" + klog.ErrorS(err, msg) + w.WriteHeader(500) + w.Write([]byte(msg)) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write(resp) +} + +func Run(port uint, certFile, keyFile string) error { + cert, errLoadCert := tls.LoadX509KeyPair(certFile, keyFile) + if errLoadCert != nil { + return errLoadCert + } + + klog.Info("Starting webhook server") + http.HandleFunc("/mutate", mutatePod) + server := http.Server{ + Addr: fmt.Sprintf(":%d", port), + TLSConfig: &tls.Config{ + Certificates: []tls.Certificate{cert}, + }, + ErrorLog: klog.NewStandardLogger("INFO"), // TODO? 
+ } + + if err := server.ListenAndServeTLS("", ""); err != nil { + return err + } + + return nil +} From ce604c29b7ec5447fd7ea2bfa2d611f2e9cda026 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 16:56:11 +0100 Subject: [PATCH 02/15] Rewrite informer code logic (only reset boost now) --- cmd/{ => informer}/main.go | 0 pkg/informer/informer.go | 151 ++++++------------------------------- 2 files changed, 25 insertions(+), 126 deletions(-) rename cmd/{ => informer}/main.go (100%) diff --git a/cmd/main.go b/cmd/informer/main.go similarity index 100% rename from cmd/main.go rename to cmd/informer/main.go diff --git a/pkg/informer/informer.go b/pkg/informer/informer.go index 4200b8d..5b085ec 100644 --- a/pkg/informer/informer.go +++ b/pkg/informer/informer.go @@ -4,10 +4,9 @@ import ( "context" "errors" "fmt" - "os" - "strconv" "github.com/google/go-cmp/cmp" + "github.com/norbjd/k8s-pod-cpu-booster/pkg/shared" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -21,13 +20,7 @@ import ( ) const ( - cpuBoostStartupAnnotation = "norbjd.github.io/k8s-pod-cpu-booster-enabled" - - cpuBoostMultiplierAnnotation = "norbjd.github.io/k8s-pod-cpu-booster-multiplier" - cpuBoostDefaultMultiplier = uint64(10) - - cpuBoostContainerNameAnnotation = "norbjd.github.io/k8s-pod-cpu-booster-container" - + cpuBoostStartupLabel = "norbjd.github.io/k8s-pod-cpu-booster-enabled" cpuBoostProgressLabelName = "norbjd.github.io/k8s-pod-cpu-booster-progress" cpuBoostInProgressLabelValue = "boosting" cpuBoostDoneLabelValue = "has-been-boosted" @@ -60,91 +53,43 @@ func Run(clientset *kubernetes.Clientset) { <-stopper } -// only check pods running on the current node, assumes NODE_NAME contains the name of the node -// only necessary if we want to deploy pod-cpu-booster as a DaemonSet, otherwise a simple Deployment with 1 replica (to avoid conflicts) would be enough +// only check pods with the CPU boost label set func 
podCPUBoosterTweakFunc() internalinterfaces.TweakListOptionsFunc { return func(opts *metav1.ListOptions) { - opts.FieldSelector = "spec.nodeName=" + os.Getenv("NODE_NAME") + opts.LabelSelector = metav1.FormatLabelSelector(&metav1.LabelSelector{ + MatchLabels: map[string]string{ + cpuBoostStartupLabel: "true", + }, + }) } } -func getBoostMultiplierFromAnnotations(pod *corev1.Pod) uint64 { - if boostMultiplierAnnotationValue, ok := pod.Annotations[cpuBoostMultiplierAnnotation]; ok { - boostMultiplierAnnotationValueInt, err := strconv.ParseUint(boostMultiplierAnnotationValue, 10, 64) - if err != nil { - klog.Errorf("boost multiplier is not a valid value, will take the default %d instead: %s", - cpuBoostDefaultMultiplier, err.Error()) - return cpuBoostDefaultMultiplier - } - - return boostMultiplierAnnotationValueInt - } - - return cpuBoostDefaultMultiplier -} - func onUpdate(clientset *kubernetes.Clientset, oldObj interface{}, newObj interface{}) { oldPod := oldObj.(*corev1.Pod) newPod := newObj.(*corev1.Pod) klog.Infof("pod %s/%s updated", newPod.Namespace, newPod.Name) klog.V(9).Info(cmp.Diff(oldPod, newPod)) - if podHasBoostAnnotation(newPod) { - if len(newPod.Status.ContainerStatuses) == 0 { - klog.Infof("pod %s/%s has no container statuses, skipping...", newPod.Namespace, newPod.Name) - return - } - - containerNameToBoost := newPod.Annotations[cpuBoostContainerNameAnnotation] - - containerIndex := -1 - - if containerNameToBoost == "" { - if len(newPod.Spec.Containers) > 1 { - klog.Warningf("pod %s/%s contains %d containers but annotation %s is unset, skipping...", - newPod.Namespace, newPod.Name, len(newPod.Spec.Containers), cpuBoostContainerNameAnnotation) - return - } else { - containerIndex = 0 - } - } else { - for i, container := range newPod.Spec.Containers { - if container.Name == containerNameToBoost { - containerIndex = i - break - } - } - - if containerIndex == -1 { - klog.Warningf("pod %s/%s contains no containers named %s (found in annotation %s), 
skipping...", - newPod.Namespace, newPod.Name, containerNameToBoost, cpuBoostContainerNameAnnotation) - return - } - } + if len(newPod.Status.ContainerStatuses) == 0 { + klog.Infof("pod %s/%s has no container statuses, skipping...", newPod.Namespace, newPod.Name) + return + } - boostMultiplier := getBoostMultiplierFromAnnotations(newPod) + boostInfo, err := shared.RetrieveBoostInfo(newPod) + if err != nil { + klog.ErrorS(err, "cannot retrieve boost info") + return + } - if podJustStartedAndNotReadyYet(newPod) { - klog.Infof("will boost %s/%s (container %s) CPU limit", newPod.Namespace, newPod.Name, containerNameToBoost) - err := boostCPU(clientset, newPod, containerIndex, boostMultiplier) - if err != nil { - klog.Errorf("error while boosting CPU: %s", err.Error()) - } - } else if podIsNowReadyAfterBoosting(newPod) { - klog.Infof("will reset %s/%s (container %s) CPU limit to default", newPod.Namespace, newPod.Name, containerNameToBoost) - err := resetCPUBoost(clientset, newPod, containerIndex, boostMultiplier) - if err != nil { - klog.Errorf("error while resetting CPU boost: %s", err.Error()) - } + if podIsNowReadyAfterBoosting(newPod) { + klog.Infof("will reset %s/%s (container %s) CPU limit to default", newPod.Namespace, newPod.Name, boostInfo.ContainerName) + err := resetCPUBoost(clientset, newPod, boostInfo.ContainerIndex, boostInfo.Multiplier) + if err != nil { + klog.Errorf("error while resetting CPU boost: %s", err.Error()) } } } -func podHasBoostAnnotation(pod *corev1.Pod) bool { - boost, ok := pod.Annotations[cpuBoostStartupAnnotation] - return ok && boost == "true" -} - func podIsNowReadyAfterBoosting(newPod *corev1.Pod) bool { for _, condition := range newPod.Status.Conditions { if condition.Type == "Ready" && condition.Status == "True" { @@ -161,27 +106,6 @@ func podIsNowReadyAfterBoosting(newPod *corev1.Pod) bool { return false } -func podJustStartedAndNotReadyYet(pod *corev1.Pod) bool { - // we have to wait until it's running before changing the CPU 
otherwise the behavior is undefined (caught this by experimenting) - return pod.Status.Phase == corev1.PodRunning && pod.Labels[cpuBoostProgressLabelName] == "" -} - -func boostCPU(clientset *kubernetes.Clientset, pod *corev1.Pod, containerIndex int, boostMultiplier uint64) error { - container := pod.Spec.Containers[containerIndex] - currentCPULimit := container.Resources.Limits.Cpu() - cpuLimitAfterBoost := resource.NewScaledQuantity(currentCPULimit.ScaledValue(resource.Nano)*int64(boostMultiplier), resource.Nano) - - klog.Infof("Current CPU limit for %s/%s (container %s) is %s, will set new CPU limit to %s", - pod.Namespace, pod.Name, container.Name, currentCPULimit, cpuLimitAfterBoost) - - err := writeCPULimit(clientset, pod, containerIndex, cpuLimitAfterBoost, boost) - if err != nil { - return err - } - - return nil -} - func resetCPUBoost(clientset *kubernetes.Clientset, pod *corev1.Pod, containerIndex int, boostMultiplier uint64) error { container := pod.Spec.Containers[containerIndex] currentCPULimit := container.Resources.Limits.Cpu() @@ -190,7 +114,7 @@ func resetCPUBoost(clientset *kubernetes.Clientset, pod *corev1.Pod, containerIn klog.Infof("Current CPU limit for %s/%s (container %s) is %s, will reset CPU limit to %s", pod.Namespace, pod.Name, container.Name, currentCPULimit, cpuLimitAfterReset) - err := writeCPULimit(clientset, pod, containerIndex, cpuLimitAfterReset, reset) + err := writeCPULimit(clientset, pod, containerIndex, cpuLimitAfterReset) if err != nil { return err } @@ -198,14 +122,7 @@ func resetCPUBoost(clientset *kubernetes.Clientset, pod *corev1.Pod, containerIn return nil } -type action int32 - -const ( - boost action = iota - reset -) - -func writeCPULimit(clientset *kubernetes.Clientset, pod *corev1.Pod, containerIndex int, cpuLimit *resource.Quantity, action action) error { +func writeCPULimit(clientset *kubernetes.Clientset, pod *corev1.Pod, containerIndex int, cpuLimit *resource.Quantity) error { ctx := context.Background() 
podsClient := clientset.CoreV1().Pods(pod.Namespace) @@ -215,25 +132,7 @@ func writeCPULimit(clientset *kubernetes.Clientset, pod *corev1.Pod, containerIn return fmt.Errorf("failed to get latest version of pod %s/%s: %v", pod.Namespace, pod.Name, getErr) } - if action == boost && result.Labels[cpuBoostProgressLabelName] == cpuBoostInProgressLabelValue { - klog.Info("Already in boosting process, skipping...") - return nil - } - - switch action { - case boost: - if result.Labels == nil { - result.Labels = make(map[string]string) - } - result.Labels[cpuBoostProgressLabelName] = cpuBoostInProgressLabelValue - case reset: - if result.Labels == nil { - result.Labels = make(map[string]string) - } - result.Labels[cpuBoostProgressLabelName] = cpuBoostDoneLabelValue - default: - return fmt.Errorf("unknown action: %d (expected %d or %d)", action, boost, reset) - } + result.Labels[cpuBoostProgressLabelName] = cpuBoostDoneLabelValue container := result.Spec.Containers[containerIndex] From 11f6dae3e384b04f474cb094a7deaa7640ef46f6 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 16:56:44 +0100 Subject: [PATCH 03/15] Use labels instead of annotations --- README.md | 18 +++++++++--------- examples/deployment-with-default-boost.yaml | 3 +-- examples/knative-service.yaml | 1 + examples/pod-with-default-boost.yaml | 2 +- ...-multiple-containers-and-default-boost.yaml | 2 +- examples/pod-with-small-boost.yaml | 2 +- test/e2e/deployment-with-default-boost.yaml | 3 +-- test/e2e/pod-with-default-boost.yaml | 2 +- 8 files changed, 16 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 266052b..2e4ef92 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,9 @@ Between startup and `Ready` status, the container benefits from a CPU boost (x10 It is deployed as a controller on every node (with a `DaemonSet`). 
It listens for every pod update; if a pod has `norbjd.github.io/k8s-pod-cpu-booster-enabled: "true"` label: it boosts the CPU at pod startup, and reset the CPU limit when the pod is ready. -The CPU boost can be configured with `norbjd.github.io/k8s-pod-cpu-booster-multiplier` annotation: +The CPU boost can be configured with `norbjd.github.io/k8s-pod-cpu-booster-multiplier` label: -- if specified, it will increase the CPU limit by `x`, where `x` is the value of the annotation (must be an unsigned integer) +- if specified, it will increase the CPU limit by `x`, where `x` is the value of the label (must be an unsigned integer) - if unspecified or invalid, it will increase the CPU limit by the default value (`10`) ## Install @@ -27,7 +27,7 @@ The CPU boost can be configured with `norbjd.github.io/k8s-pod-cpu-booster-multi Use `ko`. Example on a `kind` cluster: ```sh -KO_DOCKER_REPO=kind.local ko apply -f config/ +make --directory config/ --silent --no-print-directory build | KO_DOCKER_REPO=kind.local ko apply -f - ``` ## Test/Demo @@ -48,25 +48,25 @@ kind load docker-image python:3.11-alpine Install `k8s-pod-cpu-booster`: ```sh -KO_DOCKER_REPO=kind.local ko apply -f config/ +make --directory config/ --silent --no-print-directory build | KO_DOCKER_REPO=kind.local ko apply -f - ``` -Start two similar pods with low CPU limits and running `python -m http.server`, with a readiness probe configured to check when the http server is started. The only differences are the name (obviously), and the annotation `norbjd.github.io/k8s-pod-cpu-booster-enabled`: +Start two similar pods with low CPU limits and running `python -m http.server`, with a readiness probe configured to check when the http server is started. 
The only differences are the name (obviously), and the label `norbjd.github.io/k8s-pod-cpu-booster-enabled`: ```diff --- examples/pod-no-boost.yaml +++ examples/pod-with-default-boost.yaml @@ -4 +4,3 @@ - name: pod-no-boost -+ annotations: ++ labels: + norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" + name: pod-with-default-boost ``` > [!NOTE] -> The CPU boost multiplier can also be configured (see [`pod-with-small-boost.yaml`](https://github.com/norbjd/k8s-pod-cpu-booster/blob/main/examples/pod-with-small-boost.yaml)) by using the `norbjd.github.io/k8s-pod-cpu-booster-multiplier` annotation. +> The CPU boost multiplier can also be configured (see [`pod-with-small-boost.yaml`](https://github.com/norbjd/k8s-pod-cpu-booster/blob/main/examples/pod-with-small-boost.yaml)) by using the `norbjd.github.io/k8s-pod-cpu-booster-multiplier` label. -As a result, the pod `pod-with-default-boost` (with the annotation) will benefit from a CPU boost, but `pod-no-boost` won't: +As a result, the pod `pod-with-default-boost` (with the label) will benefit from a CPU boost, but `pod-no-boost` won't: ```sh kubectl apply -f examples/pod-no-boost.yaml -f examples/pod-with-default-boost.yaml @@ -101,7 +101,7 @@ Cleanup: ```sh kubectl delete -f examples/pod-no-boost.yaml -f examples/pod-with-default-boost.yaml -KO_DOCKER_REPO=kind.local ko delete -f config/ +make --directory config/ --silent --no-print-directory build | KO_DOCKER_REPO=kind.local ko delete -f - kind delete cluster ``` diff --git a/examples/deployment-with-default-boost.yaml b/examples/deployment-with-default-boost.yaml index 1aad20f..c727698 100644 --- a/examples/deployment-with-default-boost.yaml +++ b/examples/deployment-with-default-boost.yaml @@ -9,9 +9,8 @@ spec: app: deployment-with-default-boost template: metadata: - annotations: - norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" labels: + norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" app: deployment-with-default-boost spec: containers: diff --git 
a/examples/knative-service.yaml b/examples/knative-service.yaml index 5666fa0..f9d348a 100644 --- a/examples/knative-service.yaml +++ b/examples/knative-service.yaml @@ -10,6 +10,7 @@ spec: queue.sidecar.serving.knative.dev/cpu-resource-limit: "300m" queue.sidecar.serving.knative.dev/memory-resource-request: "10M" queue.sidecar.serving.knative.dev/memory-resource-limit: "10M" + labels: norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" norbjd.github.io/k8s-pod-cpu-booster-container: "user-container" spec: diff --git a/examples/pod-with-default-boost.yaml b/examples/pod-with-default-boost.yaml index d7be3a8..5774b34 100644 --- a/examples/pod-with-default-boost.yaml +++ b/examples/pod-with-default-boost.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Pod metadata: - annotations: + labels: norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" name: pod-with-default-boost spec: diff --git a/examples/pod-with-multiple-containers-and-default-boost.yaml b/examples/pod-with-multiple-containers-and-default-boost.yaml index 6bb06cc..3fd8f0c 100644 --- a/examples/pod-with-multiple-containers-and-default-boost.yaml +++ b/examples/pod-with-multiple-containers-and-default-boost.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Pod metadata: - annotations: + labels: norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" norbjd.github.io/k8s-pod-cpu-booster-container: "python" name: pod-with-multiple-containers-and-default-boost diff --git a/examples/pod-with-small-boost.yaml b/examples/pod-with-small-boost.yaml index 0a9b057..34e6e26 100644 --- a/examples/pod-with-small-boost.yaml +++ b/examples/pod-with-small-boost.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Pod metadata: - annotations: + labels: norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" norbjd.github.io/k8s-pod-cpu-booster-multiplier: "3" name: pod-with-small-boost diff --git a/test/e2e/deployment-with-default-boost.yaml b/test/e2e/deployment-with-default-boost.yaml index 1aad20f..c727698 100644 --- 
a/test/e2e/deployment-with-default-boost.yaml +++ b/test/e2e/deployment-with-default-boost.yaml @@ -9,9 +9,8 @@ spec: app: deployment-with-default-boost template: metadata: - annotations: - norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" labels: + norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" app: deployment-with-default-boost spec: containers: diff --git a/test/e2e/pod-with-default-boost.yaml b/test/e2e/pod-with-default-boost.yaml index d7be3a8..5774b34 100644 --- a/test/e2e/pod-with-default-boost.yaml +++ b/test/e2e/pod-with-default-boost.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Pod metadata: - annotations: + labels: norbjd.github.io/k8s-pod-cpu-booster-enabled: "true" name: pod-with-default-boost spec: From f9857898d10b204256e6868289d8c5a3930a9a3a Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 16:58:16 +0100 Subject: [PATCH 04/15] Change e2e tests to include the webhook --- .github/workflows/kind-e2e.yaml | 5 +++-- test/e2e-kind.sh | 9 +++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/kind-e2e.yaml b/.github/workflows/kind-e2e.yaml index 5b54629..64306ea 100644 --- a/.github/workflows/kind-e2e.yaml +++ b/.github/workflows/kind-e2e.yaml @@ -55,8 +55,9 @@ jobs: - name: Wait for Ready run: | - echo "Waiting for Pods to become ready" - kubectl wait pod --for=condition=Ready -n pod-cpu-booster -l name=pod-cpu-booster + echo "Waiting for k8s-pod-cpu-booster items to become ready" + kubectl wait pod --for=condition=Ready -n pod-cpu-booster -l app=pod-cpu-booster + kubectl wait pod --for=condition=Ready -n pod-cpu-booster -l app=mutating-webhook sleep 5 # because readiness probe is not accurate (Ready != informer is started), but sleeping is enough for now - name: Run e2e Tests diff --git a/test/e2e-kind.sh b/test/e2e-kind.sh index dbed6db..a8de0be 100755 --- a/test/e2e-kind.sh +++ b/test/e2e-kind.sh @@ -128,9 +128,14 @@ else # cgroup v2 fi fi -echo "Pod-cpu-booster logs" +echo "mutating-webhook logs" 
echo "====================" -kubectl logs --tail=-1 -n pod-cpu-booster -l name=pod-cpu-booster +kubectl logs --tail=-1 -n pod-cpu-booster -l app=mutating-webhook --prefix +echo "====================" + +echo "pod-cpu-boost-reset logs" +echo "====================" +kubectl logs --tail=-1 -n pod-cpu-booster -l app=pod-cpu-boost-reset --prefix echo "====================" kubectl delete \ From 60b4b2062d213d9a425b70cb26a66ee22f6e7723 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 16:58:39 +0100 Subject: [PATCH 05/15] Include webhook in config/ (and refacto with kustomize) --- config/.gitignore | 2 + config/200-rbac.yaml | 33 -------- config/300-pod-cpu-booster.yaml | 31 -------- config/GNUmakefile | 13 ++++ config/kustomization.yaml | 25 ++++++ config/mutating-webhook.yaml | 78 +++++++++++++++++++ config/{100-namespace.yaml => namespace.yaml} | 0 config/pod-cpu-boost-reset.yaml | 62 +++++++++++++++ 8 files changed, 180 insertions(+), 64 deletions(-) create mode 100644 config/.gitignore delete mode 100644 config/200-rbac.yaml delete mode 100644 config/300-pod-cpu-booster.yaml create mode 100644 config/GNUmakefile create mode 100644 config/kustomization.yaml create mode 100644 config/mutating-webhook.yaml rename config/{100-namespace.yaml => namespace.yaml} (100%) create mode 100644 config/pod-cpu-boost-reset.yaml diff --git a/config/.gitignore b/config/.gitignore new file mode 100644 index 0000000..be870b4 --- /dev/null +++ b/config/.gitignore @@ -0,0 +1,2 @@ +*.crt +*.key diff --git a/config/200-rbac.yaml b/config/200-rbac.yaml deleted file mode 100644 index a909bec..0000000 --- a/config/200-rbac.yaml +++ /dev/null @@ -1,33 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - namespace: pod-cpu-booster - name: pod-cpu-booster ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: pod-cpu-booster -rules: -- apiGroups: - - "" - resources: - - pods - verbs: - - list - - watch - - get - - update ---- -apiVersion: 
rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: pod-cpu-booster -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: pod-cpu-booster -subjects: -- kind: ServiceAccount - name: pod-cpu-booster - namespace: pod-cpu-booster diff --git a/config/300-pod-cpu-booster.yaml b/config/300-pod-cpu-booster.yaml deleted file mode 100644 index 14120ab..0000000 --- a/config/300-pod-cpu-booster.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: pod-cpu-booster - namespace: pod-cpu-booster -spec: - selector: - matchLabels: - name: pod-cpu-booster - template: - metadata: - labels: - name: pod-cpu-booster - spec: - containers: - - name: pod-cpu-booster - image: ko://github.com/norbjd/k8s-pod-cpu-booster/cmd - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - resources: - limits: - cpu: 100m - memory: 100Mi - requests: - cpu: 100m - memory: 100Mi - serviceAccountName: pod-cpu-booster - terminationGracePeriodSeconds: 0 # TODO: change for production environments diff --git a/config/GNUmakefile b/config/GNUmakefile new file mode 100644 index 0000000..6f12410 --- /dev/null +++ b/config/GNUmakefile @@ -0,0 +1,13 @@ +ca.key: + openssl genrsa -out ca.key 4096 + +ca.crt: + openssl req -new -x509 -key ca.key -out ca.crt -days 3650 -nodes -subj "/CN=my-self-signed-ca" + +tls.crt tls.key: + openssl req -x509 -CA ca.crt -CAkey ca.key -keyout tls.key -out tls.crt -sha256 -days 3650 -nodes -subj "/CN=mutating-webhook.pod-cpu-booster.svc" -addext "subjectAltName = DNS:mutating-webhook.pod-cpu-booster.svc" + +mutating-webhook-certs: ca.key ca.crt tls.crt + +build: mutating-webhook-certs + kustomize build . 
diff --git a/config/kustomization.yaml b/config/kustomization.yaml new file mode 100644 index 0000000..c9fda73 --- /dev/null +++ b/config/kustomization.yaml @@ -0,0 +1,25 @@ +--- +resources: + - mutating-webhook.yaml + - namespace.yaml + - pod-cpu-boost-reset.yaml +secretGenerator: + - namespace: pod-cpu-booster + name: mutating-webhook-certs + type: kubernetes.io/tls + files: + - ca.crt + - tls.crt + - tls.key +replacements: + - source: + kind: Secret + namespace: pod-cpu-booster + name: mutating-webhook-certs + fieldPath: "data.[ca.crt]" + targets: + - select: + kind: MutatingWebhookConfiguration + name: k8s-pod-cpu-booster + fieldPaths: + - webhooks.[name=k8s-pod-cpu-booster.norbjd.github.io].clientConfig.caBundle diff --git a/config/mutating-webhook.yaml b/config/mutating-webhook.yaml new file mode 100644 index 0000000..c7ff8fd --- /dev/null +++ b/config/mutating-webhook.yaml @@ -0,0 +1,78 @@ +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: k8s-pod-cpu-booster +webhooks: + - name: k8s-pod-cpu-booster.norbjd.github.io + clientConfig: + caBundle: "" # will be overriden by kustomization replacement + service: + namespace: pod-cpu-booster + name: mutating-webhook + path: /mutate + objectSelector: + matchExpressions: + # we don't want the creation of mutating-webhook pods to trigger the webhook (otherwise those pods won't start) + - key: app + operator: NotIn + values: + - mutating-webhook + - key: norbjd.github.io/k8s-pod-cpu-booster-enabled + operator: In + values: + - "true" + rules: + - apiGroups: [""] + apiVersions: ["v1"] + resources: ["pods"] + operations: ["CREATE"] + scope: Namespaced + sideEffects: None + admissionReviewVersions: ["v1"] +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: mutating-webhook + namespace: pod-cpu-booster +spec: + replicas: 3 + selector: + matchLabels: + app: mutating-webhook + template: + metadata: + labels: + app: mutating-webhook + spec: + containers: + - name:
mutating-webhook + image: ko://github.com/norbjd/k8s-pod-cpu-booster/cmd/webhook + args: + - -v=9 # TODO: remove + - -port=8443 + - -cert=/etc/certs/tls.crt + - -key=/etc/certs/tls.key + ports: + - containerPort: 8443 + volumeMounts: + - name: certs + mountPath: /etc/certs + readOnly: true + volumes: + - name: certs + secret: + secretName: mutating-webhook-certs +--- +apiVersion: v1 +kind: Service +metadata: + name: mutating-webhook + namespace: pod-cpu-booster +spec: + selector: + app: mutating-webhook + ports: + - port: 443 + targetPort: 8443 diff --git a/config/100-namespace.yaml b/config/namespace.yaml similarity index 100% rename from config/100-namespace.yaml rename to config/namespace.yaml diff --git a/config/pod-cpu-boost-reset.yaml b/config/pod-cpu-boost-reset.yaml new file mode 100644 index 0000000..c133af0 --- /dev/null +++ b/config/pod-cpu-boost-reset.yaml @@ -0,0 +1,62 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: pod-cpu-boost-reset + namespace: pod-cpu-booster +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: pod-cpu-boost-reset +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - list + - watch + - get + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: pod-cpu-boost-reset +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: pod-cpu-boost-reset +subjects: + - kind: ServiceAccount + name: pod-cpu-boost-reset + namespace: pod-cpu-booster +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pod-cpu-boost-reset + namespace: pod-cpu-booster +spec: + replicas: 1 # for now, we don't support multiple replicas + selector: + matchLabels: + app: pod-cpu-boost-reset + template: + metadata: + labels: + app: pod-cpu-boost-reset + spec: + containers: + - name: pod-cpu-boost-reset + image: ko://github.com/norbjd/k8s-pod-cpu-booster/cmd/informer + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 100m 
+ memory: 100Mi + serviceAccountName: pod-cpu-boost-reset + terminationGracePeriodSeconds: 0 # TODO: change for production environments From 325ef0dee43c979772c43361aeb73a67717e5e31 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 17:03:19 +0100 Subject: [PATCH 06/15] No need to check the object type in the admission review request --- pkg/webhook/webhook.go | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index b45b3b6..33382a4 100644 --- a/pkg/webhook/webhook.go +++ b/pkg/webhook/webhook.go @@ -12,7 +12,6 @@ import ( v1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/klog/v2" @@ -54,20 +53,8 @@ func mutatePod(w http.ResponseWriter, r *http.Request) { return } - // Do server-side validation that we are only dealing with a pod resource. This - // should also be part of the MutatingWebhookConfiguration in the cluster, but - // we should verify here before continuing. - // TODO: also check the label enabling boosting is set - podResource := metav1.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} - if admissionReviewRequest.Request.Resource != podResource { - errNotAPod := fmt.Errorf("did not receive pod, got %s", admissionReviewRequest.Request.Resource.Resource) - klog.ErrorS(errNotAPod, "") - w.WriteHeader(400) - w.Write([]byte(errNotAPod.Error())) - return - } - - // Decode the pod from the AdmissionReview. 
+ // decode the pod from the AdmissionReview + // here, we are **sure** the object is a pod, because this is configured in the MutatingWebhookConfiguration's rules rawRequest := admissionReviewRequest.Request.Object.Raw pod := corev1.Pod{} if _, _, err := deserializer.Decode(rawRequest, nil, &pod); err != nil { From ca95830d8b9c9dd07a394cb71c878ebb9b6aff12 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 17:39:43 +0100 Subject: [PATCH 07/15] Make namespace configurable --- config/kustomization.yaml | 7 ++++--- config/mutating-webhook.yaml | 7 ++----- config/namespace.yaml | 3 ++- config/pod-cpu-boost-reset.yaml | 3 --- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/config/kustomization.yaml b/config/kustomization.yaml index c9fda73..b8fbbd4 100644 --- a/config/kustomization.yaml +++ b/config/kustomization.yaml @@ -1,11 +1,13 @@ --- +namespace: pod-cpu-booster resources: - mutating-webhook.yaml - namespace.yaml - pod-cpu-boost-reset.yaml secretGenerator: - - namespace: pod-cpu-booster - name: mutating-webhook-certs + - name: mutating-webhook-certs + options: + disableNameSuffixHash: true type: kubernetes.io/tls files: - ca.crt @@ -14,7 +16,6 @@ secretGenerator: replacements: - source: kind: Secret - namespace: pod-cpu-booster name: mutating-webhook-certs fieldPath: "data.[ca.crt]" targets: diff --git a/config/mutating-webhook.yaml b/config/mutating-webhook.yaml index c7ff8fd..00acdb5 100644 --- a/config/mutating-webhook.yaml +++ b/config/mutating-webhook.yaml @@ -6,9 +6,8 @@ metadata: webhooks: - name: k8s-pod-cpu-booster.norbjd.github.io clientConfig: - caBundle: "" # will be overriden by kustomization replacement - service: - namespace: pod-cpu-booster + caBundle: "" # will be overridden by kustomization replacement + service: # namespace field is overridden by the namespace defined in kustomization.yaml name: mutating-webhook path: /mutate objectSelector: @@ -35,7 +34,6 @@ apiVersion: apps/v1 kind: Deployment metadata: name: 
mutating-webhook - namespace: pod-cpu-booster spec: replicas: 3 selector: @@ -69,7 +67,6 @@ apiVersion: v1 kind: Service metadata: name: mutating-webhook - namespace: pod-cpu-booster spec: selector: app: mutating-webhook diff --git a/config/namespace.yaml b/config/namespace.yaml index 42afbd7..c1b7060 100644 --- a/config/namespace.yaml +++ b/config/namespace.yaml @@ -1,4 +1,5 @@ +--- apiVersion: v1 kind: Namespace metadata: - name: pod-cpu-booster + name: doesnt-matter # will be overridden by kustomize diff --git a/config/pod-cpu-boost-reset.yaml b/config/pod-cpu-boost-reset.yaml index c133af0..ee7fdb4 100644 --- a/config/pod-cpu-boost-reset.yaml +++ b/config/pod-cpu-boost-reset.yaml @@ -3,7 +3,6 @@ apiVersion: v1 kind: ServiceAccount metadata: name: pod-cpu-boost-reset - namespace: pod-cpu-booster --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -31,13 +30,11 @@ roleRef: subjects: - kind: ServiceAccount name: pod-cpu-boost-reset - namespace: pod-cpu-booster --- apiVersion: apps/v1 kind: Deployment metadata: name: pod-cpu-boost-reset - namespace: pod-cpu-booster spec: replicas: 1 # for now, we don't support multiple replicas selector: From c86b4b96d30a50b5042962d5d7812cb8b6b49af1 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 17:40:14 +0100 Subject: [PATCH 08/15] Generate self-signed certs for the webhook separately --- .github/workflows/kind-e2e.yaml | 3 ++- README.md | 9 ++++++--- config/GNUmakefile | 16 ++++++++-------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/.github/workflows/kind-e2e.yaml b/.github/workflows/kind-e2e.yaml index 64306ea..eebf2bc 100644 --- a/.github/workflows/kind-e2e.yaml +++ b/.github/workflows/kind-e2e.yaml @@ -51,7 +51,8 @@ jobs: - name: Install k8s-pod-cpu-booster run: | - ko apply -f config/ + make --directory config/ --silent --no-print-directory mutating-webhook-certs + kustomize build config/ | ko apply -f - - name: Wait for Ready run: | diff --git a/README.md b/README.md index 
2e4ef92..7bca33f 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,8 @@ The CPU boost can be configured with `norbjd.github.io/k8s-pod-cpu-booster-multi Use `ko`. Example on a `kind` cluster: ```sh -make --directory config/ --silent --no-print-directory build | KO_DOCKER_REPO=kind.local ko apply -f - +make --directory config/ --silent --no-print-directory mutating-webhook-certs # generates self-signed certificates for the webhook +kustomize build config/ | KO_DOCKER_REPO=kind.local ko apply -f - ``` ## Test/Demo @@ -48,7 +49,8 @@ kind load docker-image python:3.11-alpine Install `k8s-pod-cpu-booster`: ```sh -make --directory config/ --silent --no-print-directory build | KO_DOCKER_REPO=kind.local ko apply -f - +make --directory config/ --silent --no-print-directory mutating-webhook-certs # generates self-signed certificates for the webhook +kustomize build config/ | KO_DOCKER_REPO=kind.local ko apply -f - ``` Start two similar pods with low CPU limits and running `python -m http.server`, with a readiness probe configured to check when the http server is started. 
The only differences are the name (obviously), and the label `norbjd.github.io/k8s-pod-cpu-booster-enabled`: @@ -101,7 +103,8 @@ Cleanup: ```sh kubectl delete -f examples/pod-no-boost.yaml -f examples/pod-with-default-boost.yaml -make --directory config/ --silent --no-print-directory build | KO_DOCKER_REPO=kind.local ko delete -f - +kustomize build config/ | KO_DOCKER_REPO=kind.local ko delete -f - +make --directory config/ --silent --no-print-directory remove-certs kind delete cluster ``` diff --git a/config/GNUmakefile b/config/GNUmakefile index 6f12410..15abdc3 100644 --- a/config/GNUmakefile +++ b/config/GNUmakefile @@ -1,13 +1,13 @@ -ca.key: - openssl genrsa -out ca.key 4096 +# namespace is used in the certificate's CN and SAN +NAMESPACE := $(shell grep 'namespace: ' kustomization.yaml | cut -d ' ' -f 2) -ca.crt: +mutating-webhook-certs: + openssl genrsa -out ca.key 4096 openssl req -new -x509 -key ca.key -out ca.crt -days 3650 -nodes -subj "/CN=my-self-signed-ca" + openssl req -x509 -CA ca.crt -CAkey ca.key -keyout tls.key -out tls.crt -sha256 -days 3650 -nodes -subj "/CN=mutating-webhook.${NAMESPACE}.svc" -addext "subjectAltName = DNS:mutating-webhook.${NAMESPACE}.svc" -tls.crt tls.key: - openssl req -x509 -CA ca.crt -CAkey ca.key -keyout tls.key -out tls.crt -sha256 -days 3650 -nodes -subj "/CN=mutating-webhook.pod-cpu-booster.svc" -addext "subjectAltName = DNS:mutating-webhook.pod-cpu-booster.svc" - -mutating-webhook-certs: ca.key ca.crt tls.crt +remove-certs: + rm -f *.key *.crt -build: mutating-webhook-certs +build: kustomize build . 
From 6db393390bd23bd11594800a0a6142474500e5a4 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 17:40:34 +0100 Subject: [PATCH 09/15] Fix flaky e2e test --- test/e2e-kind.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/e2e-kind.sh b/test/e2e-kind.sh index a8de0be..da4ddcb 100755 --- a/test/e2e-kind.sh +++ b/test/e2e-kind.sh @@ -43,6 +43,10 @@ exit_code=0 # pods with default boosts should start times quicker than pods with no boost ready_time_minimum_ratio=2 +# avoid divisions by zero by adding 1 second, shouldn't affect the result +pod_with_boost_seconds_to_be_ready=$(($pod_with_boost_seconds_to_be_ready + 1)) +deployment_with_boost_pod_seconds_to_be_ready=$(($deployment_with_boost_pod_seconds_to_be_ready + 1)) + if [ $(( $pod_no_boost_seconds_to_be_ready / $pod_with_boost_seconds_to_be_ready )) -ge $ready_time_minimum_ratio ] then echo -e "\033[0;32m[SUCCESS]\033[0m pod-with-default-boost started more than $ready_time_minimum_ratio times quicker than pod-no-boost" From 8e1163b6498a5c7fb520de3d92ad33860a7e019f Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 17:41:39 +0100 Subject: [PATCH 10/15] "make" command options --- .github/workflows/kind-e2e.yaml | 2 +- README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/kind-e2e.yaml b/.github/workflows/kind-e2e.yaml index eebf2bc..aa1c287 100644 --- a/.github/workflows/kind-e2e.yaml +++ b/.github/workflows/kind-e2e.yaml @@ -51,7 +51,7 @@ jobs: - name: Install k8s-pod-cpu-booster run: | - make --directory config/ --silent --no-print-directory mutating-webhook-certs + make --directory config/ mutating-webhook-certs kustomize build config/ | ko apply -f - - name: Wait for Ready diff --git a/README.md b/README.md index 7bca33f..dfb412c 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ The CPU boost can be configured with `norbjd.github.io/k8s-pod-cpu-booster-multi Use `ko`. 
Example on a `kind` cluster: ```sh -make --directory config/ --silent --no-print-directory mutating-webhook-certs # generates self-signed certificates for the webhook +make --directory config/ mutating-webhook-certs # generates self-signed certificates for the webhook kustomize build config/ | KO_DOCKER_REPO=kind.local ko apply -f - ``` @@ -49,7 +49,7 @@ kind load docker-image python:3.11-alpine Install `k8s-pod-cpu-booster`: ```sh -make --directory config/ --silent --no-print-directory mutating-webhook-certs # generates self-signed certificates for the webhook +make --directory config/ mutating-webhook-certs # generates self-signed certificates for the webhook kustomize build config/ | KO_DOCKER_REPO=kind.local ko apply -f - ``` @@ -104,7 +104,7 @@ Cleanup: kubectl delete -f examples/pod-no-boost.yaml -f examples/pod-with-default-boost.yaml kustomize build config/ | KO_DOCKER_REPO=kind.local ko delete -f - -make --directory config/ --silent --no-print-directory remove-certs +make --directory config/ remove-certs kind delete cluster ``` From 5b126aaeb47bc2f92e12129a3ef6d6a12815c1d7 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sat, 16 Mar 2024 17:51:54 +0100 Subject: [PATCH 11/15] Fix TODOs --- cmd/webhook/main.go | 2 -- config/mutating-webhook.yaml | 2 +- pkg/webhook/webhook.go | 10 +++++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go index 0fdb755..64818ef 100644 --- a/cmd/webhook/main.go +++ b/cmd/webhook/main.go @@ -20,8 +20,6 @@ func main() { flag.Parse() - // TODO: check pathToCertFile and pathToKeyFile are existing files - err := webhook.Run(port, pathToCertFile, pathToKeyFile) if err != nil { klog.Fatal(err) diff --git a/config/mutating-webhook.yaml b/config/mutating-webhook.yaml index 00acdb5..19fc97c 100644 --- a/config/mutating-webhook.yaml +++ b/config/mutating-webhook.yaml @@ -48,7 +48,7 @@ spec: - name: mutating-webhook image: ko://github.com/norbjd/k8s-pod-cpu-booster/cmd/webhook args: - - 
-v=9 # TODO: remove + - -v=9 - -port=8443 - -cert=/etc/certs/tls.crt - -key=/etc/certs/tls.key diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index 33382a4..c4007f9 100644 --- a/pkg/webhook/webhook.go +++ b/pkg/webhook/webhook.go @@ -101,9 +101,13 @@ func mutatePod(w http.ResponseWriter, r *http.Request) { ] `, boostInfo.ContainerIndex, newCPURequest.String(), boostInfo.ContainerIndex, newCPULimit.String()) - // TODO: in case of a pod from a Deployment or a knative Service, pod.Name is empty, why? + podName := pod.Name + if podName == "" { + podName = pod.GenerateName + "" + } + klog.Infof("Current CPU request/limit for %s/%s (container 0) is %s/%s, will set new CPU limit to %s/%s (boost by %d)", - pod.Namespace, pod.Name, currentCPURequest, currentCPULimit, newCPURequest, newCPULimit, boostInfo.Multiplier) + pod.Namespace, podName, currentCPURequest, currentCPULimit, newCPURequest, newCPULimit, boostInfo.Multiplier) admissionResponse.Allowed = true admissionResponse.PatchType = &patchType @@ -141,7 +145,7 @@ func Run(port uint, certFile, keyFile string) error { TLSConfig: &tls.Config{ Certificates: []tls.Certificate{cert}, }, - ErrorLog: klog.NewStandardLogger("INFO"), // TODO? + ErrorLog: klog.NewStandardLogger("INFO"), } if err := server.ListenAndServeTLS("", ""); err != nil { From f13892b4d710e36eb539568fae6b81d03fc91342 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sun, 17 Mar 2024 15:13:55 +0100 Subject: [PATCH 12/15] README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index dfb412c..5ab8e86 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,10 @@ Between startup and `Ready` status, the container benefits from a CPU boost (x10 ## How does it work? -It is deployed as a controller on every node (with a `DaemonSet`). 
It listens for every pod update; if a pod has `norbjd.github.io/k8s-pod-cpu-booster-enabled: "true"` label: it boosts the CPU at pod startup, and reset the CPU limit when the pod is ready. +It is deployed in two parts: + +- a mutating webhook boosting the CPU of pods with the `norbjd.github.io/k8s-pod-cpu-booster-enabled: "true"` label, before they are persisted by the k8s API server +- a controller listening for every update of pods with the `norbjd.github.io/k8s-pod-cpu-booster-enabled: "true"` label; when a pod is ready, it will reset its CPU limit The CPU boost can be configured with `norbjd.github.io/k8s-pod-cpu-booster-multiplier` label: From 65242152f4a4551279c01a0ebf5c2f560bfd7523 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sun, 17 Mar 2024 15:16:26 +0100 Subject: [PATCH 13/15] Remove unnecessary changes to not bloat this PR --- test/e2e-kind.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/e2e-kind.sh b/test/e2e-kind.sh index da4ddcb..a8de0be 100755 --- a/test/e2e-kind.sh +++ b/test/e2e-kind.sh @@ -43,6 +43,10 @@ exit_code=0 # pods with default boosts should start times quicker than pods with no boost ready_time_minimum_ratio=2 -# avoid divisions by zero by adding 1 second, shouldn't affect the result -pod_with_boost_seconds_to_be_ready=$(($pod_with_boost_seconds_to_be_ready + 1)) -deployment_with_boost_pod_seconds_to_be_ready=$(($deployment_with_boost_pod_seconds_to_be_ready + 1)) - if [ $(( $pod_no_boost_seconds_to_be_ready / $pod_with_boost_seconds_to_be_ready )) -ge $ready_time_minimum_ratio ] then echo -e "\033[0;32m[SUCCESS]\033[0m pod-with-default-boost started more than $ready_time_minimum_ratio times quicker than pod-no-boost" From 0f8f416358ed1a89260cd2d289a3da4126b21cb2 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sun, 17 Mar 2024 15:38:51 +0100 Subject: [PATCH 14/15] Fix bad label in kind-e2e.yaml --- .github/workflows/kind-e2e.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/kind-e2e.yaml
b/.github/workflows/kind-e2e.yaml index aa1c287..6a7d506 100644 --- a/.github/workflows/kind-e2e.yaml +++ b/.github/workflows/kind-e2e.yaml @@ -57,7 +57,7 @@ jobs: - name: Wait for Ready run: | echo "Waiting for k8s-pod-cpu-booster items to become ready" - kubectl wait pod --for=condition=Ready -n pod-cpu-booster -l app=pod-cpu-booster + kubectl wait pod --for=condition=Ready -n pod-cpu-booster -l app=pod-cpu-boost-reset kubectl wait pod --for=condition=Ready -n pod-cpu-booster -l app=mutating-webhook sleep 5 # because readiness probe is not accurate (Ready != informer is started), but sleeping is enough for now From 4a103e6002ba2bcee9399c49d1a874a4b501f1a7 Mon Sep 17 00:00:00 2001 From: norbjd Date: Sun, 17 Mar 2024 15:56:04 +0100 Subject: [PATCH 15/15] Temporarily disable ubuntu-20.04 in e2e test matrix (openssl incompatibility, will fix later) --- .github/workflows/kind-e2e.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/kind-e2e.yaml b/.github/workflows/kind-e2e.yaml index 6a7d506..5bfe215 100644 --- a/.github/workflows/kind-e2e.yaml +++ b/.github/workflows/kind-e2e.yaml @@ -22,7 +22,7 @@ jobs: - v1.27.x - v1.28.x os: - - ubuntu-20.04 # Ubuntu 20.04 uses cgroup v1 + # - ubuntu-20.04 # Ubuntu 20.04 uses cgroup v1 # TODO: temporarily disabled because of openssl commands incompatibility (1.1.1 version vs 3.x) - ubuntu-22.04 # Ubuntu 22.04 uses cgroup v2 runs-on: ${{ matrix.os }}