From 706d26548eb0b8f06760c4ed61d156af279fef4d Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Mon, 22 Jan 2024 13:25:51 -0500 Subject: [PATCH] KEP-4210: retarget to beta for 1.30 Signed-off-by: Peter Hunt --- keps/prod-readiness/sig-node/4210.yaml | 2 ++ keps/sig-node/4210-max-image-gc-age/README.md | 14 ++++++++------ keps/sig-node/4210-max-image-gc-age/kep.yaml | 5 +++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/keps/prod-readiness/sig-node/4210.yaml b/keps/prod-readiness/sig-node/4210.yaml index 3e54b0007cc..e45ce3c9bc2 100644 --- a/keps/prod-readiness/sig-node/4210.yaml +++ b/keps/prod-readiness/sig-node/4210.yaml @@ -1,3 +1,5 @@ kep-number: 4210 alpha: approver: "@johnbelamaric" +beta: + approver: "@johnbelamaric" diff --git a/keps/sig-node/4210-max-image-gc-age/README.md b/keps/sig-node/4210-max-image-gc-age/README.md index b08b352d57d..dc271cd6066 100644 --- a/keps/sig-node/4210-max-image-gc-age/README.md +++ b/keps/sig-node/4210-max-image-gc-age/README.md @@ -208,12 +208,14 @@ Additional tests will be added to this file to cover the garbage collection e2e. - Configuration field added to the Kubelet (disabled by default) - Feature supported by Kubelet Image Manager -- Unit tests and e2e tests added +- Unit tests - Add a metric `kubelet_image_garbage_collected_total` which tracks the number of images the kubelet is GC'ing through any mechanism. #### Beta -- Gather feedback from users +- Add e2e tests +- Document `kubelet_image_garbage_collected_total` (a step missed in alpha) +- Add "reason" field to `kubelet_image_garbage_collected_total` to allow distinguishing between GC reasons (space based or time based). #### GA @@ -276,8 +278,8 @@ removed, so no running workloads can be affected. ###### What specific metrics should inform a rollback? -- `kubelet_image_garbage_collected_total` metric drastically (100x) increasing, indicating thrashing of the GC manager and - images being pulled. 
+- `kubelet_image_garbage_collected_total` metric drastically (100x) increasing, with the "reason" field being "age", +indicating thrashing of the GC manager and images being pulled. ###### Were upgrade and rollback tested? Was the upgrade->downgrade->upgrade path tested? @@ -292,7 +294,7 @@ No. ###### How can an operator determine if the feature is in use by workloads? - Verify the Kubelet Configuration with the Kubelet's configz endpoint -- Monitor the `kubelet_image_garbage_collected_total`, and expect a slight increase. +- Monitor the `kubelet_image_garbage_collected_total` metric, and expect some images to be removed with reason "age". ###### How can someone using this feature know that it is working for their instance? @@ -302,7 +304,6 @@ No. ###### What are the reasonable SLOs (Service Level Objectives) for the enhancement? - The eventual default value should increase the average `kubelet_image_garbage_collected_total` by no more than 10x - - TODO: On what clusters? ###### What are the SLIs (Service Level Indicators) an operator can use to determine the health of the service? @@ -370,6 +371,7 @@ No 2023-09-18: KEP opened, targeted at Alpha +2024-01-22: KEP updated to Beta ## Drawbacks diff --git a/keps/sig-node/4210-max-image-gc-age/kep.yaml b/keps/sig-node/4210-max-image-gc-age/kep.yaml index 63299d4b47f..c4dc73ce59f 100644 --- a/keps/sig-node/4210-max-image-gc-age/kep.yaml +++ b/keps/sig-node/4210-max-image-gc-age/kep.yaml @@ -14,16 +14,17 @@ approvers: # The target maturity stage in the current dev cycle for this KEP. -stage: alpha +stage: beta # The most recent milestone for which work toward delivery of this KEP has been # done. This can be the current (upcoming) milestone, if it is being actively # worked on. -latest-milestone: "v1.29" +latest-milestone: "v1.30" # The milestone at which this feature was, or is targeted to be, at each stage. 
milestone: alpha: "v1.29" + beta: "v1.30" # The following PRR answers are required at alpha release # List the feature gate name and the components for which it must be enabled