Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KEP-4210: add e2e tests and add small fix for ImageGCMaxAge #123343

Merged
merged 5 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions pkg/kubelet/images/image_gc_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ type StatsProvider interface {
type ImageGCManager interface {
// Applies the garbage collection policy. Errors include being unable to free
// enough space as per the garbage collection policy.
GarbageCollect(ctx context.Context) error
GarbageCollect(ctx context.Context, beganGC time.Time) error

// Start async garbage collection of images.
Start()
Expand Down Expand Up @@ -301,7 +301,7 @@ func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.
return imagesInUse, nil
}

func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
func (im *realImageGCManager) GarbageCollect(ctx context.Context, beganGC time.Time) error {
ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect")
defer otelSpan.End()

Expand All @@ -311,7 +311,7 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
return err
}

images, err = im.freeOldImages(ctx, images, freeTime)
images, err = im.freeOldImages(ctx, images, freeTime, beganGC)
if err != nil {
return err
}
Expand Down Expand Up @@ -362,10 +362,16 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
return nil
}

func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime time.Time) ([]evictionInfo, error) {
func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime, beganGC time.Time) ([]evictionInfo, error) {
if im.policy.MaxAge == 0 {
return images, nil
}

// Wait until the MaxAge has passed since the Kubelet has started,
// or else we risk prematurely garbage collecting images.
if freeTime.Sub(beganGC) <= im.policy.MaxAge {
return images, nil
}
var deletionErrors []error
remainingImages := make([]evictionInfo, 0)
for _, image := range images {
Expand Down
19 changes: 11 additions & 8 deletions pkg/kubelet/images/image_gc_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ func TestGarbageCollectBelowLowThreshold(t *testing.T) {
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageStats, imageStats, nil)

assert.NoError(t, manager.GarbageCollect(ctx))
assert.NoError(t, manager.GarbageCollect(ctx, time.Now()))
}

func TestGarbageCollectCadvisorFailure(t *testing.T) {
Expand All @@ -674,7 +674,7 @@ func TestGarbageCollectCadvisorFailure(t *testing.T) {
manager, _ := newRealImageGCManager(policy, mockStatsProvider)

mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{}, &statsapi.FsStats{}, fmt.Errorf("error"))
assert.NotNil(t, manager.GarbageCollect(ctx))
assert.NotNil(t, manager.GarbageCollect(ctx, time.Now()))
}

func TestGarbageCollectBelowSuccess(t *testing.T) {
Expand All @@ -699,7 +699,7 @@ func TestGarbageCollectBelowSuccess(t *testing.T) {
makeImage(0, 450),
}

assert.NoError(t, manager.GarbageCollect(ctx))
assert.NoError(t, manager.GarbageCollect(ctx, time.Now()))
}

func TestGarbageCollectNotEnoughFreed(t *testing.T) {
Expand All @@ -723,7 +723,7 @@ func TestGarbageCollectNotEnoughFreed(t *testing.T) {
makeImage(0, 50),
}

assert.NotNil(t, manager.GarbageCollect(ctx))
assert.NotNil(t, manager.GarbageCollect(ctx, time.Now()))
}

func TestGarbageCollectImageNotOldEnough(t *testing.T) {
Expand Down Expand Up @@ -824,14 +824,15 @@ func TestGarbageCollectImageTooOld(t *testing.T) {

// First GC round should not GC remaining image, as it was used too recently.
assert := assert.New(t)
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
oldStartTime := fakeClock.Now()
images, err = manager.freeOldImages(ctx, images, oldStartTime, oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)

// move clock by a millisecond past maxAge duration, then 1 image will be garbage collected
fakeClock.Step(policy.MaxAge + 1)
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, fakeClock.Now(), oldStartTime)
require.NoError(t, err)
assert.Len(images, 0)
assert.Len(fakeRuntime.ImageList, 1)
Expand Down Expand Up @@ -879,16 +880,18 @@ func TestGarbageCollectImageMaxAgeDisabled(t *testing.T) {
require.Equal(t, len(images), 1)
assert.Len(fakeRuntime.ImageList, 2)

oldStartTime := fakeClock.Now()

// First GC round should not GC remaining image, as it was used too recently.
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, oldStartTime, oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)

// Move clock by a lot, and the images should continue to not be garbage colleced
// See https://stackoverflow.com/questions/25065055/what-is-the-maximum-time-time-in-go
fakeClock.SetTime(time.Unix(1<<63-62135596801, 999999999))
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, fakeClock.Now(), oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)
Expand Down
11 changes: 7 additions & 4 deletions pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -1433,16 +1433,19 @@ func (kl *Kubelet) StartGarbageCollection() {
}
}, ContainerGCPeriod, wait.NeverStop)

// when the high threshold is set to 100, stub the image GC manager
if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
klog.V(2).InfoS("ImageGCHighThresholdPercent is set 100, Disable image GC")
// when the high threshold is set to 100, and the max age is 0 (or the max age feature is disabled)
// stub the image GC manager
if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 &&
(!utilfeature.DefaultFeatureGate.Enabled(features.ImageMaximumGCAge) || kl.kubeletConfiguration.ImageMaximumGCAge.Duration == 0) {
klog.V(2).InfoS("ImageGCHighThresholdPercent is set 100 and ImageMaximumGCAge is 0, Disable image GC")
return
}

prevImageGCFailed := false
beganGC := time.Now()
go wait.Until(func() {
ctx := context.Background()
if err := kl.imageManager.GarbageCollect(ctx); err != nil {
if err := kl.imageManager.GarbageCollect(ctx, beganGC); err != nil {
if prevImageGCFailed {
klog.ErrorS(err, "Image garbage collection failed multiple times in a row")
// Only create an event for repeated failures
Expand Down
117 changes: 117 additions & 0 deletions test/e2e_node/image_gc_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
"context"
"time"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
kubefeatures "k8s.io/kubernetes/pkg/features"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
admissionapi "k8s.io/pod-security-admission/api"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
)

const (
// Kubelet GC's on a frequency of once every 5 minutes
// Add a little leeway to give it time.
checkGCUntil time.Duration = 6 * time.Minute
checkGCFreq time.Duration = 30 * time.Second
)

var _ = SIGDescribe("ImageGarbageCollect", framework.WithSerial(), framework.WithNodeFeature("GarbageCollect"), func() {
f := framework.NewDefaultFramework("image-garbage-collect-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
var is internalapi.ImageManagerService
ginkgo.BeforeEach(func() {
var err error
_, is, err = getCRIClient()
framework.ExpectNoError(err)
})
ginkgo.AfterEach(func() {
framework.ExpectNoError(PrePullAllImages())
})
ginkgo.Context("when ImageMaximumGCAge is set", func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
initialConfig.ImageMaximumGCAge = metav1.Duration{Duration: time.Duration(time.Minute * 1)}
initialConfig.ImageMinimumGCAge = metav1.Duration{Duration: time.Duration(time.Second * 1)}
if initialConfig.FeatureGates == nil {
initialConfig.FeatureGates = make(map[string]bool)
}
initialConfig.FeatureGates[string(kubefeatures.ImageMaximumGCAge)] = true
})
ginkgo.It("should GC unused images", func(ctx context.Context) {
pod := innocentPod()
e2epod.NewPodClient(f).CreateBatch(ctx, []*v1.Pod{pod})

_, err := is.PullImage(context.Background(), &runtimeapi.ImageSpec{Image: agnhostImage}, nil, nil)
framework.ExpectNoError(err)

allImages, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
framework.ExpectNoError(err)

e2epod.NewPodClient(f).DeleteSync(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout)

// Even though the image gc max timing is less, we are bound by the kubelet's
haircommander marked this conversation as resolved.
Show resolved Hide resolved
// ImageGCPeriod, which is hardcoded to 5 minutes.
gomega.Eventually(ctx, func() int {
gcdImageList, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
framework.ExpectNoError(err)
return len(gcdImageList)
}, checkGCUntil, checkGCFreq).Should(gomega.BeNumerically("<", len(allImages)))
})
ginkgo.It("should not GC unused images prematurely", func(ctx context.Context) {
pod := innocentPod()
e2epod.NewPodClient(f).CreateBatch(ctx, []*v1.Pod{pod})

_, err := is.PullImage(context.Background(), &runtimeapi.ImageSpec{Image: agnhostImage}, nil, nil)
framework.ExpectNoError(err)

allImages, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
framework.ExpectNoError(err)

e2epod.NewPodClient(f).DeleteSync(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout)

restartKubelet(true)
haircommander marked this conversation as resolved.
Show resolved Hide resolved
waitForKubeletToStart(ctx, f)

// Wait until the maxAge of the image after the kubelet is restarted to ensure it doesn't
// GC too early.
gomega.Consistently(ctx, func() int {
gcdImageList, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
framework.ExpectNoError(err)
return len(gcdImageList)
}, 50*time.Second, 10*time.Second).Should(gomega.Equal(len(allImages)))

// Even though the image gc max timing is less, we are bound by the kubelet's
// ImageGCPeriod, which is hardcoded to 5 minutes.
gomega.Eventually(ctx, func() int {
gcdImageList, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
framework.ExpectNoError(err)
return len(gcdImageList)
}, checkGCUntil, checkGCFreq).Should(gomega.BeNumerically("<", len(allImages)))
})
})
})
4 changes: 4 additions & 0 deletions test/e2e_node/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ import (
var startServices = flag.Bool("start-services", true, "If true, start local node services")
var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
var busyboxImage = imageutils.GetE2EImage(imageutils.BusyBox)
var agnhostImage = imageutils.GetE2EImage(imageutils.Agnhost)

const (
// Kubelet internal cgroup name for node allocatable cgroup.
Expand Down Expand Up @@ -233,7 +234,10 @@ func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletCon

ginkgo.By("Starting the kubelet")
startKubelet()
waitForKubeletToStart(ctx, f)
}

func waitForKubeletToStart(ctx context.Context, f *framework.Framework) {
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
Expand Down