From 968c1e1846704a682d3cffe0b0bddb28f23315b4 Mon Sep 17 00:00:00 2001 From: Ukri Niemimuukko <ukri.niemimuukko@intel.com> Date: Mon, 20 Feb 2023 16:22:29 +0200 Subject: [PATCH] randomize cleanup interval and increase it to 20 minutes Signed-off-by: Ukri Niemimuukko <ukri.niemimuukko@intel.com> --- cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go index 159e6de04..cfe936a10 100644 --- a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go +++ b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go @@ -16,6 +16,7 @@ package rm import ( "context" + "math/rand" "os" "sort" "strconv" @@ -141,17 +142,24 @@ func NewResourceManager(skipID, fullResourceName string) (ResourceManager, error prGetClientFunc: podresources.GetV1Client, assignments: make(map[string]podAssignmentDetails), retryTimeout: 1 * time.Second, - cleanupInterval: 2 * time.Minute, + cleanupInterval: 20 * time.Minute, } klog.Info("GPU device plugin resource manager enabled") go func() { - ticker := time.NewTicker(rm.cleanupInterval) + rand.Seed(time.Now().UnixNano()) + cleanupIntervalSeconds := rm.cleanupInterval.Seconds() + n := rand.Intn(int(cleanupIntervalSeconds)) + + ticker := time.NewTicker(rm.cleanupInterval/2 + time.Duration(n)*time.Second) for range ticker.C { klog.V(4).Info("Running cleanup") + n = rand.Intn(int(cleanupIntervalSeconds)) + ticker.Reset(rm.cleanupInterval/2 + time.Duration(n)*time.Second) + // Gather both running and pending pods. It might happen that // cleanup is triggered between GetPreferredAllocation and Allocate // and it would remove the assignment data for the soon-to-be allocated pod