From 968c1e1846704a682d3cffe0b0bddb28f23315b4 Mon Sep 17 00:00:00 2001
From: Ukri Niemimuukko <ukri.niemimuukko@intel.com>
Date: Mon, 20 Feb 2023 16:22:29 +0200
Subject: [PATCH] randomize cleanup interval and increase it to 20 minutes

Signed-off-by: Ukri Niemimuukko <ukri.niemimuukko@intel.com>
---
 cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go
index 159e6de04..cfe936a10 100644
--- a/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go
+++ b/cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go
@@ -16,6 +16,7 @@ package rm
 
 import (
 	"context"
+	"math/rand"
 	"os"
 	"sort"
 	"strconv"
@@ -141,17 +142,24 @@ func NewResourceManager(skipID, fullResourceName string) (ResourceManager, error
 		prGetClientFunc:  podresources.GetV1Client,
 		assignments:      make(map[string]podAssignmentDetails),
 		retryTimeout:     1 * time.Second,
-		cleanupInterval:  2 * time.Minute,
+		cleanupInterval:  20 * time.Minute,
 	}
 
 	klog.Info("GPU device plugin resource manager enabled")
 
 	go func() {
-		ticker := time.NewTicker(rm.cleanupInterval)
+		rand.Seed(time.Now().UnixNano())
+		cleanupIntervalSeconds := rm.cleanupInterval.Seconds()
+		n := rand.Intn(int(cleanupIntervalSeconds))
+
+		ticker := time.NewTicker(rm.cleanupInterval/2 + time.Duration(n)*time.Second)
 
 		for range ticker.C {
 			klog.V(4).Info("Running cleanup")
 
+			n = rand.Intn(int(cleanupIntervalSeconds))
+			ticker.Reset(rm.cleanupInterval/2 + time.Duration(n)*time.Second)
+
 			// Gather both running and pending pods. It might happen that
 			// cleanup is triggered between GetPreferredAllocation and Allocate
 			// and it would remove the assignment data for the soon-to-be allocated pod