Skip to content

Commit

Permalink
Randomize the cleanup interval and increase its base value to 20 minutes (each cleanup now runs after a random delay of 10 to 30 minutes)
Browse files (browse the repository at this point in the history)
Signed-off-by: Ukri Niemimuukko <[email protected]>
  • Loading branch information
uniemimu authored and tkatila committed Mar 23, 2023
1 parent 54a27d7 commit 968c1e1
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package rm

import (
"context"
"math/rand"
"os"
"sort"
"strconv"
Expand Down Expand Up @@ -141,17 +142,24 @@ func NewResourceManager(skipID, fullResourceName string) (ResourceManager, error
prGetClientFunc: podresources.GetV1Client,
assignments: make(map[string]podAssignmentDetails),
retryTimeout: 1 * time.Second,
cleanupInterval: 2 * time.Minute,
cleanupInterval: 20 * time.Minute,
}

klog.Info("GPU device plugin resource manager enabled")

go func() {
ticker := time.NewTicker(rm.cleanupInterval)
rand.Seed(time.Now().UnixNano())
cleanupIntervalSeconds := rm.cleanupInterval.Seconds()
n := rand.Intn(int(cleanupIntervalSeconds))

ticker := time.NewTicker(rm.cleanupInterval/2 + time.Duration(n)*time.Second)

for range ticker.C {
klog.V(4).Info("Running cleanup")

n = rand.Intn(int(cleanupIntervalSeconds))
ticker.Reset(rm.cleanupInterval/2 + time.Duration(n)*time.Second)

// Gather both running and pending pods. It might happen that
// cleanup is triggered between GetPreferredAllocation and Allocate
// and it would remove the assignment data for the soon-to-be allocated pod
Expand Down

0 comments on commit 968c1e1

Please sign in to comment.