From 85739af70dbbbebca942df6c3946580b3fd11d31 Mon Sep 17 00:00:00 2001 From: Matthew Cary Date: Thu, 3 Nov 2022 13:50:33 -0700 Subject: [PATCH] Parameterize startup CRD wait retries Change-Id: I870a3d294b4d30abea3f6ecad951d68cfd5c9ceb --- README.md | 2 ++ cmd/snapshot-controller/main.go | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d2bbb8de5..ee196e534 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,8 @@ Other than this, the NODE_NAME environment variable must be set where the CSI sn * `--retry-interval-max`: Maximum retry interval of failed volume snapshot creation or deletion. Default value is 5 minutes. +* `--retry-crd-interval-max`: Maximum retry interval for detecting the snapshot CRDs on controller startup. Detection is retried with exponential backoff starting at 100 milliseconds with a factor of 1.5, and the number of attempts is derived from this value. Default value is 5 seconds. + * `--enable-distributed-snapshotting` : Enables each node to handle snapshots for the volumes local to that node. Off by default. It should be set to true only if `--node-deployment` parameter for the csi external snapshotter sidecar is set to true. See https://github.com/kubernetes-csi/external-snapshotter/blob/master/README.md#distributed-snapshotting for details. * `--prevent-volume-mode-conversion`: Boolean that prevents an unauthorised user from modifying the volume mode when creating a PVC from an existing VolumeSnapshot. Only present as an alpha feature in `v6.0.0` and above. diff --git a/cmd/snapshot-controller/main.go b/cmd/snapshot-controller/main.go index 86794e6f2..72d71b2b8 100644 --- a/cmd/snapshot-controller/main.go +++ b/cmd/snapshot-controller/main.go @@ -20,6 +20,7 @@ import ( "context" "flag" "fmt" + "math" "net" "net/http" "os" @@ -71,6 +72,8 @@ var ( retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. 
Default is 5 minutes.") enableDistributedSnapshotting = flag.Bool("enable-distributed-snapshotting", false, "Enables each node to handle snapshotting for the local volumes created on that node") preventVolumeModeConversion = flag.Bool("prevent-volume-mode-conversion", false, "Prevents an unauthorised user from modifying the volume mode when creating a PVC from an existing VolumeSnapshot.") + + retryCRDIntervalMax = flag.Duration("retry-crd-interval-max", 5*time.Second, "Maximum retry interval to wait for CRDs to appear. The default is 5 seconds.") ) var version = "unknown" @@ -100,11 +103,22 @@ func ensureCustomResourceDefinitionsExist(client *clientset.Clientset) error { return true, nil } - // with a Factor of 1.5 we wait up to 7.5 seconds (the 10th attempt) + // The maximum retry duration = initial duration * retry factor ^ # steps. Rearranging, this gives + // # steps = log(maximum retry / initial duration) / log(retry factor). + const retryFactor = 1.5 + const initialDurationMs = 100 + maxMs := retryCRDIntervalMax.Milliseconds() + if maxMs < initialDurationMs { + maxMs = initialDurationMs + } + steps := int(math.Ceil(math.Log(float64(maxMs)/initialDurationMs) / math.Log(retryFactor))) + if steps < 1 { + steps = 1 + } backoff := wait.Backoff{ - Duration: 100 * time.Millisecond, - Factor: 1.5, - Steps: 10, + Duration: initialDurationMs * time.Millisecond, + Factor: retryFactor, + Steps: steps, } if err := wait.ExponentialBackoff(backoff, condition); err != nil { return err