From 888cbb283e67d5cf1adc9e648b58c7c6097cbdb3 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Wed, 18 Oct 2023 11:23:56 +0800 Subject: [PATCH] add retry limit for excluded backoff type to avoid infinite retry (#1002) * add retry limit for excluded backoff type to avoid infinite retry Signed-off-by: cfzjywxk * change log Signed-off-by: cfzjywxk --------- Signed-off-by: cfzjywxk --- internal/retry/backoff.go | 11 +++++++++-- internal/retry/backoff_test.go | 12 ++++++++++++ internal/retry/config.go | 11 +++++++++-- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/internal/retry/backoff.go b/internal/retry/backoff.go index 19f6fca69..a2723e05b 100644 --- a/internal/retry/backoff.go +++ b/internal/retry/backoff.go @@ -143,7 +143,13 @@ func (b *Backoffer) BackoffWithCfgAndMaxSleep(cfg *Config, maxSleepMs int, err e if b.noop { return err } - if b.maxSleep > 0 && (b.totalSleep-b.excludedSleep) >= b.maxSleep { + maxBackoffTimeExceeded := (b.totalSleep - b.excludedSleep) >= b.maxSleep + maxExcludedTimeExceeded := false + if maxLimit, ok := isSleepExcluded[cfg.name]; ok { + maxExcludedTimeExceeded = b.excludedSleep >= maxLimit && b.excludedSleep >= b.maxSleep + } + maxTimeExceeded := maxBackoffTimeExceeded || maxExcludedTimeExceeded + if b.maxSleep > 0 && maxTimeExceeded { longestSleepCfg, longestSleepTime := b.longestSleepCfg() errMsg := fmt.Sprintf("%s backoffer.maxSleep %dms is exceeded, errors:", cfg.String(), b.maxSleep) for i, err := range b.errors { @@ -163,7 +169,8 @@ func (b *Backoffer) BackoffWithCfgAndMaxSleep(cfg *Config, maxSleepMs int, err e backoffDetail.WriteString(":") backoffDetail.WriteString(strconv.Itoa(times)) } - errMsg += fmt.Sprintf("\ntotal-backoff-times: %v, backoff-detail: %v", totalTimes, backoffDetail.String()) + errMsg += fmt.Sprintf("\ntotal-backoff-times: %v, backoff-detail: %v, maxBackoffTimeExceeded: %v, maxExcludedTimeExceeded: %v", + totalTimes, backoffDetail.String(), maxBackoffTimeExceeded, maxExcludedTimeExceeded) returnedErr := err if longestSleepCfg != nil { errMsg += fmt.Sprintf("\nlongest sleep type: %s, time: %dms", longestSleepCfg.String(), longestSleepTime) diff --git a/internal/retry/backoff_test.go b/internal/retry/backoff_test.go index 468306fbb..f630c34fd 100644 --- a/internal/retry/backoff_test.go +++ b/internal/retry/backoff_test.go @@ -98,3 +98,15 @@ func TestBackoffDeepCopy(t *testing.T) { assert.ErrorIs(t, err, BoMaxDataNotReady.err) } } + +func TestBackoffWithMaxExcludedExceed(t *testing.T) { + setBackoffExcluded(BoTiKVServerBusy.name, 1) + b := NewBackofferWithVars(context.TODO(), 1, nil) + err := b.Backoff(BoTiKVServerBusy, errors.New("server is busy")) + assert.Nil(t, err) + + // As the total excluded sleep is greater than the max limited value, error should be returned. + err = b.Backoff(BoTiKVServerBusy, errors.New("server is busy")) + assert.NotNil(t, err) + assert.Greater(t, b.excludedSleep, b.maxSleep) +} diff --git a/internal/retry/config.go b/internal/retry/config.go index 9c062cc75..17b524786 100644 --- a/internal/retry/config.go +++ b/internal/retry/config.go @@ -130,11 +130,18 @@ var ( BoTxnLockFast = NewConfig(txnLockFastName, &metrics.BackoffHistogramLockFast, NewBackoffFnCfg(2, 3000, EqualJitter), tikverr.ErrResolveLockTimeout) ) -var isSleepExcluded = map[string]struct{}{ - BoTiKVServerBusy.name: {}, +var isSleepExcluded = map[string]int{ + BoTiKVServerBusy.name: 600000, // The max excluded limit is 10min. // add BoTiFlashServerBusy if appropriate } +// setBackoffExcluded is used for test only. +func setBackoffExcluded(name string, maxVal int) { + if _, ok := isSleepExcluded[name]; ok { + isSleepExcluded[name] = maxVal + } +} + const ( // NoJitter makes the backoff sequence strict exponential. NoJitter = 1 + iota