Skip to content

Commit

Permalink
lease: fix goroutine starvation caused by test cluster setting
Browse files Browse the repository at this point in the history
When a test overrides sql.catalog.descriptor_lease_duration to 0, then
it was possible for the test to starve itself by creating a timer with
zero duration that is repeatedly reset.

The bug occurred since after refreshTimer expired, it would call the
jitteredLeaseDuration helper to get the new duration for the timer. If a
test had overriden sql.catalog.descriptor_lease_duration to 0, this
would cause the timer to keep being reset to 0 and starve other
goroutines.

Release note: None
  • Loading branch information
rafiss committed Aug 20, 2024
1 parent 8863a71 commit 4e8f012
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
17 changes: 10 additions & 7 deletions pkg/sql/catalog/lease/lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -1586,17 +1586,20 @@ func (m *Manager) checkRangeFeedStatus(ctx context.Context) (forceRefresh bool)
// range feed progress / recovery, and supporting legacy expiry
// based leases.
func (m *Manager) RunBackgroundLeasingTask(ctx context.Context) {
_ = m.stopper.RunAsyncTask(ctx, "lease-refresher", func(ctx context.Context) {
refreshTimerDuration := LeaseDuration.Get(&m.storage.settings.SV)
renewalsDisabled := false
if refreshTimerDuration <= 0 {
renewalsDisabled := false
getRefreshTimerDuration := func() time.Duration {
if LeaseDuration.Get(&m.storage.settings.SV) <= 0 {
// Session based leasing still needs a refresh loop to expire
// leases, so we will execute that without any renewals.
refreshTimerDuration = time.Millisecond * 200
renewalsDisabled = true
return 200 * time.Millisecond
} else {
refreshTimerDuration = m.storage.jitteredLeaseDuration()
renewalsDisabled = false
return m.storage.jitteredLeaseDuration()
}
}
_ = m.stopper.RunAsyncTask(ctx, "lease-refresher", func(ctx context.Context) {
refreshTimerDuration := getRefreshTimerDuration()
var refreshTimer timeutil.Timer
defer refreshTimer.Stop()
refreshTimer.Reset(refreshTimerDuration / 2)
Expand Down Expand Up @@ -1633,7 +1636,7 @@ func (m *Manager) RunBackgroundLeasingTask(ctx context.Context) {
m.refreshSomeLeases(ctx, true /*refreshAll*/)
case <-refreshTimer.C:
refreshTimer.Read = true
refreshTimer.Reset(m.storage.jitteredLeaseDuration() / 2)
refreshTimer.Reset(getRefreshTimerDuration() / 2)

// Check for any react to any range feed availability problems, and
// if needed refresh the full set of descriptors.
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/catalog/lease/lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3675,7 +3675,7 @@ func TestLeaseDescriptorRangeFeedFailure(t *testing.T) {
// so the update is detected.
if p.Params.ExecutionPhase == scop.PostCommitPhase &&
enableAfterStageKnob.Load() &&
strings.Contains(p.Statements[0].Statement, "ADD COLUMN") {
strings.Contains(p.Statements[0].Statement, "ALTER TABLE t1 ADD COLUMN j INT DEFAULT 64") {
rangeFeedResetChan = srv.ApplicationLayer(1).LeaseManager().(*lease.Manager).TestingSetDisableRangeFeedCheckpointFn(true)
}
return nil
Expand All @@ -3685,7 +3685,7 @@ func TestLeaseDescriptorRangeFeedFailure(t *testing.T) {
// so the update is detected.
if p.Params.ExecutionPhase == scop.PostCommitPhase &&
enableAfterStageKnob.Load() &&
strings.Contains(p.Statements[0].Statement, "ADD COLUMN") {
strings.Contains(p.Statements[0].Statement, "ALTER TABLE t1 ADD COLUMN j INT DEFAULT 64") {
<-rangeFeedResetChan
srv.ApplicationLayer(1).LeaseManager().(*lease.Manager).TestingSetDisableRangeFeedCheckpointFn(false)
enableAfterStageKnob.Swap(false)
Expand Down

0 comments on commit 4e8f012

Please sign in to comment.