From 4e8f01263796bbbd30f4aaf1df6ec2927e9cbd1f Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Tue, 20 Aug 2024 06:07:02 +0000 Subject: [PATCH] lease: fix goroutine starvation caused by test cluster setting When a test overrides sql.catalog.descriptor_lease_duration to 0, then it was possible for the test to starve itself by creating a timer with zero duration that is repeatedly reset. The bug occurred since after refreshTimer expired, it would call the jitteredLeaseDuration helper to get the new duration for the timer. If a test had overriden sql.catalog.descriptor_lease_duration to 0, this would cause the timer to keep being reset to 0 and starve other goroutines. Release note: None --- pkg/sql/catalog/lease/lease.go | 17 ++++++++++------- pkg/sql/catalog/lease/lease_test.go | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pkg/sql/catalog/lease/lease.go b/pkg/sql/catalog/lease/lease.go index b04fc276baea..3586b2b17b8e 100644 --- a/pkg/sql/catalog/lease/lease.go +++ b/pkg/sql/catalog/lease/lease.go @@ -1586,17 +1586,20 @@ func (m *Manager) checkRangeFeedStatus(ctx context.Context) (forceRefresh bool) // range feed progress / recovery, and supporting legacy expiry // based leases. func (m *Manager) RunBackgroundLeasingTask(ctx context.Context) { - _ = m.stopper.RunAsyncTask(ctx, "lease-refresher", func(ctx context.Context) { - refreshTimerDuration := LeaseDuration.Get(&m.storage.settings.SV) - renewalsDisabled := false - if refreshTimerDuration <= 0 { + renewalsDisabled := false + getRefreshTimerDuration := func() time.Duration { + if LeaseDuration.Get(&m.storage.settings.SV) <= 0 { // Session based leasing still needs a refresh loop to expire // leases, so we will execute that without any renewals. - refreshTimerDuration = time.Millisecond * 200 renewalsDisabled = true + return 200 * time.Millisecond } else { - refreshTimerDuration = m.storage.jitteredLeaseDuration() + renewalsDisabled = false + return m.storage.jitteredLeaseDuration() } + } + _ = m.stopper.RunAsyncTask(ctx, "lease-refresher", func(ctx context.Context) { + refreshTimerDuration := getRefreshTimerDuration() var refreshTimer timeutil.Timer defer refreshTimer.Stop() refreshTimer.Reset(refreshTimerDuration / 2) @@ -1633,7 +1636,7 @@ func (m *Manager) RunBackgroundLeasingTask(ctx context.Context) { m.refreshSomeLeases(ctx, true /*refreshAll*/) case <-refreshTimer.C: refreshTimer.Read = true - refreshTimer.Reset(m.storage.jitteredLeaseDuration() / 2) + refreshTimer.Reset(getRefreshTimerDuration() / 2) // Check for any react to any range feed availability problems, and // if needed refresh the full set of descriptors. diff --git a/pkg/sql/catalog/lease/lease_test.go b/pkg/sql/catalog/lease/lease_test.go index d50ba16b7e5a..585196aefcee 100644 --- a/pkg/sql/catalog/lease/lease_test.go +++ b/pkg/sql/catalog/lease/lease_test.go @@ -3675,7 +3675,7 @@ func TestLeaseDescriptorRangeFeedFailure(t *testing.T) { // so the update is detected. if p.Params.ExecutionPhase == scop.PostCommitPhase && enableAfterStageKnob.Load() && - strings.Contains(p.Statements[0].Statement, "ADD COLUMN") { + strings.Contains(p.Statements[0].Statement, "ALTER TABLE t1 ADD COLUMN j INT DEFAULT 64") { rangeFeedResetChan = srv.ApplicationLayer(1).LeaseManager().(*lease.Manager).TestingSetDisableRangeFeedCheckpointFn(true) } return nil @@ -3685,7 +3685,7 @@ func TestLeaseDescriptorRangeFeedFailure(t *testing.T) { // so the update is detected. if p.Params.ExecutionPhase == scop.PostCommitPhase && enableAfterStageKnob.Load() && - strings.Contains(p.Statements[0].Statement, "ADD COLUMN") { + strings.Contains(p.Statements[0].Statement, "ALTER TABLE t1 ADD COLUMN j INT DEFAULT 64") { <-rangeFeedResetChan srv.ApplicationLayer(1).LeaseManager().(*lease.Manager).TestingSetDisableRangeFeedCheckpointFn(false) enableAfterStageKnob.Swap(false)