From 30da6af5f6299afc1945e82552d2d2ad6b6dd2f3 Mon Sep 17 00:00:00 2001 From: Austen McClernon <austen@cockroachlabs.com> Date: Tue, 19 Dec 2023 15:21:29 +0000 Subject: [PATCH] allocatorimpl: check IO overload in should transfer lease Previously, when the lease IO overload enforcement was set to shed and a store satisfied the IO overload criteria, it would not be guaranteed to have its leases enqueued into the replicate queue. Add an IO overload check in should transfer lease to ensure it does. Epic: none Release note: None --- .../allocator/allocatorimpl/allocator.go | 6 ++ .../allocator/allocatorimpl/allocator_test.go | 100 ++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go index 10d922d985fe..61ae7b298bd8 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go @@ -2522,6 +2522,12 @@ func (a *Allocator) ShouldTransferLease( if a.leaseholderShouldMoveDueToPreferences(ctx, storePool, conf, leaseRepl, existing) { return true } + + if a.leaseholderShouldMoveDueToIOOverload( + ctx, storePool, existing, leaseRepl.StoreID(), a.IOOverloadOptions()) { + return true + } + existing = a.ValidLeaseTargets( ctx, storePool, diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go index 1faffaa6b36f..5541b894e361 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go @@ -2842,6 +2842,106 @@ func TestAllocatorShouldTransferSuspected(t *testing.T) { assertShouldTransferLease(true) } +func TestAllocatorShouldTransferLeaseIOOverload(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + floats := func(nums ...float64) []float64 { + return nums + } + + // We want the shed threshold to be 0.9 and the overload threhsold to be 0.5 + // i.e. block transfers at >=0.5 and block transfers + shed leases at >=0.9. + const shedThreshold = 0.9 + const threshold = 0.5 + + testCases := []struct { + name string + leaseCounts, IOScores []float64 + leaseholder roachpb.StoreID + excludeLeaseRepl bool + expected bool + enforcement IOOverloadEnforcementLevel + }{ + { + name: "shouldn't transfer off of store with high io overload when block enforcement", + leaseCounts: floats(100, 100, 100, 100, 100), + IOScores: floats(2.5, 1.5, 0.5, 0, 0), + leaseholder: 1, + expected: false, + enforcement: IOOverloadThresholdBlockTransfers, + }, + { + name: "should transfer off of store with high io overload when shed enforcement", + leaseCounts: floats(100, 100, 100, 100, 100), + IOScores: floats(2.5, 1.5, 0.5, 0, 0), + leaseholder: 1, + // Store 3 is above the threshold (1.0 > 0.8), but equal to the avg (1.0), so + // it is still considered a non-IO-overloaded candidate. + expected: true, + enforcement: IOOverloadThresholdShed, + }, + { + name: "should transfer to io overloaded store(s) when no action enforcement", + leaseCounts: floats(0, 100, 100, 400, 400), + IOScores: floats(2.5, 1.5, 0.5, 0, 0), + leaseholder: 5, + expected: true, + enforcement: IOOverloadThresholdIgnore, + }, + { + name: "dont transfer off of store with high io overload but less than shed threshold with shed enforcement", + leaseCounts: floats(0, 0, 0, 0, 0), + IOScores: floats(0.89, 0, 0, 0, 0), + leaseholder: 1, + expected: false, + enforcement: IOOverloadThresholdShed, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + stopper, g, sp, a, _ := CreateTestAllocator(ctx, 10, true /* deterministic */) + defer stopper.Stop(ctx) + n := len(tc.leaseCounts) + stores := make([]*roachpb.StoreDescriptor, n) + existing := make([]roachpb.ReplicaDescriptor, 0, n) + for i := range tc.leaseCounts { + existing = append(existing, replicas(roachpb.StoreID(i+1))...) + stores[i] = &roachpb.StoreDescriptor{ + StoreID: roachpb.StoreID(i + 1), + Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(i + 1)}, + Capacity: roachpb.StoreCapacity{ + LeaseCount: int32(tc.leaseCounts[i]), + IOThreshold: TestingIOThresholdWithScore(tc.IOScores[i]), + }, + } + } + + sg := gossiputil.NewStoreGossiper(g) + sg.GossipStores(stores, t) + LeaseIOOverloadThresholdEnforcement.Override(ctx, &a.st.SV, int64(tc.enforcement)) + LeaseIOOverloadThreshold.Override(ctx, &a.st.SV, threshold) + LeaseIOOverloadShedThreshold.Override(ctx, &a.st.SV, shedThreshold) + + shouldTransfer := a.ShouldTransferLease( + ctx, + sp, + &roachpb.RangeDescriptor{}, + emptySpanConfig(), + existing, + &mockRepl{ + replicationFactor: int32(n), + storeID: tc.leaseholder, + }, + allocator.RangeUsageInfo{}, /* stats */ + ) + require.Equal(t, tc.expected, shouldTransfer) + }) + } + +} + func TestAllocatorLeasePreferences(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t)