From 3222cc9b28fc9713de8018146afc9cbb0e67172c Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Tue, 19 Jan 2021 01:32:01 -0500 Subject: [PATCH] kvserver: rename allocator actions and priorities This commit renames the existing `AllocatorAction`s and priorities to indicate that they pertain only to voting replicas. This is intended to pave the way for new non-voter specific allocator actions and priorities that follow in the subsequent commits in this PR. Release note: None --- pkg/kv/kvserver/allocator.go | 74 +++++++++++++++--------------- pkg/kv/kvserver/allocator_test.go | 54 +++++++++++----------- pkg/kv/kvserver/replicate_queue.go | 20 ++++---- 3 files changed, 75 insertions(+), 73 deletions(-) diff --git a/pkg/kv/kvserver/allocator.go b/pkg/kv/kvserver/allocator.go index 99e2c493777a..4e334c65c7ef 100644 --- a/pkg/kv/kvserver/allocator.go +++ b/pkg/kv/kvserver/allocator.go @@ -49,14 +49,14 @@ const ( minReplicaWeight = 0.001 // Priorities for various repair operations. - finalizeAtomicReplicationChangePriority float64 = 12002 - removeLearnerReplicaPriority float64 = 12001 - addDeadReplacementPriority float64 = 12000 - addMissingReplicaPriority float64 = 10000 - addDecommissioningReplacementPriority float64 = 5000 - removeDeadReplicaPriority float64 = 1000 - removeDecommissioningReplicaPriority float64 = 200 - removeExtraReplicaPriority float64 = 100 + finalizeAtomicReplicationChangePriority float64 = 12002 + removeLearnerReplicaPriority float64 = 12001 + addDeadReplacementVoterPriority float64 = 12000 + addMissingVoterPriority float64 = 10000 + addDecommissioningReplacementVoterPriority float64 = 5000 + removeDeadVoterPriority float64 = 1000 + removeDecommissioningVoterPriority float64 = 200 + removeExtraVoterPriority float64 = 100 ) // MinLeaseTransferStatsDuration configures the minimum amount of time a @@ -99,12 +99,12 @@ type AllocatorAction int const ( _ AllocatorAction = iota AllocatorNoop - AllocatorRemove - AllocatorAdd - AllocatorReplaceDead - AllocatorRemoveDead - AllocatorReplaceDecommissioning - AllocatorRemoveDecommissioning + AllocatorRemoveVoter + AllocatorAddVoter + AllocatorReplaceDeadVoter + AllocatorRemoveDeadVoter + AllocatorReplaceDecommissioningVoter + AllocatorRemoveDecommissioningVoter AllocatorRemoveLearner AllocatorConsiderRebalance AllocatorRangeUnavailable @@ -112,14 +112,16 @@ const ( ) var allocatorActionNames = map[AllocatorAction]string{ - AllocatorNoop: "noop", - AllocatorRemove: "remove", - AllocatorAdd: "add", - AllocatorReplaceDead: "replace dead", - AllocatorRemoveDead: "remove dead", - AllocatorReplaceDecommissioning: "replace decommissioning", - AllocatorRemoveDecommissioning: "remove decommissioning", - AllocatorRemoveLearner: "remove learner", + AllocatorNoop: "noop", + AllocatorRemoveVoter: "remove voter", + AllocatorAddVoter: "add voter", + AllocatorReplaceDeadVoter: "replace dead voter", + AllocatorRemoveDeadVoter: "remove dead voter", + AllocatorReplaceDecommissioningVoter: "replace decommissioning voter", + AllocatorRemoveDecommissioningVoter: "remove decommissioning voter", + AllocatorRemoveLearner: "remove learner", + // TODO(aayush): Rationalize whether or not rebalancing of non-voters needs to + // be dictated by a distinct allocator action. 
AllocatorConsiderRebalance: "consider rebalance", AllocatorRangeUnavailable: "range unavailable", AllocatorFinalizeAtomicReplicationChange: "finalize conf change", @@ -350,8 +352,8 @@ func (a *Allocator) ComputeAction( // TODO(dan): Since this goes before anything else, the priority here should // be influenced by whatever operations would happen right after the learner // is removed. In the meantime, we don't want to block something important - // from happening (like addDeadReplacementPriority) by queueing this at a - // low priority so until this TODO is done, keep + // from happening (like addDeadReplacementVoterPriority) by queueing this at + // a low priority so until this TODO is done, keep // removeLearnerReplicaPriority as the highest priority. return AllocatorRemoveLearner, removeLearnerReplicaPriority } @@ -373,8 +375,8 @@ func (a *Allocator) computeAction( // Range is under-replicated, and should add an additional replica. // Priority is adjusted by the difference between the current replica // count and the quorum of the desired replica count. - priority := addMissingReplicaPriority + float64(desiredQuorum-have) - action := AllocatorAdd + priority := addMissingVoterPriority + float64(desiredQuorum-have) + action := AllocatorAddVoter log.VEventf(ctx, 3, "%s - missing replica need=%d, have=%d, priority=%.2f", action, need, have, priority) return action, priority @@ -397,8 +399,8 @@ func (a *Allocator) computeAction( // where the node is only temporarily dead, but we remove it from the range // and lose a second node before we can up-replicate (#25392). // The dead replica(s) will be down-replicated later. - priority := addDeadReplacementPriority - action := AllocatorReplaceDead + priority := addDeadReplacementVoterPriority + action := AllocatorReplaceDeadVoter log.VEventf(ctx, 3, "%s - replacement for %d dead replicas priority=%.2f", action, len(deadVoterReplicas), priority) return action, priority @@ -406,8 +408,8 @@ func (a *Allocator) computeAction( if have == need && len(decommissioningReplicas) > 0 { // Range has decommissioning replica(s), which should be replaced. - priority := addDecommissioningReplacementPriority - action := AllocatorReplaceDecommissioning + priority := addDecommissioningReplacementVoterPriority + action := AllocatorReplaceDecommissioningVoter log.VEventf(ctx, 3, "%s - replacement for %d decommissioning replicas priority=%.2f", action, len(decommissioningReplicas), priority) return action, priority @@ -421,8 +423,8 @@ func (a *Allocator) computeAction( // replicas. if len(deadVoterReplicas) > 0 { // The range has dead replicas, which should be removed immediately. - priority := removeDeadReplicaPriority + float64(quorum-len(liveVoterReplicas)) - action := AllocatorRemoveDead + priority := removeDeadVoterPriority + float64(quorum-len(liveVoterReplicas)) + action := AllocatorRemoveDeadVoter log.VEventf(ctx, 3, "%s - dead=%d, live=%d, quorum=%d, priority=%.2f", action, len(deadVoterReplicas), len(liveVoterReplicas), quorum, priority) return action, priority @@ -431,8 +433,8 @@ func (a *Allocator) computeAction( if len(decommissioningReplicas) > 0 { // Range is over-replicated, and has a decommissioning replica which // should be removed. 
- priority := removeDecommissioningReplicaPriority - action := AllocatorRemoveDecommissioning + priority := removeDecommissioningVoterPriority + action := AllocatorRemoveDecommissioningVoter log.VEventf(ctx, 3, "%s - need=%d, have=%d, num_decommissioning=%d, priority=%.2f", action, need, have, len(decommissioningReplicas), priority) @@ -443,8 +445,8 @@ func (a *Allocator) computeAction( // Range is over-replicated, and should remove a replica. // Ranges with an even number of replicas get extra priority because // they have a more fragile quorum. - priority := removeExtraReplicaPriority - float64(have%2) - action := AllocatorRemove + priority := removeExtraVoterPriority - float64(have%2) + action := AllocatorRemoveVoter log.VEventf(ctx, 3, "%s - need=%d, have=%d, priority=%.2f", action, need, have, priority) return action, priority } diff --git a/pkg/kv/kvserver/allocator_test.go b/pkg/kv/kvserver/allocator_test.go index 9d4ae8b9c82a..3eac7a95900b 100644 --- a/pkg/kv/kvserver/allocator_test.go +++ b/pkg/kv/kvserver/allocator_test.go @@ -4265,7 +4265,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorReplaceDead, + expectedAction: AllocatorReplaceDeadVoter, }, // Need five replicas, one is on a dead store. { @@ -4304,7 +4304,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorReplaceDead, + expectedAction: AllocatorReplaceDeadVoter, }, // Need three replicas, have two. { @@ -4328,7 +4328,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorAdd, + expectedAction: AllocatorAddVoter, }, // Need five replicas, have four, one is on a dead store. { @@ -4362,7 +4362,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorAdd, + expectedAction: AllocatorAddVoter, }, // Need five replicas, have four. { @@ -4396,7 +4396,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorAdd, + expectedAction: AllocatorAddVoter, }, // Need three replicas, have four, one is on a dead store. { @@ -4430,7 +4430,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorRemoveDeadVoter, }, // Need five replicas, have six, one is on a dead store. { @@ -4474,7 +4474,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorRemoveDeadVoter, }, // Need three replicas, have five, one is on a dead store. { @@ -4513,7 +4513,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorRemoveDeadVoter, }, // Need three replicas, have four. { @@ -4547,7 +4547,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorRemove, + expectedAction: AllocatorRemoveVoter, }, // Need three replicas, have five. { @@ -4586,7 +4586,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, }, }, - expectedAction: AllocatorRemove, + expectedAction: AllocatorRemoveVoter, }, // Need three replicas, two are on dead stores. Should // be a noop because there aren't enough live replicas for @@ -4756,14 +4756,14 @@ func TestAllocatorComputeActionRemoveDead(t *testing.T) { desc: threeReplDesc, live: []roachpb.StoreID{1, 2}, dead: []roachpb.StoreID{3}, - expectedAction: AllocatorReplaceDead, + expectedAction: AllocatorReplaceDeadVoter, }, // Needs three replicas, one is dead, but there is a replacement. 
{ desc: threeReplDesc, live: []roachpb.StoreID{1, 2, 4}, dead: []roachpb.StoreID{3}, - expectedAction: AllocatorReplaceDead, + expectedAction: AllocatorReplaceDeadVoter, }, // Needs three replicas, two are dead (i.e. the range lacks a quorum). { @@ -4777,7 +4777,7 @@ func TestAllocatorComputeActionRemoveDead(t *testing.T) { desc: fourReplDesc, live: []roachpb.StoreID{1, 2, 4}, dead: []roachpb.StoreID{3}, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorRemoveDeadVoter, }, // Needs three replicas, has four, two are dead (i.e. the range lacks a quorum). { @@ -4840,7 +4840,7 @@ func TestAllocatorComputeActionDecommission(t *testing.T) { }, }, }, - expectedAction: AllocatorReplaceDecommissioning, + expectedAction: AllocatorReplaceDecommissioningVoter, live: []roachpb.StoreID{1, 2}, dead: nil, decommissioning: []roachpb.StoreID{3}, @@ -4870,7 +4870,7 @@ func TestAllocatorComputeActionDecommission(t *testing.T) { }, }, }, - expectedAction: AllocatorReplaceDead, + expectedAction: AllocatorReplaceDeadVoter, live: []roachpb.StoreID{1}, dead: []roachpb.StoreID{2}, decommissioning: []roachpb.StoreID{3}, @@ -4905,7 +4905,7 @@ func TestAllocatorComputeActionDecommission(t *testing.T) { }, }, }, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorRemoveDeadVoter, live: []roachpb.StoreID{1, 4}, dead: []roachpb.StoreID{2}, decommissioning: []roachpb.StoreID{3}, @@ -4940,7 +4940,7 @@ func TestAllocatorComputeActionDecommission(t *testing.T) { }, }, }, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorRemoveDeadVoter, live: []roachpb.StoreID{1, 4}, dead: nil, decommissioning: []roachpb.StoreID{3}, @@ -4970,7 +4970,7 @@ func TestAllocatorComputeActionDecommission(t *testing.T) { }, }, }, - expectedAction: AllocatorReplaceDecommissioning, + expectedAction: AllocatorReplaceDecommissioningVoter, live: nil, dead: nil, decommissioning: []roachpb.StoreID{1, 2, 3}, @@ -5004,7 +5004,7 @@ func TestAllocatorComputeActionDecommission(t *testing.T) { }, }, }, - expectedAction: AllocatorRemoveDecommissioning, + expectedAction: AllocatorRemoveDecommissioningVoter, live: []roachpb.StoreID{4}, dead: nil, decommissioning: []roachpb.StoreID{1, 2, 3}, @@ -5082,7 +5082,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) { // drop past 3, so 3 it is. storeList: []roachpb.StoreID{1, 2, 3, 4}, expectedNumReplicas: 3, - expectedAction: AllocatorRemoveDecommissioning, + expectedAction: AllocatorRemoveDecommissioningVoter, live: []roachpb.StoreID{4}, unavailable: nil, dead: nil, @@ -5092,7 +5092,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) { // Ditto. storeList: []roachpb.StoreID{1, 2, 3}, expectedNumReplicas: 3, - expectedAction: AllocatorReplaceDecommissioning, + expectedAction: AllocatorReplaceDecommissioningVoter, live: []roachpb.StoreID{4, 5}, unavailable: nil, dead: nil, @@ -5105,7 +5105,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) { // from the range at hand, rather than trying to replace it. storeList: []roachpb.StoreID{1, 2, 3, 4}, expectedNumReplicas: 3, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorRemoveDeadVoter, live: []roachpb.StoreID{1, 2, 3, 5}, unavailable: nil, dead: []roachpb.StoreID{4}, @@ -5118,7 +5118,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) { // important than replacing the dead one. 
storeList: []roachpb.StoreID{1, 4}, expectedNumReplicas: 3, - expectedAction: AllocatorAdd, + expectedAction: AllocatorAddVoter, live: []roachpb.StoreID{1, 2, 3, 5}, unavailable: nil, dead: []roachpb.StoreID{4}, @@ -5140,7 +5140,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) { // it is and we are under-replicaed. storeList: []roachpb.StoreID{1, 2}, expectedNumReplicas: 3, - expectedAction: AllocatorAdd, + expectedAction: AllocatorAddVoter, live: []roachpb.StoreID{1, 2}, unavailable: nil, dead: nil, @@ -5160,7 +5160,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) { // Three again, on account of avoiding the even four. storeList: []roachpb.StoreID{1, 2, 3, 4}, expectedNumReplicas: 3, - expectedAction: AllocatorRemove, + expectedAction: AllocatorRemoveVoter, live: []roachpb.StoreID{1, 2, 3, 4}, unavailable: nil, dead: nil, @@ -5214,7 +5214,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) { // slice in these tests). storeList: []roachpb.StoreID{1, 2, 3, 4, 5}, expectedNumReplicas: 5, - expectedAction: AllocatorReplaceDead, + expectedAction: AllocatorReplaceDeadVoter, live: []roachpb.StoreID{1, 2, 3}, unavailable: []roachpb.StoreID{4}, dead: []roachpb.StoreID{5}, @@ -5225,7 +5225,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) { // the most important thing is removing a decommissioning replica. storeList: []roachpb.StoreID{1, 2, 3, 4, 5}, expectedNumReplicas: 3, - expectedAction: AllocatorRemoveDecommissioning, + expectedAction: AllocatorRemoveDecommissioningVoter, live: []roachpb.StoreID{1, 2, 3}, unavailable: []roachpb.StoreID{4}, dead: nil, diff --git a/pkg/kv/kvserver/replicate_queue.go b/pkg/kv/kvserver/replicate_queue.go index 6af0db0d92f0..fe59733c099a 100644 --- a/pkg/kv/kvserver/replicate_queue.go +++ b/pkg/kv/kvserver/replicate_queue.go @@ -344,11 +344,11 @@ func (rq *replicateQueue) processOneChange( // lost quorum. Either way, it's not a good idea to make changes right now. // Let the scanner requeue it again later. return false, nil - case AllocatorAdd: + case AllocatorAddVoter: return rq.addOrReplace(ctx, repl, voterReplicas, liveVoterReplicas, -1 /* removeIdx */, dryRun) - case AllocatorRemove: + case AllocatorRemoveVoter: return rq.remove(ctx, repl, voterReplicas, dryRun) - case AllocatorReplaceDead: + case AllocatorReplaceDeadVoter: if len(deadVoterReplicas) == 0 { // Nothing to do. return false, nil @@ -366,7 +366,7 @@ func (rq *replicateQueue) processOneChange( deadVoterReplicas[0], voterReplicas) } return rq.addOrReplace(ctx, repl, voterReplicas, liveVoterReplicas, removeIdx, dryRun) - case AllocatorReplaceDecommissioning: + case AllocatorReplaceDecommissioningVoter: decommissioningReplicas := rq.allocator.storePool.decommissioningReplicas(voterReplicas) if len(decommissioningReplicas) == 0 { // Nothing to do. @@ -385,14 +385,14 @@ func (rq *replicateQueue) processOneChange( decommissioningReplicas[0], voterReplicas) } return rq.addOrReplace(ctx, repl, voterReplicas, liveVoterReplicas, removeIdx, dryRun) - case AllocatorRemoveDecommissioning: + case AllocatorRemoveDecommissioningVoter: // NB: this path will only be hit when the range is over-replicated and // has decommissioning replicas; in the common case we'll hit - // AllocatorReplaceDecommissioning above. + // AllocatorReplaceDecommissioningVoter above. 
return rq.removeDecommissioning(ctx, repl, dryRun) - case AllocatorRemoveDead: + case AllocatorRemoveDeadVoter: // NB: this path will only be hit when the range is over-replicated and - // has dead replicas; in the common case we'll hit AllocatorReplaceDead + // has dead replicas; in the common case we'll hit AllocatorReplaceDeadVoter // above. return rq.removeDead(ctx, repl, deadVoterReplicas, dryRun) case AllocatorRemoveLearner: @@ -647,9 +647,9 @@ func (rq *replicateQueue) maybeTransferLeaseAway( // is the leaseholder, so transfer the lease instead. We don't check that // the current store has too many leases in this case under the // assumption that replica balance is a greater concern. Also note that - // AllocatorRemove action takes preference over AllocatorConsiderRebalance + // AllocatorRemoveVoter action takes preference over AllocatorConsiderRebalance // (rebalancing) which is where lease transfer would otherwise occur. We - // need to be able to transfer leases in AllocatorRemove in order to get + // need to be able to transfer leases in AllocatorRemoveVoter in order to get // out of situations where this store is overfull and yet holds all the // leases. The fullness checks need to be ignored for cases where // a replica needs to be removed for constraint violations.
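As context for the rename, the sketch below is a minimal, self-contained Go illustration (not part of this patch): the constant names and map strings mirror the renamed identifiers above, but the standalone package layout and the String helper shown here are assumptions for illustration only, not a claim about the surrounding CockroachDB code.

package main

import "fmt"

// AllocatorAction mirrors the voter-specific action enum after the rename.
type AllocatorAction int

const (
	_ AllocatorAction = iota
	AllocatorNoop
	AllocatorRemoveVoter
	AllocatorAddVoter
	AllocatorReplaceDeadVoter
	AllocatorRemoveDeadVoter
)

// allocatorActionNames maps each action to the string surfaced in log output.
var allocatorActionNames = map[AllocatorAction]string{
	AllocatorNoop:             "noop",
	AllocatorRemoveVoter:      "remove voter",
	AllocatorAddVoter:         "add voter",
	AllocatorReplaceDeadVoter: "replace dead voter",
	AllocatorRemoveDeadVoter:  "remove dead voter",
}

// String returns the human-readable action name (illustrative helper).
func (a AllocatorAction) String() string {
	return allocatorActionNames[a]
}

func main() {
	// Prints: computed allocator action: add voter
	fmt.Printf("computed allocator action: %s\n", AllocatorAddVoter)
}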