From 7601d76f78abcb348e9207859fafa464382f0272 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Tue, 4 May 2021 14:41:43 +0200 Subject: [PATCH] release-20.2: kvserver: prevent StoreRebalancer from downreplicating When the replication factor is lowered and the StoreRebalancer attempts a rebalance, it will accidentally perform a downreplication. Since it wasn't ever supposed to do that, the downreplication is pretty haphazard and doesn't safeguard quorum in the same way that a "proper" downreplication likely would. Prevent it from changing the number of voters and non-voters to avoid this issue. Annoyingly, I [knew] about this problem, but instead of fixing it at the source - as this commit does - I added a lower-level check that could then not be backported to release-20.2, where we are now seeing this problem. [knew]: https://github.com/cockroachdb/cockroach/issues/54444#issuecomment-707706553 Release note: None --- pkg/kv/kvserver/store_rebalancer.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/kv/kvserver/store_rebalancer.go b/pkg/kv/kvserver/store_rebalancer.go index 461b37a69e7c..3643344e3f8a 100644 --- a/pkg/kv/kvserver/store_rebalancer.go +++ b/pkg/kv/kvserver/store_rebalancer.go @@ -506,6 +506,12 @@ func (sr *StoreRebalancer) chooseReplicaToRebalance( targetReplicas := make([]roachpb.ReplicaDescriptor, 0, desiredReplicas) currentReplicas := desc.Replicas().All() + if cur := len(desc.Replicas().Voters()); desiredReplicas != cur { + log.VEventf(ctx, 3, "cannot change number of voters from %d to %d for r%d", + cur, desiredReplicas, desc.RangeID) + continue + } + // Check the range's existing diversity score, since we want to ensure we // don't hurt locality diversity just to improve QPS. curDiversity := rangeDiversityScore(