From d6c31da6c1f2c5b57fcbf35a2f80a43eddc3cdfb Mon Sep 17 00:00:00 2001
From: Darin <darinp@gmail.com>
Date: Fri, 6 Dec 2019 15:50:53 -0800
Subject: [PATCH] allocator: fix incorrect rebalance signal and target

This change fixes two bugs:
- The first bug is related to the comparison of floating point
numbers when considering to rebalance a range. The diversity score
of two identical ranges (even the same range) may differ by a small
amount. The fix is to consider candidates equal if their diversity score
have very close values (within 1e-10).
- The second bug has been introduced with
https://github.com/cockroachdb/cockroach/pull/39157 and resulted in
ignoring the newly added replica when determining what existing replica
it should replace in Allocator.simulateRemoveTarget.
As a result - when the first bug was triggering and unnecessary
rebalancing was happening - the replica to be dropped was chosen
at random (in the case where all existing replica are necessary).
For example - let's say a range is constrained to have a replica in each
of the regions A,B,C. We are trying to add a new replica in C.
Evaluating what should be removed from the A,B,C may choose any one of them.
Regardless of the choise, the constraint will be violated.
If we however ask the same question for A,B,C(old),C(new) - the result
will be C(old) as this will preserve the constraint (to have one
replica in each region).

Fixes #42715
Fixes https://github.com/cockroachlabs/support/issues/348

Release note (bug fix): the allocator now considers stores with very close
diversity scores equal (all other things being the same) and doesn't attempt
to rebalance.
Release note (bug fix): the allocator considers the new store being added
when looking for target in case of rebalance.
---
 pkg/storage/allocator.go             |  8 +++---
 pkg/storage/allocator_scorer.go      | 39 ++++++++++++++++++++--------
 pkg/storage/allocator_scorer_test.go | 13 +++++++---
 pkg/storage/allocator_test.go        |  4 +--
 4 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/pkg/storage/allocator.go b/pkg/storage/allocator.go
index b8a64b6650a3..458fc487797c 100644
--- a/pkg/storage/allocator.go
+++ b/pkg/storage/allocator.go
@@ -699,9 +699,9 @@ func (a Allocator) RebalanceTarget(
 			ReplicaID: maxReplicaID(existingReplicas) + 1,
 		}
 		// Deep-copy the Replicas slice since we'll mutate it below.
-		replicaCandidates := append([]roachpb.ReplicaDescriptor(nil), existingReplicas...)
-		replicaCandidates = append(replicaCandidates, newReplica)
-
+		existingPlusOneNew := append([]roachpb.ReplicaDescriptor(nil), existingReplicas...)
+		existingPlusOneNew = append(existingPlusOneNew, newReplica)
+		replicaCandidates := existingPlusOneNew
 		// If we can, filter replicas as we would if we were actually removing one.
 		// If we can't (e.g. because we're the leaseholder but not the raft leader),
 		// it's better to simulate the removal with the info that we do have than to
@@ -724,7 +724,7 @@ func (a Allocator) RebalanceTarget(
 			target.store.StoreID,
 			zone,
 			replicaCandidates,
-			replicaCandidates,
+			existingPlusOneNew,
 			rangeUsageInfo,
 		)
 		if err != nil {
diff --git a/pkg/storage/allocator_scorer.go b/pkg/storage/allocator_scorer.go
index e618e9a26ba3..57047db3b6f3 100644
--- a/pkg/storage/allocator_scorer.go
+++ b/pkg/storage/allocator_scorer.go
@@ -26,6 +26,17 @@ import (
 )
 
 const (
+	// This is a somehow arbitrary chosen upper bound on the relative error to be
+	// used when comparing doubles for equality. The assumption that comes with it
+	// is that any sequence of operations on doubles won't produce a result with
+	// accuracy that is worse than this relative error. There is no guarantee
+	// however that this will be the case. A programmer writing code using
+	// floating point numbers will still need to be aware of the effect of the
+	// operations on the results and the possible loss of accuracy.
+	// More info https://en.wikipedia.org/wiki/Machine_epsilon
+	// https://en.wikipedia.org/wiki/Floating-point_arithmetic
+	epsilon = 1e-10
+
 	// The number of random candidates to select from a larger list of possible
 	// candidates. Because the allocator heuristics are being run on every node it
 	// is actually not desirable to set this value higher. Doing so can lead to
@@ -158,7 +169,7 @@ func (c candidate) compare(o candidate) float64 {
 		}
 		return -4
 	}
-	if c.diversityScore != o.diversityScore {
+	if !scoresAlmostEqual(c.diversityScore, o.diversityScore) {
 		if c.diversityScore > o.diversityScore {
 			return 3
 		}
@@ -170,7 +181,7 @@ func (c candidate) compare(o candidate) float64 {
 		}
 		return -(2 + float64(o.convergesScore-c.convergesScore)/10.0)
 	}
-	if c.balanceScore.totalScore() != o.balanceScore.totalScore() {
+	if !scoresAlmostEqual(c.balanceScore.totalScore(), o.balanceScore.totalScore()) {
 		if c.balanceScore.totalScore() > o.balanceScore.totalScore() {
 			return 1 + (c.balanceScore.totalScore()-o.balanceScore.totalScore())/10.0
 		}
@@ -234,9 +245,9 @@ var _ sort.Interface = byScoreAndID(nil)
 
 func (c byScoreAndID) Len() int { return len(c) }
 func (c byScoreAndID) Less(i, j int) bool {
-	if c[i].diversityScore == c[j].diversityScore &&
+	if scoresAlmostEqual(c[i].diversityScore, c[j].diversityScore) &&
 		c[i].convergesScore == c[j].convergesScore &&
-		c[i].balanceScore.totalScore() == c[j].balanceScore.totalScore() &&
+		scoresAlmostEqual(c[i].balanceScore.totalScore(), c[j].balanceScore.totalScore()) &&
 		c[i].rangeCount == c[j].rangeCount &&
 		c[i].necessary == c[j].necessary &&
 		c[i].fullDisk == c[j].fullDisk &&
@@ -267,7 +278,7 @@ func (cl candidateList) best() candidateList {
 	}
 	for i := 1; i < len(cl); i++ {
 		if cl[i].necessary == cl[0].necessary &&
-			cl[i].diversityScore == cl[0].diversityScore &&
+			scoresAlmostEqual(cl[i].diversityScore, cl[0].diversityScore) &&
 			cl[i].convergesScore == cl[0].convergesScore {
 			continue
 		}
@@ -301,7 +312,7 @@ func (cl candidateList) worst() candidateList {
 	// Find the worst constraint/locality/converges values.
 	for i := len(cl) - 2; i >= 0; i-- {
 		if cl[i].necessary == cl[len(cl)-1].necessary &&
-			cl[i].diversityScore == cl[len(cl)-1].diversityScore &&
+			scoresAlmostEqual(cl[i].diversityScore, cl[len(cl)-1].diversityScore) &&
 			cl[i].convergesScore == cl[len(cl)-1].convergesScore {
 			continue
 		}
@@ -801,11 +812,13 @@ func betterRebalanceTarget(target1, existing1, target2, existing2 *candidate) *c
 	// they'll replace.
 	comp1 := target1.compare(*existing1)
 	comp2 := target2.compare(*existing2)
-	if comp1 > comp2 {
-		return target1
-	}
-	if comp1 < comp2 {
-		return target2
+	if !scoresAlmostEqual(comp1, comp2) {
+		if comp1 > comp2 {
+			return target1
+		}
+		if comp1 < comp2 {
+			return target2
+		}
 	}
 	// If the two targets are equally better than their corresponding existing
 	// replicas, just return whichever target is better.
@@ -1225,3 +1238,7 @@ func maxCapacityCheck(store roachpb.StoreDescriptor) bool {
 func rebalanceToMaxCapacityCheck(store roachpb.StoreDescriptor) bool {
 	return store.Capacity.FractionUsed() < rebalanceToMaxFractionUsedThreshold
 }
+
+func scoresAlmostEqual(score1, score2 float64) bool {
+	return math.Abs(score1-score2) < epsilon
+}
diff --git a/pkg/storage/allocator_scorer_test.go b/pkg/storage/allocator_scorer_test.go
index eb71659b8012..51c1f1cc9549 100644
--- a/pkg/storage/allocator_scorer_test.go
+++ b/pkg/storage/allocator_scorer_test.go
@@ -14,6 +14,7 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"math"
 	"math/rand"
 	"reflect"
 	"sort"
@@ -115,7 +116,11 @@ func TestCandidateSelection(t *testing.T) {
 				store: roachpb.StoreDescriptor{
 					StoreID: roachpb.StoreID(i + idShift),
 				},
-				diversityScore: float64(score.diversity),
+				// We want to test here that everything works when the deversity score
+				// is not the exact value but very close to it. Nextafter will give us
+				// the closest number to the diversity score in the test case,
+				// that isn't equal to it and is either above or below (at random).
+				diversityScore: math.Nextafter(float64(score.diversity), rand.Float64()),
 				rangeCount:     score.rangeCount,
 				valid:          true,
 			})
@@ -215,7 +220,7 @@ func TestCandidateSelection(t *testing.T) {
 			if good == nil {
 				t.Fatalf("no good candidate found")
 			}
-			actual := scoreTuple{int(good.diversityScore), good.rangeCount}
+			actual := scoreTuple{int(good.diversityScore + 0.5), good.rangeCount}
 			if actual != tc.good {
 				t.Errorf("expected:%v actual:%v", tc.good, actual)
 			}
@@ -225,7 +230,7 @@ func TestCandidateSelection(t *testing.T) {
 			if bad == nil {
 				t.Fatalf("no bad candidate found")
 			}
-			actual := scoreTuple{int(bad.diversityScore), bad.rangeCount}
+			actual := scoreTuple{int(bad.diversityScore + 0.5), bad.rangeCount}
 			if actual != tc.bad {
 				t.Errorf("expected:%v actual:%v", tc.bad, actual)
 			}
@@ -244,7 +249,7 @@ func TestBetterThan(t *testing.T) {
 		},
 		{
 			valid:          true,
-			diversityScore: 1,
+			diversityScore: 0.9999999999999999,
 			rangeCount:     0,
 		},
 		{
diff --git a/pkg/storage/allocator_test.go b/pkg/storage/allocator_test.go
index 58a326f359cc..8916685d458a 100644
--- a/pkg/storage/allocator_test.go
+++ b/pkg/storage/allocator_test.go
@@ -881,8 +881,8 @@ func TestAllocatorRebalanceTarget(t *testing.T) {
 			storeFilterThrottled,
 		)
 		expTo := stores[1].StoreID
-		expFrom := map[roachpb.StoreID]bool{stores[3].StoreID: true, stores[4].StoreID: true}
-		if !ok || target.StoreID != expTo || !expFrom[origin.StoreID] {
+		expFrom := stores[0].StoreID
+		if !ok || target.StoreID != expTo || origin.StoreID != expFrom {
 			t.Fatalf("%d: expected rebalance from either of %v to s%d, but got %v->%v; details: %s",
 				i, expFrom, expTo, origin, target, details)
 		}