Skip to content

Commit

Permalink
Merge #60029
Browse files Browse the repository at this point in the history
60029: kvserver: fix up/downreplication status of non-voters in range report r=aayushshah15 a=aayushshah15

This commit updates the `calcRangeCounter()` method to consider non-voting
replicas when determining whether a range is over or under-replicated.
Without this change, ranges that have any non-voting replicas would show
up as under-replicated on the DB console range report page.

Release note: None

Co-authored-by: Aayush Shah <[email protected]>
  • Loading branch information
craig[bot] and aayushshah15 committed Mar 1, 2021
2 parents 6521a8e + 4e1c8da commit 8c51f68
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 18 deletions.
3 changes: 3 additions & 0 deletions pkg/kv/kvserver/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,9 @@ func GetNeededNonVoters(numVoters, zoneConfigNonVoterCount, clusterNodes int) in
// replica.
need = clusterNodes - numVoters
}
if need < 0 {
need = 0 // Must be non-negative.
}
return need
}

Expand Down
38 changes: 25 additions & 13 deletions pkg/kv/kvserver/replica_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func calcReplicaMetrics(
m.Ticking = ticking

m.RangeCounter, m.Unavailable, m.Underreplicated, m.Overreplicated =
calcRangeCounter(storeID, desc, livenessMap, *zone.NumReplicas, clusterNodes)
calcRangeCounter(storeID, desc, livenessMap, zone.GetNumVoters(), *zone.NumReplicas, clusterNodes)

// The raft leader computes the number of raft entries that replicas are
// behind.
Expand All @@ -139,10 +139,11 @@ func calcReplicaMetrics(
return m
}

// calcRangeCounter returns whether this replica is designated as the
// replica in the range responsible for range-level metrics, whether
// the range doesn't have a quorum of live replicas, and whether the
// range is currently under-replicated.
// calcRangeCounter returns whether this replica is designated as the replica in
// the range responsible for range-level metrics, whether the range doesn't have
// a quorum of live voting replicas, and whether the range is currently
// under-replicated (with regards to either the number of voting replicas or the
// number of non-voting replicas).
//
// Note: we compute an estimated range count across the cluster by counting the
// first live replica in each descriptor. Note that the first live replica is
Expand All @@ -158,12 +159,11 @@ func calcRangeCounter(
storeID roachpb.StoreID,
desc *roachpb.RangeDescriptor,
livenessMap liveness.IsLiveMap,
numReplicas int32,
numVoters, numReplicas int32,
clusterNodes int,
) (rangeCounter, unavailable, underreplicated, overreplicated bool) {
// It seems unlikely that a learner replica would be the first live one, but
// there's no particular reason to exclude them. Note that `All` returns the
// voters first.
// there's no particular reason to exclude them.
for _, rd := range desc.Replicas().Descriptors() {
if livenessMap[rd.NodeID].IsLive {
rangeCounter = rd.StoreID == storeID
Expand All @@ -176,11 +176,13 @@ func calcRangeCounter(
unavailable = !desc.Replicas().CanMakeProgress(func(rDesc roachpb.ReplicaDescriptor) bool {
return livenessMap[rDesc.NodeID].IsLive
})
needed := GetNeededVoters(numReplicas, clusterNodes)
liveVoterReplicas := calcLiveVoterReplicas(desc, livenessMap)
if needed > liveVoterReplicas {
neededVoters := GetNeededVoters(numVoters, clusterNodes)
liveVoters := calcLiveVoterReplicas(desc, livenessMap)
neededNonVoters := GetNeededNonVoters(int(numVoters), int(numReplicas-numVoters), clusterNodes)
liveNonVoters := calcLiveNonVoterReplicas(desc, livenessMap)
if neededVoters > liveVoters || neededNonVoters > liveNonVoters {
underreplicated = true
} else if needed < liveVoterReplicas {
} else if neededVoters < liveVoters || neededNonVoters < liveNonVoters {
overreplicated = true
}
}
Expand All @@ -192,8 +194,18 @@ func calcRangeCounter(
// method is used when indicating under-replication so only voter replicas are
// considered.
func calcLiveVoterReplicas(desc *roachpb.RangeDescriptor, livenessMap liveness.IsLiveMap) int {
return calcLiveReplicas(desc.Replicas().VoterDescriptors(), livenessMap)
}

// calcLiveNonVoterReplicas returns a count of the live non-voter replicas; a live
// replica is determined by checking its node in the provided liveness map.
func calcLiveNonVoterReplicas(desc *roachpb.RangeDescriptor, livenessMap liveness.IsLiveMap) int {
return calcLiveReplicas(desc.Replicas().NonVoterDescriptors(), livenessMap)
}

func calcLiveReplicas(repls []roachpb.ReplicaDescriptor, livenessMap liveness.IsLiveMap) int {
var live int
for _, rd := range desc.Replicas().VoterDescriptors() {
for _, rd := range repls {
if livenessMap[rd.NodeID].IsLive {
live++
}
Expand Down
78 changes: 73 additions & 5 deletions pkg/kv/kvserver/replica_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,26 @@ func TestCalcRangeCounterIsLiveMap(t *testing.T) {
// Regression test for a bug, see:
// https://github.com/cockroachdb/cockroach/pull/39936#pullrequestreview-359059629

desc := roachpb.NewRangeDescriptor(123, roachpb.RKeyMin, roachpb.RKeyMax,
threeVotersAndSingleNonVoter := roachpb.NewRangeDescriptor(123, roachpb.RKeyMin, roachpb.RKeyMax,
roachpb.MakeReplicaSet([]roachpb.ReplicaDescriptor{
{NodeID: 10, StoreID: 11, ReplicaID: 12, Type: roachpb.ReplicaTypeVoterFull()},
{NodeID: 100, StoreID: 110, ReplicaID: 120, Type: roachpb.ReplicaTypeVoterFull()},
{NodeID: 1000, StoreID: 1100, ReplicaID: 1200, Type: roachpb.ReplicaTypeVoterFull()},
{NodeID: 2000, StoreID: 2100, ReplicaID: 2200, Type: roachpb.ReplicaTypeNonVoter()},
}))

oneVoterAndThreeNonVoters := roachpb.NewRangeDescriptor(123, roachpb.RKeyMin, roachpb.RKeyMax,
roachpb.MakeReplicaSet([]roachpb.ReplicaDescriptor{
{NodeID: 10, StoreID: 11, ReplicaID: 12, Type: roachpb.ReplicaTypeVoterFull()},
{NodeID: 100, StoreID: 110, ReplicaID: 120, Type: roachpb.ReplicaTypeNonVoter()},
{NodeID: 1000, StoreID: 1100, ReplicaID: 1200, Type: roachpb.ReplicaTypeNonVoter()},
{NodeID: 2000, StoreID: 2100, ReplicaID: 2200, Type: roachpb.ReplicaTypeNonVoter()},
}))

{
ctr, down, under, over := calcRangeCounter(1100 /* storeID */, desc, liveness.IsLiveMap{
ctr, down, under, over := calcRangeCounter(1100, threeVotersAndSingleNonVoter, liveness.IsLiveMap{
1000: liveness.IsLiveMapEntry{IsLive: true}, // by NodeID
}, 3, 3)
}, 3 /* numVoters */, 4 /* numReplicas */, 4 /* clusterNodes */)

require.True(t, ctr)
require.True(t, down)
Expand All @@ -46,9 +55,9 @@ func TestCalcRangeCounterIsLiveMap(t *testing.T) {
}

{
ctr, down, under, over := calcRangeCounter(1000, desc, liveness.IsLiveMap{
ctr, down, under, over := calcRangeCounter(1000, threeVotersAndSingleNonVoter, liveness.IsLiveMap{
1000: liveness.IsLiveMapEntry{IsLive: false},
}, 3, 3)
}, 3 /* numVoters */, 4 /* numReplicas */, 4 /* clusterNodes */)

// Does not confuse a non-live entry for a live one. In other words,
// does not think that the liveness map has only entries for live nodes.
Expand All @@ -57,4 +66,63 @@ func TestCalcRangeCounterIsLiveMap(t *testing.T) {
require.False(t, under)
require.False(t, over)
}

{
ctr, down, under, over := calcRangeCounter(11, threeVotersAndSingleNonVoter, liveness.IsLiveMap{
10: liveness.IsLiveMapEntry{IsLive: true},
100: liveness.IsLiveMapEntry{IsLive: true},
1000: liveness.IsLiveMapEntry{IsLive: true},
2000: liveness.IsLiveMapEntry{IsLive: true},
}, 3 /* numVoters */, 4 /* numReplicas */, 4 /* clusterNodes */)

require.True(t, ctr)
require.False(t, down)
require.False(t, under)
require.False(t, over)
}

{
// Single non-voter dead
ctr, down, under, over := calcRangeCounter(11, oneVoterAndThreeNonVoters, liveness.IsLiveMap{
10: liveness.IsLiveMapEntry{IsLive: true},
100: liveness.IsLiveMapEntry{IsLive: true},
1000: liveness.IsLiveMapEntry{IsLive: false},
2000: liveness.IsLiveMapEntry{IsLive: true},
}, 1 /* numVoters */, 4 /* numReplicas */, 4 /* clusterNodes */)

require.True(t, ctr)
require.False(t, down)
require.True(t, under)
require.False(t, over)
}

{
// All non-voters are dead, but range is not unavailable
ctr, down, under, over := calcRangeCounter(11, oneVoterAndThreeNonVoters, liveness.IsLiveMap{
10: liveness.IsLiveMapEntry{IsLive: true},
100: liveness.IsLiveMapEntry{IsLive: false},
1000: liveness.IsLiveMapEntry{IsLive: false},
2000: liveness.IsLiveMapEntry{IsLive: false},
}, 1 /* numVoters */, 4 /* numReplicas */, 4 /* clusterNodes */)

require.True(t, ctr)
require.False(t, down)
require.True(t, under)
require.False(t, over)
}

{
// More non-voters than needed
ctr, down, under, over := calcRangeCounter(11, oneVoterAndThreeNonVoters, liveness.IsLiveMap{
10: liveness.IsLiveMapEntry{IsLive: true},
100: liveness.IsLiveMapEntry{IsLive: true},
1000: liveness.IsLiveMapEntry{IsLive: true},
2000: liveness.IsLiveMapEntry{IsLive: true},
}, 1 /* numVoters */, 3 /* numReplicas */, 4 /* clusterNodes */)

require.True(t, ctr)
require.False(t, down)
require.False(t, under)
require.True(t, over)
}
}

0 comments on commit 8c51f68

Please sign in to comment.