Skip to content

Commit

Permalink
kvserver: fix up/downreplication status of non-voters in range report
Browse files Browse the repository at this point in the history
This commit updates the `calcRangeCounter()` method to consider non-voting
replicas when determining whether a range is over or under-replicated.
Without this change, ranges that have any non-voting replicas would show
up as under-replicated on the DB console range report page.

Release note: None

Release justification: fixes misreporting of replication status of a range
  • Loading branch information
aayushshah15 committed Feb 28, 2021
1 parent 4119ed0 commit 75734ad
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 15 deletions.
3 changes: 3 additions & 0 deletions pkg/kv/kvserver/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,9 @@ func GetNeededNonVoters(numVoters, zoneConfigNonVoterCount, clusterNodes int) in
// replica.
need = clusterNodes - numVoters
}
if need < 0 {
return 0
}
return need
}

Expand Down
38 changes: 26 additions & 12 deletions pkg/kv/kvserver/replica_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func calcReplicaMetrics(
m.Ticking = ticking

m.RangeCounter, m.Unavailable, m.Underreplicated, m.Overreplicated =
calcRangeCounter(storeID, desc, livenessMap, *zone.NumReplicas, clusterNodes)
calcRangeCounter(storeID, desc, livenessMap, zone.GetNumVoters(), *zone.NumReplicas, clusterNodes)

// The raft leader computes the number of raft entries that replicas are
// behind.
Expand All @@ -139,10 +139,11 @@ func calcReplicaMetrics(
return m
}

// calcRangeCounter returns whether this replica is designated as the
// replica in the range responsible for range-level metrics, whether
// the range doesn't have a quorum of live replicas, and whether the
// range is currently under-replicated.
// calcRangeCounter returns whether this replica is designated as the replica in
// the range responsible for range-level metrics, whether the range doesn't have
// a quorum of live voting replicas, and whether the range is currently
// under-replicated (with regards to either the number of voting replicas or the
// number of non-voting replicas).
//
// Note: we compute an estimated range count across the cluster by counting the
// first live replica in each descriptor. Note that the first live replica is
Expand All @@ -158,12 +159,11 @@ func calcRangeCounter(
storeID roachpb.StoreID,
desc *roachpb.RangeDescriptor,
livenessMap liveness.IsLiveMap,
numReplicas int32,
numVoters, numReplicas int32,
clusterNodes int,
) (rangeCounter, unavailable, underreplicated, overreplicated bool) {
// It seems unlikely that a learner replica would be the first live one, but
// there's no particular reason to exclude them. Note that `All` returns the
// voters first.
// there's no particular reason to exclude them.
for _, rd := range desc.Replicas().Descriptors() {
if livenessMap[rd.NodeID].IsLive {
rangeCounter = rd.StoreID == storeID
Expand All @@ -176,11 +176,13 @@ func calcRangeCounter(
unavailable = !desc.Replicas().CanMakeProgress(func(rDesc roachpb.ReplicaDescriptor) bool {
return livenessMap[rDesc.NodeID].IsLive
})
needed := GetNeededVoters(numReplicas, clusterNodes)
liveVoterReplicas := calcLiveVoterReplicas(desc, livenessMap)
if needed > liveVoterReplicas {
neededVoters := GetNeededVoters(numVoters, clusterNodes)
liveVoters := calcLiveVoterReplicas(desc, livenessMap)
neededNonVoters := GetNeededNonVoters(int(numVoters), int(numReplicas-numVoters), clusterNodes)
liveNonVoters := calcLiveNonVoterReplicas(desc, livenessMap)
if neededVoters > liveVoters || neededNonVoters > liveNonVoters {
underreplicated = true
} else if needed < liveVoterReplicas {
} else if neededVoters < liveVoters || neededNonVoters < liveNonVoters {
overreplicated = true
}
}
Expand All @@ -201,6 +203,18 @@ func calcLiveVoterReplicas(desc *roachpb.RangeDescriptor, livenessMap liveness.I
return live
}

// calcLiveNonVoterReplicas returns a count of the live non-voter replicas; a live
// replica is determined by checking its node in the provided liveness map.
func calcLiveNonVoterReplicas(desc *roachpb.RangeDescriptor, livenessMap liveness.IsLiveMap) int {
var live int
for _, rd := range desc.Replicas().NonVoterDescriptors() {
if livenessMap[rd.NodeID].IsLive {
live++
}
}
return live
}

// calcBehindCount returns a total count of log entries that follower replicas
// are behind. This can only be computed on the raft leader.
func calcBehindCount(
Expand Down
21 changes: 18 additions & 3 deletions pkg/kv/kvserver/replica_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@ func TestCalcRangeCounterIsLiveMap(t *testing.T) {
{NodeID: 10, StoreID: 11, ReplicaID: 12, Type: roachpb.ReplicaTypeVoterFull()},
{NodeID: 100, StoreID: 110, ReplicaID: 120, Type: roachpb.ReplicaTypeVoterFull()},
{NodeID: 1000, StoreID: 1100, ReplicaID: 1200, Type: roachpb.ReplicaTypeVoterFull()},
{NodeID: 2000, StoreID: 2100, ReplicaID: 2200, Type: roachpb.ReplicaTypeNonVoter()},
}))

{
ctr, down, under, over := calcRangeCounter(1100 /* storeID */, desc, liveness.IsLiveMap{
ctr, down, under, over := calcRangeCounter(1100, desc, liveness.IsLiveMap{
1000: liveness.IsLiveMapEntry{IsLive: true}, // by NodeID
}, 3, 3)
}, 3 /* numVoters */, 4 /* numReplicas */, 4)

require.True(t, ctr)
require.True(t, down)
Expand All @@ -48,7 +49,7 @@ func TestCalcRangeCounterIsLiveMap(t *testing.T) {
{
ctr, down, under, over := calcRangeCounter(1000, desc, liveness.IsLiveMap{
1000: liveness.IsLiveMapEntry{IsLive: false},
}, 3, 3)
}, 3 /* numVoters */, 4 /* numReplicas */, 4)

// Does not confuse a non-live entry for a live one. In other words,
// does not think that the liveness map has only entries for live nodes.
Expand All @@ -57,4 +58,18 @@ func TestCalcRangeCounterIsLiveMap(t *testing.T) {
require.False(t, under)
require.False(t, over)
}

{
ctr, down, under, over := calcRangeCounter(11, desc, liveness.IsLiveMap{
10: liveness.IsLiveMapEntry{IsLive: true},
100: liveness.IsLiveMapEntry{IsLive: true},
1000: liveness.IsLiveMapEntry{IsLive: true},
2000: liveness.IsLiveMapEntry{IsLive: true},
}, 3 /* numVoters */, 4 /* numReplicas */, 4)

require.True(t, ctr)
require.False(t, down)
require.False(t, under)
require.False(t, over)
}
}

0 comments on commit 75734ad

Please sign in to comment.