Skip to content

Commit

Permalink
kv: extract etcd/raft utilities into raftutil library
Browse files Browse the repository at this point in the history
This commit extracts scattered logic that interprets the state of an
etcd/raft `Status` into a new `raftutil` library. The library initially
has two functions: `ReplicaIsBehind` and `ReplicaMayNeedSnapshot`. In
the future, I expect that we'll move more helper functions that
make working with etcd/raft easier into this library.
  • Loading branch information
nvanbenschoten committed Jun 12, 2022
1 parent ae82018 commit 78ed26c
Show file tree
Hide file tree
Showing 13 changed files with 462 additions and 60 deletions.
1 change: 1 addition & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ ALL_TESTS = [
"//pkg/kv/kvserver/protectedts/ptstorage:ptstorage_test",
"//pkg/kv/kvserver/protectedts:protectedts_test",
"//pkg/kv/kvserver/raftentry:raftentry_test",
"//pkg/kv/kvserver/raftutil:raftutil_test",
"//pkg/kv/kvserver/rangefeed:rangefeed_test",
"//pkg/kv/kvserver/rditer:rditer_test",
"//pkg/kv/kvserver/replicastats:replicastats_test",
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ go_library(
"//pkg/kv/kvserver/liveness",
"//pkg/kv/kvserver/liveness/livenesspb",
"//pkg/kv/kvserver/raftentry",
"//pkg/kv/kvserver/raftutil",
"//pkg/kv/kvserver/rangefeed",
"//pkg/kv/kvserver/rditer",
"//pkg/kv/kvserver/readsummary",
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/allocator/allocatorimpl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ go_library(
"//pkg/kv/kvserver/allocator/storepool",
"//pkg/kv/kvserver/constraint",
"//pkg/kv/kvserver/liveness/livenesspb",
"//pkg/kv/kvserver/raftutil",
"//pkg/kv/kvserver/replicastats",
"//pkg/roachpb",
"//pkg/settings",
Expand Down
75 changes: 18 additions & 57 deletions pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/storepool"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/constraint"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftutil"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/replicastats"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
Expand Down Expand Up @@ -1468,8 +1469,9 @@ func (a *Allocator) ValidLeaseTargets(
conf roachpb.SpanConfig,
existing []roachpb.ReplicaDescriptor,
leaseRepl interface {
RaftStatus() *raft.Status
StoreID() roachpb.StoreID
RaftStatus() *raft.Status
GetFirstIndex() uint64
},
// excludeLeaseRepl dictates whether the result set can include the source
// replica.
Expand Down Expand Up @@ -1510,7 +1512,8 @@ func (a *Allocator) ValidLeaseTargets(
// potentially transferring the lease to a replica that may be waiting for a
// snapshot (which will wedge the range until the replica applies that
// snapshot).
candidates = excludeReplicasInNeedOfSnapshots(ctx, leaseRepl.RaftStatus(), candidates)
candidates = excludeReplicasInNeedOfSnapshots(
ctx, leaseRepl.RaftStatus(), leaseRepl.GetFirstIndex(), candidates)
}

// Determine which store(s) is preferred based on user-specified preferences.
Expand All @@ -1532,8 +1535,9 @@ func (a *Allocator) leaseholderShouldMoveDueToPreferences(
ctx context.Context,
conf roachpb.SpanConfig,
leaseRepl interface {
RaftStatus() *raft.Status
StoreID() roachpb.StoreID
RaftStatus() *raft.Status
GetFirstIndex() uint64
},
allExistingReplicas []roachpb.ReplicaDescriptor,
) bool {
Expand All @@ -1557,7 +1561,8 @@ func (a *Allocator) leaseholderShouldMoveDueToPreferences(
// If there are any replicas that do match lease preferences, then we check if
// the existing leaseholder is one of them.
preferred := a.PreferredLeaseholders(conf, candidates)
preferred = excludeReplicasInNeedOfSnapshots(ctx, leaseRepl.RaftStatus(), preferred)
preferred = excludeReplicasInNeedOfSnapshots(
ctx, leaseRepl.RaftStatus(), leaseRepl.GetFirstIndex(), preferred)
if len(preferred) == 0 {
return false
}
Expand Down Expand Up @@ -1606,9 +1611,10 @@ func (a *Allocator) TransferLeaseTarget(
conf roachpb.SpanConfig,
existing []roachpb.ReplicaDescriptor,
leaseRepl interface {
RaftStatus() *raft.Status
StoreID() roachpb.StoreID
GetRangeID() roachpb.RangeID
RaftStatus() *raft.Status
GetFirstIndex() uint64
},
stats *replicastats.ReplicaStats,
forceDecisionWithoutStats bool,
Expand Down Expand Up @@ -1854,8 +1860,9 @@ func (a *Allocator) ShouldTransferLease(
conf roachpb.SpanConfig,
existing []roachpb.ReplicaDescriptor,
leaseRepl interface {
RaftStatus() *raft.Status
StoreID() roachpb.StoreID
RaftStatus() *raft.Status
GetFirstIndex() uint64
},
stats *replicastats.ReplicaStats,
) bool {
Expand Down Expand Up @@ -2193,73 +2200,27 @@ func computeQuorum(nodes int) int {
// slice. A "behind" replica is one which is not at or past the quorum commit
// index.
func FilterBehindReplicas(
ctx context.Context, raftStatus *raft.Status, replicas []roachpb.ReplicaDescriptor,
ctx context.Context, st *raft.Status, replicas []roachpb.ReplicaDescriptor,
) []roachpb.ReplicaDescriptor {
if raftStatus == nil || len(raftStatus.Progress) == 0 {
// raftStatus.Progress is only populated on the Raft leader which means we
// won't be able to rebalance a lease away if the lease holder is not the
// Raft leader. This is rare enough not to matter.
return nil
}
candidates := make([]roachpb.ReplicaDescriptor, 0, len(replicas))
var candidates []roachpb.ReplicaDescriptor
for _, r := range replicas {
if !ReplicaIsBehind(raftStatus, r.ReplicaID) {
if !raftutil.ReplicaIsBehind(st, r.ReplicaID) {
candidates = append(candidates, r)
}
}
return candidates
}

// ReplicaIsBehind reports whether the replica identified by replicaID should
// be treated as lagging, judging by the supplied raft status. A nil status, a
// status without any Progress entries, or a replica that has no Progress entry
// all yield true.
func ReplicaIsBehind(raftStatus *raft.Status, replicaID roachpb.ReplicaID) bool {
	if raftStatus == nil || len(raftStatus.Progress) == 0 {
		return true
	}
	id := uint64(replicaID)
	pr, tracked := raftStatus.Progress[id]
	switch {
	case !tracked:
		// Without a Progress entry there is nothing to compare against.
		return true
	case id == raftStatus.Lead:
		// The leader itself is never considered behind.
		return false
	default:
		// NB: the comparison is made against raftStatus.Commit rather than
		// getQuorumIndex(), since the latter may be lower than the commit
		// index. That is helpful for Raft log truncation, which sometimes
		// wants to retain those earlier indexes, but it is the wrong yardstick
		// for deciding which nodes trail the range's actual commit index.
		return pr.State != tracker.StateReplicate || pr.Match < raftStatus.Commit
	}
}

// replicaMayNeedSnapshot reports whether the given replica might currently
// require a raft snapshot. When `raftStatus` is nil or carries no Progress
// entries (as happens when the caller is a replica that is not the raft
// leader), the answer is pessimistically true. The same holds for a replica
// that has no Progress entry.
func replicaMayNeedSnapshot(raftStatus *raft.Status, replica roachpb.ReplicaDescriptor) bool {
	if raftStatus == nil || len(raftStatus.Progress) == 0 {
		return true
	}
	pr, tracked := raftStatus.Progress[uint64(replica.ReplicaID)]
	if !tracked {
		return true
	}
	// Only a follower in `StateReplicate` can reasonably be assumed not to
	// need a snapshot. Even that is racy: a badly timed log truncation can
	// still occur between making this determination and acting on it.
	return pr.State != tracker.StateReplicate
}

// excludeReplicasInNeedOfSnapshots filters out the `replicas` that may be in
// need of a raft snapshot. VOTER_INCOMING replicas are not filtered out.
// Other replicas may be filtered out if this function is called with the
// `raftStatus` of a non-raft leader replica.
func excludeReplicasInNeedOfSnapshots(
ctx context.Context, raftStatus *raft.Status, replicas []roachpb.ReplicaDescriptor,
ctx context.Context, st *raft.Status, firstIndex uint64, replicas []roachpb.ReplicaDescriptor,
) []roachpb.ReplicaDescriptor {
filled := 0
for _, repl := range replicas {
if replicaMayNeedSnapshot(raftStatus, repl) {
if raftutil.ReplicaMayNeedSnapshot(st, firstIndex, repl.ReplicaID) != raftutil.NoSnapshotNeeded {
log.VEventf(
ctx,
5,
Expand Down
8 changes: 8 additions & 0 deletions pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,7 @@ func (r *mockRepl) RaftStatus() *raft.Status {
raftStatus := &raft.Status{
Progress: make(map[uint64]tracker.Progress),
}
raftStatus.RaftState = raft.StateLeader
for i := int32(1); i <= r.replicationFactor; i++ {
state := tracker.StateReplicate
if _, ok := r.replsInNeedOfSnapshot[roachpb.ReplicaID(i)]; ok {
Expand All @@ -1674,6 +1675,10 @@ func (r *mockRepl) RaftStatus() *raft.Status {
return raftStatus
}

// GetFirstIndex always reports 0 for the mock replica; no state on r is read.
func (r *mockRepl) GetFirstIndex() uint64 {
return 0
}

// StoreID returns the value of the mock replica's storeID field.
func (r *mockRepl) StoreID() roachpb.StoreID {
return r.storeID
}
Expand Down Expand Up @@ -7138,6 +7143,7 @@ func TestFilterBehindReplicas(t *testing.T) {
Progress: make(map[uint64]tracker.Progress),
}
status.Lead = c.leader
status.RaftState = raft.StateLeader
status.Commit = c.commit
var replicas []roachpb.ReplicaDescriptor
for j, v := range c.progress {
Expand Down Expand Up @@ -7210,6 +7216,7 @@ func TestFilterUnremovableReplicas(t *testing.T) {
// Use an invalid replica ID for the leader. TestFilterBehindReplicas covers
// valid replica IDs.
status.Lead = 99
status.RaftState = raft.StateLeader
status.Commit = c.commit
var replicas []roachpb.ReplicaDescriptor
for j, v := range c.progress {
Expand Down Expand Up @@ -7267,6 +7274,7 @@ func TestSimulateFilterUnremovableReplicas(t *testing.T) {
// Use an invalid replica ID for the leader. TestFilterBehindReplicas covers
// valid replica IDs.
status.Lead = 99
status.RaftState = raft.StateLeader
status.Commit = c.commit
var replicas []roachpb.ReplicaDescriptor
for j, v := range c.progress {
Expand Down
2 changes: 2 additions & 0 deletions pkg/kv/kvserver/allocator_impl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ func TestAllocatorRebalanceTarget(t *testing.T) {
status := &raft.Status{
Progress: make(map[uint64]tracker.Progress),
}
status.Lead = 1
status.RaftState = raft.StateLeader
status.Commit = 10
for _, replica := range replicas {
status.Progress[uint64(replica.ReplicaID)] = tracker.Progress{
Expand Down
5 changes: 3 additions & 2 deletions pkg/kv/kvserver/deprecated_store_rebalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/storepool"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftutil"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
Expand Down Expand Up @@ -108,7 +109,7 @@ func (sr *StoreRebalancer) deprecatedChooseLeaseToTransfer(
if raftStatus == nil {
raftStatus = sr.getRaftStatusFn(replWithStats.repl)
}
if allocatorimpl.ReplicaIsBehind(raftStatus, candidate.ReplicaID) {
if raftutil.ReplicaIsBehind(raftStatus, candidate.ReplicaID) {
log.VEventf(ctx, 3, "%v is behind or this store isn't the raft leader for r%d; raftStatus: %v",
candidate, desc.RangeID, raftStatus)
continue
Expand Down Expand Up @@ -297,7 +298,7 @@ func (sr *StoreRebalancer) deprecatedChooseRangeToRebalance(
if raftStatus == nil {
raftStatus = sr.getRaftStatusFn(replWithStats.repl)
}
if allocatorimpl.ReplicaIsBehind(raftStatus, replica.ReplicaID) {
if raftutil.ReplicaIsBehind(raftStatus, replica.ReplicaID) {
continue
}
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/deprecated_store_rebalancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func TestDeprecatedChooseLeaseToTransfer(t *testing.T) {
Progress: make(map[uint64]tracker.Progress),
}
status.Lead = uint64(r.ReplicaID())
status.RaftState = raft.StateLeader
status.Commit = 1
for _, replica := range r.Desc().InternalReplicas {
status.Progress[uint64(replica.ReplicaID)] = tracker.Progress{
Expand Down Expand Up @@ -219,6 +220,7 @@ func TestDeprecatedChooseRangeToRebalanceBalanceScore(t *testing.T) {
Progress: make(map[uint64]tracker.Progress),
}
status.Lead = uint64(r.ReplicaID())
status.RaftState = raft.StateLeader
status.Commit = 1
for _, replica := range r.Desc().InternalReplicas {
status.Progress[uint64(replica.ReplicaID)] = tracker.Progress{
Expand Down Expand Up @@ -290,6 +292,7 @@ func TestDeprecatedChooseRangeToRebalance(t *testing.T) {
Progress: make(map[uint64]tracker.Progress),
}
status.Lead = uint64(r.ReplicaID())
status.RaftState = raft.StateLeader
status.Commit = 1
for _, replica := range r.Desc().InternalReplicas {
status.Progress[uint64(replica.ReplicaID)] = tracker.Progress{
Expand Down Expand Up @@ -652,6 +655,7 @@ func TestDeprecatedNoLeaseTransferToBehindReplicas(t *testing.T) {
Progress: make(map[uint64]tracker.Progress),
}
status.Lead = uint64(r.ReplicaID())
status.RaftState = raft.StateLeader
status.Commit = 1
for _, replica := range r.Desc().InternalReplicas {
match := uint64(1)
Expand Down
24 changes: 24 additions & 0 deletions pkg/kv/kvserver/raftutil/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build definitions for the raftutil package.
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

# Library target for the raftutil package, built from util.go and visible to
# every package in the workspace.
go_library(
name = "raftutil",
srcs = ["util.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftutil",
visibility = ["//visibility:public"],
deps = [
"//pkg/roachpb",
"@io_etcd_go_etcd_raft_v3//:raft",
"@io_etcd_go_etcd_raft_v3//tracker",
],
)

# Test target built from util_test.go; it embeds the :raftutil library above.
go_test(
name = "raftutil_test",
srcs = ["util_test.go"],
embed = [":raftutil"],
deps = [
"@com_github_stretchr_testify//require",
"@io_etcd_go_etcd_raft_v3//:raft",
"@io_etcd_go_etcd_raft_v3//tracker",
],
)
Loading

0 comments on commit 78ed26c

Please sign in to comment.