Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-22.1.0: kvserver/loqrecovery: persist new replica ID in RaftReplicaID #80626

Merged
merged 2 commits into from
Apr 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -1322,6 +1322,15 @@ func removeDeadReplicas(
batch.Close()
return nil, err
}
// Write the new replica ID to RaftReplicaIDKey.
replicas := desc.Replicas().Descriptors()
if len(replicas) != 1 {
return nil, errors.Errorf("expected 1 replica, got %v", replicas)
}
if err := sl.SetRaftReplicaID(ctx, batch, replicas[0].ReplicaID); err != nil {
return nil, errors.Wrapf(err, "failed to write new replica ID for range %d", desc.RangeID)
}
// Update MVCC stats.
if err := sl.SetMVCCStats(ctx, batch, &ms); err != nil {
return nil, errors.Wrap(err, "updating MVCCStats")
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/kv/kvserver/loqrecovery/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,11 @@ func applyReplicaUpdate(
report.OldReplica, _ = report.RemovedReplicas.RemoveReplica(
update.NewReplica.NodeID, update.NewReplica.StoreID)

// Persist the new replica ID.
if err := sl.SetRaftReplicaID(ctx, readWriter, update.NewReplica.ReplicaID); err != nil {
return PrepareReplicaReport{}, errors.Wrap(err, "setting new replica ID")
}

// Refresh stats
if err := sl.SetMVCCStats(ctx, readWriter, &ms); err != nil {
return PrepareReplicaReport{}, errors.Wrap(err, "updating MVCCStats")
Expand Down
32 changes: 30 additions & 2 deletions pkg/kv/kvserver/loqrecovery/recovery_env_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type storeView struct {
StoreID roachpb.StoreID `yaml:"StoreID"`

Descriptors []storeDescriptorView `yaml:"Descriptors"`
LocalData []localDataView `yaml:"LocalData"`
}

// storeDescriptorView contains important fields from the range
Expand Down Expand Up @@ -119,6 +120,12 @@ func (r replicaDescriptorView) asReplicaDescriptor() roachpb.ReplicaDescriptor {
}
}

// localDataView contains interesting local store data for each range.
type localDataView struct {
RangeID roachpb.RangeID `yaml:"RangeID"`
RaftReplicaID int `yaml:"RaftReplicaID"`
}

// Store with its owning NodeID for easier grouping by owning nodes.
type wrappedStore struct {
engine storage.Engine
Expand Down Expand Up @@ -195,7 +202,8 @@ func (e *quorumRecoveryEnv) handleReplicationData(t *testing.T, d datadriven.Tes
replica.NodeID = roachpb.NodeID(replica.StoreID)
}

key, desc, replicaState, hardState, raftLog := buildReplicaDescriptorFromTestData(t, replica)
replicaID, key, desc, replicaState, hardState, raftLog :=
buildReplicaDescriptorFromTestData(t, replica)

eng := e.getOrCreateStore(ctx, t, replica.StoreID, replica.NodeID)
if err = storage.MVCCPutProto(ctx, eng, nil, key, clock.Now(), nil, /* txn */
Expand All @@ -210,6 +218,9 @@ func (e *quorumRecoveryEnv) handleReplicationData(t *testing.T, d datadriven.Tes
if err := sl.SetHardState(ctx, eng, hardState); err != nil {
t.Fatalf("failed to save raft hard state: %v", err)
}
if err := sl.SetRaftReplicaID(ctx, eng, replicaID); err != nil {
t.Fatalf("failed to set raft replica ID: %v", err)
}
for i, entry := range raftLog {
value, err := protoutil.Marshal(&entry)
if err != nil {
Expand All @@ -227,6 +238,7 @@ func (e *quorumRecoveryEnv) handleReplicationData(t *testing.T, d datadriven.Tes
func buildReplicaDescriptorFromTestData(
t *testing.T, replica testReplicaInfo,
) (
roachpb.ReplicaID,
roachpb.Key,
roachpb.RangeDescriptor,
kvserverpb.ReplicaState,
Expand All @@ -239,12 +251,16 @@ func buildReplicaDescriptorFromTestData(
endKey := parsePrettyKey(t, replica.EndKey)
key := keys.RangeDescriptorKey(startKey)
var replicas []roachpb.ReplicaDescriptor
var replicaID roachpb.ReplicaID
maxReplicaID := replica.Replicas[0].ReplicaID
for _, r := range replica.Replicas {
if r.ReplicaID > maxReplicaID {
maxReplicaID = r.ReplicaID
}
replicas = append(replicas, r.asReplicaDescriptor())
if r.NodeID == replica.NodeID && r.StoreID == replica.StoreID {
replicaID = r.ReplicaID
}
}
if replica.Generation == 0 {
replica.Generation = roachpb.RangeGeneration(maxReplicaID)
Expand Down Expand Up @@ -292,7 +308,7 @@ func buildReplicaDescriptorFromTestData(
entry := raftLogFromPendingDescriptorUpdate(t, replica, u, desc, uint64(i))
raftLog = append(raftLog, enginepb.MVCCMetadata{RawBytes: entry.RawBytes})
}
return key, desc, replicaState, hardState, raftLog
return replicaID, key, desc, replicaState, hardState, raftLog
}

func raftLogFromPendingDescriptorUpdate(
Expand Down Expand Up @@ -520,10 +536,21 @@ func (e *quorumRecoveryEnv) handleDumpStore(t *testing.T, d datadriven.TestData)
var storesView []storeView
for _, storeID := range stores {
var descriptorViews []storeDescriptorView
var localDataViews []localDataView
store := e.stores[storeID]
err := kvserver.IterateRangeDescriptorsFromDisk(ctx, store.engine,
func(desc roachpb.RangeDescriptor) error {
descriptorViews = append(descriptorViews, descriptorView(desc))

sl := stateloader.Make(desc.RangeID)
raftReplicaID, _, err := sl.LoadRaftReplicaID(ctx, store.engine)
if err != nil {
t.Fatalf("failed to load Raft replica ID: %v", err)
}
localDataViews = append(localDataViews, localDataView{
RangeID: desc.RangeID,
RaftReplicaID: int(raftReplicaID.ReplicaID),
})
return nil
})
if err != nil {
Expand All @@ -533,6 +560,7 @@ func (e *quorumRecoveryEnv) handleDumpStore(t *testing.T, d datadriven.TestData)
NodeID: e.stores[storeID].nodeID,
StoreID: storeID,
Descriptors: descriptorViews,
LocalData: localDataViews,
})
}
out, err := yaml.Marshal(storesView)
Expand Down
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/loqrecovery/testdata/learners_lose
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ dump-store stores=(1,2)
StartKey: /Min
Replicas:
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 16}
LocalData:
- RangeID: 1
RaftReplicaID: 16
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -72,6 +75,9 @@ dump-store stores=(1,2)
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
- Replica: {NodeID: 4, StoreID: 4, ReplicaID: 4}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 5}
LocalData:
- RangeID: 1
RaftReplicaID: 2

# Second use case where we can't make a decision and fail keyspace coverage as
# only a single learner is left, there is no way to recover.
Expand Down
24 changes: 24 additions & 0 deletions pkg/kv/kvserver/loqrecovery/testdata/max_applied_voter_wins
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ dump-store stores=(1,2)
StartKey: /Min
Replicas:
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 16}
LocalData:
- RangeID: 1
RaftReplicaID: 16
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -70,6 +73,9 @@ dump-store stores=(1,2)
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
- Replica: {NodeID: 4, StoreID: 4, ReplicaID: 4}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 5}
LocalData:
- RangeID: 1
RaftReplicaID: 2

dump-events stores=(1,2)
----
Expand Down Expand Up @@ -184,6 +190,11 @@ dump-store stores=(1,2,5,6)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 6}
- Replica: {NodeID: 6, StoreID: 6, ReplicaID: 7}
LocalData:
- RangeID: 1
RaftReplicaID: 1
- RangeID: 2
RaftReplicaID: 1
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -193,6 +204,9 @@ dump-store stores=(1,2,5,6)
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
- Replica: {NodeID: 4, StoreID: 4, ReplicaID: 4}
LocalData:
- RangeID: 1
RaftReplicaID: 2
- NodeID: 5
StoreID: 5
Descriptors:
Expand All @@ -208,6 +222,11 @@ dump-store stores=(1,2,5,6)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 6}
- Replica: {NodeID: 6, StoreID: 6, ReplicaID: 7}
LocalData:
- RangeID: 1
RaftReplicaID: 6
- RangeID: 2
RaftReplicaID: 6
- NodeID: 6
StoreID: 6
Descriptors:
Expand All @@ -223,3 +242,8 @@ dump-store stores=(1,2,5,6)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 6}
- Replica: {NodeID: 6, StoreID: 6, ReplicaID: 7}
LocalData:
- RangeID: 1
RaftReplicaID: 7
- RangeID: 2
RaftReplicaID: 7
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/loqrecovery/testdata/max_store_voter_wins
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,16 @@ dump-store stores=(1,2)
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
- Replica: {NodeID: 4, StoreID: 4, ReplicaID: 4}
- Replica: {NodeID: 5, StoreID: 5, ReplicaID: 5}
LocalData:
- RangeID: 1
RaftReplicaID: 1
- NodeID: 2
StoreID: 2
Descriptors:
- RangeID: 1
StartKey: /Min
Replicas:
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 16}
LocalData:
- RangeID: 1
RaftReplicaID: 16
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ dump-store stores=(1,2,3)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 1
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -67,6 +70,9 @@ dump-store stores=(1,2,3)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 2
- NodeID: 3
StoreID: 3
Descriptors:
Expand All @@ -76,3 +82,6 @@ dump-store stores=(1,2,3)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 3
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/loqrecovery/testdata/no_change_when_quorum
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ dump-store stores=(1,2)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 1
- NodeID: 2
StoreID: 2
Descriptors:
Expand All @@ -56,3 +59,6 @@ dump-store stores=(1,2)
- Replica: {NodeID: 1, StoreID: 1, ReplicaID: 1}
- Replica: {NodeID: 2, StoreID: 2, ReplicaID: 2}
- Replica: {NodeID: 3, StoreID: 3, ReplicaID: 3}
LocalData:
- RangeID: 1
RaftReplicaID: 2