From 329d82535fbaa6c1259911ae3c959b405a93cb2f Mon Sep 17 00:00:00 2001
From: Tobias Schottdorf
Date: Wed, 14 Aug 2019 13:08:47 +0200
Subject: [PATCH] storage: unify replica addition and removal paths

This continues the reworking of the various replication change APIs with
the goal of allowing a) testing of general atomic replication changes
b) issuing replica swaps from the replicate queue (in 19.2).

For previous steps, see:

https://github.com/cockroachdb/cockroach/pull/39485
https://github.com/cockroachdb/cockroach/pull/39611

This change is not a pure plumbing PR. Instead, it unifies
`(*Replica).addReplica` and `(*Replica).removeReplica` into a method that
can do both, `(*Replica).addAndRemoveReplicas`. Given a slice of
ReplicationChanges, this method first adds learner replicas corresponding
to the desired new voters. After having sent snapshots to all of them,
the method issues a configuration change that atomically

- upgrades all learners to voters
- removes any undesired replicas.

Note that no atomic membership changes are *actually* carried out yet.
This is because the callers of `addAndRemoveReplicas` pass in only a
single change (i.e. an addition or removal), which the method also
verifies.

Three pieces are missing after this PR:

First, we need to be able to instruct raft to carry out atomic
configuration changes:

https://github.com/cockroachdb/cockroach/blob/2e8db6ca53c59d3d281e64939f79d937195403d4/pkg/storage/replica_proposal_buf.go#L448-L451

which in particular requires being able to store the ConfState
corresponding to a joint configuration in the unreplicated local state
(under a new key).

Second, we must pass the slice of changes handed to `AdminChangeReplicas`
through to `addAndRemoveReplicas` without unrolling it first, see:

https://github.com/cockroachdb/cockroach/blob/3b316bac6ef342590ddc68d2989714d6e126371a/pkg/storage/replica_command.go#L870-L891

and

https://github.com/cockroachdb/cockroach/blob/3b316bac6ef342590ddc68d2989714d6e126371a/pkg/storage/replica.go#L1314-L1325

Third, we must teach the replicate queue to issue the "atomic swaps";
this is the reason we're introducing atomic membership changes in the
first place.

Release note: None
---
 pkg/roachpb/api.go                  |  23 +++
 pkg/storage/client_replica_test.go  |  10 +-
 pkg/storage/replica_command.go      | 295 +++++++++++++++++-----------
 pkg/storage/replica_learner_test.go |   6 +-
 pkg/storage/replica_test.go         |   2 +-
 pkg/storage/replicate_queue.go      |   3 +-
 6 files changed, 214 insertions(+), 125 deletions(-)

diff --git a/pkg/roachpb/api.go b/pkg/roachpb/api.go
index ac3a94e08d4c..ed4effc8a165 100644
--- a/pkg/roachpb/api.go
+++ b/pkg/roachpb/api.go
@@ -1212,6 +1212,29 @@ func (acrr *AdminChangeReplicasRequest) AddChanges(chgs ...ReplicationChange) {
 	}
 }
 
+// ReplicationChanges is a slice of ReplicationChange.
+type ReplicationChanges []ReplicationChange
+
+func (rc ReplicationChanges) byType(typ ReplicaChangeType) []ReplicationTarget {
+	var sl []ReplicationTarget
+	for _, chg := range rc {
+		if chg.ChangeType == typ {
+			sl = append(sl, chg.Target)
+		}
+	}
+	return sl
+}
+
+// Additions returns a slice of all contained replication changes that add replicas.
+func (rc ReplicationChanges) Additions() []ReplicationTarget {
+	return rc.byType(ADD_REPLICA)
+}
+
+// Removals returns a slice of all contained replication changes that remove replicas.
+func (rc ReplicationChanges) Removals() []ReplicationTarget { + return rc.byType(REMOVE_REPLICA) +} + // Changes returns the changes requested by this AdminChangeReplicasRequest, taking // the deprecated method of doing so into account. func (acrr *AdminChangeReplicasRequest) Changes() []ReplicationChange { diff --git a/pkg/storage/client_replica_test.go b/pkg/storage/client_replica_test.go index e45d3d849e50..67262a5260b2 100644 --- a/pkg/storage/client_replica_test.go +++ b/pkg/storage/client_replica_test.go @@ -1673,20 +1673,22 @@ func TestChangeReplicasGeneration(t *testing.T) { assert.EqualValues(t, repl.Desc().GetGeneration(), oldGeneration+2) oldGeneration = repl.Desc().GetGeneration() - if _, err := repl.ChangeReplicas( + oldDesc := repl.Desc() + newDesc, err := repl.ChangeReplicas( context.Background(), roachpb.REMOVE_REPLICA, roachpb.ReplicationTarget{ NodeID: mtc.idents[1].NodeID, StoreID: mtc.idents[1].StoreID, }, - repl.Desc(), + oldDesc, storagepb.ReasonRangeOverReplicated, "", - ); err != nil { + ) + if err != nil { t.Fatalf("unexpected error: %v", err) } - assert.EqualValues(t, repl.Desc().GetGeneration(), oldGeneration+1) + assert.EqualValues(t, repl.Desc().GetGeneration(), oldGeneration+1, "\nold: %+v\nnew: %+v", oldDesc, newDesc) } func TestSystemZoneConfigs(t *testing.T) { diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index 8483715adec8..e20ef469650a 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -879,54 +879,90 @@ func (r *Replica) ChangeReplicas( return nil, errors.Errorf("%s: the current RangeDescriptor must not be nil", r) } + var chgs []roachpb.ReplicationChange switch changeType { case roachpb.ADD_REPLICA: - return r.addReplica(ctx, target, desc, SnapshotRequest_REBALANCE, reason, details) + chgs = roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, target) case roachpb.REMOVE_REPLICA: - return r.removeReplica(ctx, target, desc, SnapshotRequest_REBALANCE, reason, details) + chgs = roachpb.MakeReplicationChanges(roachpb.REMOVE_REPLICA, target) default: return nil, errors.Errorf(`unknown change type: %s`, changeType) } + return r.addAndRemoveReplicas(ctx, desc, SnapshotRequest_REBALANCE, reason, details, chgs) } func validateReplicationChanges( - desc *roachpb.RangeDescriptor, target roachpb.ReplicationTarget, + desc *roachpb.RangeDescriptor, chgs roachpb.ReplicationChanges, ) error { + // First make sure that the changes don't self-overlap (i.e. we're not adding + // a replica twice, or removing and immediately re-adding it). + byNodeID := make(map[roachpb.NodeID]roachpb.ReplicationChange, len(chgs)) + for _, chg := range chgs { + if _, ok := byNodeID[chg.Target.NodeID]; ok { + return fmt.Errorf("changes %+v refer to n%d twice", chgs, chg.Target.NodeID) + } + byNodeID[chg.Target.NodeID] = chg + } + + // Then, check that we're not adding a second replica on nodes that already + // have one, or "re-add" an existing replica. We delete from byNodeID so that + // after this loop, it contains only StoreIDs that we haven't seen in desc. for _, rDesc := range desc.Replicas().All() { - if rDesc.NodeID == target.NodeID { - // Two replicas from the same range are not allowed on the same node, even - // in different stores. 
- if rDesc.StoreID != target.StoreID { - return errors.Errorf("unable to add replica %v; node already has a replica in %s", target, desc) - } + chg, ok := byNodeID[rDesc.NodeID] + delete(byNodeID, rDesc.NodeID) + if !ok || chg.ChangeType != roachpb.ADD_REPLICA { + continue + } + // We're adding a replica that's already there. This isn't allowed, even + // when the newly added one would be on a different store. + if rDesc.StoreID != chg.Target.StoreID { + return errors.Errorf("unable to add replica %v; node already has a replica in %s", chg.Target.StoreID, desc) + } - // Looks like we found a replica with the same store and node id. If the - // replica is already a learner, then either some previous leaseholder was - // trying to add it with the learner+snapshot+voter cycle and got - // interrupted or else we hit a race between the replicate queue and - // AdminChangeReplicas. - if rDesc.GetType() == roachpb.ReplicaType_LEARNER { - return errors.Errorf( - "unable to add replica %v which is already present as a learner in %s", target, desc) - } + // Looks like we found a replica with the same store and node id. If the + // replica is already a learner, then either some previous leaseholder was + // trying to add it with the learner+snapshot+voter cycle and got + // interrupted or else we hit a race between the replicate queue and + // AdminChangeReplicas. + if rDesc.GetType() == roachpb.ReplicaType_LEARNER { + return errors.Errorf( + "unable to add replica %v which is already present as a learner in %s", chg.Target, desc) + } - // Otherwise, we already had a full voter replica. Can't add another to - // this store. - return errors.Errorf("unable to add replica %v which is already present in %s", target, desc) + // Otherwise, we already had a full voter replica. Can't add another to + // this store. + return errors.Errorf("unable to add replica %v which is already present in %s", chg.Target, desc) + } + + // Any removals left in the map now refer to nonexisting replicas, and we refuse them. + for _, chg := range byNodeID { + if chg.ChangeType != roachpb.REMOVE_REPLICA { + continue } + return errors.Errorf("removing %v which is not in %s", chg.Target, desc) } return nil } -func (r *Replica) addReplica( +// addAndRemoveReplicas validates the incoming changes, adds learner replicas for +// all replica additions, sends a snapshot to each, and then runs an atomic replication +// change that promotes the learners to voters and removes any replicas for which this +// was requested. +func (r *Replica) addAndRemoveReplicas( ctx context.Context, - target roachpb.ReplicationTarget, desc *roachpb.RangeDescriptor, priority SnapshotRequest_Priority, reason storagepb.RangeLogEventReason, details string, + chgs roachpb.ReplicationChanges, ) (*roachpb.RangeDescriptor, error) { - if err := validateReplicationChanges(desc, target); err != nil { + if len(chgs) != 1 { + // TODO(tbg): lift this restriction when atomic membership changes are + // plumbed into raft. + return nil, errors.Errorf("need exactly one change, got %+v", chgs) + } + + if err := validateReplicationChanges(desc, chgs); err != nil { return nil, err } @@ -934,76 +970,111 @@ func (r *Replica) addReplica( useLearners := useLearnerReplicas.Get(&settings.SV) useLearners = useLearners && settings.Version.IsActive(cluster.VersionLearnerReplicas) if !useLearners { + // NB: we will never use atomic replication changes while learners are not + // also active. 
+ if len(chgs) != 1 { + return nil, errors.Errorf("need exactly one change, got %+v", chgs) + } + target := chgs[0].Target return r.addReplicaLegacyPreemptiveSnapshot(ctx, target, desc, priority, reason, details) } - // First add the replica as a raft learner. This means it accepts raft traffic - // (so it can catch up) but doesn't vote (so it doesn't affect quorum and thus - // doesn't introduce fragility into the system). For details see - _ = roachpb.ReplicaDescriptors.Learners - learnerDesc, err := addLearnerReplica(ctx, r.store, desc, target, reason, details) - if err != nil { - return nil, err + if adds := chgs.Additions(); len(adds) > 0 { + // For all newly added nodes, first add raft learner replicas. They accept raft traffic + // (so they can catch up) but don't get to vote (so they don't affect quorum and thus + // don't introduce fragility into the system). For details see: + _ = roachpb.ReplicaDescriptors.Learners + var err error + desc, err = addLearnerReplicas(ctx, r.store, desc, reason, details, chgs.Additions()) + if err != nil { + return nil, err + } } - // Now move it to be a full voter (waiting on it to get a raft snapshot first, - // so it's not immediately way behind). - voterDesc, err := r.promoteLearnerReplicaToVoter(ctx, learnerDesc, target, priority, reason, details) + // Catch up any learners, then run the atomic replication change that adds the + // final voters and removes any undesirable replicas. + desc, err := r.finalizeChangeReplicas(ctx, desc, priority, reason, details, chgs) if err != nil { // Don't leave a learner replica lying around if we didn't succeed in // promoting it to a voter. - log.Infof(ctx, "could not promote %s to voter, rolling back: %v", target, err) - r.rollbackLearnerReplica(ctx, learnerDesc, target, reason, details) + targets := chgs.Additions() + log.Infof(ctx, "could not promote %v to voter, rolling back: %v", targets, err) + for _, target := range targets { + r.tryRollBackLearnerReplica(ctx, r.Desc(), target, reason, details) + } return nil, err } - return voterDesc, nil + return desc, nil } -func addLearnerReplica( +// addLearnerReplicas adds learners to the given replication targets. +func addLearnerReplicas( ctx context.Context, store *Store, desc *roachpb.RangeDescriptor, - target roachpb.ReplicationTarget, reason storagepb.RangeLogEventReason, details string, + targets []roachpb.ReplicationTarget, ) (*roachpb.RangeDescriptor, error) { newDesc := *desc newDesc.SetReplicas(desc.Replicas().DeepCopy()) - replDesc := roachpb.ReplicaDescriptor{ - NodeID: target.NodeID, - StoreID: target.StoreID, - ReplicaID: desc.NextReplicaID, - Type: roachpb.ReplicaTypeLearner(), - } - newDesc.NextReplicaID++ - newDesc.AddReplica(replDesc) + var added []roachpb.ReplicaDescriptor + for _, target := range targets { + replDesc := roachpb.ReplicaDescriptor{ + NodeID: target.NodeID, + StoreID: target.StoreID, + ReplicaID: desc.NextReplicaID, + Type: roachpb.ReplicaTypeLearner(), + } + newDesc.NextReplicaID++ + newDesc.AddReplica(replDesc) + added = append(added, replDesc) + } err := execChangeReplicasTxn( - ctx, store, roachpb.ADD_REPLICA, desc, replDesc, &newDesc, reason, details, + ctx, store, desc, &newDesc, reason, details, added, nil, /* removed */ ) return &newDesc, err } -func (r *Replica) promoteLearnerReplicaToVoter( +// finalizeChangeReplicas carries out the atomic membership change that finalizes +// the addition and/or removal of replicas. 
Any voters in the process of being +// added (as reflected by the replication changes) must have been added as +// learners already and will be caught up before being promoted to voters. Any +// replica removals (from the replication changes) will be processed. All of this +// occurs in one atomic raft membership change. +func (r *Replica) finalizeChangeReplicas( ctx context.Context, desc *roachpb.RangeDescriptor, - target roachpb.ReplicationTarget, priority SnapshotRequest_Priority, reason storagepb.RangeLogEventReason, details string, + chgs roachpb.ReplicationChanges, ) (*roachpb.RangeDescriptor, error) { // TODO(dan): We allow ranges with learner replicas to split, so in theory // this may want to detect that and retry, sending a snapshot and promoting // both sides. - newReplicas := desc.Replicas().DeepCopy().All() - for i, rDesc := range newReplicas { - if rDesc.NodeID != target.NodeID || rDesc.StoreID != target.StoreID { - continue + updatedDesc := *desc + updatedDesc.SetReplicas(desc.Replicas().DeepCopy()) + + adds, removes := chgs.Additions(), chgs.Removals() + var replsAdded, replsRemoved []roachpb.ReplicaDescriptor + for _, target := range adds { + // All adds must be present as learners right now and they are removed, + // upgraded, and then re-added. + rDesc, ok := updatedDesc.RemoveReplica(target.NodeID, target.StoreID) + if !ok { + return nil, errors.Errorf("programming error: replica %v not found in %v", target, updatedDesc) } + if rDesc.GetType() != roachpb.ReplicaType_LEARNER { - return nil, errors.Errorf(`%s: cannot promote replica of type %s`, r, rDesc.Type) + return nil, errors.Errorf("programming error: cannot promote replica of type %s", rDesc.Type) } + rDesc.Type = roachpb.ReplicaTypeVoter() + updatedDesc.AddReplica(rDesc) + replsAdded = append(replsAdded, rDesc) + // Note that raft snapshot queue will refuse to send a snapshot to a learner // replica if its store is already sending a snapshot to that replica. That // races with this snapshot. Most of the time, this side will win the race, @@ -1026,25 +1097,31 @@ func (r *Replica) promoteLearnerReplicaToVoter( if err := r.sendSnapshot(ctx, rDesc, SnapshotRequest_LEARNER, priority); err != nil { return nil, err } + } - if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterLearnerSnapshot; fn != nil { - if fn() { - return desc, nil - } + if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterLearnerSnapshot; fn != nil { + if fn() { + return desc, nil } + } - rDesc.Type = roachpb.ReplicaTypeVoter() - newReplicas[i] = rDesc - - updatedDesc := *desc - updatedDesc.SetReplicas(roachpb.MakeReplicaDescriptors(&newReplicas)) - err := execChangeReplicasTxn(ctx, r.store, roachpb.ADD_REPLICA, desc, rDesc, &updatedDesc, reason, details) - return &updatedDesc, err + for _, target := range removes { + rDesc, found := updatedDesc.RemoveReplica(target.NodeID, target.StoreID) + if !found { + return nil, errors.Errorf("target to remove %v not found in %s", target, &updatedDesc) + } + replsRemoved = append(replsRemoved, rDesc) } - return nil, errors.Errorf(`%s: could not find replica to promote %s`, r, target) + + err := execChangeReplicasTxn(ctx, r.store, desc, &updatedDesc, reason, details, replsAdded, replsRemoved) + return &updatedDesc, err } -func (r *Replica) rollbackLearnerReplica( +// tryRollbackLearnerReplica attempts to remove a learner specified by the +// target. If no such learner is found in the descriptor (including when it is a +// voter instead), no action is taken. 
Otherwise, a single time-limited +// best-effort attempt at removing the learner is made. +func (r *Replica) tryRollBackLearnerReplica( ctx context.Context, desc *roachpb.RangeDescriptor, target roachpb.ReplicationTarget, @@ -1054,10 +1131,9 @@ func (r *Replica) rollbackLearnerReplica( newDesc := *desc newDesc.SetReplicas(desc.Replicas().DeepCopy()) replDesc, ok := newDesc.RemoveReplica(target.NodeID, target.StoreID) - if !ok { - // This is a programming error if it happens. Why are we rolling back - // something that's not present? - log.Warningf(ctx, "failed to rollback learner %s, missing from descriptor %s", target, desc) + if !ok || replDesc.GetType() != roachpb.ReplicaType_LEARNER { + // There's no learner to roll back. + log.Event(ctx, "learner to roll back not found; skipping") return } @@ -1066,9 +1142,11 @@ func (r *Replica) rollbackLearnerReplica( // context (but with the old tags and with some timeout to save this from // blocking the caller indefinitely). const rollbackTimeout = 10 * time.Second + rollbackFn := func(ctx context.Context) error { + removed := []roachpb.ReplicaDescriptor{replDesc} return execChangeReplicasTxn( - ctx, r.store, roachpb.REMOVE_REPLICA, desc, replDesc, &newDesc, reason, details, + ctx, r.store, desc, &newDesc, reason, details, nil /* added */, removed, ) } rollbackCtx := logtags.WithTags(context.Background(), logtags.FromContext(ctx)) @@ -1154,42 +1232,19 @@ func (r *Replica) addReplicaLegacyPreemptiveSnapshot( updatedDesc.NextReplicaID++ updatedDesc.AddReplica(repDesc) - err := execChangeReplicasTxn(ctx, r.store, roachpb.ADD_REPLICA, desc, repDesc, &updatedDesc, reason, details) - return &updatedDesc, err -} - -func (r *Replica) removeReplica( - ctx context.Context, - target roachpb.ReplicationTarget, - desc *roachpb.RangeDescriptor, - priority SnapshotRequest_Priority, - reason storagepb.RangeLogEventReason, - details string, -) (*roachpb.RangeDescriptor, error) { - if desc == nil { - return nil, errors.Errorf("%s: the current RangeDescriptor must not be nil", r) - } - updatedDesc := *desc - updatedDesc.SetReplicas(desc.Replicas().DeepCopy()) - // If that exact node-store combination does not have the replica, - // abort the removal. - removed, ok := updatedDesc.RemoveReplica(target.NodeID, target.StoreID) - if !ok { - return nil, errors.Errorf("%s: unable to remove replica %v which is not present", r, target) - } - err := execChangeReplicasTxn(ctx, r.store, roachpb.REMOVE_REPLICA, desc, removed, &updatedDesc, reason, details) + added := []roachpb.ReplicaDescriptor{repDesc} + err := execChangeReplicasTxn(ctx, r.store, desc, &updatedDesc, reason, details, added, nil /* removed */) return &updatedDesc, err } func execChangeReplicasTxn( ctx context.Context, store *Store, - changeType roachpb.ReplicaChangeType, desc *roachpb.RangeDescriptor, - repDesc roachpb.ReplicaDescriptor, updatedDesc *roachpb.RangeDescriptor, reason storagepb.RangeLogEventReason, details string, + added, removed []roachpb.ReplicaDescriptor, ) error { generationComparableEnabled := store.ClusterSettings().Version.IsActive(cluster.VersionGenerationComparable) if generationComparableEnabled { @@ -1205,7 +1260,7 @@ func execChangeReplicasTxn( if err != nil { return err } - log.Infof(ctx, "change replicas (%v %s): existing descriptor %s", changeType, repDesc, desc) + log.Infof(ctx, "change replicas (add %v remove %v): existing descriptor %s", added, removed, desc) { b := txn.NewBatch() @@ -1223,10 +1278,20 @@ func execChangeReplicasTxn( } // Log replica change into range event log. 
- if err := store.logChange( - ctx, txn, changeType, repDesc, *updatedDesc, reason, details, - ); err != nil { - return err + for _, tup := range []struct { + typ roachpb.ReplicaChangeType + repDescs []roachpb.ReplicaDescriptor + }{ + {roachpb.ADD_REPLICA, added}, + {roachpb.REMOVE_REPLICA, removed}, + } { + for _, repDesc := range tup.repDescs { + if err := store.logChange( + ctx, txn, tup.typ, repDesc, *updatedDesc, reason, details, + ); err != nil { + return err + } + } } // End the transaction manually instead of letting RunTransaction @@ -1240,21 +1305,21 @@ func execChangeReplicasTxn( var crt *roachpb.ChangeReplicasTrigger if !store.ClusterSettings().Version.IsActive(cluster.VersionAtomicChangeReplicasTrigger) { + var deprecatedChangeType roachpb.ReplicaChangeType + var deprecatedRepDesc roachpb.ReplicaDescriptor + if len(added) > 0 { + deprecatedChangeType = roachpb.ADD_REPLICA + deprecatedRepDesc = added[0] + } else { + deprecatedChangeType = roachpb.REMOVE_REPLICA + deprecatedRepDesc = removed[0] + } crt = &roachpb.ChangeReplicasTrigger{ - DeprecatedChangeType: changeType, - DeprecatedReplica: repDesc, + DeprecatedChangeType: deprecatedChangeType, + DeprecatedReplica: deprecatedRepDesc, Desc: updatedDesc, } } else { - var added, removed []roachpb.ReplicaDescriptor - switch changeType { - case roachpb.ADD_REPLICA: - added = append(added, repDesc) - case roachpb.REMOVE_REPLICA: - removed = append(removed, repDesc) - default: - return errors.Errorf("unknown change type: %d", changeType) - } crt = &roachpb.ChangeReplicasTrigger{ Desc: updatedDesc, InternalAddedReplicas: added, diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 4c49dfacf45e..7a19a5bb97c5 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -542,10 +542,8 @@ func TestLearnerReplicateQueueRace(t *testing.T) { } formattedTrace := tracing.FormatRecordedSpans(trace) expectedMessages := []string{ - `could not promote n3,s3 to voter, rolling back: change replicas of r\d+ failed: descriptor changed`, - // TODO(dan): Consider skipping the rollback when trying to promote a - // learner to a voter results in a "descriptor changed" error. - `failed to rollback learner n3,s3, abandoning it for the replicate queue: change replicas of r\d+ failed: descriptor changed`, + `could not promote .*n3,s3.* to voter, rolling back: change replicas of r\d+ failed: descriptor changed`, + `learner to roll back not found`, } return testutils.MatchInOrder(formattedTrace, expectedMessages...) 
}() diff --git a/pkg/storage/replica_test.go b/pkg/storage/replica_test.go index c099c4ec7110..945e4911351c 100644 --- a/pkg/storage/replica_test.go +++ b/pkg/storage/replica_test.go @@ -5990,7 +5990,7 @@ func TestChangeReplicasDuplicateError(t *testing.T) { storagepb.ReasonRebalance, "", ); err == nil || !strings.Contains(err.Error(), "node already has a replica") { - t.Fatalf("must not be able to add second replica to same node (err=%s)", err) + t.Fatalf("must not be able to add second replica to same node (err=%+v)", err) } } diff --git a/pkg/storage/replicate_queue.go b/pkg/storage/replicate_queue.go index 3550538f2245..adfbe1d9dbfd 100644 --- a/pkg/storage/replicate_queue.go +++ b/pkg/storage/replicate_queue.go @@ -760,7 +760,8 @@ func (rq *replicateQueue) addReplica( if dryRun { return nil } - if _, err := repl.addReplica(ctx, target, desc, priority, reason, details); err != nil { + chgs := roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, target) + if _, err := repl.addAndRemoveReplicas(ctx, desc, priority, reason, details, chgs); err != nil { return err } rangeUsageInfo := rangeUsageInfoForRepl(repl)
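
For readers following the patch without the full tree, here is a minimal, self-contained sketch of how the new `ReplicationChanges` helpers and the first pass of `validateReplicationChanges` behave: changes are split into additions and removals, and a slice that names the same node twice is rejected before any learners are added. The type definitions below are simplified stand-ins for the real `roachpb` types (integer IDs instead of `roachpb.NodeID`/`roachpb.StoreID`), not the actual package API.

```go
package main

import "fmt"

// Simplified stand-ins for roachpb.ReplicaChangeType, ReplicationTarget and
// ReplicationChange; the real definitions live in pkg/roachpb.
type ReplicaChangeType int

const (
	ADD_REPLICA ReplicaChangeType = iota
	REMOVE_REPLICA
)

type ReplicationTarget struct {
	NodeID, StoreID int
}

type ReplicationChange struct {
	ChangeType ReplicaChangeType
	Target     ReplicationTarget
}

type ReplicationChanges []ReplicationChange

func (rc ReplicationChanges) byType(typ ReplicaChangeType) []ReplicationTarget {
	var sl []ReplicationTarget
	for _, chg := range rc {
		if chg.ChangeType == typ {
			sl = append(sl, chg.Target)
		}
	}
	return sl
}

// Additions and Removals split the changes the same way the patch does:
// additions become learners first, removals are folded into the final change.
func (rc ReplicationChanges) Additions() []ReplicationTarget { return rc.byType(ADD_REPLICA) }
func (rc ReplicationChanges) Removals() []ReplicationTarget  { return rc.byType(REMOVE_REPLICA) }

// checkNoOverlap mirrors the first pass of validateReplicationChanges: a
// request must not refer to the same node twice (no add+add or add+remove
// against one node in a single slice of changes).
func checkNoOverlap(chgs ReplicationChanges) error {
	byNodeID := make(map[int]ReplicationChange, len(chgs))
	for _, chg := range chgs {
		if _, ok := byNodeID[chg.Target.NodeID]; ok {
			return fmt.Errorf("changes %+v refer to n%d twice", chgs, chg.Target.NodeID)
		}
		byNodeID[chg.Target.NodeID] = chg
	}
	return nil
}

func main() {
	// An "atomic swap": add a replica on n4 and remove the one on n2.
	chgs := ReplicationChanges{
		{ChangeType: ADD_REPLICA, Target: ReplicationTarget{NodeID: 4, StoreID: 4}},
		{ChangeType: REMOVE_REPLICA, Target: ReplicationTarget{NodeID: 2, StoreID: 2}},
	}
	if err := checkNoOverlap(chgs); err != nil {
		panic(err)
	}
	fmt.Println("additions:", chgs.Additions()) // learners to add and catch up first
	fmt.Println("removals: ", chgs.Removals())  // removed in the final atomic change
}
```

Note that with this PR as written, `addAndRemoveReplicas` would still reject the two-element slice shown above because of its `len(chgs) != 1` guard; such a swap only becomes expressible once the three missing pieces described in the commit message (raft-level atomic configuration changes, plumbing the unrolled slice through `AdminChangeReplicas`, and replicate-queue support) are in place.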