diff --git a/pkg/cli/debug.go b/pkg/cli/debug.go
index 8d3aef674a5e..d922cc5a3002 100644
--- a/pkg/cli/debug.go
+++ b/pkg/cli/debug.go
@@ -1163,8 +1163,9 @@ func removeDeadReplicas(
   err = storage.IterateRangeDescriptors(ctx, db, func(desc roachpb.RangeDescriptor) (bool, error) {
     hasSelf := false
     numDeadPeers := 0
-    numReplicas := len(desc.Replicas().Unwrap())
-    for _, rep := range desc.Replicas().Unwrap() {
+    allReplicas := desc.Replicas().All()
+    numReplicas := len(allReplicas)
+    for _, rep := range allReplicas {
       if rep.StoreID == storeIdent.StoreID {
         hasSelf = true
       }
diff --git a/pkg/kv/dist_sender.go b/pkg/kv/dist_sender.go
index b86ea8a9c6dc..80ca7a434ada 100644
--- a/pkg/kv/dist_sender.go
+++ b/pkg/kv/dist_sender.go
@@ -487,8 +487,10 @@ func (ds *DistSender) getDescriptor(
 func (ds *DistSender) sendSingleRange(
   ctx context.Context, ba roachpb.BatchRequest, desc *roachpb.RangeDescriptor, withCommit bool,
 ) (*roachpb.BatchResponse, *roachpb.Error) {
-  // Try to send the call.
-  replicas := NewReplicaSlice(ds.gossip, desc)
+  // Try to send the call. Learner replicas won't serve reads/writes, so send
+  // only to the `Voters` replicas. This is just an optimization to save a
+  // network hop, everything would still work if we had `All` here.
+  replicas := NewReplicaSlice(ds.gossip, desc.Replicas().Voters())

   // If this request needs to go to a lease holder and we know who that is, move
   // it to the front.
diff --git a/pkg/kv/dist_sender_rangefeed.go b/pkg/kv/dist_sender_rangefeed.go
index 5be9e8459cde..716016eaa1d2 100644
--- a/pkg/kv/dist_sender_rangefeed.go
+++ b/pkg/kv/dist_sender_rangefeed.go
@@ -222,7 +222,10 @@ func (ds *DistSender) singleRangeFeed(
   if ds.rpcContext != nil {
     latencyFn = ds.rpcContext.RemoteClocks.Latency
   }
-  replicas := NewReplicaSlice(ds.gossip, desc)
+  // Learner replicas won't serve reads/writes, so send only to the `Voters`
+  // replicas. This is just an optimization to save a network hop, everything
+  // would still work if we had `All` here.
+  replicas := NewReplicaSlice(ds.gossip, desc.Replicas().Voters())
   replicas.OptimizeReplicaOrder(ds.getNodeDescriptor(), latencyFn)

   transport, err := ds.transportFactory(SendOptions{}, ds.nodeDialer, replicas)
diff --git a/pkg/kv/replica_slice.go b/pkg/kv/replica_slice.go
index 8e7cd18ebd91..5a9c3e451a4d 100644
--- a/pkg/kv/replica_slice.go
+++ b/pkg/kv/replica_slice.go
@@ -42,12 +42,12 @@ type ReplicaSlice []ReplicaInfo
 // NewReplicaSlice creates a ReplicaSlice from the replicas listed in the range
 // descriptor and using gossip to lookup node descriptors. Replicas on nodes
 // that are not gossiped are omitted from the result.
-func NewReplicaSlice(gossip *gossip.Gossip, desc *roachpb.RangeDescriptor) ReplicaSlice {
+func NewReplicaSlice(gossip *gossip.Gossip, replicas []roachpb.ReplicaDescriptor) ReplicaSlice {
   if gossip == nil {
     return nil
   }
-  replicas := make(ReplicaSlice, 0, len(desc.Replicas().Unwrap()))
-  for _, r := range desc.Replicas().Unwrap() {
+  rs := make(ReplicaSlice, 0, len(replicas))
+  for _, r := range replicas {
     nd, err := gossip.GetNodeDescriptor(r.NodeID)
     if err != nil {
       if log.V(1) {
@@ -55,12 +55,12 @@ func NewReplicaSlice(gossip *gossip.Gossip, desc *roachpb.RangeDescriptor) Repli
       }
       continue
     }
-    replicas = append(replicas, ReplicaInfo{
+    rs = append(rs, ReplicaInfo{
       ReplicaDescriptor: r,
       NodeDesc: nd,
     })
   }
-  return replicas
+  return rs
 }

 // ReplicaSlice implements shuffle.Interface.
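Aside (not part of the patch): a minimal caller-side sketch of the new NewReplicaSlice signature shown above. The helper name replicaSliceForRouting is hypothetical; kv.NewReplicaSlice taking a []roachpb.ReplicaDescriptor and the Voters()/All() accessors come from the hunks in this diff.

package example

import (
  "github.com/cockroachdb/cockroach/pkg/gossip"
  "github.com/cockroachdb/cockroach/pkg/kv"
  "github.com/cockroachdb/cockroach/pkg/roachpb"
)

// replicaSliceForRouting shows how a call site now chooses the replica set
// explicitly instead of handing the whole descriptor to NewReplicaSlice.
func replicaSliceForRouting(gsp *gossip.Gossip, desc *roachpb.RangeDescriptor) kv.ReplicaSlice {
  // Learners can't serve reads/writes, so fan out to voters only. Passing
  // desc.Replicas().All() would also be correct, just a possible wasted hop.
  return kv.NewReplicaSlice(gsp, desc.Replicas().Voters())
}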
diff --git a/pkg/roachpb/metadata.go b/pkg/roachpb/metadata.go
index 27118a5c9ea2..6efe07cf340c 100644
--- a/pkg/roachpb/metadata.go
+++ b/pkg/roachpb/metadata.go
@@ -220,7 +220,7 @@ func (r RangeDescriptor) Validate() error {
     return errors.Errorf("NextReplicaID must be non-zero")
   }
   seen := map[ReplicaID]struct{}{}
-  for i, rep := range r.Replicas().Unwrap() {
+  for i, rep := range r.Replicas().All() {
     if err := rep.Validate(); err != nil {
       return errors.Errorf("replica %d is invalid: %s", i, err)
     }
@@ -247,8 +247,8 @@ func (r RangeDescriptor) String() string {
   }

   buf.WriteString(" [")
-  if len(r.Replicas().Unwrap()) > 0 {
-    for i, rep := range r.Replicas().Unwrap() {
+  if allReplicas := r.Replicas().All(); len(allReplicas) > 0 {
+    for i, rep := range allReplicas {
       if i > 0 {
         buf.WriteString(", ")
       }
diff --git a/pkg/roachpb/metadata_replicas.go b/pkg/roachpb/metadata_replicas.go
index 1430e12fc8b1..af6c0730d6a1 100644
--- a/pkg/roachpb/metadata_replicas.go
+++ b/pkg/roachpb/metadata_replicas.go
@@ -37,7 +37,7 @@ func (d ReplicaDescriptors) Unwrap() []ReplicaDescriptor {
 }

 // All returns every replica in the set, including both voter replicas and
-// learner replicas.
+// learner replicas. Voter replicas are ordered first in the returned slice.
 func (d ReplicaDescriptors) All() []ReplicaDescriptor {
   return d.wrapped
 }
diff --git a/pkg/server/admin.go b/pkg/server/admin.go
index d77d1d12cdb0..171c280ea0bf 100644
--- a/pkg/server/admin.go
+++ b/pkg/server/admin.go
@@ -663,7 +663,7 @@ func (s *adminServer) statsForSpan(
       if err := kv.Value.GetProto(&rng); err != nil {
         return nil, s.serverError(err)
       }
-      for _, repl := range rng.Replicas().Unwrap() {
+      for _, repl := range rng.Replicas().All() {
         nodeIDs[repl.NodeID] = struct{}{}
       }
     }
diff --git a/pkg/server/status.go b/pkg/server/status.go
index 87ee58abe7cb..075e76c2f05f 100644
--- a/pkg/server/status.go
+++ b/pkg/server/status.go
@@ -1125,7 +1125,7 @@ func (s *statusServer) RaftDebug(
       desc := node.Range.State.Desc
       // Check for whether replica should be GCed.
       containsNode := false
-      for _, replica := range desc.Replicas().Unwrap() {
+      for _, replica := range desc.Replicas().All() {
         if replica.NodeID == node.NodeID {
           containsNode = true
         }
diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go
index 23ef138112cf..21712ac8ba65 100644
--- a/pkg/sql/crdb_internal.go
+++ b/pkg/sql/crdb_internal.go
@@ -1714,7 +1714,7 @@ CREATE TABLE crdb_internal.ranges_no_leases (
   database_name        STRING NOT NULL,
   table_name           STRING NOT NULL,
   index_name           STRING NOT NULL,
-  replicas             INT[] NOT NULL,
+  replicas             INT[] NOT NULL,
   split_enforced_until TIMESTAMP
 )
 `,
@@ -1768,8 +1768,11 @@ CREATE TABLE crdb_internal.ranges_no_leases (
           return nil, err
         }

+        // TODO(dan): We're trying to treat learners as a far-behind replica as
+        // much as possible, so just include them in the list of replicas. We can
+        // add a separate column for them if we get feedback about it.
         var replicas []int
-        for _, rd := range desc.Replicas().Unwrap() {
+        for _, rd := range desc.Replicas().All() {
           replicas = append(replicas, int(rd.StoreID))
         }
         sort.Ints(replicas)
diff --git a/pkg/sql/distsqlplan/replicaoracle/oracle.go b/pkg/sql/distsqlplan/replicaoracle/oracle.go
index 14e7c3b0709d..0d2e0c0abd29 100644
--- a/pkg/sql/distsqlplan/replicaoracle/oracle.go
+++ b/pkg/sql/distsqlplan/replicaoracle/oracle.go
@@ -268,11 +268,15 @@ func (o *binPackingOracle) ChoosePreferredReplica(
 // is available in gossip. If no nodes are available, a RangeUnavailableError is
 // returned.
 func replicaSliceOrErr(desc roachpb.RangeDescriptor, gsp *gossip.Gossip) (kv.ReplicaSlice, error) {
-  replicas := kv.NewReplicaSlice(gsp, &desc)
+  // Learner replicas won't serve reads/writes, so send only to the `Voters`
+  // replicas. This is just an optimization to save a network hop, everything
+  // would still work if we had `All` here.
+  voterReplicas := desc.Replicas().Voters()
+  replicas := kv.NewReplicaSlice(gsp, voterReplicas)
   if len(replicas) == 0 {
     // We couldn't get node descriptors for any replicas.
     var nodeIDs []roachpb.NodeID
-    for _, r := range desc.Replicas().Unwrap() {
+    for _, r := range voterReplicas {
       nodeIDs = append(nodeIDs, r.NodeID)
     }
     return kv.ReplicaSlice{}, sqlbase.NewRangeUnavailableError(
diff --git a/pkg/storage/consistency_queue.go b/pkg/storage/consistency_queue.go
index ef58f1494e1b..3e0788748cd7 100644
--- a/pkg/storage/consistency_queue.go
+++ b/pkg/storage/consistency_queue.go
@@ -82,7 +82,7 @@ func (q *consistencyQueue) shouldQueue(
   }
   // Check if all replicas are live. Some tests run without a NodeLiveness configured.
   if repl.store.cfg.NodeLiveness != nil {
-    for _, rep := range repl.Desc().Replicas().Unwrap() {
+    for _, rep := range repl.Desc().Replicas().All() {
       if live, err := repl.store.cfg.NodeLiveness.IsLive(rep.NodeID); err != nil {
         log.VErrEventf(ctx, 3, "node %d liveness failed: %s", rep.NodeID, err)
         return false, 0
diff --git a/pkg/storage/replica.go b/pkg/storage/replica.go
index 819ed140c617..10b6a537275d 100644
--- a/pkg/storage/replica.go
+++ b/pkg/storage/replica.go
@@ -727,12 +727,14 @@ func (r *Replica) GetGCThreshold() hlc.Timestamp {
   return *r.mu.state.GCThreshold
 }

+// maxReplicaID returns the maximum ReplicaID of any replica, including voters
+// and learners.
 func maxReplicaID(desc *roachpb.RangeDescriptor) roachpb.ReplicaID {
   if desc == nil || !desc.IsInitialized() {
     return 0
   }
   var maxID roachpb.ReplicaID
-  for _, repl := range desc.Replicas().Unwrap() {
+  for _, repl := range desc.Replicas().All() {
     if repl.ReplicaID > maxID {
       maxID = repl.ReplicaID
     }
@@ -903,7 +905,9 @@ func (r *Replica) State() storagepb.RangeInfo {
   }
   ri.RangeMaxBytes = *r.mu.zone.RangeMaxBytes
   if desc := ri.ReplicaState.Desc; desc != nil {
-    for _, replDesc := range desc.Replicas().Unwrap() {
+    // Learner replicas don't serve follower reads, but they still receive
+    // closed timestamp updates, so include them here.
+    for _, replDesc := range desc.Replicas().All() {
       r.store.cfg.ClosedTimestamp.Storage.VisitDescending(replDesc.NodeID, func(e ctpb.Entry) (done bool) {
         mlai, found := e.MLAI[r.RangeID]
         if !found {
diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go
index 96bc69780982..9fa1eb8d5c39 100644
--- a/pkg/storage/replica_command.go
+++ b/pkg/storage/replica_command.go
@@ -870,7 +870,7 @@ func (r *Replica) changeReplicas(
   }
   repDescIdx := -1 // tracks NodeID && StoreID
   nodeUsed := false // tracks NodeID only
-  for i, existingRep := range desc.Replicas().Unwrap() {
+  for i, existingRep := range desc.Replicas().All() {
     nodeUsedByExistingRep := existingRep.NodeID == repDesc.NodeID
     nodeUsed = nodeUsed || nodeUsedByExistingRep

@@ -1349,7 +1349,7 @@ func (s *Store) AdminRelocateRange(
   var addTargets []roachpb.ReplicaDescriptor
   for _, t := range targets {
     found := false
-    for _, replicaDesc := range rangeDesc.Replicas().Unwrap() {
+    for _, replicaDesc := range rangeDesc.Replicas().All() {
       if replicaDesc.StoreID == t.StoreID && replicaDesc.NodeID == t.NodeID {
         found = true
         break
@@ -1364,7 +1364,7 @@ func (s *Store) AdminRelocateRange(
   }

   var removeTargets []roachpb.ReplicaDescriptor
-  for _, replicaDesc := range rangeDesc.Replicas().Unwrap() {
+  for _, replicaDesc := range rangeDesc.Replicas().All() {
     found := false
     for _, t := range targets {
       if replicaDesc.StoreID == t.StoreID && replicaDesc.NodeID == t.NodeID {
@@ -1420,7 +1420,7 @@ func (s *Store) AdminRelocateRange(
       })
       desc.NextReplicaID++
     case roachpb.REMOVE_REPLICA:
-      newReplicas := removeTargetFromSlice(desc.Replicas().Unwrap(), target)
+      newReplicas := removeTargetFromSlice(desc.Replicas().All(), target)
       desc.SetReplicas(roachpb.MakeReplicaDescriptors(newReplicas))
     default:
       panic(errors.Errorf("unknown ReplicaChangeType %v", changeType))
@@ -1480,7 +1480,7 @@ func (s *Store) AdminRelocateRange(
       ctx,
       storeList,
       zone,
-      rangeInfo.Desc.Replicas().Unwrap(),
+      rangeInfo.Desc.Replicas().All(),
       rangeInfo,
       s.allocator.scorerOptions())
     if targetStore == nil {
@@ -1618,8 +1618,11 @@ func (r *Replica) adminScatter(
   // probability 1/N of choosing each.
   if args.RandomizeLeases && r.OwnsValidLease(r.store.Clock().Now()) {
     desc := r.Desc()
-    newLeaseholderIdx := rand.Intn(len(desc.Replicas().Unwrap()))
-    targetStoreID := desc.Replicas().Unwrap()[newLeaseholderIdx].StoreID
+    // Learner replicas aren't allowed to become the leaseholder or raft leader,
+    // so only consider the `Voters` replicas.
+    voterReplicas := desc.Replicas().Voters()
+    newLeaseholderIdx := rand.Intn(len(voterReplicas))
+    targetStoreID := voterReplicas[newLeaseholderIdx].StoreID
     if targetStoreID != r.store.StoreID() {
       if err := r.AdminTransferLease(ctx, targetStoreID); err != nil {
         log.Warningf(ctx, "failed to scatter lease to s%d: %+v", targetStoreID, err)
diff --git a/pkg/storage/replica_metrics.go b/pkg/storage/replica_metrics.go
index 9fc9926f6e1e..2ffbf2242952 100644
--- a/pkg/storage/replica_metrics.go
+++ b/pkg/storage/replica_metrics.go
@@ -156,7 +156,10 @@ func calcRangeCounter(
   numReplicas int32,
   clusterNodes int,
 ) (rangeCounter, unavailable, underreplicated, overreplicated bool) {
-  for _, rd := range desc.Replicas().Unwrap() {
+  // It seems unlikely that a learner replica would be the first live one, but
+  // there's no particular reason to exclude them. Note that `All` returns the
+  // voters first.
+  for _, rd := range desc.Replicas().All() {
     if livenessMap[rd.NodeID].IsLive {
       rangeCounter = rd.StoreID == storeID
       break
@@ -165,25 +168,27 @@ func calcRangeCounter(
   // We also compute an estimated per-range count of under-replicated and
   // unavailable ranges for each range based on the liveness table.
   if rangeCounter {
-    liveReplicas := calcLiveReplicas(desc, livenessMap)
-    if liveReplicas < desc.Replicas().QuorumSize() {
+    liveVoterReplicas := calcLiveVoterReplicas(desc, livenessMap)
+    if liveVoterReplicas < desc.Replicas().QuorumSize() {
       unavailable = true
     }
     needed := GetNeededReplicas(numReplicas, clusterNodes)
-    if needed > liveReplicas {
+    if needed > liveVoterReplicas {
       underreplicated = true
-    } else if needed < liveReplicas {
+    } else if needed < liveVoterReplicas {
       overreplicated = true
     }
   }
   return
 }

-// calcLiveReplicas returns a count of the live replicas; a live replica is
-// determined by checking its node in the provided liveness map.
-func calcLiveReplicas(desc *roachpb.RangeDescriptor, livenessMap IsLiveMap) int {
+// calcLiveVoterReplicas returns a count of the live voter replicas; a live
+// replica is determined by checking its node in the provided liveness map. This
+// method is used when indicating under-replication so only voter replicas are
+// considered.
+func calcLiveVoterReplicas(desc *roachpb.RangeDescriptor, livenessMap IsLiveMap) int {
   var live int
-  for _, rd := range desc.Replicas().Unwrap() {
+  for _, rd := range desc.Replicas().Voters() {
     if livenessMap[rd.NodeID].IsLive {
       live++
     }
diff --git a/pkg/storage/replica_raft_quiesce.go b/pkg/storage/replica_raft_quiesce.go
index b38a48370b94..17a8d63613d1 100644
--- a/pkg/storage/replica_raft_quiesce.go
+++ b/pkg/storage/replica_raft_quiesce.go
@@ -255,7 +255,7 @@ func shouldReplicaQuiesce(
   }

   var foundSelf bool
-  for _, rep := range q.descRLocked().Replicas().Unwrap() {
+  for _, rep := range q.descRLocked().Replicas().All() {
     if uint64(rep.ReplicaID) == status.ID {
       foundSelf = true
     }
diff --git a/pkg/storage/replica_raftstorage.go b/pkg/storage/replica_raftstorage.go
index 86804b4d3a56..494e4a6cd04f 100644
--- a/pkg/storage/replica_raftstorage.go
+++ b/pkg/storage/replica_raftstorage.go
@@ -66,7 +66,7 @@ func (r *replicaRaftStorage) InitialState() (raftpb.HardState, raftpb.ConfState,
     return raftpb.HardState{}, raftpb.ConfState{}, err
   }
   var cs raftpb.ConfState
-  for _, rep := range r.mu.state.Desc.Replicas().Unwrap() {
+  for _, rep := range r.mu.state.Desc.Replicas().All() {
     cs.Nodes = append(cs.Nodes, uint64(rep.ReplicaID))
   }

@@ -536,7 +536,7 @@ func snapshot(

   // Synthesize our raftpb.ConfState from desc.
   var cs raftpb.ConfState
-  for _, rep := range desc.Replicas().Unwrap() {
+  for _, rep := range desc.Replicas().All() {
     cs.Nodes = append(cs.Nodes, uint64(rep.ReplicaID))
   }

diff --git a/pkg/storage/replicate_queue.go b/pkg/storage/replicate_queue.go
index 3e6c009c3d3e..3126749ca73b 100644
--- a/pkg/storage/replicate_queue.go
+++ b/pkg/storage/replicate_queue.go
@@ -213,9 +213,11 @@ func (rq *replicateQueue) shouldQueue(

   // If the lease is valid, check to see if we should transfer it.
   if lease, _ := repl.GetLease(); repl.IsLeaseValid(lease, now) {
+    // Learner replicas aren't allowed to become the leaseholder or raft leader,
+    // so only consider the `Voters` replicas.
     if rq.canTransferLease() &&
       rq.allocator.ShouldTransferLease(
-        ctx, zone, desc.Replicas().Unwrap(), lease.Replica.StoreID, desc.RangeID, repl.leaseholderStats) {
+        ctx, zone, desc.Replicas().Voters(), lease.Replica.StoreID, desc.RangeID, repl.leaseholderStats) {
       log.VEventf(ctx, 2, "lease transfer needed, enqueuing")
       return true, 0
     }
@@ -346,7 +348,7 @@ func (rq *replicateQueue) processOneChange(
     }
     rq.metrics.AddReplicaCount.Inc(1)
     log.VEventf(ctx, 1, "adding replica %+v due to under-replication: %s",
-      newReplica, rangeRaftProgress(repl.RaftStatus(), desc.Replicas().Unwrap()))
+      newReplica, rangeRaftProgress(repl.RaftStatus(), desc.Replicas()))
     if err := rq.addReplica(
       ctx,
       repl,
@@ -387,7 +389,7 @@ func (rq *replicateQueue) processOneChange(
       }
       candidates = filterUnremovableReplicas(raftStatus, desc.Replicas().Unwrap(), lastReplAdded)
       log.VEventf(ctx, 3, "filtered unremovable replicas from %v to get %v as candidates for removal: %s",
-        desc.Replicas(), candidates, rangeRaftProgress(raftStatus, desc.Replicas().Unwrap()))
+        desc.Replicas(), candidates, rangeRaftProgress(raftStatus, desc.Replicas()))
       if len(candidates) > 0 {
         break
       }
@@ -415,7 +417,7 @@ func (rq *replicateQueue) processOneChange(
     if len(candidates) == 0 {
       // If we timed out and still don't have any valid candidates, give up.
       return false, errors.Errorf("no removable replicas from range that needs a removal: %s",
-        rangeRaftProgress(repl.RaftStatus(), desc.Replicas().Unwrap()))
+        rangeRaftProgress(repl.RaftStatus(), desc.Replicas()))
     }

     removeReplica, details, err := rq.allocator.RemoveTarget(ctx, zone, candidates, rangeInfo)
@@ -452,7 +454,7 @@ func (rq *replicateQueue) processOneChange(
     } else {
       rq.metrics.RemoveReplicaCount.Inc(1)
       log.VEventf(ctx, 1, "removing replica %+v due to over-replication: %s",
-        removeReplica, rangeRaftProgress(repl.RaftStatus(), desc.Replicas().Unwrap()))
+        removeReplica, rangeRaftProgress(repl.RaftStatus(), desc.Replicas()))
       target := roachpb.ReplicationTarget{
         NodeID: removeReplica.NodeID,
         StoreID: removeReplica.StoreID,
@@ -539,7 +541,7 @@ func (rq *replicateQueue) processOneChange(
     }
     rq.metrics.RebalanceReplicaCount.Inc(1)
     log.VEventf(ctx, 1, "rebalancing to %+v: %s",
-      rebalanceReplica, rangeRaftProgress(repl.RaftStatus(), desc.Replicas().Unwrap()))
+      rebalanceReplica, rangeRaftProgress(repl.RaftStatus(), desc.Replicas()))
     if err := rq.addReplica(
       ctx,
       repl,
@@ -600,7 +602,7 @@ func (rq *replicateQueue) findTargetAndTransferLease(
   zone *config.ZoneConfig,
   opts transferLeaseOptions,
 ) (bool, error) {
-  candidates := filterBehindReplicas(repl.RaftStatus(), desc.Replicas().Unwrap())
+  // Learner replicas aren't allowed to become the leaseholder or raft leader,
+  // so only consider the `Voters` replicas.
+  candidates := filterBehindReplicas(repl.RaftStatus(), desc.Replicas().Voters())
   target := rq.allocator.TransferLeaseTarget(
     ctx,
     zone,
@@ -702,7 +706,7 @@ func (rq *replicateQueue) purgatoryChan() <-chan time.Time {

 // rangeRaftStatus pretty-prints the Raft progress (i.e. Raft log position) of
 // the replicas.
-func rangeRaftProgress(raftStatus *raft.Status, replicas []roachpb.ReplicaDescriptor) string {
+func rangeRaftProgress(raftStatus *raft.Status, replicas roachpb.ReplicaDescriptors) string {
   if raftStatus == nil {
     return "[no raft status]"
   } else if len(raftStatus.Progress) == 0 {
@@ -710,7 +714,7 @@ func rangeRaftProgress(raftStatus *raft.Status, replicas []roachpb.ReplicaDescri
   }
   var buf bytes.Buffer
   buf.WriteString("[")
-  for i, r := range replicas {
+  for i, r := range replicas.All() {
     if i > 0 {
       buf.WriteString(", ")
     }
diff --git a/pkg/storage/store.go b/pkg/storage/store.go
index bfea43aff073..a21edf236f60 100644
--- a/pkg/storage/store.go
+++ b/pkg/storage/store.go
@@ -1044,7 +1044,9 @@ func (s *Store) SetDraining(drain bool) {
         break
       }

-      needsLeaseTransfer := len(r.Desc().Replicas().Unwrap()) > 1 &&
+      // Learner replicas aren't allowed to become the leaseholder or raft
+      // leader, so only consider the `Voters` replicas.
+      needsLeaseTransfer := len(r.Desc().Replicas().Voters()) > 1 &&
         drainingLease.OwnedBy(s.StoreID()) &&
         r.IsLeaseValid(drainingLease, s.Clock().Now())

@@ -2196,7 +2198,7 @@ func splitPostApply(
   r.store.replicateQueue.MaybeAddAsync(ctx, r, now)
   r.store.replicateQueue.MaybeAddAsync(ctx, rightRng, now)

-  if len(split.RightDesc.Replicas().Unwrap()) == 1 {
+  if len(split.RightDesc.Replicas().All()) == 1 {
     // TODO(peter): In single-node clusters, we enqueue the right-hand side of
     // the split (the new range) for Raft processing so that the corresponding
     // Raft group is created. This shouldn't be necessary for correctness, but
@@ -3661,7 +3663,7 @@ func (s *Store) nodeIsLiveCallback(nodeID roachpb.NodeID) {

   s.mu.replicas.Range(func(k int64, v unsafe.Pointer) bool {
     r := (*Replica)(v)
-    for _, rep := range r.Desc().Replicas().Unwrap() {
+    for _, rep := range r.Desc().Replicas().All() {
       if rep.NodeID == nodeID {
         r.unquiesce()
       }
diff --git a/pkg/testutils/testcluster/testcluster.go b/pkg/testutils/testcluster/testcluster.go
index f450d78e47aa..761efbaf6db9 100644
--- a/pkg/testutils/testcluster/testcluster.go
+++ b/pkg/testutils/testcluster/testcluster.go
@@ -451,8 +451,8 @@ func (tc *TestCluster) FindRangeLease(
     }
   } else {
     hint = &roachpb.ReplicationTarget{
-      NodeID: rangeDesc.Replicas().Unwrap()[0].NodeID,
-      StoreID: rangeDesc.Replicas().Unwrap()[0].StoreID}
+      NodeID: rangeDesc.Replicas().All()[0].NodeID,
+      StoreID: rangeDesc.Replicas().All()[0].StoreID}
   }

   // Find the server indicated by the hint and send a LeaseInfoRequest through
@@ -509,8 +509,14 @@ func (tc *TestCluster) WaitForSplitAndReplication(startKey roachpb.Key) error {
       return errors.Errorf("expected range start key %s; got %s",
         startKey, desc.StartKey)
     }
-    // Once we've verified the split, make sure that replicas exist.
-    for _, rDesc := range desc.Replicas().Unwrap() {
+    // A learner replica is still up-replicating, so if we have any, we're not
+    // replicated yet.
+    if learnerReplicas := desc.Replicas().Learners(); len(learnerReplicas) > 0 {
+      return errors.Errorf("have %d learners, still replicating %s", len(learnerReplicas), desc)
+    }
+    // Once we've verified the split and that there aren't any learners, make
+    // sure that the voter replicas exist.
+    for _, rDesc := range desc.Replicas().Voters() {
       store, err := tc.findMemberStore(rDesc.StoreID)
       if err != nil {
        return err
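Aside (not part of the patch): a minimal sketch of the voter-only availability check performed in the replica_metrics.go hunk above. The function name rangeIsUnavailable is hypothetical and the sketch assumes it lives in pkg/storage, where IsLiveMap is defined; Voters(), QuorumSize(), and the liveness lookup all come from this diff.

// rangeIsUnavailable distills the quorum check from calcRangeCounter: learners
// never count toward quorum, so only live voters are compared against the
// range's quorum size.
func rangeIsUnavailable(desc *roachpb.RangeDescriptor, livenessMap IsLiveMap) bool {
  live := 0
  for _, rd := range desc.Replicas().Voters() {
    if livenessMap[rd.NodeID].IsLive {
      live++
    }
  }
  return live < desc.Replicas().QuorumSize()
}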