Skip to content

Commit

Permalink
kvclient: ship range descriptor in NLHE
Browse files Browse the repository at this point in the history
We recently changed a NLHE to carry the range descriptor generation to
ensure to avoid thrashing the range cache if the replica had a stale
view of the range. In cockroachdb#75742, we saw issues caused by the dist sender
having a stale range descriptor. This patch switches from sending just
the descriptor generation back in NLHE to shipping back the entire
range descriptor. In the future, we may want to solve the issue above
by updating the range cache with the fresher range descriptor thus
skipping a cache eviction and range descriptor lookup.

References cockroachdb#75742

Release note: None
  • Loading branch information
arulajmani authored and RajivTS committed Mar 6, 2022
1 parent cba706b commit a11f364
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 12 deletions.
4 changes: 2 additions & 2 deletions pkg/kv/kvclient/kvcoord/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -2154,10 +2154,10 @@ func (ds *DistSender) sendToReplicas(

var updatedLeaseholder bool
if tErr.Lease != nil {
updatedLeaseholder = routing.UpdateLease(ctx, tErr.Lease, tErr.DescriptorGeneration)
updatedLeaseholder = routing.UpdateLease(ctx, tErr.Lease, tErr.RangeDesc.Generation)
} else if tErr.LeaseHolder != nil {
// tErr.LeaseHolder might be set when tErr.Lease isn't.
routing.UpdateLeaseholder(ctx, *tErr.LeaseHolder, tErr.DescriptorGeneration)
routing.UpdateLeaseholder(ctx, *tErr.LeaseHolder, tErr.RangeDesc.Generation)
updatedLeaseholder = true
}
// Move the new leaseholder to the head of the queue for the next
Expand Down
7 changes: 6 additions & 1 deletion pkg/kv/kvclient/kvcoord/dist_sender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,12 @@ func TestDistSenderIgnoresNLHEBasedOnOldRangeGeneration(t *testing.T) {
calls = append(calls, ba.Replica.NodeID)
if ba.Replica.NodeID == 2 {
reply := &roachpb.BatchResponse{}
err := &roachpb.NotLeaseHolderError{Lease: &ambiguousLease, DescriptorGeneration: oldGeneration}
err := &roachpb.NotLeaseHolderError{
Lease: &ambiguousLease,
RangeDesc: roachpb.RangeDescriptor{
Generation: oldGeneration,
},
}
reply.Error = roachpb.NewError(err)
return reply, nil
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/kv/kvserver/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -913,9 +913,9 @@ func newNotLeaseHolderError(
l roachpb.Lease, proposerStoreID roachpb.StoreID, rangeDesc *roachpb.RangeDescriptor, msg string,
) *roachpb.NotLeaseHolderError {
err := &roachpb.NotLeaseHolderError{
RangeID: rangeDesc.RangeID,
DescriptorGeneration: rangeDesc.Generation,
CustomMsg: msg,
RangeID: rangeDesc.RangeID,
RangeDesc: *rangeDesc,
CustomMsg: msg,
}
if proposerStoreID != 0 {
err.Replica, _ = rangeDesc.GetReplicaDescriptor(proposerStoreID)
Expand Down
14 changes: 8 additions & 6 deletions pkg/roachpb/errors.proto
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,14 @@ message NotLeaseHolderError {
optional roachpb.Lease lease = 4;
optional int64 range_id = 3 [(gogoproto.nullable) = false,
(gogoproto.customname) = "RangeID", (gogoproto.casttype) = "RangeID"];
// The range descriptor generation of the replica the error originated from.
// Used by the DistSender's RangeCache to determine whether the error was
// returned because the replica had a stale understanding of who the
// leaseholder is.
optional int64 descriptor_generation = 6 [(gogoproto.nullable) = false,
(gogoproto.customname) = "DescriptorGeneration", (gogoproto.casttype) = "RangeGeneration"];
// The range descriptor from the replica the error originated from.
// The generation of the descriptor is used by the DistSender's RangeCache to
// determine whether the error was returned because the replica had a stale
// understanding of who the leaseholder is.
// TOOD(arul): In the future we may want to update the RangeCache if the
// returned range descriptor is newer than what is in the RangeCache. This
// would help us avoid a cache eviction/descriptor lookup.
optional roachpb.RangeDescriptor range_desc = 6 [(gogoproto.nullable)=false];
// If set, the Error() method will return this instead of composing its
// regular spiel. Useful because we reuse this error when rejecting a command
// because the lease under which its application was attempted is different
Expand Down

0 comments on commit a11f364

Please sign in to comment.