diff --git a/pkg/kv/kvclient/kvcoord/dist_sender.go b/pkg/kv/kvclient/kvcoord/dist_sender.go index 2f8ac8b138db..711b0ab275da 100644 --- a/pkg/kv/kvclient/kvcoord/dist_sender.go +++ b/pkg/kv/kvclient/kvcoord/dist_sender.go @@ -2149,13 +2149,13 @@ func (ds *DistSender) sendToReplicas( ba.Replica = curReplica ba.RangeID = desc.RangeID // Communicate to the server the information our cache has about the - // range. If it's stale, the serve will return an update. + // range. If it's stale, the server will return an update. ba.ClientRangeInfo = roachpb.ClientRangeInfo{ // Note that DescriptorGeneration will be 0 if the cached descriptor // is "speculative" (see DescSpeculative()). Even if the speculation // is correct, we want the serve to return an update, at which point // the cached entry will no longer be "speculative". - DescriptorGeneration: routing.Desc().Generation, + DescriptorGeneration: desc.Generation, // The LeaseSequence will be 0 if the cache doesn't have lease info, // or has a speculative lease. Like above, this asks the server to // return an update. @@ -2167,7 +2167,11 @@ func (ds *DistSender) sendToReplicas( defaultSendClosedTimestampPolicy, ), - ExplicitlyRequested: ba.ClientRangeInfo.ExplicitlyRequested, + // Range info is only returned when ClientRangeInfo is non-empty. + // Explicitly request an update for speculative/missing leases and + // descriptors, or when the client has requested it. + ExplicitlyRequested: ba.ClientRangeInfo.ExplicitlyRequested || + (desc.Generation == 0 && routing.LeaseSeq() == 0), } br, err = transport.SendNext(ctx, ba) ds.maybeIncrementErrCounters(br, err) diff --git a/pkg/kv/kvpb/api.proto b/pkg/kv/kvpb/api.proto index 8c912b827312..790122ae5353 100644 --- a/pkg/kv/kvpb/api.proto +++ b/pkg/kv/kvpb/api.proto @@ -2675,6 +2675,9 @@ message Header { // by the client's DistSender, however it will preserve the value of the field // `ExplicitlyRequested` so that requests passed to DistSender can request // `RangeInfos` if desired. + // + // If empty, range info is never returned (from 23.2 onwards). Use + // ExplicitlyRequested to force an update for an otherwise-empty field. ClientRangeInfo client_range_info = 17 [(gogoproto.nullable) = false]; // bounded_staleness is set when a read-only batch is performing a bounded // staleness read and wants its timestamp to be chosen dynamically, based diff --git a/pkg/kv/kvserver/BUILD.bazel b/pkg/kv/kvserver/BUILD.bazel index d80bc9dd7796..ad8b614570e6 100644 --- a/pkg/kv/kvserver/BUILD.bazel +++ b/pkg/kv/kvserver/BUILD.bazel @@ -263,6 +263,7 @@ go_test( "closed_timestamp_test.go", "consistency_queue_test.go", "debug_print_test.go", + "errors_test.go", "gossip_test.go", "helpers_test.go", "intent_resolver_integration_test.go", diff --git a/pkg/kv/kvserver/errors_test.go b/pkg/kv/kvserver/errors_test.go new file mode 100644 index 000000000000..00272e4e40d4 --- /dev/null +++ b/pkg/kv/kvserver/errors_test.go @@ -0,0 +1,29 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package kvserver + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/redact" + "github.com/stretchr/testify/require" +) + +func TestErrorFormatting(t *testing.T) { + defer leaktest.AfterTest(t)() + + var e error = decommissionPurgatoryError{errors.New("hello")} + require.Equal(t, "hello", redact.Sprint(e).Redact().StripMarkers()) + e = rangeMergePurgatoryError{errors.New("hello")} + require.Equal(t, "hello", redact.Sprint(e).Redact().StripMarkers()) +} diff --git a/pkg/kv/kvserver/merge_queue.go b/pkg/kv/kvserver/merge_queue.go index b4aa68478aad..ce1e2141c971 100644 --- a/pkg/kv/kvserver/merge_queue.go +++ b/pkg/kv/kvserver/merge_queue.go @@ -181,6 +181,13 @@ func (mq *mergeQueue) shouldQueue( // indicate that the error should send the range to purgatory. type rangeMergePurgatoryError struct{ error } +var _ errors.SafeFormatter = decommissionPurgatoryError{} + +func (e rangeMergePurgatoryError) SafeFormatError(p errors.Printer) (next error) { + p.Print(e.error) + return nil +} + func (rangeMergePurgatoryError) PurgatoryErrorMarker() {} var _ PurgatoryError = rangeMergePurgatoryError{} diff --git a/pkg/kv/kvserver/replica_send.go b/pkg/kv/kvserver/replica_send.go index 764199860c75..c86a2691192e 100644 --- a/pkg/kv/kvserver/replica_send.go +++ b/pkg/kv/kvserver/replica_send.go @@ -15,6 +15,7 @@ import ( "reflect" "runtime/pprof" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency" @@ -307,12 +308,22 @@ func (r *Replica) maybeCommitWaitBeforeCommitTrigger( func (r *Replica) maybeAddRangeInfoToResponse( ctx context.Context, ba *kvpb.BatchRequest, br *kvpb.BatchResponse, ) { - // Ignore lease requests. These are submitted directly to the replica, - // bypassing the DistSender. They don't need range info returned, but their - // ClientRangeInfo is always empty, so they'll otherwise always get it. - if ba.IsSingleRequestLeaseRequest() { + // Only return range info if ClientRangeInfo is non-empty. In particular, we + // don't want to populate this for lease requests, since these bypass + // DistSender and never use ClientRangeInfo. + // + // From 23.2, all DistSenders ensure ExplicitlyRequested is set when otherwise + // empty. Fall back to check for lease requests, to avoid 23.1 regressions. + if r.ClusterSettings().Version.IsActive(ctx, clusterversion.V23_2) { + if ba.ClientRangeInfo == (roachpb.ClientRangeInfo{}) { + return + } + } else if ba.IsSingleRequestLeaseRequest() { + // TODO(erikgrinaker): Remove this branch when 23.1 support is dropped. + _ = clusterversion.V23_1 return } + // Compare the client's info with the replica's info to detect if the client // has stale knowledge. Note that the client can have more recent knowledge // than the replica in case this is a follower. diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index e40285d3b2eb..a8be2fb92394 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -253,7 +253,7 @@ func (tc *testContext) SendWrapped(args kvpb.Request) (kvpb.Response, *kvpb.Erro return tc.SendWrappedWith(kvpb.Header{}, args) } -// addBogusReplicaToRangeDesc modifies the range descriptor to include a second +// addBogusReplicaToRangeDesc modifies the range descriptor to include an additional // replica. This is useful for tests that want to pretend they're transferring // the range lease away, as the lease can only be obtained by Replicas which are // part of the range descriptor. @@ -261,15 +261,17 @@ func (tc *testContext) SendWrapped(args kvpb.Request) (kvpb.Response, *kvpb.Erro func (tc *testContext) addBogusReplicaToRangeDesc( ctx context.Context, ) (roachpb.ReplicaDescriptor, error) { - secondReplica := roachpb.ReplicaDescriptor{ - NodeID: 2, - StoreID: 2, - ReplicaID: 2, - } oldDesc := *tc.repl.Desc() + newID := oldDesc.NextReplicaID + newReplica := roachpb.ReplicaDescriptor{ + NodeID: roachpb.NodeID(newID), + StoreID: roachpb.StoreID(newID), + ReplicaID: newID, + } newDesc := oldDesc - newDesc.InternalReplicas = append(newDesc.InternalReplicas, secondReplica) - newDesc.NextReplicaID = 3 + newDesc.InternalReplicas = append(newDesc.InternalReplicas, newReplica) + newDesc.NextReplicaID++ + newDesc.IncrementGeneration() dbDescKV, err := tc.store.DB().Get(ctx, keys.RangeDescriptorKey(oldDesc.StartKey)) if err != nil { @@ -303,7 +305,7 @@ func (tc *testContext) addBogusReplicaToRangeDesc( tc.repl.assertStateRaftMuLockedReplicaMuRLocked(ctx, tc.engine) tc.repl.mu.RUnlock() tc.repl.raftMu.Unlock() - return secondReplica, nil + return newReplica, nil } func newTransaction( @@ -13867,24 +13869,54 @@ func TestRangeInfoReturned(t *testing.T) { var tc testContext tc.Start(ctx, t, stopper) + // Add a couple of bogus configuration changes to bump the generation to 2, + // and request a new lease to bump the lease sequence to 2. + _, err := tc.addBogusReplicaToRangeDesc(ctx) + require.NoError(t, err) + _, err = tc.addBogusReplicaToRangeDesc(ctx) + require.NoError(t, err) + + { + lease, _ := tc.repl.GetLease() + tc.repl.RevokeLease(ctx, lease.Sequence) + + tc.repl.mu.Lock() + st := tc.repl.leaseStatusAtRLocked(ctx, tc.Clock().NowAsClockTimestamp()) + ll := tc.repl.requestLeaseLocked(ctx, st) + tc.repl.mu.Unlock() + select { + case pErr := <-ll.C(): + require.NoError(t, pErr.GoError()) + case <-time.After(5 * time.Second): + t.Fatal("timeout") + } + } + ri := tc.repl.GetRangeInfo(ctx) require.False(t, ri.Lease.Empty()) require.Equal(t, roachpb.LAG_BY_CLUSTER_SETTING, ri.ClosedTimestampPolicy) + require.EqualValues(t, 2, ri.Desc.Generation) + require.EqualValues(t, 2, ri.Lease.Sequence) staleDescGen := ri.Desc.Generation - 1 staleLeaseSeq := ri.Lease.Sequence - 1 wrongCTPolicy := roachpb.LEAD_FOR_GLOBAL_READS - requestLease := ri.Lease - requestLease.Sequence = 0 - for _, test := range []struct { cri roachpb.ClientRangeInfo - req kvpb.Request exp *roachpb.RangeInfo }{ { - // Empty client info. This case shouldn't happen. + // Empty client info doesn't return any info. This case shouldn't happen + // for requests via DistSender, but can happen e.g. with lease requests + // that are submitted directly to the replica. cri: roachpb.ClientRangeInfo{}, + exp: nil, + }, + { + // ExplicitlyRequested returns lease info. + cri: roachpb.ClientRangeInfo{ + ExplicitlyRequested: true, + }, exp: &ri, }, { @@ -13949,26 +13981,12 @@ func TestRangeInfoReturned(t *testing.T) { }, exp: &ri, }, - { - // RequestLeaseRequest without ClientRangeInfo. These bypass - // DistSender and don't need range info returned. - cri: roachpb.ClientRangeInfo{}, - req: &kvpb.RequestLeaseRequest{ - Lease: requestLease, - PrevLease: ri.Lease, - }, - exp: nil, - }, } { t.Run("", func(t *testing.T) { ba := &kvpb.BatchRequest{} ba.Header.ClientRangeInfo = test.cri - req := test.req - if req == nil { - args := getArgs(roachpb.Key("a")) - req = &args - } - ba.Add(req) + req := getArgs(roachpb.Key("a")) + ba.Add(&req) br, pErr := tc.Sender().Send(ctx, ba) require.NoError(t, pErr.GoError()) if test.exp == nil { diff --git a/pkg/kv/kvserver/replicate_queue.go b/pkg/kv/kvserver/replicate_queue.go index 91ee01244954..db27ffd84c23 100644 --- a/pkg/kv/kvserver/replicate_queue.go +++ b/pkg/kv/kvserver/replicate_queue.go @@ -36,6 +36,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/tracing" "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" "github.com/cockroachdb/errors" + "github.com/cockroachdb/redact" ) // The replicate queue processes replicas that required replication changes. @@ -688,6 +689,13 @@ func (rq *replicateQueue) process( // that the error should send the range to purgatory. type decommissionPurgatoryError struct{ error } +var _ errors.SafeFormatter = decommissionPurgatoryError{} + +func (e decommissionPurgatoryError) SafeFormatError(p errors.Printer) (next error) { + p.Print(e.error) + return nil +} + func (decommissionPurgatoryError) PurgatoryErrorMarker() {} var _ PurgatoryError = decommissionPurgatoryError{} @@ -740,7 +748,7 @@ func (rq *replicateQueue) processOneChangeWithTracing( loggingThreshold := rq.logTracesThresholdFunc(rq.store.cfg.Settings, repl) exceededDuration := loggingThreshold > time.Duration(0) && processDuration > loggingThreshold - var traceOutput string + var traceOutput redact.RedactableString traceLoggingNeeded := (err != nil || exceededDuration) && log.ExpensiveLogEnabled(ctx, 1) if traceLoggingNeeded { // If we have tracing spans from execChangeReplicasTxn, filter it from @@ -749,7 +757,7 @@ func (rq *replicateQueue) processOneChangeWithTracing( rec = filterTracingSpans(sp.GetConfiguredRecording(), replicaChangeTxnGetDescOpName, replicaChangeTxnUpdateDescOpName, ) - traceOutput = fmt.Sprintf("\ntrace:\n%s", rec) + traceOutput = redact.Sprintf("\ntrace:\n%s", rec) } if err != nil { diff --git a/pkg/roachpb/data.proto b/pkg/roachpb/data.proto index b84387e2e22c..9054cd8bdd84 100644 --- a/pkg/roachpb/data.proto +++ b/pkg/roachpb/data.proto @@ -776,7 +776,7 @@ message ClientRangeInfo { int64 lease_sequence = 2 [(gogoproto.casttype) = "LeaseSequence"]; RangeClosedTimestampPolicy closed_timestamp_policy = 3; // ExplicitlyRequested causes range info to be returned even if other fields - // are up-to-date. + // are up-to-date or empty. bool explicitly_requested = 4; }