Skip to content

Commit

Permalink
kvserver/batcheval: restore lease if transfer fails
Browse files Browse the repository at this point in the history
Before evaluating a TransferLeaseRequest, we revoke the existing lease.
This is needed so that further reads are not permitted before the new
lease takes effect without being captured in the timestamp cache summary
carried by the transfer.

This patch makes it so that we restore the original lease if the
evaluation of the transfer fails(*), as there doesn't seem to be any
reason to force the replica to acquire a new lease in that case. This
patch is strictly about evaluation failing, and not about failures or
ambiguity during application of the lease transfer Raft command.

(*) It's not common for the evaluation of a transfer to fail. We see
something going wrong with a transfer (either with the evaluation or
with the proposal) in cockroachdb#83687. Evaluation fails on inconsistent next
leases, which I think can happen under transfer races.

Release note: None
  • Loading branch information
andreimatei committed Jul 5, 2022
1 parent 2b2c6e3 commit ea6b276
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 9 deletions.
54 changes: 54 additions & 0 deletions pkg/kv/kvserver/batcheval/cmd_lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,60 @@ func TestLeaseTransferForwardsStartTime(t *testing.T) {
})
}

// Test that, in case a TransferLease request fails to evaluate, the previous
// lease that was revoked prior to evaluation is re-instituted.
func TestLeaseIsReinstitutedOnFailedTransferEval(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()
db := storage.NewDefaultInMemForTesting()
defer db.Close()
batch := db.NewBatch()
defer batch.Close()

replicas := []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, Type: roachpb.ReplicaTypeVoterFull(), ReplicaID: 1},
{NodeID: 2, StoreID: 2, Type: roachpb.ReplicaTypeVoterFull(), ReplicaID: 2},
}
desc := roachpb.RangeDescriptor{}
desc.SetReplicas(roachpb.MakeReplicaSet(replicas))
clock := hlc.NewClock(timeutil.NewManualTime(timeutil.Unix(0, 123)), time.Nanosecond /* maxOffset */)

prevLease := roachpb.Lease{
Replica: replicas[0],
Sequence: 1,
}
nextLease := roachpb.Lease{
Replica: replicas[1],
Start: clock.NowAsClockTimestamp(),
}
// Make the lease invalid by assigning it low expiration time. This will cause
// evaluation to fail.
exp := nextLease.Start.ToTimestamp().Add(-time.Second.Nanoseconds(), 0)
nextLease.Expiration = &exp

evalCtx := &MockEvalCtx{
ClusterSettings: cluster.MakeTestingClusterSettings(),
StoreID: 1,
Desc: &desc,
Clock: clock,
Lease: prevLease,
}
cArgs := CommandArgs{
EvalCtx: evalCtx.EvalContext(),
Args: &roachpb.TransferLeaseRequest{
Lease: nextLease,
PrevLease: prevLease,
},
}

_, err := TransferLease(ctx, batch, cArgs, nil /* resp */)
require.Error(t, err)
require.Equal(t, prevLease.Sequence, evalCtx.RevokedLeaseSeq)
require.Equal(t, prevLease.Sequence, evalCtx.RestoredLeaseSeq)
}

func TestCheckCanReceiveLease(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down
8 changes: 6 additions & 2 deletions pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func TransferLease(
// such cases, we could detect that here and fail fast, but it's safe and
// easier to just let the TransferLease be proposed under the wrong lease
// and be rejected with the correct error below Raft.
cArgs.EvalCtx.RevokeLease(ctx, args.PrevLease.Sequence)
restoreLease := cArgs.EvalCtx.RevokeLease(ctx, args.PrevLease.Sequence)

// Forward the lease's start time to a current clock reading. At this
// point, we're holding latches across the entire range, we know that
Expand Down Expand Up @@ -129,6 +129,10 @@ func TransferLease(
priorReadSum.Merge(rspb.FromTimestamp(newLease.Start.ToTimestamp()))

log.VEventf(ctx, 2, "lease transfer: prev lease: %+v, new lease: %+v", prevLease, newLease)
return evalNewLease(ctx, cArgs.EvalCtx, readWriter, cArgs.Stats,
res, err := evalNewLease(ctx, cArgs.EvalCtx, readWriter, cArgs.Stats,
newLease, prevLease, &priorReadSum, true /* isTransfer */)
if err != nil {
restoreLease()
}
return res, err
}
11 changes: 9 additions & 2 deletions pkg/kv/kvserver/batcheval/eval_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ type EvalContext interface {
// RevokeLease stops the replica from using its current lease, if that lease
// matches the provided lease sequence. All future calls to leaseStatus on
// this node with the current lease will now return a PROSCRIBED status.
RevokeLease(context.Context, roachpb.LeaseSequence)
//
// Returns a function that restores the lease to a valid status. This can be
// called if the revocation proves to not be necessary.
RevokeLease(context.Context, roachpb.LeaseSequence) func()

// WatchForMerge arranges to block all requests until the in-progress merge
// completes. Returns an error if no in-progress merge is detected.
Expand Down Expand Up @@ -179,6 +182,7 @@ type MockEvalCtx struct {
CurrentReadSummary rspb.ReadSummary
ClosedTimestamp hlc.Timestamp
RevokedLeaseSeq roachpb.LeaseSequence
RestoredLeaseSeq roachpb.LeaseSequence
MaxBytes int64
ApproxDiskBytes uint64
}
Expand Down Expand Up @@ -291,8 +295,11 @@ func (m *mockEvalCtxImpl) GetExternalStorageFromURI(
) (cloud.ExternalStorage, error) {
panic("unimplemented")
}
func (m *mockEvalCtxImpl) RevokeLease(_ context.Context, seq roachpb.LeaseSequence) {
func (m *mockEvalCtxImpl) RevokeLease(_ context.Context, seq roachpb.LeaseSequence) func() {
m.RevokedLeaseSeq = seq
return func() {
m.RestoredLeaseSeq = seq
}
}
func (m *mockEvalCtxImpl) WatchForMerge(ctx context.Context) error {
panic("unimplemented")
Expand Down
6 changes: 4 additions & 2 deletions pkg/kv/kvserver/replica_eval_context_span.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,10 @@ func (rec *SpanSetReplicaEvalContext) GetExternalStorageFromURI(
}

// RevokeLease stops the replica from using its current lease.
func (rec *SpanSetReplicaEvalContext) RevokeLease(ctx context.Context, seq roachpb.LeaseSequence) {
rec.i.RevokeLease(ctx, seq)
func (rec *SpanSetReplicaEvalContext) RevokeLease(
ctx context.Context, seq roachpb.LeaseSequence,
) func() {
return rec.i.RevokeLease(ctx, seq)
}

// WatchForMerge arranges to block all requests until the in-progress merge
Expand Down
20 changes: 17 additions & 3 deletions pkg/kv/kvserver/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -998,11 +998,25 @@ func (r *Replica) getLeaseRLocked() (roachpb.Lease, roachpb.Lease) {
// RevokeLease stops the replica from using its current lease, if that lease
// matches the provided lease sequence. All future calls to leaseStatus on this
// node with the current lease will now return a PROSCRIBED status.
func (r *Replica) RevokeLease(ctx context.Context, seq roachpb.LeaseSequence) {
//
// Returns a function that restores the lease to a valid status. This can be
// called if the revocation proves to not be necessary.
func (r *Replica) RevokeLease(ctx context.Context, seq roachpb.LeaseSequence) func() {
r.mu.Lock()
defer r.mu.Unlock()
if r.mu.state.Lease.Sequence == seq {
r.mu.minLeaseProposedTS = r.Clock().NowAsClockTimestamp()
if r.mu.state.Lease.Sequence != seq {
return func() {}
}
now := r.Clock().NowAsClockTimestamp()
origMinProposedTS := r.mu.minLeaseProposedTS
r.mu.minLeaseProposedTS = now
return func() {
r.mu.Lock()
defer r.mu.Unlock()
// Make sure that we didn't race with another revocation.
if r.mu.state.Lease.Sequence == seq && r.mu.minLeaseProposedTS == now {
r.mu.minLeaseProposedTS = origMinProposedTS
}
}
}

Expand Down

0 comments on commit ea6b276

Please sign in to comment.