Skip to content

Commit

Permalink
kv: Handle invalid lease transfer checks
Browse files Browse the repository at this point in the history
Previously it would be a programming error when checking if the
leaseholder move was valid if the leaseholder wasn't one of the
replicas. This check was overly strict. It also makes sense to
add additional checks so we don't attempt lease transfers if
we don't own a valid lease as that will never succeed.

Release note: None
  • Loading branch information
andrewbaptist committed Sep 22, 2022
1 parent d390148 commit bfbf3c7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 31 deletions.
10 changes: 6 additions & 4 deletions pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1619,9 +1619,6 @@ func (a *Allocator) ValidLeaseTargets(
// leaseholderShouldMoveDueToPreferences returns true if the current leaseholder
// is in violation of lease preferences _that can otherwise be satisfied_ by
// some existing replica.
//
// INVARIANT: This method should only be called with an `allExistingReplicas`
// slice that contains `leaseRepl`.
func (a *Allocator) leaseholderShouldMoveDueToPreferences(
ctx context.Context,
conf roachpb.SpanConfig,
Expand All @@ -1641,8 +1638,13 @@ func (a *Allocator) leaseholderShouldMoveDueToPreferences(
break
}
}
// If the leaseholder is not in the descriptor, then we should not move the
// lease since we don't know who the leaseholder is. This normally doesn't
// happen, but can occasionally since the loading of the leaseholder and of
// the existing replicas aren't always under a consistent lock.
if !leaseholderInExisting {
log.KvDistribution.Errorf(ctx, "programming error: expected leaseholder store to be in the slice of existing replicas")
log.KvDistribution.Info(ctx, "expected leaseholder store to be in the slice of existing replicas")
return false
}

// Exclude suspect/draining/dead stores.
Expand Down
26 changes: 13 additions & 13 deletions pkg/kv/kvserver/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -1483,25 +1483,25 @@ func (r *Replica) maybeExtendLeaseAsyncLocked(ctx context.Context, st kvserverpb
_ = r.requestLeaseLocked(ctx, st)
}

// checkLeaseRespectsPreferences checks if current replica owns the lease and
// if it respects the lease preferences defined in the span config. If there are no
// preferences defined then it will return true and consider that to be in-conformance.
func (r *Replica) checkLeaseRespectsPreferences(ctx context.Context) (bool, error) {
if !r.OwnsValidLease(ctx, r.store.cfg.Clock.NowAsClockTimestamp()) {
return false, errors.Errorf("replica %s is not the leaseholder, cannot check lease preferences", r)
// leaseViolatesPreferences checks if current replica owns the lease and if it
// violates the lease preferences defined in the span config. If there is an
// error or no preferences defined then it will return false and consider that
// to be in-conformance.
func (r *Replica) leaseViolatesPreferences(ctx context.Context) bool {
storeDesc, err := r.store.Descriptor(ctx, false /* useCached */)
if err != nil {
log.Infof(ctx, "Unable to load the descriptor %v: cannot check if lease violates preference", err)
return false
}
conf := r.SpanConfig()
if len(conf.LeasePreferences) == 0 {
return true, nil
}
storeDesc, err := r.store.Descriptor(ctx, false /* useCached */)
if err != nil {
return false, err
return false
}
for _, preference := range conf.LeasePreferences {
if constraint.ConjunctionsCheck(*storeDesc, preference.Constraints) {
return true, nil
return false
}
}
return false, nil
// We have at lease one preference set up, but we don't satisfy any.
return true
}
26 changes: 12 additions & 14 deletions pkg/kv/kvserver/replicate_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -637,9 +637,7 @@ func (rq *replicateQueue) shouldQueue(
}

// If the lease is valid, check to see if we should transfer it.
status := repl.LeaseStatusAt(ctx, now)
if status.IsValid() &&
rq.canTransferLeaseFrom(ctx, repl) &&
if rq.canTransferLeaseFrom(ctx, repl) &&
rq.allocator.ShouldTransferLease(
ctx,
conf,
Expand All @@ -650,7 +648,7 @@ func (rq *replicateQueue) shouldQueue(
log.KvDistribution.VEventf(ctx, 2, "lease transfer needed, enqueuing")
return true, 0
}
if !status.IsValid() {
if !repl.LeaseStatusAt(ctx, now).IsValid() {
// The lease for this range is currently invalid, if this replica is
// the raft leader then it is necessary that it acquires the lease. We
// enqueue it regardless of being a leader or follower, where the
Expand Down Expand Up @@ -679,9 +677,7 @@ func (rq *replicateQueue) process(
// usually signaling that a rebalancing reservation could not be made with the
// selected target.
for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
requeue, err := rq.processOneChangeWithTracing(
ctx, repl, rq.canTransferLeaseFrom, false /* scatter */, false, /* dryRun */
)
requeue, err := rq.processOneChangeWithTracing(ctx, repl)
if isSnapshotError(err) {
// If ChangeReplicas failed because the snapshot failed, we attempt to
// retry the operation. The most likely causes of the snapshot failing
Expand Down Expand Up @@ -783,17 +779,16 @@ func filterTracingSpans(rec tracingpb.Recording, opNamesToFilter ...string) trac
// logging the resulting traces to the DEV channel in the case of errors or
// when the configured log traces threshold is exceeded.
func (rq *replicateQueue) processOneChangeWithTracing(
ctx context.Context,
repl *Replica,
canTransferLeaseFrom func(ctx context.Context, repl *Replica) bool,
scatter, dryRun bool,
ctx context.Context, repl *Replica,
) (requeue bool, _ error) {
processStart := timeutil.Now()
ctx, sp := tracing.EnsureChildSpan(ctx, rq.Tracer, "process replica",
tracing.WithRecording(tracingpb.RecordingVerbose))
defer sp.Finish()

requeue, err := rq.processOneChange(ctx, repl, canTransferLeaseFrom, scatter, dryRun)
requeue, err := rq.processOneChange(ctx, repl, rq.canTransferLeaseFrom,
false /* scatter */, false, /* dryRun */
)

// Utilize a new background context (properly annotated) to avoid writing
// traces from a child context into its parent.
Expand Down Expand Up @@ -1923,11 +1918,14 @@ func (rq *replicateQueue) changeReplicas(
// replica. It considers two factors if the replica is in -conformance with
// lease preferences and the last time a transfer occurred to avoid thrashing.
func (rq *replicateQueue) canTransferLeaseFrom(ctx context.Context, repl *Replica) bool {
if !repl.OwnsValidLease(ctx, repl.store.cfg.Clock.NowAsClockTimestamp()) {
// This replica is not the leaseholder, so it can't transfer the lease.
return false
}
// Do a best effort check to see if this replica conforms to the configured
// lease preferences (if any), if it does not we want to encourage more
// aggressive lease movement and not delay it.
respectsLeasePreferences, err := repl.checkLeaseRespectsPreferences(ctx)
if err == nil && !respectsLeasePreferences {
if repl.leaseViolatesPreferences(ctx) {
return true
}
if lastLeaseTransfer := rq.lastLeaseTransfer.Load(); lastLeaseTransfer != nil {
Expand Down

0 comments on commit bfbf3c7

Please sign in to comment.