Skip to content

Commit

Permalink
kv: consolidate range lease validation
Browse files Browse the repository at this point in the history
Closes #123498.
Informs #118435.

This commit extracts lease request and lease transfer validation into
the `kv/kvserver/leases` package which was introduced in #124682. This
avoids the scattered leases validity checks which were previously spread
between `replica_range_lease.go`, `replica_proposal_buf.go`
`cmd_lease_request.go`, `cmd_lease_transfer.go`.

This makes the logic easier to understand and easier to test. It will
also make the logic easier to adjust when we make the following two
changes in service of #118435:
- default to rejecting lease requests on unknown leaders
- perform lease request validation above raft, like we do for lease
  transfers

This second change also ties in with Leader leases (#123847). Until we
make that change, we will occasionally need to propose expiration-based
leases (e.g. when we're not the leader) just for them to be rejected by
the proposal buffer.

Release note: None
  • Loading branch information
nvanbenschoten committed Jun 25, 2024
1 parent 9af051e commit a1ac434
Show file tree
Hide file tree
Showing 16 changed files with 990 additions and 468 deletions.
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ go_library(
"//pkg/kv/kvserver/multiqueue",
"//pkg/kv/kvserver/raftentry",
"//pkg/kv/kvserver/raftlog",
"//pkg/kv/kvserver/raftutil",
"//pkg/kv/kvserver/rangefeed",
"//pkg/kv/kvserver/rditer",
"//pkg/kv/kvserver/readsummary",
Expand Down Expand Up @@ -414,6 +413,7 @@ go_test(
"//pkg/kv/kvserver/kvserverbase",
"//pkg/kv/kvserver/kvserverpb",
"//pkg/kv/kvserver/kvstorage",
"//pkg/kv/kvserver/leases",
"//pkg/kv/kvserver/liveness",
"//pkg/kv/kvserver/liveness/livenesspb",
"//pkg/kv/kvserver/load",
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/batcheval/cmd_lease_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func RequestLease(

// If this check is removed at some point, the filtering of learners on the
// sending side would have to be removed as well.
// TODO(nvanbenschoten): move this into leases.Verify.
wasLastLeaseholder := prevLease.Replica.StoreID == newLease.Replica.StoreID
if err := roachpb.CheckCanReceiveLease(
newLease.Replica, cArgs.EvalCtx.Desc().Replicas(), wasLastLeaseholder,
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ func TransferLease(

// If this check is removed at some point, the filtering of learners on the
// sending side would have to be removed as well.
// TODO(nvanbenschoten): move this into leases.Verify.
if err := roachpb.CheckCanReceiveLease(
newLease.Replica, cArgs.EvalCtx.Desc().Replicas(), false, /* wasLastLeaseholder */
); err != nil {
Expand Down
3 changes: 2 additions & 1 deletion pkg/kv/kvserver/client_replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/leases"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
Expand Down Expand Up @@ -1850,7 +1851,7 @@ func TestLeaseExpirationBasedDrainTransferWithProscribed(t *testing.T) {
target := filterArgs.Req.Requests[0].GetTransferLease().Lease.Replica
if target == l.replica0Desc {
failedOnce.Do(func() { close(failedCh) })
return kvpb.NewError(kvserver.NewLeaseTransferRejectedBecauseTargetMayNeedSnapshotError(
return kvpb.NewError(leases.NewLeaseTransferRejectedBecauseTargetMayNeedSnapshotError(
target, raftutil.ReplicaStateProbe))
}
}
Expand Down
11 changes: 11 additions & 0 deletions pkg/kv/kvserver/leases/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ go_library(
name = "leases",
srcs = [
"build.go",
"errors.go",
"status.go",
"verify.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/leases",
visibility = ["//visibility:public"],
Expand All @@ -13,6 +15,8 @@ go_library(
"//pkg/kv/kvserver/kvserverpb",
"//pkg/kv/kvserver/liveness",
"//pkg/kv/kvserver/liveness/livenesspb",
"//pkg/kv/kvserver/raftutil",
"//pkg/raft",
"//pkg/roachpb",
"//pkg/util/hlc",
"//pkg/util/log",
Expand All @@ -26,14 +30,21 @@ go_test(
srcs = [
"build_test.go",
"status_test.go",
"verify_test.go",
],
embed = [":leases"],
deps = [
"//pkg/kv/kvpb",
"//pkg/kv/kvserver/kvserverpb",
"//pkg/kv/kvserver/liveness",
"//pkg/kv/kvserver/liveness/livenesspb",
"//pkg/kv/kvserver/raftutil",
"//pkg/raft",
"//pkg/raft/tracker",
"//pkg/roachpb",
"//pkg/testutils/zerofields",
"//pkg/util/hlc",
"@com_github_cockroachdb_errors//:errors",
"@com_github_stretchr_testify//require",
],
)
96 changes: 87 additions & 9 deletions pkg/kv/kvserver/leases/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
package leases

import (
"context"
"time"

"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
"github.com/cockroachdb/cockroach/pkg/raft"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/errors"
Expand All @@ -25,10 +27,35 @@ import (
// Settings is the set of settings for the leasing subsystem, used when building
// a new lease object.
type Settings struct {
UseExpirationLeases bool
// UseExpirationLeases controls whether this range should be using an
// expiration-based lease.
UseExpirationLeases bool
// TransferExpirationLeases controls whether we transfer expiration-based
// leases that are later upgraded to epoch-based ones or whether we transfer
// epoch-based leases directly.
TransferExpirationLeases bool
ExpToEpochEquiv bool
RangeLeaseDuration time.Duration
// RejectLeaseOnLeaderUnknown controls whether a replica that does not know
// the current raft leader rejects a lease request.
RejectLeaseOnLeaderUnknown bool
// DisableAboveRaftLeaseTransferSafetyChecks, if set, disables the above-raft
// lease transfer safety checks (that verify that we don't transfer leases to
// followers that need a snapshot, etc). The proposal-time checks are not
// affected by this knob.
DisableAboveRaftLeaseTransferSafetyChecks bool
// AllowLeaseProposalWhenNotLeader, if set, allows lease request proposals
// even when the replica inserting that proposal is not the Raft leader. This
// can be used in tests to allow a replica to acquire a lease without first
// moving the Raft leadership to it (e.g. it allows tests to expire leases by
// stopping the old leaseholder's liveness heartbeats and then expect other
// replicas to take the lease without worrying about Raft).
AllowLeaseProposalWhenNotLeader bool
// ExpToEpochEquiv indicates whether an expiration-based lease can be
// considered equivalent to an epoch-based lease during a promotion from
// expiration-based to epoch-based. It is used for mixed-version
// compatibility.
ExpToEpochEquiv bool
// RangeLeaseDuration specifies the range lease duration.
RangeLeaseDuration time.Duration
}

// NodeLiveness is a read-only interface to the node liveness subsystem.
Expand Down Expand Up @@ -59,9 +86,15 @@ type NodeLivenessManipulation struct {
type BuildInput struct {
// Information about the local replica.
LocalStoreID roachpb.StoreID
LocalReplicaID roachpb.ReplicaID
Desc *roachpb.RangeDescriptor
Now hlc.ClockTimestamp
MinLeaseProposedTS hlc.ClockTimestamp

// Information about raft.
RaftStatus *raft.Status
RaftFirstIndex kvpb.RaftIndex

// Information about the previous lease.
PrevLease roachpb.Lease
// PrevLeaseNodeLiveness is set iff PrevLease is an epoch-based lease.
Expand All @@ -71,6 +104,15 @@ type BuildInput struct {

// Information about the (requested) next lease.
NextLeaseHolder roachpb.ReplicaDescriptor

// When set to true, BypassSafetyChecks configures lease transfers to skip
// safety checks that ensure that the transfer target is known to be
// (according to the outgoing leaseholder) alive and sufficiently caught up on
// its log. This option should be used sparingly — typically only by outgoing
// leaseholders who both have some other reason to believe that the target is
// alive and caught up on its log (e.g. they just sent it a snapshot) and also
// can't tolerate rejected lease transfers.
BypassSafetyChecks bool
}

// PrevLocal returns whether the previous lease was held by the local store.
Expand Down Expand Up @@ -147,6 +189,9 @@ func (i BuildInput) validate() error {
return errors.AssertionFailedf("cannot acquire lease from another node "+
"before it has expired: %v", i.PrevLease)
}
if !i.Transfer() && i.BypassSafetyChecks {
return errors.AssertionFailedf("cannot bypass safety checks for lease acquisition/extension")
}
return nil
}

Expand All @@ -170,6 +215,24 @@ func (i BuildInput) validatePrevLeaseExpired() error {
return nil
}

// toVerifyInput converts the BuildInput to a VerifyInput, which is used to
// verify the safety of lease requests. This is a non-lossy conversion, so the
// resulting VerifyInput should be fully populated, which is verified in
// TestInputToVerifyInput.
func (i BuildInput) toVerifyInput() VerifyInput {
return VerifyInput{
LocalStoreID: i.LocalStoreID,
LocalReplicaID: i.LocalReplicaID,
Desc: i.Desc,
RaftStatus: i.RaftStatus,
RaftFirstIndex: i.RaftFirstIndex,
PrevLease: i.PrevLease,
PrevLeaseExpired: i.PrevLeaseExpired,
NextLeaseHolder: i.NextLeaseHolder,
BypassSafetyChecks: i.BypassSafetyChecks,
}
}

// Output is the set of outputs for the lease acquisition process.
type Output struct {
NextLease roachpb.Lease
Expand All @@ -185,16 +248,31 @@ func (o Output) validate(i BuildInput) error {
return nil
}

// Build constructs a new lease based on the input settings and parameters. The
// resulting output will contain the lease to be proposed and any necessary node
// liveness manipulations that must be performed before the lease can be
// requested.
func Build(st Settings, nl NodeLiveness, i BuildInput) (Output, error) {
// Validate the input.
// VerifyAndBuild checks the safety of a lease acquisition or transfer request.
// If the safety checks fail, it returns an error.
//
// If the safety checks pass, it constructs a new lease based on the input
// settings and parameters. The resulting output will contain the lease to be
// proposed and any necessary node liveness manipulations that must be performed
// before the lease can be requested.
func VerifyAndBuild(
ctx context.Context, st Settings, nl NodeLiveness, i BuildInput,
) (Output, error) {
if err := i.validate(); err != nil {
return Output{}, err
}
if i.Transfer() && !st.DisableAboveRaftLeaseTransferSafetyChecks {
// TODO(nvanbenschoten): support build-time verification for lease
// acquisition, not just lease transfers. This currently breaks various
// tests. See #118435.
if err := Verify(ctx, st, i.toVerifyInput()); err != nil {
return Output{}, err
}
}
return build(st, nl, i)
}

func build(st Settings, nl NodeLiveness, i BuildInput) (Output, error) {
// Construct the next lease.
nextLease := roachpb.Lease{
Replica: leaseReplica(i),
Expand Down
58 changes: 53 additions & 5 deletions pkg/kv/kvserver/leases/build_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@ import (

"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
"github.com/cockroachdb/cockroach/pkg/raft"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/testutils/zerofields"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/stretchr/testify/require"
)

var (
repl1 = roachpb.ReplicaDescriptor{NodeID: 1, StoreID: 1, ReplicaID: 1}
repl2 = roachpb.ReplicaDescriptor{NodeID: 2, StoreID: 2, ReplicaID: 2}
desc = roachpb.RangeDescriptor{InternalReplicas: []roachpb.ReplicaDescriptor{repl1, repl2}}
cts10 = hlc.ClockTimestamp{WallTime: 10}
cts20 = hlc.ClockTimestamp{WallTime: 20}
cts30 = hlc.ClockTimestamp{WallTime: 30}
Expand Down Expand Up @@ -111,6 +114,18 @@ func TestInputValidation(t *testing.T) {
},
expErr: "cannot acquire lease from another node before it has expired",
},
{
name: "bypass safety checks for acquisition",
input: BuildInput{
LocalStoreID: repl1.StoreID,
Now: cts20,
PrevLease: roachpb.Lease{Replica: repl2, Expiration: &ts10},
PrevLeaseExpired: true,
NextLeaseHolder: repl1,
BypassSafetyChecks: true,
},
expErr: "cannot bypass safety checks for lease acquisition/extension",
},
{
name: "valid acquisition",
input: BuildInput{
Expand Down Expand Up @@ -158,10 +173,11 @@ func TestInputValidation(t *testing.T) {

func defaultSettings() Settings {
return Settings{
UseExpirationLeases: false,
TransferExpirationLeases: true,
ExpToEpochEquiv: true,
RangeLeaseDuration: 20,
UseExpirationLeases: false,
TransferExpirationLeases: true,
RejectLeaseOnLeaderUnknown: false,
ExpToEpochEquiv: true,
RangeLeaseDuration: 20,
}
}

Expand Down Expand Up @@ -221,7 +237,7 @@ func TestBuild(t *testing.T) {
if nl == nil {
nl = defaultNodeLiveness()
}
out, err := Build(st, nl, tt.input)
out, err := build(st, nl, tt.input)
if tt.expErr == "" {
require.NoError(t, err)
require.Equal(t, tt.expOutput, out)
Expand Down Expand Up @@ -631,3 +647,35 @@ func TestBuild(t *testing.T) {
})
})
}

func TestInputToVerifyInput(t *testing.T) {
cts := hlc.ClockTimestamp{WallTime: 1, Logical: 1}
ts := cts.ToTimestamp()
noZeroBuildInput := BuildInput{
LocalStoreID: 1,
LocalReplicaID: 1,
Desc: &roachpb.RangeDescriptor{},
RaftStatus: &raft.Status{},
RaftFirstIndex: 1,
PrevLease: roachpb.Lease{
Start: cts,
Expiration: &ts,
Replica: roachpb.ReplicaDescriptor{
NodeID: 1, StoreID: 1, ReplicaID: 1, Type: 1,
},
DeprecatedStartStasis: &ts,
ProposedTS: cts,
Epoch: 1,
Sequence: 1,
AcquisitionType: 1,
},
PrevLeaseExpired: true,
NextLeaseHolder: roachpb.ReplicaDescriptor{
NodeID: 1, StoreID: 1, ReplicaID: 1, Type: 1,
},
BypassSafetyChecks: true,
}
verifyInput := noZeroBuildInput.toVerifyInput()
require.NoError(t, zerofields.NoZeroField(verifyInput),
"make sure you update BuildInput.toVerifyInput for the new field")
}
36 changes: 36 additions & 0 deletions pkg/kv/kvserver/leases/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package leases

import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftutil"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/errors"
)

// ErrMarkLeaseTransferRejectedBecauseTargetMayNeedSnapshot indicates that the
// lease transfer failed because the current leaseholder could not prove that
// the lease transfer target did not need a Raft snapshot. In order to prove
// this, the current leaseholder must also be the Raft leader, which is
// periodically requested in maybeTransferRaftLeadershipToLeaseholderLocked.
var ErrMarkLeaseTransferRejectedBecauseTargetMayNeedSnapshot = errors.New(
"lease transfer rejected because the target may need a snapshot")

// NewLeaseTransferRejectedBecauseTargetMayNeedSnapshotError return an error
// indicating that a lease transfer failed because the current leaseholder could
// not prove that the lease transfer target did not need a Raft snapshot.
func NewLeaseTransferRejectedBecauseTargetMayNeedSnapshotError(
target roachpb.ReplicaDescriptor, snapStatus raftutil.ReplicaNeedsSnapshotStatus,
) error {
err := errors.Errorf("refusing to transfer lease to %d because target may need a Raft snapshot: %s",
target, snapStatus)
return errors.Mark(err, ErrMarkLeaseTransferRejectedBecauseTargetMayNeedSnapshot)
}
Loading

0 comments on commit a1ac434

Please sign in to comment.