Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
136232: kvserver: eagerly unquiesce ranges that use leader leases r=nvanbenschoten a=arulajmani

We eagerly unquiesce expiration based leases when starting/restarting a store. Like expiration based leases, leader leases also don't support quiescence. As such, this patch has them start off life as unquiescent.

Found while trying to port TestStoreLoadReplicaQuiescent over to use leader leases.

References cockroachdb#133763 
Closes cockroachdb#136231

Release note: None

Co-authored-by: Arul Ajmani <[email protected]>
  • Loading branch information
craig[bot] and arulajmani committed Dec 10, 2024
2 parents bb4d357 + bc23142 commit 28d9b53
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 26 deletions.
45 changes: 39 additions & 6 deletions pkg/kv/kvserver/client_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
"github.com/cockroachdb/cockroach/pkg/roachpb"
Expand All @@ -22,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/testutils/listenerutil"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
Expand Down Expand Up @@ -115,16 +117,16 @@ func TestStoreLoadReplicaQuiescent(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

testutils.RunTrueAndFalse(t, "kv.expiration_leases_only.enabled", func(t *testing.T, expOnly bool) {
testutils.RunValues(t, "lease-type", roachpb.TestingAllLeaseTypes(), func(t *testing.T, leaseType roachpb.LeaseType) {
storeReg := fs.NewStickyRegistry()
listenerReg := listenerutil.NewListenerRegistry()
defer listenerReg.Close()

ctx := context.Background()
st := cluster.MakeTestingClusterSettings()
kvserver.OverrideLeaderLeaseMetamorphism(ctx, &st.SV)
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, expOnly)
kvserver.OverrideDefaultLeaseType(ctx, &st.SV, leaseType)

manualClock := hlc.NewHybridManualClock()
tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{
ReplicationMode: base.ReplicationManual,
ReusableListenerReg: listenerReg,
Expand All @@ -136,6 +138,7 @@ func TestStoreLoadReplicaQuiescent(t *testing.T) {
Knobs: base.TestingKnobs{
Server: &server.TestingKnobs{
StickyVFSRegistry: storeReg,
WallClock: manualClock,
},
Store: &kvserver.StoreTestingKnobs{
DisableScanner: true,
Expand All @@ -161,10 +164,33 @@ func TestStoreLoadReplicaQuiescent(t *testing.T) {
repl := tc.GetFirstStoreFromServer(t, 0).GetReplicaIfExists(desc.RangeID)
require.NotNil(t, repl)
lease, _ := repl.GetLease()
if expOnly {

if leaseType == roachpb.LeaseLeader {
// The first lease that'll be acquired above is going to be an expiration
// based lease. That's because at that point, there won't be a leader,
// and the lease acquisition will trigger an election. Expire that lease
// and send a request that'll force a re-acquisition. This time, we should
// get a leader lease.
manualClock.Increment(tc.Server(0).RaftConfig().RangeLeaseDuration.Nanoseconds())
incArgs := incrementArgs(key, int64(5))

testutils.SucceedsSoon(t, func() error {
_, err := kv.SendWrapped(ctx, tc.GetFirstStoreFromServer(t, 0).TestSender(), incArgs)
return err.GoError()
})
lease, _ = repl.GetLease()
require.Equal(t, roachpb.LeaseLeader, lease.Type())
}

switch leaseType {
case roachpb.LeaseExpiration:
require.Equal(t, roachpb.LeaseExpiration, lease.Type())
} else {
case roachpb.LeaseEpoch:
require.Equal(t, roachpb.LeaseEpoch, lease.Type())
case roachpb.LeaseLeader:
require.Equal(t, roachpb.LeaseLeader, lease.Type())
default:
panic("unknown")
}

// Restart the server and check whether the range starts out quiesced.
Expand All @@ -175,6 +201,13 @@ func TestStoreLoadReplicaQuiescent(t *testing.T) {
repl, _, err = tc.Server(0).GetStores().(*kvserver.Stores).GetReplicaForRangeID(ctx, desc.RangeID)
require.NoError(t, err)
require.NotNil(t, repl.RaftStatus())
require.Equal(t, !expOnly, repl.IsQuiescent())
switch leaseType {
case roachpb.LeaseExpiration, roachpb.LeaseLeader:
require.False(t, repl.IsQuiescent())
case roachpb.LeaseEpoch:
require.True(t, repl.IsQuiescent())
default:
panic("unknown")
}
})
}
17 changes: 4 additions & 13 deletions pkg/kv/kvserver/replica_raft_quiesce.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,19 +300,10 @@ func shouldReplicaQuiesceRaftMuLockedReplicaMuLocked(
}
return nil, nil, false
}
// Fast path: don't quiesce leader leases. A fortified raft leader does not
// send raft heartbeats, so quiescence is not needed. All liveness decisions
// are based on store liveness communication, which is cheap enough to not
// need a notion of quiescence.
if leaseStatus.Lease.Type() == roachpb.LeaseLeader {
log.VInfof(ctx, 4, "not quiescing: leader lease")
return nil, nil, false
}
// Fast path: don't quiesce expiration-based leases, since they'll likely be
// renewed soon. The lease may not be ours, but in that case we wouldn't be
// able to quiesce anyway (see leaseholder condition below).
if l := leaseStatus.Lease; l.Type() == roachpb.LeaseExpiration {
log.VInfof(ctx, 4, "not quiescing: expiration-based lease")
// Fast path: if the lease doesn't support quiescence (leader leases and
// expiration based leases do not), return early.
if !leaseStatus.Lease.SupportsQuiescence() {
log.VInfof(ctx, 4, "not quiescing: %s", leaseStatus.Lease.Type())
return nil, nil, false
}
if q.hasPendingProposalsRLocked() {
Expand Down
15 changes: 8 additions & 7 deletions pkg/kv/kvserver/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -2325,15 +2325,16 @@ func (s *Store) Start(ctx context.Context, stopper *stop.Stopper) error {
return errors.AssertionFailedf("no tenantID for initialized replica %s", rep)
}

// Eagerly unquiesce replicas that use expiration-based leases. We don't
// quiesce ranges with expiration leases, and we want to eagerly acquire
// leases for them, which happens during Raft ticks. We rely on Raft
// pre-vote to avoid disturbing established Raft leaders.
// Eagerly unquiece replicas that are using a lease that doesn't support quiescnce.
// In practice, this means we'll unquiesce ranges that are using leader leases
// or expiration leases. We want to eagerly establish raft leaders for such
// ranges and acquire leases on top of this. This happens during Raft ticks.
// We rely on Raft pre-vote to avoid disturbance to Raft leaders.
//
// NB: cluster settings haven't propagated yet, so we have to check the last
// known lease instead of relying on shouldUseExpirationLeaseRLocked(). We
// also check Sequence > 0 to omit ranges that haven't seen a lease yet.
if l, _ := rep.GetLease(); l.Type() == roachpb.LeaseExpiration && l.Sequence > 0 {
// known lease instead of desiredLeaseTypeRLocked. We also check Sequence >
// 0 to omit ranges that haven't seen a lease yet.
if l, _ := rep.GetLease(); !l.SupportsQuiescence() && l.Sequence > 0 {
rep.maybeUnquiesce(ctx, true /* wakeLeader */, true /* mayCampaign */)
}
}
Expand Down
19 changes: 19 additions & 0 deletions pkg/roachpb/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -2021,6 +2021,25 @@ func (l Lease) Type() LeaseType {
return LeaseExpiration
}

// SupportsQuiescence returns whether the lease supports quiescence or not.
func (l Lease) SupportsQuiescence() bool {
switch l.Type() {
case LeaseExpiration, LeaseLeader:
// Expiration based leases do not support quiescence because they'll likely
// be renewed soon, so there's not much point to it.
//
// Leader leases do not support quiescence because a fortified raft leader
// will not send raft heartbeats, so quiescence is not needed. All liveness
// decisions are based on store liveness communication, which is cheap
// enough to not need a notion of quiescence.
return false
case LeaseEpoch:
return true
default:
panic("unexpected lease type")
}
}

// Speculative returns true if this lease instance doesn't correspond to a
// committed lease (or at least to a lease that's *known* to have committed).
// For example, nodes sometimes guess who a leaseholder might be and synthesize
Expand Down

0 comments on commit 28d9b53

Please sign in to comment.