Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
132166: rac2,kvserver: do not quiesce if send tokens held r=sumeerbhola a=pav-kv

This PR prevents range quiescence if RACv2 holds any send tokens for this range. Quiescence would prevent `MsgApp` pings which ensure that the leader reliably learns about the follower store admitting log entries, and causes it to release tokens accordingly. We do not want to end up holding tokens permanently.

Resolves #129581

132202: sql/schemachanger: clean up SequenceOwner elements during restore r=fqazi a=fqazi

Previously, when restoring a backup taken in middle of a DROP COLUMN, where a column had a sequence owner assigned, it was possible for the backup to be unrestorable. This would happen because the sequence reference would have been dropped in the plan, but the seqeunce owner element was still within the state. To address this, this test updates the rewrite logic to clean up any SequenceOwner elements which have the referenced sequence already removed.

Fixes: #130778

Release note (bug fix): Addressed a rare bug that could prevent backups taken during a DROP COLUMN operation with a sequence owner from restoring with the error: "rewriting descriptor ids: missing rewrite for <id> in SequenceOwner..."

Co-authored-by: Pavel Kalinnikov <[email protected]>
Co-authored-by: Faizan Qazi <[email protected]>
  • Loading branch information
3 people committed Oct 10, 2024
3 parents 13bc523 + 2af87f8 + e08e89b commit 188d9fe
Show file tree
Hide file tree
Showing 25 changed files with 1,764 additions and 282 deletions.
28 changes: 28 additions & 0 deletions pkg/ccl/schemachangerccl/backup_base_generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 26 additions & 28 deletions pkg/kv/kvserver/flow_control_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3371,20 +3371,10 @@ func TestFlowControlQuiescedRangeV2(t *testing.T) {
`)
h.query(n1, v2FlowTokensQueryStr)

// TODO(pav-kv,kvoli): When #129581 is complete, this test will fail
// because the range won't quiesce with a lagging admitted vector. Update
// the test to assert that the range doesn't quiesce then.
//
// Wait for the range to quiesce.
h.comment(`-- (Wait for range to quiesce.)`)
testutils.SucceedsSoon(t, func() error {
leader := tc.GetRaftLeader(t, roachpb.RKey(k))
require.NotNil(t, leader)
if !leader.IsQuiescent() {
return errors.Errorf("%s not quiescent", leader)
}
return nil
})
// The range must not quiesce because the leader holds send tokens.
leader := tc.GetRaftLeader(t, roachpb.RKey(k))
require.NotNil(t, leader)
require.False(t, leader.IsQuiescent())

h.comment(`
-- (Allow below-raft admission to proceed. We've disabled the fallback token
Expand All @@ -3409,6 +3399,15 @@ func TestFlowControlQuiescedRangeV2(t *testing.T) {
-- are returned through the fallback mechanism.
`)
h.query(n1, v2FlowTokensQueryStr)

// The range eventually quiesces because all the tokens have been returned.
h.comment(`-- (Wait for range to quiesce.)`)
testutils.SucceedsSoon(t, func() error {
if !leader.IsQuiescent() {
return errors.Errorf("%s not quiescent", leader)
}
return nil
})
})
}

Expand Down Expand Up @@ -3503,20 +3502,10 @@ func TestFlowControlUnquiescedRangeV2(t *testing.T) {
`)
h.query(n1, v2FlowTokensQueryStr)

// TODO(pav-kv,kvoli): When #129581 is complete, this test will fail
// because the range won't quiesce with a lagging admitted vector. Update
// the test to assert that the range doesn't quiesce then.
//
// Wait for the range to quiesce.
h.comment(`-- (Wait for range to quiesce.)`)
testutils.SucceedsSoon(t, func() error {
leader := tc.GetRaftLeader(t, roachpb.RKey(k))
require.NotNil(t, leader)
if !leader.IsQuiescent() {
return errors.Errorf("%s not quiescent", leader)
}
return nil
})
// The range must not quiesce because the leader holds send tokens.
leader := tc.GetRaftLeader(t, roachpb.RKey(k))
require.NotNil(t, leader)
require.False(t, leader.IsQuiescent())

h.comment(`
-- (Allow below-raft admission to proceed. We've disabled the fallback token
Expand Down Expand Up @@ -3547,6 +3536,15 @@ func TestFlowControlUnquiescedRangeV2(t *testing.T) {
-- are returned through the piggyback mechanism.
`)
h.query(n1, v2FlowTokensQueryStr)

// The range eventually quiesces because all the tokens have been returned.
h.comment(`-- (Wait for range to quiesce.)`)
testutils.SucceedsSoon(t, func() error {
if !leader.IsQuiescent() {
return errors.Errorf("%s not quiescent", leader)
}
return nil
})
})
}

Expand Down
22 changes: 20 additions & 2 deletions pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ type RangeController interface {
//
// Requires replica.raftMu to be held.
MaybeSendPingsRaftMuLocked()
// HoldsSendTokensRaftMuLocked returns true if the replica holds any send
// tokens. Used to prevent replica quiescence.
//
// Requires replica.raftMu to be held.
HoldsSendTokensRaftMuLocked() bool
// SetReplicasRaftMuLocked sets the replicas of the range. The caller will
// never mutate replicas, and neither should the callee.
//
Expand Down Expand Up @@ -1298,12 +1303,25 @@ func (rc *rangeController) MaybeSendPingsRaftMuLocked() {
if id == rc.opts.LocalReplicaID {
continue
}
if s := state.sendStream; s != nil && s.shouldPing() {
if s := state.sendStream; s != nil && s.holdsTokens() {
rc.opts.RaftInterface.SendPingRaftMuLocked(id)
}
}
}

// HoldsSendTokensRaftMuLocked implements RangeController.
func (rc *rangeController) HoldsSendTokensRaftMuLocked() bool {
// TODO(pav-kv): we are doing the same checks in MaybeSendPingsRaftMuLocked
// here, and both are called from Replica.tick. We can optimize this, and do
// both in one method.
for _, state := range rc.replicaMap {
if s := state.sendStream; s != nil && s.holdsTokens() {
return true
}
}
return false
}

// SetReplicasRaftMuLocked sets the replicas of the range. The caller will
// never mutate replicas, and neither should the callee.
//
Expand Down Expand Up @@ -1769,7 +1787,7 @@ func (rss *replicaSendStream) changeConnectedStateLocked(state connectedState, n
rss.mu.connectedStateStart = now
}

func (rss *replicaSendStream) shouldPing() bool {
func (rss *replicaSendStream) holdsTokens() bool {
rss.mu.Lock() // TODO(pav-kv): should we make it RWMutex.RLock()?
defer rss.mu.Unlock()
return !rss.mu.tracker.Empty()
Expand Down
14 changes: 14 additions & 0 deletions pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,11 @@ type Processor interface {
//
// raftMu is held.
MaybeSendPingsRaftMuLocked()
// HoldsSendTokensRaftMuLocked returns true if the replica is the leader using
// RACv2, and holds any send tokens. Used to prevent replica quiescence.
//
// raftMu is held.
HoldsSendTokensRaftMuLocked() bool

// AdmitForEval is called to admit work that wants to evaluate at the
// leaseholder.
Expand Down Expand Up @@ -1086,6 +1091,15 @@ func (p *processorImpl) MaybeSendPingsRaftMuLocked() {
}
}

// HoldsSendTokensRaftMuLocked implements Processor.
func (p *processorImpl) HoldsSendTokensRaftMuLocked() bool {
p.opts.Replica.RaftMuAssertHeld()
if rc := p.leader.rc; rc != nil {
return rc.HoldsSendTokensRaftMuLocked()
}
return false
}

// AdmitForEval implements Processor.
func (p *processorImpl) AdmitForEval(
ctx context.Context, pri admissionpb.WorkPriority, ct time.Time,
Expand Down
5 changes: 5 additions & 0 deletions pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,11 @@ func (c *testRangeController) MaybeSendPingsRaftMuLocked() {
fmt.Fprintf(c.b, " RangeController.MaybeSendPingsRaftMuLocked()\n")
}

func (c *testRangeController) HoldsSendTokensRaftMuLocked() bool {
fmt.Fprintf(c.b, " RangeController.HoldsSendTokensRaftMuLocked()\n")
return false
}

func (c *testRangeController) SetReplicasRaftMuLocked(
ctx context.Context, replicas rac2.ReplicaSet,
) error {
Expand Down
Loading

0 comments on commit 188d9fe

Please sign in to comment.