From 63dfc14930f6e4f99480ded242c5dcbb5a37b883 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Mon, 20 Dec 2021 17:42:06 -0500 Subject: [PATCH 1/2] kv: plumb context into msgAppDropper.ShouldDrop Avoids the `context.Background` in `replicaMsgAppDropper.ShouldDrop`. --- pkg/kv/kvserver/split_trigger_helper.go | 10 ++++++---- pkg/kv/kvserver/split_trigger_helper_test.go | 4 +++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pkg/kv/kvserver/split_trigger_helper.go b/pkg/kv/kvserver/split_trigger_helper.go index 4c123161df46..707c44aec9d2 100644 --- a/pkg/kv/kvserver/split_trigger_helper.go +++ b/pkg/kv/kvserver/split_trigger_helper.go @@ -32,18 +32,20 @@ func (rd *replicaMsgAppDropper) Args() (initialized bool, ticks int) { return initialized, ticks } -func (rd *replicaMsgAppDropper) ShouldDrop(startKey roachpb.RKey) (fmt.Stringer, bool) { +func (rd *replicaMsgAppDropper) ShouldDrop( + ctx context.Context, startKey roachpb.RKey, +) (fmt.Stringer, bool) { lhsRepl := (*Replica)(rd).store.LookupReplica(startKey) if lhsRepl == nil { return nil, false } - lhsRepl.store.replicaGCQueue.AddAsync(context.Background(), lhsRepl, replicaGCPriorityDefault) + lhsRepl.store.replicaGCQueue.AddAsync(ctx, lhsRepl, replicaGCPriorityDefault) return lhsRepl, true } type msgAppDropper interface { Args() (initialized bool, ticks int) - ShouldDrop(key roachpb.RKey) (fmt.Stringer, bool) + ShouldDrop(ctx context.Context, key roachpb.RKey) (fmt.Stringer, bool) } // maybeDropMsgApp returns true if the incoming Raft message should be dropped. @@ -125,7 +127,7 @@ func maybeDropMsgApp( // NB: the caller is likely holding r.raftMu, but that's OK according to // the lock order. We're not allowed to hold r.mu, but we don't. - lhsRepl, drop := r.ShouldDrop(startKey) + lhsRepl, drop := r.ShouldDrop(ctx, startKey) if !drop { return false } diff --git a/pkg/kv/kvserver/split_trigger_helper_test.go b/pkg/kv/kvserver/split_trigger_helper_test.go index 88895ccc9693..ea5940a773b0 100644 --- a/pkg/kv/kvserver/split_trigger_helper_test.go +++ b/pkg/kv/kvserver/split_trigger_helper_test.go @@ -36,7 +36,9 @@ func (td *testMsgAppDropper) Args() (initialized bool, ticks int) { return td.initialized, td.ticks } -func (td *testMsgAppDropper) ShouldDrop(startKey roachpb.RKey) (fmt.Stringer, bool) { +func (td *testMsgAppDropper) ShouldDrop( + ctx context.Context, startKey roachpb.RKey, +) (fmt.Stringer, bool) { if len(startKey) == 0 { panic("empty startKey") } From 625ee8b39f3b9fa18171261446835bee1e03a7ab Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Mon, 20 Dec 2021 17:47:50 -0500 Subject: [PATCH 2/2] kv: remove dependency on ticks from maybeDropMsgApp Related to #73838. In d77bee9, we stopped ticking uninitialized replicas, so we can no longer use ticks as a proxy for the age of a replica in the escape hatch of `maybeDropMsgApp`. Instead, we now use the age of the replica directly. We hit the escape hatch for any replica that is older than 20s, which corresponds to the 100 ticks we used before. --- pkg/kv/kvserver/replica.go | 3 +++ pkg/kv/kvserver/replica_init.go | 1 + pkg/kv/kvserver/split_trigger_helper.go | 21 +++++++++++--------- pkg/kv/kvserver/split_trigger_helper_test.go | 11 +++++----- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index 44f25fac97cf..d4cf0a3ca9f5 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -214,6 +214,9 @@ type Replica struct { // The writes to this key happen in Replica.setStartKeyLocked. startKey roachpb.RKey + // creationTime is the time that the Replica struct was initially constructed. + creationTime time.Time + store *Store abortSpan *abortspan.AbortSpan // Avoids anomalous reads after abort diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go index 6310ee0cd2fc..fbb1b1dc0c69 100644 --- a/pkg/kv/kvserver/replica_init.go +++ b/pkg/kv/kvserver/replica_init.go @@ -71,6 +71,7 @@ func newUnloadedReplica( r := &Replica{ AmbientContext: store.cfg.AmbientCtx, RangeID: desc.RangeID, + creationTime: timeutil.Now(), store: store, abortSpan: abortspan.New(desc.RangeID), concMgr: concurrency.NewManager(concurrency.Config{ diff --git a/pkg/kv/kvserver/split_trigger_helper.go b/pkg/kv/kvserver/split_trigger_helper.go index 707c44aec9d2..78882c3c827a 100644 --- a/pkg/kv/kvserver/split_trigger_helper.go +++ b/pkg/kv/kvserver/split_trigger_helper.go @@ -13,23 +13,26 @@ package kvserver import ( "context" "fmt" + "time" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "go.etcd.io/etcd/raft/v3/raftpb" ) -const maxDelaySplitTriggerTicks = 100 +const maxDelaySplitTriggerDur = 20 * time.Second type replicaMsgAppDropper Replica -func (rd *replicaMsgAppDropper) Args() (initialized bool, ticks int) { +func (rd *replicaMsgAppDropper) Args() (initialized bool, age time.Duration) { r := (*Replica)(rd) r.mu.RLock() initialized = r.isInitializedRLocked() - ticks = r.mu.ticks + creationTime := r.creationTime r.mu.RUnlock() - return initialized, ticks + age = timeutil.Since(creationTime) + return initialized, age } func (rd *replicaMsgAppDropper) ShouldDrop( @@ -44,7 +47,7 @@ func (rd *replicaMsgAppDropper) ShouldDrop( } type msgAppDropper interface { - Args() (initialized bool, ticks int) + Args() (initialized bool, age time.Duration) ShouldDrop(ctx context.Context, key roachpb.RKey) (fmt.Stringer, bool) } @@ -71,7 +74,7 @@ func maybeDropMsgApp( // message via msg.Context. Check if this replica might be waiting for a // split trigger. The first condition for that is not knowing the key // bounds, i.e. not being initialized. - initialized, ticks := r.Args() + initialized, age := r.Args() if initialized { return false @@ -135,7 +138,7 @@ func maybeDropMsgApp( if verbose { log.Infof(ctx, "start key is contained in replica %v", lhsRepl) } - if ticks > maxDelaySplitTriggerTicks { + if age > maxDelaySplitTriggerDur { // This is an escape hatch in case there are other scenarios (missed in // the above analysis) in which a split trigger just isn't coming. If // there are, the idea is that we notice this log message and improve @@ -143,8 +146,8 @@ func maybeDropMsgApp( log.Warningf( ctx, "would have dropped incoming MsgApp to wait for split trigger, "+ - "but allowing due to %d (>%d) ticks", - ticks, maxDelaySplitTriggerTicks) + "but allowing because uninitialized replica was created %s (>%s) ago", + age, maxDelaySplitTriggerDur) return false } if verbose { diff --git a/pkg/kv/kvserver/split_trigger_helper_test.go b/pkg/kv/kvserver/split_trigger_helper_test.go index ea5940a773b0..46fcf5411e1f 100644 --- a/pkg/kv/kvserver/split_trigger_helper_test.go +++ b/pkg/kv/kvserver/split_trigger_helper_test.go @@ -14,6 +14,7 @@ import ( "context" "fmt" "testing" + "time" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/testutils" @@ -26,14 +27,14 @@ import ( type testMsgAppDropper struct { initialized bool - ticks int + age time.Duration lhs bool startKey string // set by ShouldDrop } -func (td *testMsgAppDropper) Args() (initialized bool, ticks int) { - return td.initialized, td.ticks +func (td *testMsgAppDropper) Args() (initialized bool, age time.Duration) { + return td.initialized, td.age } func (td *testMsgAppDropper) ShouldDrop( @@ -58,9 +59,9 @@ func TestMaybeDropMsgApp(t *testing.T) { // Drop message to wait for trigger. {initialized: false, lhs: true}: true, // Drop message to wait for trigger. - {initialized: false, lhs: true, ticks: maxDelaySplitTriggerTicks}: true, + {initialized: false, lhs: true, age: maxDelaySplitTriggerDur}: true, // Escape hatch fires. - {initialized: false, lhs: true, ticks: maxDelaySplitTriggerTicks + 1}: false, + {initialized: false, lhs: true, age: maxDelaySplitTriggerDur + 1}: false, } msgHeartbeat := &raftpb.Message{