diff --git a/pkg/kv/kvserver/concurrency/lock_table_waiter.go b/pkg/kv/kvserver/concurrency/lock_table_waiter.go
index 55638da9ddc7..af6d31e15867 100644
--- a/pkg/kv/kvserver/concurrency/lock_table_waiter.go
+++ b/pkg/kv/kvserver/concurrency/lock_table_waiter.go
@@ -508,11 +508,10 @@ func (w *lockTableWaiterImpl) pushLockTxn(
     // leading the MVCC timestamp of an intent is lost, we also need to push
     // the intent up to the top of the transaction's local uncertainty limit
     // on this node. This logic currently lives in pushHeader, but we could
-    // simplify it when removing synthetic timestamps and then move it out
-    // here.
+    // simplify it and move it out here.
     //
     // We could also explore adding a preserve_local_timestamp flag to
-    // MVCCValue that would explicitly storage the local timestamp even in
+    // MVCCValue that would explicitly store the local timestamp even in
     // cases where it would normally be omitted. This could be set during
     // intent resolution when a push observation is provided. Or we could
     // not persist this, but still preserve the local timestamp when the
@@ -782,42 +781,26 @@ func (w *lockTableWaiterImpl) pushHeader(req Request) kvpb.Header {
         // transaction's uncertainty interval. This allows us to not have to
         // restart for uncertainty if the push succeeds and we come back and
         // read.
+        uncertaintyLimit := req.Txn.GlobalUncertaintyLimit
+        // However, because we intend to read on the same node, we can limit
+        // this to a clock reading from the local clock, relying on the fact
+        // that an observed timestamp from this node will limit our local
+        // uncertainty limit when we return to read.
         //
-        // NOTE: GlobalUncertaintyLimit is effectively synthetic because it does
-        // not come from an HLC clock, but it does not currently get marked as
-        // so. See the comment in roachpb.MakeTransaction. This synthetic flag
-        // is then removed if we call Backward(clock.Now()) below.
-        uncertaintyLimit := req.Txn.GlobalUncertaintyLimit.WithSynthetic(true)
-        if !h.Timestamp.Synthetic {
-            // Because we intend to read on the same node, we can limit this to a
-            // clock reading from the local clock, relying on the fact that an
-            // observed timestamp from this node will limit our local uncertainty
-            // limit when we return to read.
-            //
-            // We intentionally do not use an observed timestamp directly to limit
-            // the push timestamp, because observed timestamps are not applicable in
-            // some cases (e.g. across lease changes). So to avoid an infinite loop
-            // where we continue to push to an unusable observed timestamp and
-            // continue to find the pushee in our uncertainty interval, we instead
-            // use the present time to limit the push timestamp, which is less
-            // optimal but is guaranteed to progress.
-            //
-            // There is some inherent raciness here, because the lease may move
-            // between when we push and when we later read. In such cases, we may
-            // need to push again, but expect to eventually succeed in reading,
-            // either after lease movement subsides or after the reader's read
-            // timestamp surpasses its global uncertainty limit.
-            //
-            // However, this argument only holds if we expect to be able to use a
-            // local uncertainty limit when we return to read the pushed intent.
-            // Notably, local uncertainty limits can not be used to ignore intents
-            // with synthetic timestamps that would otherwise be in a reader's
-            // uncertainty interval. This is because observed timestamps do not
-            // apply to intents/values with synthetic timestamps. So if we know
-            // that we will be pushing an intent to a synthetic timestamp, we
-            // don't limit the value to a clock reading from the local clock.
-            uncertaintyLimit.Backward(w.clock.Now())
-        }
+        // We intentionally do not use an observed timestamp directly to limit
+        // the push timestamp, because observed timestamps are not applicable in
+        // some cases (e.g. across lease changes). So to avoid an infinite loop
+        // where we continue to push to an unusable observed timestamp and
+        // continue to find the pushee in our uncertainty interval, we instead
+        // use the present time to limit the push timestamp, which is less
+        // optimal but is guaranteed to progress.
+        //
+        // There is some inherent raciness here, because the lease may move
+        // between when we push and when we later read. In such cases, we may
+        // need to push again, but expect to eventually succeed in reading,
+        // either after lease movement subsides or after the reader's read
+        // timestamp surpasses its global uncertainty limit.
+        uncertaintyLimit.Backward(w.clock.Now())
         h.Timestamp.Forward(uncertaintyLimit)
     }
     return h
diff --git a/pkg/kv/kvserver/concurrency/lock_table_waiter_test.go b/pkg/kv/kvserver/concurrency/lock_table_waiter_test.go
index 770158634cc7..dbbc298868de 100644
--- a/pkg/kv/kvserver/concurrency/lock_table_waiter_test.go
+++ b/pkg/kv/kvserver/concurrency/lock_table_waiter_test.go
@@ -143,96 +143,85 @@ func TestLockTableWaiterWithTxn(t *testing.T) {
     defer log.Scope(t).Close(t)
     ctx := context.Background()
 
-    testutils.RunTrueAndFalse(t, "synthetic", func(t *testing.T, synthetic bool) {
-        uncertaintyLimit := hlc.Timestamp{WallTime: 15}
-        makeReq := func() Request {
-            txn := makeTxnProto("request")
-            txn.GlobalUncertaintyLimit = uncertaintyLimit
-            if synthetic {
-                txn.ReadTimestamp = txn.ReadTimestamp.WithSynthetic(true)
-            }
-            ba := &kvpb.BatchRequest{}
-            ba.Txn = &txn
-            ba.Timestamp = txn.ReadTimestamp
-            return Request{
-                Txn:       &txn,
-                Timestamp: ba.Timestamp,
-                BaFmt:     ba,
-            }
-        }
-
-        expPushTS := func() hlc.Timestamp {
-            // If the waiter has a synthetic timestamp, it pushes all the way up to
-            // its global uncertainty limit, because it won't be able to use a local
-            // uncertainty limit to ignore a synthetic intent. If the waiter does not
-            // have a synthetic timestamp, it uses the local clock to bound its push
-            // timestamp, with the assumption that it will be able to use its local
-            // uncertainty limit to ignore a non-synthetic intent. For more, see
-            // lockTableWaiterImpl.pushHeader.
-            if synthetic {
-                return uncertaintyLimit.WithSynthetic(true)
-            }
-            // NOTE: lockTableWaiterTestClock < uncertaintyLimit
-            return lockTableWaiterTestClock
+    uncertaintyLimit := hlc.Timestamp{WallTime: 15}
+    makeReq := func() Request {
+        txn := makeTxnProto("request")
+        txn.GlobalUncertaintyLimit = uncertaintyLimit
+        ba := &kvpb.BatchRequest{}
+        ba.Txn = &txn
+        ba.Timestamp = txn.ReadTimestamp
+        return Request{
+            Txn:       &txn,
+            Timestamp: ba.Timestamp,
+            BaFmt:     ba,
         }
+    }
 
-        t.Run("state", func(t *testing.T) {
-            t.Run("waitFor", func(t *testing.T) {
-                testWaitPush(t, waitFor, makeReq, expPushTS())
-            })
-
-            t.Run("waitForDistinguished", func(t *testing.T) {
-                testWaitPush(t, waitForDistinguished, makeReq, expPushTS())
-            })
-
-            t.Run("waitElsewhere", func(t *testing.T) {
-                testWaitPush(t, waitElsewhere, makeReq, expPushTS())
-            })
+    expPushTS := func() hlc.Timestamp {
+        // The waiter uses the local clock to bound its push timestamp, with the
+        // assumption that it will be able to use its local uncertainty limit to
+        // ignore the intent. For more, see lockTableWaiterImpl.pushHeader.
+        //
+        // NOTE: lockTableWaiterTestClock < uncertaintyLimit
+        return lockTableWaiterTestClock
+    }
 
-            t.Run("waitSelf", func(t *testing.T) {
-                testWaitNoopUntilDone(t, waitSelf, makeReq)
-            })
+    t.Run("state", func(t *testing.T) {
+        t.Run("waitFor", func(t *testing.T) {
+            testWaitPush(t, waitFor, makeReq, expPushTS())
+        })
 
-            t.Run("waitQueueMaxLengthExceeded", func(t *testing.T) {
-                testErrorWaitPush(t, waitQueueMaxLengthExceeded, makeReq, dontExpectPush, reasonWaitQueueMaxLengthExceeded)
-            })
+        t.Run("waitForDistinguished", func(t *testing.T) {
+            testWaitPush(t, waitForDistinguished, makeReq, expPushTS())
+        })
 
-            t.Run("doneWaiting", func(t *testing.T) {
-                w, _, g, _ := setupLockTableWaiterTest()
-                defer w.stopper.Stop(ctx)
+        t.Run("waitElsewhere", func(t *testing.T) {
+            testWaitPush(t, waitElsewhere, makeReq, expPushTS())
+        })
 
-                g.state = waitingState{kind: doneWaiting}
-                g.notify()
+        t.Run("waitSelf", func(t *testing.T) {
+            testWaitNoopUntilDone(t, waitSelf, makeReq)
+        })
 
-                err := w.WaitOn(ctx, makeReq(), g)
-                require.Nil(t, err)
-            })
+        t.Run("waitQueueMaxLengthExceeded", func(t *testing.T) {
+            testErrorWaitPush(t, waitQueueMaxLengthExceeded, makeReq, dontExpectPush, reasonWaitQueueMaxLengthExceeded)
         })
 
-        t.Run("ctx done", func(t *testing.T) {
+        t.Run("doneWaiting", func(t *testing.T) {
             w, _, g, _ := setupLockTableWaiterTest()
             defer w.stopper.Stop(ctx)
 
-            ctxWithCancel, cancel := context.WithCancel(ctx)
-            go cancel()
+            g.state = waitingState{kind: doneWaiting}
+            g.notify()
 
-            err := w.WaitOn(ctxWithCancel, makeReq(), g)
-            require.NotNil(t, err)
-            require.Equal(t, context.Canceled.Error(), err.GoError().Error())
+            err := w.WaitOn(ctx, makeReq(), g)
+            require.Nil(t, err)
         })
+    })
 
-        t.Run("stopper quiesce", func(t *testing.T) {
-            w, _, g, _ := setupLockTableWaiterTest()
-            defer w.stopper.Stop(ctx)
+    t.Run("ctx done", func(t *testing.T) {
+        w, _, g, _ := setupLockTableWaiterTest()
+        defer w.stopper.Stop(ctx)
 
-            go func() {
-                w.stopper.Quiesce(ctx)
-            }()
+        ctxWithCancel, cancel := context.WithCancel(ctx)
+        go cancel()
 
-            err := w.WaitOn(ctx, makeReq(), g)
-            require.NotNil(t, err)
-            require.IsType(t, &kvpb.NodeUnavailableError{}, err.GetDetail())
-        })
+        err := w.WaitOn(ctxWithCancel, makeReq(), g)
+        require.NotNil(t, err)
+        require.Equal(t, context.Canceled.Error(), err.GoError().Error())
+    })
+
+    t.Run("stopper quiesce", func(t *testing.T) {
+        w, _, g, _ := setupLockTableWaiterTest()
+        defer w.stopper.Stop(ctx)
+
+        go func() {
+            w.stopper.Quiesce(ctx)
+        }()
+
+        err := w.WaitOn(ctx, makeReq(), g)
+        require.NotNil(t, err)
+        require.IsType(t, &kvpb.NodeUnavailableError{}, err.GetDetail())
     })
 }
diff --git a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/uncertainty b/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/uncertainty
index 9f7bb329b8a1..f0431765c56d 100644
--- a/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/uncertainty
+++ b/pkg/kv/kvserver/concurrency/testdata/concurrency_manager/uncertainty
@@ -127,75 +127,6 @@ finish req=req1
 reset namespace
 ----
 
-# -------------------------------------------------------------
-# Same situation as above, only here, the read-only transaction
-# has a timestamp that is synthetic, meaning that if it succeeds
-# in pushing the intent, the intent will be left with a
-# synthetic timestamp. Even though the transaction's uncertainty
-# interval extends past present time, the transaction pushes all
-# the way to its uncertainty limit and marks the pushTo
-# timestamp as "synthetic". See lockTableWaiterImpl.pushHeader.
-# -------------------------------------------------------------
-
-debug-set-clock ts=135
-----
-
-new-txn name=txn1 ts=100,1 epoch=0
-----
-
-new-txn name=txn2 ts=120,1? epoch=0 uncertainty-limit=150,1
-----
-
-new-request name=req1 txn=txn2 ts=120,1?
-  get key=k
-----
-
-sequence req=req1
-----
-[1] sequence req1: sequencing request
-[1] sequence req1: acquiring latches
-[1] sequence req1: scanning lock table for conflicting locks
-[1] sequence req1: sequencing complete, returned guard
-
-handle-lock-conflict-error req=req1 lease-seq=1
-  lock txn=txn1 key=k
-----
-[2] handle lock conflict error req1: handled conflicting locks on ‹"k"›, released latches
-
-debug-lock-table
-----
-num=1
- lock: "k"
-  holder: txn: 00000001-0000-0000-0000-000000000000 epoch: 0, iso: Serializable, ts: 100.000000000,1, info: repl [Intent]
-
-sequence req=req1
-----
-[3] sequence req1: re-sequencing request
-[3] sequence req1: acquiring latches
-[3] sequence req1: scanning lock table for conflicting locks
-[3] sequence req1: waiting in lock wait-queues
-[3] sequence req1: lock wait-queue event: wait for (distinguished) txn 00000001 holding lock @ key ‹"k"› (queuedLockingRequests: 0, queuedReaders: 1)
-[3] sequence req1: pushing after 0s for: liveness detection = true, deadlock detection = true, timeout enforcement = false, priority enforcement = false, wait policy error = false
-[3] sequence req1: pushing timestamp of txn 00000001 above 150.000000000,1?
-[3] sequence req1: blocked on select in concurrency_test.(*cluster).PushTransaction
-
-on-txn-updated txn=txn1 status=pending ts=150,2?
-----
-[-] update txn: increasing timestamp of txn1
-[3] sequence req1: resolving intent ‹"k"› for txn 00000001 with PENDING status and clock observation {1 135.000000000,2}
-[3] sequence req1: lock wait-queue event: done waiting
-[3] sequence req1: conflicted with ‹00000001-0000-0000-0000-000000000000› on ‹"k"› for 0.000s
-[3] sequence req1: acquiring latches
-[3] sequence req1: scanning lock table for conflicting locks
-[3] sequence req1: sequencing complete, returned guard
-
-finish req=req1
-----
-[-] finish req1: finishing request
-
-reset namespace
-----
-
 # -------------------------------------------------------------
 # A transactional (txn2) read-only request runs into a replicated
 # intent below its read timestamp and informs the lock table.
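
Reviewer note, outside the patch itself: below is a minimal, self-contained Go sketch of the push-timestamp policy that pushHeader is left with after this change. The ts type and the pushTimestamp, backward, and forward helpers are simplified stand-ins invented for illustration; they are not the real hlc.Timestamp or kvpb APIs, and the sketch only mirrors the ordering of the Backward/Forward calls shown in the diff.

// Standalone sketch, assuming simplified stand-in types: the pusher starts
// from its own timestamp, caps its global uncertainty limit at a reading of
// the local clock, and pushes the lock holder to whichever is higher.
package main

import (
    "fmt"
    "time"
)

// ts is a wall-clock-only stand-in for an HLC timestamp.
type ts struct{ wall int64 }

func (t ts) less(o ts) bool { return t.wall < o.wall }

// backward ratchets t down to o if o is earlier (like Backward in the diff).
func (t *ts) backward(o ts) {
    if o.less(*t) {
        *t = o
    }
}

// forward ratchets t up to o if o is later (like Forward in the diff).
func (t *ts) forward(o ts) {
    if t.less(o) {
        *t = o
    }
}

// pushTimestamp returns the timestamp a reader would push a conflicting lock
// holder to: its read timestamp, forwarded to the smaller of its global
// uncertainty limit and the present time on the local clock.
func pushTimestamp(readTS, globalUncertaintyLimit, localClockNow ts) ts {
    pushTo := readTS
    uncertaintyLimit := globalUncertaintyLimit
    uncertaintyLimit.backward(localClockNow) // limit to a local clock reading
    pushTo.forward(uncertaintyLimit)
    return pushTo
}

func main() {
    now := ts{wall: time.Now().UnixNano()}
    read := ts{wall: now.wall - 3*time.Second.Nanoseconds()}         // reader's timestamp, in the past
    limit := ts{wall: now.wall + 500*time.Millisecond.Nanoseconds()} // uncertainty limit, slightly ahead
    // Because the limit sits ahead of the local clock, the push stops at the
    // clock reading rather than at the full uncertainty limit.
    fmt.Println(pushTimestamp(read, limit, now))
}

The point of the sketch is the ordering: the global uncertainty limit is ratcheted down to the local clock before the push timestamp is ratcheted up, which is what lets the reader later rely on an observed timestamp from this node instead of restarting for uncertainty, as the comments in pushHeader above explain.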