From df2f4f276c1971089093ea24269d5237c3f2df84 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Mon, 27 Jan 2020 18:49:17 -0500 Subject: [PATCH] storage: fix handling of refreshed timestamp Before this patch, the refresher interceptor was erroneously asserting its tracking of the refreshed timestamp is in sync with the TxnCoordSender. It may, in fact, not be in sync in edge cases where a refresh succeeded but the TxnCoordSender doesn't hear about that success. Touches #38156 Touches #41941 Touches #43707 Release note: None --- pkg/kv/txn_interceptor_span_refresher.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pkg/kv/txn_interceptor_span_refresher.go b/pkg/kv/txn_interceptor_span_refresher.go index be2fa9614ec5..12f5befa59ef 100644 --- a/pkg/kv/txn_interceptor_span_refresher.go +++ b/pkg/kv/txn_interceptor_span_refresher.go @@ -142,13 +142,21 @@ func (sr *txnSpanRefresher) SendLocked( // refreshes shouldn't check values below batchReadTimestamp, so initialize // sr.refreshedTimestamp. sr.refreshedTimestamp = batchReadTimestamp + } else if batchReadTimestamp.Less(sr.refreshedTimestamp) { + // sr.refreshedTimestamp might be ahead of batchReadTimestamp. We want to + // read at the latest refreshed timestamp, so bump the batch. + // batchReadTimestamp can be behind after a successful refresh, if the + // TxnCoordSender hasn't actually heard about the updated read timestamp. + // This can happen if a refresh succeeds, but then the retry of the batch + // that produced the timestamp fails without returning the update txn (for + // example, through a canceled ctx). The client should only be sending + // rollbacks in such cases. + ba.Txn.ReadTimestamp.Forward(sr.refreshedTimestamp) + ba.Txn.WriteTimestamp.Forward(sr.refreshedTimestamp) } else if sr.refreshedTimestamp != batchReadTimestamp { - // Sanity check: we're supposed to control the read timestamp. What we're - // tracking in sr.refreshedTimestamp is not supposed to get out of sync - // with what batches use (which comes from tc.mu.txn). - return nil, roachpb.NewError(errors.AssertionFailedf( - "unexpected batch read timestamp: %s. Expected refreshed timestamp: %s. ba: %s", - batchReadTimestamp, sr.refreshedTimestamp, ba)) + log.Fatalf(ctx, + "unexpected batch read timestamp: %s. Expected refreshed timestamp: %s. ba: %s. txn: %s", + batchReadTimestamp, sr.refreshedTimestamp, ba, ba.Txn) } if rArgs, hasET := ba.GetArg(roachpb.EndTxn); hasET {