From 4aab367743bfb6436f82bfd966f50a9af0bd6159 Mon Sep 17 00:00:00 2001 From: MyonKeminta <9948422+MyonKeminta@users.noreply.github.com> Date: Fri, 8 Nov 2024 19:54:34 +0800 Subject: [PATCH] Fix the problem that statement being oom-killed within DoneAggressiveLocking causing the transaction still in aggressive locking state (#1355) (#1487) Co-authored-by: MyonKeminta --- txnkv/transaction/txn.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/txnkv/transaction/txn.go b/txnkv/transaction/txn.go index f9873a33f..2bfd7da71 100644 --- a/txnkv/transaction/txn.go +++ b/txnkv/transaction/txn.go @@ -735,6 +735,18 @@ func (txn *KVTxn) CancelAggressiveLocking(ctx context.Context) { if txn.aggressiveLockingContext == nil { panic("Trying to cancel aggressive locking while it's not started") } + + // Unset `aggressiveLockingContext` in a defer block to ensure it is executed even if a panic occurs halfway through. + // This is because if the panic is caused by an OOM-kill of TiDB, it can then be recovered and the user can still + // continue using the transaction's state. + // The usage of `defer` can be removed once we have another way to avoid the panic. + // See: https://github.com/pingcap/tidb/issues/53540#issuecomment-2138089140 + // Currently the problem only exists in `DoneAggressiveLocking`, but we do the same in `CancelAggressiveLocking` + // to keep the two functions consistent, and to guard against new panics that might be introduced in the future. + defer func() { + txn.aggressiveLockingContext = nil + }() + txn.cleanupAggressiveLockingRedundantLocks(context.Background()) if txn.aggressiveLockingContext.assignedPrimaryKey { txn.resetPrimary() @@ -754,7 +766,6 @@ func (txn *KVTxn) CancelAggressiveLocking(ctx context.Context) { txn.asyncPessimisticRollback(context.Background(), keys, forUpdateTS) txn.lockedCnt -= len(keys) } - txn.aggressiveLockingContext = nil } // DoneAggressiveLocking finishes the current aggressive locking. 
The locked keys will be moved to the membuffer as if @@ -763,6 +774,16 @@ func (txn *KVTxn) DoneAggressiveLocking(ctx context.Context) { if txn.aggressiveLockingContext == nil { panic("Trying to finish aggressive locking while it's not started") } + + // Unset `aggressiveLockingContext` in a defer block to ensure it is executed even if a panic occurs halfway through. + // This is because if the panic is caused by an OOM-kill of TiDB, it can then be recovered and the user can still + // continue using the transaction's state. + // The usage of `defer` can be removed once we have another way to avoid the panic. + // See: https://github.com/pingcap/tidb/issues/53540#issuecomment-2138089140 + defer func() { + txn.aggressiveLockingContext = nil + }() + txn.cleanupAggressiveLockingRedundantLocks(context.Background()) if txn.forUpdateTSChecks == nil { @@ -787,7 +808,6 @@ func (txn *KVTxn) DoneAggressiveLocking(ctx context.Context) { txn.committer.maxLockedWithConflictTS = txn.aggressiveLockingContext.maxLockedWithConflictTS } } - txn.aggressiveLockingContext = nil } // IsInAggressiveLockingMode checks if the transaction is currently in aggressive locking mode.