From 82028ecb08727f30602164564ed4a9f73ef2e1ef Mon Sep 17 00:00:00 2001 From: Sergei Politov Date: Thu, 18 Aug 2022 17:59:04 +0300 Subject: [PATCH] [Backport 2.14] [#13659] DocDB: Fix Prepare for the failed transaction Summary: YBTransaction starts by looking up the status tablet. When this lookup fails, the transaction is marked as failed. Prepare lacked a failure state check, so calling Prepare after the transaction had failed added the callback to the waiters list. No further action was taken after that, so the callback was never notified. As a side effect, master leader restarts could cause prepared statement connections to hang. This diff fixes the issue by checking the failure status in Prepare. Test Plan: Launch an AWS cluster. Run the following workload against it: java -jar yb-sample-apps.jar --workload SqlDataLoad --num_writes -1 --num_threads_write 5 --num_threads_read 0 --num_reads 0 --num_unique_keys 100000000000 --batch_size 774 --num_value_columns 8 --nodes $NODES Wait 10 minutes. Stop the node that runs the master leader using the AWS console. Without the fix, there will be stuck queries. 
Original commit: 11cdcc613e0cae37df3ad1beb1c91b76ce4ed95a/D18994 Reviewers: esheng, rthallam Reviewed By: rthallam Subscribers: ybase, rthallam Differential Revision: https://phabricator.dev.yugabyte.com/D19064 --- build-support/post_install.sh | 6 ++++- .../test/java/org/yb/cql/TestTransaction.java | 4 ++-- src/yb/client/client.cc | 1 + src/yb/client/transaction.cc | 22 +++++++++---------- src/yb/client/transaction.h | 4 ---- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/build-support/post_install.sh b/build-support/post_install.sh index 51b846daeeaa..f9889d49a77d 100755 --- a/build-support/post_install.sh +++ b/build-support/post_install.sh @@ -152,7 +152,11 @@ if [[ $install_mode == "true" ]]; then exit 1 fi - ln -sfT "$linuxbrew_dir" "$BREW_HOME" + if [[ "$linuxbrew_dir" != "$BREW_HOME" ]]; then + ln -sfT "$linuxbrew_dir" "$BREW_HOME" + else + echo "Skipping linuxbrew symlink, since it already has necessary length: $linuxbrew_dir" + fi # We are relying on the fact that $distribution_dir is not a symlink. 
We don't want to add symlink # resolution to the find command because someone may accidentally add a symlink pointing to a diff --git a/java/yb-cql/src/test/java/org/yb/cql/TestTransaction.java b/java/yb-cql/src/test/java/org/yb/cql/TestTransaction.java index fede2ffa33d4..1ad54b2520ab 100644 --- a/java/yb-cql/src/test/java/org/yb/cql/TestTransaction.java +++ b/java/yb-cql/src/test/java/org/yb/cql/TestTransaction.java @@ -624,11 +624,11 @@ public void testTimeout() throws Exception { LOG.info("Initial expired transactions = {}", initialExpiredTransactions); try { - thrown.expect(com.datastax.driver.core.exceptions.OperationTimedOutException.class); + thrown.expect(com.datastax.driver.core.exceptions.DriverException.class); session.execute("begin transaction" + " insert into test_timeout (k, v) values (1, 1);" + "end transaction;"); - } catch (com.datastax.driver.core.exceptions.OperationTimedOutException e) { + } catch (com.datastax.driver.core.exceptions.DriverException e) { int currentExpiredTransactions = getExpiredTransactionsCount(); LOG.info("Current expired transactions = {}", currentExpiredTransactions); assertTrue(currentExpiredTransactions > initialExpiredTransactions); diff --git a/src/yb/client/client.cc b/src/yb/client/client.cc index 226d8e1eb6cf..c093b642b311 100644 --- a/src/yb/client/client.cc +++ b/src/yb/client/client.cc @@ -487,6 +487,7 @@ Status YBClientBuilder::DoBuild(rpc::Messenger* messenger, std::unique_ptrclient_name_ + "cb"); tpb.set_max_threads(narrow_cast(callback_threadpool_size)); + tpb.set_min_threads(1); std::unique_ptr tp; RETURN_NOT_OK_PREPEND( tpb.Build(&tp), diff --git a/src/yb/client/transaction.cc b/src/yb/client/transaction.cc index 33425cbc0c63..24aeafa26626 100644 --- a/src/yb/client/transaction.cc +++ b/src/yb/client/transaction.cc @@ -319,6 +319,16 @@ class YBTransaction::Impl final : public internal::TxnBatcherIf { } const bool defer = !ready_ || *promotion_started; + if (!status_.ok()) { + auto status = status_; + 
lock.unlock(); + VLOG_WITH_PREFIX(2) << "Prepare, transaction already failed: " << status; + if (waiter) { + waiter(status); + } + return false; + } + if (!defer || initial) { PrepareOpsGroups(initial, ops_info->groups); } @@ -800,11 +810,6 @@ class YBTransaction::Impl final : public internal::TxnBatcherIf { return metadata_; } - void StartHeartbeat() { - VLOG_WITH_PREFIX(2) << __PRETTY_FUNCTION__; - RequestStatusTablet(TransactionRpcDeadline()); - } - void SetActiveSubTransaction(SubTransactionId id) { VLOG_WITH_PREFIX(4) << "set active sub txn=" << id << ", subtransaction_=" << subtransaction_.ToString(); @@ -2122,13 +2127,6 @@ Trace* YBTransaction::trace() { return impl_->trace(); } -YBTransactionPtr YBTransaction::Take( - TransactionManager* manager, const TransactionMetadata& metadata) { - auto result = std::make_shared(manager, metadata, PrivateOnlyTag()); - result->impl_->StartHeartbeat(); - return result; -} - void YBTransaction::SetActiveSubTransaction(SubTransactionId id) { return impl_->SetActiveSubTransaction(id); } diff --git a/src/yb/client/transaction.h b/src/yb/client/transaction.h index c9e029ba7f12..899d9f3cd993 100644 --- a/src/yb/client/transaction.h +++ b/src/yb/client/transaction.h @@ -149,10 +149,6 @@ class YBTransaction : public std::enable_shared_from_this { // So this transaction could be used by some other application instance. Result Release(); - // Creates transaction by metadata, could be used in pair with release to transfer transaction - // between application instances. - static YBTransactionPtr Take(TransactionManager* manager, const TransactionMetadata& metadata); - void SetActiveSubTransaction(SubTransactionId id); Status RollbackToSubTransaction(SubTransactionId id, CoarseTimePoint deadline);