// Patch overview. Touches src/yb/client/meta_cache.cc and src/yb/client/ql-stress-test.cc.
//
// NOTE(review): this copy of the patch has lost its line breaks (the lines of every hunk
// are run together), so it will not apply as-is; regenerate it from the commit before use.
// NOTE(review): `const std::shared_ptr& table` and `std::optional tablet` look like they
// lost their template arguments during extraction -- confirm against the real source.
//
// meta_cache.cc:
//  * Adds the test flag sleep_before_metacache_lookup_ms (int32, default 0). When the flag
//    is > 0, MetaCache::LookupTabletByKey sleeps for RandomUniformInt(1, flag) milliseconds
//    and then logs the slept duration at VLOG level 2, before the
//    table->ArePartitionsStale() check.
//  * MetaCache::DoLookupTabletByKey and MetaCache::DoLookupTabletById: the two error paths
//    shown here no longer call *callback directly. These are the TryAgain error carrying
//    kTablePartitionListIsStale, and the NotFound `Tablet deleted` error under
//    use_cache && !include_deleted. Each path now stores the error in a new local `status`
//    (initialised to Status::OK()) and returns true; the ScopeExit handler declared at the
//    top of the function calls (*callback)(status) when no tablet was set and status is
//    not OK.
//    Presumably this defers the error callback until locks taken inside the function have
//    been released (the accompanying test is named ReproMetaCacheDeadlock) -- the locking
//    itself is outside these hunks, so confirm.
//
// ql-stress-test.cc:
//  * Declares TEST_simulate_lookup_partition_list_mismatch_probability and
//    TEST_sleep_before_metacache_lookup_ms, and adds QLStressTest.ReproMetaCacheDeadlock.
//    The test sets the mismatch probability to 0.8 and the sleep flag to 50, then runs
//    TestRetryWrites with restarts enabled.
diff --git a/src/yb/client/meta_cache.cc b/src/yb/client/meta_cache.cc index 66b512b0d6c0..bd1c08886984 100644 --- a/src/yb/client/meta_cache.cc +++ b/src/yb/client/meta_cache.cc @@ -110,6 +110,8 @@ DEFINE_test_flag(bool, force_master_lookup_all_tablets, false, "If set, force the client to go to the master for all tablet lookup " "instead of reading from cache."); +DEFINE_test_flag(int32, sleep_before_metacache_lookup_ms, 0, + "If set, will sleep in LookupTabletByKey for a random amount up to this value."); DEFINE_test_flag(double, simulate_lookup_timeout_probability, 0, "If set, mark an RPC as failed and force retry on the first attempt."); DEFINE_test_flag(double, simulate_lookup_partition_list_mismatch_probability, 0, @@ -1883,9 +1885,12 @@ bool MetaCache::DoLookupTabletByKey( LookupTabletCallback* callback, PartitionGroupStartKeyPtr* partition_group_start) { DCHECK_ONLY_NOTNULL(partition_group_start); RemoteTabletPtr tablet; - auto scope_exit = ScopeExit([callback, &tablet] { + Status status = Status::OK(); + auto scope_exit = ScopeExit([callback, &tablet, &status] { if (tablet) { (*callback)(tablet); + } else if (!status.ok()) { + (*callback)(status); } }); int64_t request_no; @@ -1912,13 +1917,13 @@ bool MetaCache::DoLookupTabletByKey( (PREDICT_FALSE(RandomActWithProbability( FLAGS_TEST_simulate_lookup_partition_list_mismatch_probability)) && table->table_type() != YBTableType::TRANSACTION_STATUS_TABLE_TYPE)) { - (*callback)(STATUS( + status = STATUS( TryAgain, Format( "MetaCache's table $0 partitions version does not match, cached: $1, got: $2, " "refresh required", table->ToString(), table_data->partition_list->version, partitions->version), - ClientError(ClientErrorCode::kTablePartitionListIsStale))); + ClientError(ClientErrorCode::kTablePartitionListIsStale)); return true; } @@ -2025,6 +2030,12 @@ void MetaCache::LookupTabletByKey(const std::shared_ptr& table, return; } + if (FLAGS_TEST_sleep_before_metacache_lookup_ms > 0) { + MonoDelta sleep_time = 
MonoDelta::FromMilliseconds(1) * + RandomUniformInt(1, FLAGS_TEST_sleep_before_metacache_lookup_ms); + SleepFor(sleep_time); + VLOG_WITH_FUNC(2) << "Slept for " << sleep_time; + } if (table->ArePartitionsStale()) { RefreshTablePartitions( table, @@ -2118,9 +2129,12 @@ bool MetaCache::DoLookupTabletById( UseCache use_cache, LookupTabletCallback* callback) { std::optional tablet = std::nullopt; - auto scope_exit = ScopeExit([callback, &tablet] { + Status status = Status::OK(); + auto scope_exit = ScopeExit([callback, &tablet, &status] { if (tablet) { (*callback)(*tablet); + } else if (!status.ok()) { + (*callback)(status); } }); int64_t request_no; @@ -2136,7 +2150,7 @@ bool MetaCache::DoLookupTabletById( if (use_cache) { if (!include_deleted) { tablet = std::nullopt; - (*callback)(STATUS(NotFound, "Tablet deleted")); + status = STATUS(NotFound, "Tablet deleted"); } return true; } diff --git a/src/yb/client/ql-stress-test.cc b/src/yb/client/ql-stress-test.cc index 5365ac7d813a..6a4e80c4b77f 100644 --- a/src/yb/client/ql-stress-test.cc +++ b/src/yb/client/ql-stress-test.cc @@ -74,8 +74,10 @@ DECLARE_bool(detect_duplicates_for_retryable_requests); DECLARE_bool(enable_ondisk_compression); DECLARE_bool(ycql_enable_packed_row); DECLARE_double(TEST_respond_write_failed_probability); +DECLARE_double(TEST_simulate_lookup_partition_list_mismatch_probability); DECLARE_double(transaction_max_missed_heartbeat_periods); DECLARE_int32(TEST_max_write_waiters); +DECLARE_int32(TEST_sleep_before_metacache_lookup_ms); DECLARE_int32(client_read_write_timeout_ms); DECLARE_int32(log_cache_size_limit_mb); DECLARE_int32(log_min_seconds_to_retain); @@ -441,6 +443,12 @@ TEST_F(QLStressTest, RetryWritesWithRestarts) { TestRetryWrites(true /* restarts */); } +TEST_F(QLStressTest, ReproMetaCacheDeadlock) { + ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_simulate_lookup_partition_list_mismatch_probability) = 0.8; + ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_sleep_before_metacache_lookup_ms) = 50; + 
TestRetryWrites(true /* restarts */); +} + void SetTransactional(YBSchemaBuilder* builder) { TableProperties table_properties; table_properties.SetTransactional(true);