-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[#22135] YSQL: Avoid read restart errors with ANALYZE
Summary: In the current state of the database, ANALYZE can run for a long time on large tables. This long duration increases the chances of errors. We want to minimize such error situations since rerunning ANALYZE after an error can be expensive. First, we tackle the read restart errors. ANALYZE does not require a strict read-after-commit-visibility guarantee, i.e. slightly stale reads are not an issue for the ANALYZE operation. Therefore, we want to avoid these errors for ANALYZE in particular. For this reason, we do not use an ambiguity window (i.e. we collapse the ambiguity window to a single point) for ANALYZE. Moreover, in the current state of the database, DDLs are executed in a "special" transaction separate from the usual transaction code path. This means that multi-table ANALYZE operations such as `ANALYZE;` use a single read point for the entirety of the operation. This is undesirable since there may be a lot of tables in the database, and that increases the risk of a "snapshot too old" error. For this reason, we explicitly pass a fresh read time for the ANALYZE of each table from the Pg layer to the tserver proxy. Pg does not exhibit this problem since (a) it runs the ANALYZE of each table in a separate transaction and (b) it does not clean up MVCC records that are in use. Jira: DB-11062 Test Plan: Jenkins #### Test 1 ``` ./yb_build.sh --cxx-test pg_analyze_read_time-test --gtest_filter PgAnalyzeReadTimeTest.InsertRowsConcurrentlyWithAnalyze ``` Insert rows concurrently with ANALYZE to trigger read restart errors. #### Test 2 ``` ./yb_build.sh --cxx-test pg_analyze_read_time-test --gtest_filter PgAnalyzeReadTimeTest.AnalyzeMultipleTables ``` Analyze two tables and do a full compaction between the two ANALYZE runs. #### Test 3 ```lang=sh $ ./bin/ysqlsh yugabyte=# create table keys(k int); CREATE TABLE ... concurrently insert rows using ysql_bench and wait for a while yugabyte=# analyze keys; ... 
fails with a read restart error prior to this change but not with this change. ``` To insert rows concurrently use the following sql script ```name=insert.sql,lang=sql \set random_id random(1, 1000000) INSERT INTO keys (k) VALUES (:random_id); ``` Ran ysql_bench using ```lang=sh build/latest/postgres/bin/ysql_bench -t 100000 -f ../insert.sql -n -R 200 ``` Reviewers: pjain, bkolagani, yguan Reviewed By: bkolagani, yguan Subscribers: ybase, smishra, svc_phabricator, steve.varnau, yql Differential Revision: https://phorge.dev.yugabyte.com/D37648
- Loading branch information
Showing
5 changed files
with
173 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
// Copyright (c) YugabyteDB, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | ||
// in compliance with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software distributed under the License | ||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | ||
// or implied. See the License for the specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#include <atomic> | ||
#include <thread> | ||
|
||
#include "yb/common/pgsql_error.h" | ||
#include "yb/util/flags.h" | ||
|
||
#include "yb/yql/pgwrapper/pg_mini_test_base.h" | ||
#include "yb/yql/pgwrapper/pg_test_utils.h" | ||
|
||
DECLARE_string(ysql_pg_conf_csv); | ||
DECLARE_string(ysql_log_statement); | ||
DECLARE_bool(ysql_beta_features); | ||
DECLARE_string(vmodule); | ||
DECLARE_int32(timestamp_history_retention_interval_sec); | ||
DECLARE_int64(TEST_delay_after_table_analyze_ms); | ||
|
||
namespace yb::pgwrapper { | ||
|
||
// Base fixture for the ANALYZE read-time tests. | ||
// Before delegating to PgMiniTestBase::SetUp() it sets FLAGS_ysql_beta_features | ||
// to true and FLAGS_ysql_log_statement to "all". | ||
class PgAnalyzeReadTimeTest : public PgMiniTestBase { | ||
public: | ||
// Applies the flag overrides, then runs the base fixture's SetUp(). | ||
void SetUp() override { | ||
// ANALYZE is a beta feature. | ||
ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_beta_features) = true; | ||
// Easier debugging: log every statement. The commented-out line below is kept | ||
// as an optional vmodule override for read_query (left disabled here). | ||
// ASSERT_OK(SET_FLAG(vmodule, "read_query=1")); | ||
ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_log_statement) = "all"; | ||
// NOTE(review): flags are written before the base SetUp(); presumably the | ||
// mini cluster reads them at startup -- confirm against PgMiniTestBase. | ||
PgMiniTestBase::SetUp(); | ||
} | ||
}; | ||
|
||
// Fixture variant that assigns MaxQueryLayerRetriesConf(0) to | ||
// FLAGS_ysql_pg_conf_csv and then runs PgAnalyzeReadTimeTest::SetUp(). | ||
class PgAnalyzeNoReadRestartsTest : public PgAnalyzeReadTimeTest { | ||
public: | ||
// Overrides the pg conf flag, then defers to the parent fixture's SetUp(). | ||
void SetUp() override { | ||
// So that read restart errors are not retried internally. | ||
// With retries disabled, such an error is reported to the test instead. | ||
ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_pg_conf_csv) = | ||
MaxQueryLayerRetriesConf(0); | ||
PgAnalyzeReadTimeTest::SetUp(); | ||
} | ||
}; | ||
|
||
// Runs "ANALYZE keys" on one connection while a second connection keeps | ||
// inserting rows into the same table, and asserts that ANALYZE returns OK. | ||
// The PgAnalyzeNoReadRestartsTest fixture sets query-layer retries to 0, so a | ||
// read restart error hit by ANALYZE would surface as a failed status here. | ||
TEST_F_EX(PgAnalyzeReadTimeTest, InsertRowsConcurrentlyWithAnalyze, PgAnalyzeNoReadRestartsTest) { | ||
constexpr auto kNumInitialRows = 100000; | ||
|
||
// Create table with keys from 1 to kNumInitialRows. | ||
auto setup_conn = ASSERT_RESULT(Connect()); | ||
ASSERT_OK(setup_conn.Execute("CREATE TABLE keys (k INT) SPLIT INTO 3 TABLETS")); | ||
ASSERT_OK(setup_conn.ExecuteFormat( | ||
"INSERT INTO keys(k) SELECT GENERATE_SERIES(1, $0)", kNumInitialRows)); | ||
|
||
// Warm the catalog cache so that subsequent inserts are fast. | ||
// Unfortunately, this is necessary because this test depends on timing. | ||
auto insert_conn = ASSERT_RESULT(Connect()); | ||
// `key` tracks the last inserted key; new keys continue after the initial rows. | ||
auto key = kNumInitialRows; | ||
// Populates catalog cache. | ||
key++; | ||
ASSERT_OK(insert_conn.ExecuteFormat( | ||
"INSERT INTO keys(k) VALUES ($0)", key)); | ||
|
||
// `stop` is set by the ANALYZE task once its Execute() call returns; | ||
// `begin_analyze` holds that task back until the main thread counts it down. | ||
std::atomic<bool> stop{false}; | ||
CountDownLatch begin_analyze(1); | ||
auto analyze_conn = ASSERT_RESULT(Connect()); | ||
auto analyze_status_future = std::async(std::launch::async, [&] { | ||
begin_analyze.Wait(); | ||
auto status = analyze_conn.Execute("ANALYZE keys"); | ||
stop.store(true); | ||
return status; | ||
}); | ||
|
||
// Release the ANALYZE task, then insert until ANALYZE finishes or `key` | ||
// reaches kNumInitialRows + 100, whichever comes first. | ||
begin_analyze.CountDown(); | ||
while (!stop.load() && key < kNumInitialRows + 100) { | ||
key++; | ||
ASSERT_OK(insert_conn.ExecuteFormat( | ||
"INSERT INTO keys(k) VALUES ($0)", key)); | ||
|
||
// Throttle inserts to avoid overloading the system. | ||
std::this_thread::sleep_for(10ms); | ||
} | ||
|
||
// get() blocks until the ANALYZE task returns its status, which must be OK. | ||
ASSERT_OK(analyze_status_future.get()); | ||
} | ||
|
||
// Fixture variant for the multi-table ANALYZE test. It sets | ||
// FLAGS_timestamp_history_retention_interval_sec to 0 and | ||
// FLAGS_TEST_delay_after_table_analyze_ms to 10000 before running | ||
// PgAnalyzeReadTimeTest::SetUp(). | ||
class PgAnalyzeMultiTableTest : public PgAnalyzeReadTimeTest { | ||
public: | ||
// Applies the two flag overrides, then defers to the parent fixture's SetUp(). | ||
void SetUp() override { | ||
// NOTE(review): presumably a retention interval of 0 lets compaction drop | ||
// overwritten history right away -- confirm against the flag definition. | ||
ANNOTATE_UNPROTECTED_WRITE( | ||
FLAGS_timestamp_history_retention_interval_sec) = 0; | ||
// This test is timing based and 10s provides enough time for compaction. | ||
ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_delay_after_table_analyze_ms) = 10000; | ||
PgAnalyzeReadTimeTest::SetUp(); | ||
} | ||
}; | ||
|
||
// Runs "ANALYZE keys, values" while a background task updates every row of | ||
// `values` and then calls FlushAndCompactTablets(). Asserts that both the | ||
// ANALYZE and the UPDATE return OK. The PgAnalyzeMultiTableTest fixture adds a | ||
// delay after each table's ANALYZE so that the compaction can happen in between. | ||
TEST_F_EX(PgAnalyzeReadTimeTest, AnalyzeMultipleTables, PgAnalyzeMultiTableTest) { | ||
constexpr auto kNumInitialRows = 10000; | ||
|
||
// Create tables `keys` and `values`, each with rows 1 to kNumInitialRows. | ||
auto setup_conn = ASSERT_RESULT(Connect()); | ||
ASSERT_OK(setup_conn.Execute("CREATE TABLE keys (k INT)")); | ||
ASSERT_OK(setup_conn.ExecuteFormat( | ||
"INSERT INTO keys(k) SELECT GENERATE_SERIES(1, $0)", kNumInitialRows)); | ||
ASSERT_OK(setup_conn.Execute("CREATE TABLE values (v INT)")); | ||
ASSERT_OK(setup_conn.ExecuteFormat( | ||
"INSERT INTO values(v) SELECT GENERATE_SERIES(1, $0)", kNumInitialRows)); | ||
|
||
// Separate connections so the UPDATE and the ANALYZE can run concurrently. | ||
auto update_conn = ASSERT_RESULT(Connect()); | ||
auto analyze_conn = ASSERT_RESULT(Connect()); | ||
|
||
// The latch is counted down as soon as the background task starts, before | ||
// it issues the UPDATE; the main thread waits on it before running ANALYZE. | ||
CountDownLatch update_thread_started(1); | ||
auto update_status_future = std::async(std::launch::async, [&] { | ||
update_thread_started.CountDown(); | ||
auto status = update_conn.Execute("UPDATE values SET v = v + 1"); | ||
// Compaction runs regardless of the UPDATE status; the status is checked later. | ||
FlushAndCompactTablets(); | ||
LOG(INFO) << "Compaction done!"; | ||
return status; | ||
}); | ||
|
||
update_thread_started.Wait(); | ||
auto analyze_status = analyze_conn.Execute("ANALYZE keys, values"); | ||
ASSERT_OK(analyze_status); | ||
LOG(INFO) << "Analyze done!"; | ||
|
||
// get() blocks until the background task returns the UPDATE status. | ||
ASSERT_OK(update_status_future.get()); | ||
} | ||
|
||
} // namespace yb::pgwrapper |