From 4017745693546e9ab1d29ff7cc7f4ba4e1acc102 Mon Sep 17 00:00:00 2001 From: Minghui Yang Date: Mon, 9 Sep 2024 20:38:44 +0000 Subject: [PATCH] [BACKPORT 2.20][#23786] YSQL: yb_make_next_ddl_statement_nonincrementing Summary: In a recent customer issue investigation, the customer performed an application upgrade, which involves running many DDLs (100+) consecutively. Many of these DDLs cause catalog version to increment. As a result, all the active connections kept doing catalog cache refreshes continuously that led to PG memory spike and latency spike. It will be beneficial to reduce the number of DDLs that increment the catalog version. After analyzing customer's DDL statements that have caused catalog version to increment, they can be classified as 3 categories: (1) create index statements that build indexes on a newly created table (2) create new partition statements and link them to the existing parent partition (3) create new table but referencing an existing table via foreign key For (1), because the table itself is newly created, there is no need for the create index statement to increment the catalog version. To make things worse, a create index statement by default runs concurrently, and increments the catalog version by 3. In this particular customer's situation, more than half of the DDLs belong to category (1). One solution is to combine create statement and the create index statement together. Using an example found online: ``` yugabyte=# create table foo ( yugabyte(# id serial primary key, yugabyte(# code integer, yugabyte(# label text); CREATE TABLE yugabyte=# create unique index foo_idx on foo using btree (code, label); NOTICE: index method "btree" was replaced with "lsm" in YugabyteDB CREATE INDEX yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 4 | 1 13252 | 1 | 1 (5 rows) ``` By create table and then create a unique index, the catalog version was incremented by 3. On the other hand, we can combine them into one create table statement: ``` yugabyte=# create table foo ( yugabyte(# id serial primary key, yugabyte(# code integer, yugabyte(# label text, yugabyte(# constraint foo_uq unique (code, label)); select *CREATE TABLE yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 1 | 1 13252 | 1 | 1 (5 rows) ``` We can see that there was no catalog version increment. However it is not always possible to do these rewrite. The create index statement does allow more options, such as specifying ASC, DESC which isn't allowed when declaring a uniqe constraint in the create table statement itself. It is in such a case we introduce the new GUC `yb_make_next_ddl_statement_nonincrementing` to help. This is an auto-reset gflag that is done similar to the existing GUC `yb_make_next_ddl_statement_nonbreaking`. When set to true, it will suppress the next DDL statement from incrementing both the `current_version` and the `last_breaking_version`. Note that in order for the GUC to work for create index statement, we need to use the nonconcurrent keyword. By default the create index statement runs concurrently and its algorithm involves incrementing the catalog version by 3 to work correctly. A create index noncurrently only bumps up the catalog version by 1. For a newly created table, because it is empty, it is safe and correct to run create index nonconcurrently. For example, ``` yugabyte=# create table foo(id int); CREATE TABLE yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 1 | 1 13252 | 1 | 1 (5 rows) yugabyte=# create index nonconcurrently id_idx on foo(id); CREATE INDEX yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 2 | 1 13252 | 1 | 1 (5 rows) ``` With the new GUC `yb_make_next_ddl_statement_nonincrementing`, the catalog version will stay the same: ``` yugabyte=# create table foo(id int); CREATE TABLE yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 1 | 1 13252 | 1 | 1 (5 rows) yugabyte=# set yb_make_next_ddl_statement_nonincrementing = true; SET yugabyte=# create index nonconcurrently id_idx on foo(id); CREATE INDEX yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 1 | 1 13252 | 1 | 1 (5 rows) ``` A new unit test is added. Jira: DB-12689 Original commit: 063dbe5dc2b986c089c461dcfb6b471a607277f5 / D37915 Test Plan: ./yb_build.sh --cxx-test pg_catalog_version-test --gtest_filter PgCatalogVersionTest.NonIncrementingDDLMode Reviewers: kfranz Reviewed By: kfranz Subscribers: yql, svc_phabricator Differential Revision: https://phorge.dev.yugabyte.com/D38128 --- src/postgres/src/backend/utils/misc/guc.c | 12 ++ .../src/backend/utils/misc/pg_yb_utils.c | 25 +++- src/postgres/src/include/pg_yb_utils.h | 6 + .../yql/pgwrapper/pg_catalog_version-test.cc | 119 ++++++++++++++++++ 4 files changed, 159 insertions(+), 3 deletions(-) diff --git a/src/postgres/src/backend/utils/misc/guc.c b/src/postgres/src/backend/utils/misc/guc.c index fc3fe35ab30e..8afc9883c2fa 100644 --- a/src/postgres/src/backend/utils/misc/guc.c +++ b/src/postgres/src/backend/utils/misc/guc.c @@ -2281,6 +2281,18 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"yb_make_next_ddl_statement_nonincrementing", PGC_SUSET, CUSTOM_OPTIONS, + gettext_noop("When set, the next ddl statement will not cause " + "catalog version to increment. This only affects " + "the next ddl statement and resets automatically."), + NULL + }, + &yb_make_next_ddl_statement_nonincrementing, + false, + NULL, NULL, NULL + }, + { {"yb_plpgsql_disable_prefetch_in_for_query", PGC_USERSET, QUERY_TUNING, gettext_noop("Disable prefetching in a PLPGSQL FOR loop over a query."), diff --git a/src/postgres/src/backend/utils/misc/pg_yb_utils.c b/src/postgres/src/backend/utils/misc/pg_yb_utils.c index c43be0d29442..5c154c8e20cf 100644 --- a/src/postgres/src/backend/utils/misc/pg_yb_utils.c +++ b/src/postgres/src/backend/utils/misc/pg_yb_utils.c @@ -1338,6 +1338,7 @@ bool yb_enable_index_aggregate_pushdown = true; bool yb_enable_optimizer_statistics = false; bool yb_bypass_cond_recheck = true; bool yb_make_next_ddl_statement_nonbreaking = false; +bool yb_make_next_ddl_statement_nonincrementing = false; bool yb_plpgsql_disable_prefetch_in_for_query = false; bool yb_enable_sequence_pushdown = true; bool yb_disable_wait_for_backends_catalog_version = false; @@ -1442,13 +1443,19 @@ typedef struct DdlTransactionState { static DdlTransactionState ddl_transaction_state = {0}; static void -YBResetEnableNonBreakingDDLMode() +YBResetEnableSpecialDDLMode() { /* * Reset yb_make_next_ddl_statement_nonbreaking to avoid its further side * effect that may not be intended. */ yb_make_next_ddl_statement_nonbreaking = false; + + /* + * Reset yb_make_next_ddl_statement_nonincrementing to avoid its further side + * effect that may not be intended. + */ + yb_make_next_ddl_statement_nonincrementing = false; } /* @@ -1492,7 +1499,7 @@ YBResetDdlState() status = YbMemCtxReset(ddl_transaction_state.mem_context); } ddl_transaction_state = (struct DdlTransactionState){0}; - YBResetEnableNonBreakingDDLMode(); + YBResetEnableSpecialDDLMode(); HandleYBStatus(YBCPgClearSeparateDdlTxnMode()); HandleYBStatus(status); } @@ -1550,7 +1557,7 @@ YBDecrementDdlNestingLevel() */ ddl_transaction_state.mem_context = NULL; - YBResetEnableNonBreakingDDLMode(); + YBResetEnableSpecialDDLMode(); bool is_catalog_version_increment = ddl_transaction_state.is_catalog_version_increment; bool is_breaking_catalog_change = ddl_transaction_state.is_breaking_catalog_change; bool is_global_ddl = ddl_transaction_state.is_global_ddl; @@ -2080,6 +2087,18 @@ bool IsTransactionalDdlStatement(PlannedStmt *pstmt, if (yb_make_next_ddl_statement_nonbreaking) *is_breaking_catalog_change = false; + /* + * If yb_make_next_ddl_statement_nonincrementing is true, then no DDL statement + * will cause a catalog version to increment. Note that we also disable breaking + * catalog change as well because it does not make sense to only increment + * breaking breaking catalog version. + */ + if (yb_make_next_ddl_statement_nonincrementing) + { + *is_catalog_version_increment = false; + *is_breaking_catalog_change = false; + } + /* * For DDL, it does not make sense to get breaking catalog change without * catalog version increment. diff --git a/src/postgres/src/include/pg_yb_utils.h b/src/postgres/src/include/pg_yb_utils.h index 0df3810d1c99..989d9d7056a5 100644 --- a/src/postgres/src/include/pg_yb_utils.h +++ b/src/postgres/src/include/pg_yb_utils.h @@ -473,6 +473,12 @@ extern bool yb_bypass_cond_recheck; */ extern bool yb_make_next_ddl_statement_nonbreaking; +/* + * Enables nonincrementing DDL mode in which a DDL statement is considered as a + * "same version DDL" and therefore will not cause catalog version to increment. + */ +extern bool yb_make_next_ddl_statement_nonincrementing; + /* * Allows capability to disable prefetching in a PLPGSQL FOR loop over a query. * This is introduced for some test(s) with lazy evaluation in READ COMMITTED diff --git a/src/yb/yql/pgwrapper/pg_catalog_version-test.cc b/src/yb/yql/pgwrapper/pg_catalog_version-test.cc index 865b4d252e1e..76641d087206 100644 --- a/src/yb/yql/pgwrapper/pg_catalog_version-test.cc +++ b/src/yb/yql/pgwrapper/pg_catalog_version-test.cc @@ -48,6 +48,16 @@ class PgCatalogVersionTest : public LibPqTestBase { "--allowed_preview_flags_csv=ysql_enable_db_catalog_version_mode"); } + Result GetCatalogVersion(PGConn* conn, bool per_database_mode = false) { + if (per_database_mode) { + const auto db_oid = VERIFY_RESULT(conn->FetchValue(Format( + "SELECT oid FROM pg_database WHERE datname = '$0'", PQdb(conn->get())))); + return conn->FetchValue( + Format("SELECT current_version FROM pg_yb_catalog_version where db_oid = $0", db_oid)); + } + return conn->FetchValue("SELECT current_version FROM pg_yb_catalog_version"); + } + // Prepare the table pg_yb_catalog_version according to 'per_database_mode': // * if 'per_database_mode' is true, we prepare table pg_yb_catalog_version // for per-database catalog version mode by updating the table to have one @@ -1394,6 +1404,115 @@ TEST_F(PgCatalogVersionTest, NonBreakingDDLMode) { ASSERT_OK(conn1.Execute("ABORT")); } +TEST_F(PgCatalogVersionTest, NonIncrementingDDLMode) { + const string kDatabaseName = "yugabyte"; + + auto conn = ASSERT_RESULT(ConnectToDB(kDatabaseName)); + ASSERT_OK(conn.Execute("CREATE TABLE t1(a int)")); + auto version = ASSERT_RESULT(GetCatalogVersion(&conn)); + + // REVOKE bumps up the catalog version by 1. + ASSERT_OK(conn.Execute("REVOKE SELECT ON t1 FROM public")); + auto new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + // GRANT bumps up the catalog version by 1. + ASSERT_OK(conn.Execute("GRANT SELECT ON t1 TO public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + ASSERT_OK(conn.Execute("CREATE INDEX idx1 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + // By default CREATE INDEX runs concurrently and its algorithm requires to bump up catalog + // version 3 times. + ASSERT_EQ(new_version, version + 3); + version = new_version; + + // CREATE INDEX CONCURRENTLY bumps up catalog version by 1. + ASSERT_OK(conn.Execute("CREATE INDEX NONCONCURRENTLY idx2 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + // Let's start over, but this time use yb_make_next_ddl_statement_nonincrementing to suppress + // incrementing catalog version. + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("REVOKE SELECT ON t1 FROM public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version); + + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("GRANT SELECT ON t1 TO public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version); + + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("CREATE INDEX idx3 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + // By default CREATE INDEX runs concurrently and its algorithm requires to bump up catalog + // version 3 times, only the first bump is suppressed. + ASSERT_EQ(new_version, version + 2); + version = new_version; + + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("CREATE INDEX NONCONCURRENTLY idx4 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version); + + // Verify that the session variable yb_make_next_ddl_statement_nonbreaking auto-resets to false. + ASSERT_OK(conn.Execute("REVOKE SELECT ON t1 FROM public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + // Since yb_make_next_ddl_statement_nonbreaking auto-resets to false, we should see catalog + // version gets bumped up as before. + ASSERT_OK(conn.Execute("GRANT SELECT ON t1 TO public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + ASSERT_OK(conn.Execute("CREATE INDEX idx5 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 3); + version = new_version; + + ASSERT_OK(conn.Execute("CREATE INDEX NONCONCURRENTLY idx6 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + // Now test the scenario where we create a new table, followed by create index nonconcurrently + // on the new table. Use yb_make_next_ddl_statement_nonbreaking to suppress catalog version + // increment on the create index statement. + // First create a second connection conn2. + auto conn2 = ASSERT_RESULT(ConnectToDB(kDatabaseName)); + + ASSERT_OK(conn.Execute("CREATE TABLE demo (a INT, b INT)")); + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("CREATE INDEX NONCONCURRENTLY a_idx ON demo (a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version); + + // Sanity test on conn2 write, count, select and delete on the new table created on conn. + ASSERT_OK(conn2.Execute("INSERT INTO demo SELECT n, n FROM generate_series(1,100) n")); + auto row_count = ASSERT_RESULT(conn.FetchValue("SELECT COUNT(*) FROM demo")); + ASSERT_EQ(row_count, 100); + std::tuple expected_row = {50, 50}; + auto res = ASSERT_RESULT(conn2.Fetch("SELECT * FROM demo WHERE a = 50")); + const auto lines = PQntuples(res.get()); + ASSERT_EQ(lines, 1); + const auto a = ASSERT_RESULT(GetValue(res.get(), 0, 0)); + const auto b = ASSERT_RESULT(GetValue(res.get(), 0, 1)); + std::tuple row = {a, b}; + ASSERT_EQ(row, expected_row); + ASSERT_OK(conn2.Execute("DELETE FROM demo WHERE a = 50")); + row_count = ASSERT_RESULT(conn.FetchValue("SELECT COUNT(*) FROM demo")); + ASSERT_EQ(row_count, 99); +} + TEST_F(PgCatalogVersionTest, SimulateRollingUpgrade) { // Manually switch back to non-per-db catalog version mode. RestartClusterWithoutDBCatalogVersionMode();