From 063dbe5dc2b986c089c461dcfb6b471a607277f5 Mon Sep 17 00:00:00 2001 From: Minghui Yang Date: Mon, 9 Sep 2024 20:38:44 +0000 Subject: [PATCH] [#23786] YSQL: yb_make_next_ddl_statement_nonincrementing Summary: In a recent customer issue investigation, the customer performed an application upgrade, which involves running many DDLs (100+) consecutively. Many of these DDLs cause catalog version to increment. As a result, all the active connections kept doing catalog cache refreshes continuously that led to PG memory spike and latency spike. It will be beneficial to reduce the number of DDLs that increment the catalog version. After analyzing customer's DDL statements that have caused catalog version to increment, they can be classified as 3 categories: (1) create index statements that build indexes on a newly created table (2) create new partition statements and link them to the existing parent partition (3) create new table but referencing an existing table via foreign key For (1), because the table itself is newly created, there is no need for the create index statement to increment the catalog version. To make things worse, a create index statement by default runs concurrently, and increments the catalog version by 3. In this particular customer's situation, more than half of the DDLs belong to category (1). One solution is to combine create statement and the create index statement together. Using an example found online: ``` yugabyte=# create table foo ( yugabyte(# id serial primary key, yugabyte(# code integer, yugabyte(# label text); CREATE TABLE yugabyte=# create unique index foo_idx on foo using btree (code, label); NOTICE: index method "btree" was replaced with "lsm" in YugabyteDB CREATE INDEX yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 4 | 1 13252 | 1 | 1 (5 rows) ``` By create table and then create a unique index, the catalog version was incremented by 3. On the other hand, we can combine them into one create table statement: ``` yugabyte=# create table foo ( yugabyte(# id serial primary key, yugabyte(# code integer, yugabyte(# label text, yugabyte(# constraint foo_uq unique (code, label)); select *CREATE TABLE yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 1 | 1 13252 | 1 | 1 (5 rows) ``` We can see that there was no catalog version increment. However it is not always possible to do these rewrite. The create index statement does allow more options, such as specifying ASC, DESC which isn't allowed when declaring a uniqe constraint in the create table statement itself. It is in such a case we introduce the new GUC `yb_make_next_ddl_statement_nonincrementing` to help. This is an auto-reset gflag that is done similar to the existing GUC `yb_make_next_ddl_statement_nonbreaking`. When set to true, it will suppress the next DDL statement from incrementing both the `current_version` and the `last_breaking_version`. Note that in order for the GUC to work for create index statement, we need to use the nonconcurrent keyword. By default the create index statement runs concurrently and its algorithm involves incrementing the catalog version by 3 to work correctly. A create index noncurrently only bumps up the catalog version by 1. For a newly created table, because it is empty, it is safe and correct to run create index nonconcurrently. For example, ``` yugabyte=# create table foo(id int); CREATE TABLE yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 1 | 1 13252 | 1 | 1 (5 rows) yugabyte=# create index nonconcurrently id_idx on foo(id); CREATE INDEX yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 2 | 1 13252 | 1 | 1 (5 rows) ``` With the new GUC `yb_make_next_ddl_statement_nonincrementing`, the catalog version will stay the same: ``` yugabyte=# create table foo(id int); CREATE TABLE yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 1 | 1 13252 | 1 | 1 (5 rows) yugabyte=# set yb_make_next_ddl_statement_nonincrementing = true; SET yugabyte=# create index nonconcurrently id_idx on foo(id); CREATE INDEX yugabyte=# select * from pg_yb_catalog_version; db_oid | current_version | last_breaking_version --------+-----------------+----------------------- 1 | 1 | 1 13248 | 1 | 1 13249 | 1 | 1 13251 | 1 | 1 13252 | 1 | 1 (5 rows) ``` A new unit test is added. Jira: DB-12689 Test Plan: ./yb_build.sh --cxx-test pg_catalog_version-test --gtest_filter PgCatalogVersionTest.NonIncrementingDDLMode Reviewers: kfranz Reviewed By: kfranz Subscribers: svc_phabricator, yql Differential Revision: https://phorge.dev.yugabyte.com/D37915 --- src/postgres/src/backend/utils/misc/guc.c | 12 ++ .../src/backend/utils/misc/pg_yb_utils.c | 30 ++++- src/postgres/src/include/pg_yb_utils.h | 6 + .../yql/pgwrapper/pg_catalog_version-test.cc | 111 ++++++++++++++++++ 4 files changed, 156 insertions(+), 3 deletions(-) diff --git a/src/postgres/src/backend/utils/misc/guc.c b/src/postgres/src/backend/utils/misc/guc.c index b1b7383c7287..95323cebf7df 100644 --- a/src/postgres/src/backend/utils/misc/guc.c +++ b/src/postgres/src/backend/utils/misc/guc.c @@ -2450,6 +2450,18 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"yb_make_next_ddl_statement_nonincrementing", PGC_SUSET, CUSTOM_OPTIONS, + gettext_noop("When set, the next ddl statement will not cause " + "catalog version to increment. This only affects " + "the next ddl statement and resets automatically."), + NULL + }, + &yb_make_next_ddl_statement_nonincrementing, + false, + NULL, NULL, NULL + }, + { {"yb_plpgsql_disable_prefetch_in_for_query", PGC_USERSET, QUERY_TUNING, gettext_noop("Disable prefetching in a PLPGSQL FOR loop over a query."), diff --git a/src/postgres/src/backend/utils/misc/pg_yb_utils.c b/src/postgres/src/backend/utils/misc/pg_yb_utils.c index b3547855cebb..168158e5d905 100644 --- a/src/postgres/src/backend/utils/misc/pg_yb_utils.c +++ b/src/postgres/src/backend/utils/misc/pg_yb_utils.c @@ -1376,6 +1376,7 @@ bool yb_enable_index_aggregate_pushdown = true; bool yb_enable_optimizer_statistics = false; bool yb_bypass_cond_recheck = true; bool yb_make_next_ddl_statement_nonbreaking = false; +bool yb_make_next_ddl_statement_nonincrementing = false; bool yb_plpgsql_disable_prefetch_in_for_query = false; bool yb_enable_sequence_pushdown = true; bool yb_disable_wait_for_backends_catalog_version = false; @@ -1555,7 +1556,7 @@ MergeCatalogModificationAspects( } static void -YBResetEnableNonBreakingDDLMode() +YBResetEnableSpecialDDLMode() { /* * Reset yb_make_next_ddl_statement_nonbreaking to avoid its further side @@ -1567,6 +1568,17 @@ YBResetEnableNonBreakingDDLMode() if (YbIsClientYsqlConnMgr() && yb_make_next_ddl_statement_nonbreaking) YbSendParameterStatusForConnectionManager("yb_make_next_ddl_statement_nonbreaking", "false"); yb_make_next_ddl_statement_nonbreaking = false; + + /* + * Reset yb_make_next_ddl_statement_nonincrementing to avoid its further side + * effect that may not be intended. + * + * Also, reset Connection Manager cache if the value was cached to begin + * with. + */ + if (YbIsClientYsqlConnMgr() && yb_make_next_ddl_statement_nonincrementing) + YbSendParameterStatusForConnectionManager("yb_make_next_ddl_statement_nonincrementing", "false"); + yb_make_next_ddl_statement_nonincrementing = false; } /* @@ -1610,7 +1622,7 @@ YBResetDdlState() status = YbMemCtxReset(ddl_transaction_state.mem_context); } ddl_transaction_state = (struct DdlTransactionState){0}; - YBResetEnableNonBreakingDDLMode(); + YBResetEnableSpecialDDLMode(); HandleYBStatus(YBCPgClearSeparateDdlTxnMode()); HandleYBStatus(status); } @@ -1684,7 +1696,7 @@ YBDecrementDdlNestingLevel() if (GetCurrentMemoryContext() == ddl_transaction_state.mem_context) MemoryContextSwitchTo(ddl_transaction_state.mem_context->parent); - YBResetEnableNonBreakingDDLMode(); + YBResetEnableSpecialDDLMode(); bool increment_done = false; bool is_silent_altering = false; if (has_write) @@ -2241,6 +2253,18 @@ YbDdlModeOptional YbGetDdlMode( */ if (yb_make_next_ddl_statement_nonbreaking) is_breaking_change = false; + /* + * If yb_make_next_ddl_statement_nonincrementing is true, then no DDL statement + * will cause a catalog version to increment. Note that we also disable breaking + * catalog change as well because it does not make sense to only increment + * breaking breaking catalog version. + */ + if (yb_make_next_ddl_statement_nonincrementing) + { + is_version_increment = false; + is_breaking_change = false; + } + is_altering_existing_data |= is_version_increment; diff --git a/src/postgres/src/include/pg_yb_utils.h b/src/postgres/src/include/pg_yb_utils.h index c7d53142f5da..2766c684ed1a 100644 --- a/src/postgres/src/include/pg_yb_utils.h +++ b/src/postgres/src/include/pg_yb_utils.h @@ -489,6 +489,12 @@ extern bool yb_bypass_cond_recheck; */ extern bool yb_make_next_ddl_statement_nonbreaking; +/* + * Enables nonincrementing DDL mode in which a DDL statement is considered as a + * "same version DDL" and therefore will not cause catalog version to increment. + */ +extern bool yb_make_next_ddl_statement_nonincrementing; + /* * Allows capability to disable prefetching in a PLPGSQL FOR loop over a query. * This is introduced for some test(s) with lazy evaluation in READ COMMITTED diff --git a/src/yb/yql/pgwrapper/pg_catalog_version-test.cc b/src/yb/yql/pgwrapper/pg_catalog_version-test.cc index 5acd6babe0a0..e809828af032 100644 --- a/src/yb/yql/pgwrapper/pg_catalog_version-test.cc +++ b/src/yb/yql/pgwrapper/pg_catalog_version-test.cc @@ -49,6 +49,13 @@ class PgCatalogVersionTest : public LibPqTestBase { "--allowed_preview_flags_csv=ysql_enable_db_catalog_version_mode"); } + Result GetCatalogVersion(PGConn* conn) { + const auto db_oid = VERIFY_RESULT(conn->FetchRow(Format( + "SELECT oid FROM pg_database WHERE datname = '$0'", PQdb(conn->get())))); + return conn->FetchRow( + Format("SELECT current_version FROM pg_yb_catalog_version where db_oid = $0", db_oid)); + } + // Prepare the table pg_yb_catalog_version according to 'per_database_mode': // * if 'per_database_mode' is true, we prepare table pg_yb_catalog_version // for per-database catalog version mode by updating the table to have one @@ -1525,6 +1532,110 @@ TEST_F(PgCatalogVersionTest, NonBreakingDDLMode) { ASSERT_OK(conn1.Execute("ABORT")); } +TEST_F(PgCatalogVersionTest, NonIncrementingDDLMode) { + const string kDatabaseName = "yugabyte"; + + auto conn = ASSERT_RESULT(ConnectToDB(kDatabaseName)); + ASSERT_OK(conn.Execute("CREATE TABLE t1(a int)")); + auto version = ASSERT_RESULT(GetCatalogVersion(&conn)); + + // REVOKE bumps up the catalog version by 1. + ASSERT_OK(conn.Execute("REVOKE SELECT ON t1 FROM public")); + auto new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + // GRANT bumps up the catalog version by 1. + ASSERT_OK(conn.Execute("GRANT SELECT ON t1 TO public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + ASSERT_OK(conn.Execute("CREATE INDEX idx1 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + // By default CREATE INDEX runs concurrently and its algorithm requires to bump up catalog + // version 3 times. + ASSERT_EQ(new_version, version + 3); + version = new_version; + + // CREATE INDEX CONCURRENTLY bumps up catalog version by 1. + ASSERT_OK(conn.Execute("CREATE INDEX NONCONCURRENTLY idx2 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + // Let's start over, but this time use yb_make_next_ddl_statement_nonincrementing to suppress + // incrementing catalog version. + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("REVOKE SELECT ON t1 FROM public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version); + + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("GRANT SELECT ON t1 TO public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version); + + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("CREATE INDEX idx3 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + // By default CREATE INDEX runs concurrently and its algorithm requires to bump up catalog + // version 3 times, only the first bump is suppressed. + ASSERT_EQ(new_version, version + 2); + version = new_version; + + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("CREATE INDEX NONCONCURRENTLY idx4 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version); + + // Verify that the session variable yb_make_next_ddl_statement_nonbreaking auto-resets to false. + ASSERT_OK(conn.Execute("REVOKE SELECT ON t1 FROM public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + // Since yb_make_next_ddl_statement_nonbreaking auto-resets to false, we should see catalog + // version gets bumped up as before. + ASSERT_OK(conn.Execute("GRANT SELECT ON t1 TO public")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + ASSERT_OK(conn.Execute("CREATE INDEX idx5 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 3); + version = new_version; + + ASSERT_OK(conn.Execute("CREATE INDEX NONCONCURRENTLY idx6 ON t1(a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version + 1); + version = new_version; + + // Now test the scenario where we create a new table, followed by create index nonconcurrently + // on the new table. Use yb_make_next_ddl_statement_nonbreaking to suppress catalog version + // increment on the create index statement. + // First create a second connection conn2. + auto conn2 = ASSERT_RESULT(ConnectToDB(kDatabaseName)); + + ASSERT_OK(conn.Execute("CREATE TABLE demo (a INT, b INT)")); + ASSERT_OK(conn.Execute("SET yb_make_next_ddl_statement_nonincrementing TO TRUE")); + ASSERT_OK(conn.Execute("CREATE INDEX NONCONCURRENTLY a_idx ON demo (a)")); + new_version = ASSERT_RESULT(GetCatalogVersion(&conn)); + ASSERT_EQ(new_version, version); + + // Sanity test on conn2 write, count, select and delete on the new table created on conn. + ASSERT_OK(conn2.Execute("INSERT INTO demo SELECT n, n FROM generate_series(1,100) n")); + auto row_count = ASSERT_RESULT(conn.FetchRow("SELECT COUNT(*) FROM demo")); + ASSERT_EQ(row_count, 100); + std::tuple expected_row = {50, 50}; + auto row = ASSERT_RESULT((conn2.FetchRow("SELECT * FROM demo WHERE a = 50"))); + ASSERT_EQ(row, expected_row); + ASSERT_OK(conn2.Execute("DELETE FROM demo WHERE a = 50")); + row_count = ASSERT_RESULT(conn.FetchRow("SELECT COUNT(*) FROM demo")); + ASSERT_EQ(row_count, 99); +} + TEST_F(PgCatalogVersionTest, SimulateRollingUpgrade) { // Manually switch back to non-per-db catalog version mode. RestartClusterWithoutDBCatalogVersionMode();