Skip to content

Commit

Permalink
[#24313] YSQL: fix index build corruption during concurrent updates
Browse files Browse the repository at this point in the history
Summary:
The new function YbExecUpdateIndexTuples introduced in f0a5db7 / D36588
was missing index status checks and therefore could cause corruption of indexes built concurrently with target table updates.

The index status is represented by three boolean flags: idxislive, idxisready and idxisvalid. Those flags change their values from
false to true as the index build process progresses through the stages. When idxislive is true, updates to the target table should
attempt to delete old index records. When indisready is true, updates to the target table should insert new index records. When
indisvalid is true, the index can be used for reads.

Before YbExecUpdateIndexTuples was introduced the update deleted the old index record then inserted the new index record.
Those routines respectively checked the indislive and indisready flags. YbExecUpdateIndexTuples may fallback to delete/insert
if update in place is not possible. But first it checks the index status. If idxislive is false, the index is ignored. If idxislive true, but
indisready is false,  YbExecUpdateIndexTuples deletes the old record only. If indisready is true, YbExecUpdateIndexTuples proceeds
to the choice between in-place update or delete/insert, as usual. The idxisvalid does not matter for the index update.
Jira: DB-13202

Test Plan: ./yb_build.sh --cxx-test pgwrapper_pg_index_backfill-test --gtest_filter PgIndexBackfillTest.PhantomIdxEntry

Reviewers: jason, kramanathan

Reviewed By: jason

Subscribers: smishra, yql

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D39430
  • Loading branch information
andrei-mart committed Nov 4, 2024
1 parent eb0c30c commit 68f7de8
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 6 deletions.
10 changes: 7 additions & 3 deletions src/postgres/src/backend/commands/indexcmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -1040,10 +1040,10 @@ DefineIndex(Oid relationId,
shdepFindImplicitTablegroup(tablespaceId, &tablegroupId);

/*
* If we do not find a tablegroup corresponding to the given tablespace, we
* If we do not find a tablegroup corresponding to the given tablespace, we
* would have to create one. We derive the name from tablespace OID.
*/
tablegroup_name = OidIsValid(tablegroupId) ? get_tablegroup_name(tablegroupId) :
tablegroup_name = OidIsValid(tablegroupId) ? get_tablegroup_name(tablegroupId) :
get_implicit_tablegroup_name(tablespaceId);

}
Expand Down Expand Up @@ -1088,7 +1088,7 @@ DefineIndex(Oid relationId,
RoleSpec *spec = makeNode(RoleSpec);
spec->roletype = ROLESPEC_CSTRING;
spec->rolename = pstrdup("postgres");

CreateTableGroupStmt *tablegroup_stmt = makeNode(CreateTableGroupStmt);
tablegroup_stmt->tablegroupname = tablegroup_name;
tablegroup_stmt->tablespacename = tablespace_name;
Expand Down Expand Up @@ -2109,6 +2109,10 @@ DefineIndex(Oid relationId,
else
{
elog(LOG, "committing pg_index tuple with indislive=true");
if (yb_test_block_index_phase[0] != '\0')
YbTestGucBlockWhileStrEqual(&yb_test_block_index_phase,
"indislive",
"index state change indislive=true");
/*
* No need to break (abort) ongoing txns since this is an online schema
* change.
Expand Down
13 changes: 13 additions & 0 deletions src/postgres/src/backend/executor/execIndexing.c
Original file line number Diff line number Diff line change
Expand Up @@ -855,6 +855,19 @@ YbExecUpdateIndexTuples(ResultRelInfo *resultRelInfo,

indexInfo = indexInfoArray[i];

/*
* If the index is not yet ready for insert we shouldn't attempt to
* add new entries, but should delete the old entry from a live index,
* because newer transaction may have seen it ready, and inserted the
* record into the index.
*/
if (!indexInfo->ii_ReadyForInserts)
{
if (indexRelation->rd_index->indislive)
deleteIndexes = lappend_int(deleteIndexes, i);
continue;
}

/*
* Check for partial index -
* There are four different update scenarios for an index with a predicate:
Expand Down
7 changes: 4 additions & 3 deletions src/postgres/src/backend/utils/misc/guc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2969,7 +2969,7 @@ static struct config_bool ConfigureNamesBool[] =
{
{"yb_skip_redundant_update_ops", PGC_SIGHUP, QUERY_TUNING_OTHER,
gettext_noop("Enables the comparison of old and new values of columns specified in the "
"SET clause of YSQL UPDATE queries to skip redundant secondary index "
"SET clause of YSQL UPDATE queries to skip redundant secondary index "
"updates and redundant constraint checks."),
NULL,
GUC_NOT_IN_SAMPLE
Expand Down Expand Up @@ -6062,8 +6062,9 @@ static struct config_string ConfigureNamesString[] =
{
{"yb_test_block_index_phase", PGC_SIGHUP, DEVELOPER_OPTIONS,
gettext_noop("Block the given index creation phase."),
gettext_noop("Valid values are \"indisready\", \"backfill\", "
" and \"postbackfill\". Any other value is ignored."),
gettext_noop("Valid values are \"indislive\", \"indisready\", "
"\"backfill\", and \"postbackfill\". "
"Any other value is ignored."),
GUC_NOT_IN_SAMPLE
},
&yb_test_block_index_phase,
Expand Down
64 changes: 64 additions & 0 deletions src/yb/yql/pgwrapper/pg_index_backfill-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2290,4 +2290,68 @@ TEST_F_EX(PgIndexBackfillTest,
ASSERT_EQ(rows, (decltype(rows){{1, 2}, {3, 5}}));
}

class PgIndexBackfillReadCommitted : public PgIndexBackfillTest {
public:
void UpdateMiniClusterOptions(ExternalMiniClusterOptions* options) override {
PgIndexBackfillTest::UpdateMiniClusterOptions(options);
options->extra_tserver_flags.push_back("--yb_enable_read_committed_isolation=true");
}
};

// Test for https://github.com/yugabyte/yugabyte-db/issues/24313
// Verify that concurrent updates do not leave phantom entries in the index
TEST_F_EX(PgIndexBackfillTest, PhantomIdxEntry, PgIndexBackfillReadCommitted) {
constexpr int64_t kNumRows = 10;
const IndexStateFlags index_live_flags{IndexStateFlag::kIndIsLive};
ASSERT_OK(conn_->ExecuteFormat("CREATE TABLE $0 (i int, t text)", kTableName));
ASSERT_OK(conn_->ExecuteFormat("INSERT INTO $0 VALUES (generate_series(1, $1), 'a')",
kTableName, kNumRows));
ASSERT_OK(cluster_->SetFlagOnTServers("ysql_yb_test_block_index_phase", "indislive"));
thread_holder_.AddThreadFunctor([this] {
LOG(INFO) << "Begin create index thread";
auto create_idx_conn = ASSERT_RESULT(ConnectToDB(kDatabaseName));
ASSERT_OK(create_idx_conn.ExecuteFormat("CREATE INDEX $0 ON $1(t)", kIndexName, kTableName));
LOG(INFO) << "Create index thread has been completed";
});
// There's no reliable indicator that index build has stopped before 'indislive' phase, just give
// the index creation thread some extra time.
SleepFor(MonoDelta::FromMilliseconds(RegularBuildVsSanitizers(5000, 60000)));
auto other_conn = ASSERT_RESULT(ConnectToDB(kDatabaseName));
// The index shouldn't be visible
ASSERT_FALSE(ASSERT_RESULT(IsAtTargetIndexStateFlags(kIndexName, IndexStateFlags{})));
// Start transaction and make sure it is in progress
LOG(INFO) << "Begin older txn";
ASSERT_OK(other_conn.Execute("BEGIN"));
const std::string query = Format("SELECT t FROM $0 WHERE i = $1", kTableName, 1);
auto rows = ASSERT_RESULT((other_conn.FetchRows<std::string>(query)));
ASSERT_EQ(rows, (decltype(rows){{"a"}}));
// Allow index build to proceed to the next phase
ASSERT_OK(cluster_->SetFlagOnTServers("ysql_yb_test_block_index_phase", "indisready"));
ASSERT_OK(WaitForIndexStateFlags(index_live_flags, kIndexName));
// New transaction can see the index and add 'b' into it
LOG(INFO) << "Update record by newer txn";
ASSERT_OK(conn_->ExecuteFormat("UPDATE $0 SET t = 'b' WHERE i = $1", kTableName, 2));
// If old transaction use cached metadata, it does not see index, so don't replace 'b' with 'c'
// index record b becomes phantom
LOG(INFO) << "Update record by older txn";
ASSERT_OK(other_conn.ExecuteFormat("UPDATE $0 SET t = 'c' WHERE i = $1", kTableName, 2));
ASSERT_OK(other_conn.Execute("COMMIT"));
// New transaction attempts to replace 'c' with 'd' but if there's the phantom record, it only
// inserts 'd'
LOG(INFO) << "Update record by newer txn again";
ASSERT_OK(conn_->ExecuteFormat("UPDATE $0 SET t = 'd' WHERE i = $1", kTableName, 2));
ASSERT_TRUE(ASSERT_RESULT(IsAtTargetIndexStateFlags(kIndexName, index_live_flags)));
// Make sure catalog versions differ
// Complete the index build
ASSERT_OK(cluster_->SetFlagOnTServers("ysql_yb_test_block_index_phase", "none"));
thread_holder_.Stop();
// Check rowcount
const std::string count_query = Format("SELECT count(t) FROM $0 WHERE t = 'b'", kTableName);
ASSERT_OK(WaitForIndexScan(count_query));
LOG(INFO) << "Check for phantom record";
auto idx_count = ASSERT_RESULT((conn_->FetchRow<PGUint64>(count_query)));
// Phantom entry makes count 1
ASSERT_EQ(idx_count, 0);
}

} // namespace yb::pgwrapper

0 comments on commit 68f7de8

Please sign in to comment.