Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
112700: upgrades: assert descriptor repair has correct set of targets r=rafiss a=annrpom

This patch adds a test to assert that during automated repair of
corrupt descriptors, we do not try to repair a descriptor that
is not corrupted.

While testing, it was discovered that function descriptors (regardless of corruption/repair) had their descriptor version increased due to `grantExecuteToPublicOnAllFunctions` being called on each function during a cluster upgrade; however, we noticed that the functions we were testing already had execute privileges for `public`. Thus, a check was added in said logic that ensures functions in this situation (where public already has execute priv. for the func) do not try to grant execute again.

Epic: none
Fixes: #110906

Release note: None

113319: opt: optimize FuncDepSet.ReduceCols r=mgartner a=mgartner

#### opt/bench: add slow-query-5

This commit adds the new `slow-query-5` benchmark which tests a 40+ -way
join modelled off of a real-world production query. The query stresses
the efficiency of the optimizer, particularly in building and
manipulating thousands of functional dependencies for the thousands of
possible join orderings.

Release note: None

#### opt: optimize FuncDepSet.ReduceCols

`FuncDepSet.ReduceCols` is used frequently to manipulate `FuncDepSet`s.
In queries with many tables, columns, and joins, it can contribute
significantly to the latency of query planning, due to the `O(n²)` time
complexity of calculating a transitive closure (see
`FuncDepSet.inClosureOf`), where `n` is the number of dependencies in
the set, and the relatively expensive `SubsetOf` set operation performed
multiple times for each of the `n²` iterations.

This commit optimizes the `ReduceCols` by adding a fast path that can
more quickly determine if a column cannot be reduced by checking if it
exists in none of the `to` sets of the functional dependencies. This
avoids having to calculate the transitive closure in many cases,
significantly speeding up query planning time.

Epic: None

Release note (performance improvement): Query planning time has been
reduced significantly for some queries in which many tables are joined.


Co-authored-by: Annie Pompa <[email protected]>
Co-authored-by: Marcus Gartner <[email protected]>
  • Loading branch information
3 people committed Nov 2, 2023
3 parents e35ea6d + 64c1f6c + 846fc7b commit 8e059c2
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 15 deletions.
5 changes: 5 additions & 0 deletions pkg/sql/catalog/post_deserialization_changes.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ func (c *PostDeserializationChanges) Add(change PostDeserializationChangeType) {
c.s.Add(int(change))
}

// Len returns length of the set of changes.
func (c PostDeserializationChanges) Len() int {
return c.s.Len()
}

// ForEach calls f for every change in the set of changes.
func (c PostDeserializationChanges) ForEach(f func(change PostDeserializationChangeType)) {
c.s.ForEach(func(i int) { f(PostDeserializationChangeType(i)) })
Expand Down
67 changes: 67 additions & 0 deletions pkg/sql/opt/bench/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2039,6 +2039,23 @@ var slowSchemas = []string{
PRIMARY KEY (region, id)
);
`,
`
CREATE TABLE multi_col_pk_no_indexes (
region STRING NOT NULL,
id INT NOT NULL,
c1 INT NOT NULL, c2 INT NOT NULL, c3 INT NOT NULL, c4 INT NOT NULL, c5 INT NOT NULL,
c6 INT NOT NULL, c7 INT NOT NULL, c8 INT NOT NULL, c9 INT NOT NULL, c10 INT NOT NULL,
c11 INT NOT NULL, c12 INT NOT NULL, c13 INT NOT NULL, c14 INT NOT NULL, c15 INT NOT NULL,
c16 INT NOT NULL, c17 INT NOT NULL, c18 INT NOT NULL, c19 INT NOT NULL, c20 INT NOT NULL,
c21 INT NOT NULL, c22 INT NOT NULL, c23 INT NOT NULL, c24 INT NOT NULL, c25 INT NOT NULL,
c26 INT NOT NULL, c27 INT NOT NULL, c28 INT NOT NULL, c29 INT NOT NULL, c30 INT NOT NULL,
c31 INT NOT NULL, c32 INT NOT NULL, c33 INT NOT NULL, c34 INT NOT NULL, c35 INT NOT NULL,
c36 INT NOT NULL, c37 INT NOT NULL, c38 INT NOT NULL, c39 INT NOT NULL, c40 INT NOT NULL,
c41 INT NOT NULL, c42 INT NOT NULL, c43 INT NOT NULL, c44 INT NOT NULL, c45 INT NOT NULL,
c46 INT NOT NULL, c47 INT NOT NULL, c48 INT NOT NULL, c49 INT NOT NULL, c50 INT NOT NULL,
PRIMARY KEY (region, id)
);
`,
}

var slowQueries = [...]benchQuery{
Expand Down Expand Up @@ -2246,6 +2263,56 @@ var slowQueries = [...]benchQuery{
`,
args: []interface{}{10, 20, 30, 40, 50, 60, 70, 80, 90},
},
{
name: "slow-query-5",
query: `
SELECT *
FROM multi_col_pk_no_indexes AS t1
LEFT JOIN multi_col_pk_no_indexes AS t2 ON t2.region = $3 AND t1.c1 = t2.id
LEFT JOIN multi_col_pk_no_indexes AS t3 ON t3.region = $3 AND t2.id = t3.c1 AND t2.region = t3.region
LEFT JOIN multi_col_pk_no_indexes AS t4 ON t4.region = $3 AND t2.id = t4.c1 AND t2.region = t4.region
LEFT JOIN multi_col_pk_no_indexes AS t5 ON t5.region = $3 AND t4.c1 = t5.id
LEFT JOIN multi_col_pk_no_indexes AS t6 ON t6.region = $3 AND t2.id = t6.c1 AND t2.region = t6.region
LEFT JOIN multi_col_pk_no_indexes AS t7 ON t7.region = $3 AND t1.c1 = t7.id
LEFT JOIN multi_col_pk_no_indexes AS t8 ON t8.region = $3 AND t1.c1 = t8.id
LEFT JOIN multi_col_pk_no_indexes AS t9 ON t9.region = $3 AND t1.c1 = t9.id
LEFT JOIN multi_col_pk_no_indexes AS t10 ON t10.region = $3 AND t9.id = t10.c1 AND t9.region = t10.region
LEFT JOIN multi_col_pk_no_indexes AS t11 ON t11.region = $3 AND t10.c1 = t11.id
LEFT JOIN multi_col_pk_no_indexes AS t12 ON t12.region = $3 AND t1.id = t12.c1 AND t1.region = t12.region
LEFT JOIN multi_col_pk_no_indexes AS t13 ON t13.region = $3 AND t12.c1 = t13.id
LEFT JOIN multi_col_pk_no_indexes AS t14 ON t14.region = $3 AND t1.id = t14.c1 AND t1.region = t14.region
LEFT JOIN multi_col_pk_no_indexes AS t15 ON t15.region = $3 AND t1.id = t15.c1 AND t1.region = t15.region
LEFT JOIN multi_col_pk_no_indexes AS t16 ON t16.region = $3 AND t1.id = t16.c1 AND t1.region = t16.region
LEFT JOIN multi_col_pk_no_indexes AS t17 ON t17.region = $3 AND t1.c1 = t17.id
LEFT JOIN multi_col_pk_no_indexes AS t18 ON t18.region = $3 AND t1.c1 = t18.id
LEFT JOIN multi_col_pk_no_indexes AS t19 ON t19.region = $3 AND t1.id = t19.c1 AND t1.region = t19.region
LEFT JOIN multi_col_pk_no_indexes AS t20 ON t20.region = $3 AND t19.id = t20.c1 AND t19.region = t20.region
LEFT JOIN multi_col_pk_no_indexes AS t21 ON t21.region = $3 AND t20.id = t21.c1 AND t20.region = t21.region
LEFT JOIN multi_col_pk_no_indexes AS t22 ON t22.region = $3 AND t21.c1 = t22.id
LEFT JOIN multi_col_pk_no_indexes AS t23 ON t23.region = $3 AND t19.id = t23.c9 AND t19.region = t23.region
LEFT JOIN multi_col_pk_no_indexes AS t24 ON t24.region = $3 AND t23.c1 = t24.id
LEFT JOIN multi_col_pk_no_indexes AS t25 ON t25.region = $3 AND t23.c1 = t25.id
LEFT JOIN multi_col_pk_no_indexes AS t26 ON t26.region = $3 AND t26.c1 = t23.id AND t26.region = t23.region
LEFT JOIN multi_col_pk_no_indexes AS t27 ON t27.region = $3 AND t26.c1 = t27.id
LEFT JOIN multi_col_pk_no_indexes AS t28 ON t28.region = $3 AND t28.c1 = t19.id AND t28.region = t19.region
LEFT JOIN multi_col_pk_no_indexes AS t29 ON t29.region = $3 AND t28.c1 = t29.id
LEFT JOIN multi_col_pk_no_indexes AS t30 ON t30.region = $3 AND t28.c1 = t30.id
LEFT JOIN multi_col_pk_no_indexes AS t31 ON t31.region = $3 AND t31.c1 = t28.id AND t31.region = t28.region
LEFT JOIN multi_col_pk_no_indexes AS t32 ON t32.region = $3 AND t31.c1 = t32.id
LEFT JOIN multi_col_pk_no_indexes AS t33 ON t33.region = $3 AND t33.c1 = t19.id AND t33.region = t19.region
LEFT JOIN multi_col_pk_no_indexes AS t34 ON t34.region = $3 AND t33.c1 = t34.id
LEFT JOIN multi_col_pk_no_indexes AS t35 ON t35.region = $3 AND t35.c1 = t33.id AND t35.region = t33.region
LEFT JOIN multi_col_pk_no_indexes AS t36 ON t36.region = $3 AND t35.c1 = t36.id
LEFT JOIN multi_col_pk_no_indexes AS t37 ON t37.region = $3 AND t37.c1 = t19.id AND t37.region = t19.region
LEFT JOIN multi_col_pk_no_indexes AS t38 ON t38.region = $3 AND t37.c1 = t38.id
LEFT JOIN multi_col_pk_no_indexes AS t39 ON t39.region = $3 AND t37.c1 = t39.id
LEFT JOIN multi_col_pk_no_indexes AS t40 ON t40.region = $3 AND t37.c1 = t40.id
LEFT JOIN multi_col_pk_no_indexes AS t41 ON t41.region = $3 AND t41.c1 = t37.id AND t41.region = t37.region
LEFT JOIN multi_col_pk_no_indexes AS t42 ON t42.region = $3 AND t41.c1 = t42.id
WHERE t1.id = $1 AND t1.c2 = $2 AND t1.region = $3;
`,
args: []interface{}{10001, 5, "'east'"},
},
}

func BenchmarkSlowQueries(b *testing.B) {
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/opt/props/func_dep.go
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,23 @@ func (f *FuncDepSet) ReduceCols(cols opt.ColSet) opt.ColSet {
var removed opt.ColSet
cols = cols.Copy()
for i, ok := cols.Next(0); ok; i, ok = cols.Next(i + 1) {
// First check if the column is present in any "to" set of a dependency.
// If not, then it is not redundant and must remain in the set. This is
// a fast-path to avoid the more expensive functional-dependency-closure
// test below, when possible.
inToSet := false
for j := range f.deps {
if f.deps[j].to.Contains(i) {
inToSet = true
break
}
}
if !inToSet {
continue
}

// Determine if the column is functionally determined by the other
// columns.
cols.Remove(i)
removed.Add(i)
if !f.inClosureOf(removed, cols, true /* strict */) {
Expand Down
6 changes: 2 additions & 4 deletions pkg/upgrade/upgrades/first_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ func FirstUpgradeFromRelease(
var batch catalog.DescriptorIDSet
const batchSize = 1000
if err := all.ForEachDescriptor(func(desc catalog.Descriptor) error {
if !desc.GetPostDeserializationChanges().HasChanges() {
changes := desc.GetPostDeserializationChanges()
if !changes.HasChanges() || (changes.Len() == 1 && changes.Contains(catalog.SetModTimeToMVCCTimestamp)) {
return nil
}
batch.Add(desc.GetID())
Expand Down Expand Up @@ -89,9 +90,6 @@ func upgradeDescriptors(
}
b := txn.KV().NewBatch()
for _, mut := range muts {
if !mut.GetPostDeserializationChanges().HasChanges() {
continue
}
key := catalogkeys.MakeDescMetadataKey(d.Codec, mut.GetID())
b.CPut(key, mut.DescriptorProto(), mut.GetRawBytesInStorage())
}
Expand Down
79 changes: 68 additions & 11 deletions pkg/upgrade/upgrades/first_upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ func TestFirstUpgrade(t *testing.T) {
"CREATE TABLE foo (i INT PRIMARY KEY, j INT, INDEX idx(j))",
)

// Corrupt the table descriptor in an unrecoverable manner.
// Corrupt the table descriptor in an unrecoverable manner. We are not able to automatically repair this
// descriptor.
tbl := desctestutils.TestingGetPublicTableDescriptor(kvDB, keys.SystemSQLCodec, "test", "foo")
descKey := catalogkeys.MakeDescMetadataKey(keys.SystemSQLCodec, tbl.GetID())
require.NoError(t, kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
Expand Down Expand Up @@ -116,8 +117,13 @@ func TestFirstUpgrade(t *testing.T) {
}))
return b.BuildImmutable()
}

// Confirm that the only change undergone is the modification time being set to
// the MVCC timestamp.
require.False(t, readDescFromStorage().GetModificationTime().IsEmpty())
require.True(t, readDescFromStorage().GetPostDeserializationChanges().HasChanges())
changes := readDescFromStorage().GetPostDeserializationChanges()
require.Equal(t, changes.Len(), 1)
require.True(t, changes.Contains(catalog.SetModTimeToMVCCTimestamp))

// Wait long enough for precondition check to see the unbroken table descriptor.
execStmts(t, "CREATE DATABASE test3")
Expand All @@ -126,9 +132,12 @@ func TestFirstUpgrade(t *testing.T) {
// Upgrade the cluster version.
tdb.Exec(t, qUpgrade)

// The table descriptor protobuf should have the modification time set.
// The table descriptor protobuf should still have the modification time set;
// the only post-deserialization change should be SetModTimeToMVCCTimestamp.
require.False(t, readDescFromStorage().GetModificationTime().IsEmpty())
require.False(t, readDescFromStorage().GetPostDeserializationChanges().HasChanges())
changes = readDescFromStorage().GetPostDeserializationChanges()
require.Equal(t, changes.Len(), 1)
require.True(t, changes.Contains(catalog.SetModTimeToMVCCTimestamp))
}

// TestFirstUpgradeRepair tests the correct repair behavior of upgrade
Expand Down Expand Up @@ -166,22 +175,35 @@ func TestFirstUpgradeRepair(t *testing.T) {
}
}

// Create a table and a function for this test.
execStmts(t,
"CREATE DATABASE test",
"USE test",
// Create a table and function that we will corrupt for this test.
"CREATE TABLE foo (i INT PRIMARY KEY, j INT, INDEX idx(j))",
"INSERT INTO foo VALUES (1, 2)",
"CREATE FUNCTION test.public.f() RETURNS INT LANGUAGE SQL AS $$ SELECT 1 $$",
// Create the following to cover more descriptor types - ensure that said descriptors do not get repaired.
"CREATE TABLE bar (i INT PRIMARY KEY, j INT, INDEX idx(j))",
"CREATE SCHEMA bar",
"CREATE TYPE bar.bar AS ENUM ('hello')",
"CREATE FUNCTION bar.bar(a INT) RETURNS INT AS 'SELECT a*a' LANGUAGE SQL",
)

// Corrupt FK back references in the test table descriptor.
dbDesc := desctestutils.TestingGetDatabaseDescriptor(kvDB, keys.SystemSQLCodec, "test")
tblDesc := desctestutils.TestingGetPublicTableDescriptor(kvDB, keys.SystemSQLCodec, "test", "bar")
schemaDesc := desctestutils.TestingGetSchemaDescriptor(kvDB, keys.SystemSQLCodec, dbDesc.GetID(), "bar")
typDesc := desctestutils.TestingGetTypeDescriptor(kvDB, keys.SystemSQLCodec, "test", "bar", "bar")
fnDesc := desctestutils.TestingGetFunctionDescriptor(kvDB, keys.SystemSQLCodec, "test", "bar", "bar")
nonCorruptDescs := []catalog.Descriptor{dbDesc, tblDesc, schemaDesc, typDesc, fnDesc}

// Corrupt FK back references in the test table descriptor, foo.
codec := keys.SystemSQLCodec
tbl := desctestutils.TestingGetPublicTableDescriptor(kvDB, codec, "test", "foo")
fn := desctestutils.TestingGetFunctionDescriptor(kvDB, codec, "test", "public", "f")
fooTbl := desctestutils.TestingGetPublicTableDescriptor(kvDB, codec, "test", "foo")
fooFn := desctestutils.TestingGetFunctionDescriptor(kvDB, codec, "test", "public", "f")
corruptDescs := []catalog.Descriptor{fooTbl, fooFn}
require.NoError(t, kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
b := txn.NewBatch()
tbl := tabledesc.NewBuilder(tbl.TableDesc()).BuildExistingMutableTable()
tbl := tabledesc.NewBuilder(fooTbl.TableDesc()).BuildExistingMutableTable()
tbl.InboundFKs = []descpb.ForeignKeyConstraint{{
OriginTableID: 123456789,
OriginColumnIDs: tbl.PublicColumnIDs(), // Used such that len(OriginColumnIDs) == len(PublicColumnIDs)
Expand All @@ -193,7 +215,7 @@ func TestFirstUpgradeRepair(t *testing.T) {
}}
tbl.NextConstraintID++
b.Put(catalogkeys.MakeDescMetadataKey(codec, tbl.GetID()), tbl.DescriptorProto())
fn := funcdesc.NewBuilder(fn.FuncDesc()).BuildExistingMutableFunction()
fn := funcdesc.NewBuilder(fooFn.FuncDesc()).BuildExistingMutableFunction()
fn.DependedOnBy = []descpb.FunctionDescriptor_Reference{{
ID: 123456789,
ColumnIDs: []descpb.ColumnID{1},
Expand All @@ -202,6 +224,27 @@ func TestFirstUpgradeRepair(t *testing.T) {
return txn.Run(ctx, b)
}))

readDescFromStorage := func(descID descpb.ID) catalog.Descriptor {
descKey := catalogkeys.MakeDescMetadataKey(keys.SystemSQLCodec, descID)
var b catalog.DescriptorBuilder
require.NoError(t, kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
v, err := txn.Get(ctx, descKey)
if err != nil {
return err
}
b, err = descbuilder.FromSerializedValue(v.Value)
return err
}))
return b.BuildImmutable()
}

descOldVersionMap := make(map[descpb.ID]descpb.DescriptorVersion)

for _, desc := range append(nonCorruptDescs, corruptDescs...) {
descId := desc.GetID()
descOldVersionMap[descId] = readDescFromStorage(descId).GetVersion()
}

// The corruption should remain undetected for DML queries.
tdb.CheckQueryResults(t, "SELECT * FROM test.public.foo", [][]string{{"1", "2"}})
tdb.CheckQueryResults(t, "SELECT test.public.f()", [][]string{{"1"}})
Expand Down Expand Up @@ -232,7 +275,21 @@ func TestFirstUpgradeRepair(t *testing.T) {
tdb.CheckQueryResults(t, qDetectCorruption, [][]string{{"0"}})
tdb.CheckQueryResults(t, qDetectRepairableCorruption, [][]string{{"0"}})

// Check that the table and function are OK.
// Assert that a version upgrade is reflected for repaired descriptors (stricly one version upgrade).
for _, d := range corruptDescs {
descId := d.GetID()
desc := readDescFromStorage(descId)
require.Equalf(t, descOldVersionMap[descId]+1, desc.GetVersion(), desc.GetName())
}

// Assert that no version upgrade is reflected for non-repaired descriptors.
for _, d := range nonCorruptDescs {
descId := d.GetID()
desc := readDescFromStorage(descId)
require.Equalf(t, descOldVersionMap[descId], desc.GetVersion(), desc.GetName())
}

// Check that the repaired table and function are OK.
tdb.CheckQueryResults(t, "SELECT * FROM test.public.foo", [][]string{{"1", "2"}})
tdb.Exec(t, "ALTER TABLE test.foo ADD COLUMN k INT DEFAULT 42")
tdb.CheckQueryResults(t, "SELECT * FROM test.public.foo", [][]string{{"1", "2", "42"}})
Expand Down
3 changes: 3 additions & 0 deletions pkg/upgrade/upgrades/grant_execute_to_public.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ func grantExecuteToPublicOnAllFunctions(
}
kvBatch := txn.KV().NewBatch()
for _, desc := range descs {
if desc.GetPrivileges().CheckPrivilege(username.PublicRoleName(), privilege.EXECUTE) {
continue
}
desc.GetPrivileges().Grant(
username.PublicRoleName(),
privilege.List{privilege.EXECUTE},
Expand Down

0 comments on commit 8e059c2

Please sign in to comment.