From 2b985fe917b1901787786b4b9b82fb560f020894 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Thu, 18 Feb 2021 17:23:15 -0800 Subject: [PATCH 1/2] opt: add arbiterSet to track arbiter indexes and unique constraints This commit introduces a `arbiterSet` which can contain both indexes and unique constraints. The `ForEach` function of this set abstracts the type of arbiter so that consumers of the set need not be concerned with the type. This cleans up code that builds expressions for each arbiter. Note that this will allow further cleanup within `buildInputForUpsert`, once that function supports partial unique constraints. Release note: None --- pkg/sql/opt/optbuilder/BUILD.bazel | 1 + pkg/sql/opt/optbuilder/arbiter_set.go | 128 ++++++++++++++++++ pkg/sql/opt/optbuilder/insert.go | 98 ++++---------- pkg/sql/opt/optbuilder/mutation_builder.go | 15 +- .../optbuilder/mutation_builder_arbiter.go | 39 +++--- 5 files changed, 178 insertions(+), 103 deletions(-) create mode 100644 pkg/sql/opt/optbuilder/arbiter_set.go diff --git a/pkg/sql/opt/optbuilder/BUILD.bazel b/pkg/sql/opt/optbuilder/BUILD.bazel index fba3523199cc..ca4efa139ff4 100644 --- a/pkg/sql/opt/optbuilder/BUILD.bazel +++ b/pkg/sql/opt/optbuilder/BUILD.bazel @@ -4,6 +4,7 @@ go_library( name = "optbuilder", srcs = [ "alter_table.go", + "arbiter_set.go", "builder.go", "create_table.go", "create_view.go", diff --git a/pkg/sql/opt/optbuilder/arbiter_set.go b/pkg/sql/opt/optbuilder/arbiter_set.go new file mode 100644 index 000000000000..7bea4baff966 --- /dev/null +++ b/pkg/sql/opt/optbuilder/arbiter_set.go @@ -0,0 +1,128 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package optbuilder + +import ( + "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/util" +) + +// arbiterSet represents a set of arbiters. Unique indexes or constraints can be +// arbiters. This set provides an abstraction on top of both types of arbiters +// so that consumers of the set do not need to be concerned with the underlying +// arbiter type. +type arbiterSet struct { + mb *mutationBuilder + + // indexes contains the index arbiters in the set, as ordinals into the + // table's indexes. + indexes util.FastIntSet + + // uniqueConstraints contains the unique constraint arbiters in the set, as + // ordinals into the table's unique constraints. + uniqueConstraints util.FastIntSet +} + +// makeArbiterSet returns an initialized arbiterSet. +func makeArbiterSet(mb *mutationBuilder) arbiterSet { + return arbiterSet{ + mb: mb, + } +} + +// makeSingleIndexArbiterSet returns an initialized arbiterSet with the given +// index as the sole arbiter in the set. +func makeSingleIndexArbiterSet(mb *mutationBuilder, idx cat.IndexOrdinal) arbiterSet { + a := makeArbiterSet(mb) + a.AddIndex(idx) + return a +} + +// makeSingleUniqueConstraintArbiterSet returns an initialized arbiterSet with +// the given unique constraint as the sole arbiter in the set. +func makeSingleUniqueConstraintArbiterSet(mb *mutationBuilder, uniq cat.UniqueOrdinal) arbiterSet { + a := makeArbiterSet(mb) + a.AddUniqueConstraint(uniq) + return a +} + +// AddIndex adds an index arbiter to the set. +func (a *arbiterSet) AddIndex(idx cat.IndexOrdinal) { + a.indexes.Add(idx) +} + +// AddUniqueConstraint adds a unique constraint arbiter to the set. +func (a *arbiterSet) AddUniqueConstraint(uniq cat.UniqueOrdinal) { + a.uniqueConstraints.Add(uniq) +} + +// Empty returns true if the set is empty. +func (a *arbiterSet) Empty() bool { + return a.indexes.Empty() && a.uniqueConstraints.Empty() +} + +// Len returns the number of the arbiters in the set. +func (a *arbiterSet) Len() int { + return a.indexes.Len() + a.uniqueConstraints.Len() +} + +// IndexOrdinals returns a slice of all the index ordinals in the set. +func (a *arbiterSet) IndexOrdinals() []int { + return a.indexes.Ordered() +} + +// UniqueConstraintOrdinals returns a slice of all the unique constraint +// ordinals in the set. +func (a *arbiterSet) UniqueConstraintOrdinals() []int { + return a.uniqueConstraints.Ordered() +} + +// ForEach calls a function for every arbiter in the set. The function is called +// with the following arguments: +// +// - name is the name of the index or unique constraint. +// - columnOrdinals is the set of table column ordinals of the arbiter. For +// index arbiters, this is the lax key columns. For constraint arbiters, +// this is all the columns in the constraint. +// - pred is the partial predicate expression of the arbiter, if the arbiter +// is a partial index or partial constraint. If the arbiter is not partial, +// pred is nil. +// +func (a *arbiterSet) ForEach(f func(name string, columnOrdinals util.FastIntSet, pred tree.Expr)) { + // Call the callback for each index arbiter. + a.indexes.ForEach(func(i int) { + index := a.mb.tab.Index(i) + + colOrds := getIndexLaxKeyOrdinals(index) + + var pred tree.Expr + if _, isPartial := index.Predicate(); isPartial { + pred = a.mb.parsePartialIndexPredicateExpr(i) + } + + f(string(index.Name()), colOrds, pred) + }) + + // Call the callback for each unique constraint arbiter. + a.uniqueConstraints.ForEach(func(i int) { + uniqueConstraint := a.mb.tab.Unique(i) + + colOrds := getUniqueConstraintOrdinals(a.mb.tab, uniqueConstraint) + + var pred tree.Expr + if _, isPartial := uniqueConstraint.Predicate(); isPartial { + pred = a.mb.parseUniqueConstraintPredicateExpr(i) + } + + f(uniqueConstraint.Name(), colOrds, pred) + }) +} diff --git a/pkg/sql/opt/optbuilder/insert.go b/pkg/sql/opt/optbuilder/insert.go index c7df434de2ed..d0f3694dc4ba 100644 --- a/pkg/sql/opt/optbuilder/insert.go +++ b/pkg/sql/opt/optbuilder/insert.go @@ -675,9 +675,7 @@ func (mb *mutationBuilder) buildInputForDoNothing( ) { // Determine the set of arbiter indexes and constraints to use to check for // conflicts. - arbiterIndexes, arbiterConstraints := mb.arbiterIndexesAndConstraints(conflictOrds, arbiterPredicate) - mb.arbiterIndexes = arbiterIndexes.Ordered() - mb.arbiterConstraints = arbiterConstraints.Ordered() + mb.arbiters = mb.findArbiters(conflictOrds, arbiterPredicate) insertColScope := mb.outScope.replace() insertColScope.appendColumnsFromScope(mb.outScope) @@ -686,77 +684,27 @@ func (mb *mutationBuilder) buildInputForDoNothing( // TODO(andyk): do we need to do more here? mb.outScope.ordering = nil - // Loop over each arbiter index and constraint, creating an anti-join for - // each one. - arbiterIndexes.ForEach(func(idx int) { - index := mb.tab.Index(idx) - var pred tree.Expr - if _, isPartial := index.Predicate(); isPartial { - pred = mb.parsePartialIndexPredicateExpr(idx) - } - - mb.buildAntiJoinForDoNothingArbiter( - inScope, - getIndexLaxKeyOrdinals(index), - pred, - ) - }) - arbiterConstraints.ForEach(func(uc int) { - uniqueConstraint := mb.tab.Unique(uc) - var pred tree.Expr - if _, isPartial := uniqueConstraint.Predicate(); isPartial { - pred = mb.parseUniqueConstraintPredicateExpr(uc) - } - mb.buildAntiJoinForDoNothingArbiter( - inScope, - getUniqueConstraintOrdinals(mb.tab, uniqueConstraint), - pred, - ) + // Create an anti-join for each arbiter. + mb.arbiters.ForEach(func(name string, columnOrdinals util.FastIntSet, pred tree.Expr) { + mb.buildAntiJoinForDoNothingArbiter(inScope, columnOrdinals, pred) }) - // Loop over each arbiter index and constraint, creating an UpsertDistinctOn - // for each one. This must happen after all conflicting rows are removed with - // the anti-joins created above, to avoid removing valid rows (see #59125). - arbiterIndexes.ForEach(func(idx int) { - index := mb.tab.Index(idx) - - // If the index is a partial index, project a new column that allows the - // UpsertDistinctOn to only de-duplicate insert rows that satisfy the - // partial index predicate. See projectPartialArbiterDistinctColumn for more - // details. - var partialIndexDistinctCol *scopeColumn - if _, isPartial := index.Predicate(); isPartial { - pred := mb.parsePartialIndexPredicateExpr(idx) - partialIndexDistinctCol = mb.projectPartialArbiterDistinctColumn( - insertColScope, pred, string(index.Name()), + // Create an UpsertDistinctOn for each arbiter. This must happen after all + // conflicting rows are removed with the anti-joins created above, to avoid + // removing valid rows (see #59125). + mb.arbiters.ForEach(func(name string, columnOrdinals util.FastIntSet, pred tree.Expr) { + // If the arbiter has a partial predicate, project a new column that + // allows the UpsertDistinctOn to only de-duplicate insert rows that + // satisfy the predicate. See projectPartialArbiterDistinctColumn for + // more details. + var partialDistinctCol *scopeColumn + if pred != nil { + partialDistinctCol = mb.projectPartialArbiterDistinctColumn( + insertColScope, pred, name, ) } - mb.buildDistinctOnForDoNothingArbiter( - insertColScope, - getIndexLaxKeyOrdinals(index), - partialIndexDistinctCol, - ) - }) - arbiterConstraints.ForEach(func(uc int) { - uniqueConstraint := mb.tab.Unique(uc) - - // If the constraint is partial, project a new column that allows the - // UpsertDistinctOn to only de-duplicate insert rows that satisfy the - // partial predicate. See projectPartialArbiterDistinctColumn for more - // details. - var partialIndexDistinctCol *scopeColumn - if _, isPartial := uniqueConstraint.Predicate(); isPartial { - pred := mb.parseUniqueConstraintPredicateExpr(uc) - partialIndexDistinctCol = mb.projectPartialArbiterDistinctColumn( - insertColScope, pred, uniqueConstraint.Name(), - ) - } - mb.buildDistinctOnForDoNothingArbiter( - insertColScope, - getUniqueConstraintOrdinals(mb.tab, uniqueConstraint), - partialIndexDistinctCol, - ) + mb.buildDistinctOnForDoNothingArbiter(insertColScope, columnOrdinals, partialDistinctCol) }) mb.targetColList = make(opt.ColList, 0, mb.tab.ColumnCount()) @@ -774,14 +722,16 @@ func (mb *mutationBuilder) buildInputForUpsert( ) { // Determine the set of arbiter indexes and constraints to use to check for // conflicts. - arbiterIndexes, arbiterConstraints := mb.arbiterIndexesAndConstraints(conflictOrds, arbiterPredicate) - mb.arbiterIndexes = arbiterIndexes.Ordered() - mb.arbiterConstraints = arbiterConstraints.Ordered() + mb.arbiters = mb.findArbiters(conflictOrds, arbiterPredicate) + // TODO(mgartner): Use arbiters.ForEach to iterate over arbiters rather than + // directly accessing the index and constraint sets. This will require + // support for partial unique constraints in this function. + arbiterIndexes := mb.arbiters.indexes + arbiterConstraints := mb.arbiters.uniqueConstraints // TODO(mgartner): Add support for multiple arbiter indexes or constraints, // similar to buildInputForDoNothing. - if arbiterIndexes.Len() > 1 || arbiterConstraints.Len() > 1 || - (!arbiterIndexes.Empty() && !arbiterConstraints.Empty()) { + if mb.arbiters.Len() > 1 { panic(unimplemented.NewWithIssue(53170, "there are multiple unique or exclusion constraints matching the ON CONFLICT specification")) } diff --git a/pkg/sql/opt/optbuilder/mutation_builder.go b/pkg/sql/opt/optbuilder/mutation_builder.go index 7e4aec570c3a..f4bc19a7c608 100644 --- a/pkg/sql/opt/optbuilder/mutation_builder.go +++ b/pkg/sql/opt/optbuilder/mutation_builder.go @@ -132,14 +132,9 @@ type mutationBuilder struct { // an insert; otherwise it's an update. canaryColID opt.ColumnID - // arbiterIndexes stores the ordinals of indexes that are used to detect - // conflicts for UPSERT and INSERT ON CONFLICT statements. - arbiterIndexes cat.IndexOrdinals - - // arbiterConstraints stores the ordinals of unique without index constraints - // that are used to detect conflicts for UPSERT and INSERT ON CONFLICT - // statements. - arbiterConstraints cat.UniqueOrdinals + // arbiters is the set of indexes and unique constraints that are used to + // detect conflicts for UPSERT and INSERT ON CONFLICT statements. + arbiters arbiterSet // roundedDecimalCols is the set of columns that have already been rounded. // Keeping this set avoids rounding the same column multiple times. @@ -1011,8 +1006,8 @@ func (mb *mutationBuilder) makeMutationPrivate(needResults bool) *memo.MutationP FetchCols: checkEmptyList(mb.fetchColIDs), UpdateCols: checkEmptyList(mb.updateColIDs), CanaryCol: mb.canaryColID, - ArbiterIndexes: mb.arbiterIndexes, - ArbiterConstraints: mb.arbiterConstraints, + ArbiterIndexes: mb.arbiters.IndexOrdinals(), + ArbiterConstraints: mb.arbiters.UniqueConstraintOrdinals(), CheckCols: checkEmptyList(mb.checkColIDs), PartialIndexPutCols: checkEmptyList(mb.partialIndexPutColIDs), PartialIndexDelCols: checkEmptyList(mb.partialIndexDelColIDs), diff --git a/pkg/sql/opt/optbuilder/mutation_builder_arbiter.go b/pkg/sql/opt/optbuilder/mutation_builder_arbiter.go index 05557ce87f97..97c09ff51f25 100644 --- a/pkg/sql/opt/optbuilder/mutation_builder_arbiter.go +++ b/pkg/sql/opt/optbuilder/mutation_builder_arbiter.go @@ -25,10 +25,9 @@ import ( "github.com/cockroachdb/errors" ) -// arbiterIndexesAndConstraints returns sets of index ordinals and unique -// constraint ordinals to be used as arbiter constraints for an INSERT ON -// CONFLICT statement. This function panics if no arbiter indexes or constraints -// are found. +// findArbiters returns a set of arbiters for an INSERT ON CONFLICT statement. +// Both unique indexes and unique constraints can be arbiters. This function +// panics if no arbiters are found. // // Arbiter constraints ensure that the columns designated by conflictOrds // reference at most one target row of a UNIQUE index or constraint. Using ANTI @@ -69,23 +68,25 @@ import ( // found. // 3. Otherwise, returns all partial arbiter indexes and constraints. // -func (mb *mutationBuilder) arbiterIndexesAndConstraints( +func (mb *mutationBuilder) findArbiters( conflictOrds util.FastIntSet, arbiterPredicate tree.Expr, -) (indexes util.FastIntSet, uniqueConstraints util.FastIntSet) { +) arbiterSet { + arbiters := makeArbiterSet(mb) + // If conflictOrds is empty, then all unique indexes and unique without // index constraints are arbiters. if conflictOrds.Empty() { for idx, idxCount := 0, mb.tab.IndexCount(); idx < idxCount; idx++ { if mb.tab.Index(idx).IsUnique() { - indexes.Add(idx) + arbiters.AddIndex(idx) } } for uc, ucCount := 0, mb.tab.UniqueCount(); uc < ucCount; uc++ { if mb.tab.Unique(uc).WithoutIndex() { - uniqueConstraints.Add(uc) + arbiters.AddUniqueConstraint(uc) } } - return indexes, uniqueConstraints + return arbiters } h := &mb.arbiterPredicateHelper @@ -112,7 +113,7 @@ func (mb *mutationBuilder) arbiterIndexesAndConstraints( // Furthermore, it is the only arbiter needed because it guarantees // uniqueness of its columns across all rows. if _, isPartial := index.Predicate(); !isPartial { - return util.MakeFastIntSet(idx), util.FastIntSet{} + return makeSingleIndexArbiterSet(mb, idx) } // If the index is a pseudo-partial index, it can always be an arbiter. @@ -120,13 +121,13 @@ func (mb *mutationBuilder) arbiterIndexesAndConstraints( // uniqueness of its columns across all rows. pred := h.partialIndexPredicate(idx) if pred.IsTrue() { - return util.MakeFastIntSet(idx), util.FastIntSet{} + return makeSingleIndexArbiterSet(mb, idx) } // If the index is a partial index, then it can only be an arbiter if // the arbiterPredicate implies it. if h.predicateIsImpliedByArbiterPredicate(pred) { - indexes.Add(idx) + arbiters.AddIndex(idx) } } @@ -158,7 +159,7 @@ func (mb *mutationBuilder) arbiterIndexesAndConstraints( // If the unique constraint is not partial, it should be returned // without any partial index arbiters. if _, isPartial := uniqueConstraint.Predicate(); !isPartial { - return util.FastIntSet{}, util.MakeFastIntSet(uc) + return makeSingleUniqueConstraintArbiterSet(mb, uc) } // If the constraint is a pseudo-partial unique constraint, it can @@ -166,24 +167,24 @@ func (mb *mutationBuilder) arbiterIndexesAndConstraints( // arbiters. pred := h.partialUniqueConstraintPredicate(uc) if pred.IsTrue() { - return util.FastIntSet{}, util.MakeFastIntSet(uc) + return makeSingleUniqueConstraintArbiterSet(mb, uc) } // If the unique constraint is partial, then it can only be an arbiter // if the arbiterPredicate implies it. if h.predicateIsImpliedByArbiterPredicate(pred) { - uniqueConstraints.Add(uc) + arbiters.AddUniqueConstraint(uc) } } - // Err if we did not previously return and did not find partial indexes or - // partial unique constraints. - if indexes.Empty() && uniqueConstraints.Empty() { + // Err if we did not previously return and did not find any partial + // arbiters. + if arbiters.Empty() { panic(pgerror.Newf(pgcode.InvalidColumnReference, "there is no unique or exclusion constraint matching the ON CONFLICT specification")) } - return indexes, uniqueConstraints + return arbiters } // buildAntiJoinForDoNothingArbiter builds an anti-join for a single arbiter index From 6f1748101f8b1f153a9d0d3a6ac81600f63d67f3 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 19 Feb 2021 13:33:48 -0800 Subject: [PATCH 2/2] opt: move left-join building logic to mutation_builder_arbiter.go This commit move the logic for building `UPSERT` left-joins to `mutation_builder_arbiter.go`, making it consistent with the structure of `INSERT ON CONFLICT DO NOTHING` logic. Some duplicate code for building DistinctOn expressions was also removed - both `UPSERT` and `INSERT ON CONFLICT DO NOTHING` build essentially the same DistinctOn expressions. Some of the code for dealing with `UPSERT` arbiter indexes and arbiter constraints is still repetitive. This will be cleaned up in a follow-up commit that adds `UPSERT` support for partial unique constraint arbiters. Release note: None --- pkg/sql/opt/optbuilder/arbiter_set.go | 12 +- pkg/sql/opt/optbuilder/distinct.go | 4 +- pkg/sql/opt/optbuilder/insert.go | 230 ++++++------------ .../optbuilder/mutation_builder_arbiter.go | 122 +++++++++- 4 files changed, 191 insertions(+), 177 deletions(-) diff --git a/pkg/sql/opt/optbuilder/arbiter_set.go b/pkg/sql/opt/optbuilder/arbiter_set.go index 7bea4baff966..92f315798e02 100644 --- a/pkg/sql/opt/optbuilder/arbiter_set.go +++ b/pkg/sql/opt/optbuilder/arbiter_set.go @@ -90,39 +90,39 @@ func (a *arbiterSet) UniqueConstraintOrdinals() []int { // with the following arguments: // // - name is the name of the index or unique constraint. -// - columnOrdinals is the set of table column ordinals of the arbiter. For +// - conflictOrds is the set of table column ordinals of the arbiter. For // index arbiters, this is the lax key columns. For constraint arbiters, // this is all the columns in the constraint. // - pred is the partial predicate expression of the arbiter, if the arbiter // is a partial index or partial constraint. If the arbiter is not partial, // pred is nil. // -func (a *arbiterSet) ForEach(f func(name string, columnOrdinals util.FastIntSet, pred tree.Expr)) { +func (a *arbiterSet) ForEach(f func(name string, conflictOrds util.FastIntSet, pred tree.Expr)) { // Call the callback for each index arbiter. a.indexes.ForEach(func(i int) { index := a.mb.tab.Index(i) - colOrds := getIndexLaxKeyOrdinals(index) + conflictOrds := getIndexLaxKeyOrdinals(index) var pred tree.Expr if _, isPartial := index.Predicate(); isPartial { pred = a.mb.parsePartialIndexPredicateExpr(i) } - f(string(index.Name()), colOrds, pred) + f(string(index.Name()), conflictOrds, pred) }) // Call the callback for each unique constraint arbiter. a.uniqueConstraints.ForEach(func(i int) { uniqueConstraint := a.mb.tab.Unique(i) - colOrds := getUniqueConstraintOrdinals(a.mb.tab, uniqueConstraint) + conflictOrds := getUniqueConstraintOrdinals(a.mb.tab, uniqueConstraint) var pred tree.Expr if _, isPartial := uniqueConstraint.Predicate(); isPartial { pred = a.mb.parseUniqueConstraintPredicateExpr(i) } - f(uniqueConstraint.Name(), colOrds, pred) + f(uniqueConstraint.Name(), conflictOrds, pred) }) } diff --git a/pkg/sql/opt/optbuilder/distinct.go b/pkg/sql/opt/optbuilder/distinct.go index 0fc17a44b4b5..58a11ff64a98 100644 --- a/pkg/sql/opt/optbuilder/distinct.go +++ b/pkg/sql/opt/optbuilder/distinct.go @@ -55,8 +55,8 @@ func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr { // operator rather than the DistinctOn operator (see the UpsertDistinctOn // operator comment for details on the differences). The errorOnDup parameter // controls whether multiple rows in the same distinct group trigger an error. -// This can only take on a value in the EnsureDistinctOn and -// EnsureUpsertDistinctOn cases. +// If empty, no error is triggered. This can only take on a value in the +// EnsureDistinctOn and EnsureUpsertDistinctOn cases. func (b *Builder) buildDistinctOn( distinctOnCols opt.ColSet, inScope *scope, nullsAreDistinct bool, errorOnDup string, ) (outScope *scope) { diff --git a/pkg/sql/opt/optbuilder/insert.go b/pkg/sql/opt/optbuilder/insert.go index d0f3694dc4ba..ffdcff2be12b 100644 --- a/pkg/sql/opt/optbuilder/insert.go +++ b/pkg/sql/opt/optbuilder/insert.go @@ -685,14 +685,14 @@ func (mb *mutationBuilder) buildInputForDoNothing( mb.outScope.ordering = nil // Create an anti-join for each arbiter. - mb.arbiters.ForEach(func(name string, columnOrdinals util.FastIntSet, pred tree.Expr) { - mb.buildAntiJoinForDoNothingArbiter(inScope, columnOrdinals, pred) + mb.arbiters.ForEach(func(name string, conflictOrds util.FastIntSet, pred tree.Expr) { + mb.buildAntiJoinForDoNothingArbiter(inScope, conflictOrds, pred) }) // Create an UpsertDistinctOn for each arbiter. This must happen after all // conflicting rows are removed with the anti-joins created above, to avoid // removing valid rows (see #59125). - mb.arbiters.ForEach(func(name string, columnOrdinals util.FastIntSet, pred tree.Expr) { + mb.arbiters.ForEach(func(name string, conflictOrds util.FastIntSet, pred tree.Expr) { // If the arbiter has a partial predicate, project a new column that // allows the UpsertDistinctOn to only de-duplicate insert rows that // satisfy the predicate. See projectPartialArbiterDistinctColumn for @@ -704,7 +704,7 @@ func (mb *mutationBuilder) buildInputForDoNothing( ) } - mb.buildDistinctOnForDoNothingArbiter(insertColScope, columnOrdinals, partialDistinctCol) + mb.buildDistinctOnForArbiter(insertColScope, conflictOrds, partialDistinctCol, "" /* errorOnDup */) }) mb.targetColList = make(opt.ColList, 0, mb.tab.ColumnCount()) @@ -742,20 +742,28 @@ func (mb *mutationBuilder) buildInputForUpsert( // Ignore any ordering requested by the input. mb.outScope.ordering = nil - // buildInputForArbiter builds the input for a single arbiter index or - // constraint. - // - colCount is the number of columns in the constraint or lax key columns - // in the index. - // - colOrdinal is a function that returns the position of the ith constraint - // or index column in its table. - // - predExpr is the partial index predicate. If the arbiter is not a partial - // index, predExpr is nil. - // - partialIndexDistinctCol is a column that allows the UpsertDistinctOn to - // only de-duplicate insert rows that satisfy the partial index predicate. - // If the arbiter is not a partial index, partialIndexDistinctCol is nil. - buildInputForArbiter := func( - getCanaryCol func() *scopeColumn, predExpr tree.Expr, partialIndexDistinctCol *scopeColumn, - ) { + var canaryCol *scopeColumn + if arbiterIndexes.Len() > 0 { + idx, _ := arbiterIndexes.Next(0) + index := mb.tab.Index(idx) + + _, isPartial := index.Predicate() + var pred tree.Expr + if isPartial { + pred = mb.parsePartialIndexPredicateExpr(idx) + } + + // If the index is a partial index, project a new column that allows the + // UpsertDistinctOn to only de-duplicate insert rows that satisfy the + // partial index predicate. See projectPartialArbiterDistinctColumn for + // more details. + var partialIndexDistinctCol *scopeColumn + if isPartial { + partialIndexDistinctCol = mb.projectPartialArbiterDistinctColumn( + insertColScope, pred, string(index.Name()), + ) + } + // Ensure that input is distinct on the conflict columns. Otherwise, the // Upsert could affect the same row more than once, which can lead to index // corruption. See issue #44466 for more context. @@ -764,24 +772,7 @@ func (mb *mutationBuilder) buildInputForUpsert( // EnsureUpsertDistinctOn operator does not allow multiple rows in distinct // groupings, the internal ordering is meaningless (and can trigger a // misleading error in buildDistinctOn if present). - var conflictCols opt.ColSet - for ord, ok := conflictOrds.Next(0); ok; ord, ok = conflictOrds.Next(ord + 1) { - conflictCols.Add(mb.insertColIDs[ord]) - } - if partialIndexDistinctCol != nil { - conflictCols.Add(partialIndexDistinctCol.id) - } - - mb.outScope = mb.b.buildDistinctOn( - conflictCols, mb.outScope, true /* nullsAreDistinct */, duplicateUpsertErrText) - - // Remove the partialIndexDistinctCol from the output. - if partialIndexDistinctCol != nil { - projectionScope := mb.outScope.replace() - projectionScope.appendColumnsFromScope(insertColScope) - mb.b.constructProjectForScope(mb.outScope, projectionScope) - mb.outScope = projectionScope - } + mb.buildDistinctOnForArbiter(insertColScope, conflictOrds, partialIndexDistinctCol, duplicateUpsertErrText) // Re-alias all INSERT columns so that they are accessible as if they were // part of a special data source named "crdb_internal.excluded". @@ -789,140 +780,65 @@ func (mb *mutationBuilder) buildInputForUpsert( mb.outScope.cols[i].table = excludedTableName } - // Build the right side of the left outer join. Use a different instance of - // table metadata so that col IDs do not overlap. - // - // NOTE: Include mutation columns, but be careful to never use them for any - // reason other than as "fetch columns". See buildScan comment. - // TODO(andyk): Why does execution engine need mutation columns for Insert? - mb.fetchScope = mb.b.buildScan( - mb.b.addTable(mb.tab, &mb.alias), - tableOrdinals(mb.tab, columnKinds{ - includeMutations: true, - includeSystem: true, - includeVirtualInverted: false, - includeVirtualComputed: true, - }), - nil, /* indexFlags */ - noRowLocking, - inScope, + // Create a left-join for the arbiter. + mb.buildLeftJoinForUpsertArbiter( + inScope, conflictOrds, pred, ) - // If the index is a unique partial index, then rows that are not in the - // partial index cannot conflict with insert rows. Therefore, a Select wraps - // the scan on the right side of the left outer join with the partial index - // predicate expression as the filter. - if predExpr != nil { - texpr := mb.fetchScope.resolveAndRequireType(predExpr, types.Bool) - predScalar := mb.b.buildScalar(texpr, mb.fetchScope, nil, nil, nil) - mb.fetchScope.expr = mb.b.factory.ConstructSelect( - mb.fetchScope.expr, - memo.FiltersExpr{mb.b.factory.ConstructFiltersItem(predScalar)}, - ) - } - - // Record a not-null "canary" column. After the left-join, this will be null - // if no conflict has been detected, or not null otherwise. At least one not- - // null column must exist, since primary key columns are not-null. - canaryScopeCol := getCanaryCol() - mb.canaryColID = canaryScopeCol.id - - // Set fetchColIDs to reference the columns created for the fetch values. - mb.setFetchColIDs(mb.fetchScope.cols) - - // Add the fetch columns to the current scope. It's OK to modify the current - // scope because it contains only INSERT columns that were added by the - // mutationBuilder, and which aren't needed for any other purpose. - mb.outScope.appendColumnsFromScope(mb.fetchScope) - - // Build the join condition by creating a conjunction of equality conditions - // that test each conflict column: - // - // ON ins.x = scan.a AND ins.y = scan.b + // Record a not-null "canary" column. After the left-join, this will be + // null if no conflict has been detected, or not null otherwise. At + // least one not-null column must exist, since primary key columns are + // not-null. + canaryCol = &mb.fetchScope.cols[findNotNullIndexCol(index)] + mb.canaryColID = canaryCol.id + } else if arbiterConstraints.Len() > 0 { + // Ensure that input is distinct on the conflict columns. Otherwise, the + // Upsert could affect the same row more than once, which can lead to index + // corruption. See issue #44466 for more context. // - var on memo.FiltersExpr - for i := range mb.fetchScope.cols { - // Include fetch columns with ordinal positions in conflictOrds. - if conflictOrds.Contains(i) { - condition := mb.b.factory.ConstructEq( - mb.b.factory.ConstructVariable(mb.insertColIDs[i]), - mb.b.factory.ConstructVariable(mb.fetchScope.cols[i].id), - ) - on = append(on, mb.b.factory.ConstructFiltersItem(condition)) - } - } + // Ignore any ordering requested by the input. Since the + // EnsureUpsertDistinctOn operator does not allow multiple rows in distinct + // groupings, the internal ordering is meaningless (and can trigger a + // misleading error in buildDistinctOn if present). + mb.buildDistinctOnForArbiter(insertColScope, conflictOrds, nil /* partialIndexDistinctCol */, duplicateUpsertErrText) - // If the index is a unique partial index, then insert rows that do not - // satisfy the partial index predicate cannot conflict with existing rows in - // the unique partial index. Therefore, the partial index predicate - // expression is added to the ON filters. - if predExpr != nil { - texpr := insertColScope.resolveAndRequireType(predExpr, types.Bool) - predScalar := mb.b.buildScalar(texpr, insertColScope, nil, nil, nil) - on = append(on, mb.b.factory.ConstructFiltersItem(predScalar)) + // Re-alias all INSERT columns so that they are accessible as if they were + // part of a special data source named "crdb_internal.excluded". + for i := range mb.outScope.cols { + mb.outScope.cols[i].table = excludedTableName } - // Construct the left join. - mb.outScope.expr = mb.b.factory.ConstructLeftJoin( - mb.outScope.expr, - mb.fetchScope.expr, - on, - memo.EmptyJoinPrivate, + // Create a left-join for the arbiter. + mb.buildLeftJoinForUpsertArbiter( + inScope, conflictOrds, nil, /* pred */ ) - // Add a filter from the WHERE clause if one exists. - if whereClause != nil { - where := &tree.Where{ - Type: whereClause.Type, - Expr: &tree.OrExpr{ - Left: &tree.ComparisonExpr{ - Operator: tree.IsNotDistinctFrom, - Left: canaryScopeCol, - Right: tree.DNull, - }, - Right: whereClause.Expr, - }, - } - mb.b.buildWhere(where, mb.outScope) - } - } - - if arbiterIndexes.Len() > 0 { - idx, _ := arbiterIndexes.Next(0) - index := mb.tab.Index(idx) - - _, isPartial := index.Predicate() - var pred tree.Expr - if isPartial { - pred = mb.parsePartialIndexPredicateExpr(idx) - } - - // If the index is a partial index, project a new column that allows the - // UpsertDistinctOn to only de-duplicate insert rows that satisfy the - // partial index predicate. See projectPartialArbiterDistinctColumn for - // more details. - var partialIndexDistinctCol *scopeColumn - if isPartial { - partialIndexDistinctCol = mb.projectPartialArbiterDistinctColumn( - insertColScope, pred, string(index.Name()), - ) - } - - buildInputForArbiter(func() *scopeColumn { - return &mb.fetchScope.cols[findNotNullIndexCol(index)] - }, pred, partialIndexDistinctCol) - } else if arbiterConstraints.Len() > 0 { - buildInputForArbiter(func() *scopeColumn { - // Use the primary index, since we don't know at this point whether - // another index will be used for this plan. This should select one - // of the primary keys. - return &mb.fetchScope.cols[findNotNullIndexCol(mb.tab.Index(cat.PrimaryIndex))] - }, nil /* predExpr */, nil /* partialIndexDistinctCol */) + // Record a not-null "canary" column. Use the primary index, since we + // don't know at this point whether another index will be used for this + // plan. This should select one of the primary keys. + canaryCol = &mb.fetchScope.cols[findNotNullIndexCol(mb.tab.Index(cat.PrimaryIndex))] + mb.canaryColID = canaryCol.id } else { // This should never happen. panic(errors.AssertionFailedf("no arbiter index or constraint available")) } + // Add a filter from the WHERE clause if one exists. + if whereClause != nil { + where := &tree.Where{ + Type: whereClause.Type, + Expr: &tree.OrExpr{ + Left: &tree.ComparisonExpr{ + Operator: tree.IsNotDistinctFrom, + Left: canaryCol, + Right: tree.DNull, + }, + Right: whereClause.Expr, + }, + } + mb.b.buildWhere(where, mb.outScope) + } + mb.targetColList = make(opt.ColList, 0, mb.tab.ColumnCount()) mb.targetColSet = opt.ColSet{} } diff --git a/pkg/sql/opt/optbuilder/mutation_builder_arbiter.go b/pkg/sql/opt/optbuilder/mutation_builder_arbiter.go index 97c09ff51f25..692238e8606a 100644 --- a/pkg/sql/opt/optbuilder/mutation_builder_arbiter.go +++ b/pkg/sql/opt/optbuilder/mutation_builder_arbiter.go @@ -187,18 +187,18 @@ func (mb *mutationBuilder) findArbiters( return arbiters } -// buildAntiJoinForDoNothingArbiter builds an anti-join for a single arbiter index -// or constraint for an INSERT ON CONFLICT DO NOTHING mutation. The anti-join -// wraps the current mb.outScope.expr and removes rows that would conflict with -// existing rows. +// buildAntiJoinForDoNothingArbiter builds an anti-join for a single arbiter +// index or constraint for an INSERT ON CONFLICT DO NOTHING mutation. The +// anti-join wraps the current mb.outScope.expr (which produces the insert rows) +// and removes rows that would conflict with existing rows. // -// - columnOrds is the set of table column ordinals that the arbiter +// - conflictOrds is the set of table column ordinals that the arbiter // guarantees uniqueness of. // - pred is the partial index or constraint predicate. If the arbiter is // not a partial index or constraint, pred is nil. // func (mb *mutationBuilder) buildAntiJoinForDoNothingArbiter( - inScope *scope, columnOrds util.FastIntSet, pred tree.Expr, + inScope *scope, conflictOrds util.FastIntSet, pred tree.Expr, ) { // Build the right side of the anti-join. Use a new metadata instance // of the mutation table so that a different set of column IDs are used for @@ -235,7 +235,7 @@ func (mb *mutationBuilder) buildAntiJoinForDoNothingArbiter( // ON ins.x = scan.a AND ins.y = scan.b // var on memo.FiltersExpr - for i, ok := columnOrds.Next(0); ok; i, ok = columnOrds.Next(i + 1) { + for i, ok := conflictOrds.Next(0); ok; i, ok = conflictOrds.Next(i + 1) { fetchCol := fetchScope.getColumnForTableOrdinal(i) if fetchCol == nil { panic(errors.AssertionFailedf("missing column in fetchScope")) @@ -267,24 +267,121 @@ func (mb *mutationBuilder) buildAntiJoinForDoNothingArbiter( ) } +// buildLeftJoinForUpsertArbiter builds a left-join for a single arbiter index +// or constraint for an UPSERT or INSERT ON CONFLICT DO UPDATE mutation. It +// left-joins each insert row to the target table, using the given conflict +// columns as the join condition. +// +// - conflictOrds is the set of table column ordinals that the arbiter +// guarantees uniqueness of. +// - pred is the partial index predicate. If the arbiter is not a partial +// index, pred is nil. +// - partialIndexDistinctCol is a column that allows the UpsertDistinctOn to +// only de-duplicate insert rows that satisfy the partial index predicate. +// If the arbiter is not a partial index, partialIndexDistinctCol is nil. +// +func (mb *mutationBuilder) buildLeftJoinForUpsertArbiter( + inScope *scope, conflictOrds util.FastIntSet, pred tree.Expr, +) { + // Build the right side of the left outer join. Use a different instance of + // table metadata so that col IDs do not overlap. + // + // NOTE: Include mutation columns, but be careful to never use them for any + // reason other than as "fetch columns". See buildScan comment. + // TODO(andyk): Why does execution engine need mutation columns for Insert? + mb.fetchScope = mb.b.buildScan( + mb.b.addTable(mb.tab, &mb.alias), + tableOrdinals(mb.tab, columnKinds{ + includeMutations: true, + includeSystem: true, + includeVirtualInverted: false, + includeVirtualComputed: true, + }), + nil, /* indexFlags */ + noRowLocking, + inScope, + ) + // Set fetchColIDs to reference the columns created for the fetch values. + mb.setFetchColIDs(mb.fetchScope.cols) + + // If the index is a unique partial index, then rows that are not in the + // partial index cannot conflict with insert rows. Therefore, a Select wraps + // the scan on the right side of the left outer join with the partial index + // predicate expression as the filter. + if pred != nil { + texpr := mb.fetchScope.resolveAndRequireType(pred, types.Bool) + predScalar := mb.b.buildScalar(texpr, mb.fetchScope, nil, nil, nil) + mb.fetchScope.expr = mb.b.factory.ConstructSelect( + mb.fetchScope.expr, + memo.FiltersExpr{mb.b.factory.ConstructFiltersItem(predScalar)}, + ) + } + + // Build the join condition by creating a conjunction of equality conditions + // that test each conflict column: + // + // ON ins.x = scan.a AND ins.y = scan.b + // + var on memo.FiltersExpr + for i := range mb.fetchScope.cols { + // Include fetch columns with ordinal positions in conflictOrds. + if conflictOrds.Contains(i) { + condition := mb.b.factory.ConstructEq( + mb.b.factory.ConstructVariable(mb.insertColIDs[i]), + mb.b.factory.ConstructVariable(mb.fetchScope.cols[i].id), + ) + on = append(on, mb.b.factory.ConstructFiltersItem(condition)) + } + } + + // If the index is a unique partial index, then insert rows that do not + // satisfy the partial index predicate cannot conflict with existing rows in + // the unique partial index. Therefore, the partial index predicate + // expression is added to the ON filters. + if pred != nil { + texpr := mb.outScope.resolveAndRequireType(pred, types.Bool) + predScalar := mb.b.buildScalar(texpr, mb.outScope, nil, nil, nil) + on = append(on, mb.b.factory.ConstructFiltersItem(predScalar)) + } + + // Add the fetch columns to the current scope. It's OK to modify the current + // scope because it contains only INSERT columns that were added by the + // mutationBuilder, and which are no longer needed for any other purpose. + mb.outScope.appendColumnsFromScope(mb.fetchScope) + + // Construct the left join. + mb.outScope.expr = mb.b.factory.ConstructLeftJoin( + mb.outScope.expr, + mb.fetchScope.expr, + on, + memo.EmptyJoinPrivate, + ) +} + // buildDistinctOnForDoNothingArbiter adds an UpsertDistinctOn operator for a // single arbiter index or constraint. // -// - columnOrds is the set of table column ordinals that the arbiter +// - insertColScope contains the columns of the insert rows. +// - conflictOrds is the set of table column ordinals that the arbiter // guarantees uniqueness of. // - partialArbiterDistinctCol is a column that allows the UpsertDistinctOn to // only de-duplicate insert rows that satisfy the partial index or // constraint predicate. If the arbiter is not a partial index or // constraint, partialArbiterDistinctCol is nil. +// - errorOnDup indicates whether multiple rows in the same distinct group +// should trigger an error. If empty, no error is triggered. // -func (mb *mutationBuilder) buildDistinctOnForDoNothingArbiter( - insertColScope *scope, colOrds util.FastIntSet, partialArbiterDistinctCol *scopeColumn, +func (mb *mutationBuilder) buildDistinctOnForArbiter( + insertColScope *scope, + conflictOrds util.FastIntSet, + partialArbiterDistinctCol *scopeColumn, + errorOnDup string, ) { // Add an UpsertDistinctOn operator to ensure there are no duplicate input // rows for this arbiter. Duplicate rows can trigger conflict errors at // runtime, which DO NOTHING is not supposed to do. See issue #37880. var conflictCols opt.ColSet - for i, ok := colOrds.Next(0); ok; i, ok = colOrds.Next(i + 1) { + for i, ok := conflictOrds.Next(0); ok; i, ok = conflictOrds.Next(i + 1) { conflictCols.Add(mb.insertColIDs[i]) } if partialArbiterDistinctCol != nil { @@ -294,7 +391,8 @@ func (mb *mutationBuilder) buildDistinctOnForDoNothingArbiter( // Treat NULL values as distinct from one another. And if duplicates are // detected, remove them rather than raising an error. mb.outScope = mb.b.buildDistinctOn( - conflictCols, mb.outScope, true /* nullsAreDistinct */, "" /* errorOnDup */) + conflictCols, mb.outScope, true /* nullsAreDistinct */, errorOnDup, + ) // Remove the partialArbiterDistinctCol from the output. if partialArbiterDistinctCol != nil {