-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sql/backfill: fix bug adding new columns to new index with volatile d…
…efault Release note (bug fix): In 21.1 a bug was introduced whereby default values were recomputed when populating data in new secondary indexes for columns which were added in the same transaction as the index. This arises, for example in cases like `ALTER TABLE t ADD COLUMN f FLOAT8 UNIQUE DEFAULT (random())`. If the default expression is not volatile, then the recomputation is harmless. If, however, the default expression is volatile, the data in the secondary index will not match the data in the primary index: a corrupt index will have been created. This bug has now been fixed.
- Loading branch information
Showing
7 changed files
with
637 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
// Copyright 2022 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package backfill | ||
|
||
import ( | ||
"github.com/cockroachdb/cockroach/pkg/sql/catalog" | ||
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" | ||
"github.com/cockroachdb/cockroach/pkg/util" | ||
"github.com/cockroachdb/errors" | ||
) | ||
|
||
// indexBackfillerCols holds the interned set of columns used by the | ||
// indexBackfiller and the information about which columns will need to | ||
// be evaluated during the backfill. | ||
type indexBackfillerCols struct { | ||
// colIdxMap maps ColumnIDs to indices into desc.Columns and desc.Mutations. | ||
colIdxMap catalog.TableColMap | ||
|
||
// cols are all writable (PUBLIC and DELETE_AND_WRITE_ONLY) columns in | ||
// the descriptor. | ||
cols []catalog.Column | ||
|
||
// addedCols are the columns in DELETE_AND_WRITE_ONLY being added as part of | ||
// this index which are not computed. The definition of being added is that | ||
// the index being backfilled is a new primary index and the columns do not | ||
// exist in the currently public primary index. Note that this should never | ||
// be populated when running using the legacy schema changer; the legacy | ||
// schema changer rewrites primary indexes in-place using the column | ||
// backfiller before creating any secondary indexes. | ||
addedCols []catalog.Column | ||
|
||
// computedCols are the columns in this index which are computed and do | ||
// not have concrete values in the source index. This is virtual computed | ||
// columns and stored computed columns which are non-public and not part | ||
// of the currently public primary index. | ||
computedCols []catalog.Column | ||
|
||
// valNeededForCol contains the indexes (into cols) of all columns that we | ||
// need to fetch values for. | ||
valNeededForCol util.FastIntSet | ||
} | ||
|
||
// makeIndexBackfillColumns computes the set of writable columns and | ||
// classifies the two subsets of that set which will need to be computed | ||
// during execution of the backfill. Note that all columns will be included | ||
// in the complete cols slice regardless of whether they appear in | ||
// addedIndexes. Appearance in addedIndexes will affect valNeededForColumn. | ||
// Note that the valNeededForCol set will not be populated with | ||
// requirements due to references in expressions. That needs to be added after | ||
// constructing this information. | ||
func makeIndexBackfillColumns( | ||
deletableColumns []catalog.Column, sourcePrimaryIndex catalog.Index, addedIndexes []catalog.Index, | ||
) (indexBackfillerCols, error) { | ||
|
||
// We will need to evaluate default or computed expressions for | ||
// physical columns being added only if this is a primary index backfill | ||
// and only if the added columns are not in the existing public primary | ||
// index. We're going to assume that if there is a new column being added | ||
// to a new primary index that it doesn't appear in any secondary index | ||
// being simultaneously backfilled, because if that column were needed, | ||
// and we're adding it now, we'd have a problem. | ||
|
||
var ib indexBackfillerCols | ||
ib.cols = make([]catalog.Column, 0, len(deletableColumns)) | ||
|
||
var computedVirtual catalog.TableColSet | ||
for _, column := range deletableColumns { | ||
if !column.Public() && | ||
!(column.Adding() && column.WriteAndDeleteOnly()) { | ||
continue | ||
} | ||
if column.IsComputed() && column.IsVirtual() { | ||
computedVirtual.Add(column.GetID()) | ||
ib.computedCols = append(ib.computedCols, column) | ||
} | ||
// Create a map of each column's ID to its ordinal. | ||
ib.colIdxMap.Set(column.GetID(), len(ib.cols)) | ||
ib.cols = append(ib.cols, column) | ||
} | ||
|
||
// Find the adding columns which are being added to new primary indexes. | ||
primaryColumns := indexColumns(sourcePrimaryIndex) | ||
var addedDefaultOrComputed catalog.TableColSet | ||
for _, idx := range addedIndexes { | ||
if idx.GetEncodingType() != descpb.PrimaryIndexEncoding { | ||
if !indexColumns(idx).Difference(computedVirtual).SubsetOf(primaryColumns) { | ||
return indexBackfillerCols{}, errors.AssertionFailedf( | ||
"secondary index for backfill contains physical column not present in " + | ||
"source primary index", | ||
) | ||
} | ||
} | ||
addedDefaultOrComputed.UnionWith( | ||
indexColumns(idx).Difference(computedVirtual).Difference(primaryColumns), | ||
) | ||
} | ||
|
||
// Account for these new columns being initially backfilled into new primary | ||
// indexes. | ||
for _, colID := range addedDefaultOrComputed.Ordered() { | ||
ord, ok := ib.colIdxMap.Get(colID) | ||
if !ok { | ||
return indexBackfillerCols{}, errors.AssertionFailedf( | ||
"index being backfilled contains non-writable or dropping column") | ||
} | ||
col := ib.cols[ord] | ||
if col.IsComputed() { | ||
ib.computedCols = append(ib.computedCols, col) | ||
} else { | ||
ib.addedCols = append(ib.addedCols, col) | ||
} | ||
} | ||
|
||
ib.valNeededForCol = makeInitialValNeededForCol(ib, addedIndexes) | ||
return ib, nil | ||
} | ||
|
||
// makeInitialValNeededForCol computes the initial set of columns whose values | ||
// are known to be needed based solely on index membership. More may be needed | ||
// because of references in expressions. | ||
func makeInitialValNeededForCol( | ||
ib indexBackfillerCols, addedIndexes []catalog.Index, | ||
) (valNeededForCol util.FastIntSet) { | ||
// Any columns we're going to eval, we don't need values for ahead of time. | ||
toEval := func() catalog.TableColSet { | ||
columnIDs := func(columns []catalog.Column) (s catalog.TableColSet) { | ||
for _, c := range columns { | ||
s.Add(c.GetID()) | ||
} | ||
return s | ||
} | ||
|
||
// The set of columns we'll evaluate are the addedColumns and the computed | ||
// columns. We don't need values for these columns. | ||
toEval := columnIDs(ib.addedCols) | ||
toEval.UnionWith(columnIDs(ib.computedCols)) | ||
return toEval | ||
}() | ||
|
||
for _, idx := range addedIndexes { | ||
colIDs := idx.CollectKeyColumnIDs() | ||
if idx.GetEncodingType() == descpb.PrimaryIndexEncoding { | ||
for _, col := range ib.cols { | ||
if !col.IsVirtual() { | ||
colIDs.Add(col.GetID()) | ||
} | ||
} | ||
} else { | ||
colIDs.UnionWith(idx.CollectSecondaryStoredColumnIDs()) | ||
colIDs.UnionWith(idx.CollectKeySuffixColumnIDs()) | ||
} | ||
|
||
for i := range ib.cols { | ||
if id := ib.cols[i].GetID(); colIDs.Contains(id) && !toEval.Contains(id) { | ||
valNeededForCol.Add(i) | ||
} | ||
} | ||
} | ||
return valNeededForCol | ||
} | ||
|
||
// indexColumns computes the complete set of column stored in an index. | ||
func indexColumns(idx catalog.Index) (s catalog.TableColSet) { | ||
s.UnionWith(idx.CollectKeyColumnIDs()) | ||
s.UnionWith(idx.CollectKeySuffixColumnIDs()) | ||
s.UnionWith(idx.CollectPrimaryStoredColumnIDs()) | ||
s.UnionWith(idx.CollectSecondaryStoredColumnIDs()) | ||
return s | ||
} |
Oops, something went wrong.