diff --git a/pkg/sql/colencoding/key_encoding.go b/pkg/sql/colencoding/key_encoding.go index dfc47995a473..c765865b446b 100644 --- a/pkg/sql/colencoding/key_encoding.go +++ b/pkg/sql/colencoding/key_encoding.go @@ -34,6 +34,12 @@ import ( // the key is from a different table, and the returned remainingKey indicates a // "seek prefix": the next key that might be part of the table being searched // for. See the analog in sqlbase/index_encoding.go. +// +// Sometimes it is necessary to determine if the value of a column is NULL even +// though the value itself is not needed. If checkAllColsForNull is true, then +// foundNull=true will be returned if any columns in the key are NULL, +// regardless of whether or not indexColIdx indicates that the column should be +// decoded. func DecodeIndexKeyToCols( da *rowenc.DatumAlloc, vecs []coldata.Vec, @@ -41,6 +47,7 @@ func DecodeIndexKeyToCols( desc catalog.TableDescriptor, index catalog.Index, indexColIdx []int, + checkAllColsForNull bool, types []*types.T, colDirs []descpb.IndexDescriptor_Direction, key roachpb.Key, @@ -78,7 +85,7 @@ func DecodeIndexKeyToCols( // it is a interleaving ancestor. var isNull bool key, isNull, err = DecodeKeyValsToCols( - da, vecs, idx, indexColIdx[:length], types[:length], + da, vecs, idx, indexColIdx[:length], checkAllColsForNull, types[:length], colDirs[:length], nil /* unseen */, key, invertedColIdx, ) if err != nil { @@ -117,7 +124,7 @@ func DecodeIndexKeyToCols( var isNull bool key, isNull, err = DecodeKeyValsToCols( - da, vecs, idx, indexColIdx, types, colDirs, nil /* unseen */, key, invertedColIdx, + da, vecs, idx, indexColIdx, checkAllColsForNull, types, colDirs, nil /* unseen */, key, invertedColIdx, ) if err != nil { return nil, false, false, err @@ -147,18 +154,24 @@ func DecodeIndexKeyToCols( // have been observed during decoding. // See the analog in sqlbase/index_encoding.go. // DecodeKeyValsToCols additionally returns whether a NULL was encountered when decoding. +// +// Sometimes it is necessary to determine if the value of a column is NULL even +// though the value itself is not needed. If checkAllColsForNull is true, then +// foundNull=true will be returned if any columns in the key are NULL, +// regardless of whether or not indexColIdx indicates that the column should be +// decoded. func DecodeKeyValsToCols( da *rowenc.DatumAlloc, vecs []coldata.Vec, idx int, indexColIdx []int, + checkAllColsForNull bool, types []*types.T, directions []descpb.IndexDescriptor_Direction, unseen *util.FastIntSet, key []byte, invertedColIdx int, -) ([]byte, bool, error) { - foundNull := false +) (remainingKey []byte, foundNull bool, _ error) { for j := range types { enc := descpb.IndexDescriptor_ASC if directions != nil { @@ -167,6 +180,10 @@ func DecodeKeyValsToCols( var err error i := indexColIdx[j] if i == -1 { + if checkAllColsForNull { + _, isNull := encoding.DecodeIfNull(key) + foundNull = foundNull || isNull + } // Don't need the coldata - skip it. key, err = rowenc.SkipTableKey(key) } else { diff --git a/pkg/sql/colfetcher/cfetcher.go b/pkg/sql/colfetcher/cfetcher.go index 683ec2550ad6..0371a01c17f5 100644 --- a/pkg/sql/colfetcher/cfetcher.go +++ b/pkg/sql/colfetcher/cfetcher.go @@ -893,6 +893,11 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) { if rf.traceKV { indexOrds = rf.table.allIndexColOrdinals } + // For unique secondary indexes on tables with multiple column + // families, we must check all columns for NULL values in order + // to determine whether a KV belongs to the same row as the + // previous KV or a different row. + checkAllColsForNull := rf.table.isSecondaryIndex && rf.table.index.IsUnique() && rf.table.desc.NumFamilies() != 1 key, matches, foundNull, err = colencoding.DecodeIndexKeyToCols( &rf.table.da, rf.machine.colvecs, @@ -900,6 +905,7 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) { rf.table.desc, rf.table.index, indexOrds, + checkAllColsForNull, rf.table.keyValTypes, rf.table.indexColumnDirs, rf.machine.nextKV.Key[rf.table.knownPrefixLength:], @@ -932,23 +938,31 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) { rf.machine.lastRowPrefix = rf.machine.nextKV.Key[:prefixLen] } - // For unique secondary indexes, the index-key does not distinguish one row - // from the next if both rows contain identical values along with a NULL. + // For unique secondary indexes on tables with multiple column + // families, the index-key does not distinguish one row from the + // next if both rows contain identical values along with a NULL. // Consider the keys: // // /test/unique_idx/NULL/0 // /test/unique_idx/NULL/1 // - // The index-key extracted from the above keys is /test/unique_idx/NULL. The - // trailing /0 and /1 are the primary key used to unique-ify the keys when a - // NULL is present. When a null is present in the index key, we cut off more - // of the index key so that the prefix includes the primary key columns. + // The index-key extracted from the above keys is + // /test/unique_idx/NULL. The trailing /0 and /1 are the primary key + // used to unique-ify the keys when a NULL is present. When a null + // is present in the index key, we include the primary key columns + // in lastRowPrefix. // - // Note that we do not need to do this for non-unique secondary indexes because - // the extra columns in the primary key will _always_ be there, so we can decode - // them when processing the index. The difference with unique secondary indexes - // is that the extra columns are not always there, and are used to unique-ify + // Note that we do not need to do this for non-unique secondary + // indexes because the extra columns in the primary key will + // _always_ be there, so we can decode them when processing the + // index. The difference with unique secondary indexes is that the + // extra columns are not always there, and are used to unique-ify // the index key, rather than provide the primary key column values. + // + // We also do not need to do this when a table has only one column + // family because it is guaranteed that there is only one KV per + // row. We entirely skip the check that determines if the row is + // unfinished. if foundNull && rf.table.isSecondaryIndex && rf.table.index.IsUnique() && rf.table.desc.NumFamilies() != 1 { // We get the remaining bytes after the computed prefix, and then // slice off the extra encoded columns from those bytes. We calculate @@ -985,11 +999,15 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) { if rf.table.rowLastModified.Less(rf.machine.nextKV.Value.Timestamp) { rf.table.rowLastModified = rf.machine.nextKV.Value.Timestamp } + // If the table has only one column family, then the next KV will + // always belong to a different row than the current KV. if rf.table.desc.NumFamilies() == 1 { rf.machine.state[0] = stateFinalizeRow rf.machine.state[1] = stateInitFetch continue } + // If the table has more than one column family, then the next KV + // may belong to the same row as the current KV. rf.machine.state[0] = stateFetchNextKVWithUnfinishedRow case stateSeekPrefix: // Note: seekPrefix is only used for interleaved tables. @@ -1259,6 +1277,7 @@ func (rf *cFetcher) processValue( rf.machine.colvecs, rf.machine.rowIdx, extraColOrds, + false, /* checkAllColsForNull */ table.extraTypes, nil, &rf.machine.remainingValueColsByIdx, diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize b/pkg/sql/logictest/testdata/logic_test/vectorize index 84fad2f5ada0..12304a66760f 100644 --- a/pkg/sql/logictest/testdata/logic_test/vectorize +++ b/pkg/sql/logictest/testdata/logic_test/vectorize @@ -1249,3 +1249,28 @@ SELECT _float4::FLOAT8, _float8::FLOAT4 FROM t67793 ---- 1 2 2 1 + +# Regression test for #66706. Ensure that cfetcher can correctly determine when +# consecutive KVs in a unique secondary index belong to the same row. +statement ok +SET vectorize=on + +statement ok +CREATE TABLE t66706 ( + a STRING, + b STRING, + UNIQUE INDEX u (b, a), + FAMILY (a), + FAMILY (b) +) + +statement ok +INSERT INTO t66706 VALUES + (NULL, 'bar'), + (NULL, 'bar') + +query T +SELECT b FROM t66706@u WHERE NOT (b = 'foo') +---- +bar +bar