cockroachdb · craig · Jul 27, 2021 · Jul 26, 2021
@@ -34,13 +34,20 @@ import (
 // the key is from a different table, and the returned remainingKey indicates a
 // "seek prefix": the next key that might be part of the table being searched
 // for. See the analog in sqlbase/index_encoding.go.
+//
+// Sometimes it is necessary to determine if the value of a column is NULL even
+// though the value itself is not needed. If checkAllColsForNull is true, then
+// foundNull=true will be returned if any columns in the key are NULL,
+// regardless of whether or not indexColIdx indicates that the column should be
+// decoded.
 func DecodeIndexKeyToCols(
 	da *rowenc.DatumAlloc,
 	vecs []coldata.Vec,
 	idx int,
 	desc catalog.TableDescriptor,
 	index catalog.Index,
 	indexColIdx []int,
+	checkAllColsForNull bool,
 	types []*types.T,
 	colDirs []descpb.IndexDescriptor_Direction,
 	key roachpb.Key,
@@ -78,7 +85,7 @@ func DecodeIndexKeyToCols(
 			// it is a interleaving ancestor.
 			var isNull bool
 			key, isNull, err = DecodeKeyValsToCols(
-				da, vecs, idx, indexColIdx[:length], types[:length],
+				da, vecs, idx, indexColIdx[:length], checkAllColsForNull, types[:length],
 				colDirs[:length], nil /* unseen */, key, invertedColIdx,
 			)
 			if err != nil {
@@ -117,7 +124,7 @@ func DecodeIndexKeyToCols(
 
 	var isNull bool
 	key, isNull, err = DecodeKeyValsToCols(
-		da, vecs, idx, indexColIdx, types, colDirs, nil /* unseen */, key, invertedColIdx,
+		da, vecs, idx, indexColIdx, checkAllColsForNull, types, colDirs, nil /* unseen */, key, invertedColIdx,
 	)
 	if err != nil {
 		return nil, false, false, err
@@ -147,18 +154,24 @@ func DecodeIndexKeyToCols(
 // have been observed during decoding.
 // See the analog in sqlbase/index_encoding.go.
 // DecodeKeyValsToCols additionally returns whether a NULL was encountered when decoding.
+//
+// Sometimes it is necessary to determine if the value of a column is NULL even
+// though the value itself is not needed. If checkAllColsForNull is true, then
+// foundNull=true will be returned if any columns in the key are NULL,
+// regardless of whether or not indexColIdx indicates that the column should be
+// decoded.
 func DecodeKeyValsToCols(
 	da *rowenc.DatumAlloc,
 	vecs []coldata.Vec,
 	idx int,
 	indexColIdx []int,
+	checkAllColsForNull bool,
 	types []*types.T,
 	directions []descpb.IndexDescriptor_Direction,
 	unseen *util.FastIntSet,
 	key []byte,
 	invertedColIdx int,
-) ([]byte, bool, error) {
-	foundNull := false
+) (remainingKey []byte, foundNull bool, _ error) {
 	for j := range types {
 		enc := descpb.IndexDescriptor_ASC
 		if directions != nil {
@@ -167,6 +180,10 @@ func DecodeKeyValsToCols(
 		var err error
 		i := indexColIdx[j]
 		if i == -1 {
+			if checkAllColsForNull {
+				isNull := encoding.PeekType(key) == encoding.Null
+				foundNull = foundNull || isNull
+			}
 			// Don't need the coldata - skip it.
 			key, err = rowenc.SkipTableKey(key)
 		} else {

@@ -893,13 +893,19 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
 				if rf.traceKV {
 					indexOrds = rf.table.allIndexColOrdinals
 				}
+				// For unique secondary indexes on tables with multiple column
+				// families, we must check all columns for NULL values in order
+				// to determine whether a KV belongs to the same row as the
+				// previous KV or a different row.
+				checkAllColsForNull := rf.table.isSecondaryIndex && rf.table.index.IsUnique() && rf.table.desc.NumFamilies() != 1
 				key, matches, foundNull, err = colencoding.DecodeIndexKeyToCols(
 					&rf.table.da,
 					rf.machine.colvecs,
 					rf.machine.rowIdx,
 					rf.table.desc,
 					rf.table.index,
 					indexOrds,
+					checkAllColsForNull,
 					rf.table.keyValTypes,
 					rf.table.indexColumnDirs,
 					rf.machine.nextKV.Key[rf.table.knownPrefixLength:],
@@ -932,23 +938,31 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
 				rf.machine.lastRowPrefix = rf.machine.nextKV.Key[:prefixLen]
 			}
 
-			// For unique secondary indexes, the index-key does not distinguish one row
-			// from the next if both rows contain identical values along with a NULL.
+			// For unique secondary indexes on tables with multiple column
+			// families, the index-key does not distinguish one row from the
+			// next if both rows contain identical values along with a NULL.
 			// Consider the keys:
 			//
 			//   /test/unique_idx/NULL/0
 			//   /test/unique_idx/NULL/1
 			//
-			// The index-key extracted from the above keys is /test/unique_idx/NULL. The
-			// trailing /0 and /1 are the primary key used to unique-ify the keys when a
-			// NULL is present. When a null is present in the index key, we cut off more
-			// of the index key so that the prefix includes the primary key columns.
+			// The index-key extracted from the above keys is
+			// /test/unique_idx/NULL. The trailing /0 and /1 are the primary key
+			// used to unique-ify the keys when a NULL is present. When a null
+			// is present in the index key, we include the primary key columns
+			// in lastRowPrefix.
 			//
-			// Note that we do not need to do this for non-unique secondary indexes because
-			// the extra columns in the primary key will _always_ be there, so we can decode
-			// them when processing the index. The difference with unique secondary indexes
-			// is that the extra columns are not always there, and are used to unique-ify
+			// Note that we do not need to do this for non-unique secondary
+			// indexes because the extra columns in the primary key will
+			// _always_ be there, so we can decode them when processing the
+			// index. The difference with unique secondary indexes is that the
+			// extra columns are not always there, and are used to unique-ify
 			// the index key, rather than provide the primary key column values.
+			//
+			// We also do not need to do this when a table has only one column
+			// family because it is guaranteed that there is only one KV per
+			// row. We entirely skip the check that determines if the row is
+			// unfinished.
 			if foundNull && rf.table.isSecondaryIndex && rf.table.index.IsUnique() && rf.table.desc.NumFamilies() != 1 {
 				// We get the remaining bytes after the computed prefix, and then
 				// slice off the extra encoded columns from those bytes. We calculate
@@ -985,11 +999,15 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
 			if rf.table.rowLastModified.Less(rf.machine.nextKV.Value.Timestamp) {
 				rf.table.rowLastModified = rf.machine.nextKV.Value.Timestamp
 			}
+			// If the table has only one column family, then the next KV will
+			// always belong to a different row than the current KV.
 			if rf.table.desc.NumFamilies() == 1 {
 				rf.machine.state[0] = stateFinalizeRow
 				rf.machine.state[1] = stateInitFetch
 				continue
 			}
+			// If the table has more than one column family, then the next KV
+			// may belong to the same row as the current KV.
 			rf.machine.state[0] = stateFetchNextKVWithUnfinishedRow
 		case stateSeekPrefix:
 			// Note: seekPrefix is only used for interleaved tables.
@@ -1259,6 +1277,7 @@ func (rf *cFetcher) processValue(
 					rf.machine.colvecs,
 					rf.machine.rowIdx,
 					extraColOrds,
+					false, /* checkAllColsForNull */
 					table.extraTypes,
 					nil,
 					&rf.machine.remainingValueColsByIdx,

diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize b/pkg/sql/logictest/testdata/logic_test/vectorize
@@ -1249,3 +1249,25 @@ SELECT _float4::FLOAT8, _float8::FLOAT4 FROM t67793
 ----
 1  2
 2  1
+
+# Regression test for #66706. Ensure that cfetcher can correctly determine when
+# consecutive KVs in a unique secondary index belong to the same row.
+statement ok
+CREATE TABLE t66706 (
+  a STRING,
+  b STRING,
+  UNIQUE INDEX u (b, a),
+  FAMILY (a),
+  FAMILY (b)
+)
+
+statement ok
+INSERT INTO t66706 VALUES
+  (NULL, 'bar'),
+  (NULL, 'bar')
+
+query T
+SELECT b FROM t66706@u WHERE NOT (b = 'foo')
+----
+bar
+bar