Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

colfetcher: fix NULL checks during unique index decoding #68071

Merged
merged 1 commit into from
Jul 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions pkg/sql/colencoding/key_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,20 @@ import (
// the key is from a different table, and the returned remainingKey indicates a
// "seek prefix": the next key that might be part of the table being searched
// for. See the analog in sqlbase/index_encoding.go.
//
// Sometimes it is necessary to determine if the value of a column is NULL even
// though the value itself is not needed. If checkAllColsForNull is true, then
// foundNull=true will be returned if any columns in the key are NULL,
// regardless of whether or not indexColIdx indicates that the column should be
// decoded.
func DecodeIndexKeyToCols(
da *rowenc.DatumAlloc,
vecs []coldata.Vec,
idx int,
desc catalog.TableDescriptor,
index catalog.Index,
indexColIdx []int,
checkAllColsForNull bool,
types []*types.T,
colDirs []descpb.IndexDescriptor_Direction,
key roachpb.Key,
Expand Down Expand Up @@ -78,7 +85,7 @@ func DecodeIndexKeyToCols(
// it is a interleaving ancestor.
var isNull bool
key, isNull, err = DecodeKeyValsToCols(
da, vecs, idx, indexColIdx[:length], types[:length],
da, vecs, idx, indexColIdx[:length], checkAllColsForNull, types[:length],
colDirs[:length], nil /* unseen */, key, invertedColIdx,
)
if err != nil {
Expand Down Expand Up @@ -117,7 +124,7 @@ func DecodeIndexKeyToCols(

var isNull bool
key, isNull, err = DecodeKeyValsToCols(
da, vecs, idx, indexColIdx, types, colDirs, nil /* unseen */, key, invertedColIdx,
da, vecs, idx, indexColIdx, checkAllColsForNull, types, colDirs, nil /* unseen */, key, invertedColIdx,
)
if err != nil {
return nil, false, false, err
Expand Down Expand Up @@ -147,18 +154,24 @@ func DecodeIndexKeyToCols(
// have been observed during decoding.
// See the analog in sqlbase/index_encoding.go.
// DecodeKeyValsToCols additionally returns whether a NULL was encountered when decoding.
//
// Sometimes it is necessary to determine if the value of a column is NULL even
// though the value itself is not needed. If checkAllColsForNull is true, then
// foundNull=true will be returned if any columns in the key are NULL,
// regardless of whether or not indexColIdx indicates that the column should be
// decoded.
func DecodeKeyValsToCols(
da *rowenc.DatumAlloc,
vecs []coldata.Vec,
idx int,
indexColIdx []int,
checkAllColsForNull bool,
types []*types.T,
directions []descpb.IndexDescriptor_Direction,
unseen *util.FastIntSet,
key []byte,
invertedColIdx int,
) ([]byte, bool, error) {
foundNull := false
) (remainingKey []byte, foundNull bool, _ error) {
for j := range types {
enc := descpb.IndexDescriptor_ASC
if directions != nil {
Expand All @@ -167,6 +180,10 @@ func DecodeKeyValsToCols(
var err error
i := indexColIdx[j]
if i == -1 {
if checkAllColsForNull {
isNull := encoding.PeekType(key) == encoding.Null
foundNull = foundNull || isNull
}
// Don't need the coldata - skip it.
key, err = rowenc.SkipTableKey(key)
} else {
Expand Down
39 changes: 29 additions & 10 deletions pkg/sql/colfetcher/cfetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -893,13 +893,19 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
if rf.traceKV {
indexOrds = rf.table.allIndexColOrdinals
}
// For unique secondary indexes on tables with multiple column
// families, we must check all columns for NULL values in order
// to determine whether a KV belongs to the same row as the
// previous KV or a different row.
checkAllColsForNull := rf.table.isSecondaryIndex && rf.table.index.IsUnique() && rf.table.desc.NumFamilies() != 1
key, matches, foundNull, err = colencoding.DecodeIndexKeyToCols(
&rf.table.da,
rf.machine.colvecs,
rf.machine.rowIdx,
rf.table.desc,
rf.table.index,
indexOrds,
checkAllColsForNull,
rf.table.keyValTypes,
rf.table.indexColumnDirs,
rf.machine.nextKV.Key[rf.table.knownPrefixLength:],
Expand Down Expand Up @@ -932,23 +938,31 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
rf.machine.lastRowPrefix = rf.machine.nextKV.Key[:prefixLen]
}

// For unique secondary indexes, the index-key does not distinguish one row
// from the next if both rows contain identical values along with a NULL.
// For unique secondary indexes on tables with multiple column
// families, the index-key does not distinguish one row from the
// next if both rows contain identical values along with a NULL.
// Consider the keys:
//
// /test/unique_idx/NULL/0
// /test/unique_idx/NULL/1
//
// The index-key extracted from the above keys is /test/unique_idx/NULL. The
// trailing /0 and /1 are the primary key used to unique-ify the keys when a
// NULL is present. When a null is present in the index key, we cut off more
// of the index key so that the prefix includes the primary key columns.
// The index-key extracted from the above keys is
// /test/unique_idx/NULL. The trailing /0 and /1 are the primary key
// used to unique-ify the keys when a NULL is present. When a null
// is present in the index key, we include the primary key columns
// in lastRowPrefix.
//
// Note that we do not need to do this for non-unique secondary indexes because
// the extra columns in the primary key will _always_ be there, so we can decode
// them when processing the index. The difference with unique secondary indexes
// is that the extra columns are not always there, and are used to unique-ify
// Note that we do not need to do this for non-unique secondary
// indexes because the extra columns in the primary key will
// _always_ be there, so we can decode them when processing the
// index. The difference with unique secondary indexes is that the
// extra columns are not always there, and are used to unique-ify
// the index key, rather than provide the primary key column values.
//
// We also do not need to do this when a table has only one column
// family because it is guaranteed that there is only one KV per
// row. We entirely skip the check that determines if the row is
// unfinished.
if foundNull && rf.table.isSecondaryIndex && rf.table.index.IsUnique() && rf.table.desc.NumFamilies() != 1 {
// We get the remaining bytes after the computed prefix, and then
// slice off the extra encoded columns from those bytes. We calculate
Expand Down Expand Up @@ -985,11 +999,15 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
if rf.table.rowLastModified.Less(rf.machine.nextKV.Value.Timestamp) {
rf.table.rowLastModified = rf.machine.nextKV.Value.Timestamp
}
// If the table has only one column family, then the next KV will
// always belong to a different row than the current KV.
if rf.table.desc.NumFamilies() == 1 {
rf.machine.state[0] = stateFinalizeRow
rf.machine.state[1] = stateInitFetch
continue
}
// If the table has more than one column family, then the next KV
// may belong to the same row as the current KV.
rf.machine.state[0] = stateFetchNextKVWithUnfinishedRow
case stateSeekPrefix:
// Note: seekPrefix is only used for interleaved tables.
Expand Down Expand Up @@ -1259,6 +1277,7 @@ func (rf *cFetcher) processValue(
rf.machine.colvecs,
rf.machine.rowIdx,
extraColOrds,
false, /* checkAllColsForNull */
table.extraTypes,
nil,
&rf.machine.remainingValueColsByIdx,
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/vectorize
Original file line number Diff line number Diff line change
Expand Up @@ -1249,3 +1249,25 @@ SELECT _float4::FLOAT8, _float8::FLOAT4 FROM t67793
----
1 2
2 1

# Regression test for #66706. Ensure that cfetcher can correctly determine when
# consecutive KVs in a unique secondary index belong to the same row.
statement ok
CREATE TABLE t66706 (
a STRING,
b STRING,
UNIQUE INDEX u (b, a),
FAMILY (a),
FAMILY (b)
)

statement ok
INSERT INTO t66706 VALUES
(NULL, 'bar'),
(NULL, 'bar')

query T
SELECT b FROM t66706@u WHERE NOT (b = 'foo')
----
bar
bar