Skip to content

Commit

Permalink
colfetcher: fix NULL checks during unique index decoding
Browse files Browse the repository at this point in the history
`colfetcher` must detect `NULL` values in unique secondary index keys on
tables with multiple column families in order to determine whether
consecutive KVs belongs to the same row or different rows. Previously,
only the columns in the key that were needed by the query were decoded
and checked for `NULL`. This caused incorrect query results when `NULL`
column values were not detected because those columns were not needed.

This commit fixes the issue by checking all columns for `NULL` when
decoding unique secondary index keys on tables with multiple column
families.

Fixes #66706

Release note (bug fix): A bug has been fix that caused incorrect query
results when querying tables with multiple column families and unique
secondary indexes. The bug only occurred if 1) vectorized execution was
enabled for the query, 2) the query scanned a unique secondary index
that contained columns from more than one column family, and 3) rows
fetched by the query contained NULL values for some of the indexed
columns. This bug was present since version 20.1.
  • Loading branch information
mgartner committed Jul 26, 2021
1 parent c7e35f8 commit 503191d
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 14 deletions.
25 changes: 21 additions & 4 deletions pkg/sql/colencoding/key_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,20 @@ import (
// the key is from a different table, and the returned remainingKey indicates a
// "seek prefix": the next key that might be part of the table being searched
// for. See the analog in sqlbase/index_encoding.go.
//
// Sometimes it is necessary to determine if the value of a column is NULL even
// though the value itself is not needed. If checkAllColsForNull is true, then
// foundNull=true will be returned if any columns in the key are NULL,
// regardless of whether or not indexColIdx indicates that the column should be
// decoded.
func DecodeIndexKeyToCols(
da *rowenc.DatumAlloc,
vecs []coldata.Vec,
idx int,
desc catalog.TableDescriptor,
index catalog.Index,
indexColIdx []int,
checkAllColsForNull bool,
types []*types.T,
colDirs []descpb.IndexDescriptor_Direction,
key roachpb.Key,
Expand Down Expand Up @@ -78,7 +85,7 @@ func DecodeIndexKeyToCols(
// it is a interleaving ancestor.
var isNull bool
key, isNull, err = DecodeKeyValsToCols(
da, vecs, idx, indexColIdx[:length], types[:length],
da, vecs, idx, indexColIdx[:length], checkAllColsForNull, types[:length],
colDirs[:length], nil /* unseen */, key, invertedColIdx,
)
if err != nil {
Expand Down Expand Up @@ -117,7 +124,7 @@ func DecodeIndexKeyToCols(

var isNull bool
key, isNull, err = DecodeKeyValsToCols(
da, vecs, idx, indexColIdx, types, colDirs, nil /* unseen */, key, invertedColIdx,
da, vecs, idx, indexColIdx, checkAllColsForNull, types, colDirs, nil /* unseen */, key, invertedColIdx,
)
if err != nil {
return nil, false, false, err
Expand Down Expand Up @@ -147,18 +154,24 @@ func DecodeIndexKeyToCols(
// have been observed during decoding.
// See the analog in sqlbase/index_encoding.go.
// DecodeKeyValsToCols additionally returns whether a NULL was encountered when decoding.
//
// Sometimes it is necessary to determine if the value of a column is NULL even
// though the value itself is not needed. If checkAllColsForNull is true, then
// foundNull=true will be returned if any columns in the key are NULL,
// regardless of whether or not indexColIdx indicates that the column should be
// decoded.
func DecodeKeyValsToCols(
da *rowenc.DatumAlloc,
vecs []coldata.Vec,
idx int,
indexColIdx []int,
checkAllColsForNull bool,
types []*types.T,
directions []descpb.IndexDescriptor_Direction,
unseen *util.FastIntSet,
key []byte,
invertedColIdx int,
) ([]byte, bool, error) {
foundNull := false
) (remainingKey []byte, foundNull bool, _ error) {
for j := range types {
enc := descpb.IndexDescriptor_ASC
if directions != nil {
Expand All @@ -167,6 +180,10 @@ func DecodeKeyValsToCols(
var err error
i := indexColIdx[j]
if i == -1 {
if checkAllColsForNull {
_, isNull := encoding.DecodeIfNull(key)
foundNull = foundNull || isNull
}
// Don't need the coldata - skip it.
key, err = rowenc.SkipTableKey(key)
} else {
Expand Down
39 changes: 29 additions & 10 deletions pkg/sql/colfetcher/cfetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -893,13 +893,19 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
if rf.traceKV {
indexOrds = rf.table.allIndexColOrdinals
}
// For unique secondary indexes on tables with multiple column
// families, we must check all columns for NULL values in order
// to determine whether a KV belongs to the same row as the
// previous KV or a different row.
checkAllColsForNull := rf.table.isSecondaryIndex && rf.table.index.IsUnique() && rf.table.desc.NumFamilies() != 1
key, matches, foundNull, err = colencoding.DecodeIndexKeyToCols(
&rf.table.da,
rf.machine.colvecs,
rf.machine.rowIdx,
rf.table.desc,
rf.table.index,
indexOrds,
checkAllColsForNull,
rf.table.keyValTypes,
rf.table.indexColumnDirs,
rf.machine.nextKV.Key[rf.table.knownPrefixLength:],
Expand Down Expand Up @@ -932,23 +938,31 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
rf.machine.lastRowPrefix = rf.machine.nextKV.Key[:prefixLen]
}

// For unique secondary indexes, the index-key does not distinguish one row
// from the next if both rows contain identical values along with a NULL.
// For unique secondary indexes on tables with multiple column
// families, the index-key does not distinguish one row from the
// next if both rows contain identical values along with a NULL.
// Consider the keys:
//
// /test/unique_idx/NULL/0
// /test/unique_idx/NULL/1
//
// The index-key extracted from the above keys is /test/unique_idx/NULL. The
// trailing /0 and /1 are the primary key used to unique-ify the keys when a
// NULL is present. When a null is present in the index key, we cut off more
// of the index key so that the prefix includes the primary key columns.
// The index-key extracted from the above keys is
// /test/unique_idx/NULL. The trailing /0 and /1 are the primary key
// used to unique-ify the keys when a NULL is present. When a null
// is present in the index key, we include the primary key columns
// in lastRowPrefix.
//
// Note that we do not need to do this for non-unique secondary indexes because
// the extra columns in the primary key will _always_ be there, so we can decode
// them when processing the index. The difference with unique secondary indexes
// is that the extra columns are not always there, and are used to unique-ify
// Note that we do not need to do this for non-unique secondary
// indexes because the extra columns in the primary key will
// _always_ be there, so we can decode them when processing the
// index. The difference with unique secondary indexes is that the
// extra columns are not always there, and are used to unique-ify
// the index key, rather than provide the primary key column values.
//
// We also do not need to do this when a table has only one column
// family because it is guaranteed that there is only one KV per
// row. We entirely skip the check that determines if the row is
// unfinished.
if foundNull && rf.table.isSecondaryIndex && rf.table.index.IsUnique() && rf.table.desc.NumFamilies() != 1 {
// We get the remaining bytes after the computed prefix, and then
// slice off the extra encoded columns from those bytes. We calculate
Expand Down Expand Up @@ -985,11 +999,15 @@ func (rf *cFetcher) nextBatch(ctx context.Context) (coldata.Batch, error) {
if rf.table.rowLastModified.Less(rf.machine.nextKV.Value.Timestamp) {
rf.table.rowLastModified = rf.machine.nextKV.Value.Timestamp
}
// If the table has only one column family, then the next KV will
// always belong to a different row than the current KV.
if rf.table.desc.NumFamilies() == 1 {
rf.machine.state[0] = stateFinalizeRow
rf.machine.state[1] = stateInitFetch
continue
}
// If the table has more than one column family, then the next KV
// may belong to the same row as the current KV.
rf.machine.state[0] = stateFetchNextKVWithUnfinishedRow
case stateSeekPrefix:
// Note: seekPrefix is only used for interleaved tables.
Expand Down Expand Up @@ -1259,6 +1277,7 @@ func (rf *cFetcher) processValue(
rf.machine.colvecs,
rf.machine.rowIdx,
extraColOrds,
false, /* checkAllColsForNull */
table.extraTypes,
nil,
&rf.machine.remainingValueColsByIdx,
Expand Down
25 changes: 25 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/vectorize
Original file line number Diff line number Diff line change
Expand Up @@ -1249,3 +1249,28 @@ SELECT _float4::FLOAT8, _float8::FLOAT4 FROM t67793
----
1 2
2 1

# Regression test for #66706. Ensure that cfetcher can correctly determine when
# consecutive KVs in a unique secondary index belong to the same row.
statement ok
SET vectorize=on

statement ok
CREATE TABLE t66706 (
a STRING,
b STRING,
UNIQUE INDEX u (b, a),
FAMILY (a),
FAMILY (b)
)

statement ok
INSERT INTO t66706 VALUES
(NULL, 'bar'),
(NULL, 'bar')

query T
SELECT b FROM t66706@u WHERE NOT (b = 'foo')
----
bar
bar

0 comments on commit 503191d

Please sign in to comment.