Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
59028: coldata: fix Bytes invariant in some cases r=yuzefovich a=yuzefovich

**execgen: remove SLICE method**

`execgen.SLICE` directive is used only in one place, and we can use
`execgen.WINDOW` there instead (which will have the same effect).

Release note: None

**coldata: fix updating offsets of bytes in Batch.SetLength**

In `SetLength` method we are maintaining the invariant of `Bytes`
vectors that the offsets are non-decreasing sequences. Previously, this
was done incorrectly when a selection vector is present on the batch
which could lead to out of bounds errors (caught by our panic-catcher)
some time later. This is now fixed by correctly paying attention to the
selection vector.

I neither can easily come up with an example query that would trigger
this condition nor can I prove that it won't occur, but I think we have
seen a single sentry report that could be explained by this bug, so I
think it's worth backporting.

Additionally, this commit uses the assumption that the selection vectors
are increasing sequences in order to calculate the largest index
accessed by the batch.

Fixes: cockroachdb#57297.

Release note (bug fix): Previously, CockroachDB could encounter an
internal error when executing queries with BYTES or STRING types via the
vectorized engine in rare circumstances, and now this is fixed.

59127: importccl: skip BenchmarkUserFileImport r=RaduBerinde a=RaduBerinde

This benchmark causes repeated CI failures.

Informs cockroachdb#59126.

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
Co-authored-by: Radu Berinde <[email protected]>
  • Loading branch information
3 people committed Jan 19, 2021
3 parents 6a51818 + 3589b52 + 1ecbc2f commit 731d083
Show file tree
Hide file tree
Showing 11 changed files with 64 additions and 33 deletions.
1 change: 1 addition & 0 deletions pkg/ccl/importccl/import_stmt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3511,6 +3511,7 @@ func BenchmarkNodelocalImport(b *testing.B) {
// BenchmarkUserfileImport-16 1 4060204527 ns/op 6.68 MB/s
// BenchmarkUserfileImport-16 1 4627419761 ns/op 5.86 MB/s
func BenchmarkUserfileImport(b *testing.B) {
skip.WithIssue(b, 59126)
benchUserUpload(b, "userfile://defaultdb.public.root")
}

Expand Down
11 changes: 10 additions & 1 deletion pkg/col/coldata/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,17 @@ func (m *MemBatch) SetSelection(b bool) {
func (m *MemBatch) SetLength(length int) {
m.length = length
if length > 0 {
// In order to maintain the invariant of Bytes vectors we need to update
// offsets up to the element with the largest index that can be accessed
// by the batch.
maxIdx := length - 1
if m.useSel {
// Note that here we rely on the fact that selection vectors are
// increasing sequences.
maxIdx = m.sel[length-1]
}
for i, ok := m.bytesVecIdxs.Next(0); ok; i, ok = m.bytesVecIdxs.Next(i + 1) {
m.b[i].Bytes().UpdateOffsetsToBeNonDecreasing(length)
m.b[i].Bytes().UpdateOffsetsToBeNonDecreasing(maxIdx + 1)
}
}
}
Expand Down
34 changes: 34 additions & 0 deletions pkg/col/coldata/batch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,37 @@ func TestBatchReset(t *testing.T) {
b = coldata.NewMemBatch(typsBytes, coldata.StandardColumnFactory)
resetAndCheck(b, typsBytes, 1, true)
}

// TestBatchWithBytesAndNulls verifies that the invariant of Bytes vectors is
// maintained when NULL values are present in the vector and there is a
// selection vector on the batch.
func TestBatchWithBytesAndNulls(t *testing.T) {
defer leaktest.AfterTest(t)()

b := coldata.NewMemBatch([]*types.T{types.Bytes}, coldata.StandardColumnFactory)
// We will insert some garbage data into the Bytes vector in positions that
// are not mentioned in the selection vector. All the values that are
// selected are actually NULL values, so we don't set anything on the Bytes
// vector there.
if coldata.BatchSize() < 6 {
return
}
sel := []int{1, 3, 5}
vec := b.ColVec(0).Bytes()
vec.Set(0, []byte("zero"))
vec.Set(2, []byte("two"))
b.SetSelection(true)
copy(b.Selection(), sel)

// This is where the invariant of non-decreasing offsets in the Bytes vector
// should be updated.
b.SetLength(len(sel))

// In many cases in the vectorized execution engine, for performance
// reasons, we will attempt to get something from the Bytes vector at
// positions on which we have NULLs, so all of the Gets below should be
// safe.
for _, idx := range sel {
assert.True(t, len(vec.Get(idx)) == 0)
}
}
10 changes: 3 additions & 7 deletions pkg/col/coldata/vec.eg.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/col/coldata/vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ type Vec interface {
// An optional Sel slice can also be provided to apply a filter on the source
// Vec.
// Refer to the SliceArgs comment for specifics and TestAppend for examples.
//
// NOTE: Append does *not* support the case of appending 0 values (i.e.
// the behavior of Append when args.SrcStartIdx == args.SrcEndIdx is
// undefined).
Append(SliceArgs)

// Copy uses CopySliceArgs to copy elements of a source Vec into this Vec. It is
Expand Down
14 changes: 6 additions & 8 deletions pkg/col/coldata/vec_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,13 @@ func (m *memColumn) Append(args SliceArgs) {
// whether the value is NULL. It is possible that Bytes' invariant of
// non-decreasing offsets on the source is currently not maintained, so
// we explicitly enforce it.
maxIdx := 0
for _, selIdx := range sel {
if selIdx > maxIdx {
maxIdx = selIdx
}
}
fromCol.UpdateOffsetsToBeNonDecreasing(maxIdx + 1)
//
// Note that here we rely on the fact that selection vectors are
// increasing sequences.
fromCol.UpdateOffsetsToBeNonDecreasing(sel[len(sel)-1] + 1)
// {{else}}
toCol = execgen.SLICE(toCol, 0, args.DestIdx)
// {{/* Here WINDOW means slicing which allows us to use APPENDVAL below. */}}
toCol = execgen.WINDOW(toCol, 0, args.DestIdx)
// {{end}}
for _, selIdx := range sel {
val := fromCol.Get(selIdx)
Expand Down
5 changes: 0 additions & 5 deletions pkg/sql/colexec/execgen/cmd/execgen/data_manipulation_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ var dataManipulationReplacementInfos = []dataManipulationReplacementInfo{
numArgs: 3,
replaceWith: "Set",
},
{
templatePlaceholder: "execgen.SLICE",
numArgs: 3,
replaceWith: "Slice",
},
{
templatePlaceholder: "execgen.COPYSLICE",
numArgs: 5,
Expand Down
9 changes: 4 additions & 5 deletions pkg/sql/colexec/execgen/cmd/execgen/overloads_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,12 +481,12 @@ func (b *argWidthOverloadBase) Set(target, i, new string) string {
return set(b.CanonicalTypeFamily, target, i, new)
}

// Slice is a function that should only be used in templates.
func (b *argWidthOverloadBase) Slice(target, start, end string) string {
// slice is a function that should only be used in templates.
func (b *argWidthOverloadBase) slice(target, start, end string) string {
switch b.CanonicalTypeFamily {
case types.BytesFamily:
// Bytes vector doesn't support slicing.
colexecerror.InternalError(errors.AssertionFailedf("Slice method is attempted to be generated on Bytes vector"))
colexecerror.InternalError(errors.AssertionFailedf("slice method is attempted to be generated on Bytes vector"))
case typeconv.DatumVecCanonicalTypeFamily:
return fmt.Sprintf(`%s.Slice(%s, %s)`, target, start, end)
}
Expand Down Expand Up @@ -608,7 +608,7 @@ func (b *argWidthOverloadBase) Window(target, start, end string) string {
case types.BytesFamily:
return fmt.Sprintf(`%s.Window(%s, %s)`, target, start, end)
}
return b.Slice(target, start, end)
return b.slice(target, start, end)
}

// setVariableSize is a function that should only be used in templates. It
Expand Down Expand Up @@ -651,7 +651,6 @@ var (
_ = awob.GoTypeSliceName
_ = awob.CopyVal
_ = awob.Set
_ = awob.Slice
_ = awob.Sliceable
_ = awob.CopySlice
_ = awob.AppendSlice
Expand Down
7 changes: 0 additions & 7 deletions pkg/sql/colexec/execgen/placeholders.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ const nonTemplatePanic = "do not call from non-template code"
var (
_ = COPYVAL
_ = SET
_ = SLICE
_ = COPYSLICE
_ = APPENDSLICE
_ = APPENDVAL
Expand All @@ -44,12 +43,6 @@ func SET(target, i, new interface{}) {
colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic))
}

// SLICE is a template function.
func SLICE(target, start, end interface{}) interface{} {
colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic))
return nil
}

// COPYSLICE is a template function.
func COPYSLICE(target, src, destIdx, srcStartIdx, srcEndIdx interface{}) {
colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic))
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/colexec/hashtable.go
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ func (ht *hashTable) removeDuplicates(
// appendAllDistinct appends all tuples from batch to the hash table. It
// assumes that all tuples are distinct and that ht.probeScratch.hashBuffer
// contains the hash codes for all of them.
// NOTE: batch must be of non-zero length.
func (ht *hashTable) appendAllDistinct(ctx context.Context, batch coldata.Batch) {
numBuffered := uint64(ht.vals.Length())
ht.allocator.PerformOperation(ht.vals.ColVecs(), func() {
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/colexec/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ func (b *appendOnlyBufferedBatch) ReplaceCol(coldata.Vec, int) {
// [startIdx, endIdx) from batch (paying attention to the selection vector)
// into b.
// NOTE: this does *not* perform memory accounting.
// NOTE: batch must be of non-zero length.
func (b *appendOnlyBufferedBatch) append(batch coldata.Batch, startIdx, endIdx int) {
for _, colIdx := range b.colsToStore {
b.colVecs[colIdx].Append(
Expand Down

0 comments on commit 731d083

Please sign in to comment.