From 699284b4760e94c26a34f88c6f65852cbcf84b76 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Mon, 28 Mar 2022 20:40:01 -0700 Subject: [PATCH] colexecop: reset the internal batch in a test operator This commit makes a test operator that "owns" a batch to properly reset the batch on each `Next` call. This is the expectation that we forgot to follow. This was discovered when running `BenchmarkLikeOps` which uses a Bytes vector. Due to the way `Bytes.Copy` is implemented, the Bytes vector's buffer could grow arbitrarily large because the test operator never reset the vector. Release note: None --- pkg/col/coldata/bytes.go | 8 ++++++++ pkg/sql/colexecop/testutils.go | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pkg/col/coldata/bytes.go b/pkg/col/coldata/bytes.go index 2616ad60dbd4..da4488a72d5a 100644 --- a/pkg/col/coldata/bytes.go +++ b/pkg/col/coldata/bytes.go @@ -244,6 +244,14 @@ func (b *Bytes) copyElements(srcElementsToCopy []element, src *Bytes, destIdx in // Optimize copying of the elements by copying all of them directly into the // destination. This way all inlined values become correctly set, and we // only need to set the non-inlined values separately. + // + // Note that this behavior results in losing the references to the old + // non-inlined values, even if they could be reused. If Bytes is not Reset, + // then that unused space in Bytes.buffer can accumulate. However, checking + // whether there are old non-inlined values with non-zero capacity leads to + // performance regressions, and in the production code we do reset the Bytes + // in all cases, so we accept this poor behavior in such a hypothetical / + // test-only scenario. See #78703 for more details. copy(destElements, srcElementsToCopy) // Early bounds checks. _ = destElements[len(srcElementsToCopy)-1] diff --git a/pkg/sql/colexecop/testutils.go b/pkg/sql/colexecop/testutils.go index 886f3b8df646..e050c2611715 100644 --- a/pkg/sql/colexecop/testutils.go +++ b/pkg/sql/colexecop/testutils.go @@ -108,8 +108,9 @@ func (s *RepeatableBatchSource) Next() coldata.Batch { if s.batchesToReturn != 0 && s.batchesReturned > s.batchesToReturn { return coldata.ZeroBatch } - s.output.SetSelection(s.sel != nil) + s.output.ResetInternalBatch() if s.sel != nil { + s.output.SetSelection(true) copy(s.output.Selection()[:s.batchLen], s.sel[:s.batchLen]) } for i, colVec := range s.colVecs {