Merge #56224
56224: colexec: optimize the read path a bit and cleanup internal memory handling r=yuzefovich a=yuzefovich

**colexec: reuse the same eval context for objects of the same proc spec**

In the row-execution processors we create a copy of the eval context for
each processor core because the processor might modify it. In the
vectorized engine we took this to the extreme: we currently create
a copy for every usage of the eval context. That is unnecessary, so this
commit makes the vectorized operator creator behave more like the row
engine by reusing the same eval context for all operators created for
a single processor spec.

This commit also switches to using the "original" eval context that is
part of the flow context when instantiating a ColumnFactory and when
initializing materializers, since those two components don't modify the
eval context.
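
A minimal sketch of the new sharing scheme, with hypothetical `EvalContext`
and `operator` types standing in for the real ones (this illustrates the
pattern, not CockroachDB's actual API):

```go
package main

import "fmt"

// EvalContext stands in for the engine's evaluation context. Operators may
// mutate it while evaluating expressions, so it cannot be shared across
// processors, but it can be shared within one processor spec.
type EvalContext struct {
	placeholder int
}

// Copy returns a shallow copy that a single processor may safely mutate.
func (ctx *EvalContext) Copy() *EvalContext {
	cp := *ctx
	return &cp
}

type operator struct {
	evalCtx *EvalContext
}

// planProcessorSpec mirrors the new behavior: make one copy of the flow's
// eval context per processor spec and hand that same copy to every operator
// planned for the spec.
func planProcessorSpec(flowEvalCtx *EvalContext, numOps int) []*operator {
	procEvalCtx := flowEvalCtx.Copy() // one copy per spec, not per use
	ops := make([]*operator, numOps)
	for i := range ops {
		ops[i] = &operator{evalCtx: procEvalCtx}
	}
	return ops
}

func main() {
	flowEvalCtx := &EvalContext{}
	ops := planProcessorSpec(flowEvalCtx, 3)
	fmt.Println(ops[0].evalCtx == ops[2].evalCtx) // true: shared within the spec
	fmt.Println(ops[0].evalCtx == flowEvalCtx)    // false: flow context untouched
}
```

The flow-level context stays pristine, so each processor spec can take its
own copy from it.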

Release note: None

**colconv,colfetcher: pool allocations of converters and cTableInfos**

This commit pools the allocations of the converters used in the
materializers, as well as of the `cTableInfo` structs used by the
ColBatchScan. Additionally, it reuses the same ColumnFactory when
setting up the whole flow and removes an unnecessary copy of the
processor spec that was made when checking whether the spec is
supported.

Another notable change is reusing the same global empty post-processing
spec when creating the materializers. Sharing it is thread-safe because
the object is never modified.
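
A short sketch of why that sharing is safe, using an illustrative
`PostProcessSpec` stand-in rather than the actual `execinfrapb` type:

```go
package main

import (
	"fmt"
	"sync"
)

// PostProcessSpec stands in for the engine's post-processing spec; the
// Projection field is illustrative only.
type PostProcessSpec struct {
	Projection bool
}

// A single zero-value spec shared by every materializer. Sharing is safe
// only because no code path ever writes to it after initialization.
var emptyPostProcessSpec = &PostProcessSpec{}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Concurrent reads of a never-mutated object do not race.
			_ = emptyPostProcessSpec.Projection
		}()
	}
	wg.Wait()
	fmt.Println("all readers done")
}
```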

Release note: None

**colbuilder: remove redundant expr deserialization after scan**

Whenever we create a `ColBatchScan`, we initialize a `ProcOutputHelper`
in order to obtain the set of needed columns (based on the schema of
the table and the post-processing spec). As part of that initialization
we obtain well-typed expressions for everything in the post-processing
spec. However, later, when actually planning the post-processing in the
vectorized engine, we deserialize the expressions again. This is
redundant, so the code has been adjusted to reuse the same helper for
both the scan operator creation and the post-processing planning.
Additionally, those `ProcOutputHelper` objects are now pooled.
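
A rough sketch of the pooling half of this change; the names below are
illustrative stand-ins, not the actual `execinfra.ProcOutputHelper` API:

```go
package main

import (
	"fmt"
	"sync"
)

// procOutputHelper stands in for execinfra.ProcOutputHelper; its fields and
// methods here are illustrative only.
type procOutputHelper struct {
	neededColumns []int
}

var procOutputHelperPool = sync.Pool{
	New: func() interface{} { return &procOutputHelper{} },
}

func getProcOutputHelper() *procOutputHelper {
	return procOutputHelperPool.Get().(*procOutputHelper)
}

// release resets the helper and returns it to the pool so the next scan can
// reuse the allocation.
func (h *procOutputHelper) release() {
	h.neededColumns = h.neededColumns[:0]
	procOutputHelperPool.Put(h)
}

func main() {
	h := getProcOutputHelper()
	// Initialize once: deserialize the post-processing expressions and record
	// the needed columns, then reuse h for both the scan operator creation
	// and the vectorized post-processing planning.
	h.neededColumns = append(h.neededColumns, 0, 2)
	fmt.Println(h.neededColumns)
	h.release()
}
```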

Release note: None

**colexec: update the allocator with the internal memory usage**

This commit removes the `InternalMemoryOperator` interface that was put
in place for operators to register their static memory usage. It was
introduced before we had the allocator object (I think), and now we can
remove the interface and simply update the allocators with the
corresponding internal memory usage. Only two operators actually
implemented the interface, and both have been converted to the new
pattern.
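
Roughly, the new pattern looks like the following, with a stand-in
`Allocator` type; the real call is `colmem.Allocator.AdjustMemoryUsage`,
visible in the `case.go` diff below:

```go
package main

import "fmt"

// Allocator stands in for colmem.Allocator; only the method relevant to the
// new pattern is sketched here.
type Allocator struct {
	used int64
}

// AdjustMemoryUsage records a change in memory usage with the allocator.
func (a *Allocator) AdjustMemoryUsage(delta int64) { a.used += delta }

// sizeOfSelVector is an illustrative stand-in for
// colmem.SizeOfBatchSizeSelVector.
const sizeOfSelVector = 8 * 1024

// newCaseLikeOp registers its static internal memory at construction time
// instead of implementing a separate InternalMemoryOperator interface.
func newCaseLikeOp(a *Allocator) {
	// Two selection vectors are kept internally, so account for both.
	a.AdjustMemoryUsage(int64(2 * sizeOfSelVector))
}

func main() {
	a := &Allocator{}
	newCaseLikeOp(a)
	fmt.Println(a.used) // static usage is now visible to the memory system
}
```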

This commit also removes the separation of streaming memory accounts in
the vectorized flow setup and merges them with all other accounts.

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
craig[bot] and yuzefovich committed Nov 9, 2020
2 parents 0446d7c + 0220f65 commit a5086fc
Showing 26 changed files with 386 additions and 315 deletions.
51 changes: 36 additions & 15 deletions pkg/sql/colconv/vec_to_datum.eg.go

(Generated file; diff not rendered.)

52 changes: 37 additions & 15 deletions pkg/sql/colconv/vec_to_datum_tmpl.go
```diff
@@ -20,7 +20,10 @@
 package colconv
 
 import (
+	"sync"
+
 	"github.com/cockroachdb/cockroach/pkg/col/coldata"
+	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
 	"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
@@ -49,14 +52,33 @@ type VecToDatumConverter struct {
 	da rowenc.DatumAlloc
 }
 
+var _ execinfra.Releasable = &VecToDatumConverter{}
+
+var vecToDatumConverterPool = sync.Pool{
+	New: func() interface{} {
+		return &VecToDatumConverter{}
+	},
+}
+
 // NewVecToDatumConverter creates a new VecToDatumConverter.
 // - batchWidth determines the width of the batches that it will be converting.
 // - vecIdxsToConvert determines which vectors need to be converted.
 func NewVecToDatumConverter(batchWidth int, vecIdxsToConvert []int) *VecToDatumConverter {
-	return &VecToDatumConverter{
-		convertedVecs:    make([]tree.Datums, batchWidth),
-		vecIdxsToConvert: vecIdxsToConvert,
-	}
+	c := vecToDatumConverterPool.Get().(*VecToDatumConverter)
+	if cap(c.convertedVecs) < batchWidth {
+		c.convertedVecs = make([]tree.Datums, batchWidth)
+	} else {
+		c.convertedVecs = c.convertedVecs[:batchWidth]
+	}
+	c.vecIdxsToConvert = vecIdxsToConvert
+	return c
+}
+
+// Release is part of the execinfra.Releasable interface.
+func (c *VecToDatumConverter) Release() {
+	c.convertedVecs = c.convertedVecs[:0]
+	c.vecIdxsToConvert = nil
+	vecToDatumConverterPool.Put(c)
 }
 
 // ConvertBatchAndDeselect converts the selected vectors from the batch while
@@ -78,17 +100,17 @@ func (c *VecToDatumConverter) ConvertBatchAndDeselect(batch coldata.Batch) {
 		return
 	}
 	// Ensure that convertedVecs are of sufficient length.
-	if cap(c.convertedVecs[c.vecIdxsToConvert[0]]) < batchLength {
-		for _, vecIdx := range c.vecIdxsToConvert {
+	for _, vecIdx := range c.vecIdxsToConvert {
+		if cap(c.convertedVecs[vecIdx]) < batchLength {
 			c.convertedVecs[vecIdx] = make([]tree.Datum, batchLength)
+		} else {
+			c.convertedVecs[vecIdx] = c.convertedVecs[vecIdx][:batchLength]
 		}
+	}
+	if c.da.AllocSize < batchLength {
 		// Adjust the datum alloc according to the length of the batch since
 		// this batch is the longest we've seen so far.
 		c.da.AllocSize = batchLength
-	} else {
-		for _, vecIdx := range c.vecIdxsToConvert {
-			c.convertedVecs[vecIdx] = c.convertedVecs[vecIdx][:batchLength]
-		}
 	}
 	sel := batch.Selection()
 	vecs := batch.ColVecs()
@@ -132,17 +154,17 @@ func (c *VecToDatumConverter) ConvertVecs(vecs []coldata.Vec, inputLen int, sel
 		// rely on the fact that selection vectors are increasing sequences.
 		requiredLength = sel[inputLen-1] + 1
 	}
-	if cap(c.convertedVecs[c.vecIdxsToConvert[0]]) < requiredLength {
-		for _, vecIdx := range c.vecIdxsToConvert {
+	for _, vecIdx := range c.vecIdxsToConvert {
+		if cap(c.convertedVecs[vecIdx]) < requiredLength {
 			c.convertedVecs[vecIdx] = make([]tree.Datum, requiredLength)
+		} else {
+			c.convertedVecs[vecIdx] = c.convertedVecs[vecIdx][:requiredLength]
 		}
+	}
+	if c.da.AllocSize < requiredLength {
 		// Adjust the datum alloc according to the length of the batch since
 		// this batch is the longest we've seen so far.
 		c.da.AllocSize = requiredLength
-	} else {
-		for _, vecIdx := range c.vecIdxsToConvert {
-			c.convertedVecs[vecIdx] = c.convertedVecs[vecIdx][:requiredLength]
-		}
 	}
 	for _, vecIdx := range c.vecIdxsToConvert {
 		ColVecToDatum(
```
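
For context, a sketch of the pooled converter's lifecycle from a caller's
point of view. Only `NewVecToDatumConverter`, `ConvertBatchAndDeselect`,
and `Release` appear in the diff above; the `GetDatumColumn` accessor is
assumed here for illustration:

```go
package sketch

import (
	"github.com/cockroachdb/cockroach/pkg/col/coldata"
	"github.com/cockroachdb/cockroach/pkg/sql/colconv"
)

// processBatch sketches the pooled lifecycle: acquire a converter, convert,
// read the datums, and hand the converter back to the pool.
func processBatch(batch coldata.Batch) {
	// Convert only vectors 0 and 2 of a three-column batch.
	c := colconv.NewVecToDatumConverter(3 /* batchWidth */, []int{0, 2})
	c.ConvertBatchAndDeselect(batch)
	datums := c.GetDatumColumn(2) // assumed accessor; see the note above
	_ = datums                    // ... use the datums before releasing ...
	// Release returns the converter to vecToDatumConverterPool; the
	// converted datums must not be used after this point.
	c.Release()
}
```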
9 changes: 3 additions & 6 deletions pkg/sql/colexec/case.go
```diff
@@ -46,7 +46,7 @@ type caseOp struct {
 	prevSel []int
 }
 
-var _ InternalMemoryOperator = &caseOp{}
+var _ colexecbase.Operator = &caseOp{}
 
 func (c *caseOp) ChildCount(verbose bool) int {
 	return 1 + len(c.caseOps) + 1
@@ -65,11 +65,6 @@ func (c *caseOp) Child(nth int, verbose bool) execinfra.OpNode {
 	return nil
 }
 
-func (c *caseOp) InternalMemoryUsage() int {
-	// We internally use two selection vectors, origSel and prevSel.
-	return 2 * colmem.SizeOfBatchSizeSelVector
-}
-
 // NewCaseOp returns an operator that runs a case statement.
 // buffer is a bufferOp that will return the input batch repeatedly.
 // caseOps is a list of operator chains, one per branch in the case statement.
@@ -89,6 +84,8 @@ func NewCaseOp(
 	outputIdx int,
 	typ *types.T,
 ) colexecbase.Operator {
+	// We internally use two selection vectors, origSel and prevSel.
+	allocator.AdjustMemoryUsage(int64(2 * colmem.SizeOfBatchSizeSelVector))
 	return &caseOp{
 		allocator: allocator,
 		buffer:    buffer.(*bufferOp),
```
