cockroachdb · craig · Jul 27, 2021 · Jul 27, 2021
@@ -55,6 +55,9 @@ type Batch interface {
 	// provided Vec. The original and the replacement vectors *must* be of the
 	// same type.
 	ReplaceCol(Vec, int)
+	// BytesLikeTotalSize returns the total size in bytes of all Bytes-like
+	// vectors in the batch.
+	BytesLikeTotalSize() int64
 	// Reset modifies the caller in-place to have the given length and columns
 	// with the given types. If it's possible, Reset will reuse the existing
 	// columns and allocations, invalidating existing references to the Batch or
@@ -181,6 +184,10 @@ func (b *zeroBatch) ReplaceCol(Vec, int) {
 	panic("no columns should be replaced in zero batch")
 }
 
+func (b *zeroBatch) BytesLikeTotalSize() int64 {
+	return 0 // Implements Batch; the zero batch always reports a Bytes-like size of 0.
+}
+
 func (b *zeroBatch) Reset([]*types.T, int, ColumnFactory) {
 	panic("zero batch should not be reset")
 }
@@ -281,6 +288,20 @@ func (m *MemBatch) ReplaceCol(col Vec, colIdx int) {
 	m.b[colIdx] = col
 }
 
+// BytesLikeTotalSize implements the Batch interface; it sums Bytes and JSON vector sizes.
+func (m *MemBatch) BytesLikeTotalSize() int64 {
+	var total int64
+	for idx, found := m.bytesVecIdxs.Next(0); found; idx, found = m.bytesVecIdxs.Next(idx + 1) {
+		switch vec := m.b[idx]; vec.CanonicalTypeFamily() {
+		case types.BytesFamily:
+			total += vec.Bytes().Size()
+		case types.JsonFamily:
+			total += vec.JSON().Size()
+		}
+	}
+	return total
+}
+
 // Reset implements the Batch interface.
 func (m *MemBatch) Reset(typs []*types.T, length int, factory ColumnFactory) {
 	cannotReuse := m == nil || m.Capacity() < length || m.Width() < len(typs)

@@ -428,15 +428,15 @@ func (r opResult) createDiskBackedSort(
 			// sort itself is responsible for making sure that we stay within
 			// the memory limit.
 			sortUnlimitedAllocator := colmem.NewAllocator(
-				ctx, r.createBufferingUnlimitedMemAccount(
+				ctx, r.createUnlimitedMemAccount(
 					ctx, flowCtx, opName+"-sort", processorID,
 				), factory)
 			mergeUnlimitedAllocator := colmem.NewAllocator(
-				ctx, r.createBufferingUnlimitedMemAccount(
+				ctx, r.createUnlimitedMemAccount(
 					ctx, flowCtx, opName+"-merge", processorID,
 				), factory)
 			outputUnlimitedAllocator := colmem.NewAllocator(
-				ctx, r.createBufferingUnlimitedMemAccount(
+				ctx, r.createUnlimitedMemAccount(
 					ctx, flowCtx, opName+"-output", processorID,
 				), factory)
 			diskAccount := r.createDiskAccount(ctx, flowCtx, opName, processorID)
@@ -745,10 +745,17 @@ func NewColOperator(
 			if err := checkNumIn(inputs, 0); err != nil {
 				return r, err
 			}
-
+			// We have to create a separate account in order for the cFetcher to
+			// be able to precisely track the size of its output batch. This
+			// memory account is "streaming" in its nature, so we create an
+			// unlimited one.
+			cFetcherMemAcc := result.createUnlimitedMemAccount(
+				ctx, flowCtx, "cfetcher" /* opName */, spec.ProcessorID,
+			)
 			estimatedRowCount := spec.EstimatedRowCount
 			scanOp, err := colfetcher.NewColBatchScan(
-				ctx, streamingAllocator, flowCtx, evalCtx, core.TableReader, post, estimatedRowCount,
+				ctx, colmem.NewAllocator(ctx, cFetcherMemAcc, factory), flowCtx,
+				evalCtx, core.TableReader, post, estimatedRowCount,
 			)
 			if err != nil {
 				return r, err
@@ -846,7 +853,7 @@ func NewColOperator(
 					// The disk spilling is disabled by the cluster setting, so
 					// we give an unlimited memory account to the in-memory
 					// hash aggregator and don't set up the disk spiller.
-					hashAggregatorUnlimitedMemAccount := result.createBufferingUnlimitedMemAccount(
+					hashAggregatorUnlimitedMemAccount := result.createUnlimitedMemAccount(
 						ctx, flowCtx, opName, spec.ProcessorID,
 					)
 					newAggArgs.Allocator = colmem.NewAllocator(
@@ -869,7 +876,7 @@ func NewColOperator(
 					// We need to create a separate memory account for the
 					// spilling queue because it looks at how much memory it has
 					// already used in order to decide when to spill to disk.
-					spillingQueueMemAccount := result.createBufferingUnlimitedMemAccount(
+					spillingQueueMemAccount := result.createUnlimitedMemAccount(
 						ctx, flowCtx, spillingQueueMemMonitorName, spec.ProcessorID,
 					)
 					spillingQueueCfg := args.DiskQueueCfg
@@ -893,7 +900,7 @@ func NewColOperator(
 						return r, err
 					}
 					ehaOpName := "external-hash-aggregator"
-					ehaMemAccount := result.createBufferingUnlimitedMemAccount(ctx, flowCtx, ehaOpName, spec.ProcessorID)
+					ehaMemAccount := result.createUnlimitedMemAccount(ctx, flowCtx, ehaOpName, spec.ProcessorID)
 					// Note that we will use an unlimited memory account here
 					// even for the in-memory hash aggregator since it is easier
 					// to do so than to try to replace the memory account if the
@@ -967,7 +974,7 @@ func NewColOperator(
 					distinctMemMonitorName,
 					func(input colexecop.Operator) colexecop.Operator {
 						unlimitedAllocator := colmem.NewAllocator(
-							ctx, result.createBufferingUnlimitedMemAccount(ctx, flowCtx, edOpName, spec.ProcessorID), factory,
+							ctx, result.createUnlimitedMemAccount(ctx, flowCtx, edOpName, spec.ProcessorID), factory,
 						)
 						return colexec.NewExternalDistinct(
 							unlimitedAllocator,
@@ -1007,7 +1014,7 @@ func NewColOperator(
 				// We are performing a cross-join, so we need to plan a
 				// specialized operator.
 				opName := "cross-joiner"
-				crossJoinerMemAccount := result.createBufferingUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID)
+				crossJoinerMemAccount := result.createUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID)
 				crossJoinerDiskAcc := result.createDiskAccount(ctx, flowCtx, opName, spec.ProcessorID)
 				unlimitedAllocator := colmem.NewAllocator(ctx, crossJoinerMemAccount, factory)
 				result.Root = colexecjoin.NewCrossJoiner(
@@ -1034,7 +1041,7 @@ func NewColOperator(
 						ctx, flowCtx, opName, spec.ProcessorID,
 					)
 					hashJoinerUnlimitedAllocator = colmem.NewAllocator(
-						ctx, result.createBufferingUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID), factory,
+						ctx, result.createUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID), factory,
 					)
 				}
 				hjSpec := colexecjoin.MakeHashJoinerSpec(
@@ -1064,7 +1071,7 @@ func NewColOperator(
 						hashJoinerMemMonitorName,
 						func(inputOne, inputTwo colexecop.Operator) colexecop.Operator {
 							unlimitedAllocator := colmem.NewAllocator(
-								ctx, result.createBufferingUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID), factory,
+								ctx, result.createUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID), factory,
 							)
 							ehj := colexec.NewExternalHashJoiner(
 								unlimitedAllocator,
@@ -1120,7 +1127,7 @@ func NewColOperator(
 			// joiner itself is responsible for making sure that we stay within
 			// the memory limit, and it will fall back to disk if necessary.
 			unlimitedAllocator := colmem.NewAllocator(
-				ctx, result.createBufferingUnlimitedMemAccount(
+				ctx, result.createUnlimitedMemAccount(
 					ctx, flowCtx, opName, spec.ProcessorID,
 				), factory)
 			diskAccount := result.createDiskAccount(ctx, flowCtx, opName, spec.ProcessorID)
@@ -1650,15 +1657,17 @@ func (r opResult) createMemAccountForSpillStrategyWithLimit(
 	return &bufferingMemAccount, monitorName
 }
 
-// createBufferingUnlimitedMemAccount instantiates an unlimited memory monitor
-// and a memory account to be used with a buffering disk-backed Operator. The
-// receiver is updated to have references to both objects. Note that the
-// returned account is only "unlimited" in that it does not have a hard limit
-// that it enforces, but a limit might be enforced by a root monitor.
+// createUnlimitedMemAccount instantiates an unlimited memory monitor
+// and a memory account to be used with a buffering disk-backed Operator (or in
+// special circumstances in place of a streaming account when the precise memory
+// usage is needed by an operator). The receiver is updated to have references
+// to both objects. Note that the returned account is only "unlimited" in that
+// it does not have a hard limit that it enforces, but a limit might be enforced
+// by a root monitor.
 //
 // Note that the memory monitor name is not returned (unlike above) because no
 // caller actually needs it.
-func (r opResult) createBufferingUnlimitedMemAccount(
+func (r opResult) createUnlimitedMemAccount(
 	ctx context.Context, flowCtx *execinfra.FlowCtx, opName string, processorID int32,
 ) *mon.BoundAccount {
 	monitorName := r.getMemMonitorName(opName, processorID, "unlimited" /* suffix */)
@@ -1710,10 +1719,10 @@ func (r opResult) finishBufferedWindowerArgs(
 	needsBuffer bool,
 ) {
 	args.DiskAcc = r.createDiskAccount(ctx, flowCtx, opName, processorID)
-	mainAcc := r.createBufferingUnlimitedMemAccount(ctx, flowCtx, opName, processorID)
+	mainAcc := r.createUnlimitedMemAccount(ctx, flowCtx, opName, processorID)
 	args.MainAllocator = colmem.NewAllocator(ctx, mainAcc, factory)
 	if needsBuffer {
-		bufferAcc := r.createBufferingUnlimitedMemAccount(ctx, flowCtx, opName, processorID)
+		bufferAcc := r.createUnlimitedMemAccount(ctx, flowCtx, opName, processorID)
 		args.BufferAllocator = colmem.NewAllocator(ctx, bufferAcc, factory)
 	}
 }

@@ -103,8 +103,9 @@ func TestNewColOperatorExpectedTypeSchema(t *testing.T) {
 		},
 		StreamingMemAccount: &streamingMemAcc,
 	}
-	r, err := NewColOperator(ctx, flowCtx, args)
+	r1, err := NewColOperator(ctx, flowCtx, args)
 	require.NoError(t, err)
+	defer r1.TestCleanup()
 
 	args = &colexecargs.NewColOperatorArgs{
 		Spec: &execinfrapb.ProcessorSpec{
@@ -113,11 +114,12 @@ func TestNewColOperatorExpectedTypeSchema(t *testing.T) {
 			Post:        execinfrapb.PostProcessSpec{RenderExprs: []execinfrapb.Expression{{Expr: "@1 - 1"}}},
 			ResultTypes: []*types.T{types.Int},
 		},
-		Inputs:              []colexecargs.OpWithMetaInfo{{Root: r.Root}},
+		Inputs:              []colexecargs.OpWithMetaInfo{{Root: r1.Root}},
 		StreamingMemAccount: &streamingMemAcc,
 	}
-	r, err = NewColOperator(ctx, flowCtx, args)
+	r, err := NewColOperator(ctx, flowCtx, args)
 	require.NoError(t, err)
+	defer r.TestCleanup()
 
 	m := colexec.NewMaterializer(
 		flowCtx,

@@ -131,6 +131,17 @@ func (r *NewColOperatorResult) AssertInvariants() {
 	}
 }
 
+// TestCleanup closes all OpAccounts, then stops all OpMonitors; it should only be used in tests.
+func (r *NewColOperatorResult) TestCleanup() {
+	ctx := context.Background()
+	for _, account := range r.OpAccounts {
+		account.Close(ctx)
+	}
+	for _, monitor := range r.OpMonitors {
+		monitor.Stop(ctx)
+	}
+}
+
 var newColOperatorResultPool = sync.Pool{
 	New: func() interface{} {
 		return &NewColOperatorResult{}

@@ -175,6 +175,11 @@ func (b *AppendOnlyBufferedBatch) ReplaceCol(coldata.Vec, int) {
 	colexecerror.InternalError(errors.AssertionFailedf("ReplaceCol is prohibited on AppendOnlyBufferedBatch"))
 }
 
+// BytesLikeTotalSize implements the coldata.Batch interface by delegating to b.batch.
+func (b *AppendOnlyBufferedBatch) BytesLikeTotalSize() int64 {
+	return b.batch.BytesLikeTotalSize()
+}
+
 // Reset implements the coldata.Batch interface.
 func (b *AppendOnlyBufferedBatch) Reset([]*types.T, int, coldata.ColumnFactory) {
 	colexecerror.InternalError(errors.AssertionFailedf("Reset is prohibited on AppendOnlyBufferedBatch"))