From 11fff373f78b606246f3f4dc8cc9b0b0c30978d5 Mon Sep 17 00:00:00 2001
From: Rebecca Taft <becca@cockroachlabs.com>
Date: Mon, 6 Apr 2020 17:47:06 -0500
Subject: [PATCH] rowexec: account for some additional memory used by stats
 collection

Prior to this commit, we did not account for the memory used in the
sampleAggregator when we copy all samples into a new slice before
generating a histogram. This commit adds some additional memory
accounting for this overhead.

Informs #45812

Release note: None
---
 pkg/sql/rowexec/sample_aggregator.go | 45 +++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/pkg/sql/rowexec/sample_aggregator.go b/pkg/sql/rowexec/sample_aggregator.go
index 89a0516e0824..2e6b47c01a48 100644
--- a/pkg/sql/rowexec/sample_aggregator.go
+++ b/pkg/sql/rowexec/sample_aggregator.go
@@ -41,10 +41,17 @@ type sampleAggregator struct {
 
 	spec    *execinfrapb.SampleAggregatorSpec
 	input   execinfra.RowSource
-	memAcc  mon.BoundAccount
 	inTypes []types.T
 	sr      stats.SampleReservoir
 
+	// memAcc accounts for memory accumulated throughout the life of the
+	// sampleAggregator.
+	memAcc mon.BoundAccount
+
+	// tempMemAcc is used to account for memory that is allocated temporarily
+	// and released before the sampleAggregator is finished.
+	tempMemAcc mon.BoundAccount
+
 	tableID     sqlbase.ID
 	sampledCols []sqlbase.ColumnID
 	sketches    []sketchInfo
@@ -97,8 +104,9 @@ func newSampleAggregator(
 	s := &sampleAggregator{
 		spec:         spec,
 		input:        input,
-		memAcc:       memMonitor.MakeBoundAccount(),
 		inTypes:      input.OutputTypes(),
+		memAcc:       memMonitor.MakeBoundAccount(),
+		tempMemAcc:   memMonitor.MakeBoundAccount(),
 		tableID:      spec.TableID,
 		sampledCols:  spec.SampledColumnIDs,
 		sketches:     make([]sketchInfo, len(spec.Sketches)),
@@ -161,6 +169,7 @@ func (s *sampleAggregator) Run(ctx context.Context) {
 func (s *sampleAggregator) close() {
 	if s.InternalClose() {
 		s.memAcc.Close(s.Ctx)
+		s.tempMemAcc.Close(s.Ctx)
 		s.MemMonitor.Stop(s.Ctx)
 	}
 }
@@ -322,7 +331,8 @@ func (s *sampleAggregator) writeResults(ctx context.Context) error {
 				colIdx := int(si.spec.Columns[0])
 				typ := &s.inTypes[colIdx]
 
-				h, err := generateHistogram(
+				h, err := s.generateHistogram(
+					ctx,
 					s.EvalCtx,
 					s.sr.Get(),
 					colIdx,
@@ -368,6 +378,9 @@ func (s *sampleAggregator) writeResults(ctx context.Context) error {
 			); err != nil {
 				return err
 			}
+
+			// Release any memory temporarily used for this statistic.
+			s.tempMemAcc.Clear(ctx)
 		}
 
 		return nil
@@ -382,7 +395,8 @@ func (s *sampleAggregator) writeResults(ctx context.Context) error {
 // generateHistogram returns a histogram (on a given column) from a set of
 // samples.
 // numRows is the total number of rows from which values were sampled.
-func generateHistogram(
+func (s *sampleAggregator) generateHistogram(
+	ctx context.Context,
 	evalCtx *tree.EvalContext,
 	samples []stats.SampledRow,
 	colIdx int,
@@ -391,15 +405,32 @@ func generateHistogram(
 	distinctCount int64,
 	maxBuckets int,
 ) (stats.HistogramData, error) {
-	var da sqlbase.DatumAlloc
+	// Account for the memory we'll use copying the samples into values.
+	if err := s.tempMemAcc.Grow(ctx, sizeOfDatum*int64(len(samples))); err != nil {
+		return stats.HistogramData{}, err
+	}
 	values := make(tree.Datums, 0, len(samples))
-	for _, s := range samples {
-		ed := &s.Row[colIdx]
+
+	var da sqlbase.DatumAlloc
+	for _, sample := range samples {
+		ed := &sample.Row[colIdx]
 		// Ignore NULLs (they are counted separately).
 		if !ed.IsNull() {
+			beforeSize := ed.Datum.Size()
 			if err := ed.EnsureDecoded(colType, &da); err != nil {
 				return stats.HistogramData{}, err
 			}
+			afterSize := ed.Datum.Size()
+
+			// Perform memory accounting. This memory is not added to the temporary
+			// account since it won't be released until the sampleAggregator is
+			// destroyed.
+			if afterSize > beforeSize {
+				if err := s.memAcc.Grow(ctx, int64(afterSize-beforeSize)); err != nil {
+					return stats.HistogramData{}, err
+				}
+			}
+
 			values = append(values, ed.Datum)
 		}
 	}