Merge #50727

50727: sql: avoid copying ColumnDescriptors in initColsForScan r=nvanbenschoten a=nvanbenschoten This change switches `scanNode` from constructing and passing around a `[]ColumnDescriptor` to constructing and passing around a `[]*ColumnDescriptor` which references the existing `ColumnDescriptor`s in the `TableDescriptor`. This is in response to seeing the allocation in `initColsForScan` pop up as the single largest source of total heap allocations by size (`alloc_space`, the heap profile sample that most closely measures GC pressure) while running TPC-E. The allocation in `initColsForScan` was responsible for **4.1%** of the `alloc_space` profile after a 30 minute run of the workload. In general, this indicates that we should move away from copying around these ColumnDescriptors by value. They are currently 120 bytes large, which isn't huge, but also isn't small. Furthermore, unlike TableDescriptors, we almost never pass around only a single ColumnDescriptor. Instead, we're usually operating on every column touched by a query, so this 120 bytes can blow up fast. For instance, if we estimate that the average TPC-E query touches somewhere between 8 and 10 columns then a single copy of all of these descriptors during the execution of a query (like we were doing in initColsForScan) requires allocating and copying over 1KB of memory. Yahor, I'm assigning you for two reasons. One, because you seem to be working most closely to this code and likely have a good idea for how disruptive this kind of change will be. I don't want to split the world into functions that work with []ColumnDescriptor and functions that work with []*ColumnDescriptor. I also figured you'd be interested to know that I was running this using an older SHA and the second and third largest sources of allocations were in `createTableReaders` (**3.54%**) and `ColumnTypesWithMutations` (**2.60%**). Both had to do with constructing slices of `types.T` and both appear to have been fixed by c06277e. So nice job with that change! Co-authored-by: Nathan VanBenschoten <[email protected]>
cockroachdb · Jun 29, 2020 · dbb5ad1 · dbb5ad1
2 parents 4d6e2ec + fe43441
commit dbb5ad1
Show file tree

Hide file tree

Showing 4 changed files with 42 additions and 30 deletions.
diff --git a/pkg/sql/distsql_physical_planner.go b/pkg/sql/distsql_physical_planner.go
@@ -926,7 +926,7 @@ func tableOrdinal(
 // toTableOrdinals returns a mapping from column ordinals in cols to table
 // reader column ordinals.
 func toTableOrdinals(
-	cols []sqlbase.ColumnDescriptor,
+	cols []*sqlbase.ColumnDescriptor,
 	desc *sqlbase.ImmutableTableDescriptor,
 	visibility execinfrapb.ScanVisibility,
 ) []int {
@@ -940,17 +940,17 @@ func toTableOrdinals(
 // getOutputColumnsFromColsForScan returns the indices of the columns that are
 // returned by a scanNode or a tableReader.
 // If remap is not nil, the column ordinals are remapped accordingly.
-func getOutputColumnsFromColsForScan(cols []sqlbase.ColumnDescriptor, remap []int) []uint32 {
-	outputColumns := make([]uint32, 0, len(cols))
+func getOutputColumnsFromColsForScan(cols []*sqlbase.ColumnDescriptor, remap []int) []uint32 {
+	outputColumns := make([]uint32, len(cols))
 	// TODO(radu): if we have a scan with a filter, cols will include the
 	// columns needed for the filter, even if they aren't needed for the next
 	// stage.
-	for i := 0; i < len(cols); i++ {
+	for i := range outputColumns {
 		colIdx := i
 		if remap != nil {
 			colIdx = remap[i]
 		}
-		outputColumns = append(outputColumns, uint32(colIdx))
+		outputColumns[i] = uint32(colIdx)
 	}
 	return outputColumns
 }
@@ -1091,7 +1091,7 @@ type tableReaderPlanningInfo struct {
 	maxResults             uint64
 	estimatedRowCount      uint64
 	reqOrdering            ReqOrdering
-	cols                   []sqlbase.ColumnDescriptor
+	cols                   []*sqlbase.ColumnDescriptor
 	colsToTableOrdrinalMap []int
 }
 

diff --git a/pkg/sql/distsql_spec_exec_factory.go b/pkg/sql/distsql_spec_exec_factory.go
@@ -124,7 +124,7 @@ func (e *distSQLSpecExecFactory) ConstructScan(
 	if err != nil {
 		return nil, err
 	}
-	p.ResultColumns = sqlbase.ResultColumnsFromColDescs(tabDesc.GetID(), cols)
+	p.ResultColumns = sqlbase.ResultColumnsFromColDescPtrs(tabDesc.GetID(), cols)
 
 	if indexConstraint != nil && indexConstraint.IsContradiction() {
 		// TODO(yuzefovich): once ConstructValues is implemented, consider

diff --git a/pkg/sql/scan.go b/pkg/sql/scan.go
@@ -57,7 +57,7 @@ type scanNode struct {
 	// be gained (e.g. for tables with wide rows) by reading only certain
 	// columns from KV using point lookups instead of a single range lookup for
 	// the entire row.
-	cols []sqlbase.ColumnDescriptor
+	cols []*sqlbase.ColumnDescriptor
 	// There is a 1-1 correspondence between cols and resultColumns.
 	resultColumns sqlbase.ResultColumns
 
@@ -280,12 +280,12 @@ func (n *scanNode) lookupSpecifiedIndex(indexFlags *tree.IndexFlags) error {
 // initColsForScan initializes cols according to desc and colCfg.
 func initColsForScan(
 	desc *sqlbase.ImmutableTableDescriptor, colCfg scanColumnsConfig,
-) (cols []sqlbase.ColumnDescriptor, err error) {
+) (cols []*sqlbase.ColumnDescriptor, err error) {
 	if colCfg.wantedColumns == nil {
 		return nil, errors.AssertionFailedf("unexpectedly wantedColumns is nil")
 	}
 
-	cols = make([]sqlbase.ColumnDescriptor, 0, len(desc.ReadableColumns))
+	cols = make([]*sqlbase.ColumnDescriptor, 0, len(desc.ReadableColumns))
 	for _, wc := range colCfg.wantedColumns {
 		var c *sqlbase.ColumnDescriptor
 		var err error
@@ -298,7 +298,7 @@ func initColsForScan(
 			return cols, err
 		}
 
-		cols = append(cols, *c)
+		cols = append(cols, c)
 	}
 
 	if colCfg.addUnwantedAsHidden {
@@ -312,9 +312,12 @@ func initColsForScan(
 				}
 			}
 			if !found {
+				// NB: we could amortize this allocation using a second slice,
+				// but addUnwantedAsHidden is only used by scrub, so doing so
+				// doesn't seem worth it.
 				col := *c
 				col.Hidden = true
-				cols = append(cols, col)
+				cols = append(cols, &col)
 			}
 		}
 	}
@@ -334,7 +337,7 @@ func (n *scanNode) initDescDefaults(colCfg scanColumnsConfig) error {
 	}
 
 	// Set up the rest of the scanNode.
-	n.resultColumns = sqlbase.ResultColumnsFromColDescs(n.desc.GetID(), n.cols)
+	n.resultColumns = sqlbase.ResultColumnsFromColDescPtrs(n.desc.GetID(), n.cols)
 	n.colIdxMap = make(map[sqlbase.ColumnID]int, len(n.cols))
 	for i, c := range n.cols {
 		n.colIdxMap[c.ID] = i

diff --git a/pkg/sql/sqlbase/result_columns.go b/pkg/sql/sqlbase/result_columns.go
@@ -36,28 +36,37 @@ type ResultColumn struct {
 // describe the column types of a table.
 type ResultColumns []ResultColumn
 
-// ResultColumnsFromColDescs converts ColumnDescriptors to ResultColumns.
+// ResultColumnsFromColDescs converts []ColumnDescriptor to []ResultColumn.
 func ResultColumnsFromColDescs(tableID ID, colDescs []ColumnDescriptor) ResultColumns {
-	cols := make(ResultColumns, 0, len(colDescs))
-	for i := range colDescs {
-		// Convert the ColumnDescriptor to ResultColumn.
-		colDesc := &colDescs[i]
+	return resultColumnsFromColDescs(tableID, len(colDescs), func(i int) *ColumnDescriptor {
+		return &colDescs[i]
+	})
+}
+
+// ResultColumnsFromColDescPtrs converts []*ColumnDescriptor to []ResultColumn.
+func ResultColumnsFromColDescPtrs(tableID ID, colDescs []*ColumnDescriptor) ResultColumns {
+	return resultColumnsFromColDescs(tableID, len(colDescs), func(i int) *ColumnDescriptor {
+		return colDescs[i]
+	})
+}
+
+func resultColumnsFromColDescs(
+	tableID ID, numCols int, getColDesc func(int) *ColumnDescriptor,
+) ResultColumns {
+	cols := make(ResultColumns, numCols)
+	for i := range cols {
+		colDesc := getColDesc(i)
 		typ := colDesc.Type
 		if typ == nil {
 			panic(fmt.Sprintf("unsupported column type: %s", colDesc.Type.Family()))
 		}
-
-		hidden := colDesc.Hidden
-		cols = append(
-			cols,
-			ResultColumn{
-				Name:           colDesc.Name,
-				Typ:            typ,
-				Hidden:         hidden,
-				TableID:        tableID,
-				PGAttributeNum: colDesc.GetLogicalColumnID(),
-			},
-		)
+		cols[i] = ResultColumn{
+			Name:           colDesc.Name,
+			Typ:            typ,
+			Hidden:         colDesc.Hidden,
+			TableID:        tableID,
+			PGAttributeNum: colDesc.GetLogicalColumnID(),
+		}
 	}
 	return cols
 }