Skip to content

Commit

Permalink
Merge #50727
Browse files Browse the repository at this point in the history
50727: sql: avoid copying ColumnDescriptors in initColsForScan r=nvanbenschoten a=nvanbenschoten

This change switches `scanNode` from constructing and passing around a `[]ColumnDescriptor` to constructing and passing around a `[]*ColumnDescriptor` which references the existing `ColumnDescriptor`s in the `TableDescriptor`. This is in response to seeing the allocation in `initColsForScan` pop up as the single largest source of total heap allocations by size (`alloc_space`, the heap profile sample that most closely measures GC pressure) while running TPC-E. The allocation in `initColsForScan` was responsible for **4.1%** of the `alloc_space` profile after a 30 minute run of the workload.

In general, this indicates that we should move away from copying around these ColumnDescriptors by value. They are currently 120 bytes large, which isn't huge, but also isn't small. Furthermore, unlike TableDescriptors, we almost never pass around only a single ColumnDescriptor. Instead, we're usually operating on every column touched by a query, so this 120 bytes can blow up fast. For instance, if we estimate that the average TPC-E query touches somewhere between 8 and 10 columns then a single copy of all of these descriptors during the execution of a query (like we were doing in initColsForScan) requires allocating and copying over 1KB of memory.

Yahor, I'm assigning you for two reasons. One, because you seem to be working most closely to this code and likely have a good idea for how disruptive this kind of change will be. I don't want to split the world into functions that work with []ColumnDescriptor and functions that work with []*ColumnDescriptor. I also figured you'd be interested to know that I was running this using an older SHA and the second and third largest sources of allocations were in `createTableReaders` (**3.54%**) and `ColumnTypesWithMutations` (**2.60%**). Both had to do with constructing slices of `types.T` and both appear to have been fixed by c06277e. So nice job with that change!

Co-authored-by: Nathan VanBenschoten <[email protected]>
  • Loading branch information
craig[bot] and nvanbenschoten committed Jun 29, 2020
2 parents 4d6e2ec + fe43441 commit dbb5ad1
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 30 deletions.
12 changes: 6 additions & 6 deletions pkg/sql/distsql_physical_planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -926,7 +926,7 @@ func tableOrdinal(
// toTableOrdinals returns a mapping from column ordinals in cols to table
// reader column ordinals.
func toTableOrdinals(
cols []sqlbase.ColumnDescriptor,
cols []*sqlbase.ColumnDescriptor,
desc *sqlbase.ImmutableTableDescriptor,
visibility execinfrapb.ScanVisibility,
) []int {
Expand All @@ -940,17 +940,17 @@ func toTableOrdinals(
// getOutputColumnsFromColsForScan returns the indices of the columns that are
// returned by a scanNode or a tableReader.
// If remap is not nil, the column ordinals are remapped accordingly.
func getOutputColumnsFromColsForScan(cols []sqlbase.ColumnDescriptor, remap []int) []uint32 {
outputColumns := make([]uint32, 0, len(cols))
func getOutputColumnsFromColsForScan(cols []*sqlbase.ColumnDescriptor, remap []int) []uint32 {
outputColumns := make([]uint32, len(cols))
// TODO(radu): if we have a scan with a filter, cols will include the
// columns needed for the filter, even if they aren't needed for the next
// stage.
for i := 0; i < len(cols); i++ {
for i := range outputColumns {
colIdx := i
if remap != nil {
colIdx = remap[i]
}
outputColumns = append(outputColumns, uint32(colIdx))
outputColumns[i] = uint32(colIdx)
}
return outputColumns
}
Expand Down Expand Up @@ -1091,7 +1091,7 @@ type tableReaderPlanningInfo struct {
maxResults uint64
estimatedRowCount uint64
reqOrdering ReqOrdering
cols []sqlbase.ColumnDescriptor
cols []*sqlbase.ColumnDescriptor
colsToTableOrdrinalMap []int
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/distsql_spec_exec_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func (e *distSQLSpecExecFactory) ConstructScan(
if err != nil {
return nil, err
}
p.ResultColumns = sqlbase.ResultColumnsFromColDescs(tabDesc.GetID(), cols)
p.ResultColumns = sqlbase.ResultColumnsFromColDescPtrs(tabDesc.GetID(), cols)

if indexConstraint != nil && indexConstraint.IsContradiction() {
// TODO(yuzefovich): once ConstructValues is implemented, consider
Expand Down
15 changes: 9 additions & 6 deletions pkg/sql/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ type scanNode struct {
// be gained (e.g. for tables with wide rows) by reading only certain
// columns from KV using point lookups instead of a single range lookup for
// the entire row.
cols []sqlbase.ColumnDescriptor
cols []*sqlbase.ColumnDescriptor
// There is a 1-1 correspondence between cols and resultColumns.
resultColumns sqlbase.ResultColumns

Expand Down Expand Up @@ -280,12 +280,12 @@ func (n *scanNode) lookupSpecifiedIndex(indexFlags *tree.IndexFlags) error {
// initColsForScan initializes cols according to desc and colCfg.
func initColsForScan(
desc *sqlbase.ImmutableTableDescriptor, colCfg scanColumnsConfig,
) (cols []sqlbase.ColumnDescriptor, err error) {
) (cols []*sqlbase.ColumnDescriptor, err error) {
if colCfg.wantedColumns == nil {
return nil, errors.AssertionFailedf("unexpectedly wantedColumns is nil")
}

cols = make([]sqlbase.ColumnDescriptor, 0, len(desc.ReadableColumns))
cols = make([]*sqlbase.ColumnDescriptor, 0, len(desc.ReadableColumns))
for _, wc := range colCfg.wantedColumns {
var c *sqlbase.ColumnDescriptor
var err error
Expand All @@ -298,7 +298,7 @@ func initColsForScan(
return cols, err
}

cols = append(cols, *c)
cols = append(cols, c)
}

if colCfg.addUnwantedAsHidden {
Expand All @@ -312,9 +312,12 @@ func initColsForScan(
}
}
if !found {
// NB: we could amortize this allocation using a second slice,
// but addUnwantedAsHidden is only used by scrub, so doing so
// doesn't seem worth it.
col := *c
col.Hidden = true
cols = append(cols, col)
cols = append(cols, &col)
}
}
}
Expand All @@ -334,7 +337,7 @@ func (n *scanNode) initDescDefaults(colCfg scanColumnsConfig) error {
}

// Set up the rest of the scanNode.
n.resultColumns = sqlbase.ResultColumnsFromColDescs(n.desc.GetID(), n.cols)
n.resultColumns = sqlbase.ResultColumnsFromColDescPtrs(n.desc.GetID(), n.cols)
n.colIdxMap = make(map[sqlbase.ColumnID]int, len(n.cols))
for i, c := range n.cols {
n.colIdxMap[c.ID] = i
Expand Down
43 changes: 26 additions & 17 deletions pkg/sql/sqlbase/result_columns.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,37 @@ type ResultColumn struct {
// describe the column types of a table.
type ResultColumns []ResultColumn

// ResultColumnsFromColDescs converts ColumnDescriptors to ResultColumns.
// ResultColumnsFromColDescs converts []ColumnDescriptor to []ResultColumn.
func ResultColumnsFromColDescs(tableID ID, colDescs []ColumnDescriptor) ResultColumns {
cols := make(ResultColumns, 0, len(colDescs))
for i := range colDescs {
// Convert the ColumnDescriptor to ResultColumn.
colDesc := &colDescs[i]
return resultColumnsFromColDescs(tableID, len(colDescs), func(i int) *ColumnDescriptor {
return &colDescs[i]
})
}

// ResultColumnsFromColDescPtrs converts []*ColumnDescriptor to []ResultColumn.
func ResultColumnsFromColDescPtrs(tableID ID, colDescs []*ColumnDescriptor) ResultColumns {
return resultColumnsFromColDescs(tableID, len(colDescs), func(i int) *ColumnDescriptor {
return colDescs[i]
})
}

func resultColumnsFromColDescs(
tableID ID, numCols int, getColDesc func(int) *ColumnDescriptor,
) ResultColumns {
cols := make(ResultColumns, numCols)
for i := range cols {
colDesc := getColDesc(i)
typ := colDesc.Type
if typ == nil {
panic(fmt.Sprintf("unsupported column type: %s", colDesc.Type.Family()))
}

hidden := colDesc.Hidden
cols = append(
cols,
ResultColumn{
Name: colDesc.Name,
Typ: typ,
Hidden: hidden,
TableID: tableID,
PGAttributeNum: colDesc.GetLogicalColumnID(),
},
)
cols[i] = ResultColumn{
Name: colDesc.Name,
Typ: typ,
Hidden: colDesc.Hidden,
TableID: tableID,
PGAttributeNum: colDesc.GetLogicalColumnID(),
}
}
return cols
}
Expand Down

0 comments on commit dbb5ad1

Please sign in to comment.