Skip to content

Commit

Permalink
sql: avoid copying ColumnDescriptors in initColsForScan
Browse files Browse the repository at this point in the history
This change switches `scanNode` from constructing and passing around a
[]ColumnDescriptor to constructing and passing around a []*ColumnDescriptor.
This is in response to seeing the allocation in `initColsForScan` pop up as
the single largest source of total heap allocations by size (`alloc_space`,
the heap profile sample that most closely measures GC pressure) while running
TPC-E. The allocation in `initColsForScan` was responsible for **4.1%** of
the `alloc_space` profile after a 30 minute run of the workload.

In general, this indicates that we should move away from copying around these
ColumnDescriptors by value. They are currently 120 bytes large, which isn't
huge, but also isn't small. Furthermore, unlike TableDescriptors, we almost
never pass around only a single ColumnDescriptor. Instead, we're usually
operating on every column touched by a query, so this 120 bytes can blow up
fast. For instance, if we estimate that the average TPC-E query touches
somewhere between 8 and 10 columns, then a single copy of all of these
descriptors during the execution of a query (like we were doing in
initColsForScan) requires allocating and copying over 1KB of memory.

Yahor, I'm assigning you for two reasons. One, because you seem to be working
most closely with this code and likely have a good idea of how disruptive this
kind of change will be. I don't want to split the world into functions that
work with []ColumnDescriptor and functions that work with []*ColumnDescriptor.
I also figured you'd be interested to know that I was running this using an
older SHA and the second and third largest sources of allocations were in
`createTableReaders` (3.54%) and `ColumnTypesWithMutations` (2.60%). Both
had to do with constructing slices of `types.T` and both appear to have been
fixed by c06277e. So nice job with that change!
  • Loading branch information
nvanbenschoten committed Jun 27, 2020
1 parent 5286181 commit fe43441
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 30 deletions.
12 changes: 6 additions & 6 deletions pkg/sql/distsql_physical_planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -920,7 +920,7 @@ func tableOrdinal(
// toTableOrdinals returns a mapping from column ordinals in cols to table
// reader column ordinals.
func toTableOrdinals(
cols []sqlbase.ColumnDescriptor,
cols []*sqlbase.ColumnDescriptor,
desc *sqlbase.ImmutableTableDescriptor,
visibility execinfrapb.ScanVisibility,
) []int {
Expand All @@ -934,17 +934,17 @@ func toTableOrdinals(
// getOutputColumnsFromColsForScan returns the indices of the columns that are
// returned by a scanNode or a tableReader.
// If remap is not nil, the column ordinals are remapped accordingly.
func getOutputColumnsFromColsForScan(cols []sqlbase.ColumnDescriptor, remap []int) []uint32 {
outputColumns := make([]uint32, 0, len(cols))
func getOutputColumnsFromColsForScan(cols []*sqlbase.ColumnDescriptor, remap []int) []uint32 {
outputColumns := make([]uint32, len(cols))
// TODO(radu): if we have a scan with a filter, cols will include the
// columns needed for the filter, even if they aren't needed for the next
// stage.
for i := 0; i < len(cols); i++ {
for i := range outputColumns {
colIdx := i
if remap != nil {
colIdx = remap[i]
}
outputColumns = append(outputColumns, uint32(colIdx))
outputColumns[i] = uint32(colIdx)
}
return outputColumns
}
Expand Down Expand Up @@ -1085,7 +1085,7 @@ type tableReaderPlanningInfo struct {
maxResults uint64
estimatedRowCount uint64
reqOrdering ReqOrdering
cols []sqlbase.ColumnDescriptor
cols []*sqlbase.ColumnDescriptor
colsToTableOrdrinalMap []int
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/distsql_spec_exec_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ func (e *distSQLSpecExecFactory) ConstructScan(
if err != nil {
return nil, err
}
p.ResultColumns = sqlbase.ResultColumnsFromColDescs(tabDesc.GetID(), cols)
p.ResultColumns = sqlbase.ResultColumnsFromColDescPtrs(tabDesc.GetID(), cols)

if indexConstraint != nil && indexConstraint.IsContradiction() {
// TODO(yuzefovich): once ConstructValues is implemented, consider
Expand Down
15 changes: 9 additions & 6 deletions pkg/sql/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ type scanNode struct {
// be gained (e.g. for tables with wide rows) by reading only certain
// columns from KV using point lookups instead of a single range lookup for
// the entire row.
cols []sqlbase.ColumnDescriptor
cols []*sqlbase.ColumnDescriptor
// There is a 1-1 correspondence between cols and resultColumns.
resultColumns sqlbase.ResultColumns

Expand Down Expand Up @@ -280,12 +280,12 @@ func (n *scanNode) lookupSpecifiedIndex(indexFlags *tree.IndexFlags) error {
// initColsForScan initializes cols according to desc and colCfg.
func initColsForScan(
desc *sqlbase.ImmutableTableDescriptor, colCfg scanColumnsConfig,
) (cols []sqlbase.ColumnDescriptor, err error) {
) (cols []*sqlbase.ColumnDescriptor, err error) {
if colCfg.wantedColumns == nil {
return nil, errors.AssertionFailedf("unexpectedly wantedColumns is nil")
}

cols = make([]sqlbase.ColumnDescriptor, 0, len(desc.ReadableColumns))
cols = make([]*sqlbase.ColumnDescriptor, 0, len(desc.ReadableColumns))
for _, wc := range colCfg.wantedColumns {
var c *sqlbase.ColumnDescriptor
var err error
Expand All @@ -298,7 +298,7 @@ func initColsForScan(
return cols, err
}

cols = append(cols, *c)
cols = append(cols, c)
}

if colCfg.addUnwantedAsHidden {
Expand All @@ -312,9 +312,12 @@ func initColsForScan(
}
}
if !found {
// NB: we could amortize this allocation using a second slice,
// but addUnwantedAsHidden is only used by scrub, so doing so
// doesn't seem worth it.
col := *c
col.Hidden = true
cols = append(cols, col)
cols = append(cols, &col)
}
}
}
Expand All @@ -334,7 +337,7 @@ func (n *scanNode) initDescDefaults(colCfg scanColumnsConfig) error {
}

// Set up the rest of the scanNode.
n.resultColumns = sqlbase.ResultColumnsFromColDescs(n.desc.GetID(), n.cols)
n.resultColumns = sqlbase.ResultColumnsFromColDescPtrs(n.desc.GetID(), n.cols)
n.colIdxMap = make(map[sqlbase.ColumnID]int, len(n.cols))
for i, c := range n.cols {
n.colIdxMap[c.ID] = i
Expand Down
43 changes: 26 additions & 17 deletions pkg/sql/sqlbase/result_columns.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,37 @@ type ResultColumn struct {
// describe the column types of a table.
type ResultColumns []ResultColumn

// ResultColumnsFromColDescs converts ColumnDescriptors to ResultColumns.
// ResultColumnsFromColDescs converts []ColumnDescriptor to []ResultColumn.
func ResultColumnsFromColDescs(tableID ID, colDescs []ColumnDescriptor) ResultColumns {
cols := make(ResultColumns, 0, len(colDescs))
for i := range colDescs {
// Convert the ColumnDescriptor to ResultColumn.
colDesc := &colDescs[i]
return resultColumnsFromColDescs(tableID, len(colDescs), func(i int) *ColumnDescriptor {
return &colDescs[i]
})
}

// ResultColumnsFromColDescPtrs converts []*ColumnDescriptor to []ResultColumn.
func ResultColumnsFromColDescPtrs(tableID ID, colDescs []*ColumnDescriptor) ResultColumns {
return resultColumnsFromColDescs(tableID, len(colDescs), func(i int) *ColumnDescriptor {
return colDescs[i]
})
}

func resultColumnsFromColDescs(
tableID ID, numCols int, getColDesc func(int) *ColumnDescriptor,
) ResultColumns {
cols := make(ResultColumns, numCols)
for i := range cols {
colDesc := getColDesc(i)
typ := colDesc.Type
if typ == nil {
panic(fmt.Sprintf("unsupported column type: %s", colDesc.Type.Family()))
}

hidden := colDesc.Hidden
cols = append(
cols,
ResultColumn{
Name: colDesc.Name,
Typ: typ,
Hidden: hidden,
TableID: tableID,
PGAttributeNum: colDesc.GetLogicalColumnID(),
},
)
cols[i] = ResultColumn{
Name: colDesc.Name,
Typ: typ,
Hidden: colDesc.Hidden,
TableID: tableID,
PGAttributeNum: colDesc.GetLogicalColumnID(),
}
}
return cols
}
Expand Down

0 comments on commit fe43441

Please sign in to comment.