Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
119095: opt: speed up execbuilder phase r=mgartner a=mgartner

#### opt/bench: add benchmark for execbuilder

This commit adds a benchmark that measures only the execbuilder phase of
optimization, and includes no other phases.

Release note: None

#### opt/execbuilder: remove column map from execPlan

As the execbuilder traverses a relational expression and recursively
builds an `execPlan`, it creates mappings from column IDs to their
ordinal position in the expression for each `execPlan` node. These
mappings are used when building parent nodes to correctly map column IDs
to indexed variables. In most cases the mappings are only used when
building a parent, and never again.

Prior to this commit, the column mappings were a field of `execPlan`,
tying the lifetime of `execPlan` nodes and column mappings together.
This commit decouples the lifetimes of both by removing the mapping
field from `execPlan` and propagating mappings up as return values of
recursive function calls. This will enable future optimizations that can
reuse memory allocated for mappings that are no longer needed.

Release note: None

#### opt/execbuilder: introduce colOrdMap

This commit introduces a new struct, `colOrdMap`, which maps column IDs
to ordinals. See the comment for `colOrdMap` for more details. This type
will be used in execbuilder in future commits to store output column
mappings.

Release note: None

#### opt/execbuilder: use colOrdMap to store output columns

Output columns of execution nodes are now stored in `colOrdMap`s instead
of `opt.ColMap`s. The `colOrdMapAllocator` struct, which is used to
allocate new `colOrdMap`s, has been added as a field of `Builder`. It
currently is a simple implementation. Future commits will extend it to
reuse allocated `colOrdMap`s when possible.

Release note: None

#### opt/execbuilder: reuse allocated colOrdMaps

This commit extends `colOrdMapAllocator` with a `Free` method. Freed
maps will be reused in future calls to `Alloc` instead of allocating a
new map. The build functions of the major relational expressions have
been updated to free maps when they are no longer needed. This reduces
the number of maps allocated, especially for complex queries with many
execution nodes.

Informs #117546

Release note: None

#### opt/execbuilder: faster maximum ordinal method for colOrdMap

This commit makes `colOrdMap.MaxOrd()` a constant-time operation in most
cases. See the newly added comments for more details.

Release note: None


119597: opt: add tests showing that index hints don't affect uniqueness checks r=rytaft a=rytaft

Informs #98211

Release note: None

Co-authored-by: Marcus Gartner <[email protected]>
Co-authored-by: Rebecca Taft <[email protected]>
  • Loading branch information
3 people committed Feb 27, 2024
3 parents 21ff5a7 + 3f4d099 + a9fa594 commit 3b2c1cc
Show file tree
Hide file tree
Showing 14 changed files with 1,740 additions and 821 deletions.
67 changes: 67 additions & 0 deletions pkg/sql/opt/bench/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1584,3 +1584,70 @@ func BenchmarkSlowQueries(b *testing.B) {
})
}
}

// BenchmarkExecBuild measures the time that the execbuilder phase takes. It
// does not include any other phases.
//
// For every test case the query is parsed, built with optbuilder, and
// optimized exactly once, outside of the timed sub-benchmark. Only the
// construction of the execbuilder and its Build call run inside the b.N loop.
func BenchmarkExecBuild(b *testing.B) {
	type testCase struct {
		query  benchQuery
		schema []string
	}
	var testCases []testCase

	// Add the basic queries.
	for _, query := range queriesToTest(b) {
		testCases = append(testCases, testCase{query, schemas})
	}

	// Add the slow queries. They all share the schema read from the
	// slow-schemas.sql test data file.
	p := datapathutils.TestDataPath(b, "slow-schemas.sql")
	slowSchemas, err := os.ReadFile(p)
	if err != nil {
		b.Fatalf("%v", err)
	}
	for _, query := range slowQueries {
		testCases = append(testCases, testCase{query, []string{string(slowSchemas)}})
	}

	for _, tc := range testCases {
		h := newHarness(b, tc.query, tc.schema)

		stmt, err := parser.ParseOne(tc.query.query)
		if err != nil {
			b.Fatalf("%v", err)
		}

		// Run the optbuild and optimization phases up front so that their cost
		// is not attributed to the execbuilder measurement below.
		h.optimizer.Init(context.Background(), &h.evalCtx, h.testCat)
		bld := optbuilder.New(h.ctx, &h.semaCtx, &h.evalCtx, h.testCat, h.optimizer.Factory(), stmt.AST)
		if err = bld.Build(); err != nil {
			b.Fatalf("%v", err)
		}

		// Report optimizer failures through the benchmark, like every other
		// setup error in this function, rather than panicking.
		if _, err := h.optimizer.Optimize(); err != nil {
			b.Fatalf("%v", err)
		}

		execMemo := h.optimizer.Memo()
		root := execMemo.RootExpr()

		b.Run(tc.query.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				// A fresh builder is created on every iteration; it is backed
				// by a stub exec factory wrapped in a plan gist factory.
				eb := execbuilder.New(
					context.Background(),
					explain.NewPlanGistFactory(exec.StubFactory{}),
					&h.optimizer,
					execMemo,
					nil, /* catalog */
					root,
					&h.semaCtx,
					&h.evalCtx,
					true,  /* allowAutoCommit */
					false, /* isANSIDML */
				)
				if _, err := eb.Build(); err != nil {
					b.Fatalf("%v", err)
				}
			}
		})
	}
}
2 changes: 2 additions & 0 deletions pkg/sql/opt/exec/execbuilder/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go_library(
srcs = [
"builder.go",
"cascades.go",
"col_ord_map.go",
"format.go",
"mutation.go",
"relational.go",
Expand Down Expand Up @@ -65,6 +66,7 @@ go_test(
name = "execbuilder_test",
size = "small",
srcs = [
"col_ord_map_test.go",
"main_test.go",
"mutation_test.go",
],
Expand Down
18 changes: 10 additions & 8 deletions pkg/sql/opt/exec/execbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ type Builder struct {
disableTelemetry bool
semaCtx *tree.SemaContext
evalCtx *eval.Context
colOrdsAlloc colOrdMapAllocator

// subqueries accumulates information about subqueries that are part of scalar
// expressions we built. Each entry is associated with a tree.Subquery
Expand Down Expand Up @@ -205,6 +206,7 @@ func New(
initialAllowAutoCommit: allowAutoCommit,
IsANSIDML: isANSIDML,
}
b.colOrdsAlloc.Init(mem.Metadata().MaxColumn())
if evalCtx != nil {
sd := evalCtx.SessionData()
if sd.SaveTablesPrefix != "" {
Expand All @@ -230,7 +232,7 @@ func New(
// Build constructs the execution node tree and returns its root node if no
// error occurred.
func (b *Builder) Build() (_ exec.Plan, err error) {
plan, err := b.build(b.e)
plan, _, err := b.build(b.e)
if err != nil {
return nil, err
}
Expand All @@ -257,7 +259,7 @@ func (b *Builder) wrapFunction(fnName string) (tree.ResolvableFunctionReference,
return tree.WrapFunction(fnName), nil
}

func (b *Builder) build(e opt.Expr) (_ execPlan, err error) {
func (b *Builder) build(e opt.Expr) (_ execPlan, outputCols colOrdMap, err error) {
defer func() {
if r := recover(); r != nil {
// This code allows us to propagate errors without adding lots of checks
Expand All @@ -274,7 +276,7 @@ func (b *Builder) build(e opt.Expr) (_ execPlan, err error) {

rel, ok := e.(memo.RelExpr)
if !ok {
return execPlan{}, errors.AssertionFailedf(
return execPlan{}, colOrdMap{}, errors.AssertionFailedf(
"building execution for non-relational operator %s", redact.Safe(e.Op()),
)
}
Expand All @@ -297,12 +299,12 @@ func (b *Builder) BuildScalar() (tree.TypedExpr, error) {
if !ok {
return nil, errors.AssertionFailedf("BuildScalar cannot be called for non-scalar operator %s", redact.Safe(b.e.Op()))
}
var ctx buildScalarCtx
md := b.mem.Metadata()
ctx.ivh = tree.MakeIndexedVarHelper(&mdVarContainer{md: md}, md.NumColumns())
cols := b.colOrdsAlloc.Alloc()
for i := 0; i < md.NumColumns(); i++ {
ctx.ivarMap.Set(i+1, i)
cols.Set(opt.ColumnID(i+1), i)
}
ctx := makeBuildScalarCtx(cols)
return b.buildScalar(&ctx, scalar)
}

Expand All @@ -320,11 +322,11 @@ type builtWithExpr struct {
id opt.WithID
// outputCols maps the output ColumnIDs of the With expression to the ordinal
// positions they are output to. See execPlan.outputCols for more details.
outputCols opt.ColMap
outputCols colOrdMap
bufferNode exec.Node
}

func (b *Builder) addBuiltWithExpr(id opt.WithID, outputCols opt.ColMap, bufferNode exec.Node) {
func (b *Builder) addBuiltWithExpr(id opt.WithID, outputCols colOrdMap, bufferNode exec.Node) {
b.withExprs = append(b.withExprs, builtWithExpr{
id: id,
outputCols: outputCols,
Expand Down
19 changes: 11 additions & 8 deletions pkg/sql/opt/exec/execbuilder/cascades.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ type cascadeBuilder struct {
mutationBuffer exec.Node
// mutationBufferCols maps With column IDs from the original memo to buffer
// node column ordinals; see builtWithExpr.outputCols.
mutationBufferCols opt.ColMap
mutationBufferCols colOrdMap

// colMeta remembers the metadata of the With columns from the original memo.
colMeta []opt.ColumnMeta
Expand Down Expand Up @@ -142,10 +142,9 @@ func makeCascadeBuilder(b *Builder, mutationWithID opt.WithID) (*cascadeBuilder,
// Remember the column metadata, as we will need to recreate it in the new
// memo.
md := b.mem.Metadata()
cb.colMeta = make([]opt.ColumnMeta, 0, cb.mutationBufferCols.Len())
cb.mutationBufferCols.ForEach(func(key, val int) {
id := opt.ColumnID(key)
cb.colMeta = append(cb.colMeta, *md.ColumnMeta(id))
cb.colMeta = make([]opt.ColumnMeta, 0, cb.mutationBufferCols.MaxOrd())
cb.mutationBufferCols.ForEach(func(col opt.ColumnID, ord int) {
cb.colMeta = append(cb.colMeta, *md.ColumnMeta(col))
})

return cb, nil
Expand Down Expand Up @@ -198,7 +197,7 @@ func (cb *cascadeBuilder) planCascade(
var relExpr memo.RelExpr
// bufferColMap is the mapping between the column IDs in the new memo and
// the column ordinal in the buffer node.
var bufferColMap opt.ColMap
var bufferColMap colOrdMap
if bufferRef == nil {
// No input buffering.
var err error
Expand All @@ -219,15 +218,19 @@ func (cb *cascadeBuilder) planCascade(
} else {
// Set up metadata for the buffer columns.

// Allocate a map with enough capacity to store the new columns being
// added below.
bufferColMap = newColOrdMap(md.MaxColumn() + opt.ColumnID(len(cb.colMeta)))

// withColRemap is the mapping between the With column IDs in the original
// memo and the corresponding column IDs in the new memo.
var withColRemap opt.ColMap
var withCols opt.ColSet
for i := range cb.colMeta {
id := md.AddColumn(cb.colMeta[i].Alias, cb.colMeta[i].Type)
withCols.Add(id)
ordinal, _ := cb.mutationBufferCols.Get(int(cb.colMeta[i].MetaID))
bufferColMap.Set(int(id), ordinal)
ordinal, _ := cb.mutationBufferCols.Get(cb.colMeta[i].MetaID)
bufferColMap.Set(id, ordinal)
withColRemap.Set(int(cb.colMeta[i].MetaID), int(id))
}

Expand Down
Loading

0 comments on commit 3b2c1cc

Please sign in to comment.