-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
explorer.go
251 lines (233 loc) · 9.5 KB
/
explorer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package xform
import (
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util"
)
// explorer generates alternate expressions that are logically equivalent to
// existing expressions in the memo. The new expressions are added to the same
// memo group as the existing expression. The optimizer will cost all the
// expressions and pick the lowest cost alternative that provides any required
// physical properties.
//
// Equivalent expressions are generated by exploration rules. An exploration
// rule efficiently enumerates all possible combinations of its sub-expressions
// in order to look for matches. For example:
//
// // [AssociateJoin]
// (InnerJoin
// (InnerJoin $r:* $s:* $lowerOn:*)
// $t:*
// $upperOn:*
// )
// =>
// ...
//
// Say the memo group containing the upper inner-join has 3 expressions in it,
// and the memo group containing the lower inner-join has 4 expressions. Then
// the explorer will enumerate 12 possible expression combinations, looking for
// a combination that has an InnerJoin operator with another InnerJoin operator
// as its left operand.
//
// Once new expressions have been added to a group, they themselves become
// eligible for exploration, which might generate further expressions, and so
// on. Because the same group will often be explored multiple times, the
// explorer keeps state which helps it avoid duplicate work during subsequent
// passes.
//
// The explorer only traverses expression trees to the depth required by the
// exploration match patterns. It expects the optimizer to call exploreGroup
// for each group that needs to be explored. The optimizer can then use branch
// and bound pruning to skip exploration of entire sub-trees.
//
// For each expression combination that matches, a replace expression is
// constructed and added to the same memo group as the matched expression:
//
// // [AssociateJoin]
// (InnerJoin
// (InnerJoin $r:* $s:* $lowerOn:*)
// $t:*
// $upperOn:*
// )
// =>
// (InnerJoin
// (InnerJoin
// $r
// $t
// (ConstructFiltersNotUsing $s $lowerOn $upperOn)
// )
// $s
// (ConstructFiltersUsing $s $lowerOn $upperOn)
// )
//
// In this example, if the upper and lower groups each contain two InnerJoin
// expressions, then four new expressions will be added to the memo group of the
// matched expression. During the next pass, the four new expressions will
// themselves match this same rule. However, adding their replace expressions to
// the memo group will be a no-op, because they're already present.
type explorer struct {
evalCtx *tree.EvalContext
o *Optimizer
f *norm.Factory
mem *memo.Memo
// funcs is the struct used to call all custom match and replace functions
// used by the exploration rules. It wraps an unnamed norm.CustomFuncs, so
// it provides a clean interface for calling functions from both the xform
// and norm packages using the same prefix.
funcs CustomFuncs
}
// init initializes the explorer for use (or reuse).
func (e *explorer) init(o *Optimizer) {
e.evalCtx = o.evalCtx
e.o = o
e.f = o.Factory()
e.mem = o.mem
e.funcs.Init(e)
}
// exploreGroup generates alternate expressions that are logically equivalent
// to the expressions already in the given group, and adds them to the group.
// The explorer maintains state that tracks which expressions were explored in
// previous passes. It keeps "start" and "end" expressions for the group which
// track the expressions which need to be fully explored during the current
// pass. Each time exploreGroup is called, the end of the previous pass becomes
// the start of the next pass. For example:
//
// pass1 pass2 pass3
// <-start
// e0 e0 e0
// <-end <-start
// e1 (new) e1 e1
//
// e2 (new) e2 e2
// <-end <-start
// e3 (new) e3
// <-end
//
// For rules which match one or more sub-expressions in addition to the top-
// level expression, there is extra complexity because every combination needs
// to be considered. Even expressions which were explored in previous passes
// need to be partially re-explored, because they may match when considered in
// combination with a new sub-expression which wasn't present during the last
// pass. Only combinations which consist solely of old expressions can be
// skipped.
//
// Combination enumeration code is just a series of nested loops generated by
// Optgen. Each non-scalar match pattern or sub-pattern uses a loop to
// enumerate the expressions in the corresponding memo group. For example:
//
// $join:(InnerJoin
// $left:(InnerJoin)
// $right:(Select)
// $on:*
// )
//
// This match pattern would be implemented with 3 nested loops: 1 each for the
// $join, $left, and $right memo groups. If $join had 2 expressions, $left had
// 3 expressions, and $right had 2 expressions, then 2 * 3 * 2 = 12 combos will
// be considered. The innermost loop can skip iteration if all outer loops are
// bound to expressions which have already been explored in previous passes:
//
// for e1 in memo-exprs($join):
// for e2 in memo-exprs($left):
// for e3 in memo-exprs($right):
// if ordinal(e3) >= state.start:
// ... explore (e1, e2, e3) combo ...
//
func (e *explorer) exploreGroup(grp memo.RelExpr) *exploreState {
// Do nothing if this group has already been fully explored.
state := e.ensureExploreState(grp)
if state.fullyExplored {
return state
}
// Update set of group members that will be considered during this pass, by
// setting the start member to be the end expression from last pass.
state.start = state.end
state.end = 0
for member := grp; member != nil; member = member.NextExpr() {
state.end++
}
var member memo.RelExpr
var i int
fullyExplored := true
for i, member = 0, grp; i < state.end; i, member = i+1, member.NextExpr() {
// If member was fully explored in previous passes, then nothing further
// to do.
if state.isMemberFullyExplored(i) {
continue
}
if memberExplored := e.exploreGroupMember(state, member, i); memberExplored {
// No more rules can ever match this expression, so skip it in
// future passes.
state.markMemberAsFullyExplored(i)
} else {
// If even one member is not fully explored, then the group is not
// fully explored.
fullyExplored = false
}
}
// If new group members were added by the explorer, then the group has not
// yet been fully explored.
if fullyExplored && member == nil {
state.fullyExplored = true
}
return state
}
// lookupExploreState returns the optState struct associated with the memo
// group.
func (e *explorer) lookupExploreState(grp memo.RelExpr) *exploreState {
return &e.o.lookupOptState(grp, physical.MinRequired).explore
}
// ensureExploreState allocates the exploration state in the optState struct
// associated with the memo group, with respect to the min physical props.
func (e *explorer) ensureExploreState(grp memo.RelExpr) *exploreState {
return &e.o.ensureOptState(grp, physical.MinRequired).explore
}
// ----------------------------------------------------------------------
//
// Exploration state
//
// ----------------------------------------------------------------------
// exploreState contains state needed by the explorer for each memo group it
// explores. The state is allocated lazily for a group when the exploreGroup
// method is called. Various fields record what exploration has taken place so
// that the same work isn't repeated.
type exploreState struct {
// start (inclusive) and end (exclusive) specify which expressions need to
// be explored in the current pass. Expressions < start have been partly
// explored during previous passes. Expressions >= end are new expressions
// added during the current pass.
start int
end int
// fullyExplored is set to true once all members of the group have been fully
// explored, meaning that no new members will ever be added to the group, or
// to dependent child groups. Further exploration of the group can be skipped.
fullyExplored bool
// fullyExploredMembers is a set of ordinal positions of members within the
// memo group. Once a member expression has been fully explored, its ordinal
// is added to this set.
fullyExploredMembers util.FastIntSet
}
// isMemberFullyExplored is true if the member at the given ordinal position
// within the group will never match an additional rule, and can therefore be
// skipped in future exploration passes.
func (e *exploreState) isMemberFullyExplored(ordinal int) bool {
return e.fullyExploredMembers.Contains(ordinal)
}
// markMemberAsFullyExplored is called when all possible matching combinations
// have been considered for the subtree rooted at the given expression. Even if
// there are more exploration passes, this expression will never have new
// children, grand-children, etc. that might cause it to match another rule.
func (e *exploreState) markMemberAsFullyExplored(ordinal int) {
e.fullyExploredMembers.Add(ordinal)
}