-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
logical_props.go
215 lines (196 loc) · 7.96 KB
/
logical_props.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
// Copyright 2018 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
package memo
import (
"bytes"
"fmt"
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
"github.com/cockroachdb/cockroach/pkg/sql/sem/types"
"github.com/cockroachdb/cockroach/pkg/util/treeprinter"
)
// LogicalProps describe the content and characteristics of data returned by
// all expression variants within a memo group. While each expression in the
// group may return rows or columns in a different order, or compute the
// result using different algorithms, the complete set of data is returned
// and can be transformed into whatever layout or presentation format that is
// desired.
type LogicalProps struct {
// Relational contains the set of properties that describe relational
// operators, like select, join, and project. It is nil for scalar
// operators.
Relational *RelationalProps
// Scalar contains the set of properties that describe scalar operators,
// like And, Plus, and Const. It is nil for relational operators.
Scalar *ScalarProps
}
// RelationalProps are the subset of logical properties that are computed for
// relational expressions that return rows and columns rather than scalar
// values.
type RelationalProps struct {
// OutputCols is the set of columns that can be projected by the
// expression. Ordering, naming, and duplication of columns is not
// representable by this property; those are physical properties.
OutputCols opt.ColSet
// NotNullCols is the subset of output columns which cannot be NULL.
// The NULL-ability of columns flows from the inputs and can also be
// derived from filters that are NULL-intolerant.
NotNullCols opt.ColSet
// WeakKeys are the column sets which form weak keys and are subsets of the
// expression's output columns. A weak key set cannot contain any other weak
// key set (it would be redundant).
//
// A column set is a key if no two rows are equal after projection onto that
// set. A requirement for a column set to be a key is for no columns in the
// set to be NULL-able. This requirement stems from the property of NULL
// where NULL != NULL. The simplest example of a key is the primary key for a
// table (recall that all of the columns of the primary key are defined to be
// NOT NULL).
//
// A weak key is a set of columns where no two rows containing non-NULL
// values are equal after projection onto that set. A UNIQUE index on a table
// is a weak key and possibly a key if all of the columns are NOT NULL. A
// weak key is a key if "(WeakKeys[i] & NotNullCols) == WeakKeys[i]".
//
// An empty key set is valid (an empty key it implies there is at most one
// row).
WeakKeys opt.WeakKeys
// OuterCols is the set of columns that are referenced by variables within
// this relational sub-expression, but are not bound within the scope of
// the expression. For example:
//
// SELECT *
// FROM a
// WHERE EXISTS(SELECT * FROM b WHERE b.x = a.x AND b.y = 5)
//
// For the inner SELECT expression, a.x is an outer column, meaning that it
// is defined "outside" the SELECT expression (hence the name "outer"). The
// SELECT expression binds the b.x and b.y references, so they are not
// part of the outer column set. The outer SELECT binds the a.x column, and
// so its outer column set is empty.
//
// TODO(andyk): populate this when we have subquery support
OuterCols opt.ColSet
// Stats is the set of statistics that apply to this relational expression.
// See the comment for the Statistics struct for more details.
Stats Statistics
}
// Statistics is a collection of measurements and statistics that is used by
// the coster to estimate the cost of expressions. Statistics are collected
// for tables and indexes and are exposed to the optimizer via opt.Catalog
// interfaces. As logical properties are derived bottom-up for each
// expression, statistics are derived for each relational expression, based
// on the characteristics of the expression's operator type, as well as the
// properties of its operands. For example:
//
// SELECT * FROM a WHERE a=1
//
// Table and column statistics can be used to estimate the number of rows
// in table a, and then to determine the selectivity of the a=1 filter, in
// order to derive the statistics of the Select expression.
type Statistics struct {
// RowCount is the estimated number of rows returned by the expression.
RowCount uint64
}
// ScalarProps are the subset of logical properties that are computed for
// scalar expressions that return primitive-valued types.
type ScalarProps struct {
// Type is the data type of the scalar expression (int, string, etc).
Type types.T
// OuterCols is the set of columns that are referenced by variables within
// this scalar sub-expression, but are not bound within the scope of the
// expression. For example:
//
// SELECT *
// FROM a
// WHERE EXISTS(SELECT * FROM b WHERE b.x = a.x AND b.y = 5)
//
// For the EXISTS expression, only a.x is an outer column, meaning that
// only it is defined "outside" the EXISTS expression (hence the name
// "outer"). Note that what constitutes an "outer column" is dependent on
// an expression's location in the query. For example, while the b.x and
// b.y columns are not outer columns on the EXISTS expression, they *are*
// outer columns on the inner WHERE condition.
OuterCols opt.ColSet
// Constraints is the set of constraints deduced from a boolean expression.
// For the expression to be true, all constraints in the set must be
// satisfied. Unset for non-boolean scalars.
Constraints *constraint.Set
// TightConstraints is true if the expression is exactly equivalent to the
// constraints. If it is false, the constraints are weaker than the
// expression.
TightConstraints bool
}
// OuterCols is a helper method that returns either the relational or scalar
// OuterCols field, depending on the operator's type.
func (p *LogicalProps) OuterCols() opt.ColSet {
if p.Scalar != nil {
return p.Scalar.OuterCols
}
return p.Relational.OuterCols
}
// FormatColSet outputs the specified set of columns using FormatCol to format
// the output.
func (p *LogicalProps) FormatColSet(
tp treeprinter.Node, md *opt.Metadata, heading string, colSet opt.ColSet,
) {
if !colSet.Empty() {
var buf bytes.Buffer
buf.WriteString(heading)
colSet.ForEach(func(i int) {
p.FormatCol(&buf, md, "", opt.ColumnID(i))
})
tp.Child(buf.String())
}
}
// FormatColList outputs the specified list of columns using FormatCol to
// format the output.
func (p *LogicalProps) FormatColList(
tp treeprinter.Node, md *opt.Metadata, heading string, colList opt.ColList,
) {
if len(colList) > 0 {
var buf bytes.Buffer
buf.WriteString(heading)
for _, col := range colList {
p.FormatCol(&buf, md, "", col)
}
tp.Child(buf.String())
}
}
// FormatCol outputs the specified column using the following format:
// label:index(type)
//
// If the column is not nullable, then this is the format:
// label:index(type!null)
//
// If a label is given, then it is used. Otherwise, a "best effort" label is
// used from query metadata.
func (p *LogicalProps) FormatCol(
buf *bytes.Buffer, md *opt.Metadata, label string, id opt.ColumnID,
) {
if label == "" {
label = md.ColumnLabel(id)
}
typ := md.ColumnType(id)
buf.WriteByte(' ')
buf.WriteString(label)
buf.WriteByte(':')
fmt.Fprintf(buf, "%d", id)
buf.WriteByte('(')
buf.WriteString(typ.String())
if p.Relational.NotNullCols.Contains(int(id)) {
buf.WriteString("!null")
}
buf.WriteByte(')')
}