-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
tlp.go
372 lines (334 loc) · 12.1 KB
/
tlp.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package sqlsmith
import (
"fmt"
"strings"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/errors"
)
// GenerateTLP returns two SQL queries as strings that can be used for Ternary
// Logic Partitioning (TLP). It also returns any placeholder arguments necessary
// for the second partitioned query. TLP is a method for logically testing DBMSs
// which is based on the logical guarantee that for a given predicate p, all
// rows must satisfy exactly one of the following three predicates: p, NOT p, p
// IS NULL. TLP can find bugs when an unpartitioned query and a query
// partitioned into three sub-queries do not yield the same results.
//
// More information on TLP: https://www.manuelrigger.at/preprints/TLP.pdf.
//
// This TLP implementation is limited in the types of queries that are tested.
// We currently only test basic WHERE, JOIN, and MAX/MIN query filters. It is
// possible to use TLP to test other aggregations, GROUP BY, and HAVING, which
// have all been implemented in SQLancer. See:
// https://github.com/sqlancer/sqlancer/tree/1.1.0/src/sqlancer/cockroachdb/oracle/tlp.
func (s *Smither) GenerateTLP() (unpartitioned, partitioned string, args []interface{}) {
// Set disableImpureFns to true so that generated predicates are immutable.
originalDisableImpureFns := s.disableImpureFns
s.disableImpureFns = true
defer func() {
s.disableImpureFns = originalDisableImpureFns
}()
switch tlpType := s.rnd.Intn(4); tlpType {
case 0:
return s.generateWhereTLP()
case 1:
partitioned, unpartitioned = s.generateOuterJoinTLP()
case 2:
partitioned, unpartitioned = s.generateInnerJoinTLP()
default:
partitioned, unpartitioned = s.generateAggregationTLP()
}
return partitioned, unpartitioned, nil
}
// generateWhereTLP returns two SQL queries as strings that can be used by the
// GenerateTLP function. These queries make use of the WHERE clause to partition
// the original query into three. This function also returns a list of arguments
// if the predicate contains placeholders. These arguments can be read
// sequentially to match the placeholders.
//
// The first query returned is an unpartitioned query of the form:
//
// SELECT *, p, NOT (p), (p) IS NULL, true, false, false FROM table
//
// The second query returned is a partitioned query of the form:
//
// SELECT *, p, NOT (p), (p) IS NULL, p, NOT (p), (p) IS NULL FROM table WHERE (p)
// UNION ALL
// SELECT *, p, NOT (p), (p) IS NULL, NOT(p), p, (p) IS NULL FROM table WHERE NOT (p)
// UNION ALL
// SELECT *, p, NOT (p), (p) IS NULL, (p) IS NULL, (p) IS NOT NULL, (NOT(p)) IS NOT NULL FROM table WHERE (p) IS NULL
//
// If the resulting values of the two queries are not equal, there is a logical
// bug.
func (s *Smither) generateWhereTLP() (unpartitioned, partitioned string, args []interface{}) {
f := tree.NewFmtCtx(tree.FmtParsable)
table, _, _, cols, ok := s.getSchemaTable()
if !ok {
panic(errors.AssertionFailedf("failed to find random table"))
}
table.Format(f)
tableName := f.CloseAndGetString()
var pred tree.Expr
if s.coin() {
pred = makeBoolExpr(s, cols)
} else {
pred, args = makeBoolExprWithPlaceholders(s, cols)
}
pred.Format(f)
predicate := f.CloseAndGetString()
allPreds := fmt.Sprintf("%[1]s, NOT (%[1]s), (%[1]s) IS NULL", predicate)
unpartitioned = fmt.Sprintf("SELECT *, %s, true, false, false FROM %s", allPreds, tableName)
pred1 := predicate
pred2 := fmt.Sprintf("NOT (%s)", predicate)
pred3 := fmt.Sprintf("(%s) IS NULL", predicate)
part1 := fmt.Sprintf(`SELECT *,
%s,
%s, %s, %s
FROM %s
WHERE %s`, allPreds, pred1, pred2, pred3, tableName, pred1)
part2 := fmt.Sprintf(`SELECT *,
%s,
%s, %s, %s
FROM %s
WHERE %s`, allPreds, pred2, pred1, pred3, tableName, pred2)
part3 := fmt.Sprintf(`SELECT *,
%s,
%s, (%s) IS NOT NULL, (%s) IS NOT NULL
FROM %s
WHERE %s`, allPreds, pred3, pred1, pred2, tableName, pred3)
partitioned = fmt.Sprintf(
`(%s)
UNION ALL (%s)
UNION ALL (%s)`,
part1, part2, part3,
)
return unpartitioned, partitioned, args
}
// generateOuterJoinTLP returns two SQL queries as strings that can be used by the
// GenerateTLP function. These queries make use of LEFT JOIN to partition the
// original query in two ways. The latter query is partitioned by a predicate p,
// while the former is not.
//
// The first query returned is an unpartitioned query of the form:
//
// SELECT * FROM table1 LEFT JOIN table2 ON TRUE
// UNION ALL
// SELECT * FROM table1 LEFT JOIN table2 ON FALSE
// UNION ALL
// SELECT * FROM table1 LEFT JOIN table2 ON FALSE
//
// The second query returned is a partitioned query of the form:
//
// SELECT * FROM table1 LEFT JOIN table2 ON (p)
// UNION ALL
// SELECT * FROM table1 LEFT JOIN table2 ON NOT (p)
// UNION ALL
// SELECT * FROM table1 LEFT JOIN table2 ON (p) IS NULL
//
// From the first query, we have a CROSS JOIN of the two tables (JOIN ON TRUE)
// and then all rows concatenated with NULL values for the second and third
// parts (JOIN ON FALSE). Recall our TLP logical guarantee that a given
// predicate p always evaluates to either TRUE, FALSE, or NULL. It follows that
// for any row in table1, exactly one of the expressions (p), NOT (p), or (p) is
// NULL will resolve to TRUE. For a given row, when the expression resolves to
// TRUE in table1, it matches with every row in table2. Otherwise, it is
// concatenated with null values. So each row in table1 is matched with every
// row in table2 exactly once (CROSS JOIN) and also matched with NULL values
// exactly twice, as expected by the unpartitioned query.
//
// Note that this implementation is restricted in that it only uses columns from
// the left table in the predicate p.
// If the resulting values of the two queries are not equal, there is a logical
// bug.
func (s *Smither) generateOuterJoinTLP() (unpartitioned, partitioned string) {
f := tree.NewFmtCtx(tree.FmtParsable)
table1, _, _, cols1, ok1 := s.getSchemaTable()
table2, _, _, _, ok2 := s.getSchemaTable()
if !ok1 || !ok2 {
panic(errors.AssertionFailedf("failed to find random tables"))
}
table1.Format(f)
tableName1 := f.CloseAndGetString()
table2.Format(f)
tableName2 := f.CloseAndGetString()
leftJoinTrue := fmt.Sprintf(
"SELECT * FROM %s LEFT JOIN %s ON TRUE",
tableName1, tableName2,
)
leftJoinFalse := fmt.Sprintf(
"SELECT * FROM %s LEFT JOIN %s ON FALSE",
tableName1, tableName2,
)
unpartitioned = fmt.Sprintf(
"(%s) UNION ALL (%s) UNION ALL (%s)",
leftJoinTrue, leftJoinFalse, leftJoinFalse,
)
pred := makeBoolExpr(s, cols1)
pred.Format(f)
predicate := f.CloseAndGetString()
part1 := fmt.Sprintf(
"SELECT * FROM %s LEFT JOIN %s ON %s",
tableName1, tableName2, predicate,
)
part2 := fmt.Sprintf(
"SELECT * FROM %s LEFT JOIN %s ON NOT (%s)",
tableName1, tableName2, predicate,
)
part3 := fmt.Sprintf(
"SELECT * FROM %s LEFT JOIN %s ON (%s) IS NULL",
tableName1, tableName2, predicate,
)
partitioned = fmt.Sprintf(
"(%s) UNION ALL (%s) UNION ALL (%s)",
part1, part2, part3,
)
return unpartitioned, partitioned
}
// generateInnerJoinTLP returns two SQL queries as strings that can be used by
// the GenerateTLP function. These queries make use of INNER JOIN to partition
// the original query in two ways. The latter query is partitioned by a
// predicate p, while the former is not.
//
// The first query returned is an unpartitioned query of the form:
//
// SELECT * FROM table1 JOIN table2 ON TRUE
//
// The second query returned is a partitioned query of the form:
//
// SELECT * FROM table1 JOIN table2 ON (p)
// UNION ALL
// SELECT * FROM table1 JOIN table2 ON NOT (p)
// UNION ALL
// SELECT * FROM table1 JOIN table2 ON (p) IS NULL
//
// From the first query, we have a CROSS JOIN of the two tables (JOIN ON TRUE).
// Recall our TLP logical guarantee that a given predicate p always evaluates to
// either TRUE, FALSE, or NULL. It follows that for any row returned by the
// first query, exactly one of the expressions (p), NOT (p), or (p) is NULL will
// resolve to TRUE. So the partitioned query accounts for each row in the
// CROSS JOIN exactly once.
//
// If the resulting values of the two queries are not equal, there is a logical
// bug.
func (s *Smither) generateInnerJoinTLP() (unpartitioned, partitioned string) {
f := tree.NewFmtCtx(tree.FmtParsable)
table1, _, _, cols1, ok1 := s.getSchemaTable()
table2, _, _, cols2, ok2 := s.getSchemaTable()
if !ok1 || !ok2 {
panic(errors.AssertionFailedf("failed to find random tables"))
}
table1.Format(f)
tableName1 := f.CloseAndGetString()
table2.Format(f)
tableName2 := f.CloseAndGetString()
unpartitioned = fmt.Sprintf(
"SELECT * FROM %s JOIN %s ON true",
tableName1, tableName2,
)
cols := cols1.extend(cols2...)
pred := makeBoolExpr(s, cols)
pred.Format(f)
predicate := f.CloseAndGetString()
part1 := fmt.Sprintf(
"SELECT * FROM %s JOIN %s ON %s",
tableName1, tableName2, predicate,
)
part2 := fmt.Sprintf(
"SELECT * FROM %s JOIN %s ON NOT (%s)",
tableName1, tableName2, predicate,
)
part3 := fmt.Sprintf(
"SELECT * FROM %s JOIN %s ON (%s) IS NULL",
tableName1, tableName2, predicate,
)
partitioned = fmt.Sprintf(
"(%s) UNION ALL (%s) UNION ALL (%s)",
part1, part2, part3,
)
return unpartitioned, partitioned
}
// generateAggregationTLP returns two SQL queries as strings that can be used by
// the GenerateTLP function. These queries make use of the WHERE clause and a
// predicate p to partition the original query into three. The aggregations that
// are supported are MAX(), MIN(), and COUNT(). AVG() and SUM() are also valid
// TLP aggregations.
//
// The first query returned is an unpartitioned query of the form:
//
// SELECT MAX(first) FROM (SELECT * FROM table) table(first)
//
// The second query returned is a partitioned query of the form:
//
// SELECT MAX(agg) FROM (
// SELECT MAX(first) AS agg FROM (
// SELECT * FROM table WHERE p
// ) table(first)
// UNION ALL
// SELECT MAX(first) AS agg FROM (
// SELECT * FROM table WHERE NOT (p)
// ) table(first)
// UNION ALL
// SELECT MAX(first) AS agg FROM (
// SELECT * FROM table WHERE (p) IS NULL
// ) table(first)
// )
//
// Note that all instances of MAX can be replaced with MIN to get the
// corresponding MIN version of the queries. For the COUNT version, we
// replace the outer MAX in the partitioned query with SUM, and then replace all
// other instances of MAX with COUNT. Both of these queries return the total
// count.
//
// If the resulting values of the two queries are not equal, there is a logical
// bug.
func (s *Smither) generateAggregationTLP() (unpartitioned, partitioned string) {
f := tree.NewFmtCtx(tree.FmtParsable)
table, _, _, cols, ok := s.getSchemaTable()
if !ok {
panic(errors.AssertionFailedf("failed to find random table"))
}
table.Format(f)
tableName := f.CloseAndGetString()
tableNameAlias := strings.TrimSpace(strings.Split(tableName, "AS")[1])
var innerAgg, outerAgg string
switch aggType := s.rnd.Intn(3); aggType {
case 0:
innerAgg, outerAgg = "MAX", "MAX"
case 1:
innerAgg, outerAgg = "MIN", "MIN"
default:
innerAgg, outerAgg = "COUNT", "SUM"
}
unpartitioned = fmt.Sprintf(
"SELECT %s(first) FROM (SELECT * FROM %s) %s(first)",
innerAgg, tableName, tableNameAlias,
)
pred := makeBoolExpr(s, cols)
pred.Format(f)
predicate := f.CloseAndGetString()
part1 := fmt.Sprintf(
"SELECT %s(first) AS agg FROM (SELECT * FROM %s WHERE %s) %s(first)",
innerAgg, tableName, predicate, tableNameAlias,
)
part2 := fmt.Sprintf(
"SELECT %s(first) AS agg FROM (SELECT * FROM %s WHERE NOT (%s)) %s(first)",
innerAgg, tableName, predicate, tableNameAlias,
)
part3 := fmt.Sprintf(
"SELECT %s(first) AS agg FROM (SELECT * FROM %s WHERE (%s) IS NULL) %s(first)",
innerAgg, tableName, predicate, tableNameAlias,
)
partitioned = fmt.Sprintf(
"SELECT %s(agg) FROM (%s UNION ALL %s UNION ALL %s)",
outerAgg, part1, part2, part3,
)
return unpartitioned, partitioned
}