-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
Copy pathparse.go
521 lines (470 loc) · 17.8 KB
/
parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
// Copyright 2012, Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in licenses/BSD-vitess.txt.
// Portions of this file are additionally subject to the following
// license and copyright.
//
// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
// This code was derived from https://github.com/youtube/vitess.
package parser
import (
"fmt"
"go/constant"
"strings"
"github.com/cockroachdb/cockroach/pkg/sql/parser/statements"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/scanner"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/errors"
)
// init points the scanner package's NewNumValFn and NewPlaceholderFn hooks at
// the corresponding constructors from the tree package.
func init() {
	scanner.NewNumValFn = func(v constant.Value, orig string, neg bool) any {
		return tree.NewNumVal(v, orig, neg)
	}
	scanner.NewPlaceholderFn = func(name string) (any, error) {
		return tree.NewPlaceholder(name)
	}
}
// Parser wraps a scanner, parser and other utilities present in the parser
// package.
//
// The token and statement buffers below are reused across calls, so slices
// returned by one call may share storage with the Parser itself.
type Parser struct {
// scanner tokenizes the SQL input; parseWithDepth initializes it for each
// input string and cleans it up when done.
scanner scanner.SQLScanner
// lexer is initialized by parse for every statement and passed to
// parserImpl.Parse; parse reads the resulting AST and last error from it.
lexer lexer
// parserImpl is the parser implementation invoked by parse.
parserImpl sqlParserImpl
// tokBuf is the initial backing storage for the tokens collected by
// scanOneStmt.
tokBuf [8]sqlSymType
// stmtBuf is the initial backing storage for the statement list built by
// parseWithDepth.
stmtBuf [1]statements.Statement[tree.Statement]
}
// defaultNakedIntType is the type used for a "naked" INT by the entry points
// in this file that do not take an explicit integer type.
//
// INT8 is the historical interpretation of INT. This should be left
// alone in the future, since there are many sql fragments stored
// in various descriptors. Any user input that was created after
// INT := INT4 will simply use INT4 in any resulting code.
var defaultNakedIntType = types.Int
// NakedIntTypeFromDefaultIntSize maps a default integer size onto the type
// that a "naked" INT should be parsed as. The size may be given in bytes
// (preferred) or in bits: 4 and 32 select types.Int4, and every other value
// selects types.Int.
func NakedIntTypeFromDefaultIntSize(defaultIntSize int32) *types.T {
	if defaultIntSize == 4 || defaultIntSize == 32 {
		return types.Int4
	}
	return types.Int
}
// Parse parses the given SQL text and returns the list of statements it
// contains. A naked INT is interpreted as defaultNakedIntType, and comments
// are not retained.
func (p *Parser) Parse(sql string) (statements.Statements, error) {
	stmts, err := p.parseWithDepth(1, sql, defaultNakedIntType, disgardComments)
	return stmts, err
}
// ParseWithInt parses the given SQL text and returns the list of statements
// it contains. The INT token is resolved to nakedIntType; comments are not
// retained.
func (p *Parser) ParseWithInt(sql string, nakedIntType *types.T) (statements.Statements, error) {
	stmts, err := p.parseWithDepth(1, sql, nakedIntType, disgardComments)
	return stmts, err
}
// parseOneWithInt parses sql and requires it to contain exactly one
// statement. The INT token is resolved to nakedIntType and comments are
// handled according to the comments mode. A parse error is returned as-is; any
// statement count other than one yields an assertion failure.
func (p *Parser) parseOneWithInt(
	sql string, nakedIntType *types.T, comments commentsMode,
) (statements.Statement[tree.Statement], error) {
	var none statements.Statement[tree.Statement]
	parsed, err := p.parseWithDepth(1, sql, nakedIntType, comments)
	switch {
	case err != nil:
		return none, err
	case len(parsed) != 1:
		return none, errors.AssertionFailedf("expected 1 statement, but found %d", len(parsed))
	}
	return parsed[0], nil
}
// scanOneStmt scans the tokens of the next statement from p.scanner.
//
// It returns the raw SQL text of the statement (without a terminating ';'),
// the tokens that make it up, and done. done is true when scanning stopped on
// a token with id 0 (treated here as the end of the input) or on an ERROR
// token; it is false when the statement was terminated by a ';'. Semicolons
// that precede the statement are skipped, and if nothing else follows them the
// result is ("", nil, true).
//
// Token positions are rebased so that they are relative to the start of the
// returned SQL string. The returned slice starts out backed by p.tokBuf, so it
// may be overwritten by the next call.
func (p *Parser) scanOneStmt() (sql string, tokens []sqlSymType, done bool) {
tokens = p.tokBuf[:0]
// Reserve slot 0 of the buffer; the first token is scanned directly into it
// through lval, so tokens[0] and *lval refer to the same element.
tokens = append(tokens, sqlSymType{})
lval := &p.tokBuf[0]
// Scan the first token.
for {
p.scanner.Scan(lval)
if lval.id == 0 {
return "", nil, true
}
// Skip over any leading semicolons.
if lval.id != ';' {
break
}
}
startPos := lval.pos
// We make the resulting token positions match the returned string.
lval.pos = 0
// preValID remembers the id of the previously scanned token so that the
// two-token sequence BEGIN ATOMIC can be recognized.
var preValID int32
// This is used to track the degree of nested `BEGIN ATOMIC ... END` function
// body context. When greater than zero, it means that we're scanning through
// the function body of a `CREATE FUNCTION` statement. ';' character is only
// a separator of sql statements within the body instead of a finishing line
// of the `CREATE FUNCTION` statement.
curFuncBodyCnt := 0
for {
if lval.id == ERROR {
// On a scan error, return the rest of the input as the statement text;
// the ERROR token is kept as the last element of tokens.
return p.scanner.In()[startPos:], tokens, true
}
preValID = lval.id
// Append a fresh slot and scan the next token directly into it. lval must
// be re-derived after append because the slice may have been reallocated.
tokens = append(tokens, sqlSymType{})
lval = &tokens[len(tokens)-1]
p.scanner.Scan(lval)
if preValID == BEGIN && lval.id == ATOMIC {
curFuncBodyCnt++
}
if curFuncBodyCnt > 0 && lval.id == END {
curFuncBodyCnt--
}
// The statement ends at a token with id 0, or at a ';' that is not inside
// a BEGIN ATOMIC ... END body.
if lval.id == 0 || (curFuncBodyCnt == 0 && lval.id == ';') {
endPos := p.scanner.Pos()
if lval.id == ';' {
// Don't include the ending semicolon, if there is one, in the raw SQL.
endPos--
}
// Drop the terminating token itself from the returned tokens.
tokens = tokens[:len(tokens)-1]
return p.scanner.In()[startPos:endPos], tokens, (lval.id == 0)
}
// Rebase the token position so that it is relative to the statement start.
lval.pos -= startPos
}
}
// commentsMode selects whether parseWithDepth asks the scanner to retain the
// comments it encounters.
type commentsMode bool
const (
// retainComments makes parseWithDepth call RetainComments on the scanner.
retainComments commentsMode = true
// disgardComments leaves the scanner's comment handling untouched.
// NOTE(review): "disgard" is a misspelling of "discard"; renaming it requires
// updating every use in the package.
disgardComments commentsMode = false
)
// parseWithDepth scans sql one statement at a time and parses each of them,
// returning the statements that produced a non-nil AST. The INT token is
// resolved to nakedIntType, and the scanner is told to retain comments when
// cm is retainComments. The first parse error aborts the whole operation.
//
// depth is forwarded (incremented by one) to parse. The returned slice starts
// out backed by p.stmtBuf.
func (p *Parser) parseWithDepth(
	depth int, sql string, nakedIntType *types.T, cm commentsMode,
) (statements.Statements, error) {
	p.scanner.Init(sql)
	if cm == retainComments {
		p.scanner.RetainComments()
	}
	defer p.scanner.Cleanup()

	result := statements.Statements(p.stmtBuf[:0])
	for finished := false; !finished; {
		stmtSQL, stmtTokens, done := p.scanOneStmt()
		parsed, err := p.parse(depth+1, stmtSQL, stmtTokens, nakedIntType)
		if err != nil {
			return nil, err
		}
		// Empty input between separators yields a nil AST; skip it.
		if parsed.AST != nil {
			result = append(result, parsed)
		}
		finished = done
	}
	return result, nil
}
// parse runs the parser implementation over the tokens of one scanned
// statement. On success it returns the statement's AST together with its SQL
// text, the scanner's comments and the placeholder and annotation counts
// recorded by the lexer. On failure it returns the lexer's last error, with
// every telemetry key attached to it re-added under a "syntax." prefix.
func (p *Parser) parse(
	depth int, sql string, tokens []sqlSymType, nakedIntType *types.T,
) (statements.Statement[tree.Statement], error) {
	p.lexer.init(sql, tokens, nakedIntType)
	defer p.lexer.cleanup()

	if rc := p.parserImpl.Parse(&p.lexer); rc == 0 {
		return statements.Statement[tree.Statement]{
			AST:             p.lexer.stmt,
			SQL:             sql,
			Comments:        p.scanner.Comments,
			NumPlaceholders: p.lexer.numPlaceholders,
			NumAnnotations:  p.lexer.numAnnotations,
		}, nil
	}

	if p.lexer.lastError == nil {
		// This should never happen -- there should be an error object
		// every time Parse() returns nonzero. We're just playing safe
		// here.
		p.lexer.Error("syntax error")
	}
	parseErr := p.lexer.lastError

	// Compatibility with 19.1 telemetry: prefix the telemetry keys
	// with the "syntax." prefix.
	// TODO(knz): move the auto-prefixing of feature names to a
	// higher level in the call stack.
	if keys := errors.GetTelemetryKeys(parseErr); len(keys) > 0 {
		for i, key := range keys {
			keys[i] = "syntax." + key
		}
		parseErr = errors.WithTelemetry(parseErr, keys...)
	}
	return statements.Statement[tree.Statement]{}, parseErr
}
// unaryNegation builds the AST node for the negation of e. When e is a
// constant NumVal, the sign is folded into the constant itself (which is
// modified in place and returned) rather than wrapping it in a UnaryExpr, so
// that a negative number is treated as a single constant for the purpose of
// formatting and scrubbing. Any other expression is wrapped in a unary-minus
// UnaryExpr.
func unaryNegation(e tree.Expr) tree.Expr {
	switch v := e.(type) {
	case *tree.NumVal:
		v.Negate()
		return v
	default:
		// Common case.
		return &tree.UnaryExpr{
			Operator: tree.MakeUnaryOperator(tree.UnaryMinus),
			Expr:     e,
		}
	}
}
// Parse parses a SQL string and returns the list of statements it contains.
// A naked INT is interpreted as defaultNakedIntType.
func Parse(sql string) (statements.Statements, error) {
	stmts, err := ParseWithInt(sql, defaultNakedIntType)
	return stmts, err
}
// ParseWithInt parses a SQL string with a fresh Parser and returns the list
// of statements it contains. The INT token is resolved to nakedIntType;
// comments are not retained.
func ParseWithInt(sql string, nakedIntType *types.T) (statements.Statements, error) {
	p := Parser{}
	return p.parseWithDepth(1, sql, nakedIntType, disgardComments)
}
// ParseOne parses a SQL string that must contain exactly one statement and
// returns that statement. It always interprets the INT and SERIAL types as
// 64-bit types, because it is used on internal-execution paths that may
// receive bits of SQL from other nodes. In general, all user-generated SQL is
// expected to have gone through ParseWithInt() instead.
func ParseOne(sql string) (statements.Statement[tree.Statement], error) {
	stmt, err := ParseOneWithInt(sql, defaultNakedIntType)
	return stmt, err
}
// ParseOneRetainComments behaves like ParseOne, except that the comments
// scanned along the way are kept in the returned statement's Comments field.
func ParseOneRetainComments(sql string) (statements.Statement[tree.Statement], error) {
	p := Parser{}
	return p.parseOneWithInt(sql, defaultNakedIntType, retainComments)
}
// ParseOneWithInt behaves like ParseOne, except that the INT and SERIAL types
// are interpreted as the provided integer type. Comments are not retained.
func ParseOneWithInt(
	sql string, nakedIntType *types.T,
) (statements.Statement[tree.Statement], error) {
	p := Parser{}
	return p.parseOneWithInt(sql, nakedIntType, disgardComments)
}
// ParseQualifiedTableName parses a possibly qualified table name and returns
// it as a *tree.TableName. The input must consist of one or more name parts
// written in the full input SQL syntax: any part that contains special or
// non-lowercase characters must be enclosed in double quotes. The caller is
// responsible for passing only valid table names; parse errors from
// ParseTableName are returned unchanged.
func ParseQualifiedTableName(sql string) (*tree.TableName, error) {
	unresolved, err := ParseTableName(sql)
	if err != nil {
		return nil, err
	}
	tableName := unresolved.ToTableName()
	return &tableName, nil
}
// ParseTableName parses a table name. The table name must contain one
// or more name parts, using the full input SQL syntax: each name
// part containing special characters, or non-lowercase characters,
// must be enclosed in double quotes. The name may not be an invalid
// table name (the caller is responsible for guaranteeing that only
// valid table names are provided as input).
//
// It returns the parse error if the input cannot be parsed, and an assertion
// failure if the wrapping statement does not parse as a table rename.
func ParseTableName(sql string) (*tree.UnresolvedObjectName, error) {
	// We wrap the name we want to parse into a dummy statement since our parser
	// can only parse full statements.
	stmt, err := ParseOne(fmt.Sprintf("ALTER TABLE %s RENAME TO x", sql))
	if err != nil {
		return nil, err
	}
	rename, ok := stmt.AST.(*tree.RenameTable)
	if !ok {
		// Report the type of the parsed AST; stmt itself is always the same
		// wrapper struct and would make the message uninformative.
		return nil, errors.AssertionFailedf(
			"expected an ALTER TABLE statement, but found %T", stmt.AST)
	}
	return rename.Name, nil
}
// ParseFunctionName parses a function name. The function name must contain one
// or more name parts, using the full input SQL syntax: each name
// part containing special characters, or non-lowercase characters,
// must be enclosed in double quotes. The name may not be an invalid
// function name (the caller is responsible for guaranteeing that only
// valid function names are provided as input).
//
// It returns the parse error if the input cannot be parsed, and an assertion
// failure if the wrapping statement does not parse as a routine rename.
func ParseFunctionName(sql string) (*tree.UnresolvedObjectName, error) {
	// We wrap the name we want to parse into a dummy statement since our parser
	// can only parse full statements.
	stmt, err := ParseOne(fmt.Sprintf("ALTER FUNCTION %s RENAME TO x", sql))
	if err != nil {
		return nil, err
	}
	rename, ok := stmt.AST.(*tree.AlterRoutineRename)
	if !ok {
		// Report the type of the parsed AST; stmt itself is always the same
		// wrapper struct and would make the message uninformative.
		return nil, errors.AssertionFailedf(
			"expected an ALTER FUNCTION statement, but found %T", stmt.AST)
	}
	return rename.Function.FuncName.ToUnresolvedObjectName(), nil
}
// ParseTablePattern parses a table pattern. The table name must contain one
// or more name parts, using the full input SQL syntax: each name
// part containing special characters, or non-lowercase characters,
// must be enclosed in double quotes. The name may not be an invalid
// table name (the caller is responsible for guaranteeing that only
// valid table names are provided as input).
// The last part may be '*' to denote a wildcard.
//
// Only the first pattern of the parsed target list is returned. It returns
// the parse error if the input cannot be parsed, and an assertion failure if
// the wrapping statement does not have the expected shape.
func ParseTablePattern(sql string) (tree.TablePattern, error) {
	// We wrap the name we want to parse into a dummy statement since our parser
	// can only parse full statements.
	stmt, err := ParseOne(fmt.Sprintf("GRANT SELECT ON TABLE %s TO admin", sql))
	if err != nil {
		return nil, err
	}
	grant, ok := stmt.AST.(*tree.Grant)
	if !ok {
		// Report the type of the parsed AST; stmt itself is always the same
		// wrapper struct and would make the message uninformative.
		return nil, errors.AssertionFailedf(
			"expected a GRANT statement, but found %T", stmt.AST)
	}
	if len(grant.Targets.Tables.TablePatterns) == 0 {
		return nil, errors.AssertionFailedf("expected at least one pattern")
	}
	u := grant.Targets.Tables.TablePatterns[0]
	un, ok := u.(*tree.UnresolvedName)
	if !ok {
		return nil, errors.AssertionFailedf("expected an unresolved name, but found %T", u)
	}
	return un.NormalizeTablePattern()
}
// parseExprsWithInt parses one or more sql expressions, resolving the INT
// token to nakedIntType. The expressions are joined with commas and wrapped
// into a dummy `SET ROW (...)` statement, since the parser can only parse full
// statements; the values of the resulting SET are returned.
//
// It returns the parse error if the input cannot be parsed, and an assertion
// failure if the wrapping statement does not parse as a SET.
func parseExprsWithInt(exprs []string, nakedIntType *types.T) (tree.Exprs, error) {
	stmt, err := ParseOneWithInt(fmt.Sprintf("SET ROW (%s)", strings.Join(exprs, ",")), nakedIntType)
	if err != nil {
		return nil, err
	}
	set, ok := stmt.AST.(*tree.SetVar)
	if !ok {
		// Report the type of the parsed AST; stmt itself is always the same
		// wrapper struct and would make the message uninformative.
		return nil, errors.AssertionFailedf(
			"expected a SET statement, but found %T", stmt.AST)
	}
	return set.Values, nil
}
// ParseExprs parses a list of SQL scalar expressions, interpreting a naked
// INT as defaultNakedIntType. An empty input list yields an empty, non-nil
// tree.Exprs without invoking the parser. The caller is responsible for
// ensuring that every element is, in fact, a SQL scalar expression — the
// results are undefined if the input contains invalid SQL syntax.
func ParseExprs(sql []string) (tree.Exprs, error) {
	if len(sql) > 0 {
		return parseExprsWithInt(sql, defaultNakedIntType)
	}
	return tree.Exprs{}, nil
}
// ParseExpr parses a single SQL scalar expression, interpreting a naked INT
// as defaultNakedIntType. The caller is responsible for ensuring that the
// input is, in fact, a valid SQL scalar expression — the results are
// undefined if the string contains invalid SQL syntax.
func ParseExpr(sql string) (tree.Expr, error) {
	expr, err := ParseExprWithInt(sql, defaultNakedIntType)
	return expr, err
}
// ParseExprWithInt parses a single SQL scalar expression, using nakedIntType
// wherever INT appears as a type name in the SQL syntax. The caller is
// responsible for ensuring that the input is, in fact, a valid SQL scalar
// expression — the results are undefined if the string contains invalid SQL
// syntax. An assertion failure is returned if the input does not parse to
// exactly one expression.
func ParseExprWithInt(sql string, nakedIntType *types.T) (tree.Expr, error) {
	parsed, err := parseExprsWithInt([]string{sql}, nakedIntType)
	switch {
	case err != nil:
		return nil, err
	case len(parsed) != 1:
		return nil, errors.AssertionFailedf("expected 1 expression, found %d", len(parsed))
	}
	return parsed[0], nil
}
// GetTypeReferenceFromName turns a type name into a type reference. This
// supports only "simple" (single-identifier) references to built-in types,
// when the identifier has already been parsed away from the input SQL syntax.
//
// The name is resolved by parsing the cast expression `1::<name>` and
// returning the type of the resulting cast.
func GetTypeReferenceFromName(typeName tree.Name) (tree.ResolvableTypeReference, error) {
	castSQL := "1::" + typeName.String()
	parsed, err := ParseExpr(castSQL)
	if err != nil {
		return nil, err
	}
	if castExpr, isCast := parsed.(*tree.CastExpr); isCast {
		return castExpr.Type, nil
	}
	return nil, errors.AssertionFailedf("expected a tree.CastExpr, but found %T", parsed)
}
// GetTypeFromValidSQLSyntax retrieves a type from its SQL syntax by parsing
// the cast expression `1::<sql>` and extracting the type from the result. The
// caller is responsible for guaranteeing that the type expression is valid
// SQL (or for handling the resulting error). This includes verifying that
// complex identifiers are enclosed in double quotes, etc.
func GetTypeFromValidSQLSyntax(sql string) (tree.ResolvableTypeReference, error) {
	castSQL := fmt.Sprintf("1::%s", sql)
	parsed, err := ParseExpr(castSQL)
	if err != nil {
		return nil, err
	}
	return GetTypeFromCastOrCollate(parsed)
}
// GetTypeFromCastOrCollate returns the type of the given tree.Expr, which is
// assumed to be either a tree.CastExpr or a tree.CollateExpr (which wraps the
// tree.CastExpr). Any other expression yields an assertion failure.
//
// NOTE(review): for a CollateExpr the result is always a collated
// types.String with the expression's locale; the type of the wrapped cast is
// not consulted.
func GetTypeFromCastOrCollate(expr tree.Expr) (tree.ResolvableTypeReference, error) {
	switch e := expr.(type) {
	case *tree.CollateExpr:
		// COLLATE clause has lower precedence than the cast, so if we have
		// something like `1::STRING COLLATE en`, it'll be parsed as
		// CollateExpr(CastExpr).
		return types.MakeCollatedString(types.String, e.Locale), nil
	case *tree.CastExpr:
		return e.Type, nil
	default:
		return nil, errors.AssertionFailedf("expected a tree.CastExpr, but found %T", expr)
	}
}
// errBitLengthNotPositive is returned by newBitType for widths below 1.
var errBitLengthNotPositive = pgerror.WithCandidateCode(
	errors.New("length for type bit must be at least 1"),
	pgcode.InvalidParameterValue,
)

// newBitType creates a new BIT type with the given bit width: a VARBIT type
// when varying is set, and a fixed-width BIT type otherwise. Widths smaller
// than 1 are rejected with errBitLengthNotPositive.
func newBitType(width int32, varying bool) (*types.T, error) {
	switch {
	case width < 1:
		return nil, errBitLengthNotPositive
	case varying:
		return types.MakeVarBit(width), nil
	default:
		return types.MakeBit(width), nil
	}
}
// errFloatPrecAtLeast1 is returned by newFloat for precisions below 1.
var errFloatPrecAtLeast1 = pgerror.WithCandidateCode(
	errors.New("precision for type float must be at least 1 bit"), pgcode.InvalidParameterValue)

// errFloatPrecMax54 is returned by newFloat for precisions of 54 bits or more.
var errFloatPrecMax54 = pgerror.WithCandidateCode(
	errors.New("precision for type float must be less than 54 bits"), pgcode.InvalidParameterValue)

// newFloat creates a type for FLOAT with the given precision, expressed in
// bits:
//
//   - 1 to 24 selects types.Float4;
//   - 25 to 53 selects types.Float;
//   - anything below 1 fails with errFloatPrecAtLeast1;
//   - anything above 53 fails with errFloatPrecMax54.
//
// The upper bound is exclusive of 54 so that the accepted range agrees with
// the error message ("must be less than 54 bits") and with PostgreSQL's
// float(p) rules.
func newFloat(prec int64) (*types.T, error) {
	switch {
	case prec < 1:
		return nil, errFloatPrecAtLeast1
	case prec <= 24:
		return types.Float4, nil
	case prec < 54:
		return types.Float, nil
	default:
		return nil, errFloatPrecMax54
	}
}
// newDecimal creates a type for DECIMAL with the given precision and scale.
// The scale must lie between 0 and the precision (inclusive); otherwise an
// error with code pgcode.InvalidParameterValue is returned.
func newDecimal(prec, scale int32) (*types.T, error) {
	// Enforce both ends of the range that the error message promises; a
	// negative scale is rejected as well as one that exceeds the precision.
	if scale < 0 || scale > prec {
		err := pgerror.WithCandidateCode(
			errors.Newf("scale (%d) must be between 0 and precision (%d)", scale, prec),
			pgcode.InvalidParameterValue)
		return nil, err
	}
	return types.MakeDecimal(prec, scale), nil
}
// arrayOf creates a type alias for an array of the given element type and
// fixed bounds. The bounds are currently ignored.
//
// When the element reference is a statically known type, a concrete array
// type is returned (after validating the element type); otherwise the result
// is a tree.ArrayTypeReference wrapping the unresolved element reference.
func arrayOf(
	ref tree.ResolvableTypeReference, bounds []int32,
) (tree.ResolvableTypeReference, error) {
	elemTyp, known := tree.GetStaticallyKnownType(ref)
	if !known {
		// The element type is not statically known, so return an array type
		// reference rather than an array type.
		return &tree.ArrayTypeReference{ElementType: ref}, nil
	}

	// Do not allow arrays of these types. This is consistent with Postgres'
	// behavior.
	fam := elemTyp.Family()
	if fam == types.UnknownFamily || fam == types.VoidFamily || fam == types.TriggerFamily {
		return nil, pgerror.Newf(pgcode.UndefinedObject, "type %s[] does not exist", elemTyp.Name())
	}
	if err := types.CheckArrayElementType(elemTyp); err != nil {
		return nil, err
	}
	return types.MakeArray(elemTyp), nil
}