From 31041197212651288ed51ead2e5b5f2b8db25c5a Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Mon, 23 Jan 2023 13:35:15 +0100 Subject: [PATCH 1/3] sql: avoid panic when normalizing ALTER TENANT REPLICATION Release note: None --- pkg/ccl/logictestccl/testdata/logic_test/tenant | 6 ++++++ pkg/sql/sem/tree/walk.go | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pkg/ccl/logictestccl/testdata/logic_test/tenant b/pkg/ccl/logictestccl/testdata/logic_test/tenant index a132f0c00a06..835ae79b09de 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/tenant +++ b/pkg/ccl/logictestccl/testdata/logic_test/tenant @@ -53,3 +53,9 @@ SELECT * FROM crdb_internal.cluster_queries statement ok SELECT * FROM crdb_internal.cluster_contention_events + +subtest regression_95612 + +statement ok +PREPARE pstmt AS ALTER TENANT 'foo' COMPLETE REPLICATION TO LATEST + diff --git a/pkg/sql/sem/tree/walk.go b/pkg/sql/sem/tree/walk.go index 3757bac8345c..ecd862a67dbb 100644 --- a/pkg/sql/sem/tree/walk.go +++ b/pkg/sql/sem/tree/walk.go @@ -994,7 +994,7 @@ func (n *AlterTenantReplication) walkStmt(v Visitor) Statement { } ret.TenantSpec = ts } - if n.Cutover != nil { + if n.Cutover != nil && n.Cutover.Timestamp != nil { e, changed := WalkExpr(v, n.Cutover.Timestamp) if changed { if ret == n { From 15266fdeed707dd1f08f35bdf1e6b654c9140527 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Mon, 23 Jan 2023 13:40:06 +0100 Subject: [PATCH 2/3] sql/parser: double check the WITH handling in BACKUP We have a random syntax generation test failing on this specific syntax form. We suspect the test was running on an old SHA where the WITH rendering was not fixed yet. But we're not sure; so this commit adds the specific form to double check. Release note: None --- pkg/sql/parser/testdata/backup_restore | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/sql/parser/testdata/backup_restore b/pkg/sql/parser/testdata/backup_restore index 4d806976e569..950df44ffaa1 100644 --- a/pkg/sql/parser/testdata/backup_restore +++ b/pkg/sql/parser/testdata/backup_restore @@ -855,3 +855,12 @@ DETAIL: source SQL: RESTORE ROLE foo, bar FROM 'baz' ^ HINT: try \h RESTORE + +# Regression test for #95612 +parse +BACKUP INTO LATEST IN UNLOGGED WITH OPTIONS ( DETACHED = FALSE ) +---- +BACKUP INTO LATEST IN 'unlogged' WITH detached = FALSE -- normalized! +BACKUP INTO LATEST IN ('unlogged') WITH detached = FALSE -- fully parenthesized +BACKUP INTO LATEST IN '_' WITH detached = FALSE -- literals removed +BACKUP INTO LATEST IN 'unlogged' WITH detached = FALSE -- identifiers removed From cca03b25a6f5ae994fad5d92fcbf0eb38b45c9bb Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Mon, 23 Jan 2023 14:00:14 +0100 Subject: [PATCH 3/3] sql: avoid a RSG error on tenant specs containing a cast Prior to this commit, the RSG parse-after-format test was choking on `ALTER TENANT 'foo' MINUTE`. This commit fixes that. This is achieved by introducing a new rule during pretty-printing: if the expression in a TenantSpec has the property "well-enclosed expression" (`alreadyDelimitedAsSyntacticDExpr` in code), it can be pretty-printed without enclosing parentheses. Otherwise, parentheses must be added. More details follow for the interested reader. ---- There are two salient aspects of this patch. - why it works (what needed to be done). This is explained by the definition of this new interface type `alreadyDelimitedAsSyntacticDExpr`, included for reference below. - why we only needed to do this now, i.e. what makes ALTER TENANT special (and why we haven't needed to do so earlier). The second point is perhaps the most interesting of the two. ---- **As to why we only needed to do this now, and why we may not do this in too many other places.** The problem, and its solution, exists every time we would like to include a syntactic expression inside a grammar rule, where the expression is followed by more keywords. As of this writing, *we do not have other syntax rules that do this* besides ALTER/DROP TENANT. This is why ALTER/DROP TENANT was the first time we actually had to deal with this problem. This, in turn, raises two additional questions. The first is why we found it so important to make ALTER/DROP TENANT accept an expression in third position. The reason for this is that we are building a service control plane where we want to pass the identity of the tenant as a variable (via a placeholder), and we will in the future want to place subqueries in that position. This will make us able to compose the SQL syntax with a more complex orchestration logic. The other question is why we haven't tried to use expressions in other places. For example, we know there is appetite to make the table name in ALTER TABLE a variable so it can be passed via a placeholder. However, in doing so we still should be careful: the mechanism that ALTER TENANT uses, that of parsing the expression as a `d_expr` instead of `a_expr` or `b_expr`, is only possible when *every syntax form following the expression only uses a keyword that will never be part of the expression*. That is, if/when someone chooses to define a grammar rule: ``` rule: A B d_expr C D ``` ... they are committing to a future where the keyword C will _never_ be used as an extension form for one of the `d_expr` expression syntax. We control this future for ALTER/DROP TENANT, which is a CockroachDB oddity, but we can't know what other SQL databases will ever add to their CREATE TABLE and other syntax which we may want to emulate later. ---- **How the special case is defined** Quoth the docstring for the new interface: ``` // alreadyDelimitedAsSyntacticDExpr is an interface that marks // Expr types for which there is never an ambiguity when // the expression syntax is followed by a non-reserved // keyword. When this property is true, that expression // can be pretty-printed without enclosing parentheses in // a context followed by more non-reserved keywords, and // result in syntax that is still unambiguous. // That is, given an expression E and an arbitrary following // word X, the syntax "E X" is always unambiguously parsed // as "(E) X". // // This property is obviously true of "atomic" expressions such as // string and number literals, and also obviously true of // well-enclosed expressions "(...)" / "[...]". However, it is not // always true of other composite expression types. For example, // "A::B" (CastExpr) is not well-delimited because there are // identifiers/keywords such that "A::B C" can be parsed as "A::(B // C)". Consider "'a'::INTERVAL" and the non-reserved keyword // "MINUTE". // // This property is closely related to the d_expr syntactic rule in // the grammar, hence its name. *Approximately* the expression types // produced by the d_expr rule tend to exhibit the "well-delimited" // property. However, this is not a proper equivalence: certain Expr // types are _also_ produced by other parsing rules than d_expr, so // inspection of the contents of the Expr object is necessary to // determine whether it is well-delimited or not (for example, some // FuncExpr objects are well-delimited, and others are not). // Therefore, it is not generally correct to assign the property to // all the d_expr expression *types*. We can only do so for a few // types for which we know that *all possible objects* of that type // are well-delimited, such as Subquery, NumVal or Placeholder. ``` Release note: None --- pkg/sql/parser/testdata/alter_tenant | 9 +++++ pkg/sql/sem/tree/alter_tenant.go | 55 ++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/pkg/sql/parser/testdata/alter_tenant b/pkg/sql/parser/testdata/alter_tenant index ba86731fcc6f..88301819bda9 100644 --- a/pkg/sql/parser/testdata/alter_tenant +++ b/pkg/sql/parser/testdata/alter_tenant @@ -190,3 +190,12 @@ ALTER TENANT 'foo' RENAME TO bar ALTER TENANT ('foo') RENAME TO (bar) -- fully parenthesized ALTER TENANT '_' RENAME TO bar -- literals removed ALTER TENANT 'foo' RENAME TO _ -- identifiers removed + +# Regression test for #95612 +parse +ALTER TENANT INTERVAL 'string' MINUTE RESET CLUSTER SETTING ident +---- +ALTER TENANT ('string'::INTERVAL MINUTE) SET CLUSTER SETTING ident = DEFAULT -- normalized! +ALTER TENANT ((('string')::INTERVAL MINUTE)) SET CLUSTER SETTING ident = (DEFAULT) -- fully parenthesized +ALTER TENANT ('_'::INTERVAL MINUTE) SET CLUSTER SETTING ident = DEFAULT -- literals removed +ALTER TENANT ('string'::INTERVAL MINUTE) SET CLUSTER SETTING ident = DEFAULT -- identifiers removed diff --git a/pkg/sql/sem/tree/alter_tenant.go b/pkg/sql/sem/tree/alter_tenant.go index c0329f241a6c..66ff0ea47ee9 100644 --- a/pkg/sql/sem/tree/alter_tenant.go +++ b/pkg/sql/sem/tree/alter_tenant.go @@ -55,12 +55,67 @@ type TenantSpec struct { All bool } +// alreadyDelimitedAsSyntacticDExpr is an interface that marks +// Expr types for which there is never an ambiguity when +// the expression syntax is followed by a non-reserved +// keyword. When this property is true, that expression +// can be pretty-printed without enclosing parentheses in +// a context followed by more non-reserved keywords, and +// result in syntax that is still unambiguous. +// That is, given an expression E and an arbitrary following +// word X, the syntax "E X" is always unambiguously parsed +// as "(E) X". +// +// This property is obviously true of "atomic" expressions such as +// string and number literals, and also obviously true of +// well-enclosed expressions "(...)" / "[...]". However, it is not +// always true of other composite expression types. For example, +// "A::B" (CastExpr) is not well-delimited because there are +// identifiers/keywords such that "A::B C" can be parsed as "A::(B +// C)". Consider "'a'::INTERVAL" and the non-reserved keyword +// "MINUTE". +// +// This property is closely related to the d_expr syntactic rule in +// the grammar, hence its name. *Approximately* the expression types +// produced by the d_expr rule tend to exhibit the "well-delimited" +// property. However, this is not a proper equivalence: certain Expr +// types are _also_ produced by other parsing rules than d_expr, so +// inspection of the contents of the Expr object is necessary to +// determine whether it is well-delimited or not (for example, some +// FuncExpr objects are well-delimited, and others are not). +// Therefore, it is not generally correct to assign the property to +// all the d_expr expression *types*. We can only do so for a few +// types for which we know that *all possible objects* of that type +// are well-delimited, such as Subquery, NumVal or Placeholder. +type alreadyDelimitedAsSyntacticDExpr interface { + Expr + alreadyDelimitedAsSyntacticDExpr() +} + +func (*UnresolvedName) alreadyDelimitedAsSyntacticDExpr() {} +func (*ParenExpr) alreadyDelimitedAsSyntacticDExpr() {} +func (*Subquery) alreadyDelimitedAsSyntacticDExpr() {} +func (*Placeholder) alreadyDelimitedAsSyntacticDExpr() {} +func (*NumVal) alreadyDelimitedAsSyntacticDExpr() {} +func (*StrVal) alreadyDelimitedAsSyntacticDExpr() {} +func (dNull) alreadyDelimitedAsSyntacticDExpr() {} + // Format implements the NodeFormatter interface. func (n *TenantSpec) Format(ctx *FmtCtx) { if n.All { ctx.WriteString("ALL") } else if n.IsName { + // Beware to enclose the expression within parentheses if it is + // not a simple identifier and is not already enclosed in + // parentheses. + _, canOmitParentheses := n.Expr.(alreadyDelimitedAsSyntacticDExpr) + if !canOmitParentheses { + ctx.WriteByte('(') + } ctx.FormatNode(n.Expr) + if !canOmitParentheses { + ctx.WriteByte(')') + } } else { ctx.WriteByte('[') ctx.FormatNode(n.Expr)