From a2b871eada29ccccbd47d951ef33d9f56706fc72 Mon Sep 17 00:00:00 2001 From: Faizaan Madhani Date: Tue, 27 Sep 2022 17:04:18 -0400 Subject: [PATCH 1/5] sql: add support for `DELETE FROM ... USING` to parser Previously, the statement `DELETE FROM .. USING` would return an unimplemented error. This commit adds production rules in the parser to handle the `USING` clause in a `DELETE` statement, however usage will return an error as support has not been implemented in `optbuilder`. Release note: None --- docs/generated/sql/bnf/delete_stmt.bnf | 2 +- docs/generated/sql/bnf/stmt_block.bnf | 166 +++++++++++---------- pkg/sql/opt/optbuilder/delete.go | 2 +- pkg/sql/opt/optbuilder/mutation_builder.go | 12 +- pkg/sql/parser/sql.y | 14 +- pkg/sql/parser/testdata/delete | 50 +++++++ pkg/sql/sem/tree/delete.go | 5 + pkg/sql/sem/tree/pretty.go | 8 +- 8 files changed, 170 insertions(+), 89 deletions(-) diff --git a/docs/generated/sql/bnf/delete_stmt.bnf b/docs/generated/sql/bnf/delete_stmt.bnf index 24c383ee0c15..1fb9827344f9 100644 --- a/docs/generated/sql/bnf/delete_stmt.bnf +++ b/docs/generated/sql/bnf/delete_stmt.bnf @@ -1,2 +1,2 @@ delete_stmt ::= - ( ( 'WITH' ( ( common_table_expr ) ( ( ',' common_table_expr ) )* ) | 'WITH' 'RECURSIVE' ( ( common_table_expr ) ( ( ',' common_table_expr ) )* ) ) | ) 'DELETE' 'FROM' ( ( ( 'ONLY' | ) table_name opt_index_flags ( '*' | ) ) | ( ( 'ONLY' | ) table_name opt_index_flags ( '*' | ) ) table_alias_name | ( ( 'ONLY' | ) table_name opt_index_flags ( '*' | ) ) 'AS' table_alias_name ) ( ( 'WHERE' a_expr ) | ) ( sort_clause | ) ( limit_clause | ) ( 'RETURNING' target_list | 'RETURNING' 'NOTHING' | ) + ( ( 'WITH' ( ( common_table_expr ) ( ( ',' common_table_expr ) )* ) | 'WITH' 'RECURSIVE' ( ( common_table_expr ) ( ( ',' common_table_expr ) )* ) ) | ) 'DELETE' 'FROM' ( ( ( 'ONLY' | ) table_name opt_index_flags ( '*' | ) ) | ( ( 'ONLY' | ) table_name opt_index_flags ( '*' | ) ) table_alias_name | ( ( 'ONLY' | ) table_name opt_index_flags ( '*' | ) ) 'AS' table_alias_name ) opt_using_clause ( ( 'WHERE' a_expr ) | ) ( sort_clause | ) ( limit_clause | ) ( 'RETURNING' target_list | 'RETURNING' 'NOTHING' | ) diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index 200677b447cc..af80317983e5 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -193,7 +193,7 @@ create_stmt ::= | create_external_connection_stmt delete_stmt ::= - opt_with_clause 'DELETE' 'FROM' table_expr_opt_alias_idx opt_where_clause opt_sort_clause opt_limit_clause returning_clause + opt_with_clause 'DELETE' 'FROM' table_expr_opt_alias_idx opt_using_clause opt_where_clause opt_sort_clause opt_limit_clause returning_clause drop_stmt ::= drop_ddl_stmt @@ -597,6 +597,10 @@ table_expr_opt_alias_idx ::= | table_name_opt_idx table_alias_name | table_name_opt_idx 'AS' table_alias_name +opt_using_clause ::= + 'USING' from_list + | + opt_sort_clause ::= sort_clause | @@ -1769,6 +1773,9 @@ with_clause ::= table_name_opt_idx ::= opt_only table_name opt_index_flags opt_descendant +from_list ::= + ( table_ref ) ( ( ',' table_ref ) )* + sort_clause ::= 'ORDER' 'BY' sortby_list @@ -1969,9 +1976,6 @@ set_clause ::= single_set_clause | multiple_set_clause -from_list ::= - ( table_ref ) ( ( ',' table_ref ) )* - simple_db_object_name ::= db_object_name_component @@ -2485,6 +2489,16 @@ opt_descendant ::= '*' | +table_ref ::= + relation_expr opt_index_flags opt_ordinality opt_alias_clause + | select_with_parens opt_ordinality opt_alias_clause + 
| 'LATERAL' select_with_parens opt_ordinality opt_alias_clause + | joined_table + | '(' joined_table ')' opt_ordinality alias_clause + | func_table opt_ordinality opt_func_alias_clause + | 'LATERAL' func_table opt_ordinality opt_alias_clause + | '[' row_source_extension_stmt ']' opt_ordinality opt_alias_clause + sortby_list ::= ( sortby ) ( ( ',' sortby ) )* @@ -2600,16 +2614,6 @@ single_set_clause ::= multiple_set_clause ::= '(' insert_column_list ')' '=' in_expr -table_ref ::= - relation_expr opt_index_flags opt_ordinality opt_alias_clause - | select_with_parens opt_ordinality opt_alias_clause - | 'LATERAL' select_with_parens opt_ordinality opt_alias_clause - | joined_table - | '(' joined_table ')' opt_ordinality alias_clause - | func_table opt_ordinality opt_func_alias_clause - | 'LATERAL' func_table opt_ordinality opt_alias_clause - | '[' row_source_extension_stmt ']' opt_ordinality opt_alias_clause - type_func_name_crdb_extra_keyword ::= 'FAMILY' @@ -3058,6 +3062,43 @@ common_table_expr ::= index_flags_param_list ::= ( index_flags_param ) ( ( ',' index_flags_param ) )* +opt_ordinality ::= + 'WITH' 'ORDINALITY' + | + +opt_alias_clause ::= + alias_clause + | + +joined_table ::= + '(' joined_table ')' + | table_ref 'CROSS' opt_join_hint 'JOIN' table_ref + | table_ref join_type opt_join_hint 'JOIN' table_ref join_qual + | table_ref 'JOIN' table_ref join_qual + | table_ref 'NATURAL' join_type opt_join_hint 'JOIN' table_ref + | table_ref 'NATURAL' 'JOIN' table_ref + +alias_clause ::= + 'AS' table_alias_name opt_col_def_list_no_types + | table_alias_name opt_col_def_list_no_types + +func_table ::= + func_expr_windowless + | 'ROWS' 'FROM' '(' rowsfrom_list ')' + +opt_func_alias_clause ::= + func_alias_clause + | + +row_source_extension_stmt ::= + delete_stmt + | explain_stmt + | insert_stmt + | select_stmt + | show_stmt + | update_stmt + | upsert_stmt + sortby ::= a_expr opt_asc_desc opt_nulls_order | 'PRIMARY' 'KEY' table_name opt_asc_desc @@ -3117,43 +3158,6 @@ var_list ::= schema_wildcard ::= wildcard_pattern -opt_ordinality ::= - 'WITH' 'ORDINALITY' - | - -opt_alias_clause ::= - alias_clause - | - -joined_table ::= - '(' joined_table ')' - | table_ref 'CROSS' opt_join_hint 'JOIN' table_ref - | table_ref join_type opt_join_hint 'JOIN' table_ref join_qual - | table_ref 'JOIN' table_ref join_qual - | table_ref 'NATURAL' join_type opt_join_hint 'JOIN' table_ref - | table_ref 'NATURAL' 'JOIN' table_ref - -alias_clause ::= - 'AS' table_alias_name opt_col_def_list_no_types - | table_alias_name opt_col_def_list_no_types - -func_table ::= - func_expr_windowless - | 'ROWS' 'FROM' '(' rowsfrom_list ')' - -opt_func_alias_clause ::= - func_alias_clause - | - -row_source_extension_stmt ::= - delete_stmt - | explain_stmt - | insert_stmt - | select_stmt - | show_stmt - | update_stmt - | upsert_stmt - type_func_name_no_crdb_extra_keyword ::= 'AUTHORIZATION' | 'COLLATION' @@ -3483,6 +3487,30 @@ index_flags_param ::= | 'FORCE_ZIGZAG' | 'FORCE_ZIGZAG' '=' index_name +opt_join_hint ::= + 'HASH' + | 'MERGE' + | 'LOOKUP' + | 'INVERTED' + | + +join_type ::= + 'FULL' join_outer + | 'LEFT' join_outer + | 'RIGHT' join_outer + | 'INNER' + +join_qual ::= + 'USING' '(' name_list ')' + | 'ON' a_expr + +rowsfrom_list ::= + ( rowsfrom_item ) ( ( ',' rowsfrom_item ) )* + +func_alias_clause ::= + 'AS' table_alias_name opt_col_def_list + | table_alias_name opt_col_def_list + opt_asc_desc ::= 'ASC' | 'DESC' @@ -3515,30 +3543,6 @@ opt_nowait_or_skip ::= wildcard_pattern ::= name '.' 
'*' -opt_join_hint ::= - 'HASH' - | 'MERGE' - | 'LOOKUP' - | 'INVERTED' - | - -join_type ::= - 'FULL' join_outer - | 'LEFT' join_outer - | 'RIGHT' join_outer - | 'INNER' - -join_qual ::= - 'USING' '(' name_list ')' - | 'ON' a_expr - -rowsfrom_list ::= - ( rowsfrom_item ) ( ( ',' rowsfrom_item ) )* - -func_alias_clause ::= - 'AS' table_alias_name opt_col_def_list - | table_alias_name opt_col_def_list - func_arg ::= func_arg_class param_name func_arg_type | param_name func_arg_class func_arg_type @@ -3752,12 +3756,6 @@ func_as ::= col_def_list_no_types ::= ( name ) ( ( ',' name ) )* -group_by_item ::= - a_expr - -window_definition ::= - window_name 'AS' window_specification - join_outer ::= 'OUTER' | @@ -3768,6 +3766,12 @@ rowsfrom_item ::= opt_col_def_list ::= '(' col_def_list ')' +group_by_item ::= + a_expr + +window_definition ::= + window_name 'AS' window_specification + func_arg_class ::= 'IN' diff --git a/pkg/sql/opt/optbuilder/delete.go b/pkg/sql/opt/optbuilder/delete.go index 26448035c5d9..16d9cbcb3f4d 100644 --- a/pkg/sql/opt/optbuilder/delete.go +++ b/pkg/sql/opt/optbuilder/delete.go @@ -62,7 +62,7 @@ func (b *Builder) buildDelete(del *tree.Delete, inScope *scope) (outScope *scope // ORDER BY LIMIT // // All columns from the delete table will be projected. - mb.buildInputForDelete(inScope, del.Table, del.Where, del.Limit, del.OrderBy) + mb.buildInputForDelete(inScope, del.Table, del.Where, del.Using, del.Limit, del.OrderBy) // Build the final delete statement, including any returned expressions. if resultsNeeded(del.Returning) { diff --git a/pkg/sql/opt/optbuilder/mutation_builder.go b/pkg/sql/opt/optbuilder/mutation_builder.go index df9312a9a341..5dba82f2f80e 100644 --- a/pkg/sql/opt/optbuilder/mutation_builder.go +++ b/pkg/sql/opt/optbuilder/mutation_builder.go @@ -384,7 +384,12 @@ func (mb *mutationBuilder) buildInputForUpdate( // All columns from the table to update are added to fetchColList. // TODO(andyk): Do needed column analysis to project fewer columns if possible. 
func (mb *mutationBuilder) buildInputForDelete( - inScope *scope, texpr tree.TableExpr, where *tree.Where, limit *tree.Limit, orderBy tree.OrderBy, + inScope *scope, + texpr tree.TableExpr, + where *tree.Where, + using tree.TableExprs, + limit *tree.Limit, + orderBy tree.OrderBy, ) { var indexFlags *tree.IndexFlags if source, ok := texpr.(*tree.AliasedTableExpr); ok && source.IndexFlags != nil { @@ -425,6 +430,11 @@ func (mb *mutationBuilder) buildInputForDelete( mb.b.buildOrderBy(mb.outScope, projectionsScope, orderByScope) mb.b.constructProjectForScope(mb.outScope, projectionsScope) + // USING + if using != nil { + panic("DELETE USING is unimplemented so should not be used") + } + // LIMIT if limit != nil { mb.b.buildLimit(limit, inScope, projectionsScope) diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index 420ee4e38183..b4b8035c28c7 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -1375,7 +1375,7 @@ func (u *sqlSymUnion) functionObjs() tree.FuncObjs { %type <*tree.Limit> select_limit opt_select_limit %type relation_expr_list %type returning_clause -%type opt_using_clause +%type opt_using_clause %type opt_clear_data %type <[]tree.SequenceOption> sequence_option_list opt_sequence_option_list @@ -4824,6 +4824,7 @@ opt_changefeed_sink: // %Category: DML // %Text: DELETE FROM [WHERE ] // [ORDER BY ] +// [USING ] // [LIMIT ] // [RETURNING ] // %SeeAlso: WEBDOCS/delete.html @@ -4833,6 +4834,7 @@ delete_stmt: $$.val = &tree.Delete{ With: $1.with(), Table: $4.tblExpr(), + Using: $5.tblExprs(), Where: tree.NewWhere(tree.AstWhere, $6.expr()), OrderBy: $7.orderBy(), Limit: $8.limit(), @@ -4842,8 +4844,14 @@ delete_stmt: | opt_with_clause DELETE error // SHOW HELP: DELETE opt_using_clause: - USING from_list { return unimplementedWithIssueDetail(sqllex, 40963, "delete using") } -| /* EMPTY */ { } + USING from_list + { + $$.val = $2.tblExprs() + } +| /* EMPTY */ + { + $$.val = tree.TableExprs{} + } // %Help: DISCARD - reset the session to its initial state diff --git a/pkg/sql/parser/testdata/delete b/pkg/sql/parser/testdata/delete index 98576008e4fa..d715bff0057b 100644 --- a/pkg/sql/parser/testdata/delete +++ b/pkg/sql/parser/testdata/delete @@ -125,3 +125,53 @@ DELETE FROM a WHERE a = b -- normalized! DELETE FROM a WHERE ((a) = (b)) -- fully parenthesized DELETE FROM a WHERE a = b -- literals removed DELETE FROM _ WHERE _ = _ -- identifiers removed + +parse +DELETE FROM a USING b +---- +DELETE FROM a USING b +DELETE FROM a USING b -- fully parenthesized +DELETE FROM a USING b -- literals removed +DELETE FROM _ USING _ -- identifiers removed + +parse +DELETE FROM a USING b WHERE c = d +---- +DELETE FROM a USING b WHERE c = d +DELETE FROM a USING b WHERE ((c) = (d)) -- fully parenthesized +DELETE FROM a USING b WHERE c = d -- literals removed +DELETE FROM _ USING _ WHERE _ = _ -- identifiers removed + +parse +DELETE FROM a USING b WHERE c = d AND e = f +---- +DELETE FROM a USING b WHERE (c = d) AND (e = f) -- normalized! +DELETE FROM a USING b WHERE ((((c) = (d))) AND (((e) = (f)))) -- fully parenthesized +DELETE FROM a USING b WHERE (c = d) AND (e = f) -- literals removed +DELETE FROM _ USING _ WHERE (_ = _) AND (_ = _) -- identifiers removed + +parse +DELETE FROM a USING b, c WHERE d > e AND e < f +---- +DELETE FROM a USING b, c WHERE (d > e) AND (e < f) -- normalized! 
+DELETE FROM a USING b, c WHERE ((((d) > (e))) AND (((e) < (f)))) -- fully parenthesized
+DELETE FROM a USING b, c WHERE (d > e) AND (e < f) -- literals removed
+DELETE FROM _ USING _, _ WHERE (_ > _) AND (_ < _) -- identifiers removed
+
+parse
+DELETE FROM a USING b, c, d AS other WHERE e = f AND g = h OR i = j
+----
+DELETE FROM a USING b, c, d AS other WHERE ((e = f) AND (g = h)) OR (i = j) -- normalized!
+DELETE FROM a USING b, c, d AS other WHERE ((((((e) = (f))) AND (((g) = (h))))) OR (((i) = (j)))) -- fully parenthesized
+DELETE FROM a USING b, c, d AS other WHERE ((e = f) AND (g = h)) OR (i = j) -- literals removed
+DELETE FROM _ USING _, _, _ AS _ WHERE ((_ = _) AND (_ = _)) OR (_ = _) -- identifiers removed
+
+parse
+DELETE FROM a USING b AS one, c AS two, d AS three, e AS four WHERE f != g AND g = h RETURNING e
+----
+DELETE FROM a USING b AS one, c AS two, d AS three, e AS four WHERE (f != g) AND (g = h) RETURNING e -- normalized!
+DELETE FROM a USING b AS one, c AS two, d AS three, e AS four WHERE ((((f) != (g))) AND (((g) = (h)))) RETURNING (e) -- fully parenthesized
+DELETE FROM a USING b AS one, c AS two, d AS three, e AS four WHERE (f != g) AND (g = h) RETURNING e -- literals removed
+DELETE FROM _ USING _ AS _, _ AS _, _ AS _, _ AS _ WHERE (_ != _) AND (_ = _) RETURNING _ -- identifiers removed
+
+
diff --git a/pkg/sql/sem/tree/delete.go b/pkg/sql/sem/tree/delete.go
index 8f24d8edfe8b..998dba9d48b8 100644
--- a/pkg/sql/sem/tree/delete.go
+++ b/pkg/sql/sem/tree/delete.go
@@ -25,6 +25,7 @@ type Delete struct {
 Table TableExpr
 Where *Where
 OrderBy OrderBy
+ Using TableExprs
 Limit *Limit
 Returning ReturningClause
 }
@@ -34,6 +35,10 @@ func (node *Delete) Format(ctx *FmtCtx) {
 ctx.FormatNode(node.With)
 ctx.WriteString("DELETE FROM ")
 ctx.FormatNode(node.Table)
+ if len(node.Using) > 0 {
+ ctx.WriteString(" USING ")
+ ctx.FormatNode(&node.Using)
+ }
 if node.Where != nil {
 ctx.WriteByte(' ')
 ctx.FormatNode(node.Where)
diff --git a/pkg/sql/sem/tree/pretty.go b/pkg/sql/sem/tree/pretty.go
index 49319434e24c..b56c73517a72 100644
--- a/pkg/sql/sem/tree/pretty.go
+++ b/pkg/sql/sem/tree/pretty.go
@@ -1161,10 +1161,14 @@ func (node *Update) doc(p *PrettyCfg) pretty.Doc {
 }
 
 func (node *Delete) doc(p *PrettyCfg) pretty.Doc {
- items := make([]pretty.TableRow, 0, 6)
+ items := make([]pretty.TableRow, 0, 7)
 items = append(items,
 node.With.docRow(p),
- p.row("DELETE FROM", p.Doc(node.Table)),
+ p.row("DELETE FROM", p.Doc(node.Table)))
+ if len(node.Using) > 0 {
+ items = append(items, p.row("USING", p.Doc(&node.Using)))
+ }
+ items = append(items,
 node.Where.docRow(p),
 node.OrderBy.docRow(p))
 items = append(items, node.Limit.docTable(p)...)

From b3759000665ea8a4fe1a7f5f859010ed3ea5403d Mon Sep 17 00:00:00 2001
From: Faizaan Madhani
Date: Mon, 10 Oct 2022 12:09:59 -0500
Subject: [PATCH 2/5] sql: add support for `DELETE FROM ... USING` to
 optbuilder

Previously, the optbuilder would return an error when given SQL
statements of the form `DELETE FROM ... USING`.

This commit adds support to the optbuilder to build query plans for
statements of the form `DELETE FROM ... USING`.
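For reference, this is the statement shape the change enables, driven from an
ordinary client connection. A minimal sketch: the `orders`/`customers` schema
and the connection URL are hypothetical, and CockroachDB is assumed to be
reachable over the Postgres wire protocol.

```go
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq" // CockroachDB speaks the Postgres wire protocol.
)

func main() {
	// Hypothetical single-node cluster; adjust the URL as needed.
	db, err := sql.Open("postgres",
		"postgresql://root@localhost:26257/defaultdb?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Rows are deleted from orders only; customers is read solely to
	// decide which orders qualify.
	res, err := db.Exec(`
		DELETE FROM orders
		USING customers
		WHERE orders.customer_id = customers.id AND customers.banned`)
	if err != nil {
		log.Fatal(err)
	}
	n, _ := res.RowsAffected()
	fmt.Printf("deleted %d rows\n", n)
}
```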
Release note: None --- pkg/sql/opt/memo/expr_format.go | 1 + pkg/sql/opt/ops/mutation.opt | 7 +- pkg/sql/opt/optbuilder/delete.go | 5 + pkg/sql/opt/optbuilder/mutation_builder.go | 70 +- pkg/sql/opt/optbuilder/testdata/delete | 728 +++++++++++++++++++++ 5 files changed, 795 insertions(+), 16 deletions(-) diff --git a/pkg/sql/opt/memo/expr_format.go b/pkg/sql/opt/memo/expr_format.go index 0b9e10429ce3..4df3270123bd 100644 --- a/pkg/sql/opt/memo/expr_format.go +++ b/pkg/sql/opt/memo/expr_format.go @@ -684,6 +684,7 @@ func (f *ExprFmtCtx) formatRelational(e RelExpr, tp treeprinter.Node) { } f.formatOptionalColList(e, tp, "fetch columns:", t.FetchCols) f.formatMutationCols(e, tp, "return-mapping:", t.ReturnCols, t.Table) + f.formatOptionalColList(e, tp, "passthrough columns", opt.OptionalColList(t.PassthroughCols)) f.formatOptionalColList(e, tp, "partial index del columns:", t.PartialIndexDelCols) f.formatMutationCommon(tp, &t.MutationPrivate) } diff --git a/pkg/sql/opt/ops/mutation.opt b/pkg/sql/opt/ops/mutation.opt index 734e1ca0efc8..cc9670beb477 100644 --- a/pkg/sql/opt/ops/mutation.opt +++ b/pkg/sql/opt/ops/mutation.opt @@ -162,9 +162,10 @@ define MutationPrivate { # PassthroughCols are columns that the mutation needs to passthrough from # its input. It's similar to the passthrough columns in projections. This - # is useful for `UPDATE .. FROM` mutations where the `RETURNING` clause - # references columns from tables in the `FROM` clause. When this happens - # the update will need to pass through those refenced columns from its input. + # is useful for `UPDATE .. FROM` and `DELETE ... USING` mutations where the + # `RETURNING` clause references columns from tables in the `FROM` or `USING` + # clause, respectively. When this happens the mutation will need to pass through + # those referenced columns from its input. PassthroughCols ColList # Mutation operators can act similarly to a With operator: they buffer their diff --git a/pkg/sql/opt/optbuilder/delete.go b/pkg/sql/opt/optbuilder/delete.go index 16d9cbcb3f4d..834eee850a8c 100644 --- a/pkg/sql/opt/optbuilder/delete.go +++ b/pkg/sql/opt/optbuilder/delete.go @@ -83,6 +83,11 @@ func (mb *mutationBuilder) buildDelete(returning tree.ReturningExprs) { mb.projectPartialIndexDelCols() private := mb.makeMutationPrivate(returning != nil) + for _, col := range mb.extraAccessibleCols { + if col.id != 0 { + private.PassthroughCols = append(private.PassthroughCols, col.id) + } + } mb.outScope.expr = mb.b.factory.ConstructDelete( mb.outScope.expr, mb.uniqueChecks, mb.fkChecks, private, ) diff --git a/pkg/sql/opt/optbuilder/mutation_builder.go b/pkg/sql/opt/optbuilder/mutation_builder.go index 5dba82f2f80e..b51d50b7a90f 100644 --- a/pkg/sql/opt/optbuilder/mutation_builder.go +++ b/pkg/sql/opt/optbuilder/mutation_builder.go @@ -184,8 +184,9 @@ type mutationBuilder struct { // extraAccessibleCols stores all the columns that are available to the // mutation that are not part of the target table. This is useful for - // UPDATE ... FROM queries, as the columns from the FROM tables must be - // made accessible to the RETURNING clause. + // UPDATE ... FROM queries and DELETE ... USING queries, as the columns + // from the FROM and USING tables must be made accessible to the + // RETURNING clause, respectively. extraAccessibleCols []scopeColumn // fkCheckHelper is used to prevent allocating the helper separately. @@ -376,7 +377,7 @@ func (mb *mutationBuilder) buildInputForUpdate( // the Delete operator, similar to this: // // SELECT -// FROM +// FROM
[, ] // WHERE // ORDER BY // LIMIT @@ -418,7 +419,39 @@ func (mb *mutationBuilder) buildInputForDelete( inScope, false, /* disableNotVisibleIndex */ ) - mb.outScope = mb.fetchScope + + // Set list of columns that will be fetched by the input expression. + mb.setFetchColIDs(mb.fetchScope.cols) + + // USING + usingClausePresent := len(using) > 0 + if usingClausePresent { + usingScope := mb.b.buildFromTables(using, noRowLocking, inScope) + + // Check that the same table name is not used multiple times + mb.b.validateJoinTableNames(mb.fetchScope, usingScope) + + // The USING table columns can be accessed by the RETURNING clause of the + // query and so we have to make them accessible. + mb.extraAccessibleCols = usingScope.cols + + // Add the columns to the USING scope. + // We create a new scope so that fetchScope is not modified + // as fetchScope contains the set of columns from the target + // table specified by USING. This will be used later with partial + // index predicate expressions and will prevent ambiguities with + // column names in the USING clause. + mb.outScope = mb.fetchScope.replace() + mb.outScope.appendColumnsFromScope(mb.fetchScope) + mb.outScope.appendColumnsFromScope(usingScope) + + left := mb.fetchScope.expr + right := usingScope.expr + + mb.outScope.expr = mb.b.factory.ConstructInnerJoin(left, right, memo.TrueFilter, memo.EmptyJoinPrivate) + } else { + mb.outScope = mb.fetchScope + } // WHERE mb.b.buildWhere(where, mb.outScope) @@ -430,11 +463,6 @@ func (mb *mutationBuilder) buildInputForDelete( mb.b.buildOrderBy(mb.outScope, projectionsScope, orderByScope) mb.b.constructProjectForScope(mb.outScope, projectionsScope) - // USING - if using != nil { - panic("DELETE USING is unimplemented so should not be used") - } - // LIMIT if limit != nil { mb.b.buildLimit(limit, inScope, projectionsScope) @@ -442,8 +470,23 @@ func (mb *mutationBuilder) buildInputForDelete( mb.outScope = projectionsScope - // Set list of columns that will be fetched by the input expression. - mb.setFetchColIDs(mb.outScope.cols) + // Build a distinct on to ensure there is at most one row in the joined output + // for every row in the table + if usingClausePresent { + var pkCols opt.ColSet + + // We need to ensure that the join has a maximum of one row for every row + // in the table and we ensure this by constructing a distinct on the primary + // key columns. + primaryIndex := mb.tab.Index(cat.PrimaryIndex) + for i := 0; i < primaryIndex.KeyColumnCount(); i++ { + col := primaryIndex.Column(i) + pkCols.Add(mb.fetchColIDs[col.Ordinal()]) + } + + mb.outScope = mb.b.buildDistinctOn( + pkCols, mb.outScope, false /* nullsAreDistinct */, "" /* errorOnDup */) + } } // addTargetColsByName adds one target column for each of the names in the given @@ -1011,8 +1054,9 @@ func (mb *mutationBuilder) buildReturning(returning tree.ReturningExprs) { // extraAccessibleCols contains all the columns that the RETURNING // clause can refer to in addition to the table columns. This is useful for - // UPDATE ... FROM statements, where all columns from tables in the FROM clause - // are in scope for the RETURNING clause. + // UPDATE ... FROM and DELETE ... USING statements, where all columns from + // tables in the FROM clause and USING clause are in scope for the RETURNING + // clause, respectively. inScope.appendColumns(mb.extraAccessibleCols) // Construct the Project operator that projects the RETURNING expressions. 
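To make the dedup step above concrete: the inner join can emit a target row
once per matching USING row, but each row must be deleted exactly once, which
is what the distinct-on over the target table's primary key guarantees. A toy,
self-contained Go sketch of join-then-distinct-on-PK (illustration only, not
optimizer code):

```go
package main

import "fmt"

func main() {
	type row struct{ pk, c int }
	target := []row{{pk: 1, c: 10}, {pk: 2, c: 20}}
	usingVals := []int{10, 10, 20} // pk=1's value matches two USING rows

	// Inner join plus the WHERE filter: every qualifying (target, using) pair.
	var joined []row
	for _, t := range target {
		for _, u := range usingVals {
			if t.c == u {
				joined = append(joined, t) // pk=1 is emitted twice
			}
		}
	}

	// Distinct-on the primary key: at most one delete per target row.
	seen := make(map[int]bool)
	var toDelete []row
	for _, t := range joined {
		if !seen[t.pk] {
			seen[t.pk] = true
			toDelete = append(toDelete, t)
		}
	}
	fmt.Println(len(joined), len(toDelete)) // prints: 3 2
}
```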
diff --git a/pkg/sql/opt/optbuilder/testdata/delete b/pkg/sql/opt/optbuilder/testdata/delete index 2a439a82c5a1..8c354e8c2b46 100644 --- a/pkg/sql/opt/optbuilder/testdata/delete +++ b/pkg/sql/opt/optbuilder/testdata/delete @@ -32,6 +32,14 @@ CREATE TABLE mutation ( ) ---- +exec-ddl +CREATE TABLE fgh ( + f INT, + g TEXT, + h INT +) +---- + # ------------------------------------------------------------------------------ # Basic tests. # ------------------------------------------------------------------------------ @@ -455,3 +463,723 @@ build DELETE FROM mutation ORDER BY p LIMIT 2 ---- error (42P10): column "p" is being backfilled + +# ------------------------------------------------------------------------------ +# Test USING. +# ------------------------------------------------------------------------------ + +# Test a simple join with a filter. +build format=show-qual +DELETE FROM abcde USING fgh WHERE c = fgh.h AND fgh.g = 'd' +---- +delete t.public.abcde + ├── columns: + ├── fetch columns: t.public.abcde.a:9 t.public.abcde.b:10 t.public.abcde.c:11 t.public.abcde.d:12 t.public.abcde.e:13 t.public.abcde.rowid:14 + ├── passthrough columns t.public.fgh.f:17 t.public.fgh.g:18 t.public.fgh.h:19 t.public.fgh.rowid:20 t.public.fgh.crdb_internal_mvcc_timestamp:21 t.public.fgh.tableoid:22 + └── distinct-on + ├── columns: t.public.abcde.a:9!null t.public.abcde.b:10 t.public.abcde.c:11!null t.public.abcde.d:12 t.public.abcde.e:13 t.public.abcde.rowid:14!null t.public.abcde.crdb_internal_mvcc_timestamp:15 t.public.abcde.tableoid:16 t.public.fgh.f:17 t.public.fgh.g:18!null t.public.fgh.h:19!null t.public.fgh.rowid:20!null t.public.fgh.crdb_internal_mvcc_timestamp:21 t.public.fgh.tableoid:22 + ├── grouping columns: t.public.abcde.rowid:14!null + ├── select + │ ├── columns: t.public.abcde.a:9!null t.public.abcde.b:10 t.public.abcde.c:11!null t.public.abcde.d:12 t.public.abcde.e:13 t.public.abcde.rowid:14!null t.public.abcde.crdb_internal_mvcc_timestamp:15 t.public.abcde.tableoid:16 t.public.fgh.f:17 t.public.fgh.g:18!null t.public.fgh.h:19!null t.public.fgh.rowid:20!null t.public.fgh.crdb_internal_mvcc_timestamp:21 t.public.fgh.tableoid:22 + │ ├── inner-join (cross) + │ │ ├── columns: t.public.abcde.a:9!null t.public.abcde.b:10 t.public.abcde.c:11 t.public.abcde.d:12 t.public.abcde.e:13 t.public.abcde.rowid:14!null t.public.abcde.crdb_internal_mvcc_timestamp:15 t.public.abcde.tableoid:16 t.public.fgh.f:17 t.public.fgh.g:18 t.public.fgh.h:19 t.public.fgh.rowid:20!null t.public.fgh.crdb_internal_mvcc_timestamp:21 t.public.fgh.tableoid:22 + │ │ ├── scan t.public.abcde + │ │ │ ├── columns: t.public.abcde.a:9!null t.public.abcde.b:10 t.public.abcde.c:11 t.public.abcde.d:12 t.public.abcde.e:13 t.public.abcde.rowid:14!null t.public.abcde.crdb_internal_mvcc_timestamp:15 t.public.abcde.tableoid:16 + │ │ │ └── computed column expressions + │ │ │ ├── t.public.abcde.d:12 + │ │ │ │ └── (t.public.abcde.b:10 + t.public.abcde.c:11) + 1 + │ │ │ └── t.public.abcde.e:13 + │ │ │ └── t.public.abcde.a:9 + │ │ ├── scan t.public.fgh + │ │ │ └── columns: t.public.fgh.f:17 t.public.fgh.g:18 t.public.fgh.h:19 t.public.fgh.rowid:20!null t.public.fgh.crdb_internal_mvcc_timestamp:21 t.public.fgh.tableoid:22 + │ │ └── filters (true) + │ └── filters + │ └── (t.public.abcde.c:11 = t.public.fgh.h:19) AND (t.public.fgh.g:18 = 'd') + └── aggregations + ├── first-agg [as=t.public.abcde.a:9] + │ └── t.public.abcde.a:9 + ├── first-agg [as=t.public.abcde.b:10] + │ └── t.public.abcde.b:10 + ├── first-agg [as=t.public.abcde.c:11] + │ └── 
t.public.abcde.c:11 + ├── first-agg [as=t.public.abcde.d:12] + │ └── t.public.abcde.d:12 + ├── first-agg [as=t.public.abcde.e:13] + │ └── t.public.abcde.e:13 + ├── first-agg [as=t.public.abcde.crdb_internal_mvcc_timestamp:15] + │ └── t.public.abcde.crdb_internal_mvcc_timestamp:15 + ├── first-agg [as=t.public.abcde.tableoid:16] + │ └── t.public.abcde.tableoid:16 + ├── first-agg [as=t.public.fgh.f:17] + │ └── t.public.fgh.f:17 + ├── first-agg [as=t.public.fgh.g:18] + │ └── t.public.fgh.g:18 + ├── first-agg [as=t.public.fgh.h:19] + │ └── t.public.fgh.h:19 + ├── first-agg [as=t.public.fgh.rowid:20] + │ └── t.public.fgh.rowid:20 + ├── first-agg [as=t.public.fgh.crdb_internal_mvcc_timestamp:21] + │ └── t.public.fgh.crdb_internal_mvcc_timestamp:21 + └── first-agg [as=t.public.fgh.tableoid:22] + └── t.public.fgh.tableoid:22 + +# Test a self join. +build +DELETE FROM abcde USING abcde abcde2 WHERE abcde.a = abcde2.c +---- +delete abcde + ├── columns: + ├── fetch columns: abcde.a:9 abcde.b:10 abcde.c:11 abcde.d:12 abcde.e:13 abcde.rowid:14 + ├── passthrough columns abcde2.a:17 abcde2.b:18 abcde2.c:19 abcde2.d:20 abcde2.e:21 abcde2.rowid:22 abcde2.crdb_internal_mvcc_timestamp:23 abcde2.tableoid:24 + └── distinct-on + ├── columns: abcde.a:9!null abcde.b:10 abcde.c:11 abcde.d:12 abcde.e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 abcde2.a:17!null abcde2.b:18 abcde2.c:19!null abcde2.d:20 abcde2.e:21 abcde2.rowid:22!null abcde2.crdb_internal_mvcc_timestamp:23 abcde2.tableoid:24 + ├── grouping columns: abcde.rowid:14!null + ├── select + │ ├── columns: abcde.a:9!null abcde.b:10 abcde.c:11 abcde.d:12 abcde.e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 abcde2.a:17!null abcde2.b:18 abcde2.c:19!null abcde2.d:20 abcde2.e:21 abcde2.rowid:22!null abcde2.crdb_internal_mvcc_timestamp:23 abcde2.tableoid:24 + │ ├── inner-join (cross) + │ │ ├── columns: abcde.a:9!null abcde.b:10 abcde.c:11 abcde.d:12 abcde.e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 abcde2.a:17!null abcde2.b:18 abcde2.c:19 abcde2.d:20 abcde2.e:21 abcde2.rowid:22!null abcde2.crdb_internal_mvcc_timestamp:23 abcde2.tableoid:24 + │ │ ├── scan abcde + │ │ │ ├── columns: abcde.a:9!null abcde.b:10 abcde.c:11 abcde.d:12 abcde.e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 + │ │ │ └── computed column expressions + │ │ │ ├── abcde.d:12 + │ │ │ │ └── (abcde.b:10 + abcde.c:11) + 1 + │ │ │ └── abcde.e:13 + │ │ │ └── abcde.a:9 + │ │ ├── scan abcde [as=abcde2] + │ │ │ ├── columns: abcde2.a:17!null abcde2.b:18 abcde2.c:19 abcde2.d:20 abcde2.e:21 abcde2.rowid:22!null abcde2.crdb_internal_mvcc_timestamp:23 abcde2.tableoid:24 + │ │ │ └── computed column expressions + │ │ │ ├── abcde2.d:20 + │ │ │ │ └── (abcde2.b:18 + abcde2.c:19) + 1 + │ │ │ └── abcde2.e:21 + │ │ │ └── abcde2.a:17 + │ │ └── filters (true) + │ └── filters + │ └── abcde.a:9 = abcde2.c:19 + └── aggregations + ├── first-agg [as=abcde.a:9] + │ └── abcde.a:9 + ├── first-agg [as=abcde.b:10] + │ └── abcde.b:10 + ├── first-agg [as=abcde.c:11] + │ └── abcde.c:11 + ├── first-agg [as=abcde.d:12] + │ └── abcde.d:12 + ├── first-agg [as=abcde.e:13] + │ └── abcde.e:13 + ├── first-agg [as=abcde.crdb_internal_mvcc_timestamp:15] + │ └── abcde.crdb_internal_mvcc_timestamp:15 + ├── first-agg [as=abcde.tableoid:16] + │ └── abcde.tableoid:16 + ├── first-agg [as=abcde2.a:17] + │ └── abcde2.a:17 + ├── first-agg [as=abcde2.b:18] + │ └── abcde2.b:18 + ├── first-agg [as=abcde2.c:19] + │ └── 
abcde2.c:19 + ├── first-agg [as=abcde2.d:20] + │ └── abcde2.d:20 + ├── first-agg [as=abcde2.e:21] + │ └── abcde2.e:21 + ├── first-agg [as=abcde2.rowid:22] + │ └── abcde2.rowid:22 + ├── first-agg [as=abcde2.crdb_internal_mvcc_timestamp:23] + │ └── abcde2.crdb_internal_mvcc_timestamp:23 + └── first-agg [as=abcde2.tableoid:24] + └── abcde2.tableoid:24 + +# Test when USING uses multiple tables. +build +DELETE FROM fgh USING abcde, xyz WHERE abcde.c = fgh.f AND xyz.x = fgh.g +---- +delete fgh + ├── columns: + ├── fetch columns: f:7 g:8 h:9 fgh.rowid:10 + ├── passthrough columns a:13 b:14 c:15 d:16 e:17 abcde.rowid:18 abcde.crdb_internal_mvcc_timestamp:19 abcde.tableoid:20 x:21 y:22 z:23 xyz.crdb_internal_mvcc_timestamp:24 xyz.tableoid:25 + └── distinct-on + ├── columns: f:7!null g:8!null h:9 fgh.rowid:10!null fgh.crdb_internal_mvcc_timestamp:11 fgh.tableoid:12 a:13!null b:14 c:15!null d:16 e:17 abcde.rowid:18!null abcde.crdb_internal_mvcc_timestamp:19 abcde.tableoid:20 x:21!null y:22 z:23 xyz.crdb_internal_mvcc_timestamp:24 xyz.tableoid:25 + ├── grouping columns: fgh.rowid:10!null + ├── select + │ ├── columns: f:7!null g:8!null h:9 fgh.rowid:10!null fgh.crdb_internal_mvcc_timestamp:11 fgh.tableoid:12 a:13!null b:14 c:15!null d:16 e:17 abcde.rowid:18!null abcde.crdb_internal_mvcc_timestamp:19 abcde.tableoid:20 x:21!null y:22 z:23 xyz.crdb_internal_mvcc_timestamp:24 xyz.tableoid:25 + │ ├── inner-join (cross) + │ │ ├── columns: f:7 g:8 h:9 fgh.rowid:10!null fgh.crdb_internal_mvcc_timestamp:11 fgh.tableoid:12 a:13!null b:14 c:15 d:16 e:17 abcde.rowid:18!null abcde.crdb_internal_mvcc_timestamp:19 abcde.tableoid:20 x:21!null y:22 z:23 xyz.crdb_internal_mvcc_timestamp:24 xyz.tableoid:25 + │ │ ├── scan fgh + │ │ │ └── columns: f:7 g:8 h:9 fgh.rowid:10!null fgh.crdb_internal_mvcc_timestamp:11 fgh.tableoid:12 + │ │ ├── inner-join (cross) + │ │ │ ├── columns: a:13!null b:14 c:15 d:16 e:17 abcde.rowid:18!null abcde.crdb_internal_mvcc_timestamp:19 abcde.tableoid:20 x:21!null y:22 z:23 xyz.crdb_internal_mvcc_timestamp:24 xyz.tableoid:25 + │ │ │ ├── scan abcde + │ │ │ │ ├── columns: a:13!null b:14 c:15 d:16 e:17 abcde.rowid:18!null abcde.crdb_internal_mvcc_timestamp:19 abcde.tableoid:20 + │ │ │ │ └── computed column expressions + │ │ │ │ ├── d:16 + │ │ │ │ │ └── (b:14 + c:15) + 1 + │ │ │ │ └── e:17 + │ │ │ │ └── a:13 + │ │ │ ├── scan xyz + │ │ │ │ └── columns: x:21!null y:22 z:23 xyz.crdb_internal_mvcc_timestamp:24 xyz.tableoid:25 + │ │ │ └── filters (true) + │ │ └── filters (true) + │ └── filters + │ └── (c:15 = f:7) AND (x:21 = g:8) + └── aggregations + ├── first-agg [as=f:7] + │ └── f:7 + ├── first-agg [as=g:8] + │ └── g:8 + ├── first-agg [as=h:9] + │ └── h:9 + ├── first-agg [as=fgh.crdb_internal_mvcc_timestamp:11] + │ └── fgh.crdb_internal_mvcc_timestamp:11 + ├── first-agg [as=fgh.tableoid:12] + │ └── fgh.tableoid:12 + ├── first-agg [as=a:13] + │ └── a:13 + ├── first-agg [as=b:14] + │ └── b:14 + ├── first-agg [as=c:15] + │ └── c:15 + ├── first-agg [as=d:16] + │ └── d:16 + ├── first-agg [as=e:17] + │ └── e:17 + ├── first-agg [as=abcde.rowid:18] + │ └── abcde.rowid:18 + ├── first-agg [as=abcde.crdb_internal_mvcc_timestamp:19] + │ └── abcde.crdb_internal_mvcc_timestamp:19 + ├── first-agg [as=abcde.tableoid:20] + │ └── abcde.tableoid:20 + ├── first-agg [as=x:21] + │ └── x:21 + ├── first-agg [as=y:22] + │ └── y:22 + ├── first-agg [as=z:23] + │ └── z:23 + ├── first-agg [as=xyz.crdb_internal_mvcc_timestamp:24] + │ └── xyz.crdb_internal_mvcc_timestamp:24 + └── first-agg [as=xyz.tableoid:25] + └── xyz.tableoid:25 
+ +# Test if USING works well with RETURNING expressions that reference +# the USING table. +build +DELETE FROM + abcde +USING + fgh +WHERE + fgh.h > abcde.b AND fgh.h <= 4 +RETURNING + abcde.a, abcde.b, abcde.c, abcde.d, abcde.e +---- +project + ├── columns: a:1!null b:2!null c:3 d:4 e:5 + └── delete abcde + ├── columns: a:1!null b:2!null c:3 d:4 e:5 abcde.rowid:6!null f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + ├── fetch columns: a:9 b:10 c:11 d:12 e:13 abcde.rowid:14 + ├── return-mapping: + │ ├── a:9 => a:1 + │ ├── b:10 => b:2 + │ ├── c:11 => c:3 + │ ├── d:12 => d:4 + │ ├── e:13 => e:5 + │ └── abcde.rowid:14 => abcde.rowid:6 + ├── passthrough columns f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + └── distinct-on + ├── columns: a:9!null b:10!null c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19!null fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + ├── grouping columns: abcde.rowid:14!null + ├── select + │ ├── columns: a:9!null b:10!null c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19!null fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ ├── inner-join (cross) + │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ ├── scan abcde + │ │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 + │ │ │ └── computed column expressions + │ │ │ ├── d:12 + │ │ │ │ └── (b:10 + c:11) + 1 + │ │ │ └── e:13 + │ │ │ └── a:9 + │ │ ├── scan fgh + │ │ │ └── columns: f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ └── filters (true) + │ └── filters + │ └── (h:19 > b:10) AND (h:19 <= 4) + └── aggregations + ├── first-agg [as=a:9] + │ └── a:9 + ├── first-agg [as=b:10] + │ └── b:10 + ├── first-agg [as=c:11] + │ └── c:11 + ├── first-agg [as=d:12] + │ └── d:12 + ├── first-agg [as=e:13] + │ └── e:13 + ├── first-agg [as=abcde.crdb_internal_mvcc_timestamp:15] + │ └── abcde.crdb_internal_mvcc_timestamp:15 + ├── first-agg [as=abcde.tableoid:16] + │ └── abcde.tableoid:16 + ├── first-agg [as=f:17] + │ └── f:17 + ├── first-agg [as=g:18] + │ └── g:18 + ├── first-agg [as=h:19] + │ └── h:19 + ├── first-agg [as=fgh.rowid:20] + │ └── fgh.rowid:20 + ├── first-agg [as=fgh.crdb_internal_mvcc_timestamp:21] + │ └── fgh.crdb_internal_mvcc_timestamp:21 + └── first-agg [as=fgh.tableoid:22] + └── fgh.tableoid:22 + +# Test if RETURNING * returns everything. 
+build +DELETE FROM abcde USING fgh WHERE c = fgh.f AND fgh.g = 'd' RETURNING * +---- +project + ├── columns: a:1!null b:2 c:3!null d:4 e:5 f:17 g:18 h:19 + └── delete abcde + ├── columns: a:1!null b:2 c:3!null d:4 e:5 abcde.rowid:6!null f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + ├── fetch columns: a:9 b:10 c:11 d:12 e:13 abcde.rowid:14 + ├── return-mapping: + │ ├── a:9 => a:1 + │ ├── b:10 => b:2 + │ ├── c:11 => c:3 + │ ├── d:12 => d:4 + │ ├── e:13 => e:5 + │ └── abcde.rowid:14 => abcde.rowid:6 + ├── passthrough columns f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + └── distinct-on + ├── columns: a:9!null b:10 c:11!null d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17!null g:18!null h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + ├── grouping columns: abcde.rowid:14!null + ├── select + │ ├── columns: a:9!null b:10 c:11!null d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17!null g:18!null h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ ├── inner-join (cross) + │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ ├── scan abcde + │ │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 + │ │ │ └── computed column expressions + │ │ │ ├── d:12 + │ │ │ │ └── (b:10 + c:11) + 1 + │ │ │ └── e:13 + │ │ │ └── a:9 + │ │ ├── scan fgh + │ │ │ └── columns: f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ └── filters (true) + │ └── filters + │ └── (c:11 = f:17) AND (g:18 = 'd') + └── aggregations + ├── first-agg [as=a:9] + │ └── a:9 + ├── first-agg [as=b:10] + │ └── b:10 + ├── first-agg [as=c:11] + │ └── c:11 + ├── first-agg [as=d:12] + │ └── d:12 + ├── first-agg [as=e:13] + │ └── e:13 + ├── first-agg [as=abcde.crdb_internal_mvcc_timestamp:15] + │ └── abcde.crdb_internal_mvcc_timestamp:15 + ├── first-agg [as=abcde.tableoid:16] + │ └── abcde.tableoid:16 + ├── first-agg [as=f:17] + │ └── f:17 + ├── first-agg [as=g:18] + │ └── g:18 + ├── first-agg [as=h:19] + │ └── h:19 + ├── first-agg [as=fgh.rowid:20] + │ └── fgh.rowid:20 + ├── first-agg [as=fgh.crdb_internal_mvcc_timestamp:21] + │ └── fgh.crdb_internal_mvcc_timestamp:21 + └── first-agg [as=fgh.tableoid:22] + └── fgh.tableoid:22 + +# Test ORDER BY and LIMIT when ordering by primary key columns +build +DELETE FROM mutation AS foo USING abcde as bar WHERE foo.n > bar.a ORDER BY foo.m LIMIT 3 +---- +delete mutation [as=foo] + ├── columns: + ├── fetch columns: m:7 n:8 o:9 p:10 + ├── passthrough columns a:13 b:14 c:15 d:16 e:17 rowid:18 bar.crdb_internal_mvcc_timestamp:19 bar.tableoid:20 + └── distinct-on + ├── columns: m:7!null n:8!null o:9 p:10 foo.crdb_internal_mvcc_timestamp:11 foo.tableoid:12 a:13!null b:14 c:15 d:16 e:17 rowid:18!null bar.crdb_internal_mvcc_timestamp:19 bar.tableoid:20 + ├── grouping columns: m:7!null + ├── limit + │ ├── columns: m:7!null n:8!null o:9 p:10 foo.crdb_internal_mvcc_timestamp:11 foo.tableoid:12 a:13!null b:14 c:15 d:16 e:17 rowid:18!null bar.crdb_internal_mvcc_timestamp:19 bar.tableoid:20 + │ ├── internal-ordering: +7 + │ ├── sort + │ │ ├── columns: m:7!null n:8!null o:9 p:10 foo.crdb_internal_mvcc_timestamp:11 foo.tableoid:12 a:13!null b:14 c:15 
d:16 e:17 rowid:18!null bar.crdb_internal_mvcc_timestamp:19 bar.tableoid:20 + │ │ ├── ordering: +7 + │ │ ├── limit hint: 3.00 + │ │ └── select + │ │ ├── columns: m:7!null n:8!null o:9 p:10 foo.crdb_internal_mvcc_timestamp:11 foo.tableoid:12 a:13!null b:14 c:15 d:16 e:17 rowid:18!null bar.crdb_internal_mvcc_timestamp:19 bar.tableoid:20 + │ │ ├── inner-join (cross) + │ │ │ ├── columns: m:7!null n:8 o:9 p:10 foo.crdb_internal_mvcc_timestamp:11 foo.tableoid:12 a:13!null b:14 c:15 d:16 e:17 rowid:18!null bar.crdb_internal_mvcc_timestamp:19 bar.tableoid:20 + │ │ │ ├── scan mutation [as=foo] + │ │ │ │ └── columns: m:7!null n:8 o:9 p:10 foo.crdb_internal_mvcc_timestamp:11 foo.tableoid:12 + │ │ │ ├── scan abcde [as=bar] + │ │ │ │ ├── columns: a:13!null b:14 c:15 d:16 e:17 rowid:18!null bar.crdb_internal_mvcc_timestamp:19 bar.tableoid:20 + │ │ │ │ └── computed column expressions + │ │ │ │ ├── d:16 + │ │ │ │ │ └── (b:14 + c:15) + 1 + │ │ │ │ └── e:17 + │ │ │ │ └── a:13 + │ │ │ └── filters (true) + │ │ └── filters + │ │ └── n:8 > a:13 + │ └── 3 + └── aggregations + ├── first-agg [as=n:8] + │ └── n:8 + ├── first-agg [as=o:9] + │ └── o:9 + ├── first-agg [as=p:10] + │ └── p:10 + ├── first-agg [as=foo.crdb_internal_mvcc_timestamp:11] + │ └── foo.crdb_internal_mvcc_timestamp:11 + ├── first-agg [as=foo.tableoid:12] + │ └── foo.tableoid:12 + ├── first-agg [as=a:13] + │ └── a:13 + ├── first-agg [as=b:14] + │ └── b:14 + ├── first-agg [as=c:15] + │ └── c:15 + ├── first-agg [as=d:16] + │ └── d:16 + ├── first-agg [as=e:17] + │ └── e:17 + ├── first-agg [as=rowid:18] + │ └── rowid:18 + ├── first-agg [as=bar.crdb_internal_mvcc_timestamp:19] + │ └── bar.crdb_internal_mvcc_timestamp:19 + └── first-agg [as=bar.tableoid:20] + └── bar.tableoid:20 + +# Aliased table names, ORDER BY and LIMIT when ordering by non primary key columns +# TODO(#89817): Add support for ORDER BY columns that are non-PK columns of the target +# table or columns from non-target tables. +build +DELETE FROM abcde AS foo USING xyz AS bar WHERE bar.y > 0 ORDER BY foo.a DESC LIMIT 5 +---- +error (42P10): SELECT DISTINCT ON expressions must match initial ORDER BY expressions + +# Test if DELETE FROM ... USING can return hidden columns. 
+build +DELETE FROM + abcde +USING + fgh +WHERE + abcde.a = fgh.f +RETURNING + fgh.rowid +---- +project + ├── columns: rowid:20 + └── delete abcde + ├── columns: a:1!null b:2 c:3 d:4 e:5 abcde.rowid:6!null f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + ├── fetch columns: a:9 b:10 c:11 d:12 e:13 abcde.rowid:14 + ├── return-mapping: + │ ├── a:9 => a:1 + │ ├── b:10 => b:2 + │ ├── c:11 => c:3 + │ ├── d:12 => d:4 + │ ├── e:13 => e:5 + │ └── abcde.rowid:14 => abcde.rowid:6 + ├── passthrough columns f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + └── distinct-on + ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17!null g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + ├── grouping columns: abcde.rowid:14!null + ├── select + │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17!null g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ ├── inner-join (cross) + │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ ├── scan abcde + │ │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 + │ │ │ └── computed column expressions + │ │ │ ├── d:12 + │ │ │ │ └── (b:10 + c:11) + 1 + │ │ │ └── e:13 + │ │ │ └── a:9 + │ │ ├── scan fgh + │ │ │ └── columns: f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ └── filters (true) + │ └── filters + │ └── a:9 = f:17 + └── aggregations + ├── first-agg [as=a:9] + │ └── a:9 + ├── first-agg [as=b:10] + │ └── b:10 + ├── first-agg [as=c:11] + │ └── c:11 + ├── first-agg [as=d:12] + │ └── d:12 + ├── first-agg [as=e:13] + │ └── e:13 + ├── first-agg [as=abcde.crdb_internal_mvcc_timestamp:15] + │ └── abcde.crdb_internal_mvcc_timestamp:15 + ├── first-agg [as=abcde.tableoid:16] + │ └── abcde.tableoid:16 + ├── first-agg [as=f:17] + │ └── f:17 + ├── first-agg [as=g:18] + │ └── g:18 + ├── first-agg [as=h:19] + │ └── h:19 + ├── first-agg [as=fgh.rowid:20] + │ └── fgh.rowid:20 + ├── first-agg [as=fgh.crdb_internal_mvcc_timestamp:21] + │ └── fgh.crdb_internal_mvcc_timestamp:21 + └── first-agg [as=fgh.tableoid:22] + └── fgh.tableoid:22 + +# Test if returning returns columns in the target table and USING table. 
+build +DELETE FROM abcde USING fgh WHERE abcde.a = fgh.f RETURNING fgh.f, abcde.a +---- +project + ├── columns: f:17 a:1!null + └── delete abcde + ├── columns: a:1!null b:2 c:3 d:4 e:5 abcde.rowid:6!null f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + ├── fetch columns: a:9 b:10 c:11 d:12 e:13 abcde.rowid:14 + ├── return-mapping: + │ ├── a:9 => a:1 + │ ├── b:10 => b:2 + │ ├── c:11 => c:3 + │ ├── d:12 => d:4 + │ ├── e:13 => e:5 + │ └── abcde.rowid:14 => abcde.rowid:6 + ├── passthrough columns f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + └── distinct-on + ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17!null g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + ├── grouping columns: abcde.rowid:14!null + ├── select + │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17!null g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ ├── inner-join (cross) + │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ ├── scan abcde + │ │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 + │ │ │ └── computed column expressions + │ │ │ ├── d:12 + │ │ │ │ └── (b:10 + c:11) + 1 + │ │ │ └── e:13 + │ │ │ └── a:9 + │ │ ├── scan fgh + │ │ │ └── columns: f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ └── filters (true) + │ └── filters + │ └── a:9 = f:17 + └── aggregations + ├── first-agg [as=a:9] + │ └── a:9 + ├── first-agg [as=b:10] + │ └── b:10 + ├── first-agg [as=c:11] + │ └── c:11 + ├── first-agg [as=d:12] + │ └── d:12 + ├── first-agg [as=e:13] + │ └── e:13 + ├── first-agg [as=abcde.crdb_internal_mvcc_timestamp:15] + │ └── abcde.crdb_internal_mvcc_timestamp:15 + ├── first-agg [as=abcde.tableoid:16] + │ └── abcde.tableoid:16 + ├── first-agg [as=f:17] + │ └── f:17 + ├── first-agg [as=g:18] + │ └── g:18 + ├── first-agg [as=h:19] + │ └── h:19 + ├── first-agg [as=fgh.rowid:20] + │ └── fgh.rowid:20 + ├── first-agg [as=fgh.crdb_internal_mvcc_timestamp:21] + │ └── fgh.crdb_internal_mvcc_timestamp:21 + └── first-agg [as=fgh.tableoid:22] + └── fgh.tableoid:22 + +# Test if DELETE FROM ... USING works with LATERAL. 
+build +DELETE FROM abcde USING fgh, LATERAL (SELECT x FROM xyz WHERE fgh.g > xyz.x) AS other WHERE other.x = 'a' +---- +delete abcde + ├── columns: + ├── fetch columns: a:9 b:10 c:11 d:12 e:13 abcde.rowid:14 + ├── passthrough columns f:17 g:18 h:19 fgh.rowid:20 fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 x:23 + └── distinct-on + ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 x:23!null + ├── grouping columns: abcde.rowid:14!null + ├── select + │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 x:23!null + │ ├── inner-join (cross) + │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 x:23!null + │ │ ├── scan abcde + │ │ │ ├── columns: a:9!null b:10 c:11 d:12 e:13 abcde.rowid:14!null abcde.crdb_internal_mvcc_timestamp:15 abcde.tableoid:16 + │ │ │ └── computed column expressions + │ │ │ ├── d:12 + │ │ │ │ └── (b:10 + c:11) + 1 + │ │ │ └── e:13 + │ │ │ └── a:9 + │ │ ├── inner-join-apply + │ │ │ ├── columns: f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 x:23!null + │ │ │ ├── scan fgh + │ │ │ │ └── columns: f:17 g:18 h:19 fgh.rowid:20!null fgh.crdb_internal_mvcc_timestamp:21 fgh.tableoid:22 + │ │ │ ├── project + │ │ │ │ ├── columns: x:23!null + │ │ │ │ └── select + │ │ │ │ ├── columns: x:23!null y:24 z:25 xyz.crdb_internal_mvcc_timestamp:26 xyz.tableoid:27 + │ │ │ │ ├── scan xyz + │ │ │ │ │ └── columns: x:23!null y:24 z:25 xyz.crdb_internal_mvcc_timestamp:26 xyz.tableoid:27 + │ │ │ │ └── filters + │ │ │ │ └── g:18 > x:23 + │ │ │ └── filters (true) + │ │ └── filters (true) + │ └── filters + │ └── x:23 = 'a' + └── aggregations + ├── first-agg [as=a:9] + │ └── a:9 + ├── first-agg [as=b:10] + │ └── b:10 + ├── first-agg [as=c:11] + │ └── c:11 + ├── first-agg [as=d:12] + │ └── d:12 + ├── first-agg [as=e:13] + │ └── e:13 + ├── first-agg [as=abcde.crdb_internal_mvcc_timestamp:15] + │ └── abcde.crdb_internal_mvcc_timestamp:15 + ├── first-agg [as=abcde.tableoid:16] + │ └── abcde.tableoid:16 + ├── first-agg [as=f:17] + │ └── f:17 + ├── first-agg [as=g:18] + │ └── g:18 + ├── first-agg [as=h:19] + │ └── h:19 + ├── first-agg [as=fgh.rowid:20] + │ └── fgh.rowid:20 + ├── first-agg [as=fgh.crdb_internal_mvcc_timestamp:21] + │ └── fgh.crdb_internal_mvcc_timestamp:21 + ├── first-agg [as=fgh.tableoid:22] + │ └── fgh.tableoid:22 + └── first-agg [as=x:23] + └── x:23 + +# Test if DELETE FROM ... USING works with partial indexes. 
+exec-ddl +CREATE TABLE pindex ( + a DECIMAL(10, 2), + INDEX (a) WHERE a > 3 +) +---- + +build +DELETE FROM pindex USING (VALUES (5.0, 6.0)) v(b) WHERE pindex.a = v.b +---- +delete pindex + ├── columns: + ├── fetch columns: a:5 rowid:6 + ├── passthrough columns column1:9 column2:10 + ├── partial index del columns: partial_index_del1:11 + └── project + ├── columns: partial_index_del1:11!null a:5!null rowid:6!null crdb_internal_mvcc_timestamp:7 tableoid:8 column1:9!null column2:10!null + ├── distinct-on + │ ├── columns: a:5!null rowid:6!null crdb_internal_mvcc_timestamp:7 tableoid:8 column1:9!null column2:10!null + │ ├── grouping columns: rowid:6!null + │ ├── select + │ │ ├── columns: a:5!null rowid:6!null crdb_internal_mvcc_timestamp:7 tableoid:8 column1:9!null column2:10!null + │ │ ├── inner-join (cross) + │ │ │ ├── columns: a:5 rowid:6!null crdb_internal_mvcc_timestamp:7 tableoid:8 column1:9!null column2:10!null + │ │ │ ├── scan pindex + │ │ │ │ ├── columns: a:5 rowid:6!null crdb_internal_mvcc_timestamp:7 tableoid:8 + │ │ │ │ └── partial index predicates + │ │ │ │ └── pindex_a_idx: filters + │ │ │ │ └── a:5 > 3 + │ │ │ ├── values + │ │ │ │ ├── columns: column1:9!null column2:10!null + │ │ │ │ └── (5.0, 6.0) + │ │ │ └── filters (true) + │ │ └── filters + │ │ └── a:5 = column1:9 + │ └── aggregations + │ ├── first-agg [as=a:5] + │ │ └── a:5 + │ ├── first-agg [as=crdb_internal_mvcc_timestamp:7] + │ │ └── crdb_internal_mvcc_timestamp:7 + │ ├── first-agg [as=tableoid:8] + │ │ └── tableoid:8 + │ ├── first-agg [as=column1:9] + │ │ └── column1:9 + │ └── first-agg [as=column2:10] + │ └── column2:10 + └── projections + └── a:5 > 3 [as=partial_index_del1:11] + +# Test that multiple of the same table in the USING clause returns an error. +build +DELETE FROM abcde USING xyz, fgh, fgh WHERE fgh.f = abcde.a +---- +error (42712): source name "fgh" specified more than once (missing AS clause) + +# Test when the target table has a compound primary key, +# to ensure that the distinct-on groups by all the PK columns. 
+exec-ddl
+CREATE TABLE hij (
+  h INT,
+  i INT,
+  j INT,
+  PRIMARY KEY (h, i)
+)
+----
+
+build
+DELETE FROM hij USING abcde WHERE hij.i = abcde.a
+----
+delete hij
+ ├── columns:
+ ├── fetch columns: h:6 i:7 j:8
+ ├── passthrough columns a:11 b:12 c:13 d:14 e:15 rowid:16 abcde.crdb_internal_mvcc_timestamp:17 abcde.tableoid:18
+ └── distinct-on
+ ├── columns: h:6!null i:7!null j:8 hij.crdb_internal_mvcc_timestamp:9 hij.tableoid:10 a:11!null b:12 c:13 d:14 e:15 rowid:16!null abcde.crdb_internal_mvcc_timestamp:17 abcde.tableoid:18
+ ├── grouping columns: h:6!null i:7!null
+ ├── select
+ │ ├── columns: h:6!null i:7!null j:8 hij.crdb_internal_mvcc_timestamp:9 hij.tableoid:10 a:11!null b:12 c:13 d:14 e:15 rowid:16!null abcde.crdb_internal_mvcc_timestamp:17 abcde.tableoid:18
+ │ ├── inner-join (cross)
+ │ │ ├── columns: h:6!null i:7!null j:8 hij.crdb_internal_mvcc_timestamp:9 hij.tableoid:10 a:11!null b:12 c:13 d:14 e:15 rowid:16!null abcde.crdb_internal_mvcc_timestamp:17 abcde.tableoid:18
+ │ │ ├── scan hij
+ │ │ │ └── columns: h:6!null i:7!null j:8 hij.crdb_internal_mvcc_timestamp:9 hij.tableoid:10
+ │ │ ├── scan abcde
+ │ │ │ ├── columns: a:11!null b:12 c:13 d:14 e:15 rowid:16!null abcde.crdb_internal_mvcc_timestamp:17 abcde.tableoid:18
+ │ │ │ └── computed column expressions
+ │ │ │ ├── d:14
+ │ │ │ │ └── (b:12 + c:13) + 1
+ │ │ │ └── e:15
+ │ │ │ └── a:11
+ │ │ └── filters (true)
+ │ └── filters
+ │ └── i:7 = a:11
+ └── aggregations
+ ├── first-agg [as=j:8]
+ │ └── j:8
+ ├── first-agg [as=hij.crdb_internal_mvcc_timestamp:9]
+ │ └── hij.crdb_internal_mvcc_timestamp:9
+ ├── first-agg [as=hij.tableoid:10]
+ │ └── hij.tableoid:10
+ ├── first-agg [as=a:11]
+ │ └── a:11
+ ├── first-agg [as=b:12]
+ │ └── b:12
+ ├── first-agg [as=c:13]
+ │ └── c:13
+ ├── first-agg [as=d:14]
+ │ └── d:14
+ ├── first-agg [as=e:15]
+ │ └── e:15
+ ├── first-agg [as=rowid:16]
+ │ └── rowid:16
+ ├── first-agg [as=abcde.crdb_internal_mvcc_timestamp:17]
+ │ └── abcde.crdb_internal_mvcc_timestamp:17
+ └── first-agg [as=abcde.tableoid:18]
+ └── abcde.tableoid:18

From 8f1d48f4bd17312185baca524f93064ed02fc89d Mon Sep 17 00:00:00 2001
From: Leon Fattakhov
Date: Wed, 28 Sep 2022 13:37:14 -0400
Subject: [PATCH 3/5] metrics: expose pebble flush utilization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create a new `GaugeFloat64` metric for pebble’s flush utilization.
This metric is not cumulative; rather, it is computed over an
interval. The interval is determined by the `interval` parameter of
the `Node.startComputePeriodicMetrics` method.

In order to compute the metric over an interval, the previous value
of the metric must be stored. As a result, a map is constructed that
takes a pointer to a store and maps it to a pointer to storage
metrics: `make(map[*kvserver.Store]*storage.Metrics)`. This map is
passed to `node.computeMetricsPeriodically`, which has each store
calculate its metrics and then updates that store's previous metrics
in the map.

Refactor `store.go`'s metric calculation by separating
`ComputeMetrics(ctx context.Context, tick int) error` into two
methods:

* `ComputeMetrics(ctx context.Context) error`
* `ComputeMetricsPeriodically(ctx context.Context, prevMetrics *storage.Metrics, tick int) (m storage.Metrics, err error)`

Both methods call `computeMetrics`, which contains the code common to
the two calls.
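A self-contained sketch of this interval computation. The struct below is a
local stand-in mirroring the `WorkDuration`/`IdleDuration` fields used in the
diff further down, not the real `storage.Metrics`:

```go
package main

import (
	"fmt"
	"time"
)

// throughput mimics the shape of the cumulative flush throughput counters.
type throughput struct {
	WorkDuration time.Duration // total time spent actively flushing
	IdleDuration time.Duration // total time the flush loop sat idle
}

func (t *throughput) Subtract(o throughput) {
	t.WorkDuration -= o.WorkDuration
	t.IdleDuration -= o.IdleDuration
}

// flushUtilization returns the fraction of the last interval spent flushing.
func flushUtilization(prev, cur throughput) float64 {
	cur.Subtract(prev) // counters are cumulative; the difference is the interval
	if cur.WorkDuration <= 0 {
		return 0
	}
	return float64(cur.WorkDuration) / float64(cur.WorkDuration+cur.IdleDuration)
}

func main() {
	prev := throughput{WorkDuration: 2 * time.Second, IdleDuration: 38 * time.Second}
	cur := throughput{WorkDuration: 3 * time.Second, IdleDuration: 47 * time.Second}
	fmt.Println(flushUtilization(prev, cur)) // 1s of work in a 10s interval: 0.1
}
```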
Before this, retrieving instantaneous metrics required passing a tick
value such as `-1` or `0` to `ComputeMetrics(ctx context.Context,
tick int)`; now it can be done with a plain call to
`ComputeMetrics(ctx context.Context)`.

The `store.ComputeMetricsPeriodically` method also returns the latest
storage metrics. These metrics are used to update the mapping between
stores and previous metrics that is used for computing the metric
delta over an interval.

Release note: None
---
 pkg/kv/kvserver/client_metrics_test.go        |  4 +-
 .../client_replica_raft_overload_test.go      |  4 +-
 pkg/kv/kvserver/client_replica_test.go        |  6 +-
 pkg/kv/kvserver/metrics.go                    | 10 ++++
 pkg/kv/kvserver/store.go                      | 57 ++++++++++++++-----
 pkg/server/node.go                            | 17 ++++--
 pkg/server/node_test.go                       |  2 +-
 pkg/server/status_test.go                     |  3 +-
 pkg/testutils/testcluster/testcluster.go      |  2 +-
 pkg/ts/catalog/chart_catalog.go               |  4 ++
 10 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/pkg/kv/kvserver/client_metrics_test.go b/pkg/kv/kvserver/client_metrics_test.go
index 2312ba1ad5fe..38be14e0ddea 100644
--- a/pkg/kv/kvserver/client_metrics_test.go
+++ b/pkg/kv/kvserver/client_metrics_test.go
@@ -137,7 +137,7 @@ func verifyStats(t *testing.T, tc *testcluster.TestCluster, storeIdxSlice ...int
 }
 
 func verifyStorageStats(t *testing.T, s *kvserver.Store) {
- if err := s.ComputeMetrics(context.Background(), 0); err != nil {
+ if err := s.ComputeMetrics(context.Background()); err != nil {
 t.Fatal(err)
 }
 
@@ -417,7 +417,7 @@ func TestStoreMaxBehindNanosOnlyTracksEpochBasedLeases(t *testing.T) {
 sinceExpBasedLeaseStart := timeutil.Since(timeutil.Unix(0, l.Start.WallTime))
 for i := 0; i < tc.NumServers(); i++ {
 s, _ := getFirstStoreReplica(t, tc.Server(i), keys.Meta1Prefix)
- require.NoError(t, s.ComputeMetrics(ctx, 0))
+ require.NoError(t, s.ComputeMetrics(ctx))
 maxBehind := time.Duration(s.Metrics().ClosedTimestampMaxBehindNanos.Value())
 // We want to make sure that maxBehind ends up being much smaller than the
 // start of an expiration based lease.
diff --git a/pkg/kv/kvserver/client_replica_raft_overload_test.go b/pkg/kv/kvserver/client_replica_raft_overload_test.go index 13babeacbd7c..678b567deb80 100644 --- a/pkg/kv/kvserver/client_replica_raft_overload_test.go +++ b/pkg/kv/kvserver/client_replica_raft_overload_test.go @@ -80,7 +80,7 @@ func TestReplicaRaftOverload(t *testing.T) { // See: https://github.com/cockroachdb/cockroach/issues/84252 require.NoError(t, tc.Servers[0].DB().Put(ctx, tc.ScratchRange(t), "foo")) s1 := tc.GetFirstStoreFromServer(t, 0) - require.NoError(t, s1.ComputeMetrics(ctx, 0 /* tick */)) + require.NoError(t, s1.ComputeMetrics(ctx)) if n := s1.Metrics().RaftPausedFollowerCount.Value(); n == 0 { return errors.New("no paused followers") } @@ -95,7 +95,7 @@ func TestReplicaRaftOverload(t *testing.T) { require.NoError(t, tc.GetFirstStoreFromServer(t, 2 /* n3 */).GossipStore(ctx, false /* useCached */)) testutils.SucceedsSoon(t, func() error { s1 := tc.GetFirstStoreFromServer(t, 0) - require.NoError(t, s1.ComputeMetrics(ctx, 0 /* tick */)) + require.NoError(t, s1.ComputeMetrics(ctx)) if n := s1.Metrics().RaftPausedFollowerCount.Value(); n > 0 { return errors.Errorf("%d paused followers", n) } diff --git a/pkg/kv/kvserver/client_replica_test.go b/pkg/kv/kvserver/client_replica_test.go index 4b50b3e397b7..68628d76b833 100644 --- a/pkg/kv/kvserver/client_replica_test.go +++ b/pkg/kv/kvserver/client_replica_test.go @@ -2024,7 +2024,7 @@ func TestLeaseMetricsOnSplitAndTransfer(t *testing.T) { var expirationLeases int64 var epochLeases int64 for i := range tc.Servers { - if err := tc.GetFirstStoreFromServer(t, i).ComputeMetrics(context.Background(), 0); err != nil { + if err := tc.GetFirstStoreFromServer(t, i).ComputeMetrics(context.Background()); err != nil { return err } metrics = tc.GetFirstStoreFromServer(t, i).Metrics() @@ -4835,7 +4835,7 @@ func TestUninitializedMetric(t *testing.T) { targetStore := tc.GetFirstStoreFromServer(t, 1) // Force the store to compute the replica metrics - require.NoError(t, targetStore.ComputeMetrics(ctx, 0)) + require.NoError(t, targetStore.ComputeMetrics(ctx)) // Blocked snapshot on the second server (1) should realize 1 uninitialized replica. require.Equal(t, int64(1), targetStore.Metrics().UninitializedCount.Value()) @@ -4845,7 +4845,7 @@ func TestUninitializedMetric(t *testing.T) { require.NoError(t, <-addReplicaErr) // Again force the store to compute metrics, increment tick counter 0 -> 1 - require.NoError(t, targetStore.ComputeMetrics(ctx, 1)) + require.NoError(t, targetStore.ComputeMetrics(ctx)) // There should now be no uninitialized replicas in the recorded metrics require.Equal(t, int64(0), targetStore.Metrics().UninitializedCount.Value()) diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index abe976485fa1..fb1fd54795e9 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -1663,6 +1663,13 @@ Note that the measurement does not include the duration for replicating the eval Measurement: "Occurrences", Unit: metric.Unit_COUNT, } + + metaStorageFlushUtilization = metric.Metadata{ + Name: "storage.flush.utilization", + Help: "The percentage of time the storage engine is actively flushing memtables to disk.", + Measurement: "Flush Utilization", + Unit: metric.Unit_PERCENT, + } ) // StoreMetrics is the set of metrics for a given store. @@ -1957,6 +1964,8 @@ type StoreMetrics struct { // Replica batch evaluation metrics. 
ReplicaReadBatchEvaluationLatency *metric.Histogram ReplicaWriteBatchEvaluationLatency *metric.Histogram + + FlushUtilization *metric.GaugeFloat64 } type tenantMetricsRef struct { @@ -2494,6 +2503,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { ReplicaWriteBatchEvaluationLatency: metric.NewHistogram( metaReplicaWriteBatchEvaluationLatency, histogramWindow, metric.IOLatencyBuckets, ), + FlushUtilization: metric.NewGaugeFloat64(metaStorageFlushUtilization), } { diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index 2bbdc0b27177..8e1db2082c3a 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -2354,7 +2354,7 @@ func (s *Store) systemGossipUpdate(sysCfg *config.SystemConfig) { // Metrics depend in part on the system config. Compute them as soon as we // get the first system config, then periodically in the background // (managed by the Node). - if err := s.ComputeMetrics(ctx, -1); err != nil { + if err := s.ComputeMetrics(ctx); err != nil { log.Infof(ctx, "%s: failed initial metrics computation: %s", s, err) } log.Event(ctx, "computed initial metrics") @@ -3315,29 +3315,25 @@ func (s *Store) checkpoint(ctx context.Context, tag string) (string, error) { return checkpointDir, nil } -// ComputeMetrics immediately computes the current value of store metrics which -// cannot be computed incrementally. This method should be invoked periodically -// by a higher-level system which records store metrics. -// -// The tick argument should increment across repeated calls to this -// method. It is used to compute some metrics less frequently than others. -func (s *Store) ComputeMetrics(ctx context.Context, tick int) error { +// computeMetrics is a common metric computation that is used by +// ComputeMetricsPeriodically and ComputeMetrics to compute metrics +func (s *Store) computeMetrics(ctx context.Context) (m storage.Metrics, err error) { ctx = s.AnnotateCtx(ctx) - if err := s.updateCapacityGauges(ctx); err != nil { - return err + if err = s.updateCapacityGauges(ctx); err != nil { + return m, err } - if err := s.updateReplicationGauges(ctx); err != nil { - return err + if err = s.updateReplicationGauges(ctx); err != nil { + return m, err } // Get the latest engine metrics. - m := s.engine.GetMetrics() + m = s.engine.GetMetrics() s.metrics.updateEngineMetrics(m) // Get engine Env stats. envStats, err := s.engine.GetEnvStats() if err != nil { - return err + return m, err } s.metrics.updateEnvStats(*envStats) @@ -3349,6 +3345,29 @@ func (s *Store) ComputeMetrics(ctx context.Context, tick int) error { s.metrics.RdbCheckpoints.Update(int64(len(dirs))) } + return m, nil +} + +// ComputeMetricsPeriodically computes metrics that need to be computed +// periodically along with the regular metrics +func (s *Store) ComputeMetricsPeriodically( + ctx context.Context, prevMetrics *storage.Metrics, tick int, +) (m storage.Metrics, err error) { + m, err = s.computeMetrics(ctx) + if err != nil { + return m, err + } + wt := m.Flush.WriteThroughput + + if prevMetrics != nil { + wt.Subtract(prevMetrics.Flush.WriteThroughput) + } + flushUtil := 0.0 + if wt.WorkDuration > 0 { + flushUtil = float64(wt.WorkDuration) / float64(wt.WorkDuration+wt.IdleDuration) + } + s.metrics.FlushUtilization.Update(flushUtil) + // Log this metric infrequently (with current configurations, // every 10 minutes). 
Trigger on tick 1 instead of tick 0 so that // non-periodic callers of this method don't trigger expensive @@ -3370,7 +3389,15 @@ func (s *Store) ComputeMetrics(ctx context.Context, tick int) error { e.StoreId = int32(s.StoreID()) log.StructuredEvent(ctx, &e) } - return nil + return m, nil +} + +// ComputeMetrics immediately computes the current value of store metrics which +// cannot be computed incrementally. This method should be invoked periodically +// by a higher-level system which records store metrics. +func (s *Store) ComputeMetrics(ctx context.Context) error { + _, err := s.computeMetrics(ctx) + return err } // ClusterNodeCount returns this store's view of the number of nodes in the diff --git a/pkg/server/node.go b/pkg/server/node.go index caccb33a1a61..7bbed0232836 100644 --- a/pkg/server/node.go +++ b/pkg/server/node.go @@ -742,11 +742,12 @@ func (n *Node) startComputePeriodicMetrics(stopper *stop.Stopper, interval time. _ = stopper.RunAsyncTask(ctx, "compute-metrics", func(ctx context.Context) { // Compute periodic stats at the same frequency as metrics are sampled. ticker := time.NewTicker(interval) + previousMetrics := make(map[*kvserver.Store]*storage.Metrics) defer ticker.Stop() for tick := 0; ; tick++ { select { case <-ticker.C: - if err := n.computePeriodicMetrics(ctx, tick); err != nil { + if err := n.computeMetricsPeriodically(ctx, previousMetrics, tick); err != nil { log.Errorf(ctx, "failed computing periodic metrics: %s", err) } case <-stopper.ShouldQuiesce(): @@ -756,12 +757,20 @@ func (n *Node) startComputePeriodicMetrics(stopper *stop.Stopper, interval time. }) } -// computePeriodicMetrics instructs each store to compute the value of +// computeMetricsPeriodically instructs each store to compute the value of // complicated metrics. -func (n *Node) computePeriodicMetrics(ctx context.Context, tick int) error { +func (n *Node) computeMetricsPeriodically( + ctx context.Context, storeToMetrics map[*kvserver.Store]*storage.Metrics, tick int, +) error { return n.stores.VisitStores(func(store *kvserver.Store) error { - if err := store.ComputeMetrics(ctx, tick); err != nil { + if newMetrics, err := store.ComputeMetricsPeriodically(ctx, storeToMetrics[store], tick); err != nil { log.Warningf(ctx, "%s: unable to compute metrics: %s", store, err) + } else { + if storeToMetrics[store] == nil { + storeToMetrics[store] = &newMetrics + } else { + *storeToMetrics[store] = newMetrics + } } return nil }) diff --git a/pkg/server/node_test.go b/pkg/server/node_test.go index a0ab4126395b..e20ae98cc6e1 100644 --- a/pkg/server/node_test.go +++ b/pkg/server/node_test.go @@ -491,7 +491,7 @@ func TestNodeStatusWritten(t *testing.T) { // were multiple replicas, more care would need to be taken in the initial // syncFeed(). 
 	forceWriteStatus := func() {
-		if err := ts.node.computePeriodicMetrics(ctx, 0); err != nil {
+		if err := ts.node.computeMetricsPeriodically(ctx, map[*kvserver.Store]*storage.Metrics{}, 0); err != nil {
 			t.Fatalf("error publishing store statuses: %s", err)
 		}
diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go
index d97e521be982..dcf059f76bf0 100644
--- a/pkg/server/status_test.go
+++ b/pkg/server/status_test.go
@@ -50,6 +50,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
 	"github.com/cockroachdb/cockroach/pkg/sql/sqlstats"
 	"github.com/cockroachdb/cockroach/pkg/sql/tests"
+	"github.com/cockroachdb/cockroach/pkg/storage"
 	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
 	"github.com/cockroachdb/cockroach/pkg/testutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
@@ -344,7 +345,7 @@ func startServer(t *testing.T) *TestServer {
 	// Make sure the node status is available. This is done by forcing stores to
 	// publish their status, synchronizing to the event feed with a canary
 	// event, and then forcing the server to write summaries immediately.
-	if err := ts.node.computePeriodicMetrics(context.Background(), 0); err != nil {
+	if err := ts.node.computeMetricsPeriodically(context.Background(), map[*kvserver.Store]*storage.Metrics{}, 0); err != nil {
 		t.Fatalf("error publishing store statuses: %s", err)
 	}
diff --git a/pkg/testutils/testcluster/testcluster.go b/pkg/testutils/testcluster/testcluster.go
index 38554ed4a2fc..3766be3e8745 100644
--- a/pkg/testutils/testcluster/testcluster.go
+++ b/pkg/testutils/testcluster/testcluster.go
@@ -1380,7 +1380,7 @@ func (tc *TestCluster) WaitForFullReplication() error {
 			if err := s.ForceReplicationScanAndProcess(); err != nil {
 				return err
 			}
-			if err := s.ComputeMetrics(context.TODO(), 0); err != nil {
+			if err := s.ComputeMetrics(context.TODO()); err != nil {
 				// This can sometimes fail since ComputeMetrics calls
 				// updateReplicationGauges which needs the system config gossiped.
 				log.Infof(context.TODO(), "%v", err)
diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go
index e4e060c66e93..2f3f33678dae 100644
--- a/pkg/ts/catalog/chart_catalog.go
+++ b/pkg/ts/catalog/chart_catalog.go
@@ -3072,6 +3072,10 @@ var charts = []sectionDescription{
 				"storage.l6-level-score",
 			},
 		},
+		{
+			Title:   "Flush Utilization",
+			Metrics: []string{"storage.flush.utilization"},
+		},
 	},
 },
 {

From 42d08d86d275aec8f919d57d82e7d7cd1eaa76da Mon Sep 17 00:00:00 2001
From: irfan sharif
Date: Mon, 10 Oct 2022 09:25:44 -0400
Subject: [PATCH 4/5] roachtest: introduce admission-control/elastic-cdc

Part of #89208.

This test sets up a 3-node CRDB cluster on 8vCPU machines running
1000-warehouse TPC-C, and kicks off a few changefeed backfills
concurrently. We've observed latency spikes during backfills because of
their CPU/scan-heavy nature -- they can elevate CPU scheduling
latencies, which in turn translates to an increase in foreground
latency.

Also in this commit: std{err,out} from the prometheus/grafana setup that
roachtests perform is now routed to the logger in scope.
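For reference, each round of the test creates changefeeds of roughly the
following shape (see the test body below), with the cursor pushed back so
the backfill scans from approximately when the workload started; `-300s`
corresponds to the first round with the default 5m pad between rounds:

    CREATE CHANGEFEED FOR tpcc.order_line, tpcc.stock, tpcc.customer
      INTO 'null://' WITH cursor = '-300s'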
Release note: None --- pkg/cmd/roachtest/tests/BUILD.bazel | 1 + pkg/cmd/roachtest/tests/admission_control.go | 1 + .../tests/admission_control_elastic_cdc.go | 147 ++++++++++++++++++ pkg/cmd/roachtest/tests/tpcc.go | 3 + pkg/roachprod/prometheus/prometheus.go | 40 ++--- 5 files changed, 172 insertions(+), 20 deletions(-) create mode 100644 pkg/cmd/roachtest/tests/admission_control_elastic_cdc.go diff --git a/pkg/cmd/roachtest/tests/BUILD.bazel b/pkg/cmd/roachtest/tests/BUILD.bazel index 28e66b5fcf1e..1f1128d83909 100644 --- a/pkg/cmd/roachtest/tests/BUILD.bazel +++ b/pkg/cmd/roachtest/tests/BUILD.bazel @@ -10,6 +10,7 @@ go_library( "activerecord_blocklist.go", "admission_control.go", "admission_control_elastic_backup.go", + "admission_control_elastic_cdc.go", "admission_control_multi_store_overload.go", "admission_control_snapshot_overload.go", "admission_control_tpcc_overload.go", diff --git a/pkg/cmd/roachtest/tests/admission_control.go b/pkg/cmd/roachtest/tests/admission_control.go index 4d3835f14493..f039f236c697 100644 --- a/pkg/cmd/roachtest/tests/admission_control.go +++ b/pkg/cmd/roachtest/tests/admission_control.go @@ -29,6 +29,7 @@ func registerAdmission(r registry.Registry) { // over some latency threshold. Will be Useful to track over time. registerElasticControlForBackups(r) + registerElasticControlForCDC(r) registerMultiStoreOverload(r) registerSnapshotOverload(r) registerTPCCOverload(r) diff --git a/pkg/cmd/roachtest/tests/admission_control_elastic_cdc.go b/pkg/cmd/roachtest/tests/admission_control_elastic_cdc.go new file mode 100644 index 000000000000..361a90498753 --- /dev/null +++ b/pkg/cmd/roachtest/tests/admission_control_elastic_cdc.go @@ -0,0 +1,147 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tests + +import ( + "context" + "fmt" + "time" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/roachprod/prometheus" +) + +// This test sets up a 3-node CRDB cluster on 8vCPU machines running +// 1000-warehouse TPC-C, and kicks off a few changefeed backfills concurrently. +// We've observed latency spikes during backfills because of its CPU/scan-heavy +// nature -- it can elevate CPU scheduling latencies which in turn translates to +// an increase in foreground latency. +func registerElasticControlForCDC(r registry.Registry) { + r.Add(registry.TestSpec{ + Name: "admission-control/elastic-cdc", + Owner: registry.OwnerAdmissionControl, + // TODO(irfansharif): After two weeks of nightly baking time, reduce + // this to a weekly cadence. This is a long-running test and serves only + // as a coarse-grained benchmark. 
+ // Tags: []string{`weekly`}, + Cluster: r.MakeClusterSpec(4, spec.CPU(8)), + RequiresLicense: true, + Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { + if c.Spec().NodeCount < 4 { + t.Fatalf("expected at least 4 nodes, found %d", c.Spec().NodeCount) + } + + crdbNodes := c.Spec().NodeCount - 1 + workloadNode := crdbNodes + 1 + numWarehouses, workloadDuration, estimatedSetupTime := 1000, 60*time.Minute, 10*time.Minute + if c.IsLocal() { + numWarehouses, workloadDuration, estimatedSetupTime = 1, time.Minute, 2*time.Minute + } + + promCfg := &prometheus.Config{} + promCfg.WithPrometheusNode(c.Node(workloadNode).InstallNodes()[0]). + WithNodeExporter(c.Range(1, c.Spec().NodeCount-1).InstallNodes()). + WithCluster(c.Range(1, c.Spec().NodeCount-1).InstallNodes()). + WithGrafanaDashboard("http://go.crdb.dev/p/changefeed-admission-control-grafana"). + WithScrapeConfigs( + prometheus.MakeWorkloadScrapeConfig("workload", "/", + makeWorkloadScrapeNodes( + c.Node(workloadNode).InstallNodes()[0], + []workloadInstance{{nodes: c.Node(workloadNode)}}, + ), + ), + ) + + if t.SkipInit() { + t.Status(fmt.Sprintf("running tpcc for %s (<%s)", workloadDuration, time.Minute)) + } else { + t.Status(fmt.Sprintf("initializing + running tpcc for %s (<%s)", workloadDuration, 10*time.Minute)) + } + + padDuration, err := time.ParseDuration(ifLocal(c, "5s", "5m")) + if err != nil { + t.Fatal(err) + } + stopFeedsDuration, err := time.ParseDuration(ifLocal(c, "5s", "1m")) + if err != nil { + t.Fatal(err) + } + + runTPCC(ctx, t, c, tpccOptions{ + Warehouses: numWarehouses, + Duration: workloadDuration, + SetupType: usingImport, + EstimatedSetupTime: estimatedSetupTime, + SkipPostRunCheck: true, + ExtraSetupArgs: "--checks=false", + PrometheusConfig: promCfg, + During: func(ctx context.Context) error { + db := c.Conn(ctx, t.L(), crdbNodes) + defer db.Close() + + t.Status(fmt.Sprintf("configuring cluster (<%s)", 30*time.Second)) + { + setAdmissionControl(ctx, t, c, true) + + // Changefeeds depend on rangefeeds being enabled. + if _, err := db.Exec("SET CLUSTER SETTING kv.rangefeed.enabled = true"); err != nil { + return err + } + } + + stopFeeds(db) // stop stray feeds (from repeated runs against the same cluster for ex.) + defer stopFeeds(db) + + m := c.NewMonitor(ctx, c.Range(1, crdbNodes)) + m.Go(func(ctx context.Context) error { + const iters, changefeeds = 5, 10 + for i := 0; i < iters; i++ { + if i == 0 { + t.Status(fmt.Sprintf("setting performance baseline (<%s)", padDuration)) + } + time.Sleep(padDuration) // each iteration lasts long enough to observe effects in metrics + + t.Status(fmt.Sprintf("during: round %d: stopping extant changefeeds (<%s)", i, stopFeedsDuration)) + stopFeeds(db) + time.Sleep(stopFeedsDuration) // buffer for cancellations to take effect/show up in metrics + + t.Status(fmt.Sprintf("during: round %d: creating %d changefeeds (<%s)", i, changefeeds, time.Minute)) + for j := 0; j < changefeeds; j++ { + stmtWithCursor := fmt.Sprintf(` + CREATE CHANGEFEED FOR tpcc.order_line, tpcc.stock, tpcc.customer + INTO 'null://' WITH cursor = '-%ds' + `, int64(float64(i+1)*padDuration.Seconds())) // scanning as far back as possible (~ when the workload started) + if _, err := db.ExecContext(ctx, stmtWithCursor); err != nil { + return err + } + } + + // TODO(irfansharif): Add a version of this test + // with initial_scan = 'only' to demonstrate the + // need+efficacy of using elastic CPU control in + // changefeed workers. That too has a severe effect + // on scheduling latencies. 
+ } + return nil + }) + + t.Status(fmt.Sprintf("waiting for workload to finish (<%s)", workloadDuration)) + m.Wait() + + return nil + }, + }) + }, + }) +} diff --git a/pkg/cmd/roachtest/tests/tpcc.go b/pkg/cmd/roachtest/tests/tpcc.go index fead65e29d9b..6400a610fcea 100644 --- a/pkg/cmd/roachtest/tests/tpcc.go +++ b/pkg/cmd/roachtest/tests/tpcc.go @@ -211,6 +211,9 @@ func runTPCC(ctx context.Context, t test.Test, c cluster.Cluster, opts tpccOptio var ep *tpccChaosEventProcessor var promCfg *prometheus.Config if !opts.DisablePrometheus { + // TODO(irfansharif): Move this after the import step. The statistics + // during import itself is uninteresting and pollutes actual workload + // data. var cleanupFunc func() promCfg, cleanupFunc = setupPrometheusForRoachtest(ctx, t, c, opts.PrometheusConfig, workloadInstances) defer cleanupFunc() diff --git a/pkg/roachprod/prometheus/prometheus.go b/pkg/roachprod/prometheus/prometheus.go index f35597157788..ac11cb0c0d8f 100644 --- a/pkg/roachprod/prometheus/prometheus.go +++ b/pkg/roachprod/prometheus/prometheus.go @@ -202,7 +202,7 @@ func Init( // NB: when upgrading here, make sure to target a version that picks up this PR: // https://github.com/prometheus/node_exporter/pull/2311 // At time of writing, there hasn't been a release in over half a year. - if err := c.RepeatRun(ctx, l, os.Stdout, os.Stderr, cfg.NodeExporter, + if err := c.RepeatRun(ctx, l, l.Stdout, l.Stderr, cfg.NodeExporter, "download node exporter", ` (sudo systemctl stop node_exporter || true) && @@ -214,7 +214,7 @@ rm -rf node_exporter && mkdir -p node_exporter && curl -fsSL \ } // Start node_exporter. - if err := c.Run(ctx, l, os.Stdout, os.Stderr, cfg.NodeExporter, "init node exporter", + if err := c.Run(ctx, l, l.Stdout, l.Stderr, cfg.NodeExporter, "init node exporter", `cd node_exporter && sudo systemd-run --unit node_exporter --same-dir ./node_exporter`, ); err != nil { @@ -226,8 +226,8 @@ sudo systemd-run --unit node_exporter --same-dir ./node_exporter`, if err := c.RepeatRun( ctx, l, - os.Stdout, - os.Stderr, + l.Stdout, + l.Stderr, cfg.PrometheusNode, "reset prometheus", "sudo systemctl stop prometheus || echo 'no prometheus is running'", @@ -238,8 +238,8 @@ sudo systemd-run --unit node_exporter --same-dir ./node_exporter`, if err := c.RepeatRun( ctx, l, - os.Stdout, - os.Stderr, + l.Stdout, + l.Stderr, cfg.PrometheusNode, "download prometheus", `sudo rm -rf /tmp/prometheus && mkdir /tmp/prometheus && cd /tmp/prometheus && @@ -272,8 +272,8 @@ sudo systemd-run --unit node_exporter --same-dir ./node_exporter`, if err := c.Run( ctx, l, - os.Stdout, - os.Stderr, + l.Stdout, + l.Stderr, cfg.PrometheusNode, "start-prometheus", `cd /tmp/prometheus && @@ -286,8 +286,8 @@ sudo systemd-run --unit prometheus --same-dir \ if cfg.Grafana.Enabled { // Install Grafana. if err := c.RepeatRun(ctx, l, - os.Stdout, - os.Stderr, cfg.PrometheusNode, "install grafana", + l.Stdout, + l.Stderr, cfg.PrometheusNode, "install grafana", `sudo apt-get install -qqy apt-transport-https && sudo apt-get install -qqy software-properties-common wget && wget -q -O - https://packages.grafana.com/gpg.key | sudo apt-key add - && @@ -299,8 +299,8 @@ sudo apt-get update -qqy && sudo apt-get install -qqy grafana-enterprise && sudo // Provision local prometheus instance as data source. 
 	if err := c.RepeatRun(ctx, l,
-		os.Stdout,
-		os.Stderr, cfg.PrometheusNode, "permissions",
+		l.Stdout,
+		l.Stderr, cfg.PrometheusNode, "permissions",
 		`sudo chmod 777 /etc/grafana/provisioning/datasources /etc/grafana/provisioning/dashboards /var/lib/grafana/dashboards /etc/grafana/grafana.ini`,
 	); err != nil {
 		return nil, err
@@ -342,14 +342,14 @@ org_role = Admin
 
 	for idx, u := range cfg.Grafana.DashboardURLs {
 		cmd := fmt.Sprintf("curl -fsSL %s -o /var/lib/grafana/dashboards/%d.json", u, idx)
-		if err := c.Run(ctx, l, os.Stdout, os.Stderr, cfg.PrometheusNode, "download dashboard",
+		if err := c.Run(ctx, l, l.Stdout, l.Stderr, cfg.PrometheusNode, "download dashboard",
 			cmd); err != nil {
 			l.PrintfCtx(ctx, "failed to download dashboard from %s: %s", u, err)
 		}
 	}
 
 	// Start Grafana. Default port is 3000.
-	if err := c.Run(ctx, l, os.Stdout, os.Stderr, cfg.PrometheusNode, "start grafana",
+	if err := c.Run(ctx, l, l.Stdout, l.Stderr, cfg.PrometheusNode, "start grafana",
 		`sudo systemctl restart grafana-server`); err != nil {
 		return nil, err
 	}
@@ -371,8 +371,8 @@ func Snapshot(
 	if err := c.Run(
 		ctx,
 		l,
-		os.Stdout,
-		os.Stderr,
+		l.Stdout,
+		l.Stderr,
 		promNode,
 		"prometheus snapshot",
 		`sudo rm -rf /tmp/prometheus/data/snapshots/* && curl -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot &&
@@ -442,13 +442,13 @@ func Shutdown(
 			shutdownErr = errors.CombineErrors(shutdownErr, err)
 		}
 	}
-	if err := c.Run(ctx, l, os.Stdout, os.Stderr, nodes, "stop node exporter",
+	if err := c.Run(ctx, l, l.Stdout, l.Stderr, nodes, "stop node exporter",
 		`sudo systemctl stop node_exporter || echo 'Stopped node exporter'`); err != nil {
 		l.Printf("Failed to stop node exporter: %v", err)
 		shutdownErr = errors.CombineErrors(shutdownErr, err)
 	}
 
-	if err := c.Run(ctx, l, os.Stdout, os.Stderr, promNode, "stop grafana",
+	if err := c.Run(ctx, l, l.Stdout, l.Stderr, promNode, "stop grafana",
 		`sudo systemctl stop grafana-server || echo 'Stopped grafana'`); err != nil {
 		l.Printf("Failed to stop grafana server: %v", err)
 		shutdownErr = errors.CombineErrors(shutdownErr, err)
@@ -457,8 +457,8 @@
 	if err := c.RepeatRun(
 		ctx,
 		l,
-		os.Stdout,
-		os.Stderr,
+		l.Stdout,
+		l.Stderr,
 		promNode,
 		"stop prometheus",
 		"sudo systemctl stop prometheus || echo 'Stopped prometheus'",

From 74ed574b4ff335ace9fb6fe8da4d966e10665039 Mon Sep 17 00:00:00 2001
From: Faizaan Madhani
Date: Wed, 12 Oct 2022 11:51:23 -0400
Subject: [PATCH 5/5] sql: add support for `DELETE FROM ... USING` to execbuilder

Previously, while the optimizer could generate query plans for SQL
statements of the form `DELETE FROM ... USING`, executing such a
statement in an instance of CockroachDB could return errors,
particularly for statements that included `RETURNING` clauses.

This commit adds support to the execbuilder for executing statements of
the form `DELETE FROM ... USING`.

Release note (sql change): CockroachDB now supports executing statements
of the form `DELETE FROM ... USING`.
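For example, the following statement (exercised in the logic tests
below) now executes, joining the target table against the `USING` table
and returning columns from both:

    DELETE FROM u_a USING u_b WHERE u_a.c = u_b.a RETURNING u_b.a, u_b.b, u_a.a, u_a.b;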
--- pkg/sql/delete.go | 31 ++- pkg/sql/distsql_spec_exec_factory.go | 1 + pkg/sql/logictest/testdata/logic_test/cursor | 1 - pkg/sql/logictest/testdata/logic_test/delete | 200 ++++++++++++++++++- pkg/sql/logictest/testdata/logic_test/views | 1 - pkg/sql/opt/exec/execbuilder/mutation.go | 18 +- pkg/sql/opt/exec/explain/testdata/gists | 8 +- pkg/sql/opt/exec/explain/testdata/gists_tpce | 4 +- pkg/sql/opt/exec/factory.opt | 6 + pkg/sql/opt_exec_factory.go | 7 +- 10 files changed, 260 insertions(+), 17 deletions(-) diff --git a/pkg/sql/delete.go b/pkg/sql/delete.go index 00134b08cd2f..b331c4a16fc7 100644 --- a/pkg/sql/delete.go +++ b/pkg/sql/delete.go @@ -50,8 +50,8 @@ type deleteRun struct { traceKV bool // partialIndexDelValsOffset is the offset of partial index delete - // indicators in the source values. It is equal to the number of fetched - // columns. + // indicators in the source values. It is equal to the sum of the number + // of fetched columns and the number of passthrough columns. partialIndexDelValsOffset int // rowIdxToRetIdx is the mapping from the columns returned by the deleter @@ -60,6 +60,11 @@ type deleteRun struct { // of the mutation. Otherwise, the value at the i-th index refers to the // index of the resultRowBuffer where the i-th column is to be returned. rowIdxToRetIdx []int + + // numPassthrough is the number of columns in addition to the set of columns + // of the target table being returned, that must be passed through from the + // input node. + numPassthrough int } var _ mutationPlanNode = &deleteNode{} @@ -184,12 +189,32 @@ func (d *deleteNode) processSourceRow(params runParams, sourceVals tree.Datums) // d.run.rows.NumCols() is guaranteed to only contain the requested // public columns. resultValues := make(tree.Datums, d.run.td.rows.NumCols()) - for i, retIdx := range d.run.rowIdxToRetIdx { + largestRetIdx := -1 + for i := range d.run.rowIdxToRetIdx { + retIdx := d.run.rowIdxToRetIdx[i] if retIdx >= 0 { + if retIdx >= largestRetIdx { + largestRetIdx = retIdx + } resultValues[retIdx] = sourceVals[i] } } + // At this point we've extracted all the RETURNING values that are part + // of the target table. We must now extract the columns in the RETURNING + // clause that refer to other tables (from the USING clause of the delete). 
+ if d.run.numPassthrough > 0 { + passthroughBegin := len(d.run.td.rd.FetchCols) + passthroughEnd := passthroughBegin + d.run.numPassthrough + passthroughValues := sourceVals[passthroughBegin:passthroughEnd] + + for i := 0; i < d.run.numPassthrough; i++ { + largestRetIdx++ + resultValues[largestRetIdx] = passthroughValues[i] + } + + } + if _, err := d.run.td.rows.AddRow(params.ctx, resultValues); err != nil { return err } diff --git a/pkg/sql/distsql_spec_exec_factory.go b/pkg/sql/distsql_spec_exec_factory.go index 02cc1b8ff64e..84105be01a71 100644 --- a/pkg/sql/distsql_spec_exec_factory.go +++ b/pkg/sql/distsql_spec_exec_factory.go @@ -997,6 +997,7 @@ func (e *distSQLSpecExecFactory) ConstructDelete( table cat.Table, fetchCols exec.TableColumnOrdinalSet, returnCols exec.TableColumnOrdinalSet, + passthrough colinfo.ResultColumns, autoCommit bool, ) (exec.Node, error) { return nil, unimplemented.NewWithIssue(47473, "experimental opt-driven distsql planning: delete") diff --git a/pkg/sql/logictest/testdata/logic_test/cursor b/pkg/sql/logictest/testdata/logic_test/cursor index 733e9a32fddb..cb720f04468d 100644 --- a/pkg/sql/logictest/testdata/logic_test/cursor +++ b/pkg/sql/logictest/testdata/logic_test/cursor @@ -598,4 +598,3 @@ FETCH 1 a b; statement ok COMMIT; - diff --git a/pkg/sql/logictest/testdata/logic_test/delete b/pkg/sql/logictest/testdata/logic_test/delete index 719d5dc5721b..bb881aa6ea35 100644 --- a/pkg/sql/logictest/testdata/logic_test/delete +++ b/pkg/sql/logictest/testdata/logic_test/delete @@ -307,9 +307,6 @@ SELECT x, y, z FROM family 1 1 NULL 3 3 NULL -statement error at or near "where": syntax error: unimplemented: this syntax -DELETE FROM family USING family, other_table WHERE x=2 - # Verify that the fast path does its deletes at the expected timestamp. statement ok CREATE TABLE a (a INT PRIMARY KEY) @@ -337,3 +334,200 @@ SELECT * FROM a AS OF SYSTEM TIME $ts 3 4 5 + +# Test that USING works. + +statement ok +CREATE TABLE u_a ( + a INT NOT NULL PRIMARY KEY, + b STRING, + c INT +) + +statement ok +CREATE TABLE u_b ( + a INT NOT NULL PRIMARY KEY, + b STRING +) + +statement ok +CREATE TABLE u_c ( + a INT NOT NULL PRIMARY KEY, + b STRING, + c INT +) + +statement ok +CREATE TABLE u_d ( + a INT, + b INT +) + +statement ok +INSERT INTO u_a VALUES (1, 'a', 10), (2, 'b', 20), (3, 'c', 30), (4, 'd', 40) + +statement ok +INSERT INTO u_b VALUES (10, 'a'), (20, 'b'), (30, 'c'), (40, 'd') + +statement ok +INSERT INTO u_c VALUES (1, 'a', 10), (2, 'b', 50), (3, 'c', 50), (4, 'd', 40) + +# Test a join with a filter. +statement ok +DELETE FROM u_a USING u_b WHERE c = u_b.a AND u_b.b = 'd' + +query ITI rowsort +SELECT * FROM u_a; +---- +1 a 10 +2 b 20 +3 c 30 + +# Test a self join. +statement ok +INSERT INTO u_a VALUES (5, 'd', 5), (6, 'e', 6) + +statement ok +DELETE FROM u_a USING u_a u_a2 WHERE u_a.a = u_a2.c + +query ITI rowsort +SELECT * FROM u_a; +---- +1 a 10 +2 b 20 +3 c 30 + +# Test when USING uses multiple tables. + +statement ok +INSERT INTO u_c VALUES (30, 'a', 1) + +statement ok +DELETE FROM u_a USING u_b, u_c WHERE u_a.c = u_b.a AND u_a.c = u_c.a + +query ITI rowsort +SELECT * FROM u_a; +---- +1 a 10 +2 b 20 + +# Test if USING works well with RETURNING expressions that reference +# the USING table and target table. 
+query ITIT colnames,rowsort +DELETE FROM u_a USING u_b WHERE u_a.c = u_b.a RETURNING u_b.a, u_b.b, u_a.a, u_a.b; +---- +a b a b +10 a 1 a +20 b 2 b + +query ITI rowsort +SELECT * FROM u_a; +---- + +statement ok +INSERT INTO u_a VALUES (1, 'a', 10), (2, 'b', 20), (3, 'c', 30), (4, 'd', 40); + +# Test if RETURNING * returns everything. +query ITIITI colnames,rowsort +DELETE FROM u_a USING u_c WHERE u_a.c = u_c.c RETURNING *; +---- +a b c a b c +1 a 10 1 a 10 +4 d 40 4 d 40 + +# Clean u_a to input a new set of data, and to improve test readability. +statement ok +TRUNCATE u_a + +statement ok +INSERT INTO u_a VALUES (1, 'a', 5), (2, 'b', 10), (3, 'c', 15), (4, 'd', 20), (5, 'd', 25), (6, 'd', 30), (7, 'd', 35), (8, 'd', 40), (9, 'd', 45) + +# Using ORDER BY and LIMIT with a `DELETE ... USING` where ORDER BY and LIMIT references the USING +# table is not supported. +# TODO(#89817): Add support in DELETE ... USING for ORDER BY clauses to reference the USING +# table. This is not supported in UPDATE ... FROM either: #89817. +statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions +DELETE FROM u_a AS foo USING u_b AS bar WHERE bar.a > foo.c ORDER BY bar.a DESC LIMIT 3 RETURNING *; + +# Test aliased table names, ORDER BY and LIMIT where ORDER BY references the target +# table. +query ITIIT +DELETE FROM u_a AS foo USING u_b AS bar WHERE bar.a > foo.c ORDER BY foo.a DESC LIMIT 3 RETURNING *; +---- +7 d 35 40 d +6 d 30 40 d +5 d 25 40 d + +query ITI rowsort +SELECT * FROM u_a; +---- +1 a 5 +2 b 10 +3 c 15 +4 d 20 +8 d 40 +9 d 45 + +statement ok +INSERT INTO u_d VALUES (1, 10), (2, 20), (3, 30), (4, 40) + +query IT rowsort +SELECT * FROM u_b; +---- +10 a +20 b +30 c +40 d + +query ITI rowsort +SELECT * FROM u_c; +---- +1 a 10 +2 b 50 +3 c 50 +4 d 40 +30 a 1 + +# Test if DELETE FROM ... USING works with LATERAL. + +statement ok +DELETE FROM u_a USING u_b, LATERAL (SELECT u_c.a, u_c.b, u_c.c FROM u_c WHERE u_b.b = u_c.b) AS other WHERE other.c = 1 AND u_a.c = 35 + +query ITI rowsort +SELECT * FROM u_a +---- +1 a 5 +2 b 10 +3 c 15 +4 d 20 +8 d 40 +9 d 45 + +# Test if DELETE FROM ... USING works with partial indexes. + +statement ok +CREATE TABLE pindex ( + a DECIMAL(10, 2), + INDEX (a) WHERE a > 3 +) + +statement ok +INSERT INTO pindex VALUES (1.0), (2.0), (3.0), (4.0), (5.0), (8.0) + +statement ok +DELETE FROM pindex USING (VALUES (5.0), (6.0)) v(b) WHERE pindex.a = v.b + +query F rowsort +SELECT * FROM pindex; +---- +1.00 +2.00 +3.00 +4.00 +8.00 + +query F rowsort +SELECT a FROM pindex@pindex_a_idx WHERE a > 3 +---- +4.00 +8.00 diff --git a/pkg/sql/logictest/testdata/logic_test/views b/pkg/sql/logictest/testdata/logic_test/views index 61796b891858..a985d90b22bf 100644 --- a/pkg/sql/logictest/testdata/logic_test/views +++ b/pkg/sql/logictest/testdata/logic_test/views @@ -1492,4 +1492,3 @@ SELECT * FROM v; statement ok SET DATABASE = test; - diff --git a/pkg/sql/opt/exec/execbuilder/mutation.go b/pkg/sql/opt/exec/execbuilder/mutation.go index 430a12477e47..f24df385ba7c 100644 --- a/pkg/sql/opt/exec/execbuilder/mutation.go +++ b/pkg/sql/opt/exec/execbuilder/mutation.go @@ -493,25 +493,39 @@ func (b *Builder) buildDelete(del *memo.DeleteExpr) (execPlan, error) { // // TODO(andyk): Using ensureColumns here can result in an extra Render. // Upgrade execution engine to not require this. 
-	colList := make(opt.ColList, 0, len(del.FetchCols)+len(del.PartialIndexDelCols))
+	colList := make(opt.ColList, 0, len(del.FetchCols)+len(del.PartialIndexDelCols)+len(del.PassthroughCols))
 	colList = appendColsWhenPresent(colList, del.FetchCols)
 	colList = appendColsWhenPresent(colList, del.PartialIndexDelCols)
+	if del.NeedResults() {
+		colList = append(colList, del.PassthroughCols...)
+	}
+
 	input, err := b.buildMutationInput(del, del.Input, colList, &del.MutationPrivate)
 	if err != nil {
 		return execPlan{}, err
 	}
 
-	// Construct the Delete node.
 	md := b.mem.Metadata()
 	tab := md.Table(del.Table)
 	fetchColOrds := ordinalSetFromColList(del.FetchCols)
 	returnColOrds := ordinalSetFromColList(del.ReturnCols)
+
+	// Construct the result columns for the passthrough set.
+	var passthroughCols colinfo.ResultColumns
+	if del.NeedResults() {
+		for _, passthroughCol := range del.PassthroughCols {
+			colMeta := b.mem.Metadata().ColumnMeta(passthroughCol)
+			passthroughCols = append(passthroughCols, colinfo.ResultColumn{Name: colMeta.Alias, Typ: colMeta.Type})
+		}
+	}
+
 	node, err := b.factory.ConstructDelete(
 		input.root,
 		tab,
 		fetchColOrds,
 		returnColOrds,
+		passthroughCols,
 		b.allowAutoCommit && len(del.FKChecks) == 0 && len(del.FKCascades) == 0,
 	)
 	if err != nil {
diff --git a/pkg/sql/opt/exec/explain/testdata/gists b/pkg/sql/opt/exec/explain/testdata/gists
index 50c85a0793ee..dce9996092ec 100644
--- a/pkg/sql/opt/exec/explain/testdata/gists
+++ b/pkg/sql/opt/exec/explain/testdata/gists
@@ -699,8 +699,8 @@ explain(gist):
 gist-explain-roundtrip
 DELETE FROM foo
 ----
-hash: 5369057709634423529
-plan-gist: AgFqAgAHAAAAI2oB
+hash: 17378315733259356217
+plan-gist: AgFqAgAHAAAAI2oAAQ==
 explain(shape):
 • delete
 │ from: foo
@@ -722,8 +722,8 @@ explain(gist):
 gist-explain-roundtrip
 DELETE FROM foo WHERE a = 1
 ----
-hash: 7691685103096689151
-plan-gist: AgFqAgAHAgAAI2oB
+hash: 11485970487285265051
+plan-gist: AgFqAgAHAgAAI2oAAQ==
 explain(shape):
 • delete
 │ from: foo
diff --git a/pkg/sql/opt/exec/explain/testdata/gists_tpce b/pkg/sql/opt/exec/explain/testdata/gists_tpce
index 4e3f741bf4b8..6152776af388 100644
--- a/pkg/sql/opt/exec/explain/testdata/gists_tpce
+++ b/pkg/sql/opt/exec/explain/testdata/gists_tpce
@@ -210,8 +210,8 @@ update_trade_submitted AS (
 )
 SELECT * FROM request_list;
 ----
-hash: 7096273538769246907
-plan-gist: AgGkAQIAHwIAAAcQBRAhpAEAAAcCMAGUAQIAHwAAAAMHCDAxBQIUAJQBAgIBBQgHCAUII5QBAAcCMDEFAgcGBQYwH5IBADEFAhQFkAECAgEqMQUCFAWwAQICASoHAjAxBQIUAJABAgIBBRwHIAUgMCGQAQAAMQUCFAWwAQICASoHAjAxBQgGCA==
+hash: 14329018118666014305
+plan-gist: AgGkAQIAHwIAAAcQBRAhpAEAAAcCMAGUAQIAHwAAAAMHCDAxBQIUAJQBAgIBBQgHCAUII5QBAAAHAjAxBQIHBgUGMB+SAQAxBQIUBZABAgIBKjEFAhQFsAECAgEqBwIwMQUCFACQAQICAQUcByAFIDAhkAEAADEFAhQFsAECAgEqBwIwMQUIBgg=
 explain(shape):
 • root
 │
diff --git a/pkg/sql/opt/exec/factory.opt b/pkg/sql/opt/exec/factory.opt
index e655a7f5deda..97efd738b4f0 100644
--- a/pkg/sql/opt/exec/factory.opt
+++ b/pkg/sql/opt/exec/factory.opt
@@ -556,11 +556,17 @@ define Upsert {
 # The fetchCols set contains the ordinal positions of the fetch columns in
 # the target table. The input must contain those columns in the same order
 # as they appear in the table schema.
+#
+# The passthrough parameter contains all the result columns that are part of
+# the input node that the delete node needs to return (passing through from
+# the input). The passthrough columns are used to return any column from the
+# USING tables that are referenced in the RETURNING clause.
define Delete { Input exec.Node Table cat.Table FetchCols exec.TableColumnOrdinalSet ReturnCols exec.TableColumnOrdinalSet + Passthrough colinfo.ResultColumns # If set, the operator will commit the transaction as part of its execution. # This is false when executing inside an explicit transaction, or there are diff --git a/pkg/sql/opt_exec_factory.go b/pkg/sql/opt_exec_factory.go index 9e6c223c9b7a..7ef2830579a2 100644 --- a/pkg/sql/opt_exec_factory.go +++ b/pkg/sql/opt_exec_factory.go @@ -1669,6 +1669,7 @@ func (ef *execFactory) ConstructDelete( table cat.Table, fetchColOrdSet exec.TableColumnOrdinalSet, returnColOrdSet exec.TableColumnOrdinalSet, + passthrough colinfo.ResultColumns, autoCommit bool, ) (exec.Node, error) { // Derive table and column descriptors. @@ -1696,7 +1697,8 @@ func (ef *execFactory) ConstructDelete( source: input.(planNode), run: deleteRun{ td: tableDeleter{rd: rd, alloc: ef.getDatumAlloc()}, - partialIndexDelValsOffset: len(rd.FetchCols), + partialIndexDelValsOffset: len(rd.FetchCols) + len(passthrough), + numPassthrough: len(passthrough), }, } @@ -1707,6 +1709,9 @@ func (ef *execFactory) ConstructDelete( // order they are defined in the table. del.columns = colinfo.ResultColumnsFromColumns(tabDesc.GetID(), returnCols) + // Add the passthrough columns to the returning columns. + del.columns = append(del.columns, passthrough...) + del.run.rowIdxToRetIdx = row.ColMapping(rd.FetchCols, returnCols) del.run.rowsNeeded = true }