From b9e28cf0745b52765a3dd7be7132b79cdc1fa1af Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 26 Jan 2022 14:32:50 -0500 Subject: [PATCH 1/3] opt: more accurate join multiplicity with FKs and self-joins When calculating join multiplicity in the presence of a foreign key, it was previously possible for the incorrect base table to be selected from the right expression, in the case that the right expression contained a self-join. The base table selected was incorrect because it did not contain the equality column from the join filter. This prevented the optimizer from determining that these joins preserve all rows from the left side of the join, which could prevent further optimization of a query. Now, only the right equality columns are passed to `checkForeignKeyCase` which ensures that the correct base table is selected. This is safe because `verifyFiltersAreValidEqualities` has already checked that the right equality columns are unfiltered in the right expression, so the same checks in `checkForeignKeyCase` are redundant. Release note (performance improvement): The optimizer better optimizes queries that include both foreign key joins and self-joins. --- pkg/sql/opt/memo/multiplicity_builder.go | 68 ++++++++++++------- pkg/sql/opt/memo/multiplicity_builder_test.go | 30 ++++++++ pkg/sql/opt/memo/testdata/logprops/join | 2 +- 3 files changed, 74 insertions(+), 26 deletions(-) diff --git a/pkg/sql/opt/memo/multiplicity_builder.go b/pkg/sql/opt/memo/multiplicity_builder.go index 7952a55163fa..a28369186d3b 100644 --- a/pkg/sql/opt/memo/multiplicity_builder.go +++ b/pkg/sql/opt/memo/multiplicity_builder.go @@ -122,8 +122,8 @@ func deriveUnfilteredCols(in RelExpr) opt.ColSet { right := t.Child(1).(RelExpr) multiplicity := GetJoinMultiplicity(t) - // Use the UnfilteredCols to determine whether unfiltered columns can be - // passed through. + // Use the join's multiplicity to determine whether unfiltered columns + // can be passed through. if multiplicity.JoinPreservesLeftRows(t.Op()) { unfilteredCols.UnionWith(deriveUnfilteredCols(left)) } @@ -267,7 +267,8 @@ func filtersMatchAllLeftRows(left, right RelExpr, filters FiltersExpr) bool { filters, ) } - if !verifyFiltersAreValidEqualities(left, right, filters) { + rightEqualityCols, ok := verifyFiltersAreValidEqualities(left, right, filters) + if !ok { return false } if checkSelfJoinCase(left.Memo().Metadata(), filters) { @@ -276,19 +277,28 @@ func filtersMatchAllLeftRows(left, right RelExpr, filters FiltersExpr) bool { } // Case 2b. return checkForeignKeyCase( - left.Memo().Metadata(), left.Relational().NotNullCols, deriveUnfilteredCols(right), filters) + left.Memo().Metadata(), + left.Relational().NotNullCols, + rightEqualityCols, + filters, + ) } -// verifyFiltersAreValidEqualities returns true when all of the following -// conditions are satisfied: -// 1. All filters are equalities. -// 2. All equalities directly compare two columns. -// 3. All equalities contain one column from the left not-null columns, and one -// column from the right unfiltered columns. -// 4. All equality columns come from a base table. -// 5. All left columns come from a single table, and all right columns come from -// a single table. -func verifyFiltersAreValidEqualities(left, right RelExpr, filters FiltersExpr) bool { +// verifyFiltersAreValidEqualities returns the set of equality columns in the +// right relation and true when all of the following conditions are satisfied: +// +// 1. All filters are equalities. +// 2. All equalities directly compare two columns. +// 3. All equalities contain one column from the left not-null columns, and +// one column from the right unfiltered columns. +// 4. All equality columns come from a base table. +// 5. All left columns come from a single table, and all right columns come +// from a single table. +// +// Returns ok=false if any of these conditions are unsatisfied. +func verifyFiltersAreValidEqualities( + left, right RelExpr, filters FiltersExpr, +) (rightEqualityCols opt.ColSet, ok bool) { md := left.Memo().Metadata() var leftTab, rightTab opt.TableID @@ -296,21 +306,21 @@ func verifyFiltersAreValidEqualities(left, right RelExpr, filters FiltersExpr) b rightUnfilteredCols := deriveUnfilteredCols(right) if rightUnfilteredCols.Empty() { // There are no unfiltered columns from the right input. - return false + return opt.ColSet{}, false } for i := range filters { eq, _ := filters[i].Condition.(*EqExpr) if eq == nil { // Condition #1: Conjunct is not an equality comparison. - return false + return opt.ColSet{}, false } leftVar, _ := eq.Left.(*VariableExpr) rightVar, _ := eq.Right.(*VariableExpr) if leftVar == nil || rightVar == nil { // Condition #2: Conjunct does not directly compare two columns. - return false + return opt.ColSet{}, false } leftColID := leftVar.Col @@ -322,7 +332,7 @@ func verifyFiltersAreValidEqualities(left, right RelExpr, filters FiltersExpr) b } if !leftNotNullCols.Contains(leftColID) || !rightUnfilteredCols.Contains(rightColID) { // Condition #3: Columns don't come from both the left and right ColSets. - return false + return opt.ColSet{}, false } if leftTab == 0 || rightTab == 0 { @@ -331,16 +341,19 @@ func verifyFiltersAreValidEqualities(left, right RelExpr, filters FiltersExpr) b rightTab = md.ColumnMeta(rightColID).Table if leftTab == 0 || rightTab == 0 { // Condition #4: Columns don't come from base tables. - return false + return opt.ColSet{}, false } } if leftTab != md.ColumnMeta(leftColID).Table || rightTab != md.ColumnMeta(rightColID).Table { // Condition #5: The filter conditions reference more than one table from // each side. - return false + return opt.ColSet{}, false } + + rightEqualityCols.Add(rightColID) } - return true + + return rightEqualityCols, true } // checkSelfJoinCase returns true if all equalities in the given FiltersExpr @@ -374,7 +387,9 @@ func checkForeignKeyCase( ) bool { if rightUnfilteredCols.Empty() { // There are no unfiltered columns from the right; a valid foreign key - // relation is not possible. + // relation is not possible. This check, which is a duplicate of a check + // in verifyFiltersAreValidEqualities, is necessary in the case of a + // cross-join because verifyFiltersAreValidEqualities is not called. return false } @@ -419,7 +434,7 @@ func checkForeignKeyCase( rightColID := rightTableID.ColumnID(rightColOrd) if !leftNotNullCols.Contains(leftColID) { // The left column isn't a left not-null output column. It can't be - // used in a filter (since it isn't an output column) but the foreign key may still be valid.but it may still + // used in a filter (since it isn't an output column) but it may still // be provably not null. // // A column that isn't in leftNotNullCols is guaranteed not-null if it @@ -444,8 +459,11 @@ func checkForeignKeyCase( continue } if !rightUnfilteredCols.Contains(rightColID) { - // The right column isn't guaranteed to be unfiltered. It can't be - // used in a filter. + // The right column isn't guaranteed to be unfiltered. It + // can't be used in a filter. This check, which is a + // duplicate of a check in verifyFiltersAreValidEqualities, + // is necessary in the case of a cross-join because + // verifyFiltersAreValidEqualities is not called. continue } if filtersHaveEquality(filters, leftColID, rightColID) { diff --git a/pkg/sql/opt/memo/multiplicity_builder_test.go b/pkg/sql/opt/memo/multiplicity_builder_test.go index bcfd9c97a136..8450f8da41bb 100644 --- a/pkg/sql/opt/memo/multiplicity_builder_test.go +++ b/pkg/sql/opt/memo/multiplicity_builder_test.go @@ -270,6 +270,36 @@ func TestGetJoinMultiplicity(t *testing.T) { on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])), expected: "left-rows(exactly-one)", }, + { // 19 + // SELECT * + // FROM fk_tab + // INNER JOIN (SELECT * FROM xy AS xy1 INNER JOIN xy xy2 ON xy1.x = xy2.x) + // ON r1 = xy1.x; + joinOp: opt.InnerJoinOp, + left: fkScan, + right: ob.makeInnerJoin( + xyScan, + xyScan2, + ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])), + ), + on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])), + expected: "left-rows(exactly-one), right-rows(zero-or-more)", + }, + { // 20 + // SELECT * + // FROM fk_tab + // INNER JOIN (SELECT * FROM xy AS xy1 INNER JOIN xy xy2 ON xy1.x = xy2.x) + // ON r1 = xy2.x; + joinOp: opt.InnerJoinOp, + left: fkScan, + right: ob.makeInnerJoin( + xyScan, + xyScan2, + ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])), + ), + on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols2[0])), + expected: "left-rows(exactly-one), right-rows(zero-or-more)", + }, } for i, tc := range testCases { diff --git a/pkg/sql/opt/memo/testdata/logprops/join b/pkg/sql/opt/memo/testdata/logprops/join index 1ed4d0a0f1f4..d41cb39e3707 100644 --- a/pkg/sql/opt/memo/testdata/logprops/join +++ b/pkg/sql/opt/memo/testdata/logprops/join @@ -2224,7 +2224,7 @@ INNER JOIN (SELECT xysd.x, a.x AS t FROM xysd INNER JOIN xysd AS a ON xysd.x = a ---- inner-join (hash) ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:7(int!null) t:13(int!null) - ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more) + ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) ├── key: (1) ├── fd: (1)-->(2-4), (7)==(3,13), (13)==(3,7), (3)==(7,13) ├── prune: (1,2,4) From 128eb4a8b1e7056f47adf370a94868522dd1406f Mon Sep 17 00:00:00 2001 From: Thomas Hardy Date: Wed, 19 Jan 2022 11:02:19 -0500 Subject: [PATCH 2/3] sql: pass statement request id to backend Previously, the frontend passed a statement fingerprint to the API to update the corresponding statement diagnostic request to an expired state. Now we pass the statement diagnostic ID directly. This resolves a SQL parsing error that occurs when a statement fingerprint contains single-quotes (i.e. when a statement fingerprint contains placeholders). Release note: None --- docs/generated/http/full.md | 2 +- pkg/server/serverpb/status.proto | 2 +- pkg/server/statement_diagnostics_requests.go | 2 +- pkg/server/status.go | 2 +- pkg/sql/stmtdiagnostics/statement_diagnostics.go | 14 ++++++++------ .../stmtdiagnostics/statement_diagnostics_test.go | 6 +++--- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index 032447f8eed6..ae920cd75c34 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -3687,7 +3687,7 @@ Support status: [reserved](#support-status) | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | -| statement_fingerprint | [string](#cockroach.server.serverpb.CancelStatementDiagnosticsReportRequest-string) | | | [reserved](#support-status) | +| request_id | [int64](#cockroach.server.serverpb.CancelStatementDiagnosticsReportRequest-int64) | | | [reserved](#support-status) | diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 4bc5ae8a7a9c..d2f3cf665f35 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -1286,7 +1286,7 @@ message CreateStatementDiagnosticsReportResponse { } message CancelStatementDiagnosticsReportRequest { - string statement_fingerprint = 1; + int64 request_id = 1 [(gogoproto.customname) = "RequestID"]; } message CancelStatementDiagnosticsReportResponse { diff --git a/pkg/server/statement_diagnostics_requests.go b/pkg/server/statement_diagnostics_requests.go index 58443d31f8c7..3b2a042b1b16 100644 --- a/pkg/server/statement_diagnostics_requests.go +++ b/pkg/server/statement_diagnostics_requests.go @@ -106,7 +106,7 @@ func (s *statusServer) CancelStatementDiagnosticsReport( } var response serverpb.CancelStatementDiagnosticsReportResponse - err := s.stmtDiagnosticsRequester.CancelRequest(ctx, req.StatementFingerprint) + err := s.stmtDiagnosticsRequester.CancelRequest(ctx, req.RequestID) if err != nil { response.Canceled = false response.Error = err.Error() diff --git a/pkg/server/status.go b/pkg/server/status.go index f501b97b3eff..fab322a0fba1 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -396,7 +396,7 @@ type StmtDiagnosticsRequester interface { // CancelRequest updates an entry in system.statement_diagnostics_requests // for tracing a query with the given fingerprint to be expired (thus, // canceling any new tracing for it). - CancelRequest(ctx context.Context, stmtFingerprint string) error + CancelRequest(ctx context.Context, requestID int64) error } // newStatusServer allocates and returns a statusServer. diff --git a/pkg/sql/stmtdiagnostics/statement_diagnostics.go b/pkg/sql/stmtdiagnostics/statement_diagnostics.go index 30fc6ec6d4a3..c40a36c99346 100644 --- a/pkg/sql/stmtdiagnostics/statement_diagnostics.go +++ b/pkg/sql/stmtdiagnostics/statement_diagnostics.go @@ -380,7 +380,7 @@ func (r *Registry) insertRequestInternal( } // CancelRequest is part of the server.StmtDiagnosticsRequester interface. -func (r *Registry) CancelRequest(ctx context.Context, stmtFingerprint string) error { +func (r *Registry) CancelRequest(ctx context.Context, requestID int64) error { g, err := r.gossip.OptionalErr(48274) if err != nil { return err @@ -402,19 +402,21 @@ func (r *Registry) CancelRequest(ctx context.Context, stmtFingerprint string) er // request as "expired" by setting `expires_at` into the past. This will // allow any queries that are currently being traced for this request to // write their collected bundles. - fmt.Sprintf("UPDATE system.statement_diagnostics_requests SET expires_at = '1970-01-01' "+ - "WHERE completed = false AND statement_fingerprint = '%s' "+ - "AND (expires_at IS NULL OR expires_at > now()) RETURNING id;", stmtFingerprint), + "UPDATE system.statement_diagnostics_requests SET expires_at = '1970-01-01' "+ + "WHERE completed = false AND id = $1 "+ + "AND (expires_at IS NULL OR expires_at > now()) RETURNING id;", + requestID, ) if err != nil { return err } + if row == nil { // There is no pending diagnostics request with the given fingerprint. - return errors.Newf("no pending request found for the fingerprint: %s", stmtFingerprint) + return errors.Newf("no pending request found for the fingerprint: %s", requestID) } - reqID := RequestID(tree.MustBeDInt(row[0])) + reqID := RequestID(requestID) r.cancelRequest(reqID) // Notify all the other nodes that this request has been canceled. diff --git a/pkg/sql/stmtdiagnostics/statement_diagnostics_test.go b/pkg/sql/stmtdiagnostics/statement_diagnostics_test.go index 07f356369867..000f4610758e 100644 --- a/pkg/sql/stmtdiagnostics/statement_diagnostics_test.go +++ b/pkg/sql/stmtdiagnostics/statement_diagnostics_test.go @@ -202,7 +202,7 @@ func TestDiagnosticsRequest(t *testing.T) { // Verify that an error is returned when attempting to cancel non-existent // request. t.Run("cancel non-existent request", func(t *testing.T) { - require.NotNil(t, registry.CancelRequest(ctx, "foo")) + require.NotNil(t, registry.CancelRequest(ctx, 123456789)) }) // Verify that if a request (either conditional or unconditional, w/ or w/o @@ -227,7 +227,7 @@ func TestDiagnosticsRequest(t *testing.T) { require.NoError(t, err) checkNotCompleted(reqID) - err = registry.CancelRequest(ctx, fprint) + err = registry.CancelRequest(ctx, reqID) require.NoError(t, err) checkNotCompleted(reqID) @@ -271,7 +271,7 @@ func TestDiagnosticsRequest(t *testing.T) { go func() { defer wg.Done() <-waitCh - err := registry.CancelRequest(ctx, fprint) + err := registry.CancelRequest(ctx, reqID) require.NoError(t, err) }() From 40390d6a899ace08e0fa99f42f28c5592c6f2e9c Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 26 Jan 2022 17:07:32 -0500 Subject: [PATCH 3/3] opt: push limit into FK and self-joins in more cases Previously, a constant equality condition pushed into both sides of a foreign key join or self-join would prevent a limit from being pushed into the left side of the join. This was because the multiplicity builder could not determine that right filter would not remove any values also removed by the left filter. Without the join being labelled as left-preserving, the limit could not be pushed down. The multiplicity builder has been updated to recognize a few additional cases where left rows are preserved in foreign key joins and self-joins, allowing a limit to be pushed into the left side of the join. Currently, the multiplicity builder only recognizes cases where corresponding left and right columns are held equal to the same constant value. It is possible to extend this to more complex inequalities and boolean expressions, but this is left as a TODO for now. Fixes #74419 Release note (performance improvement): A LIMIT can now be pushed below a foreign key join or self-join in more cases, which may result in more efficient query plans. --- pkg/sql/opt/memo/multiplicity_builder.go | 120 ++++++++++++++-- pkg/sql/opt/memo/multiplicity_builder_test.go | 136 +++++++++++++++++- pkg/sql/opt/memo/testdata/logprops/join | 105 +++++++++++++- pkg/sql/opt/norm/testdata/rules/limit | 43 ++++++ 4 files changed, 386 insertions(+), 18 deletions(-) diff --git a/pkg/sql/opt/memo/multiplicity_builder.go b/pkg/sql/opt/memo/multiplicity_builder.go index a28369186d3b..0f13a212d679 100644 --- a/pkg/sql/opt/memo/multiplicity_builder.go +++ b/pkg/sql/opt/memo/multiplicity_builder.go @@ -234,13 +234,18 @@ func filtersMatchLeftRowsAtMostOnce(left, right RelExpr, filters FiltersExpr) bo // must come from the same foreign key. // // In both the self-join and the foreign key cases, the left columns must be -// not-null, and the right columns must be unfiltered. +// not-null, and the right columns must be either unfiltered, or the left and +// right must be Select expressions where the left side filters imply the right +// side filters and right columns are unfiltered in the right Select's input +// (see condition #3b in the comment for verifyFiltersAreValidEqualities). // -// Why do the left columns have to be not-null and the right columns -// unfiltered? In both the self-join and the foreign-key cases, a non-null -// value in the left column guarantees a corresponding value in the right -// column. As long as no nulls have been added to the left column and no values -// have been removed from the right, this property will be valid. +// Why do the left columns have to be non-null, and the right columns unfiltered +// or filtered identically as their corresponding left column? In both the +// self-join and the foreign-key cases, a non-null value in the left column +// guarantees a corresponding value in the right column. As long as no nulls +// have been added to the left column and no values have been removed from the +// right that have not also been removed from the left, this property will be +// valid. // // Why do all foreign key columns in the foreign key case have to come from the // same foreign key? Equalities on different foreign keys may each be @@ -285,12 +290,16 @@ func filtersMatchAllLeftRows(left, right RelExpr, filters FiltersExpr) bool { } // verifyFiltersAreValidEqualities returns the set of equality columns in the -// right relation and true when all of the following conditions are satisfied: +// right relation and true when all the following conditions are satisfied: // // 1. All filters are equalities. // 2. All equalities directly compare two columns. -// 3. All equalities contain one column from the left not-null columns, and -// one column from the right unfiltered columns. +// 3. All equalities x=y (or y=x) have x as a left non-null column and y as a +// right column, and either: +// a. y is an unfiltered column in the right expression, or +// b. both the left and right expressions are Selects; the left side +// filters imply the right side filters when replacing x with y; and y +// is an unfiltered column in the right Select's input. // 4. All equality columns come from a base table. // 5. All left columns come from a single table, and all right columns come // from a single table. @@ -304,10 +313,6 @@ func verifyFiltersAreValidEqualities( var leftTab, rightTab opt.TableID leftNotNullCols := left.Relational().NotNullCols rightUnfilteredCols := deriveUnfilteredCols(right) - if rightUnfilteredCols.Empty() { - // There are no unfiltered columns from the right input. - return opt.ColSet{}, false - } for i := range filters { eq, _ := filters[i].Condition.(*EqExpr) @@ -329,9 +334,21 @@ func verifyFiltersAreValidEqualities( // Normalize leftColID to come from leftColIDs. if !leftNotNullCols.Contains(leftColID) { leftColID, rightColID = rightColID, leftColID + if !leftNotNullCols.Contains(leftColID) { + // Condition #3: Left column is not guaranteed to be non-null. + return opt.ColSet{}, false + } } - if !leftNotNullCols.Contains(leftColID) || !rightUnfilteredCols.Contains(rightColID) { - // Condition #3: Columns don't come from both the left and right ColSets. + + switch { + case rightUnfilteredCols.Contains(rightColID): + // Condition #3a: the right column is unfiltered. + case rightHasSingleFilterThatMatchesLeft(left, right, leftColID, rightColID): + // Condition #3b: The left and right are Selects where the left filters + // imply the right filters when replacing the left column with the right + // column, and the right column is unfiltered in the right Select's + // input. + default: return opt.ColSet{}, false } @@ -356,6 +373,79 @@ func verifyFiltersAreValidEqualities( return rightEqualityCols, true } +// rightHasSingleFilterThatMatchesLeft returns true if: +// +// 1. Both left and right are Select expressions. +// 2. rightCol is unfiltered in right's input. +// 3. The left Select has a filter in the form leftCol=const. +// 4. The right Select has a single filter in the form rightCol=const where +// the const value is the same as the const value in (2). +// +// This function is used by verifyFiltersAreValidEqualities to try to prove that +// every row in the left input of a join will have a match in the right input +// (see condition #3b in the comment of verifyFiltersAreValidEqualities). +// +// TODO(mgartner): Extend this to return true when the left filters imply the +// right filters, after remapping leftCol to rightCol in the left filters. For +// example, leftCol<10 implies rightCol<20 when leftCol and rightCol are held +// equal by the join filters. This may be a good opportunity to reuse +// partialidx.Implicator. Be aware that it might not be possible to simply +// replace columns in a filter when one of the columns has a composite type. +func rightHasSingleFilterThatMatchesLeft(left, right RelExpr, leftCol, rightCol opt.ColumnID) bool { + leftSelect, ok := left.(*SelectExpr) + if !ok { + return false + } + rightSelect, ok := right.(*SelectExpr) + if !ok { + return false + } + + // Return false if the right column has been filtered in the input to + // rightSelect. + rightUnfilteredCols := deriveUnfilteredCols(rightSelect.Input) + if !rightUnfilteredCols.Contains(rightCol) { + return false + } + + // Return false if rightSelect has more than one filter. + if len(rightSelect.Filters) > 1 { + return false + } + + // constValueForCol searches for an expression in the form + // (Eq (Var col) Const) and returns the Const expression, if one is found. + constValueForCol := func(filters FiltersExpr, col opt.ColumnID) (_ *ConstExpr, ok bool) { + var constant *ConstExpr + for i := range filters { + if !filters[i].ScalarProps().OuterCols.Contains(col) { + continue + } + eq, _ := filters[i].Condition.(*EqExpr) + if eq == nil { + continue + } + v, _ := eq.Left.(*VariableExpr) + c, _ := eq.Right.(*ConstExpr) + if v == nil || v.Col != col || c == nil { + continue + } + constant = c + } + return constant, constant != nil + } + + leftConst, ok := constValueForCol(leftSelect.Filters, leftCol) + if !ok { + return false + } + rightConst, ok := constValueForCol(rightSelect.Filters, rightCol) + if !ok { + return false + } + return leftConst == rightConst +} + // checkSelfJoinCase returns true if all equalities in the given FiltersExpr // are between columns from the same position in the same base table. Panics // if verifyFilters is not checked first. diff --git a/pkg/sql/opt/memo/multiplicity_builder_test.go b/pkg/sql/opt/memo/multiplicity_builder_test.go index 8450f8da41bb..e4ac1291c085 100644 --- a/pkg/sql/opt/memo/multiplicity_builder_test.go +++ b/pkg/sql/opt/memo/multiplicity_builder_test.go @@ -19,6 +19,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat" "github.com/cockroachdb/cockroach/pkg/sql/parser" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/errors" ) @@ -67,6 +68,7 @@ func TestGetJoinMultiplicity(t *testing.T) { xyScan, xyCols := ob.xyScan() xyScan2, xyCols2 := ob.xyScan() + xyScanFiltered, xyColsFiltered := ob.makeFilteredScan("xy") uvScan, uvCols := ob.uvScan() fkScan, fkCols := ob.fkScan() abcScan, abcCols := ob.abcScan() @@ -300,6 +302,108 @@ func TestGetJoinMultiplicity(t *testing.T) { on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols2[0])), expected: "left-rows(exactly-one), right-rows(zero-or-more)", }, + { // 21 + // SELECT * FROM fk_tab INNER JOIN xy ON r1 = x WHERE r1 = 5 AND x = 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))), + right: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))), + on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])), + expected: "left-rows(exactly-one), right-rows(zero-or-more)", + }, + { // 22 + // SELECT * FROM xy INNER JOIN xy AS xy2 ON xy.x = xy2.x WHERE xy.x = 5 AND xy2.x = 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))), + right: ob.makeSelect(xyScan2, ob.makeFilters(ob.makeConstEquality(xyCols2[0], 5))), + on: ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])), + expected: "left-rows(exactly-one), right-rows(exactly-one)", + }, + { // 23 + // SELECT * FROM fk_tab INNER JOIN xy ON r1 = x WHERE r1 = 5 AND x >= 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))), + right: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstInequality(xyCols[0], 5))), + on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])), + expected: "left-rows(zero-or-one), right-rows(zero-or-more)", + }, + { // 24 + // SELECT * FROM xy INNER JOIN xy AS xy2 ON xy.x = xy2.x WHERE xy.x = 5 AND xy2.x >= 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))), + right: ob.makeSelect(xyScan2, ob.makeFilters(ob.makeConstInequality(xyCols2[0], 5))), + on: ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])), + expected: "left-rows(zero-or-one), right-rows(zero-or-one)", + }, + { // 25 + // SELECT * FROM fk_tab INNER JOIN xy ON r1 = x WHERE r1 = 5 AND y = 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))), + right: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[1], 5))), + on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])), + expected: "left-rows(zero-or-one), right-rows(zero-or-more)", + }, + { // 26 + // SELECT * FROM xy INNER JOIN xy AS xy2 ON xy.x = xy2.x WHERE xy.x = 5 AND xy2.y = 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))), + right: ob.makeSelect(xyScan2, ob.makeFilters(ob.makeConstEquality(xyCols2[1], 5))), + on: ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])), + expected: "left-rows(zero-or-one), right-rows(zero-or-one)", + }, + { // 27 + // SELECT * FROM fk_tab INNER JOIN ( + // SELECT * FROM xy WHERE x = 5 LIMIT 10 + // ) AS xy2 ON r1 = x + // WHERE xy.x = 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))), + right: ob.makeSelect(xyScanFiltered, ob.makeFilters( + ob.makeConstEquality(xyColsFiltered[0], 5), + )), + on: ob.makeFilters(ob.makeEquality(fkCols[0], xyColsFiltered[0])), + expected: "left-rows(zero-or-one), right-rows(zero-or-more)", + }, + { // 28 + // SELECT * FROM xy INNER JOIN ( + // SELECT * FROM xy WHERE x = 5 LIMIT 10 + // ) AS xy2 ON xy.x = xy2.x + // WHERE xy.x = 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))), + right: ob.makeSelect(xyScanFiltered, ob.makeFilters( + ob.makeConstEquality(xyColsFiltered[0], 5), + )), + on: ob.makeFilters(ob.makeEquality(xyCols[0], xyColsFiltered[0])), + expected: "left-rows(zero-or-one), right-rows(exactly-one)", + }, + { // 29 + // SELECT * FROM fk_tab INNER JOIN ( + // SELECT * FROM xy WHERE x = 5 AND y = 2 + // ) AS xy2 ON r1 = x + // WHERE xy.x = 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(fkScan, ob.makeFilters(ob.makeConstEquality(fkCols[0], 5))), + right: ob.makeSelect(xyScan, ob.makeFilters( + ob.makeConstEquality(xyCols[0], 5), + ob.makeConstEquality(xyCols[1], 2), + )), + on: ob.makeFilters(ob.makeEquality(fkCols[0], xyCols[0])), + expected: "left-rows(zero-or-one), right-rows(zero-or-more)", + }, + { // 30 + // SELECT * FROM xy INNER JOIN ( + // SELECT * FROM xy WHERE x = 5 AND y = 2 + // ) AS xy2 ON xy.x = xy2.x + // WHERE xy.x = 5; + joinOp: opt.InnerJoinOp, + left: ob.makeSelect(xyScan, ob.makeFilters(ob.makeConstEquality(xyCols[0], 5))), + right: ob.makeSelect(xyScan2, ob.makeFilters( + ob.makeConstEquality(xyCols2[0], 5), + ob.makeConstEquality(xyCols2[1], 2), + )), + on: ob.makeFilters(ob.makeEquality(xyCols[0], xyCols2[0])), + expected: "left-rows(zero-or-one), right-rows(exactly-one)", + }, } for i, tc := range testCases { @@ -308,7 +412,7 @@ func TestGetJoinMultiplicity(t *testing.T) { joinWithMult, _ := join.(joinWithMultiplicity) multiplicity := joinWithMult.getMultiplicity() if multiplicity.Format(tc.joinOp) != tc.expected { - t.Fatalf("\nexpected: %s\nactual: %s", tc.expected, multiplicity.Format(tc.joinOp)) + t.Errorf("\nexpected: %s\nactual: %s", tc.expected, multiplicity.Format(tc.joinOp)) } }) } @@ -355,6 +459,18 @@ func (ob *testOpBuilder) createTables(stmts string) { } func (ob *testOpBuilder) makeScan(tableName tree.Name) (scan RelExpr, vars []*VariableExpr) { + return ob.makeScanImpl(tableName, false /* filtered */) +} + +func (ob *testOpBuilder) makeFilteredScan( + tableName tree.Name, +) (scan RelExpr, vars []*VariableExpr) { + return ob.makeScanImpl(tableName, true /* filtered */) +} + +func (ob *testOpBuilder) makeScanImpl( + tableName tree.Name, filtered bool, +) (scan RelExpr, vars []*VariableExpr) { tn := tree.NewUnqualifiedTableName(tableName) tab := ob.cat.Table(tn) tabID := ob.mem.Metadata().AddTable(tab, tn) @@ -365,7 +481,11 @@ func (ob *testOpBuilder) makeScan(tableName tree.Name) (scan RelExpr, vars []*Va newVar := ob.mem.MemoizeVariable(col) vars = append(vars, newVar) } - return ob.mem.MemoizeScan(&ScanPrivate{Table: tabID, Cols: cols}), vars + sp := &ScanPrivate{Table: tabID, Cols: cols} + if filtered { + sp.HardLimit = 10 + } + return ob.mem.MemoizeScan(sp), vars } func (ob *testOpBuilder) xyScan() (scan RelExpr, vars []*VariableExpr) { @@ -392,6 +512,10 @@ func (ob *testOpBuilder) oneNullMultiColFKScan() (scan RelExpr, vars []*Variable return ob.makeScan("one_null_multi_col_fk_tab") } +func (ob *testOpBuilder) makeSelect(input RelExpr, filters FiltersExpr) RelExpr { + return ob.mem.MemoizeSelect(input, filters) +} + func (ob *testOpBuilder) makeInnerJoin(left, right RelExpr, on FiltersExpr) RelExpr { return ob.mem.MemoizeInnerJoin(left, right, on, EmptyJoinPrivate) } @@ -433,6 +557,14 @@ func (ob *testOpBuilder) makeEquality(left, right *VariableExpr) opt.ScalarExpr return ob.mem.MemoizeEq(left, right) } +func (ob *testOpBuilder) makeConstEquality(v *VariableExpr, c int) opt.ScalarExpr { + return ob.mem.MemoizeEq(v, ob.mem.MemoizeConst(tree.NewDInt(tree.DInt(c)), types.Int)) +} + +func (ob *testOpBuilder) makeConstInequality(v *VariableExpr, c int) opt.ScalarExpr { + return ob.mem.MemoizeGe(v, ob.mem.MemoizeConst(tree.NewDInt(tree.DInt(c)), types.Int)) +} + func (ob *testOpBuilder) makeFilters(conditions ...opt.ScalarExpr) (filters FiltersExpr) { for i := range conditions { filtersItem := FiltersItem{Condition: conditions[i]} diff --git a/pkg/sql/opt/memo/testdata/logprops/join b/pkg/sql/opt/memo/testdata/logprops/join index d41cb39e3707..afeca680c877 100644 --- a/pkg/sql/opt/memo/testdata/logprops/join +++ b/pkg/sql/opt/memo/testdata/logprops/join @@ -2023,6 +2023,57 @@ inner-join (hash) ├── variable: x:7 [type=int] └── variable: r1:3 [type=int] +# LeftJoin case with a not-null foreign key and a constant equality filter. The +# filter pushed down on both sides of the join is redundant, so all rows on the +# left side of the join will be preserved. +norm +SELECT * FROM fk LEFT JOIN xysd ON x = r1 WHERE r1 = 10 +---- +inner-join (hash) + ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:7(int!null) y:8(int) s:9(string) d:10(decimal!null) + ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more) + ├── key: (1) + ├── fd: ()-->(3,7-10), (1)-->(2,4), (3)==(7), (7)==(3) + ├── prune: (1,2,4,8-10) + ├── interesting orderings: (+1 opt(3)) + ├── select + │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ ├── key: (1) + │ ├── fd: ()-->(3), (1)-->(2,4) + │ ├── prune: (1,2,4) + │ ├── interesting orderings: (+1 opt(3)) + │ ├── scan fk + │ │ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) + │ │ ├── key: (1) + │ │ ├── fd: (1)-->(2-4) + │ │ ├── prune: (1-4) + │ │ ├── interesting orderings: (+1) + │ │ └── unfiltered-cols: (1-6) + │ └── filters + │ └── eq [type=bool, outer=(3), constraints=(/3: [/10 - /10]; tight), fd=()-->(3)] + │ ├── variable: r1:3 [type=int] + │ └── const: 10 [type=int] + ├── select + │ ├── columns: x:7(int!null) y:8(int) s:9(string) d:10(decimal!null) + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ ├── fd: ()-->(7-10) + │ ├── prune: (8-10) + │ ├── scan xysd + │ │ ├── columns: x:7(int!null) y:8(int) s:9(string) d:10(decimal!null) + │ │ ├── key: (7) + │ │ ├── fd: (7)-->(8-10), (9,10)~~>(7,8) + │ │ ├── prune: (7-10) + │ │ ├── interesting orderings: (+7) (-9,+10,+7) + │ │ └── unfiltered-cols: (7-12) + │ └── filters + │ └── eq [type=bool, outer=(7), constraints=(/7: [/10 - /10]; tight), fd=()-->(7)] + │ ├── variable: x:7 [type=int] + │ └── const: 10 [type=int] + └── filters + └── eq [type=bool, outer=(3,7), constraints=(/3: (/NULL - ]; /7: (/NULL - ]), fd=(3)==(7), (7)==(3)] + ├── variable: x:7 [type=int] + └── variable: r1:3 [type=int] # LeftJoin case with a nullable foreign key. The LeftJoin cannot be simplified # because a nullable foreign key is not guaranteed matches. @@ -2121,6 +2172,58 @@ inner-join (hash) ├── variable: xysd.x:1 [type=int] └── variable: a.x:7 [type=int] +# Self-join case with a constant equality filter. The filter pushed down on both +# sides of the join is redundant, so all rows on the left side of the join will +# be preserved. +norm +SELECT * FROM xysd INNER JOIN xysd AS a ON xysd.x = a.x WHERE xysd.x = 10 +---- +inner-join (hash) + ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) x:7(int!null) y:8(int) s:9(string) d:10(decimal!null) + ├── cardinality: [0 - 1] + ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one) + ├── key: () + ├── fd: ()-->(1-4,7-10), (7)==(1), (1)==(7) + ├── prune: (2-4,8-10) + ├── select + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ ├── fd: ()-->(1-4) + │ ├── prune: (2-4) + │ ├── scan xysd + │ │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) + │ │ ├── key: (1) + │ │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2) + │ │ ├── prune: (1-4) + │ │ ├── interesting orderings: (+1) (-3,+4,+1) + │ │ └── unfiltered-cols: (1-6) + │ └── filters + │ └── eq [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)] + │ ├── variable: xysd.x:1 [type=int] + │ └── const: 10 [type=int] + ├── select + │ ├── columns: a.x:7(int!null) a.y:8(int) a.s:9(string) a.d:10(decimal!null) + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ ├── fd: ()-->(7-10) + │ ├── prune: (8-10) + │ ├── scan xysd [as=a] + │ │ ├── columns: a.x:7(int!null) a.y:8(int) a.s:9(string) a.d:10(decimal!null) + │ │ ├── key: (7) + │ │ ├── fd: (7)-->(8-10), (9,10)~~>(7,8) + │ │ ├── prune: (7-10) + │ │ ├── interesting orderings: (+7) (-9,+10,+7) + │ │ └── unfiltered-cols: (7-12) + │ └── filters + │ └── eq [type=bool, outer=(7), constraints=(/7: [/10 - /10]; tight), fd=()-->(7)] + │ ├── variable: a.x:7 [type=int] + │ └── const: 10 [type=int] + └── filters + └── eq [type=bool, outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)] + ├── variable: xysd.x:1 [type=int] + └── variable: a.x:7 [type=int] + # Case with duplicated referenced columns. norm SELECT * FROM @@ -2444,7 +2547,7 @@ inner-join (hash) norm SELECT * FROM ref -INNER JOIN abc +INNER JOIN abc ON (r1, r2, r3) = (a, b, c) ---- inner-join (hash) diff --git a/pkg/sql/opt/norm/testdata/rules/limit b/pkg/sql/opt/norm/testdata/rules/limit index 347f2e4defbc..36448d5c02c0 100644 --- a/pkg/sql/opt/norm/testdata/rules/limit +++ b/pkg/sql/opt/norm/testdata/rules/limit @@ -1088,6 +1088,49 @@ left-join (hash) └── filters └── a:1 = u:5 [outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] +# Push the limit if both sides of an inner join have identical equality filters +# on FK equality columns. +norm expect=PushLimitIntoJoinLeft +SELECT * FROM kvr_fk INNER JOIN uv ON r = u WHERE r = 5 LIMIT 10 +---- +inner-join (hash) + ├── columns: k:1!null v:2 r:3!null u:6!null v:7 + ├── cardinality: [0 - 10] + ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more) + ├── key: (1) + ├── fd: ()-->(3,6,7), (1)-->(2), (3)==(6), (6)==(3) + ├── limit + │ ├── columns: k:1!null kvr_fk.v:2 r:3!null + │ ├── cardinality: [0 - 10] + │ ├── key: (1) + │ ├── fd: ()-->(3), (1)-->(2) + │ ├── select + │ │ ├── columns: k:1!null kvr_fk.v:2 r:3!null + │ │ ├── key: (1) + │ │ ├── fd: ()-->(3), (1)-->(2) + │ │ ├── limit hint: 10.00 + │ │ ├── scan kvr_fk + │ │ │ ├── columns: k:1!null kvr_fk.v:2 r:3!null + │ │ │ ├── key: (1) + │ │ │ ├── fd: (1)-->(2,3) + │ │ │ └── limit hint: 1000.00 + │ │ └── filters + │ │ └── r:3 = 5 [outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)] + │ └── 10 + ├── select + │ ├── columns: u:6!null uv.v:7 + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ ├── fd: ()-->(6,7) + │ ├── scan uv + │ │ ├── columns: u:6!null uv.v:7 + │ │ ├── key: (6) + │ │ └── fd: (6)-->(7) + │ └── filters + │ └── u:6 = 5 [outer=(6), constraints=(/6: [/5 - /5]; tight), fd=()-->(6)] + └── filters + └── r:3 = u:6 [outer=(3,6), constraints=(/3: (/NULL - ]; /6: (/NULL - ]), fd=(3)==(6), (6)==(3)] + # Don't push negative limits (or we would enter an infinite loop). norm expect-not=PushLimitIntoJoinLeft SELECT * FROM ab LEFT JOIN uv ON a = u LIMIT -1