diff --git a/pkg/ccl/logictestccl/testdata/logic_test/multi_region_foreign_key_lookup_join b/pkg/ccl/logictestccl/testdata/logic_test/multi_region_foreign_key_lookup_join new file mode 100644 index 000000000000..b4a0c0e267e9 --- /dev/null +++ b/pkg/ccl/logictestccl/testdata/logic_test/multi_region_foreign_key_lookup_join @@ -0,0 +1,237 @@ +# tenant-cluster-setting-override-opt: allow-multi-region-abstractions-for-secondary-tenants +# LogicTest: multiregion-9node-3region-3azs !metamorphic-batch-sizes + +# Set the closed timestamp interval to be short to shorten the amount of time +# we need to wait for the system config to propagate. +statement ok +SET CLUSTER SETTING kv.closed_timestamp.side_transport_interval = '10ms'; + +statement ok +SET CLUSTER SETTING kv.closed_timestamp.target_duration = '10ms'; + +statement ok +CREATE DATABASE multi_region_test_db PRIMARY REGION "ca-central-1" REGIONS "ap-southeast-2", "us-east-1"; + +statement ok +USE multi_region_test_db + +statement ok +CREATE TABLE parent ( + p_id INT PRIMARY KEY, + cr crdb_internal_region NOT NULL DEFAULT gateway_region()::crdb_internal_region, + p_data string, + p_int INT, + p_text TEXT, + p_json JSON, + UNIQUE INDEX (p_data), + INVERTED INDEX (p_int, p_json), + INVERTED INDEX (p_int, p_text gin_trgm_ops) +) LOCALITY REGIONAL BY ROW AS cr; + +statement ok +CREATE TABLE child ( + c_id INT PRIMARY KEY, + c_p_id INT, + cr crdb_internal_region NOT NULL DEFAULT gateway_region()::crdb_internal_region, + c_data string, + c_int INt, + c_json JSON, + FOREIGN KEY (c_p_id, cr) REFERENCES parent (p_id, cr) +) LOCALITY REGIONAL BY ROW AS cr; + +# A 'cr = cr' condition should be derived for this join. +query T retry +EXPLAIN SELECT * +FROM child +INNER LOOKUP JOIN parent ON c_p_id = p_id +---- +distribution: local +vectorized: true +· +• lookup join +│ table: parent@parent_pkey +│ equality: (cr, c_p_id) = (cr,p_id) +│ equality cols are key +│ +└── • scan + missing stats + table: child@child_pkey + spans: FULL SCAN + +# A 'cr = cr' condition should not be derived for this join. +query T retry +EXPLAIN SELECT 1 +FROM child +INNER LOOKUP JOIN parent ON p_data = c_data +---- +distribution: local +vectorized: true +· +• render +│ +└── • lookup join + │ table: parent@parent_p_data_key + │ equality cols are key + │ lookup condition: (cr = 'ap-southeast-2') AND (c_data = p_data) + │ remote lookup condition: (cr IN ('ca-central-1', 'us-east-1')) AND (c_data = p_data) + │ + └── • scan + missing stats + table: child@child_pkey + spans: FULL SCAN + +# Verify inverted index cases cannot derive a 'cr = cr' condition. Inverted +# join is not possible. +statement error could not produce a query plan conforming to the INVERTED JOIN hint +EXPLAIN SELECT * +FROM child +INNER INVERTED JOIN parent ON p_int = c_int AND p_text LIKE '%foo%' + +subtest implicit_crdb_region_column + +statement ok +CREATE TABLE parent_rbr ( + p_id INT PRIMARY KEY, + p_data STRING, + p_int INT +) LOCALITY REGIONAL BY ROW; + +statement ok +CREATE TABLE child_rbr ( + c_id INT PRIMARY KEY, + c_p_id INT, + c_int INT, + c_int2 INT, + FOREIGN KEY (c_p_id, crdb_region) REFERENCES parent_rbr (p_id, crdb_region) +) LOCALITY REGIONAL BY ROW; + +statement ok +INSERT INTO parent_rbr VALUES (1, 'foo', 1); + +statement ok +INSERT INTO parent_rbr (crdb_region, p_id, p_data, p_int) VALUES ('us-east-1', 2, 'bar', 2); + +statement ok +INSERT INTO child_rbr VALUES (1, 1, 1, 0); + +statement ok +INSERT INTO child_rbr (crdb_region, c_id, c_p_id, c_int, c_int2) VALUES ('us-east-1', 2, 2, 2, 0); + +# A 'crdb_region = crdb_region' condition should be derived for this join. +query T retry +EXPLAIN SELECT * +FROM child_rbr +INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id +---- +distribution: local +vectorized: true +· +• lookup join +│ table: parent_rbr@parent_rbr_pkey +│ equality: (crdb_region, c_p_id) = (crdb_region,p_id) +│ equality cols are key +│ +└── • scan + missing stats + table: child_rbr@child_rbr_pkey + spans: FULL SCAN + +query IIIIITI +SELECT * +FROM child_rbr +INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id +---- +1 1 1 0 1 foo 1 +2 2 2 0 2 bar 2 + +# A 'crdb_region = crdb_region' condition should be derived for this join with +# selection filter. +query T retry +EXPLAIN SELECT * +FROM child_rbr +INNER JOIN parent_rbr ON c_p_id = p_id WHERE c_int = 1 +---- +distribution: local +vectorized: true +· +• lookup join +│ table: parent_rbr@parent_rbr_pkey +│ equality: (crdb_region, c_p_id) = (crdb_region,p_id) +│ equality cols are key +│ +└── • filter + │ filter: c_int = 1 + │ + └── • scan + missing stats + table: child_rbr@child_rbr_pkey + spans: [/'ap-southeast-2' - /'ap-southeast-2'] [/'ca-central-1' - /'us-east-1'] + +query IIIIITI +SELECT * +FROM child_rbr +INNER JOIN parent_rbr ON c_p_id = p_id WHERE c_int = 1 +---- +1 1 1 0 1 foo 1 + +# A 'crdb_region = crdb_region' condition should be derived for this join with +# a projection and selection before the join. +query T retry +EXPLAIN SELECT * +FROM child_rbr +INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id AND c_int + c_int2 = p_int WHERE c_int2 > -1 +---- +distribution: local +vectorized: true +· +• lookup join +│ table: parent_rbr@parent_rbr_pkey +│ equality: (crdb_region, c_p_id) = (crdb_region,p_id) +│ equality cols are key +│ pred: column14 = p_int +│ +└── • render + │ + └── • filter + │ filter: c_int2 > -1 + │ + └── • scan + missing stats + table: child_rbr@child_rbr_pkey + spans: [/'ap-southeast-2' - /'ap-southeast-2'] [/'ca-central-1' - /'us-east-1'] + +query IIIIITI +SELECT * +FROM child_rbr +INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id AND c_int + c_int2 = p_int WHERE c_int2 > -1 +---- +1 1 1 0 1 foo 1 +2 2 2 0 2 bar 2 + +# A 'crdb_region = crdb_region' condition should be derived for this join with +# selection filter on child_rbr.crdb_region. Is it worthwhile to compute TC and +# generate parent_rbr.crdb_region = 'ap-southeast-2'? +query T retry +EXPLAIN SELECT * +FROM child_rbr +INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id WHERE child_rbr.crdb_region = 'ap-southeast-2' +---- +distribution: local +vectorized: true +· +• lookup join +│ table: parent_rbr@parent_rbr_pkey +│ equality: (crdb_region, c_p_id) = (crdb_region,p_id) +│ equality cols are key +│ +└── • scan + missing stats + table: child_rbr@child_rbr_pkey + spans: [/'ap-southeast-2' - /'ap-southeast-2'] + +query IIIIITI +SELECT * +FROM child_rbr +INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id WHERE child_rbr.crdb_region = 'ap-southeast-2' +---- +1 1 1 0 1 foo 1 diff --git a/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go b/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go index 574ac7ce8e66..b047190a73ce 100644 --- a/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go +++ b/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go @@ -115,6 +115,13 @@ func TestCCLLogic_multi_region_drop_region( runCCLLogicTest(t, "multi_region_drop_region") } +func TestCCLLogic_multi_region_foreign_key_lookup_join( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "multi_region_foreign_key_lookup_join") +} + func TestCCLLogic_multi_region_import_export( t *testing.T, ) { diff --git a/pkg/sql/opt/lookupjoin/constraint_builder.go b/pkg/sql/opt/lookupjoin/constraint_builder.go index 15d0682f3a5a..6df6ea84a924 100644 --- a/pkg/sql/opt/lookupjoin/constraint_builder.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder.go @@ -139,10 +139,14 @@ func (b *ConstraintBuilder) Init( // The constraint returned may be unconstrained if no constraint could be built. // foundEqualityCols indicates whether any equality conditions were used to // constrain the index columns; this can be used to decide whether to build a -// lookup join. +// lookup join. `derivedFkOnFilters` is a set of extra equijoin predicates, +// derived from a foreign key constraint, to add to the explicit ON clause, +// but which should not be used in calculating join selectivity estimates. func (b *ConstraintBuilder) Build( - index cat.Index, onFilters, optionalFilters memo.FiltersExpr, + index cat.Index, onFilters, optionalFilters, derivedFkOnFilters memo.FiltersExpr, ) (_ Constraint, foundEqualityCols bool) { + onFilters = append(onFilters, derivedFkOnFilters...) + // Extract the equality columns from onFilters. We cannot use the results of // the extraction in Init because onFilters may be reduced by the caller // after Init due to partial index implication. If the filters are reduced, @@ -189,6 +193,25 @@ func (b *ConstraintBuilder) Build( keyCols := make(opt.ColList, 0, numIndexKeyCols) var derivedEquivCols opt.ColSet + // Don't change the selectivity estimate of this join vs. other joins which + // don't use derivedFkOnFilters. Add column IDs from these filters to the set + // of columns to ignore for join selectivity estimation. The alternative would + // be to derive these filters for all joins, but it's not clear that this + // would always be better given that some joins like hash join would have more + // terms to evaluate. Also, that approach may cause selectivity + // underestimation, so would require more effort to make sure correlations + // between columns are accurately captured. + for _, filtersItem := range derivedFkOnFilters { + if eqExpr, ok := filtersItem.Condition.(*memo.EqExpr); ok { + leftVariable, leftOk := eqExpr.Left.(*memo.VariableExpr) + rightVariable, rightOk := eqExpr.Right.(*memo.VariableExpr) + if leftOk && rightOk { + derivedEquivCols.Add(leftVariable.Col) + derivedEquivCols.Add(rightVariable.Col) + } + } + } + rightSideCols := make(opt.ColList, 0, numIndexKeyCols) var inputProjections memo.ProjectionsExpr var lookupExpr memo.FiltersExpr diff --git a/pkg/sql/opt/lookupjoin/constraint_builder_test.go b/pkg/sql/opt/lookupjoin/constraint_builder_test.go index ee74c5aaf7c3..9d8118272cec 100644 --- a/pkg/sql/opt/lookupjoin/constraint_builder_test.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder_test.go @@ -143,7 +143,8 @@ func TestLookupConstraints(t *testing.T) { var cb lookupjoin.ConstraintBuilder cb.Init(&f, md, f.EvalContext(), rightTable, leftCols, rightCols) - lookupConstraint, _ := cb.Build(index, filters, optionalFilters) + lookupConstraint, _ := cb.Build(index, filters, optionalFilters, + memo.FiltersExpr{} /* derivedFkOnFilters */) var b strings.Builder if lookupConstraint.IsUnconstrained() { b.WriteString("lookup join not possible") diff --git a/pkg/sql/opt/norm/join_funcs.go b/pkg/sql/opt/norm/join_funcs.go index 39f91f422c10..94e635454f95 100644 --- a/pkg/sql/opt/norm/join_funcs.go +++ b/pkg/sql/opt/norm/join_funcs.go @@ -15,6 +15,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/props" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/errors" "github.com/cockroachdb/redact" @@ -621,3 +622,271 @@ func (c *CustomFuncs) MakeProjectionsFromValues(values *memo.ValuesExpr) memo.Pr } return projections } + +// ForeignKeyConstraintFilters examines `fkChild`, the left input to lookup join +// (and possible foreign key child table), and `fkParentScanPrivate`, the lookup +// table of lookup join (and possible foreign key parent table), along with the +// `indexCols` of the current index being considered for lookup join. +// `fkParentJoinKey` contains the ON clause equijoin columns which form a +// reduced strict key and `fkParentEquijoinCols` contains the full set of +// equijoin columns on the foreign key parent lookup table. If `fkParentJoinKey` +// is a proper subset of FK constraint referencing columns on `fkChild`, then +// equijoin predicates are built between referenced and referencing columns of +// the FK constraint not currently represented in `fkParentEquijoinCols`, and +// returned to the caller. +func (c *CustomFuncs) ForeignKeyConstraintFilters( + fkChild memo.RelExpr, + fkParentScanPrivate *memo.ScanPrivate, + indexCols, fkParentJoinKey opt.ColSet, + fkParentEquijoinCols, fkChildEquijoinCols opt.ColList, +) (fkFilters memo.FiltersExpr) { + md := c.mem.Metadata() + fkChildEquijoinColSet := fkChildEquijoinCols.ToSet() + + tableIDs := make(map[opt.TableID]struct{}) + fkChildEquijoinColSet.ForEach(func(x opt.ColumnID) { + tabID := md.ColumnMeta(x).Table + if tabID != opt.TableID(0) { + tableIDs[tabID] = struct{}{} + } + }) + + matchedEquijoinCols := make(map[opt.ColumnID]opt.ColumnID) + if len(fkParentEquijoinCols) != len(fkChildEquijoinCols) { + panic(errors.AssertionFailedf("ForeignKeyConstraintFilters expects fkParentEquijoinCols and fkChildEquijoinCols to have the same number of columns.")) + } + for i := range fkParentEquijoinCols { + matchedEquijoinCols[fkParentEquijoinCols[i]] = fkChildEquijoinCols[i] + } + + parentTable := md.Table(fkParentScanPrivate.Table) + fkChildNotNullCols := fkChild.Relational().NotNullCols + + for fkChildTableID := range tableIDs { + fkChildTable := md.Table(fkChildTableID) + fkChildTableMeta := md.TableMeta(fkChildTableID) + if fkChildTableMeta.IgnoreForeignKeys { + // We can't use foreign keys from this table. + continue + } + + for i := 0; i < fkChildTable.OutboundForeignKeyCount(); i++ { + fk := fkChildTable.OutboundForeignKey(i) + if !fk.Validated() { + // The data is not guaranteed to follow the foreign key constraint. + continue + } + if parentTable.ID() != fk.ReferencedTableID() { + continue + } + fkFilters = nil + fkFiltersValid := true + var fkParentColSet opt.ColSet + for j := 0; j < fk.ColumnCount() && fkFiltersValid; j++ { + fkParentColumnOrd := fk.ReferencedColumnOrdinal(parentTable, j) + fkParentColID := fkParentScanPrivate.Table.ColumnID(fkParentColumnOrd) + fkParentColSet.Add(fkParentColID) + } + + // The strict key covered by equijoin columns must be a subset of the FK + // constraint referenced columns to guarantee equating the remaining FK + // columns is legal. + if !fkParentJoinKey.SubsetOf(fkParentColSet) { + continue + } + for j := 0; j < fk.ColumnCount() && fkFiltersValid; j++ { + fkParentColumnOrd := fk.ReferencedColumnOrdinal(parentTable, j) + fkChildColumnOrd := fk.OriginColumnOrdinal(fkChildTable, j) + fkChildColID := fkChildTableID.ColumnID(fkChildColumnOrd) + fkParentColID := fkParentScanPrivate.Table.ColumnID(fkParentColumnOrd) + if inputJoinCol, ok := matchedEquijoinCols[fkParentColID]; ok { + if inputJoinCol != fkChildColID { + // The equijoin term on fkParentColID in the ON clause doesn't + // equate to the same child table column as in the FK constraint. Do + // not derive terms from this constraint. + fkFiltersValid = false + break + } + // This FK constraint equality predicate is already in the ON clause; + // no need to build a new one. + continue + } else if fkParentJoinKey.Contains(fkParentColID) { + // If the parent column we're looking at is part of the join key, we + // expect an entry in matchedEquijoinCols to verify the join predicate + // columns and FK constraint matched columns are the same. If the + // entry doesn't exist, perhaps the key was reduced to a different + // column id than was present in the predicate, in which case we can't + // verify the predicate. + fkFiltersValid = false + break + } + // If the lookup table's ScanPrivate does not include the join column + // for the predicate we want to build, then don't build the predicate. + if !fkParentScanPrivate.Cols.Contains(fkParentColID) { + fkFiltersValid = false + break + } + // If the FK child table column is not included in the fkChild's output + // columns, it would be incorrect to build a predicate on this column. + if fkChild != nil && !fkChild.Relational().OutputCols.Contains(fkChildColID) { + fkFiltersValid = false + break + } + if !indexCols.Contains(fkParentColID) { + fkFiltersValid = false + break + } + if !fkChildNotNullCols.Contains(fkChildColID) { + // If the base table column is not nullable, the output column will + // also contain no nulls, unless it comes from the outer table of an + // outer join, in which case all columns from that source table will + // be null for any given output row and therefore the ON clause + // equijoin constraints overlapping the FK constraint will have + // already disqualified the row. So, addition of derived join + // constraints is OK. + if fk.MatchMethod() != tree.MatchFull { + // The FK child column isn't a not-null output column, so it can't + // be used in a filter unless this is a MATCH FULL foreign key, + // which only allows an FK column to be NULL if all FK columns are + // NULL. + fkFiltersValid = false + break + } + } + if fkFiltersValid { + fkFilters = append(fkFilters, + c.f.ConstructFiltersItem( + c.f.ConstructEq(c.f.ConstructVariable(fkChildColID), + c.f.ConstructVariable(fkParentColID)), + ), + ) + } + } + if fkFiltersValid && len(fkFilters) > 0 { + // If we got this far without fkFiltersValid being unset, then we found + // a useful foreign key and have built equality predicates on all PK/FK + // columns not contained in fkParentEquijoinCols. + return fkFilters + } + } + } + return nil +} + +// AddDerivedOnClauseConditionsFromFKContraints examines any strict keys from +// the left and right relations and for each key which is a strict subset of the +// referencing columns in a PK/FK constraint, and also has equijoin predicates +// on the strict key columns, new equijoin predicates are built involving the +// missing PK/FK constraints columns and appended to a copy of the ON clause. +func (c *CustomFuncs) AddDerivedOnClauseConditionsFromFKContraints( + on memo.FiltersExpr, leftRelExpr, rightRelExpr memo.RelExpr, +) memo.FiltersExpr { + + leftPossibleScan := leftRelExpr + rightPossibleScan := rightRelExpr + + if selectExpr, ok := leftPossibleScan.(*memo.SelectExpr); ok { + leftPossibleScan = selectExpr.Input + } + + if selectExpr, ok := rightPossibleScan.(*memo.SelectExpr); ok { + rightPossibleScan = selectExpr.Input + } + + leftScan, leftScanFound := leftPossibleScan.(*memo.ScanExpr) + rightScan, rightScanFound := rightPossibleScan.(*memo.ScanExpr) + if !leftScanFound && !rightScanFound { + return on + } + var leftUniqueKeyCols, rightUniqueKeyCols opt.ColSet + var leftjoinCols, rightjoinCols, fkChildLeftTableJoinCols, fkChildRightTableJoinCols opt.ColList + var okLeft, okRight bool + if leftScanFound { + leftUniqueKeyCols, leftjoinCols, fkChildRightTableJoinCols, okLeft = + c.GetEquijoinStrictKeyCols(on, &leftScan.ScanPrivate, rightRelExpr) + } + if rightScanFound { + rightUniqueKeyCols, rightjoinCols, fkChildLeftTableJoinCols, okRight = + c.GetEquijoinStrictKeyCols(on, &rightScan.ScanPrivate, leftRelExpr) + } + + if !okLeft && !okRight { + return on + } + newOn := make(memo.FiltersExpr, len(on)) + copy(newOn, on) + if okLeft { + // Pass `leftScan.Cols` to ForeignKeyConstraintFilters because we want to + // allow derivation of join terms on columns in any index. It may be a waste + // of CPU to enumerate each index and do this call for every index as the + // main caller of this function is only determining the scan columns to + // include. The same applies to the 2nd call below and `rightScan.Cols`. + fkFiltersFromLeftUniqueIndex := c.ForeignKeyConstraintFilters( + rightRelExpr, &leftScan.ScanPrivate, leftScan.Cols, + leftUniqueKeyCols, leftjoinCols, fkChildRightTableJoinCols) + if len(fkFiltersFromLeftUniqueIndex) > 0 { + newOn = append(newOn, fkFiltersFromLeftUniqueIndex...) + } + } + if okRight { + fkFiltersFromRightUniqueIndex := c.ForeignKeyConstraintFilters( + leftRelExpr, &rightScan.ScanPrivate, rightScan.Cols, + rightUniqueKeyCols, rightjoinCols, fkChildLeftTableJoinCols) + if len(fkFiltersFromRightUniqueIndex) > 0 { + newOn = append(newOn, fkFiltersFromRightUniqueIndex...) + } + } + return newOn +} + +// GetEquijoinStrictKeyCols collects the ON clause columns present in equijoin +// predicates in a join between a Scan, `sp`, and another `input` relation of +// indeterminate operation type. If those columns contain a strict key on `sp`, +// the function returns the reduced strict key, plus the list of table equijoin +// columns and positionally-matched list of `input` relation equijoin columns. +func (c *CustomFuncs) GetEquijoinStrictKeyCols( + on memo.FiltersExpr, sp *memo.ScanPrivate, input memo.RelExpr, +) (tableKeyCols opt.ColSet, tableJoinCols, inputRelJoinCols opt.ColList, ok bool) { + if sp == nil { + return opt.ColSet{}, opt.ColList{}, opt.ColList{}, false + } + md := c.mem.Metadata() + funcDeps := memo.MakeTableFuncDep(md, sp.Table) + + // If there is no strict key, no need to proceed further. + _, ok = funcDeps.StrictKey() + if !ok { + return opt.ColSet{}, opt.ColList{}, opt.ColList{}, false + } + + tempInputRelJoinCols, tempTableCols := memo.ExtractJoinEqualityColumns( + input.Relational().OutputCols, + sp.Cols, + on, + ) + + return getJoinKeyAndEquijoinCols(funcDeps, tempTableCols, tempInputRelJoinCols) +} + +// getJoinKeyAndEquijoinCols tests if `tableJoinCols` is a strict key given a +// the table's `funcDeps`, and if so, returns the set +// of reduced join key columns plus the list of table equijoin +// columns and positionally-matched list of input relation equijoin columns. +func getJoinKeyAndEquijoinCols( + funcDeps *props.FuncDepSet, tableJoinCols, inputRelJoinCols opt.ColList, +) (tableKeyCols opt.ColSet, reducedTableJoinCols, reducedInputRelJoinCols opt.ColList, ok bool) { + parentTableJoinColSet := tableJoinCols.ToSet() + if funcDeps.ColsAreStrictKey(parentTableJoinColSet) { + tableKeyCols = funcDeps.ReduceCols(parentTableJoinColSet) + reducedInputRelJoinCols = make(opt.ColList, 0, tableKeyCols.Len()) + reducedTableJoinCols = make(opt.ColList, 0, tableKeyCols.Len()) + for i, tableJoinCol := range tableJoinCols { + if tableKeyCols.Contains(tableJoinCol) { + reducedInputRelJoinCols = append(reducedInputRelJoinCols, inputRelJoinCols[i]) + reducedTableJoinCols = append(reducedTableJoinCols, tableJoinCol) + } + } + return tableKeyCols, reducedTableJoinCols, reducedInputRelJoinCols, true + } + return opt.ColSet{}, opt.ColList{}, opt.ColList{}, false +} diff --git a/pkg/sql/opt/norm/rules/prune_cols.opt b/pkg/sql/opt/norm/rules/prune_cols.opt index d920613a4bf2..8e1c173a9900 100644 --- a/pkg/sql/opt/norm/rules/prune_cols.opt +++ b/pkg/sql/opt/norm/rules/prune_cols.opt @@ -153,7 +153,9 @@ ) # PruneJoinLeftCols discards columns on the left side of a join that are never -# used. +# used. AddDerivedOnClauseConditionsFromFKContraints builds equijoin predicates +# which might be added during optimization, if any, to ensure those columns are +# not pruned away. [PruneJoinLeftCols, Normalize] (Project $input:(Join $left:* $right:* $on:* $private:*) @@ -163,7 +165,13 @@ $left $needed:(UnionCols4 (OuterCols $right) - (FilterOuterCols $on) + (FilterOuterCols + (AddDerivedOnClauseConditionsFromFKContraints + $on + $left + $right + ) + ) (ProjectionOuterCols $projections) $passthrough ) @@ -182,7 +190,9 @@ ) # PruneJoinRightCols discards columns on the right side of a join that are never -# used. +# used. AddDerivedOnClauseConditionsFromFKContraints builds equijoin predicates +# which might be added during optimization, if any, to ensure those columns are +# not pruned away. # # The PruneCols property should prevent this rule (which pushes Project below # Join) from cycling with the TryDecorrelateProject rule (which pushes Join @@ -195,7 +205,13 @@ (CanPruneCols $right $needed:(UnionCols3 - (FilterOuterCols $on) + (FilterOuterCols + (AddDerivedOnClauseConditionsFromFKContraints + $on + $left + $right + ) + ) (ProjectionOuterCols $projections) $passthrough ) diff --git a/pkg/sql/opt/ops/relational.opt b/pkg/sql/opt/ops/relational.opt index e8b6a85c3d1d..de89933cda55 100644 --- a/pkg/sql/opt/ops/relational.opt +++ b/pkg/sql/opt/ops/relational.opt @@ -377,9 +377,13 @@ define LookupJoinPrivate { # DerivedEquivCols is the set of lookup join equijoin columns which are part # of synthesized equality constraints based on another equality join # condition and a computed index key column in the lookup table. Since these - # columns are not reducing the selectivity of the join, but are just added to - # facilitate index lookups, they should not be used in determining join - # selectivity. + # columns are not reducing the selectivity of the join, but are just added + # to facilitate index lookups, they should not be used in determining join + # selectivity. DerivedEquivCols could also include predicates derived from + # foreign key constraints which are used to facilitate index lookups. The + # additional terms should not affect selectivity so that we have consistent + # join relation stats for fairness in costing lookup join vs. joins which do + # not use these predicates. DerivedEquivCols ColSet # LookupExpr represents the part of the join condition used to perform diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index c50a3a986384..8260993153e3 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -382,6 +382,9 @@ func (c *CustomFuncs) generateLookupJoinsImpl( computedColFilters := c.computedColFilters(scanPrivate, on, optionalFilters) optionalFilters = append(optionalFilters, computedColFilters...) + onClauseLookupRelStrictKeyCols, lookupRelEquijoinCols, inputRelJoinCols, lookupIsKey := + c.GetEquijoinStrictKeyCols(on, scanPrivate, input) + var pkCols opt.ColList var newScanPrivate *memo.ScanPrivate var iter scanIndexIter @@ -398,7 +401,12 @@ func (c *CustomFuncs) generateLookupJoinsImpl( return } - lookupConstraint, foundEqualityCols := cb.Build(index, onFilters, optionalFilters) + var derivedfkOnFilters memo.FiltersExpr + if lookupIsKey { + derivedfkOnFilters = c.ForeignKeyConstraintFilters( + input, scanPrivate, indexCols, onClauseLookupRelStrictKeyCols, lookupRelEquijoinCols, inputRelJoinCols) + } + lookupConstraint, foundEqualityCols := cb.Build(index, onFilters, optionalFilters, derivedfkOnFilters) if lookupConstraint.IsUnconstrained() { // We couldn't find equality columns or a lookup expression to // perform a lookup join on this index. diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 59a2af6f1b9f..b9e7a1389dd4 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -13011,3 +13011,200 @@ explain │ └── filters │ └── t85353.a:1 < 10 [outer=(1), constraints=(/1: (/NULL - /9]; tight)] └── filters (true) + +### BEGIN Regression tests for issue #69617 + +exec-ddl set=experimental_enable_unique_without_index_constraints=true +CREATE TABLE t69617_uniq_fk_parent ( + a INT NOT NULL, + b INT NOT NULL, + c INT NOT NULL, + UNIQUE WITHOUT INDEX (b), + UNIQUE WITHOUT INDEX (c), + UNIQUE INDEX (a,b,c), + UNIQUE INDEX (a,c) +) +---- + +exec-ddl +CREATE TABLE t69617_uniq_fk_child ( + a INT NOT NULL, + b INT NOT NULL, + c INT NOT NULL, + FOREIGN KEY (a, b, c) REFERENCES t69617_uniq_fk_parent (a, b, c) ON UPDATE CASCADE +) +---- + +exec-ddl +CREATE TABLE t69617_uniq_fk_child2 ( + a INT, + b INT, + c INT, + FOREIGN KEY (a, b, c) REFERENCES t69617_uniq_fk_parent (a, b, c) ON UPDATE CASCADE +) +---- + +exec-ddl +CREATE TABLE t69617_uniq_fk_child3 ( + a INT, + b INT, + c INT, + FOREIGN KEY (a, b, c) REFERENCES t69617_uniq_fk_parent (a, b, c) MATCH FULL ON UPDATE CASCADE +) +---- + +exec-ddl +CREATE TABLE t69617_other_table ( + a INT, + b INT, + c INT +) +---- + +# Predicates on columns a and b should be derived and index +# uniq_fk_parent_a_b_c_key should be used for the lookup join. +opt expect=GenerateLookupJoins +SELECT * +FROM t69617_uniq_fk_child +INNER LOOKUP JOIN t69617_uniq_fk_parent USING (b) +---- +project + ├── columns: b:2!null a:1!null c:3!null a:7!null c:9!null + ├── fd: (9)-->(2,7), (2)-->(7,9) + └── inner-join (lookup t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key) + ├── columns: t69617_uniq_fk_child.a:1!null t69617_uniq_fk_child.b:2!null t69617_uniq_fk_child.c:3!null t69617_uniq_fk_parent.a:7!null t69617_uniq_fk_parent.b:8!null t69617_uniq_fk_parent.c:9!null + ├── flags: force lookup join (into right side) + ├── key columns: [1 2 3] = [7 8 9] + ├── lookup columns are key + ├── fd: (9)-->(7,8), (8)-->(7,9), (2)==(8), (8)==(2) + ├── scan t69617_uniq_fk_child + │ └── columns: t69617_uniq_fk_child.a:1!null t69617_uniq_fk_child.b:2!null t69617_uniq_fk_child.c:3!null + └── filters (true) + +# Left outer join should derive predicates. +opt expect=GenerateLookupJoins +SELECT * +FROM t69617_uniq_fk_child +LEFT OUTER LOOKUP JOIN t69617_uniq_fk_parent USING (b) +---- +project + ├── columns: b:2!null a:1!null c:3!null a:7!null c:9!null + ├── fd: (9)-->(2,7), (2)-->(7,9) + └── inner-join (lookup t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key) + ├── columns: t69617_uniq_fk_child.a:1!null t69617_uniq_fk_child.b:2!null t69617_uniq_fk_child.c:3!null t69617_uniq_fk_parent.a:7!null t69617_uniq_fk_parent.b:8!null t69617_uniq_fk_parent.c:9!null + ├── flags: force lookup join (into right side) + ├── key columns: [1 2 3] = [7 8 9] + ├── lookup columns are key + ├── fd: (9)-->(7,8), (8)-->(7,9), (2)==(8), (8)==(2) + ├── scan t69617_uniq_fk_child + │ └── columns: t69617_uniq_fk_child.a:1!null t69617_uniq_fk_child.b:2!null t69617_uniq_fk_child.c:3!null + └── filters (true) + +# Lateral join which utilizes lookup join should derive predicates. +opt expect=GenerateLookupJoinsWithFilter +SELECT * +FROM t69617_uniq_fk_child +JOIN LATERAL (SELECT * FROM t69617_uniq_fk_parent WHERE t69617_uniq_fk_child.b = t69617_uniq_fk_parent.b) +ON true WHERE b = 1 +---- +inner-join (lookup t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key) + ├── columns: a:1!null b:2!null c:3!null a:7!null b:8!null c:9!null + ├── key columns: [1 2 3] = [7 8 9] + ├── lookup columns are key + ├── fd: ()-->(2,7-9), (2)==(8), (8)==(2) + ├── select + │ ├── columns: t69617_uniq_fk_child.a:1!null t69617_uniq_fk_child.b:2!null t69617_uniq_fk_child.c:3!null + │ ├── fd: ()-->(2) + │ ├── scan t69617_uniq_fk_child + │ │ └── columns: t69617_uniq_fk_child.a:1!null t69617_uniq_fk_child.b:2!null t69617_uniq_fk_child.c:3!null + │ └── filters + │ └── t69617_uniq_fk_child.b:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + └── filters + └── t69617_uniq_fk_parent.b:8 = 1 [outer=(8), constraints=(/8: [/1 - /1]; tight), fd=()-->(8)] + +# Outer join as the input to lookup join may derive predicates from the FK constraint. +opt expect=GenerateLookupJoins +SELECT t69617_uniq_fk_parent.* FROM + (t69617_other_table other RIGHT OUTER JOIN t69617_uniq_fk_child child ON other.a = child.a) +INNER LOOKUP JOIN + t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key ON child.b = t69617_uniq_fk_parent.b; +---- +project + ├── columns: a:13!null b:14!null c:15!null + ├── fd: (15)-->(13,14), (14)-->(13,15) + └── inner-join (lookup t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key) + ├── columns: other.a:1 child.a:7!null child.b:8!null child.c:9!null t69617_uniq_fk_parent.a:13!null t69617_uniq_fk_parent.b:14!null t69617_uniq_fk_parent.c:15!null + ├── flags: force lookup join (into right side) + ├── key columns: [7 8 9] = [13 14 15] + ├── lookup columns are key + ├── fd: (15)-->(13,14), (14)-->(13,15), (8)==(14), (14)==(8) + ├── left-join (hash) + │ ├── columns: other.a:1 child.a:7!null child.b:8!null child.c:9!null + │ ├── scan t69617_uniq_fk_child [as=child] + │ │ └── columns: child.a:7!null child.b:8!null child.c:9!null + │ ├── scan t69617_other_table [as=other] + │ │ └── columns: other.a:1 + │ └── filters + │ └── other.a:1 = child.a:7 [outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)] + └── filters (true) + +# The optimizer may not derive predicates on nullable columns from an FK +# constraint which does not use the MATCH FULL option. +opt expect-not=(GenerateLookupJoins,GenerateLookupJoinsWithFilter) +SELECT t69617_uniq_fk_parent.* FROM + (t69617_other_table other RIGHT OUTER JOIN t69617_uniq_fk_child2 child ON other.a = child.a) +INNER LOOKUP JOIN + t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key ON child.b = t69617_uniq_fk_parent.b; +---- +project + ├── columns: a:13!null b:14!null c:15!null + ├── fd: (15)-->(13,14), (14)-->(13,15) + └── inner-join (hash) + ├── columns: other.a:1 child.a:7 child.b:8!null t69617_uniq_fk_parent.a:13!null t69617_uniq_fk_parent.b:14!null t69617_uniq_fk_parent.c:15!null + ├── flags: force lookup join (into right side) + ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more) + ├── fd: (15)-->(13,14), (14)-->(13,15), (8)==(14), (14)==(8) + ├── left-join (hash) + │ ├── columns: other.a:1 child.a:7 child.b:8 + │ ├── scan t69617_uniq_fk_child2 [as=child] + │ │ └── columns: child.a:7 child.b:8 + │ ├── scan t69617_other_table [as=other] + │ │ └── columns: other.a:1 + │ └── filters + │ └── other.a:1 = child.a:7 [outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)] + ├── scan t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key + │ ├── columns: t69617_uniq_fk_parent.a:13!null t69617_uniq_fk_parent.b:14!null t69617_uniq_fk_parent.c:15!null + │ ├── flags: force-index=t69617_uniq_fk_parent_a_b_c_key + │ ├── key: (15) + │ └── fd: (15)-->(13,14), (14)-->(13,15) + └── filters + └── child.b:8 = t69617_uniq_fk_parent.b:14 [outer=(8,14), constraints=(/8: (/NULL - ]; /14: (/NULL - ]), fd=(8)==(14), (14)==(8)] + +# It is OK to derive predicates on nullable columns from an FK +# constraint which uses the MATCH FULL option. +opt expect=GenerateLookupJoins +SELECT t69617_uniq_fk_parent.* FROM + (t69617_other_table other RIGHT OUTER JOIN t69617_uniq_fk_child3 child ON other.a = child.a) +INNER LOOKUP JOIN + t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key ON child.b = t69617_uniq_fk_parent.b; +---- +project + ├── columns: a:13!null b:14!null c:15!null + ├── fd: (15)-->(13,14), (14)-->(13,15) + └── inner-join (lookup t69617_uniq_fk_parent@t69617_uniq_fk_parent_a_b_c_key) + ├── columns: other.a:1 child.a:7 child.b:8!null child.c:9 t69617_uniq_fk_parent.a:13!null t69617_uniq_fk_parent.b:14!null t69617_uniq_fk_parent.c:15!null + ├── flags: force lookup join (into right side) + ├── key columns: [7 8 9] = [13 14 15] + ├── lookup columns are key + ├── fd: (15)-->(13,14), (14)-->(13,15), (8)==(14), (14)==(8) + ├── left-join (hash) + │ ├── columns: other.a:1 child.a:7 child.b:8 child.c:9 + │ ├── scan t69617_uniq_fk_child3 [as=child] + │ │ └── columns: child.a:7 child.b:8 child.c:9 + │ ├── scan t69617_other_table [as=other] + │ │ └── columns: other.a:1 + │ └── filters + │ └── other.a:1 = child.a:7 [outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)] + └── filters (true) + +### END regression tests for issue #69617