From aaa6876a52b5e51b778ee82413c6b2a61da775b3 Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Tue, 18 Oct 2022 13:58:45 -0700 Subject: [PATCH] xform: derive implicit predicates from FK constraint for lookup join MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #69617 When a unique constraint exists on a subset of the referenced columns in a foreign key constraint, the remaining columns in the constraint can be used to generate equijoin predicates which may enable more efficient use of an index on the lookup side of a lookup join. If the index is a multiregion index, a join predicate may be derived which could potentially eliminate reads of remote rows. Example: ``` CREATE TABLE accounts ( account_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), name STRING NOT NULL, crdb_region crdb_internal_region NOT NULL, UNIQUE INDEX acct_id_crdb_region_idx (account_id, crdb_region) ) LOCALITY GLOBAL; drop table if exists tweets; CREATE TABLE tweets ( account_id UUID NOT NULL, tweet_id UUID DEFAULT gen_random_uuid(), message STRING NOT NULL, crdb_region crdb_internal_region NOT NULL, PRIMARY KEY (crdb_region, account_id, tweet_id), -- The PK of accounts is a subset of the referenced columns in -- the FK constraint. FOREIGN KEY (account_id, crdb_region) REFERENCES accounts (account_id, crdb_region) ON DELETE CASCADE ON UPDATE CASCADE ) LOCALITY REGIONAL BY ROW as crdb_region; -- Join on account_id uses the uniqueness of accounts_pkey and the FK -- constraint to derive tweets.crdb_region = accounts.crdb_region EXPLAIN SELECT * FROM tweets INNER LOOKUP JOIN accounts@acct_id_crdb_region_idx USING (account_id) WHERE account_id = '6f781502-4936-43cc-b384-04e5cf292cc8'; ------------------------------------------------------------------------- distribution: local vectorized: true • lookup join │ table: accounts@accounts_pkey │ equality: (account_id) = (account_id) │ equality cols are key │ └── • lookup join │ table: accounts@acct_id_crdb_region_idx │ equality: (account_id, crdb_region) = (account_id,crdb_region) │ equality cols are key │ pred: account_id = '6f781502-4936-43cc-b384-04e5cf292cc8' │ └── • scan missing stats table: tweets@tweets_pkey spans: [/'ca'/'6f781502-4936-43cc-b384-04e5cf292cc8' - /'ca'/'6f781502-4936-43cc-b384-04e5cf292cc8'] [/'eu'/'6f781502-4936-43cc-b384-04e5cf292cc8' - /'eu'/'6f781502-4936-43cc-b384-04e5cf292cc8'] [/'us'/'6f781502-4936-43cc-b384-04e5cf292cc8' - /'us'/'6f781502-4936-43cc-b384-04e5cf292cc8'] ``` Release note (performance improvement): This patch enables more efficient lookup joins by deriving new join constraints when equijoin predicates exist on the column(s) of a unique constraint on one table which are a proper subset of the referencing columns of a foreign key constraint on the other table. If an index exists on those FK constraint referencing columns, equijoin predicates are derived between the PK and FK columns not currently bound by ON clause predicates. --- .../multi_region_foreign_key_lookup_join | 88 ++++++++++++++++++ .../generated_test.go | 7 ++ pkg/sql/opt/norm/join_funcs.go | 92 +++++++++++++++++++ pkg/sql/opt/xform/join_funcs.go | 59 +++++++++++- 4 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 pkg/ccl/logictestccl/testdata/logic_test/multi_region_foreign_key_lookup_join diff --git a/pkg/ccl/logictestccl/testdata/logic_test/multi_region_foreign_key_lookup_join b/pkg/ccl/logictestccl/testdata/logic_test/multi_region_foreign_key_lookup_join new file mode 100644 index 000000000000..e69fcada8567 --- /dev/null +++ b/pkg/ccl/logictestccl/testdata/logic_test/multi_region_foreign_key_lookup_join @@ -0,0 +1,88 @@ +# tenant-cluster-setting-override-opt: allow-multi-region-abstractions-for-secondary-tenants +# LogicTest: multiregion-9node-3region-3azs + +# Set the closed timestamp interval to be short to shorten the amount of time +# we need to wait for the system config to propagate. +statement ok +SET CLUSTER SETTING kv.closed_timestamp.side_transport_interval = '10ms'; + +statement ok +SET CLUSTER SETTING kv.closed_timestamp.target_duration = '10ms'; + +statement ok +CREATE DATABASE multi_region_test_db PRIMARY REGION "ca-central-1" REGIONS "ap-southeast-2", "us-east-1"; + +statement ok +USE multi_region_test_db + +statement ok +CREATE TABLE parent ( + p_id INT PRIMARY KEY, + cr crdb_internal_region NOT NULL DEFAULT gateway_region()::crdb_internal_region, + p_data string, + p_int INT, + p_text TEXT, + p_json JSON, + UNIQUE INDEX (p_data), + INVERTED INDEX (p_int, p_json), + INVERTED INDEX (p_int, p_text gin_trgm_ops) +) LOCALITY REGIONAL BY ROW AS cr; + +statement ok +CREATE TABLE child ( + c_id INT PRIMARY KEY, + c_p_id INT, + cr crdb_internal_region NOT NULL DEFAULT gateway_region()::crdb_internal_region, + c_data string, + c_int INt, + c_json JSON, + FOREIGN KEY (c_p_id, cr) REFERENCES parent (p_id, cr) +) LOCALITY REGIONAL BY ROW AS cr; + +# A 'cr = cr' condition should be derived for this join. +query T +EXPLAIN SELECT * +FROM child +INNER LOOKUP JOIN parent ON c_p_id = p_id +---- +distribution: local +vectorized: true +· +• lookup join +│ table: parent@parent_pkey +│ equality: (cr, c_p_id) = (cr,p_id) +│ equality cols are key +│ +└── • scan + missing stats + table: child@child_pkey + spans: FULL SCAN + +# A 'cr = cr' condition should not be derived for this join. +query T +EXPLAIN SELECT 1 +FROM child +INNER LOOKUP JOIN parent ON p_data = c_data +---- +distribution: local +vectorized: true +· +• render +│ +└── • lookup join + │ table: parent@parent_p_data_key + │ equality cols are key + │ lookup condition: (cr = 'ap-southeast-2') AND (c_data = p_data) + │ remote lookup condition: (cr IN ('ca-central-1', 'us-east-1')) AND (c_data = p_data) + │ + └── • scan + missing stats + table: child@child_pkey + spans: FULL SCAN + +# Verify inverted index cases cannot derive a 'cr = cr' condition. Lookup join +# is not possible. +statement error could not produce a query plan conforming to the LOOKUP JOIN hint +EXPLAIN SELECT * +FROM child +INNER LOOKUP JOIN parent ON p_int = c_int AND p_text LIKE '%foo%' diff --git a/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go b/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go index 574ac7ce8e66..b047190a73ce 100644 --- a/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go +++ b/pkg/ccl/logictestccl/tests/multiregion-9node-3region-3azs/generated_test.go @@ -115,6 +115,13 @@ func TestCCLLogic_multi_region_drop_region( runCCLLogicTest(t, "multi_region_drop_region") } +func TestCCLLogic_multi_region_foreign_key_lookup_join( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "multi_region_foreign_key_lookup_join") +} + func TestCCLLogic_multi_region_import_export( t *testing.T, ) { diff --git a/pkg/sql/opt/norm/join_funcs.go b/pkg/sql/opt/norm/join_funcs.go index 39f91f422c10..e0e2f9ba970c 100644 --- a/pkg/sql/opt/norm/join_funcs.go +++ b/pkg/sql/opt/norm/join_funcs.go @@ -621,3 +621,95 @@ func (c *CustomFuncs) MakeProjectionsFromValues(values *memo.ValuesExpr) memo.Pr } return projections } + +// ForeignKeyConstraintFilters examines `maybeFKTableScanExpr`, the left input +// to lookup join, and `pkTableScanPrivate`, the lookup table of lookup join, +// along with the `indexCols` of the current index being considered for lookup +// join. `onClauseLookupRelStrictKeyCols` contains the ON clause equijoin +// columns which form a reduced strict key and lookupRelEquijoinCols contains +// the full set of equijoin columns on the lookup table. If +// `onClauseLookupRelStrictKeyCols` is a proper subset of FK constraint +// referencing columns on `maybeFKTableScanExpr`, then equijoin predicates are +// built between PK/FK columns not currently represented in +// `lookupRelEquijoinCols`, and returned to the caller. +func (c *CustomFuncs) ForeignKeyConstraintFilters( + maybeFKTableScanExpr memo.RelExpr, + pkTableScanPrivate *memo.ScanPrivate, + indexCols, onClauseLookupRelStrictKeyCols, lookupRelEquijoinCols opt.ColSet, +) (pkFkFilters memo.FiltersExpr) { + md := c.mem.Metadata() + + var ok bool + var projectExpr *memo.ProjectExpr + var selectExpr *memo.SelectExpr + possibleScan := maybeFKTableScanExpr + if projectExpr, ok = possibleScan.(*memo.ProjectExpr); ok { + possibleScan = projectExpr.Input + } + if selectExpr, ok = possibleScan.(*memo.SelectExpr); ok { + possibleScan = selectExpr.Input + } + scanRelExpr, ok := possibleScan.(*memo.ScanExpr) + if !ok { + return nil + } + pkTable := md.Table(pkTableScanPrivate.Table) + fkTable := md.Table(scanRelExpr.Table) + + for i := 0; i < fkTable.OutboundForeignKeyCount(); i++ { + fk := fkTable.OutboundForeignKey(i) + if pkTable.ID() != fk.ReferencedTableID() { + continue + } + pkFkFilters = nil + nextFK := false + var pkColSet opt.ColSet + for j := 0; j < fk.ColumnCount() && !nextFK; j++ { + lookupColumnOrd := fk.ReferencedColumnOrdinal(pkTable, j) + pkColID := pkTableScanPrivate.Table.ColumnID(lookupColumnOrd) + pkColSet.Add(pkColID) + } + + // The strict key covered by equijoin columns must be a subset of the PK-FK + // constraint to guarantee equating the remaining PK-FK columns is legal. + if !onClauseLookupRelStrictKeyCols.SubsetOf(pkColSet) { + continue + } + for j := 0; j < fk.ColumnCount() && !nextFK; j++ { + lookupColumnOrd := fk.ReferencedColumnOrdinal(pkTable, j) + inputColumnOrd := fk.OriginColumnOrdinal(fkTable, j) + inputColID := scanRelExpr.Table.ColumnID(inputColumnOrd) + pkColID := pkTableScanPrivate.Table.ColumnID(lookupColumnOrd) + if lookupRelEquijoinCols.Contains(pkColID) { + // This column is already in an ON clause equality predicate, no need + // to build a new one. + continue + } + if selectExpr != nil && !selectExpr.Relational().OutputCols.Contains(inputColID) { + nextFK = true + break + } + if projectExpr != nil && !projectExpr.Passthrough.Contains(inputColID) { + nextFK = true + break + } + if !indexCols.Contains(pkColID) { + nextFK = true + break + } + pkFkFilters = append(pkFkFilters, + c.f.ConstructFiltersItem( + c.f.ConstructEq(c.f.ConstructVariable(inputColID), + c.f.ConstructVariable(pkColID)), + ), + ) + } + if !nextFK && len(pkFkFilters) > 0 { + // If we got this far without nextFK being set, then we found a useful + // foreign key and have built equality predicates on all PK/FK columns not + // contained in lookupRelEquijoinCols. + return pkFkFilters + } + } + return nil +} diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index c50a3a986384..88d9eb0a2a48 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -342,6 +342,54 @@ func (c *CustomFuncs) canGenerateLookupJoins( return false } +// getOnClauseLookupRelStrictKeyCols collects the ON clause columns from the +// lookup table which are equated with columns in another relation. If they +// contain a strict key on lookupTableScanPrivate, the key columns and entire +// set of equijoin columns are returned. +func (c *CustomFuncs) getOnClauseLookupRelStrictKeyCols( + on memo.FiltersExpr, lookupTableScanPrivate *memo.ScanPrivate, +) (lookupKeyCols, lookupCols opt.ColSet, ok bool) { + md := c.e.mem.Metadata() + funcDeps := memo.MakeTableFuncDep(md, lookupTableScanPrivate.Table) + + // If there is no strict key, no need to proceed further. + _, ok = funcDeps.StrictKey() + if !ok { + return opt.ColSet{}, opt.ColSet{}, false + } + + var lookupTableColID opt.ColumnID + for _, filtersItem := range on { + eqExpr, ok := filtersItem.Condition.(*memo.EqExpr) + if !ok { + continue + } + leftVariable, ok := eqExpr.Left.(*memo.VariableExpr) + if !ok { + continue + } + rightVariable, ok := eqExpr.Right.(*memo.VariableExpr) + if !ok { + continue + } + if md.ColumnMeta(leftVariable.Col).Table == lookupTableScanPrivate.Table { + lookupTableColID = leftVariable.Col + if md.ColumnMeta(rightVariable.Col).Table == lookupTableScanPrivate.Table { + continue + } + } else if md.ColumnMeta(rightVariable.Col).Table == lookupTableScanPrivate.Table { + lookupTableColID = rightVariable.Col + } else { + continue + } + lookupCols.Add(lookupTableColID) + } + if funcDeps.ColsAreStrictKey(lookupCols) { + return funcDeps.ReduceCols(lookupCols), lookupCols, true + } + return opt.ColSet{}, opt.ColSet{}, false +} + // generateLookupJoinsImpl is the general implementation for generating lookup // joins. The rightCols argument must be the columns output by the right side of // matched join expression. projectedVirtualCols is the set of virtual columns @@ -382,6 +430,9 @@ func (c *CustomFuncs) generateLookupJoinsImpl( computedColFilters := c.computedColFilters(scanPrivate, on, optionalFilters) optionalFilters = append(optionalFilters, computedColFilters...) + onClauseLookupRelStrictKeyCols, lookupRelEquijoinCols, lookupIsKey := + c.getOnClauseLookupRelStrictKeyCols(on, scanPrivate) + var pkCols opt.ColList var newScanPrivate *memo.ScanPrivate var iter scanIndexIter @@ -398,7 +449,13 @@ func (c *CustomFuncs) generateLookupJoinsImpl( return } - lookupConstraint, foundEqualityCols := cb.Build(index, onFilters, optionalFilters) + var fkFilters memo.FiltersExpr + if lookupIsKey { + fkFilters = c.ForeignKeyConstraintFilters( + input, scanPrivate, indexCols, onClauseLookupRelStrictKeyCols, lookupRelEquijoinCols) + } + allOnFilters := append(onFilters, fkFilters...) + lookupConstraint, foundEqualityCols := cb.Build(index, allOnFilters, optionalFilters) if lookupConstraint.IsUnconstrained() { // We couldn't find equality columns or a lookup expression to // perform a lookup join on this index.