Skip to content

Commit

Permalink
xform: derive implicit predicates from FK constraint for lookup join
Browse files Browse the repository at this point in the history
Fixes #69617

When a unique constraint exists on a subset of the referenced columns in
a foreign key constraint, the remaining columns in the constraint can be
used to generate equijoin predicates which may enable more efficient use
of an index on the lookup side of a lookup join. If the index is a
multiregion index, a join predicate may be derived which could
potentially eliminate reads of remote rows.

Example:
```
CREATE TABLE accounts (
    account_id  UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name        STRING NOT NULL,
    crdb_region crdb_internal_region NOT NULL,
    UNIQUE INDEX acct_id_crdb_region_idx (account_id, crdb_region)
) LOCALITY GLOBAL;

drop table if exists tweets;
CREATE TABLE tweets (
    account_id UUID NOT NULL,
    tweet_id   UUID DEFAULT gen_random_uuid(),
    message    STRING NOT NULL,
    crdb_region crdb_internal_region NOT NULL,
    PRIMARY KEY (crdb_region, account_id, tweet_id),
    -- The PK of accounts is a subset of the referenced columns in
    -- the FK constraint.
    FOREIGN KEY (account_id, crdb_region)
    REFERENCES accounts (account_id, crdb_region)
    ON DELETE CASCADE ON UPDATE CASCADE
) LOCALITY REGIONAL BY ROW as crdb_region;

-- Join on account_id uses the uniqueness of accounts_pkey and the FK
-- constraint to derive tweets.crdb_region = accounts.crdb_region
EXPLAIN SELECT *
    FROM tweets
    INNER LOOKUP JOIN accounts@acct_id_crdb_region_idx USING (account_id)
WHERE account_id = '6f781502-4936-43cc-b384-04e5cf292cc8';
-------------------------------------------------------------------------
  distribution: local
  vectorized: true

  • lookup join
  │ table: accounts@accounts_pkey
  │ equality: (account_id) = (account_id)
  │ equality cols are key
  │
  └── • lookup join
      │ table: accounts@acct_id_crdb_region_idx
      │ equality: (account_id, crdb_region) = (account_id,crdb_region)
      │ equality cols are key
      │ pred: account_id = '6f781502-4936-43cc-b384-04e5cf292cc8'
      │
      └── • scan
            missing stats
            table: tweets@tweets_pkey
            spans: [/'ca'/'6f781502-4936-43cc-b384-04e5cf292cc8' - /'ca'/'6f781502-4936-43cc-b384-04e5cf292cc8'] [/'eu'/'6f781502-4936-43cc-b384-04e5cf292cc8' - /'eu'/'6f781502-4936-43cc-b384-04e5cf292cc8'] [/'us'/'6f781502-4936-43cc-b384-04e5cf292cc8' - /'us'/'6f781502-4936-43cc-b384-04e5cf292cc8']
```

Release note (performance improvement): This patch enables more
efficient lookup joins by deriving new join constraints when
equijoin predicates exist on the column(s) of a unique constraint on one
table which are a proper subset of the referencing columns of a foreign
key constraint on the other table. If an index exists on those FK
constraint referencing columns, equijoin predicates are derived between
the PK and FK columns not currently bound by ON clause predicates.
  • Loading branch information
Mark Sirek committed Nov 8, 2022
1 parent 1b4aa43 commit aaa6876
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# tenant-cluster-setting-override-opt: allow-multi-region-abstractions-for-secondary-tenants
# LogicTest: multiregion-9node-3region-3azs

# Set the closed timestamp interval to be short to shorten the amount of time
# we need to wait for the system config to propagate.
statement ok
SET CLUSTER SETTING kv.closed_timestamp.side_transport_interval = '10ms';

statement ok
SET CLUSTER SETTING kv.closed_timestamp.target_duration = '10ms';

statement ok
CREATE DATABASE multi_region_test_db PRIMARY REGION "ca-central-1" REGIONS "ap-southeast-2", "us-east-1";

statement ok
USE multi_region_test_db

statement ok
CREATE TABLE parent (
p_id INT PRIMARY KEY,
cr crdb_internal_region NOT NULL DEFAULT gateway_region()::crdb_internal_region,
p_data string,
p_int INT,
p_text TEXT,
p_json JSON,
UNIQUE INDEX (p_data),
INVERTED INDEX (p_int, p_json),
INVERTED INDEX (p_int, p_text gin_trgm_ops)
) LOCALITY REGIONAL BY ROW AS cr;

statement ok
CREATE TABLE child (
c_id INT PRIMARY KEY,
c_p_id INT,
cr crdb_internal_region NOT NULL DEFAULT gateway_region()::crdb_internal_region,
c_data string,
c_int INt,
c_json JSON,
FOREIGN KEY (c_p_id, cr) REFERENCES parent (p_id, cr)
) LOCALITY REGIONAL BY ROW AS cr;

# A 'cr = cr' condition should be derived for this join.
query T
EXPLAIN SELECT *
FROM child
INNER LOOKUP JOIN parent ON c_p_id = p_id
----
distribution: local
vectorized: true
·
• lookup join
│ table: parent@parent_pkey
│ equality: (cr, c_p_id) = (cr,p_id)
│ equality cols are key
└── • scan
missing stats
table: child@child_pkey
spans: FULL SCAN

# A 'cr = cr' condition should not be derived for this join.
query T
EXPLAIN SELECT 1
FROM child
INNER LOOKUP JOIN parent ON p_data = c_data
----
distribution: local
vectorized: true
·
• render
└── • lookup join
│ table: parent@parent_p_data_key
│ equality cols are key
│ lookup condition: (cr = 'ap-southeast-2') AND (c_data = p_data)
│ remote lookup condition: (cr IN ('ca-central-1', 'us-east-1')) AND (c_data = p_data)
└── • scan
missing stats
table: child@child_pkey
spans: FULL SCAN

# Verify inverted index cases cannot derive a 'cr = cr' condition. Lookup join
# is not possible.
statement error could not produce a query plan conforming to the LOOKUP JOIN hint
EXPLAIN SELECT *
FROM child
INNER LOOKUP JOIN parent ON p_int = c_int AND p_text LIKE '%foo%'

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

92 changes: 92 additions & 0 deletions pkg/sql/opt/norm/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -621,3 +621,95 @@ func (c *CustomFuncs) MakeProjectionsFromValues(values *memo.ValuesExpr) memo.Pr
}
return projections
}

// ForeignKeyConstraintFilters examines `maybeFKTableScanExpr`, the left input
// to lookup join, and `pkTableScanPrivate`, the lookup table of lookup join,
// along with the `indexCols` of the current index being considered for lookup
// join. `onClauseLookupRelStrictKeyCols` contains the ON clause equijoin
// columns which form a reduced strict key and lookupRelEquijoinCols contains
// the full set of equijoin columns on the lookup table. If
// `onClauseLookupRelStrictKeyCols` is a proper subset of FK constraint
// referencing columns on `maybeFKTableScanExpr`, then equijoin predicates are
// built between PK/FK columns not currently represented in
// `lookupRelEquijoinCols`, and returned to the caller.
func (c *CustomFuncs) ForeignKeyConstraintFilters(
maybeFKTableScanExpr memo.RelExpr,
pkTableScanPrivate *memo.ScanPrivate,
indexCols, onClauseLookupRelStrictKeyCols, lookupRelEquijoinCols opt.ColSet,
) (pkFkFilters memo.FiltersExpr) {
md := c.mem.Metadata()

var ok bool
var projectExpr *memo.ProjectExpr
var selectExpr *memo.SelectExpr
possibleScan := maybeFKTableScanExpr
if projectExpr, ok = possibleScan.(*memo.ProjectExpr); ok {
possibleScan = projectExpr.Input
}
if selectExpr, ok = possibleScan.(*memo.SelectExpr); ok {
possibleScan = selectExpr.Input
}
scanRelExpr, ok := possibleScan.(*memo.ScanExpr)
if !ok {
return nil
}
pkTable := md.Table(pkTableScanPrivate.Table)
fkTable := md.Table(scanRelExpr.Table)

for i := 0; i < fkTable.OutboundForeignKeyCount(); i++ {
fk := fkTable.OutboundForeignKey(i)
if pkTable.ID() != fk.ReferencedTableID() {
continue
}
pkFkFilters = nil
nextFK := false
var pkColSet opt.ColSet
for j := 0; j < fk.ColumnCount() && !nextFK; j++ {
lookupColumnOrd := fk.ReferencedColumnOrdinal(pkTable, j)
pkColID := pkTableScanPrivate.Table.ColumnID(lookupColumnOrd)
pkColSet.Add(pkColID)
}

// The strict key covered by equijoin columns must be a subset of the PK-FK
// constraint to guarantee equating the remaining PK-FK columns is legal.
if !onClauseLookupRelStrictKeyCols.SubsetOf(pkColSet) {
continue
}
for j := 0; j < fk.ColumnCount() && !nextFK; j++ {
lookupColumnOrd := fk.ReferencedColumnOrdinal(pkTable, j)
inputColumnOrd := fk.OriginColumnOrdinal(fkTable, j)
inputColID := scanRelExpr.Table.ColumnID(inputColumnOrd)
pkColID := pkTableScanPrivate.Table.ColumnID(lookupColumnOrd)
if lookupRelEquijoinCols.Contains(pkColID) {
// This column is already in an ON clause equality predicate, no need
// to build a new one.
continue
}
if selectExpr != nil && !selectExpr.Relational().OutputCols.Contains(inputColID) {
nextFK = true
break
}
if projectExpr != nil && !projectExpr.Passthrough.Contains(inputColID) {
nextFK = true
break
}
if !indexCols.Contains(pkColID) {
nextFK = true
break
}
pkFkFilters = append(pkFkFilters,
c.f.ConstructFiltersItem(
c.f.ConstructEq(c.f.ConstructVariable(inputColID),
c.f.ConstructVariable(pkColID)),
),
)
}
if !nextFK && len(pkFkFilters) > 0 {
// If we got this far without nextFK being set, then we found a useful
// foreign key and have built equality predicates on all PK/FK columns not
// contained in lookupRelEquijoinCols.
return pkFkFilters
}
}
return nil
}
59 changes: 58 additions & 1 deletion pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,54 @@ func (c *CustomFuncs) canGenerateLookupJoins(
return false
}

// getOnClauseLookupRelStrictKeyCols collects the ON clause columns from the
// lookup table which are equated with columns in another relation. If they
// contain a strict key on lookupTableScanPrivate, the key columns and entire
// set of equijoin columns are returned.
func (c *CustomFuncs) getOnClauseLookupRelStrictKeyCols(
on memo.FiltersExpr, lookupTableScanPrivate *memo.ScanPrivate,
) (lookupKeyCols, lookupCols opt.ColSet, ok bool) {
md := c.e.mem.Metadata()
funcDeps := memo.MakeTableFuncDep(md, lookupTableScanPrivate.Table)

// If there is no strict key, no need to proceed further.
_, ok = funcDeps.StrictKey()
if !ok {
return opt.ColSet{}, opt.ColSet{}, false
}

var lookupTableColID opt.ColumnID
for _, filtersItem := range on {
eqExpr, ok := filtersItem.Condition.(*memo.EqExpr)
if !ok {
continue
}
leftVariable, ok := eqExpr.Left.(*memo.VariableExpr)
if !ok {
continue
}
rightVariable, ok := eqExpr.Right.(*memo.VariableExpr)
if !ok {
continue
}
if md.ColumnMeta(leftVariable.Col).Table == lookupTableScanPrivate.Table {
lookupTableColID = leftVariable.Col
if md.ColumnMeta(rightVariable.Col).Table == lookupTableScanPrivate.Table {
continue
}
} else if md.ColumnMeta(rightVariable.Col).Table == lookupTableScanPrivate.Table {
lookupTableColID = rightVariable.Col
} else {
continue
}
lookupCols.Add(lookupTableColID)
}
if funcDeps.ColsAreStrictKey(lookupCols) {
return funcDeps.ReduceCols(lookupCols), lookupCols, true
}
return opt.ColSet{}, opt.ColSet{}, false
}

// generateLookupJoinsImpl is the general implementation for generating lookup
// joins. The rightCols argument must be the columns output by the right side of
// matched join expression. projectedVirtualCols is the set of virtual columns
Expand Down Expand Up @@ -382,6 +430,9 @@ func (c *CustomFuncs) generateLookupJoinsImpl(
computedColFilters := c.computedColFilters(scanPrivate, on, optionalFilters)
optionalFilters = append(optionalFilters, computedColFilters...)

onClauseLookupRelStrictKeyCols, lookupRelEquijoinCols, lookupIsKey :=
c.getOnClauseLookupRelStrictKeyCols(on, scanPrivate)

var pkCols opt.ColList
var newScanPrivate *memo.ScanPrivate
var iter scanIndexIter
Expand All @@ -398,7 +449,13 @@ func (c *CustomFuncs) generateLookupJoinsImpl(
return
}

lookupConstraint, foundEqualityCols := cb.Build(index, onFilters, optionalFilters)
var fkFilters memo.FiltersExpr
if lookupIsKey {
fkFilters = c.ForeignKeyConstraintFilters(
input, scanPrivate, indexCols, onClauseLookupRelStrictKeyCols, lookupRelEquijoinCols)
}
allOnFilters := append(onFilters, fkFilters...)
lookupConstraint, foundEqualityCols := cb.Build(index, allOnFilters, optionalFilters)
if lookupConstraint.IsUnconstrained() {
// We couldn't find equality columns or a lookup expression to
// perform a lookup join on this index.
Expand Down

0 comments on commit aaa6876

Please sign in to comment.