Skip to content

Commit

Permalink
norm: do not prune scan columns which may show up in derived join terms
Browse files Browse the repository at this point in the history
Fixes cockroachdb#69617

This commit amends the PruneJoinLeftCols and PruneJoinRightCols
normalization rules to include potential derived ON clause predicates so
that columns not present in the SELECT list are not pruned away from the
involved Scans before predicates are derived. Derived ON clause
predicate columns are excluded from the set of columns to use for
equijoin selectivity estimation.

Release note: None
  • Loading branch information
Mark Sirek committed Oct 25, 2022
1 parent 8b3d723 commit cf985d9
Show file tree
Hide file tree
Showing 8 changed files with 406 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,152 @@ statement error could not produce a query plan conforming to the LOOKUP JOIN hin
EXPLAIN SELECT *
FROM child
INNER LOOKUP JOIN parent ON p_int = c_int AND p_text LIKE '%foo%'

subtest implicit_crdb_region_column

statement ok
CREATE TABLE parent_rbr (
p_id INT PRIMARY KEY,
p_data STRING,
p_int INT
) LOCALITY REGIONAL BY ROW;

statement ok
CREATE TABLE child_rbr (
c_id INT PRIMARY KEY,
c_p_id INT,
c_int INT,
c_int2 INT,
FOREIGN KEY (c_p_id, crdb_region) REFERENCES parent_rbr (p_id, crdb_region)
) LOCALITY REGIONAL BY ROW;

statement ok
INSERT INTO parent_rbr VALUES (1, 'foo', 1);

statement ok
INSERT INTO parent_rbr (crdb_region, p_id, p_data, p_int) VALUES ('us-east-1', 2, 'bar', 2);

statement ok
INSERT INTO child_rbr VALUES (1, 1, 1, 0);

statement ok
INSERT INTO child_rbr (crdb_region, c_id, c_p_id, c_int, c_int2) VALUES ('us-east-1', 2, 2, 2, 0);

# A 'crdb_region = crdb_region' condition should be derived for this join.
query T
EXPLAIN SELECT *
FROM child_rbr
INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id
----
distribution: local
vectorized: true
·
• lookup join
│ table: parent_rbr@parent_rbr_pkey
│ equality: (crdb_region, c_p_id) = (crdb_region,p_id)
│ equality cols are key
└── • scan
missing stats
table: child_rbr@child_rbr_pkey
spans: FULL SCAN

query IIIIITI
SELECT *
FROM child_rbr
INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id
----
1 1 1 0 1 foo 1
2 2 2 0 2 bar 2

# A 'crdb_region = crdb_region' condition should be derived for this join with
# selection filter.
query T
EXPLAIN SELECT *
FROM child_rbr
INNER JOIN parent_rbr ON c_p_id = p_id WHERE c_int = 1
----
distribution: local
vectorized: true
·
• lookup join
│ table: parent_rbr@parent_rbr_pkey
│ equality: (crdb_region, c_p_id) = (crdb_region,p_id)
│ equality cols are key
└── • filter
│ filter: c_int = 1
└── • scan
missing stats
table: child_rbr@child_rbr_pkey
spans: [/'ap-southeast-2' - /'ap-southeast-2'] [/'ca-central-1' - /'us-east-1']

query IIIIITI
SELECT *
FROM child_rbr
INNER JOIN parent_rbr ON c_p_id = p_id WHERE c_int = 1
----
1 1 1 0 1 foo 1

# A 'crdb_region = crdb_region' condition should be derived for this join with
# a projection and selection before the join.
query T
EXPLAIN SELECT *
FROM child_rbr
INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id AND c_int + c_int2 = p_int WHERE c_int2 > -1
----
distribution: local
vectorized: true
·
• lookup join
│ table: parent_rbr@parent_rbr_pkey
│ equality: (crdb_region, c_p_id) = (crdb_region,p_id)
│ equality cols are key
│ pred: column14 = p_int
└── • render
└── • filter
│ filter: c_int2 > -1
└── • scan
missing stats
table: child_rbr@child_rbr_pkey
spans: [/'ap-southeast-2' - /'ap-southeast-2'] [/'ca-central-1' - /'us-east-1']

query IIIIITI
SELECT *
FROM child_rbr
INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id AND c_int + c_int2 = p_int WHERE c_int2 > -1
----
1 1 1 0 1 foo 1
2 2 2 0 2 bar 2

# A 'crdb_region = crdb_region' condition should be derived for this join with
# selection filter on child_rbr.crdb_region. Is it worthwhile to compute TC and
# generate parent_rbr.crdb_region = 'ap-southeast-2'?
query T
EXPLAIN SELECT *
FROM child_rbr
INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id WHERE child_rbr.crdb_region = 'ap-southeast-2'
----
distribution: local
vectorized: true
·
• lookup join
│ table: parent_rbr@parent_rbr_pkey
│ equality: (crdb_region, c_p_id) = (crdb_region,p_id)
│ equality cols are key
└── • scan
missing stats
table: child_rbr@child_rbr_pkey
spans: [/'ap-southeast-2' - /'ap-southeast-2']

query IIIIITI
SELECT *
FROM child_rbr
INNER LOOKUP JOIN parent_rbr ON c_p_id = p_id WHERE child_rbr.crdb_region = 'ap-southeast-2'
----
1 1 1 0 1 foo 1
27 changes: 25 additions & 2 deletions pkg/sql/opt/lookupjoin/constraint_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,14 @@ func (b *ConstraintBuilder) Init(
// The constraint returned may be unconstrained if no constraint could be built.
// foundEqualityCols indicates whether any equality conditions were used to
// constrain the index columns; this can be used to decide whether to build a
// lookup join.
// lookup join. `derivedFkOnFilters` is a set of extra equijoin predicates,
// derived from a foreign key constraint, to add to the explicit ON clause,
// but which should not be used in calculating join selectivity estimates.
func (b *ConstraintBuilder) Build(
index cat.Index, onFilters, optionalFilters memo.FiltersExpr,
index cat.Index, onFilters, optionalFilters, derivedFkOnFilters memo.FiltersExpr,
) (_ Constraint, foundEqualityCols bool) {
onFilters = append(onFilters, derivedFkOnFilters...)

// Extract the equality columns from onFilters. We cannot use the results of
// the extraction in Init because onFilters may be reduced by the caller
// after Init due to partial index implication. If the filters are reduced,
Expand Down Expand Up @@ -189,6 +193,25 @@ func (b *ConstraintBuilder) Build(

keyCols := make(opt.ColList, 0, numIndexKeyCols)
var derivedEquivCols opt.ColSet
// Don't change the selectivity estimate of this join vs. other joins which
// don't use derivedFkOnFilters. Add column IDs from these filters to the set
// of columns to ignore for join selectivity estimation. The alternative would
// be to derive these filters for all joins, but it's not clear that this
// would always be better given that some joins like hash join would have more
// terms to evaluate. Also, that approach may cause selectivity
// underestimation, so would require more effort to make sure correlations
// between columns are accurately captured.
for _, filtersItem := range derivedFkOnFilters {
if eqExpr, ok := filtersItem.Condition.(*memo.EqExpr); ok {
leftVariable, leftOk := eqExpr.Left.(*memo.VariableExpr)
rightVariable, rightOk := eqExpr.Right.(*memo.VariableExpr)
if leftOk && rightOk {
derivedEquivCols.Add(leftVariable.Col)
derivedEquivCols.Add(rightVariable.Col)
}
}
}

rightSideCols := make(opt.ColList, 0, numIndexKeyCols)
var inputProjections memo.ProjectionsExpr
var lookupExpr memo.FiltersExpr
Expand Down
3 changes: 2 additions & 1 deletion pkg/sql/opt/lookupjoin/constraint_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ func TestLookupConstraints(t *testing.T) {
var cb lookupjoin.ConstraintBuilder
cb.Init(&f, md, f.EvalContext(), rightTable, leftCols, rightCols)

lookupConstraint, _ := cb.Build(index, filters, optionalFilters)
lookupConstraint, _ := cb.Build(index, filters, optionalFilters,
memo.FiltersExpr{} /* derivedFkOnFilters */)
var b strings.Builder
if lookupConstraint.IsUnconstrained() {
b.WriteString("lookup join not possible")
Expand Down
115 changes: 115 additions & 0 deletions pkg/sql/opt/norm/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,14 @@ func (c *CustomFuncs) ForeignKeyConstraintFilters(
// to build a new one.
continue
}
// If the lookup table's ScanPrivate does not include the join column for
// the predicate we want to build, then don't build the predicate.
if !pkTableScanPrivate.Cols.Contains(pkColID) {
nextFK = true
break
}
// If the left table column is not included in the Select or Project, it
// would be incorrect to build a predicate on this column.
if selectExpr != nil && !selectExpr.Relational().OutputCols.Contains(inputColID) {
nextFK = true
break
Expand Down Expand Up @@ -713,3 +721,110 @@ func (c *CustomFuncs) ForeignKeyConstraintFilters(
}
return nil
}

// AddDerivedOnClauseConditionsFromFKContraints examines any strict keys from
// the left and right relations and for each key which is a strict subset of the
// referencing columns in a PK/FK constraint, and also has equijoin predicates
// on the strict key columns, new equijoin predicates are built involving the
// missing PK/FK constraints columns and appended to a copy of the ON clause.
func (c *CustomFuncs) AddDerivedOnClauseConditionsFromFKContraints(
on memo.FiltersExpr, leftPossibleScan, rightPossibleScan memo.RelExpr,
) memo.FiltersExpr {

if projectExpr, ok := leftPossibleScan.(*memo.ProjectExpr); ok {
leftPossibleScan = projectExpr.Input
}

if projectExpr, ok := rightPossibleScan.(*memo.ProjectExpr); ok {
rightPossibleScan = projectExpr.Input
}

if selectExpr, ok := leftPossibleScan.(*memo.SelectExpr); ok {
leftPossibleScan = selectExpr.Input
}

if selectExpr, ok := rightPossibleScan.(*memo.SelectExpr); ok {
rightPossibleScan = selectExpr.Input
}

leftScan, ok := leftPossibleScan.(*memo.ScanExpr)
if !ok {
return on
}
rightScan, ok := rightPossibleScan.(*memo.ScanExpr)
if !ok {
return on
}
leftUniqueKeyCols, leftjoinCols, okLeft := c.findAdditionalJoinColsFromFKContraints(leftScan, on)
rightUniqueKeyCols, rightjoinCols, okRight := c.findAdditionalJoinColsFromFKContraints(rightScan, on)

if !okLeft && !okRight {
return on
}
newOn := make(memo.FiltersExpr, len(on))
copy(newOn, on)
if okLeft {
// Pass `leftScan.Cols` to ForeignKeyConstraintFilters because we want to
// allow derivation of join terms on columns in any index. It may be a waste
// of CPU to enumerate each index and do this call for every index as the
// main caller of this function is only determining the scan columns to
// include. The same applies to the 2nd call below and `rightScan.Cols`.
fkFiltersFromLeftUniqueIndex := c.ForeignKeyConstraintFilters(
rightScan, &leftScan.ScanPrivate, leftScan.Cols, leftUniqueKeyCols, leftjoinCols)
if len(fkFiltersFromLeftUniqueIndex) > 0 {
newOn = append(newOn, fkFiltersFromLeftUniqueIndex...)
}
}
if okRight {
fkFiltersFromRightUniqueIndex := c.ForeignKeyConstraintFilters(
leftScan, &rightScan.ScanPrivate, rightScan.Cols, rightUniqueKeyCols, rightjoinCols)
if len(fkFiltersFromRightUniqueIndex) > 0 {
newOn = append(newOn, fkFiltersFromRightUniqueIndex...)
}
}
return newOn
}

func (c *CustomFuncs) findAdditionalJoinColsFromFKContraints(
lookupTable *memo.ScanExpr, on memo.FiltersExpr,
) (keyCols, joinCols opt.ColSet, ok bool) {
md := c.mem.Metadata()
funcDeps := memo.MakeTableFuncDep(md, lookupTable.Table)

// If there is no strict key, no need to proceed further.
_, ok = funcDeps.StrictKey()
if !ok {
return opt.ColSet{}, opt.ColSet{}, false
}

var lookupTableColID opt.ColumnID
for _, filtersItem := range on {
eqExpr, ok := filtersItem.Condition.(*memo.EqExpr)
if !ok {
continue
}
leftVariable, ok := eqExpr.Left.(*memo.VariableExpr)
if !ok {
continue
}
rightVariable, ok := eqExpr.Right.(*memo.VariableExpr)
if !ok {
continue
}
if md.ColumnMeta(leftVariable.Col).Table == lookupTable.Table {
lookupTableColID = leftVariable.Col
if md.ColumnMeta(rightVariable.Col).Table == lookupTable.Table {
continue
}
} else if md.ColumnMeta(rightVariable.Col).Table == lookupTable.Table {
lookupTableColID = rightVariable.Col
} else {
continue
}
joinCols.Add(lookupTableColID)
}
if funcDeps.ColsAreStrictKey(joinCols) {
return funcDeps.ReduceCols(joinCols), joinCols, true
}
return opt.ColSet{}, opt.ColSet{}, false
}
24 changes: 20 additions & 4 deletions pkg/sql/opt/norm/rules/prune_cols.opt
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@
)

# PruneJoinLeftCols discards columns on the left side of a join that are never
# used.
# used. AddDerivedOnClauseConditionsFromFKContraints builds equijoin predicates
# which might be added during optimization, if any, to ensure those columns are
# not pruned away.
[PruneJoinLeftCols, Normalize]
(Project
$input:(Join $left:* $right:* $on:* $private:*)
Expand All @@ -163,7 +165,13 @@
$left
$needed:(UnionCols4
(OuterCols $right)
(FilterOuterCols $on)
(FilterOuterCols
(AddDerivedOnClauseConditionsFromFKContraints
$on
$left
$right
)
)
(ProjectionOuterCols $projections)
$passthrough
)
Expand All @@ -182,7 +190,9 @@
)

# PruneJoinRightCols discards columns on the right side of a join that are never
# used.
# used. AddDerivedOnClauseConditionsFromFKContraints builds equijoin predicates
# which might be added during optimization, if any, to ensure those columns are
# not pruned away.
#
# The PruneCols property should prevent this rule (which pushes Project below
# Join) from cycling with the TryDecorrelateProject rule (which pushes Join
Expand All @@ -195,7 +205,13 @@
(CanPruneCols
$right
$needed:(UnionCols3
(FilterOuterCols $on)
(FilterOuterCols
(AddDerivedOnClauseConditionsFromFKContraints
$on
$left
$right
)
)
(ProjectionOuterCols $projections)
$passthrough
)
Expand Down
Loading

0 comments on commit cf985d9

Please sign in to comment.