From 4807844e08adeca218960359f4fb5fe2ab576616 Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Mon, 22 Aug 2022 14:00:27 -0700 Subject: [PATCH] xform: ignore derived hash bucket lookup join cols for selectivity estimate Fixes #85353 Previously, a lookup join involving the first column of a hash-sharded index and a column from another table would use a derived equality condition between the invisible hash bucket column and an expression on the other table's column for selectivity estimation purposes. Since the derived join condition does not actually reduce the number of qualified rows, the optimizer would underestimate the row count of the join and select the lookup join even when lower-cost join methods existed. To address this, this patch records the columns that take part in derived equijoin conditions on the lookup table hash bucket column (the synthesized left-side key column and the hash bucket column itself) in a new `DerivedEquivCols` set on the lookup join, and skips those columns in `selectivityFromEquivalency()` when computing the maximum distinct count used to calculate the join selectivity. Release justification: Low risk fix for costly lookup joins on hash-sharded indexes. Release note (bug fix): This patch fixes a bug in lookup join selectivity estimation involving hash-sharded indexes that could cause lookup joins to be selected by the optimizer in cases where other join methods are less expensive.
--- ...partitioning_hash_sharded_index_query_plan | 65 ++++---- ...ional_by_row_hash_sharded_index_query_plan | 69 +++++---- .../execbuilder/testdata/hash_sharded_index | 58 +++---- pkg/sql/opt/lookupjoin/constraint_builder.go | 12 ++ pkg/sql/opt/memo/statistics_builder.go | 11 ++ pkg/sql/opt/ops/relational.opt | 8 + pkg/sql/opt/xform/join_funcs.go | 1 + pkg/sql/opt/xform/testdata/rules/join | 144 +++++++++++++++++- pkg/sql/opt/xform/testdata/rules/join_order | 2 +- 9 files changed, 282 insertions(+), 88 deletions(-) diff --git a/pkg/ccl/logictestccl/testdata/logic_test/partitioning_hash_sharded_index_query_plan b/pkg/ccl/logictestccl/testdata/logic_test/partitioning_hash_sharded_index_query_plan index f64b8f083b1e..44438b606e5b 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/partitioning_hash_sharded_index_query_plan +++ b/pkg/ccl/logictestccl/testdata/logic_test/partitioning_hash_sharded_index_query_plan @@ -1136,40 +1136,47 @@ vectorized: true │ render column3: column3 │ render crdb_internal_email_shard_16_comp: crdb_internal_email_shard_16_comp │ - └── • lookup join (anti) + └── • project │ columns: (column1, column2, column3, crdb_internal_email_shard_16_comp) - │ estimated row count: 0 (missing stats) - │ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey - │ equality cols are key - │ lookup condition: (part IN ('new york', 'seattle')) AND (column1 = id) │ - └── • project - │ columns: (column1, column2, column3, crdb_internal_email_shard_16_comp) + └── • lookup join (anti) + │ columns: (crdb_internal_email_shard_16_eq, column1, column2, column3, crdb_internal_email_shard_16_comp) + │ estimated row count: 0 (missing stats) + │ table: t_unique_hash_sec_key@idx_uniq_hash_email + │ equality cols are key + │ lookup condition: ((part IN ('new york', 'seattle')) AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email) │ - └── • lookup join (anti) - │ columns: (crdb_internal_email_shard_16_eq, 
crdb_internal_email_shard_16_comp, column1, column2, column3) - │ estimated row count: 0 (missing stats) - │ table: t_unique_hash_sec_key@idx_uniq_hash_email - │ equality cols are key - │ lookup condition: ((part IN ('new york', 'seattle')) AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email) + └── • render + │ columns: (crdb_internal_email_shard_16_eq, column1, column2, column3, crdb_internal_email_shard_16_comp) + │ render crdb_internal_email_shard_16_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16) + │ render column1: column1 + │ render column2: column2 + │ render column3: column3 + │ render crdb_internal_email_shard_16_comp: crdb_internal_email_shard_16_comp │ - └── • render - │ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, column1, column2, column3) - │ render crdb_internal_email_shard_16_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16) - │ render crdb_internal_email_shard_16_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16) - │ render column1: column1 - │ render column2: column2 - │ render column3: column3 + └── • lookup join (anti) + │ columns: (crdb_internal_email_shard_16_comp, column1, column2, column3) + │ estimated row count: 0 (missing stats) + │ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey + │ equality cols are key + │ lookup condition: (part IN ('new york', 'seattle')) AND (column1 = id) │ - └── • values - columns: (column1, column2, column3) - size: 3 columns, 2 rows - row 0, expr 0: 4321 - row 0, expr 1: 'some_email' - row 0, expr 2: 'seattle' - row 1, expr 0: 8765 - row 1, expr 1: 'another_email' - row 1, expr 2: 'new york' + └── • render + │ columns: (crdb_internal_email_shard_16_comp, column1, column2, column3) + │ render crdb_internal_email_shard_16_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16) + │ render column1: column1 + │ render column2: column2 + │ render column3: column3 + │ + └── • values + columns: (column1, 
column2, column3) + size: 3 columns, 2 rows + row 0, expr 0: 4321 + row 0, expr 1: 'some_email' + row 0, expr 2: 'seattle' + row 1, expr 0: 8765 + row 1, expr 1: 'another_email' + row 1, expr 2: 'new york' query T EXPLAIN (VERBOSE) INSERT INTO t_unique_hash_sec_key (id, email, part) VALUES (4321, 'some_email', 'seattle') ON CONFLICT (email) DO NOTHING; diff --git a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_hash_sharded_index_query_plan b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_hash_sharded_index_query_plan index 0326924c384d..1a5b1991f86e 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_hash_sharded_index_query_plan +++ b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_hash_sharded_index_query_plan @@ -1224,45 +1224,52 @@ vectorized: true │ render crdb_region_default: crdb_region_default │ render crdb_internal_email_shard_16_comp: crdb_internal_email_shard_16_comp │ - └── • lookup join (anti) + └── • project │ columns: (column1, column2, crdb_region_default, crdb_internal_email_shard_16_comp) - │ estimated row count: 0 (missing stats) - │ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey - │ equality cols are key - │ lookup condition: (crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1')) AND (column1 = id) │ - └── • project - │ columns: (column1, column2, crdb_region_default, crdb_internal_email_shard_16_comp) + └── • lookup join (anti) + │ columns: (crdb_internal_email_shard_16_eq, column1, column2, crdb_region_default, crdb_internal_email_shard_16_comp) + │ estimated row count: 0 (missing stats) + │ table: t_unique_hash_sec_key@idx_uniq_hash_email + │ equality cols are key + │ lookup condition: ((crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1')) AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email) │ - └── • lookup join (anti) - │ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, crdb_region_default, 
column1, column2) - │ estimated row count: 0 (missing stats) - │ table: t_unique_hash_sec_key@idx_uniq_hash_email - │ equality cols are key - │ lookup condition: ((crdb_region IN ('ca-central-1', 'us-east-1')) AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email) + └── • render + │ columns: (crdb_internal_email_shard_16_eq, column1, column2, crdb_region_default, crdb_internal_email_shard_16_comp) + │ render crdb_internal_email_shard_16_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16) + │ render column1: column1 + │ render column2: column2 + │ render crdb_region_default: crdb_region_default + │ render crdb_internal_email_shard_16_comp: crdb_internal_email_shard_16_comp │ └── • lookup join (anti) - │ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2) - │ estimated row count: 1 (missing stats) - │ table: t_unique_hash_sec_key@idx_uniq_hash_email + │ columns: (crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2) + │ estimated row count: 0 (missing stats) + │ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey │ equality cols are key - │ lookup condition: ((crdb_region = 'ap-southeast-2') AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email) + │ lookup condition: (crdb_region IN ('ca-central-1', 'us-east-1')) AND (column1 = id) │ - └── • render - │ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2) - │ render crdb_internal_email_shard_16_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16) - │ render crdb_internal_email_shard_16_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16) - │ render crdb_region_default: 'ap-southeast-2' - │ render column1: column1 - │ render column2: column2 + └── • lookup join (anti) + │ columns: (crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2) + │ estimated row 
count: 1 (missing stats) + │ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey + │ equality cols are key + │ lookup condition: (crdb_region = 'ap-southeast-2') AND (column1 = id) │ - └── • values - columns: (column1, column2) - size: 2 columns, 2 rows - row 0, expr 0: 4321 - row 0, expr 1: 'some_email' - row 1, expr 0: 8765 - row 1, expr 1: 'another_email' + └── • render + │ columns: (crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2) + │ render crdb_internal_email_shard_16_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16) + │ render crdb_region_default: 'ap-southeast-2' + │ render column1: column1 + │ render column2: column2 + │ + └── • values + columns: (column1, column2) + size: 2 columns, 2 rows + row 0, expr 0: 4321 + row 0, expr 1: 'some_email' + row 1, expr 0: 8765 + row 1, expr 1: 'another_email' query T EXPLAIN (VERBOSE) INSERT INTO t_unique_hash_sec_key (id, email) VALUES (4321, 'some_email') ON CONFLICT (email) DO NOTHING; diff --git a/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index b/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index index b66fa99586c0..50a8b1a71054 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index @@ -1189,37 +1189,43 @@ vectorized: true │ render column2: column2 │ render crdb_internal_b_shard_8_comp: crdb_internal_b_shard_8_comp │ - └── • lookup join (anti) + └── • project │ columns: (column1, column2, crdb_internal_b_shard_8_comp) - │ estimated row count: 0 (missing stats) - │ table: t_hash_indexed@t_hash_indexed_pkey - │ equality: (column1) = (a) - │ equality cols are key │ - └── • project - │ columns: (column1, column2, crdb_internal_b_shard_8_comp) + └── • lookup join (anti) + │ columns: (crdb_internal_b_shard_8_eq, column1, column2, crdb_internal_b_shard_8_comp) + │ estimated row count: 0 (missing stats) + │ table: t_hash_indexed@idx_t_hash_indexed + │ equality: (crdb_internal_b_shard_8_eq, 
column2) = (crdb_internal_b_shard_8,b) + │ equality cols are key │ - └── • lookup join (anti) - │ columns: (crdb_internal_b_shard_8_eq, crdb_internal_b_shard_8_comp, column1, column2) - │ estimated row count: 0 (missing stats) - │ table: t_hash_indexed@idx_t_hash_indexed - │ equality: (crdb_internal_b_shard_8_eq, column2) = (crdb_internal_b_shard_8,b) - │ equality cols are key + └── • render + │ columns: (crdb_internal_b_shard_8_eq, column1, column2, crdb_internal_b_shard_8_comp) + │ render crdb_internal_b_shard_8_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 8) + │ render column1: column1 + │ render column2: column2 + │ render crdb_internal_b_shard_8_comp: crdb_internal_b_shard_8_comp │ - └── • render - │ columns: (crdb_internal_b_shard_8_eq, crdb_internal_b_shard_8_comp, column1, column2) - │ render crdb_internal_b_shard_8_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 8) - │ render crdb_internal_b_shard_8_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 8) - │ render column1: column1 - │ render column2: column2 + └── • lookup join (anti) + │ columns: (crdb_internal_b_shard_8_comp, column1, column2) + │ estimated row count: 0 (missing stats) + │ table: t_hash_indexed@t_hash_indexed_pkey + │ equality: (column1) = (a) + │ equality cols are key │ - └── • values - columns: (column1, column2) - size: 2 columns, 2 rows - row 0, expr 0: 111 - row 0, expr 1: 222 - row 1, expr 0: 333 - row 1, expr 1: 444 + └── • render + │ columns: (crdb_internal_b_shard_8_comp, column1, column2) + │ render crdb_internal_b_shard_8_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 8) + │ render column1: column1 + │ render column2: column2 + │ + └── • values + columns: (column1, column2) + size: 2 columns, 2 rows + row 0, expr 0: 111 + row 0, expr 1: 222 + row 1, expr 0: 333 + row 1, expr 1: 444 query T EXPLAIN (VERBOSE) INSERT INTO t_hash_indexed VALUES (4321, 8765) ON CONFLICT (b) DO NOTHING diff --git a/pkg/sql/opt/lookupjoin/constraint_builder.go 
b/pkg/sql/opt/lookupjoin/constraint_builder.go index 67140a7dd95a..82cba23952ef 100644 --- a/pkg/sql/opt/lookupjoin/constraint_builder.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder.go @@ -42,6 +42,14 @@ type Constraint struct { // in RightSideCols. It will be nil if LookupExpr is non-nil. KeyCols opt.ColList + // DerivedEquivCols is the set of lookup join equijoin columns which are part + // of synthesized equality constraints based on another equality join + // condition and a computed index key column in the lookup table. Since these + // columns are not reducing the selectivity of the join, but are just added to + // facilitate index lookups, they should not be used in determining join + // selectivity. + DerivedEquivCols opt.ColSet + // RightSideCols is an ordered list of prefix index columns that are // constrained by this constraint. It corresponds 1:1 with the columns in // KeyCols if KeyCols is non-nil. Otherwise, it includes the prefix of index @@ -177,6 +185,7 @@ func (b *ConstraintBuilder) Build( numIndexKeyCols := index.LaxKeyColumnCount() keyCols := make(opt.ColList, 0, numIndexKeyCols) + var derivedEquivCols opt.ColSet rightSideCols := make(opt.ColList, 0, numIndexKeyCols) var inputProjections memo.ProjectionsExpr var lookupExpr memo.FiltersExpr @@ -253,6 +262,8 @@ func (b *ConstraintBuilder) Build( projection := b.f.ConstructProjectionsItem(b.f.RemapCols(expr, b.eqColMap), compEqCol) inputProjections = append(inputProjections, projection) addEqualityColumns(compEqCol, idxCol) + derivedEquivCols.Add(compEqCol) + derivedEquivCols.Add(idxCol) foundEqualityCols = true foundLookupCols = true continue @@ -364,6 +375,7 @@ func (b *ConstraintBuilder) Build( c := Constraint{ KeyCols: keyCols, + DerivedEquivCols: derivedEquivCols, RightSideCols: rightSideCols, LookupExpr: lookupExpr, InputProjections: inputProjections, diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index bad48e8005aa..8a080c889f4a 100644 --- 
a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -4327,10 +4327,21 @@ func addEqExprConjuncts( func (sb *statisticsBuilder) selectivityFromEquivalency( equivGroup opt.ColSet, e RelExpr, s *props.Statistics, ) (selectivity props.Selectivity) { + var derivedEquivCols opt.ColSet + if lookupJoinExpr, ok := e.(*LookupJoinExpr); ok { + if !lookupJoinExpr.DerivedEquivCols.Empty() { + derivedEquivCols = lookupJoinExpr.DerivedEquivCols + } + } // Find the maximum input distinct count for all columns in this equivalency // group. maxDistinctCount := float64(0) equivGroup.ForEach(func(i opt.ColumnID) { + if derivedEquivCols.Contains(i) { + // Don't apply selectivity from derived equivalencies internally + // manufactured by lookup join solely to facilitate index lookups. + return + } // If any of the distinct counts were updated by the filter, we want to use // the updated value. colSet := opt.MakeColSet(i) diff --git a/pkg/sql/opt/ops/relational.opt b/pkg/sql/opt/ops/relational.opt index 0e96749f0416..2d1fc5f8a140 100644 --- a/pkg/sql/opt/ops/relational.opt +++ b/pkg/sql/opt/ops/relational.opt @@ -374,6 +374,14 @@ define LookupJoinPrivate { # in all cases. KeyCols ColList + # DerivedEquivCols is the set of lookup join equijoin columns which are part + # of synthesized equality constraints based on another equality join + # condition and a computed index key column in the lookup table. Since these + # columns are not reducing the selectivity of the join, but are just added to + # facilitate index lookups, they should not be used in determining join + # selectivity. + DerivedEquivCols ColSet + # LookupExpr represents the part of the join condition used to perform # the lookup into the index. It should only be set when KeyCols is empty. 
# LookupExpr is used instead of KeyCols when the lookup condition is diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index b2357677b34a..447e147a2689 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -422,6 +422,7 @@ func (c *CustomFuncs) generateLookupJoinsImpl( lookupJoin.Index = index.Ordinal() lookupJoin.Locking = scanPrivate.Locking lookupJoin.KeyCols = lookupConstraint.KeyCols + lookupJoin.DerivedEquivCols = lookupConstraint.DerivedEquivCols lookupJoin.LookupExpr = lookupConstraint.LookupExpr lookupJoin.On = lookupConstraint.RemainingFilters lookupJoin.ConstFilters = lookupConstraint.ConstFilters diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 7a3afa35394e..c3d03ff1f8f0 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -227,7 +227,7 @@ inner-join (merge) memo expect=ReorderJoins SELECT * FROM abc, stu, xyz WHERE abc.a=stu.s AND stu.s=xyz.x ---- -memo (optimized, ~42KB, required=[presentation: a:1,b:2,c:3,s:7,t:8,u:9,x:12,y:13,z:14]) +memo (optimized, ~43KB, required=[presentation: a:1,b:2,c:3,s:7,t:8,u:9,x:12,y:13,z:14]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (inner-join G5 G6 G7) (inner-join G6 G5 G7) (inner-join G8 G9 G7) (inner-join G9 G8 G7) (merge-join G2 G3 G10 inner-join,+1,+7) (merge-join G3 G2 G10 inner-join,+7,+1) (lookup-join G3 G10 abc@ab,keyCols=[7],outCols=(1-3,7-9,12-14)) (merge-join G5 G6 G10 inner-join,+7,+12) (merge-join G6 G5 G10 inner-join,+12,+7) (lookup-join G6 G10 stu,keyCols=[12],outCols=(1-3,7-9,12-14)) (merge-join G8 G9 G10 inner-join,+7,+12) (lookup-join G8 G10 xyz@xy,keyCols=[7],outCols=(1-3,7-9,12-14)) (merge-join G9 G8 G10 inner-join,+12,+7) │ └── [presentation: a:1,b:2,c:3,s:7,t:8,u:9,x:12,y:13,z:14] │ ├── best: (merge-join G5="[ordering: +7]" G6="[ordering: +(1|12)]" G10 inner-join,+7,+12) @@ -12658,3 +12658,145 @@ index-join t81649 ├── flags: 
force-index=t81649_col3_col1_key ├── key: (4) └── fd: (2,4)-->(1), (2)==(4), (4)==(2) + +# Regression test for #85353 +exec-ddl +CREATE TABLE t85353 (a INT, b INT) +---- + +exec-ddl +CREATE TABLE u85353 (a INT, b INT, INDEX (a,b) USING HASH, INDEX (b) USING HASH) +---- + +exec-ddl +ALTER TABLE t85353 INJECT STATISTICS +'[ + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 100, + "distinct_count": 10, + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"}, + {"num_eq": 0, "num_range": 100, "distinct_range": 10, "upper_bound": "10"} + ], + "histo_col_type": "INT" + }, + { + "columns": ["b"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 100, + "distinct_count": 10, + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"}, + {"num_eq": 0, "num_range": 100, "distinct_range": 10, "upper_bound": "10"} + ], + "histo_col_type": "INT" + } +]' +---- + +exec-ddl +ALTER TABLE u85353 INJECT STATISTICS +'[ + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 100000, + "distinct_count": 10, + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"}, + {"num_eq": 0, "num_range": 100000, "distinct_range": 10, "upper_bound": "10"} + ], + "histo_col_type": "INT" + }, + { + "columns": ["b"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 100000, + "distinct_count": 10, + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"}, + {"num_eq": 0, "num_range": 100000, "distinct_range": 10, "upper_bound": "10"} + ], + "histo_col_type": "INT" + }, + { + "columns": ["a","b"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 100000, + "distinct_count": 10 + } +]' +---- + +# The derived equijoin condition between the hash bucket column in +# u85353@u85353_b_idx and a similar hash bucket function expression on t85353.b +# 
should not reduce join selectivity and cause the following to +# choose lookup join. +opt +EXPLAIN (OPT) SELECT * FROM t85353 INNER JOIN u85353@u85353_b_idx USING (b) WHERE u85353.a < 10; +---- +explain + ├── columns: info:11 + ├── mode: opt + └── project + ├── columns: b:2!null a:1 a:6!null + └── inner-join (merge) + ├── columns: t85353.a:1 t85353.b:2!null u85353.a:6!null u85353.b:7!null + ├── left ordering: +7 + ├── right ordering: +2 + ├── fd: (2)==(7), (7)==(2) + ├── select + │ ├── columns: u85353.a:6!null u85353.b:7 + │ ├── ordering: +7 + │ ├── index-join u85353 + │ │ ├── columns: u85353.a:6 u85353.b:7 + │ │ ├── ordering: +7 + │ │ └── scan u85353@u85353_b_idx + │ │ ├── columns: u85353.b:7 u85353.rowid:8!null + │ │ ├── flags: force-index=u85353_b_idx + │ │ ├── key: (8) + │ │ ├── fd: (8)-->(7) + │ │ └── ordering: +7 + │ └── filters + │ └── u85353.a:6 < 10 [outer=(6), constraints=(/6: (/NULL - /9]; tight)] + ├── sort + │ ├── columns: t85353.a:1 t85353.b:2 + │ ├── ordering: +2 + │ └── scan t85353 + │ └── columns: t85353.a:1 t85353.b:2 + └── filters (true) + +# The derived equijoin condition between the hash bucket column in +# u85353@u85353_a_b_idx and a similar hash bucket function expression on t85353.b +# should not reduce join selectivity and cause the following to +# choose lookup join. 
+opt +EXPLAIN (OPT) SELECT * FROM t85353 INNER JOIN u85353@u85353_a_b_idx USING (a,b) WHERE u85353.a < 10; +---- +explain + ├── columns: info:11 + ├── mode: opt + └── project + ├── columns: a:1!null b:2!null + └── inner-join (merge) + ├── columns: t85353.a:1!null t85353.b:2!null u85353.a:6!null u85353.b:7!null + ├── left ordering: +6,+7 + ├── right ordering: +1,+2 + ├── fd: (1)==(6), (6)==(1), (2)==(7), (7)==(2) + ├── scan u85353@u85353_a_b_idx + │ ├── columns: u85353.a:6!null u85353.b:7 + │ ├── constraint: /6/7/8: (/NULL - /9] + │ ├── flags: force-index=u85353_a_b_idx + │ └── ordering: +6,+7 + ├── sort + │ ├── columns: t85353.a:1!null t85353.b:2 + │ ├── ordering: +1,+2 + │ └── select + │ ├── columns: t85353.a:1!null t85353.b:2 + │ ├── scan t85353 + │ │ └── columns: t85353.a:1 t85353.b:2 + │ └── filters + │ └── t85353.a:1 < 10 [outer=(1), constraints=(/1: (/NULL - /9]; tight)] + └── filters (true) diff --git a/pkg/sql/opt/xform/testdata/rules/join_order b/pkg/sql/opt/xform/testdata/rules/join_order index d72927dcc547..4d3542a869bc 100644 --- a/pkg/sql/opt/xform/testdata/rules/join_order +++ b/pkg/sql/opt/xform/testdata/rules/join_order @@ -587,7 +587,7 @@ memo (optimized, ~29KB, required=[presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b: memo set=reorder_joins_limit=3 SELECT * FROM bx, cy, dz, abc WHERE x = y AND y = z AND z = a ---- -memo (optimized, ~62KB, required=[presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16]) +memo (optimized, ~63KB, required=[presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (inner-join G5 G6 G7) (inner-join G6 G5 G7) (inner-join G8 G9 G7) (inner-join G9 G8 G7) (inner-join G10 G11 G12) (inner-join G11 G10 G12) (inner-join G13 G14 G12) (inner-join G14 G13 G12) (inner-join G15 G16 G12) (inner-join G16 G15 G12) (inner-join G17 G18 G12) (inner-join G18 G17 G12) (merge-join G3 G2 G19 inner-join,+6,+2) (merge-join G6 G5 G19 inner-join,+10,+6) (merge-join G9 G8 G19 
inner-join,+10,+6) (merge-join G11 G10 G19 inner-join,+13,+10) (merge-join G14 G13 G19 inner-join,+13,+10) (merge-join G16 G15 G19 inner-join,+13,+10) (lookup-join G17 G19 abc,keyCols=[10],outCols=(1,2,5,6,9,10,13-16)) (merge-join G18 G17 G19 inner-join,+13,+10) │ └── [presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16] │ ├── best: (inner-join G3 G2 G4)