Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xform: ignore derived hash bucket lookup join cols for selectivity estimate #86622

Merged
merged 1 commit into from
Sep 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1136,40 +1136,47 @@ vectorized: true
│ render column3: column3
│ render crdb_internal_email_shard_16_comp: crdb_internal_email_shard_16_comp
└── • lookup join (anti)
└── • project
│ columns: (column1, column2, column3, crdb_internal_email_shard_16_comp)
│ estimated row count: 0 (missing stats)
│ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey
│ equality cols are key
│ lookup condition: (part IN ('new york', 'seattle')) AND (column1 = id)
└── • project
│ columns: (column1, column2, column3, crdb_internal_email_shard_16_comp)
└── • lookup join (anti)
│ columns: (crdb_internal_email_shard_16_eq, column1, column2, column3, crdb_internal_email_shard_16_comp)
│ estimated row count: 0 (missing stats)
│ table: t_unique_hash_sec_key@idx_uniq_hash_email
│ equality cols are key
│ lookup condition: ((part IN ('new york', 'seattle')) AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email)
└── • lookup join (anti)
│ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, column1, column2, column3)
│ estimated row count: 0 (missing stats)
│ table: t_unique_hash_sec_key@idx_uniq_hash_email
│ equality cols are key
│ lookup condition: ((part IN ('new york', 'seattle')) AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email)
└── • render
│ columns: (crdb_internal_email_shard_16_eq, column1, column2, column3, crdb_internal_email_shard_16_comp)
│ render crdb_internal_email_shard_16_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16)
│ render column1: column1
│ render column2: column2
│ render column3: column3
│ render crdb_internal_email_shard_16_comp: crdb_internal_email_shard_16_comp
└── • render
│ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, column1, column2, column3)
│ render crdb_internal_email_shard_16_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16)
│ render crdb_internal_email_shard_16_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16)
│ render column1: column1
│ render column2: column2
│ render column3: column3
└── • lookup join (anti)
│ columns: (crdb_internal_email_shard_16_comp, column1, column2, column3)
│ estimated row count: 0 (missing stats)
│ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey
│ equality cols are key
│ lookup condition: (part IN ('new york', 'seattle')) AND (column1 = id)
└── • values
columns: (column1, column2, column3)
size: 3 columns, 2 rows
row 0, expr 0: 4321
row 0, expr 1: 'some_email'
row 0, expr 2: 'seattle'
row 1, expr 0: 8765
row 1, expr 1: 'another_email'
row 1, expr 2: 'new york'
└── • render
│ columns: (crdb_internal_email_shard_16_comp, column1, column2, column3)
│ render crdb_internal_email_shard_16_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16)
│ render column1: column1
│ render column2: column2
│ render column3: column3
└── • values
columns: (column1, column2, column3)
size: 3 columns, 2 rows
row 0, expr 0: 4321
row 0, expr 1: 'some_email'
row 0, expr 2: 'seattle'
row 1, expr 0: 8765
row 1, expr 1: 'another_email'
row 1, expr 2: 'new york'

query T
EXPLAIN (VERBOSE) INSERT INTO t_unique_hash_sec_key (id, email, part) VALUES (4321, 'some_email', 'seattle') ON CONFLICT (email) DO NOTHING;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1224,45 +1224,52 @@ vectorized: true
│ render crdb_region_default: crdb_region_default
│ render crdb_internal_email_shard_16_comp: crdb_internal_email_shard_16_comp
└── • lookup join (anti)
└── • project
│ columns: (column1, column2, crdb_region_default, crdb_internal_email_shard_16_comp)
│ estimated row count: 0 (missing stats)
│ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey
│ equality cols are key
│ lookup condition: (crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1')) AND (column1 = id)
└── • project
│ columns: (column1, column2, crdb_region_default, crdb_internal_email_shard_16_comp)
└── • lookup join (anti)
│ columns: (crdb_internal_email_shard_16_eq, column1, column2, crdb_region_default, crdb_internal_email_shard_16_comp)
│ estimated row count: 0 (missing stats)
│ table: t_unique_hash_sec_key@idx_uniq_hash_email
│ equality cols are key
│ lookup condition: ((crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1')) AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email)
└── • lookup join (anti)
│ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2)
│ estimated row count: 0 (missing stats)
│ table: t_unique_hash_sec_key@idx_uniq_hash_email
│ equality cols are key
│ lookup condition: ((crdb_region IN ('ca-central-1', 'us-east-1')) AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email)
└── • render
│ columns: (crdb_internal_email_shard_16_eq, column1, column2, crdb_region_default, crdb_internal_email_shard_16_comp)
│ render crdb_internal_email_shard_16_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16)
│ render column1: column1
│ render column2: column2
│ render crdb_region_default: crdb_region_default
│ render crdb_internal_email_shard_16_comp: crdb_internal_email_shard_16_comp
└── • lookup join (anti)
│ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2)
│ estimated row count: 1 (missing stats)
│ table: t_unique_hash_sec_key@idx_uniq_hash_email
│ columns: (crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2)
│ estimated row count: 0 (missing stats)
│ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey
│ equality cols are key
│ lookup condition: ((crdb_region = 'ap-southeast-2') AND (crdb_internal_email_shard_16_eq = crdb_internal_email_shard_16)) AND (column2 = email)
│ lookup condition: (crdb_region IN ('ca-central-1', 'us-east-1')) AND (column1 = id)
└── • render
│ columns: (crdb_internal_email_shard_16_eq, crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2)
│ render crdb_internal_email_shard_16_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16)
│ render crdb_internal_email_shard_16_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16)
│ render crdb_region_default: 'ap-southeast-2'
│ render column1: column1
│ render column2: column2
└── • lookup join (anti)
│ columns: (crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2)
│ estimated row count: 1 (missing stats)
│ table: t_unique_hash_sec_key@t_unique_hash_sec_key_pkey
│ equality cols are key
│ lookup condition: (crdb_region = 'ap-southeast-2') AND (column1 = id)
└── • values
columns: (column1, column2)
size: 2 columns, 2 rows
row 0, expr 0: 4321
row 0, expr 1: 'some_email'
row 1, expr 0: 8765
row 1, expr 1: 'another_email'
└── • render
│ columns: (crdb_internal_email_shard_16_comp, crdb_region_default, column1, column2)
│ render crdb_internal_email_shard_16_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 16)
│ render crdb_region_default: 'ap-southeast-2'
│ render column1: column1
│ render column2: column2
└── • values
columns: (column1, column2)
size: 2 columns, 2 rows
row 0, expr 0: 4321
row 0, expr 1: 'some_email'
row 1, expr 0: 8765
row 1, expr 1: 'another_email'

query T
EXPLAIN (VERBOSE) INSERT INTO t_unique_hash_sec_key (id, email) VALUES (4321, 'some_email') ON CONFLICT (email) DO NOTHING;
Expand Down
58 changes: 32 additions & 26 deletions pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index
Original file line number Diff line number Diff line change
Expand Up @@ -1189,37 +1189,43 @@ vectorized: true
│ render column2: column2
│ render crdb_internal_b_shard_8_comp: crdb_internal_b_shard_8_comp
└── • lookup join (anti)
└── • project
│ columns: (column1, column2, crdb_internal_b_shard_8_comp)
│ estimated row count: 0 (missing stats)
│ table: t_hash_indexed@t_hash_indexed_pkey
│ equality: (column1) = (a)
│ equality cols are key
└── • project
│ columns: (column1, column2, crdb_internal_b_shard_8_comp)
└── • lookup join (anti)
│ columns: (crdb_internal_b_shard_8_eq, column1, column2, crdb_internal_b_shard_8_comp)
│ estimated row count: 0 (missing stats)
│ table: t_hash_indexed@idx_t_hash_indexed
│ equality: (crdb_internal_b_shard_8_eq, column2) = (crdb_internal_b_shard_8,b)
│ equality cols are key
└── • lookup join (anti)
│ columns: (crdb_internal_b_shard_8_eq, crdb_internal_b_shard_8_comp, column1, column2)
estimated row count: 0 (missing stats)
table: t_hash_indexed@idx_t_hash_indexed
equality: (crdb_internal_b_shard_8_eq, column2) = (crdb_internal_b_shard_8,b)
equality cols are key
└── • render
│ columns: (crdb_internal_b_shard_8_eq, column1, column2, crdb_internal_b_shard_8_comp)
render crdb_internal_b_shard_8_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 8)
render column1: column1
render column2: column2
render crdb_internal_b_shard_8_comp: crdb_internal_b_shard_8_comp
└── • render
│ columns: (crdb_internal_b_shard_8_eq, crdb_internal_b_shard_8_comp, column1, column2)
render crdb_internal_b_shard_8_eq: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 8)
render crdb_internal_b_shard_8_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 8)
render column1: column1
render column2: column2
└── • lookup join (anti)
│ columns: (crdb_internal_b_shard_8_comp, column1, column2)
estimated row count: 0 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
equality: (column1) = (a)
equality cols are key
└── • values
columns: (column1, column2)
size: 2 columns, 2 rows
row 0, expr 0: 111
row 0, expr 1: 222
row 1, expr 0: 333
row 1, expr 1: 444
└── • render
│ columns: (crdb_internal_b_shard_8_comp, column1, column2)
│ render crdb_internal_b_shard_8_comp: mod(fnv32(crdb_internal.datums_to_bytes(column2)), 8)
│ render column1: column1
│ render column2: column2
└── • values
columns: (column1, column2)
size: 2 columns, 2 rows
row 0, expr 0: 111
row 0, expr 1: 222
row 1, expr 0: 333
row 1, expr 1: 444

query T
EXPLAIN (VERBOSE) INSERT INTO t_hash_indexed VALUES (4321, 8765) ON CONFLICT (b) DO NOTHING
Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/opt/lookupjoin/constraint_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ type Constraint struct {
// in RightSideCols. It will be nil if LookupExpr is non-nil.
KeyCols opt.ColList

// DerivedEquivCols is the set of lookup join equijoin columns which are part
// of synthesized equality constraints based on another equality join
// condition and a computed index key column in the lookup table. Since these
// columns are not reducing the selectivity of the join, but are just added to
// facilitate index lookups, they should not be used in determining join
// selectivity.
DerivedEquivCols opt.ColSet

// RightSideCols is an ordered list of prefix index columns that are
// constrained by this constraint. It corresponds 1:1 with the columns in
// KeyCols if KeyCols is non-nil. Otherwise, it includes the prefix of index
Expand Down Expand Up @@ -177,6 +185,7 @@ func (b *ConstraintBuilder) Build(
numIndexKeyCols := index.LaxKeyColumnCount()

keyCols := make(opt.ColList, 0, numIndexKeyCols)
var derivedEquivCols opt.ColSet
rightSideCols := make(opt.ColList, 0, numIndexKeyCols)
var inputProjections memo.ProjectionsExpr
var lookupExpr memo.FiltersExpr
Expand Down Expand Up @@ -253,6 +262,8 @@ func (b *ConstraintBuilder) Build(
projection := b.f.ConstructProjectionsItem(b.f.RemapCols(expr, b.eqColMap), compEqCol)
inputProjections = append(inputProjections, projection)
addEqualityColumns(compEqCol, idxCol)
derivedEquivCols.Add(compEqCol)
derivedEquivCols.Add(idxCol)
foundEqualityCols = true
foundLookupCols = true
continue
Expand Down Expand Up @@ -364,6 +375,7 @@ func (b *ConstraintBuilder) Build(

c := Constraint{
KeyCols: keyCols,
DerivedEquivCols: derivedEquivCols,
RightSideCols: rightSideCols,
LookupExpr: lookupExpr,
InputProjections: inputProjections,
Expand Down
9 changes: 9 additions & 0 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -4327,10 +4327,19 @@ func addEqExprConjuncts(
func (sb *statisticsBuilder) selectivityFromEquivalency(
equivGroup opt.ColSet, e RelExpr, s *props.Statistics,
) (selectivity props.Selectivity) {
var derivedEquivCols opt.ColSet
if lookupJoinExpr, ok := e.(*LookupJoinExpr); ok {
derivedEquivCols = lookupJoinExpr.DerivedEquivCols
}
// Find the maximum input distinct count for all columns in this equivalency
// group.
maxDistinctCount := float64(0)
equivGroup.ForEach(func(i opt.ColumnID) {
if derivedEquivCols.Contains(i) {
// Don't apply selectivity from derived equivalencies internally
// manufactured by lookup join solely to facilitate index lookups.
return
}
// If any of the distinct counts were updated by the filter, we want to use
// the updated value.
colSet := opt.MakeColSet(i)
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/opt/ops/relational.opt
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,14 @@ define LookupJoinPrivate {
# in all cases.
KeyCols ColList

# DerivedEquivCols is the set of lookup join equijoin columns which are part
# of synthesized equality constraints based on another equality join
# condition and a computed index key column in the lookup table. Since these
# columns are not reducing the selectivity of the join, but are just added to
# facilitate index lookups, they should not be used in determining join
# selectivity.
DerivedEquivCols ColSet

# LookupExpr represents the part of the join condition used to perform
# the lookup into the index. It should only be set when KeyCols is empty.
# LookupExpr is used instead of KeyCols when the lookup condition is
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ func (c *CustomFuncs) generateLookupJoinsImpl(
lookupJoin.Index = index.Ordinal()
lookupJoin.Locking = scanPrivate.Locking
lookupJoin.KeyCols = lookupConstraint.KeyCols
lookupJoin.DerivedEquivCols = lookupConstraint.DerivedEquivCols
lookupJoin.LookupExpr = lookupConstraint.LookupExpr
lookupJoin.On = lookupConstraint.RemainingFilters
lookupJoin.ConstFilters = lookupConstraint.ConstFilters
Expand Down
Loading