Skip to content

Commit

Permalink
memo: This commit adds redundantEquivCols to FuncDepSet. This ColSet
Browse files Browse the repository at this point in the history
tracks the columns in FD set equivalences which should not be used
for selectivity estimation.

Release justification: low risk change

Release note: none
  • Loading branch information
Mark Sirek committed Aug 29, 2022
1 parent 4e7e90d commit 2e4741b
Show file tree
Hide file tree
Showing 12 changed files with 201 additions and 59 deletions.
15 changes: 8 additions & 7 deletions pkg/sql/opt/lookupjoin/constraint_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ type Constraint struct {
// in RightSideCols. It will be nil if LookupExpr is non-nil.
KeyCols opt.ColList

// DerivedKeyCols is the set of lookup join key columns which are part of
// synthesized equality constraints based on another equality join condition
// and a computed index key column in the lookup table. Since these key
// DerivedEquivCols is the set of lookup join equijoin columns which are part
// of synthesized equality constraints based on another equality join
// condition and a computed index key column in the lookup table. Since these
// columns are not reducing the selectivity of the join, but are just added to
// facilitate index lookups, they should not be used in determining join
// selectivity.
DerivedKeyCols opt.ColSet
DerivedEquivCols opt.ColSet

// RightSideCols is an ordered list of prefix index columns that are
// constrained by this constraint. It corresponds 1:1 with the columns in
Expand Down Expand Up @@ -185,7 +185,7 @@ func (b *ConstraintBuilder) Build(
numIndexKeyCols := index.LaxKeyColumnCount()

keyCols := make(opt.ColList, 0, numIndexKeyCols)
var derivedKeyCols opt.ColSet
var derivedEquivCols opt.ColSet
rightSideCols := make(opt.ColList, 0, numIndexKeyCols)
var inputProjections memo.ProjectionsExpr
var lookupExpr memo.FiltersExpr
Expand Down Expand Up @@ -262,7 +262,8 @@ func (b *ConstraintBuilder) Build(
projection := b.f.ConstructProjectionsItem(b.f.RemapCols(expr, b.eqColMap), compEqCol)
inputProjections = append(inputProjections, projection)
addEqualityColumns(compEqCol, idxCol)
derivedKeyCols.Add(compEqCol)
derivedEquivCols.Add(compEqCol)
derivedEquivCols.Add(idxCol)
foundEqualityCols = true
foundLookupCols = true
continue
Expand Down Expand Up @@ -374,7 +375,7 @@ func (b *ConstraintBuilder) Build(

c := Constraint{
KeyCols: keyCols,
DerivedKeyCols: derivedKeyCols,
DerivedEquivCols: derivedEquivCols,
RightSideCols: rightSideCols,
LookupExpr: lookupExpr,
InputProjections: inputProjections,
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/opt/memo/logical_props_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2278,14 +2278,14 @@ func (h *joinPropsHelper) init(b *logicalPropsBuilder, joinExpr RelExpr) {
indexColID := join.Table.ColumnID(index.Column(i).Ordinal())
h.filterNotNullCols.Add(colID)
h.filterNotNullCols.Add(indexColID)
if !join.DerivedKeyCols.Contains(colID) {
h.filtersFD.AddEquivalency(colID, indexColID)
}
h.filtersFD.AddEquivalency(colID, indexColID)
if colID == indexColID {
// This can happen if an index join was converted into a lookup join.
h.selfJoinCols.Add(colID)
}
}
// Record equivalency columns which should not contribute to selectivity.
h.filtersFD.AddRedundantEquivCols(join.DerivedEquivCols)

// Lookup join has implicit equality conditions on KeyCols.
h.filterIsTrue = false
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1200,7 +1200,7 @@ func (sb *statisticsBuilder) buildJoin(
rightStats := &h.rightProps.Stats
leftCols := h.leftProps.OutputCols.Copy()
rightCols := h.rightProps.OutputCols.Copy()
equivReps := h.filtersFD.EquivReps()
equivReps := h.filtersFD.EquivRepsForSelectivity()

// Shortcut if there are no ON conditions. Note that for lookup join, there
// are implicit equality conditions on KeyCols.
Expand Down Expand Up @@ -1777,7 +1777,7 @@ func (sb *statisticsBuilder) buildZigzagJoin(
s.Available = sb.availabilityFromInput(zigzag)

leftStats := zigzag.leftProps.Stats
equivReps := h.filtersFD.EquivReps()
equivReps := h.filtersFD.EquivRepsForSelectivity()

// We assume that we only plan zigzag joins in cases where the result set
// will have a row count smaller than or equal to the left/right index
Expand Down Expand Up @@ -3023,7 +3023,7 @@ func (sb *statisticsBuilder) filterRelExpr(
for i := range filters {
equivFD.AddEquivFrom(&filters[i].ScalarProps().FuncDeps)
}
equivReps := equivFD.EquivReps()
equivReps := equivFD.EquivRepsForSelectivity()

// Calculate distinct counts and histograms for constrained columns
// ----------------------------------------------------------------
Expand Down Expand Up @@ -4221,7 +4221,7 @@ func (sb *statisticsBuilder) selectivityFromOredEquivalencies(
var selectivities []props.Selectivity
for i := 0; i < len(filters); i++ {
FD := &filtersFDs[i]
equivReps := FD.EquivReps()
equivReps := FD.EquivRepsForSelectivity()
if semiJoin {
singleSelectivity = sb.selectivityFromEquivalenciesSemiJoin(
equivReps, h.leftProps.OutputCols, h.rightProps.OutputCols, FD, e, s,
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/opt/ops/relational.opt
Original file line number Diff line number Diff line change
Expand Up @@ -374,13 +374,13 @@ define LookupJoinPrivate {
# in all cases.
KeyCols ColList

# DerivedKeyCols is the set of lookup join key columns which are part of
# synthesized equality constraints based on another equality join condition
# and a computed index key column in the lookup table. Since these key
# DerivedEquivCols is the set of lookup join equijoin columns which are part
# of synthesized equality constraints based on another equality join
# condition and a computed index key column in the lookup table. Since these
# columns are not reducing the selectivity of the join, but are just added to
# facilitate index lookups, they should not be used in determining join
# selectivity.
DerivedKeyCols ColSet
DerivedEquivCols ColSet

# LookupExpr represents the part of the join condition used to perform
# the lookup into the index. It should only be set when KeyCols is empty.
Expand Down
74 changes: 74 additions & 0 deletions pkg/sql/opt/props/func_dep.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,11 @@ type FuncDepSet struct {
// This set is immutable; to update it, replace it with a different set
// containing the desired columns.
key opt.ColSet

// redundantEquivCols is the set of columns contained in equivalencies in
// `deps` which are derived from other equivalencies, and are redundant for
// the purposes of selectivity estimation.
redundantEquivCols opt.ColSet
}

type keyType int8
Expand Down Expand Up @@ -549,6 +554,7 @@ func (f *FuncDepSet) CopyFrom(fdset *FuncDepSet) {
f.deps = append(f.deps, fdset.deps...)
f.key = fdset.key
f.hasKey = fdset.hasKey
f.redundantEquivCols = fdset.redundantEquivCols.Copy()
}

// RemapFrom copies the given FD into this FD, remapping column IDs according to
Expand All @@ -567,6 +573,7 @@ func (f *FuncDepSet) RemapFrom(fdset *FuncDepSet, fromCols, toCols opt.ColList)
f.deps[i].to = opt.TranslateColSetStrict(f.deps[i].to, fromCols, toCols)
}
f.key = opt.TranslateColSetStrict(f.key, fromCols, toCols)
f.redundantEquivCols = opt.TranslateColSetStrict(f.redundantEquivCols, fromCols, toCols)
}

// ColsAreStrictKey returns true if the given columns contain a strict key for the
Expand Down Expand Up @@ -1555,6 +1562,46 @@ func (f *FuncDepSet) EquivReps() opt.ColSet {
return reps
}

// EquivRepsForSelectivity collects representative columns for the equivalency
// groups in the FD set, for use by selectivity estimation. The remaining
// columns of a group can be obtained by calling ComputeEquivGroup on a
// representative.
//
// An equivalency whose `from` and `to` sets both intersect the redundant
// equivalency columns contributes only its non-redundant columns: first those
// of `from`, and if that yields nothing, those of `to`. The equality
// predicates behind redundant equivalencies never filter out additional rows,
// so they must not influence the estimated selectivity. Every other
// equivalency contributes its whole `from` set, provided none of its `to`
// columns has already been chosen as a representative.
func (f *FuncDepSet) EquivRepsForSelectivity() opt.ColSet {
	var reps opt.ColSet
	haveRedundant := !f.redundantEquivCols.Empty()

	// stripRedundant returns a fresh set holding the columns of cols that are
	// not marked as redundant; cols itself is left unmodified.
	stripRedundant := func(cols opt.ColSet) opt.ColSet {
		kept := cols.Copy()
		kept.DifferenceWith(f.redundantEquivCols)
		return kept
	}

	for i := range f.deps {
		dep := &f.deps[i]
		if !dep.equiv {
			continue
		}

		// Ordinary equivalency: at least one side has no redundant columns.
		if !haveRedundant ||
			!dep.to.Intersects(f.redundantEquivCols) ||
			!dep.from.Intersects(f.redundantEquivCols) {
			if !dep.to.Intersects(reps) {
				reps.UnionWith(dep.from)
			}
			continue
		}

		// Both sides touch redundant columns. Prefer the non-redundant part of
		// `from`; only when that adds nothing fall back to the non-redundant
		// part of `to`.
		if !dep.to.Intersects(reps) {
			if kept := stripRedundant(dep.from); !kept.Empty() {
				reps.UnionWith(kept)
				continue
			}
		}
		if !dep.from.Intersects(reps) {
			if kept := stripRedundant(dep.to); !kept.Empty() {
				reps.UnionWith(kept)
			}
		}
	}
	return reps
}

// ComputeEquivGroup returns the group of columns that are equivalent to the
// given column. See ComputeEquivClosure for more details.
func (f *FuncDepSet) ComputeEquivGroup(rep opt.ColumnID) opt.ColSet {
Expand Down Expand Up @@ -1590,6 +1637,8 @@ func (f *FuncDepSet) ensureKeyClosure(cols opt.ColSet) {
// 7. If FD set has a key, it should be a candidate key (already reduced).
// 8. Closure of key should include all known columns in the FD set.
// 9. If FD set has no key then key columns should be empty.
// 10. If redundant equivalency columns exist, non-redundant equivalency
// columns should exist.
//
func (f *FuncDepSet) Verify() {
for i := range f.deps {
Expand Down Expand Up @@ -1644,6 +1693,15 @@ func (f *FuncDepSet) Verify() {
panic(errors.AssertionFailedf("expected empty key columns since no key: %s", f))
}
}
if !f.redundantEquivCols.Empty() {
nonRedundantEquivCols := f.EquivRepsForSelectivity()
if nonRedundantEquivCols.Empty() {
// There must be at least some non-redundant equivalencies on which the
// redundant equivalencies are based.
f.EquivRepsForSelectivity()
panic(errors.AssertionFailedf("expected non-redundant equivalencies when redundant equivalencies are present"))
}
}
}

// StringOnlyFDs returns a string representation of the FDs (without the key
Expand All @@ -1663,6 +1721,9 @@ func (f FuncDepSet) String() string {
b.WriteString("lax-")
}
fmt.Fprintf(&b, "key%s", f.key)
if !f.redundantEquivCols.Empty() {
fmt.Fprintf(&b, " redundantEquivCols%s", f.redundantEquivCols)
}
if len(f.deps) > 0 {
b.WriteString("; ")
}
Expand Down Expand Up @@ -1990,6 +2051,19 @@ func (f *FuncDepSet) makeEquivMap(from, to opt.ColSet) map[opt.ColumnID]opt.Colu
return equivMap
}

// AddRedundantEquivCols unions cols into the set of columns involved in
// redundant equivalencies in this FuncDepSet. Columns already in the set are
// kept; call ClearRedundantEquivCols first to replace the set instead of
// extending it. These columns should not contribute to selectivities based on
// equivalencies (see EquivRepsForSelectivity).
func (f *FuncDepSet) AddRedundantEquivCols(cols opt.ColSet) {
f.redundantEquivCols.UnionWith(cols)
}

// ClearRedundantEquivCols resets the set of columns involved in redundant
// equivalencies in this FuncDepSet to the empty set. Only the redundancy
// marking is discarded; no other field of the FuncDepSet is modified here.
func (f *FuncDepSet) ClearRedundantEquivCols() {
f.redundantEquivCols = opt.ColSet{}
}

// isConstant returns true if this FD contains the set of constant columns. If
// it exists, it must always be the first FD in the set.
func (f *funcDep) isConstant() bool {
Expand Down
67 changes: 67 additions & 0 deletions pkg/sql/opt/props/func_dep_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,73 @@ func TestFuncDeps_EquivReps(t *testing.T) {
}
}

// TestFuncDeps_EquivRepsForSelectivity checks that EquivRepsForSelectivity
// leaves redundant equivalency columns out of the representatives it returns,
// and that an FD set whose equivalency columns are all redundant fails
// verification.
func TestFuncDeps_EquivRepsForSelectivity(t *testing.T) {
	// Base set, one equivalency group with columns 1 and 2 marked redundant:
	//   (1)~~>(5)
	//   (1)-->(6)
	//   (1)==(2,3), (2)==(1,3), (3)==(1,2)
	base := &props.FuncDepSet{}
	// AddLaxKey is only used to obtain a lax dependency, not a real lax key.
	base.AddLaxKey(c(1), c(1, 5))
	base.AddSynthesizedCol(c(1), 6)
	base.AddEquivalency(1, 2)
	base.AddEquivalency(2, 3)
	base.AddRedundantEquivCols(c(1, 2))
	verifyFD(t, base, "lax-key(1) redundantEquivCols(1,2); (1)~~>(5), (1)-->(6), (1)==(2,3), (2)==(1,3), (3)==(1,2)")

	// Same group widened with column 4; columns 1, 3 and 4 are redundant:
	//   (1)~~>(5)
	//   (1)-->(6)
	//   (1)==(2-4), (2)==(1,3,4), (3)==(1,2,4), (4)==(1-3)
	widened := &props.FuncDepSet{}
	widened.CopyFrom(base)
	widened.AddEquivalency(1, 4)
	widened.ClearRedundantEquivCols()
	widened.AddRedundantEquivCols(c(1, 3, 4))
	verifyFD(t, widened, "lax-key(1) redundantEquivCols(1,3,4); (1)~~>(5), (1)-->(6), (1)==(2-4), (2)==(1,3,4), (3)==(1,2,4), (4)==(1-3)")

	// Base group plus a second, independent group (4)==(5). Every column of
	// the first group is redundant; the second group is untouched:
	//   (1)~~>(5)
	//   (1)-->(6)
	//   (1)==(2,3), (2)==(1,3), (3)==(1,2)
	//   (4)==(5), (5)==(4)
	twoGroups := &props.FuncDepSet{}
	twoGroups.CopyFrom(base)
	twoGroups.AddEquivalency(4, 5)
	twoGroups.ClearRedundantEquivCols()
	twoGroups.AddRedundantEquivCols(c(1, 2, 3))
	verifyFD(t, twoGroups, "lax-key(1) redundantEquivCols(1-3); (1)~~>(5), (1)-->(6), (1)==(2,3), (2)==(1,3), (3)==(1,2), (4)==(5), (5)==(4)")

	// Each case lists the representatives expected once redundant columns
	// have been excluded.
	cases := []struct {
		fds  *props.FuncDepSet
		want opt.ColSet
	}{
		{fds: base, want: c(3)},
		{fds: widened, want: c(2)},
		{fds: twoGroups, want: c(4)},
	}

	for i := range cases {
		tt := &cases[i]
		got := tt.fds.EquivRepsForSelectivity()
		if !got.Equals(tt.want) {
			t.Errorf("fd: %s, expected: %s, actual: %s", tt.fds, tt.want, got)
		}
	}

	// Marking the second group redundant as well leaves no non-redundant
	// equivalency column at all, so verifying the FD set is expected to panic.
	twoGroups.AddRedundantEquivCols(c(4, 5))
	require.Panics(t, func() {
		verifyFD(t, twoGroups, "lax-key(1) redundantEquivCols(1-5); (1)~~>(5), (1)-->(6), (1)==(2,3), (2)==(1,3), (3)==(1,2), (4)==(5), (5)==(4)")
	})
}

func TestFuncDeps_AddStrictKey(t *testing.T) {
// CREATE TABLE mnpq (m INT, n INT, p INT, q INT, PRIMARY KEY (m, n))
// SELECT DISTINCT ON (p) m, n, p, q FROM mnpq
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ func (c *CustomFuncs) generateLookupJoinsImpl(
lookupJoin.Index = index.Ordinal()
lookupJoin.Locking = scanPrivate.Locking
lookupJoin.KeyCols = lookupConstraint.KeyCols
lookupJoin.DerivedKeyCols = lookupConstraint.DerivedKeyCols
lookupJoin.DerivedEquivCols = lookupConstraint.DerivedEquivCols
lookupJoin.LookupExpr = lookupConstraint.LookupExpr
lookupJoin.On = lookupConstraint.RemainingFilters
lookupJoin.ConstFilters = lookupConstraint.ConstFilters
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/opt/xform/testdata/physprops/ordering
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ project
memo
SELECT y, x-1 AS z FROM a WHERE x>y ORDER BY x, y DESC
----
memo (optimized, ~6KB, required=[presentation: y:2,z:7] [ordering: +1,-2])
memo (optimized, ~7KB, required=[presentation: y:2,z:7] [ordering: +1,-2])
├── G1: (project G2 G3 x y)
│ ├── [presentation: y:2,z:7] [ordering: +1,-2]
│ │ ├── best: (project G2="[ordering: +1,-2]" G3 x y)
Expand Down Expand Up @@ -666,7 +666,7 @@ explain
memo
EXPLAIN (VERBOSE) SELECT * FROM a ORDER BY y
----
memo (optimized, ~3KB, required=[presentation: info:7])
memo (optimized, ~4KB, required=[presentation: info:7])
├── G1: (explain G2 [presentation: x:1,y:2,z:3,s:4] [ordering: +2])
│ └── [presentation: info:7]
│ ├── best: (explain G2="[presentation: x:1,y:2,z:3,s:4] [ordering: +2]" [presentation: x:1,y:2,z:3,s:4] [ordering: +2])
Expand Down Expand Up @@ -725,7 +725,7 @@ memo (optimized, ~6KB, required=[presentation: y:2] [ordering: +8])
memo
SELECT y FROM a WITH ORDINALITY ORDER BY ordinality, x
----
memo (optimized, ~7KB, required=[presentation: y:2] [ordering: +7])
memo (optimized, ~8KB, required=[presentation: y:2] [ordering: +7])
├── G1: (ordinality G2)
│ ├── [presentation: y:2] [ordering: +7]
│ │ ├── best: (ordinality G2)
Expand Down
Loading

0 comments on commit 2e4741b

Please sign in to comment.