Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-20.2: opt: generate lookup joins on partial indexes #54362

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/partial_index
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,64 @@ DROP INDEX i2
statement ok
DELETE from u

# Test partial indexes with lookup joins.
subtest join

statement ok
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false;
CREATE TABLE join_small (m INT, n INT);
CREATE TABLE join_large (i INT, s STRING, INDEX (i) WHERE s IN ('foo', 'bar', 'baz'));
ALTER TABLE join_small INJECT STATISTICS '[
{
"columns": ["m"],
"created_at": "2019-02-08 04:10:40.001179+00:00",
"row_count": 20,
"distinct_count": 20
}
]';
ALTER TABLE join_large INJECT STATISTICS '[
{
"columns": ["i"],
"created_at": "2018-05-01 1:00:00.00000+00:00",
"row_count": 10000,
"distinct_count": 10000
},
{
"columns": ["s"],
"created_at": "2018-05-01 1:00:00.00000+00:00",
"row_count": 10000,
"distinct_count": 50
}
]';
INSERT INTO join_small VALUES (1, 1), (2, 2), (3, 3);
INSERT INTO join_large VALUES (1, 'foo'), (2, 'not'), (3, 'bar'), (4, 'not');

query I rowsort
SELECT m FROM join_small JOIN join_large ON n = i AND s IN ('foo', 'bar', 'baz')
----
1
3

query I rowsort
SELECT m FROM join_small JOIN join_large ON n = i AND s = 'foo'
----
1

# A lookup semi-join is used when an expression in the semi-join filter exactly
# matches the partial index predicate.
query I rowsort
SELECT m FROM join_small WHERE EXISTS (SELECT 1 FROM join_large WHERE n = i AND s IN ('foo', 'bar', 'baz'))
----
1
3

# A lookup anti-join is used when an expression in the anti-join filter exactly
# matches the partial index predicate.
query I rowsort
SELECT m FROM join_small WHERE NOT EXISTS (SELECT 1 FROM join_large WHERE n = i AND s IN ('foo', 'bar', 'baz'))
----
2

# Test partial indexes with an ENUM in the predicate.
subtest enum

Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/opt/memo/check_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,18 @@ func (m *Memo) CheckExpr(e opt.Expr) {
if t.Cols.SubsetOf(t.Input.Relational().OutputCols) {
panic(errors.AssertionFailedf("lookup join with no lookup columns"))
}
var requiredCols opt.ColSet
requiredCols.UnionWith(t.Relational().OutputCols)
requiredCols.UnionWith(t.ConstFilters.OuterCols(m))
requiredCols.UnionWith(t.On.OuterCols(m))
requiredCols.UnionWith(t.KeyCols.ToSet())
idx := m.Metadata().Table(t.Table).Index(t.Index)
for i := range t.KeyCols {
requiredCols.Add(t.Table.ColumnID(idx.Column(i).Ordinal()))
}
if !t.Cols.SubsetOf(requiredCols) {
panic(errors.AssertionFailedf("lookup join with columns that are not required"))
}

case *InsertExpr:
tab := m.Metadata().Table(t.Table)
Expand Down
16 changes: 10 additions & 6 deletions pkg/sql/opt/memo/expr_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,12 +446,12 @@ func (f *ExprFmtCtx) formatRelational(e RelExpr, tp treeprinter.Node) {
if !t.Flags.Empty() {
tp.Childf("flags: %s", t.Flags.String())
}
idxCols := make(opt.ColList, len(t.KeyCols))
idx := md.Table(t.Table).Index(t.Index)
for i := range idxCols {
idxCols[i] = t.Table.ColumnID(idx.Column(i).Ordinal())
}
if !f.HasFlags(ExprFmtHideColumns) {
idxCols := make(opt.ColList, len(t.KeyCols))
idx := md.Table(t.Table).Index(t.Index)
for i := range idxCols {
idxCols[i] = t.Table.ColumnID(idx.Column(i).Ordinal())
}
tp.Childf("key columns: %v = %v", t.KeyCols, idxCols)
}
if t.LookupColsAreTableKey {
Expand Down Expand Up @@ -1286,7 +1286,11 @@ func FormatPrivate(f *ExprFmtCtx, private interface{}, physProps *physical.Requi
if t.Index == cat.PrimaryIndex {
fmt.Fprintf(f.Buffer, " %s", tab.Name())
} else {
fmt.Fprintf(f.Buffer, " %s@%s", tab.Name(), tab.Index(t.Index).Name())
partialStr := ""
if _, isPartial := tab.Index(t.Index).Predicate(); isPartial {
partialStr = ",partial"
}
fmt.Fprintf(f.Buffer, " %s@%s%s", tab.Name(), tab.Index(t.Index).Name(), partialStr)
}

case *InvertedJoinPrivate:
Expand Down
16 changes: 8 additions & 8 deletions pkg/sql/opt/memo/testdata/stats/join
Original file line number Diff line number Diff line change
Expand Up @@ -1368,15 +1368,15 @@ inner-join (lookup def)
expr format=show-all colstat=6 colstat=7 colstat=(6, 7) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3)
(MakeLookupJoin
(Scan [ (Table "abc") (Cols "a,b,c") ])
[ (JoinType "semi-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e,f") ]
[ (JoinType "semi-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e") ]
[ ]
)
----
semi-join (lookup def)
├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int)
├── key columns: [1 2] = [5 6]
├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(6)=1, null(6)=0, distinct(7)=1, null(7)=0, distinct(6,7)=1, null(6,7)=0, distinct(1-3)=100, null(1-3)=0]
├── cost: 2110.0507
├── cost: 2110.0506
├── key: (1,2)
├── fd: (1,2)-->(3)
├── interesting orderings: (+1,+2)
Expand All @@ -1393,15 +1393,15 @@ semi-join (lookup def)
expr format=show-all colstat=6 colstat=7 colstat=(6, 7) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3)
(MakeLookupJoin
(Scan [ (Table "abc") (Cols "a,b,c") ])
[ (JoinType "anti-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e,f") ]
[ (JoinType "anti-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e") ]
[ ]
)
----
anti-join (lookup def)
├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int)
├── key columns: [1 2] = [5 6]
├── stats: [rows=1e-10, distinct(1)=1e-10, null(1)=0, distinct(2)=1e-10, null(2)=0, distinct(3)=1e-10, null(3)=1e-10, distinct(6)=1e-10, null(6)=0, distinct(7)=1e-10, null(7)=0, distinct(6,7)=1e-10, null(6,7)=0, distinct(1-3)=1e-10, null(1-3)=0]
├── cost: 2110.0507
├── cost: 2110.0506
├── key: (1,2)
├── fd: (1,2)-->(3)
├── interesting orderings: (+1,+2)
Expand All @@ -1418,15 +1418,15 @@ anti-join (lookup def)
expr format=show-all colstat=6 colstat=7 colstat=(6, 7) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3)
(MakeLookupJoin
(Scan [ (Table "abc") (Cols "a,b,c") ])
[ (JoinType "semi-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e,f") ]
[ (JoinType "semi-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e") ]
[ (False) ]
)
----
semi-join (lookup def)
├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int)
├── key columns: [1 2] = [5 6]
├── stats: [rows=0, distinct(1)=0, null(1)=0, distinct(2)=0, null(2)=0, distinct(3)=0, null(3)=0, distinct(6)=0, null(6)=0, distinct(7)=0, null(7)=0, distinct(6,7)=0, null(6,7)=0, distinct(1-3)=0, null(1-3)=0]
├── cost: 2110.0607
├── cost: 2110.0606
├── key: (1,2)
├── fd: (1,2)-->(3)
├── interesting orderings: (+1,+2)
Expand All @@ -1444,15 +1444,15 @@ semi-join (lookup def)
expr format=show-all colstat=6 colstat=7 colstat=(6, 7) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3)
(MakeLookupJoin
(Scan [ (Table "abc") (Cols "a,b,c") ])
[ (JoinType "anti-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e,f") ]
[ (JoinType "anti-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e") ]
[ (False) ]
)
----
anti-join (lookup def)
├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int)
├── key columns: [1 2] = [5 6]
├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(6)=1, null(6)=0, distinct(7)=1, null(7)=0, distinct(6,7)=1, null(6,7)=0, distinct(1-3)=100, null(1-3)=0]
├── cost: 2110.0607
├── cost: 2110.0606
├── key: (1,2)
├── fd: (1,2)-->(3)
├── interesting orderings: (+1,+2)
Expand Down
68 changes: 59 additions & 9 deletions pkg/sql/opt/xform/custom_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1682,8 +1682,8 @@ func (c *CustomFuncs) GenerateMergeJoins(
// Input
//
// For example:
// CREATE TABLE abc (a PRIMARY KEY, b INT, c INT)
// CREATE TABLE xyz (x PRIMARY KEY, y INT, z INT, INDEX (y))
// CREATE TABLE abc (a INT PRIMARY KEY, b INT, c INT)
// CREATE TABLE xyz (x INT PRIMARY KEY, y INT, z INT, INDEX (y))
// SELECT * FROM abc JOIN xyz ON a=y
//
// We want to first join abc with the index on y (which provides columns y, x)
Expand All @@ -1707,6 +1707,7 @@ func (c *CustomFuncs) GenerateLookupJoins(
return
}
md := c.e.mem.Metadata()
tabMeta := md.TableMeta(scanPrivate.Table)
inputProps := input.Relational()

leftEq, rightEq := memo.ExtractJoinEqualityColumns(inputProps.OutputCols, scanPrivate.Cols, on)
Expand All @@ -1717,10 +1718,24 @@ func (c *CustomFuncs) GenerateLookupJoins(

var pkCols opt.ColList

// TODO(mgartner): Use partial indexes for lookup joins when the predicate
// is implied by the on filter.
iter := makeScanIndexIter(c.e.mem, scanPrivate, rejectInvertedIndexes|rejectPartialIndexes)
iter := makeScanIndexIter(c.e.mem, scanPrivate, rejectInvertedIndexes)
for iter.Next() {
onFilters := on

// If the secondary index is a partial index, its predicate must be
// implied by the ON filters.
_, isPartialIndex := md.Table(scanPrivate.Table).Index(iter.IndexOrdinal()).Predicate()
if isPartialIndex {
pred := memo.PartialIndexPredicate(tabMeta, iter.IndexOrdinal())
remainingFilters, ok := c.im.FiltersImplyPredicate(onFilters, pred)
if !ok {
// The ON filters do not imply the predicate, so the partial
// index cannot be used.
continue
}
onFilters = remainingFilters
}

// Find the longest prefix of index key columns that are constrained by
// an equality with another column or a constant.
numIndexKeyCols := iter.Index().LaxKeyColumnCount()
Expand All @@ -1734,7 +1749,7 @@ func (c *CustomFuncs) GenerateLookupJoins(
// in most cases.
firstIdxCol := scanPrivate.Table.IndexColumnID(iter.Index(), 0)
if _, ok := rightEq.Find(firstIdxCol); !ok {
if _, _, ok := c.findConstantFilter(on, firstIdxCol); !ok {
if _, _, ok := c.findConstantFilter(onFilters, firstIdxCol); !ok {
continue
}
}
Expand Down Expand Up @@ -1763,7 +1778,7 @@ func (c *CustomFuncs) GenerateLookupJoins(
// value. We cannot use a NULL value because the lookup join implements
// logic equivalent to simple equality between columns (where NULL never
// equals anything).
foundVal, onIdx, ok := c.findConstantFilter(on, idxCol)
foundVal, onIdx, ok := c.findConstantFilter(onFilters, idxCol)
if !ok || foundVal == tree.DNull {
break
}
Expand All @@ -1788,7 +1803,7 @@ func (c *CustomFuncs) GenerateLookupJoins(
needProjection = true
lookupJoin.KeyCols = append(lookupJoin.KeyCols, constColID)
rightSideCols = append(rightSideCols, idxCol)
constFilters = append(constFilters, on[onIdx])
constFilters = append(constFilters, onFilters[onIdx])
}

if len(lookupJoin.KeyCols) == 0 {
Expand All @@ -1807,7 +1822,7 @@ func (c *CustomFuncs) GenerateLookupJoins(
}

// Remove the redundant filters and update the lookup condition.
lookupJoin.On = memo.ExtractRemainingJoinFilters(on, lookupJoin.KeyCols, rightSideCols)
lookupJoin.On = memo.ExtractRemainingJoinFilters(onFilters, lookupJoin.KeyCols, rightSideCols)
lookupJoin.On.RemoveCommonFilters(constFilters)
lookupJoin.ConstFilters = constFilters

Expand All @@ -1818,6 +1833,41 @@ func (c *CustomFuncs) GenerateLookupJoins(
continue
}

if isPartialIndex && (joinType == opt.SemiJoinOp || joinType == opt.AntiJoinOp) {
// Typically, the index must cover all columns in the scanPrivate in
// order to generate a lookup join without an additional index join
// (case 1, see function comment). However, if the index is a
// partial index, the filters remaining after proving
// filter-predicate implication may no longer reference some
// columns. A lookup semi- or anti-join can be generated if the
// columns in the new filters from the right side of the join are
// covered by the index. Consider the example:
//
// CREATE TABLE a (a INT)
// CREATE TABLE xy (x INT, y INT, INDEX (x) WHERE y > 0)
//
// SELECT a FROM a WHERE EXISTS (SELECT 1 FROM xy WHERE a = x AND y > 0)
//
// The original ON filters of the semi-join are (a = x AND y > 0).
// The (y > 0) expression in the filter is an exact match to the
// partial index predicate, so the remaining ON filters are (a = x).
// Column y is no longer referenced, so a lookup semi-join can be
// created despite the partial index not covering y.
//
// Note that this is a special case that only works for semi- and
// anti-joins because they never include columns from the right side
// in their output columns. Other joins include columns from the
// right side in their output columns, so even if the ON filters no
// longer reference an un-covered column, they must be fetched (case
// 2, see function comment).
filterColsFromRight := scanPrivate.Cols.Intersection(onFilters.OuterCols(c.e.mem))
if filterColsFromRight.SubsetOf(iter.IndexColumns()) {
lookupJoin.Cols = filterColsFromRight.Union(inputProps.OutputCols)
c.e.mem.AddLookupJoinToGroup(&lookupJoin, grp)
continue
}
}

// All code that follows is for case 2 (see function comment).

if scanPrivate.Flags.NoIndexJoin {
Expand Down
Loading