Skip to content

Commit

Permalink
Merge #57690
Browse files Browse the repository at this point in the history
57690: opt: generate lookup joins on partitioned indexes r=mgartner a=mgartner

#### xform: add rule expectations for GenerateLookupJoins tests

Release note: None

#### opt: generate lookup join for column constrained to multiple constants

Previously, the optimizer could create lookup join keys from filters
that constrain a column to a single constant value. This was done by
wrapping the join input in a Project that projected the constant value,
and using this new column as a key column.

This commit generalizes this behavior so that lookup join keys can also
be created from filters that constrain a column to multiple, non-ranging
constant values. The constant values are cross-joined with the input,
and the joined column is used as a key column. If a column is
constrained to a single constant value, the cross join normalizes to a
Project identical to the Projects constructed prior to this commit.

Release note (performance improvement): The query optimizer can use
filters that constrain columns to multiple constant values to generate
lookup joins. For example, a join filter `x.a = y.a AND y.b IN (1, 2)`
can be used to generate a lookup join on table `y` assuming that it has
an index on `(a, b)` or `(b, a)`.

#### opt: generate lookup joins with CHECK constraints and computed columns

Previously, only explicit filters were used to generate lookup join key
columns. Now lookup join keys can be generated from CHECK constraints
and computed column expressions.

With this commit and the previous commit, lookup joins on partitioned
indexes are explored by the optimizer.

Release note (performance improvement): The query optimizer now explores
plans with lookup joins on partitioned indexes, resulting in more
efficient query plans in some cases.

#### xform: do not propagate join hints to GenerateLookupJoins cross joins

This commit fixes a bug that prevented a `LOOKUP` join hint from
producing a plan with a lookup join. Previously, the hint was propagated
to the synthesized cross join created as input to the lookup join. This
artificially inflated the cost of the cross join, making the lookup join
too costly to be selected as the optimal plan.

Release note: None


Co-authored-by: Marcus Gartner <[email protected]>
  • Loading branch information
craig[bot] and mgartner committed Dec 10, 2020
2 parents f298478 + 4f82ebc commit c41a2ed
Show file tree
Hide file tree
Showing 15 changed files with 610 additions and 263 deletions.
20 changes: 13 additions & 7 deletions pkg/sql/opt/constraint/constraint_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,20 +312,26 @@ func (s *Set) ExtractValueForConstCol(evalCtx *tree.EvalContext, col opt.ColumnI
return nil
}

// IsSingleColumnConstValue returns true if the Set contains a single constraint
// on a single column which allows for a single constant value. On success,
// returns the column and the constant value.
func (s *Set) IsSingleColumnConstValue(
// HasSingleColumnConstValues returns true if the Set contains a single
// constraint on a single column which allows for one or more non-ranging
// constant values. On success, returns the column and the constant values.
func (s *Set) HasSingleColumnConstValues(
evalCtx *tree.EvalContext,
) (col opt.ColumnID, constValue tree.Datum, ok bool) {
) (col opt.ColumnID, constValues tree.Datums, ok bool) {
if s.Length() != 1 {
return 0, nil, false
}
c := s.Constraint(0)
if c.Columns.Count() != 1 || c.ExactPrefix(evalCtx) != 1 {
if c.Columns.Count() != 1 || c.Prefix(evalCtx) != 1 {
return 0, nil, false
}
return c.Columns.Get(0).ID(), c.Spans.Get(0).StartKey().Value(0), true
numSpans := c.Spans.Count()
constValues = make(tree.Datums, numSpans)
for i := range constValues {
val := c.Spans.Get(i).StartKey().Value(0)
constValues[i] = val
}
return c.Columns.Get(0).ID(), constValues, true
}

// allocConstraint allocates space for a new constraint in the set and returns
Expand Down
35 changes: 19 additions & 16 deletions pkg/sql/opt/constraint/constraint_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
package constraint

import (
"reflect"
"testing"

"github.com/cockroachdb/cockroach/pkg/sql/opt"
Expand Down Expand Up @@ -347,32 +348,34 @@ func TestExtractConstCols(t *testing.T) {
}
}

func TestIsSingleColumnConstValue(t *testing.T) {
func TestHasSingleColumnConstValues(t *testing.T) {
type testCase struct {
constraints []string
col opt.ColumnID
val int
vals []int
}
cases := []testCase{
{[]string{`/1: [/10 - /10]`}, 1, 10},
{[]string{`/-1: [/10 - /10]`}, 1, 10},
{[]string{`/1: [/10 - /11]`}, 0, 0},
{[]string{`/1: [/10 - /10] [/11 - /11]`}, 0, 0},
{[]string{`/1/2: [/10/2 - /10/4]`}, 0, 0},
{[]string{`/1/2: [/10/2 - /10/2]`}, 0, 0},
{[]string{`/1: [/10 - /10]`}, 1, []int{10}},
{[]string{`/-1: [/10 - /10]`}, 1, []int{10}},
{[]string{`/1: [/10 - /11]`}, 0, nil},
{[]string{`/1: [/10 - /10] [/11 - /11]`}, 1, []int{10, 11}},
{[]string{`/1: [/10 - /10] [/11 - /11] [/12 - /12]`}, 1, []int{10, 11, 12}},
{[]string{`/1: [/10 - /10] [/11 - /11] [/12 - /13]`}, 0, nil},
{[]string{`/1/2: [/10/2 - /10/4]`}, 0, nil},
{[]string{`/1/2: [/10/2 - /10/2]`}, 0, nil},
{
[]string{
`/1: [/10 - /10]`,
`/2: [/8 - /8]`,
},
0, 0,
0, nil,
},
{
[]string{
`/1: [/10 - /10]`,
`/1/2: [/10/8 - /10/8]`,
},
0, 0,
0, nil,
},
}
evalCtx := tree.NewTestingEvalContext(nil)
Expand All @@ -382,13 +385,13 @@ func TestIsSingleColumnConstValue(t *testing.T) {
constraint := ParseConstraint(evalCtx, constraint)
cs = cs.Intersect(evalCtx, SingleConstraint(&constraint))
}
col, val, ok := cs.IsSingleColumnConstValue(evalCtx)
intVal := 0
if ok {
intVal = int(*val.(*tree.DInt))
col, vals, _ := cs.HasSingleColumnConstValues(evalCtx)
var intVals []int
for _, val := range vals {
intVals = append(intVals, int(*val.(*tree.DInt)))
}
if tc.col != col || tc.val != intVal {
t.Errorf("%s: expected %d,%d got %d,%d", cs, tc.col, tc.val, col, intVal)
if tc.col != col || !reflect.DeepEqual(tc.vals, intVals) {
t.Errorf("%s: expected %d,%d got %d,%d", cs, tc.col, tc.vals, col, intVals)
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/opt/exec/execbuilder/testdata/lookup_join
Original file line number Diff line number Diff line change
Expand Up @@ -1736,13 +1736,13 @@ vectorized: true
│ estimated row count: 10 (missing stats)
└── • lookup join (semi)
│ columns: ("project_const_col_@15", pk, col0, col3)
│ columns: ("lookup_join_const_col_@15", pk, col0, col3)
│ table: tab4@tab4_col3_col4_key
│ equality: (col0, project_const_col_@15) = (col3,col4)
│ equality: (col0, lookup_join_const_col_@15) = (col3,col4)
│ equality cols are key
└── • render
│ columns: ("project_const_col_@15", pk, col0, col3)
│ columns: ("lookup_join_const_col_@15", pk, col0, col3)
│ estimated row count: 10 (missing stats)
│ render 0: 495.6
│ render 1: pk
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/memo/testdata/stats/lookup-join
Original file line number Diff line number Diff line change
Expand Up @@ -406,14 +406,14 @@ inner-join (lookup wxyz)
│ ├── stats: [rows=19.8, distinct(1)=19.8, null(1)=0, distinct(6)=19.8, null(6)=0, distinct(7)=1, null(7)=0, distinct(11)=1, null(11)=0]
│ ├── fd: ()-->(7), (9)-->(6,8), (1)==(6), (6)==(1)
│ ├── project
│ │ ├── columns: "project_const_col_@7":11(int!null) m:1(int) n:2(int)
│ │ ├── columns: "lookup_join_const_col_@7":11(int!null) m:1(int) n:2(int)
│ │ ├── stats: [rows=40, distinct(1)=40, null(1)=0, distinct(11)=1, null(11)=0]
│ │ ├── fd: ()-->(11)
│ │ ├── scan medium
│ │ │ ├── columns: m:1(int) n:2(int)
│ │ │ └── stats: [rows=40, distinct(1)=40, null(1)=0]
│ │ └── projections
│ │ └── 10 [as="project_const_col_@7":11, type=int]
│ │ └── 10 [as="lookup_join_const_col_@7":11, type=int]
│ └── filters (true)
└── filters (true)

Expand Down
74 changes: 74 additions & 0 deletions pkg/sql/opt/xform/general_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,77 @@ func (c *CustomFuncs) initIdxConstraintForIndex(
)
return ic
}

// computedColFilters derives filters from the computed column expressions of
// the given table. A computed column yields a filter only when its value is
// known to be constant, which requires both of the following:
//
//   1. Every column referenced by the computed column expression is itself
//      constant, because other filters in the query constrain it to be so.
//   2. Every function in the computed column expression can be folded into a
//      constant (i.e. it has no problematic side effects).
//
// Computed columns may reference other computed columns; in general these
// dependencies form an acyclic directed graph. computedColFilters returns
// filters for all constant computed columns, regardless of the order of their
// dependencies.
//
// Like checkConstraintFilters, the filters returned here do not actually
// remove any rows. They are facts or guarantees about the data, and treating
// them as filters may allow some indexes to be constrained and used. For
// example:
//
//   CREATE TABLE t (
//     k INT NOT NULL,
//     hash INT AS (k % 4) STORED,
//     PRIMARY KEY (hash, k)
//   )
//
//   SELECT * FROM t WHERE k = 5
//
// The explicit filter alone would not let the optimizer seek using the primary
// index (it would have to fall back to a table scan). However, column "hash"
// depends only on column "k", which has the constant value 5, so "hash" can be
// proven to have the constant value 1. This enables usage of the primary
// index:
//
//   scan t
//    ├── columns: k:1(int!null) hash:2(int!null)
//    ├── constraint: /2/1: [/1/5 - /1/5]
//    ├── key: (2)
//    └── fd: ()-->(1)
//
// Because the values of both index columns are known, a single value
// constraint can be generated.
//
// The result is nil when the table has no computed columns, or when neither
// requiredFilters nor optionalFilters constrains any column to a constant.
func (c *CustomFuncs) computedColFilters(
	tabID opt.TableID, requiredFilters, optionalFilters memo.FiltersExpr,
) memo.FiltersExpr {
	md := c.e.mem.Metadata().TableMeta(tabID)
	if len(md.ComputedCols) == 0 {
		// Nothing to derive for a table without computed columns.
		return nil
	}

	// Seed the constant column map from the required filters first, then the
	// optional ones.
	known := make(map[opt.ColumnID]opt.ScalarExpr)
	c.findConstantFilterCols(known, tabID, requiredFilters)
	c.findConstantFilterCols(known, tabID, optionalFilters)
	if len(known) == 0 {
		// With no constant columns from the filters, assume no computed column
		// can be constant either.
		return nil
	}

	// Emit one filter per computed column that can be folded to a constant
	// (i.e. all of its variables are in the known set).
	var result memo.FiltersExpr
	for id := range md.ComputedCols {
		if !c.tryFoldComputedCol(md, id, known) {
			continue
		}
		// After a successful fold the column's constant expression is read
		// back from the map.
		val := known[id]
		// Note: Is is used rather than Eq, because Eq is not correct here in
		// the presence of NULLs.
		isExpr := c.e.f.ConstructIs(c.e.f.ConstructVariable(id), val)
		result = append(result, c.e.f.ConstructFiltersItem(isExpr))
	}
	return result
}
Loading

0 comments on commit c41a2ed

Please sign in to comment.