Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
55194: opt: improve various composite-ness checks r=RaduBerinde a=RaduBerinde

#### opt: improve composite check for PushFilterIntoSetOp

Use `CanBeCompositeSensitive` instead of disallowing any filters
involving composite columns.

Release note: None

#### opt: improve composite check for Project FDs

Release note: None

#### opt: improve composite check for join filter remapping

Use CanBeCompositeSensitive to allow remapping insensitive join
filters.

Release note: None


Co-authored-by: Radu Berinde <[email protected]>
  • Loading branch information
craig[bot] and RaduBerinde committed Oct 6, 2020
2 parents 13b508f + d215271 commit 5784427
Show file tree
Hide file tree
Showing 14 changed files with 139 additions and 43 deletions.
16 changes: 1 addition & 15 deletions pkg/sql/opt/memo/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"sort"
"strings"

"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
"github.com/cockroachdb/cockroach/pkg/sql/opt/props"
Expand Down Expand Up @@ -755,20 +754,7 @@ func (prj *ProjectExpr) initUnexportedFields(mem *Memo) {
// This does not necessarily hold for "composite" types like decimals or
// collated strings. For example if d is a decimal, d::TEXT can have
// different values for equal values of d, like 1 and 1.0.
//
// We only add the FD if composite types are not involved.
//
// TODO(radu): add an allowlist of expressions/operators that are ok, like
// arithmetic.
composite := false
for i, ok := from.Next(0); ok; i, ok = from.Next(i + 1) {
typ := mem.Metadata().ColumnMeta(i).Type
if colinfo.HasCompositeKeyEncoding(typ) {
composite = true
break
}
}
if !composite {
if !CanBeCompositeSensitive(mem.Metadata(), item.Element) {
prj.internalFuncDeps.AddSynthesizedCol(from, item.Col)
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/memo/testdata/logprops/groupby
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ project
│ ├── project
│ │ ├── columns: column7:7(bool!null) column9:9(string!null) x:1(int!null) y:2(int) z:3(float!null) s:4(string!null)
│ │ ├── key: (1)
│ │ ├── fd: ()-->(9), (1)-->(2-4,7), (3,4)-->(1,2)
│ │ ├── fd: ()-->(9), (1)-->(2-4,7), (3,4)-->(1,2,7), (3)-->(7)
│ │ ├── prune: (1-4,7,9)
│ │ ├── interesting orderings: (+1) (-4,+3,+1)
│ │ ├── select
Expand Down
20 changes: 20 additions & 0 deletions pkg/sql/opt/memo/testdata/logprops/project
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,26 @@ project
└── cast: STRING [as=d:6, type=string, outer=(4), immutable]
└── variable: xysd.d:4 [type=decimal]

# We have the FD d --> d+1.
build
SELECT d, d+1 FROM xysd
----
project
├── columns: d:4(decimal!null) "?column?":6(decimal!null)
├── immutable
├── fd: (4)-->(6)
├── prune: (4,6)
├── scan xysd
│ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) crdb_internal_mvcc_timestamp:5(decimal)
│ ├── key: (1)
│ ├── fd: (1)-->(2-5), (3,4)~~>(1,2,5)
│ ├── prune: (1-5)
│ └── interesting orderings: (+1) (-3,+4,+1)
└── projections
└── plus [as="?column?":6, type=decimal, outer=(4), immutable]
├── variable: d:4 [type=decimal]
└── const: 1 [type=decimal]

# We have the equality relation between the synthesized column and the column
# it refers to.
norm
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/memo/testdata/stats_quality/tpch/q01
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ sort
│ ├── columns: column20:20(float!null) column22:22(float!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_returnflag:9(char!null) l_linestatus:10(char!null)
│ ├── immutable
│ ├── stats: [rows=5909394.86, distinct(5)=50, null(5)=0, distinct(6)=925955, null(6)=0, distinct(7)=11, null(7)=0, distinct(9)=3, null(9)=0, distinct(10)=2, null(10)=0, distinct(20)=5909394.86, null(20)=0, distinct(22)=5909394.86, null(22)=0, distinct(9,10)=6, null(9,10)=0]
│ ├── fd: (6,7)-->(20)
│ ├── select
│ │ ├── save-table-name: q1_select_4
│ │ ├── columns: l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_tax:8(float!null) l_returnflag:9(char!null) l_linestatus:10(char!null) l_shipdate:11(date!null)
Expand Down
30 changes: 18 additions & 12 deletions pkg/sql/opt/norm/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (c *CustomFuncs) canMapJoinOpEquivalenceGroup(
leftCols, rightCols opt.ColSet,
equivFD props.FuncDepSet,
) bool {
eqCols := c.GetEquivColsWithEquivType(col, equivFD)
eqCols := c.GetEquivColsWithEquivType(col, equivFD, false /* allowCompositeEncoding */)

// To map equality conditions, the equivalent columns must intersect
// both sides and must be fully bound by both sides.
Expand Down Expand Up @@ -201,7 +201,7 @@ func (c *CustomFuncs) mapJoinOpEquivalenceGroup(
leftCols, rightCols opt.ColSet,
equivFD props.FuncDepSet,
) memo.FiltersExpr {
eqCols := c.GetEquivColsWithEquivType(col, equivFD)
eqCols := c.GetEquivColsWithEquivType(col, equivFD, false /* allowCompositeEncoding */)

// First remove all the equality conditions for this equivalence group.
newFilters := make(memo.FiltersExpr, 0, len(filters))
Expand Down Expand Up @@ -289,10 +289,12 @@ func (c *CustomFuncs) CanMapJoinOpFilter(
return false
}

allowCompositeEncoding := !memo.CanBeCompositeSensitive(c.mem.Metadata(), src)

// For CanMapJoinOpFilter to be true, each column in src must map to at
// least one column in dst.
for i, ok := scalarProps.OuterCols.Next(0); ok; i, ok = scalarProps.OuterCols.Next(i + 1) {
eqCols := c.GetEquivColsWithEquivType(i, equivFD)
eqCols := c.GetEquivColsWithEquivType(i, equivFD, allowCompositeEncoding)
if !eqCols.Intersects(dstCols) {
return false
}
Expand Down Expand Up @@ -331,12 +333,14 @@ func (c *CustomFuncs) MapJoinOpFilter(
return src.Condition
}

allowCompositeEncoding := !memo.CanBeCompositeSensitive(c.mem.Metadata(), src)

// Map each column in src to one column in dst. We choose an arbitrary column
// (the one with the smallest ColumnID) if there are multiple choices.
var colMap util.FastIntMap
outerCols := src.ScalarProps().OuterCols
for srcCol, ok := outerCols.Next(0); ok; srcCol, ok = outerCols.Next(srcCol + 1) {
eqCols := c.GetEquivColsWithEquivType(srcCol, equivFD)
eqCols := c.GetEquivColsWithEquivType(srcCol, equivFD, allowCompositeEncoding)
eqCols.IntersectionWith(dstCols)
if eqCols.Contains(srcCol) {
colMap.Set(int(srcCol), int(srcCol))
Expand Down Expand Up @@ -382,10 +386,9 @@ func (c *CustomFuncs) MapJoinOpFilter(
// equivalent columns, because operations that are valid with one type may be
// invalid with a different type.
//
// In addition, if col has a composite key encoding, we cannot guarantee that
// it will be exactly equal to other "equivalent" columns, so in that case we
// return a set containing only col. This is a conservative measure to ensure
// that we don't infer filters incorrectly. For example, consider this query:
// If the column has a composite key encoding, there are extra considerations.
// In general, replacing composite columns with "equivalent" (equal) columns
// might change the result of an expression. For example, consider this query:
//
// SELECT * FROM
// (VALUES (1.0)) AS t1(x),
Expand All @@ -401,17 +404,20 @@ func (c *CustomFuncs) MapJoinOpFilter(
// But if we use the equality predicate x=y to map x to y and infer an
// additional filter y::text = '1.0', the query would return nothing.
//
// TODO(rytaft): In the future, we may want to allow the mapping if the
// filter involves a comparison operator, such as x < 5.
// The calling code needs to decide if the remapping of this column is allowed
// or not (depending on the actual expression). If it is allowed,
// allowCompositeEncoding should be true. If allowComposite is false and the
// column has composite encoding, the function returns a set containing only
// col.
func (c *CustomFuncs) GetEquivColsWithEquivType(
col opt.ColumnID, equivFD props.FuncDepSet,
col opt.ColumnID, equivFD props.FuncDepSet, allowCompositeEncoding bool,
) opt.ColSet {
var res opt.ColSet
colType := c.f.Metadata().ColumnMeta(col).Type

// Don't bother looking for equivalent columns if colType has a composite
// key encoding.
if colinfo.HasCompositeKeyEncoding(colType) {
if !allowCompositeEncoding && colinfo.HasCompositeKeyEncoding(colType) {
res.Add(col)
return res
}
Expand Down
32 changes: 23 additions & 9 deletions pkg/sql/opt/norm/select_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
package norm

import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/cockroach/pkg/util"
Expand All @@ -20,15 +19,30 @@ import (

// CanMapOnSetOp determines whether the filter can be mapped to either
// side of a set operator.
func (c *CustomFuncs) CanMapOnSetOp(src *memo.FiltersItem) bool {
filterProps := src.ScalarProps()
for i, ok := filterProps.OuterCols.Next(0); ok; i, ok = filterProps.OuterCols.Next(i + 1) {
colType := c.f.Metadata().ColumnMeta(i).Type
if colinfo.HasCompositeKeyEncoding(colType) {
return false
}
func (c *CustomFuncs) CanMapOnSetOp(filter *memo.FiltersItem) bool {
if memo.CanBeCompositeSensitive(c.mem.Metadata(), filter) {
// In general, it is not safe to remap a composite-sensitive filter.
// For example:
// - the set operation is Except
// - the left side has the decimal 1.0
// - the right side has the decimal 1.00
// - the filter is d::string != '1.00'
//
// If we push the filter to the right side, we will incorrectly remove 1.00,
// causing the overall Except operation to return a result.
//
// TODO(radu): we can do better on a case-by-case basis. For example, it is
// OK to push the filter for Union, and it is OK to push it to the left side
// of an Except.
return false
}
return !filterProps.HasCorrelatedSubquery

if filter.ScalarProps().HasCorrelatedSubquery {
// If the filter has a correlated subquery, we want to try to hoist it up as
// much as possible to decorrelate it.
return false
}
return true
}

// MapSetOpFilterLeft maps the filter onto the left expression by replacing
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/norm/testdata/rules/groupby
Original file line number Diff line number Diff line change
Expand Up @@ -1629,7 +1629,7 @@ project
├── project
│ ├── columns: column7:7 column9:9!null k:1!null i:2!null f:3
│ ├── key: (1)
│ ├── fd: (1)-->(2,3,7), (2,3)~~>(1), (2)-->(9)
│ ├── fd: (1)-->(2,3), (2,3)~~>(1), (3)-->(7), (2)-->(9)
│ ├── scan a
│ │ ├── columns: k:1!null i:2!null f:3
│ │ ├── key: (1)
Expand Down
37 changes: 36 additions & 1 deletion pkg/sql/opt/norm/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ project
# Ensure that we do not map filters for types with composite key encoding.
norm expect-not=(PushFilterIntoJoinLeftAndRight,MapFilterIntoJoinLeft,MapFilterIntoJoinRight)
SELECT *
FROM (VALUES (1.0), (2.0)) AS t1(x), (VALUES (1.00), (2.00)) AS t2(y)WHERE x=y AND x::text = '1.0'
FROM (VALUES (1.0), (2.0)) AS t1(x), (VALUES (1.00), (2.00)) AS t2(y) WHERE x=y AND x::text = '1.0'
----
inner-join (hash)
├── columns: x:1!null y:2!null
Expand All @@ -787,6 +787,41 @@ inner-join (hash)
└── filters
└── column1:1 = column1:2 [outer=(1,2), immutable, constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)]

# Remap composite variables if filters are not composite sensitive.
norm expect=PushFilterIntoJoinLeftAndRight
SELECT *
FROM (VALUES (1.0), (2.0)) AS t1(x), (VALUES (1.00), (2.00)) AS t2(y) WHERE x=y AND x >= 1
----
inner-join (hash)
├── columns: x:1!null y:2!null
├── cardinality: [0 - 4]
├── immutable
├── fd: (1)==(2), (2)==(1)
├── select
│ ├── columns: column1:1!null
│ ├── cardinality: [0 - 2]
│ ├── immutable
│ ├── values
│ │ ├── columns: column1:1!null
│ │ ├── cardinality: [2 - 2]
│ │ ├── (1.0,)
│ │ └── (2.0,)
│ └── filters
│ └── column1:1 >= 1 [outer=(1), immutable, constraints=(/1: [/1 - ]; tight)]
├── select
│ ├── columns: column1:2!null
│ ├── cardinality: [0 - 2]
│ ├── immutable
│ ├── values
│ │ ├── columns: column1:2!null
│ │ ├── cardinality: [2 - 2]
│ │ ├── (1.00,)
│ │ └── (2.00,)
│ └── filters
│ └── column1:2 >= 1 [outer=(2), immutable, constraints=(/2: [/1 - ]; tight)]
└── filters
└── column1:1 = column1:2 [outer=(1,2), immutable, constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)]

# Optimization does not apply if equality is only on one side.
norm expect-not=(PushFilterIntoJoinLeftAndRight,MapFilterIntoJoinLeft,MapFilterIntoJoinRight)
SELECT * FROM a INNER JOIN b ON b.y=b.x AND a.k=a.i AND a.k + b.y > 5 AND b.x * a.i = 3
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/limit
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ project
├── columns: f:3 r:7
├── cardinality: [0 - 5]
├── immutable
├── fd: (3)-->(7)
├── ordering: +3
├── limit
│ ├── columns: i:2 f:3
Expand Down Expand Up @@ -375,6 +376,7 @@ SELECT f, f+1.1 AS r FROM (SELECT f, i FROM a GROUP BY f, i) a ORDER BY f OFFSET
project
├── columns: f:3 r:7
├── immutable
├── fd: (3)-->(7)
├── ordering: +3
├── offset
│ ├── columns: i:2 f:3
Expand Down Expand Up @@ -434,6 +436,7 @@ project
├── columns: f:3 r:7
├── cardinality: [0 - 10]
├── immutable
├── fd: (3)-->(7)
├── ordering: +3
├── offset
│ ├── columns: i:2 f:3
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/prune_cols
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ SELECT f, f+1.1 AS r FROM (SELECT f, k FROM a GROUP BY f, k HAVING sum(k)=100) a
project
├── columns: f:3 r:7
├── immutable
├── fd: (3)-->(7)
├── select
│ ├── columns: k:1!null f:3 sum:6!null
│ ├── immutable
Expand Down Expand Up @@ -436,6 +437,7 @@ project
├── columns: f:3 r:6
├── cardinality: [0 - 5]
├── immutable
├── fd: (3)-->(6)
├── limit
│ ├── columns: f:3 s:4
│ ├── cardinality: [0 - 5]
Expand Down Expand Up @@ -678,6 +680,7 @@ project
├── columns: f:3 r:6
├── cardinality: [0 - 5]
├── immutable
├── fd: (3)-->(6)
├── offset
│ ├── columns: f:3 s:4
│ ├── cardinality: [0 - 5]
Expand Down
29 changes: 27 additions & 2 deletions pkg/sql/opt/norm/testdata/rules/select
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ SELECT f, f+1.1 AS r FROM (SELECT f, i FROM a GROUP BY f, i HAVING sum(f)=10.0)
project
├── columns: f:3 r:8
├── immutable
├── fd: (3)-->(8)
├── select
│ ├── columns: i:2 f:3 sum:7!null
│ ├── key: (2,3)
Expand Down Expand Up @@ -1884,8 +1885,9 @@ except
└── filters
└── (1 / 0) > 0 [immutable]

norm
SELECT * FROM ((values (1.0::decimal)) EXCEPT (values (1.00::decimal))) WHERE column1::string != '1.00';
# The filter is composite-sensitive.
norm expect-not=PushFilterIntoSetOp
SELECT * FROM ((VALUES (1.0::DECIMAL)) EXCEPT (VALUES (1.00::DECIMAL))) WHERE column1::STRING != '1.00';
----
select
├── columns: column1:1!null
Expand All @@ -1912,3 +1914,26 @@ select
│ └── (1.00,)
└── filters
└── column1:1::STRING != '1.00' [outer=(1), immutable]

# The filter is not composite-sensitive (even if it involves decimals) and can be pushed down.
norm expect=PushFilterIntoSetOp
SELECT * FROM ((VALUES (1.0::DECIMAL)) EXCEPT (VALUES (1.00::DECIMAL))) WHERE column1 > 0;
----
except
├── columns: column1:1!null
├── left columns: column1:1!null
├── right columns: column1:2
├── cardinality: [0 - 1]
├── key: (1)
├── values
│ ├── columns: column1:1!null
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ ├── fd: ()-->(1)
│ └── (1.0,)
└── values
├── columns: column1:2!null
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(2)
└── (1.00,)
1 change: 1 addition & 0 deletions pkg/sql/opt/xform/testdata/external/tpch
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ sort
├── project
│ ├── columns: column20:20!null column22:22!null l_quantity:5!null l_extendedprice:6!null l_discount:7!null l_returnflag:9!null l_linestatus:10!null
│ ├── immutable
│ ├── fd: (6,7)-->(20)
│ ├── select
│ │ ├── columns: l_quantity:5!null l_extendedprice:6!null l_discount:7!null l_tax:8!null l_returnflag:9!null l_linestatus:10!null l_shipdate:11!null
│ │ ├── scan lineitem
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/opt/xform/testdata/external/tpch-no-stats
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,12 @@ group-by
├── sort
│ ├── columns: l_quantity:5!null l_extendedprice:6!null l_discount:7!null l_returnflag:9!null l_linestatus:10!null column20:20!null column22:22!null
│ ├── immutable
│ ├── fd: (6,7)-->(20)
│ ├── ordering: +9,+10
│ └── project
│ ├── columns: column20:20!null column22:22!null l_quantity:5!null l_extendedprice:6!null l_discount:7!null l_returnflag:9!null l_linestatus:10!null
│ ├── immutable
│ ├── fd: (6,7)-->(20)
│ ├── select
│ │ ├── columns: l_quantity:5!null l_extendedprice:6!null l_discount:7!null l_tax:8!null l_returnflag:9!null l_linestatus:10!null l_shipdate:11!null
│ │ ├── scan lineitem
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/xform/testdata/physprops/ordering
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,13 @@ sort (segmented)
├── columns: x:1!null computed:6!null y:2!null
├── immutable
├── key: (1,2)
├── fd: (1,2)-->(6)
├── fd: (2)-->(6)
├── ordering: +1,+6
└── project
├── columns: computed:6!null x:1!null y:2!null
├── immutable
├── key: (1,2)
├── fd: (1,2)-->(6)
├── fd: (2)-->(6)
├── ordering: +1
├── scan a
│ ├── columns: x:1!null y:2!null
Expand Down

0 comments on commit 5784427

Please sign in to comment.