Skip to content

Commit

Permalink
opt: simplify IS filters for selects and joins
Browse files Browse the repository at this point in the history
Previously, a filter in the condition of a `Select` or `Join` could
not simplify an expression of the following form:
`<expr> IS (True | False)`
This could prevent other rules from firing - contradiction detection,
for example. This patch allows the SimplifyFilters logic to effect the
following transformation on `IS` expressions in the filters of a `Select`
or `Join`:
```
<expr> IS True => <expr>
<expr> IS False => NOT <expr>
```
Note that the original `IS` expression would return `False` on a null
input, whereas the replacement expression could return `Null`. In the
case when the `IS` expression is a `Select` or `Join` condition, this
does not matter, because `False` and `Null` conditions are treated the
same way (no rows returned).

However, the same logic does not apply to `IS NOT`, because then `True`
values would become `Null`, since `Null IS NOT True` returns `True`.
This would result in less rows being returned.

Currently, only IS expressions that are top-level conjuncts of the
filters are simplified for the sake of simplicity.

Fixes #58283

Release note: None
  • Loading branch information
DrewKimball committed Dec 30, 2020
1 parent f23df8a commit 1faee2d
Show file tree
Hide file tree
Showing 6 changed files with 324 additions and 91 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/opt/memo/testdata/stats/join
Original file line number Diff line number Diff line change
Expand Up @@ -1481,7 +1481,7 @@ anti-join (lookup t.public.def)
└── false [type=bool, constraints=(contradiction; tight)]

# Regression test for #40460.
opt
opt disable=SimplifyJoinFilters
SELECT
*
FROM
Expand Down
3 changes: 2 additions & 1 deletion pkg/sql/opt/norm/rules/join.opt
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
$on:[
...
$item:(FiltersItem
(And | True | False | Null | Or)
(And | True | False | Null | Or | Is)
) &
^(IsUnsimplifiableOr $item) &
^(IsUnsimplifiableIs $item) &
^(IsContradiction $item)
...
] &
Expand Down
6 changes: 4 additions & 2 deletions pkg/sql/opt/norm/rules/select.opt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# - Replaces the Filters operator with False if any operand is False or Null
# - Flattens nested And operands by merging their conditions into parent
# - Simplifies Or operands where one side is a Null to the other side
# - Simplifies Is operands where the right side is True or False
#
# Note that the Null handling behavior is different than the SimplifyAnd rules,
# because Filters only appears as a Select or Join filter condition, both of
Expand All @@ -18,9 +19,10 @@
$filters:[
...
$item:(FiltersItem
(And | True | False | Null | Or)
(And | True | False | Null | Or | Is)
) &
^(IsUnsimplifiableOr $item)
^(IsUnsimplifiableOr $item) &
^(IsUnsimplifiableIs $item)
...
] &
^(IsFilterFalse $filters)
Expand Down
30 changes: 30 additions & 0 deletions pkg/sql/opt/norm/select_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,18 @@ func (c *CustomFuncs) IsUnsimplifiableOr(item *memo.FiltersItem) bool {
return or.Left.Op() != opt.NullOp && or.Right.Op() != opt.NullOp
}

// IsUnsimplifiableIs returns true if this is an IS where the right side is not
// True or False. SimplifyFilters simplifies an IS expression with True or False
// as the right input to its left input. This function serves a similar purpose
// to IsUnsimplifiableOr.
func (c *CustomFuncs) IsUnsimplifiableIs(item *memo.FiltersItem) bool {
is, ok := item.Condition.(*memo.IsExpr)
if !ok {
return false
}
return is.Right.Op() != opt.TrueOp && is.Right.Op() != opt.FalseOp
}

// addConjuncts recursively walks a scalar expression as long as it continues to
// find nested And operators. It adds any conjuncts (ignoring True operators) to
// the given FiltersExpr and returns true. If it finds a False or Null operator,
Expand Down Expand Up @@ -352,6 +364,24 @@ func (c *CustomFuncs) addConjuncts(
filters = append(filters, c.f.ConstructFiltersItem(t))
}

case *memo.IsExpr:
// Attempt to replace <expr> IS (True | False) with the left input. Note
// that this replacement may cause Null to be returned where the original
// expression returned False, because IS (True | False) returns False on a
// Null input. However, in this case the replacement is valid because Select
// and Join operators treat False and Null filter conditions the same way
// (no rows returned).
if t.Right.Op() == opt.TrueOp {
// <expr> IS True => <expr>
filters = append(filters, c.f.ConstructFiltersItem(t.Left))
} else if t.Right.Op() == opt.FalseOp {
// <expr> IS False => NOT <expr>
filters = append(filters, c.f.ConstructFiltersItem(c.f.ConstructNot(t.Left)))
} else {
// No replacement possible.
filters = append(filters, c.f.ConstructFiltersItem(t))
}

default:
filters = append(filters, c.f.ConstructFiltersItem(t))
}
Expand Down
232 changes: 211 additions & 21 deletions pkg/sql/opt/norm/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ exec-ddl
CREATE TABLE uv (u INT PRIMARY KEY, v INT)
----

exec-ddl
CREATE TABLE booleans(a BOOL, b BOOL, c BOOL, d BOOL, e BOOL)
----

norm
SELECT * FROM a INNER JOIN b ON a.s='foo' OR b.y<10
----
Expand Down Expand Up @@ -67,20 +71,197 @@ left-join (hash)
└── k:1 = x:7 [outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)]

# --------------------------------------------------
# DetectJoinContradiction
# SimplifyJoinFilters
# --------------------------------------------------

norm expect=DetectJoinContradiction
SELECT * FROM a INNER JOIN b ON (k<1 AND k>2) OR (k<4 AND k>5)
norm expect=SimplifyJoinFilters
SELECT * FROM a INNER JOIN xy ON x=1 OR NULL
----
values
├── columns: k:1!null i:2!null f:3!null s:4!null j:5!null x:7!null y:8!null
├── cardinality: [0 - 0]
├── key: ()
└── fd: ()-->(1-5,7,8)
inner-join (cross)
├── columns: k:1!null i:2 f:3!null s:4 j:5 x:7!null y:8
├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more)
├── key: (1)
├── fd: ()-->(7,8), (1)-->(2-5)
├── scan a
│ ├── columns: k:1!null i:2 f:3!null s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
├── select
│ ├── columns: x:7!null y:8
│ ├── cardinality: [0 - 1]
│ ├── key: ()
│ ├── fd: ()-->(7,8)
│ ├── scan xy
│ │ ├── columns: x:7!null y:8
│ │ ├── key: (7)
│ │ └── fd: (7)-->(8)
│ └── filters
│ └── x:7 = 1 [outer=(7), constraints=(/7: [/1 - /1]; tight), fd=()-->(7)]
└── filters (true)

norm expect=DetectJoinContradiction
SELECT * FROM a LEFT JOIN b ON (k<1 AND k>2) OR (k<4 AND k>5)
norm expect=SimplifyJoinFilters
SELECT * FROM a INNER JOIN xy ON (k=x AND i=y) AND true AND (f=3.5 AND s='foo')
----
inner-join (hash)
├── columns: k:1!null i:2!null f:3!null s:4!null j:5 x:7!null y:8!null
├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one)
├── key: (7)
├── fd: ()-->(3,4), (1)-->(2,5), (7)-->(8), (1)==(7), (7)==(1), (2)==(8), (8)==(2)
├── select
│ ├── columns: k:1!null i:2 f:3!null s:4!null j:5
│ ├── key: (1)
│ ├── fd: ()-->(3,4), (1)-->(2,5)
│ ├── scan a
│ │ ├── columns: k:1!null i:2 f:3!null s:4 j:5
│ │ ├── key: (1)
│ │ └── fd: (1)-->(2-5)
│ └── filters
│ ├── f:3 = 3.5 [outer=(3), constraints=(/3: [/3.5 - /3.5]; tight), fd=()-->(3)]
│ └── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)]
├── scan xy
│ ├── columns: x:7!null y:8
│ ├── key: (7)
│ └── fd: (7)-->(8)
└── filters
├── k:1 = x:7 [outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)]
└── i:2 = y:8 [outer=(2,8), constraints=(/2: (/NULL - ]; /8: (/NULL - ]), fd=(2)==(8), (8)==(2)]

norm expect-not=SimplifyJoinFilters
SELECT * FROM a INNER JOIN xy ON x=1 OR k=1
----
inner-join (cross)
├── columns: k:1!null i:2 f:3!null s:4 j:5 x:7!null y:8
├── key: (1,7)
├── fd: (1)-->(2-5), (7)-->(8)
├── scan a
│ ├── columns: k:1!null i:2 f:3!null s:4 j:5
│ ├── key: (1)
│ └── fd: (1)-->(2-5)
├── scan xy
│ ├── columns: x:7!null y:8
│ ├── key: (7)
│ └── fd: (7)-->(8)
└── filters
└── (x:7 = 1) OR (k:1 = 1) [outer=(1,7)]

# Inner join case. Simplify IS True.
norm expect=SimplifyJoinFilters
SELECT * FROM xy INNER JOIN booleans ON (a AND x > 0) IS True
----
inner-join (cross)
├── columns: x:1!null y:2 a:4!null b:5 c:6 d:7 e:8
├── fd: ()-->(4), (1)-->(2)
├── select
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ ├── fd: (1)-->(2)
│ ├── scan xy
│ │ ├── columns: x:1!null y:2
│ │ ├── key: (1)
│ │ └── fd: (1)-->(2)
│ └── filters
│ └── x:1 > 0 [outer=(1), constraints=(/1: [/1 - ]; tight)]
├── select
│ ├── columns: a:4!null b:5 c:6 d:7 e:8
│ ├── fd: ()-->(4)
│ ├── scan booleans
│ │ └── columns: a:4 b:5 c:6 d:7 e:8
│ └── filters
│ └── a:4 [outer=(4), constraints=(/4: [/true - /true]; tight), fd=()-->(4)]
└── filters (true)

# Inner join case. Simplify is False.
norm expect=SimplifyJoinFilters
SELECT * FROM xy INNER JOIN booleans ON (a AND x > 0) IS False
----
inner-join (cross)
├── columns: x:1!null y:2 a:4 b:5 c:6 d:7 e:8
├── fd: (1)-->(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── scan booleans
│ └── columns: a:4 b:5 c:6 d:7 e:8
└── filters
└── (NOT a:4) OR (x:1 <= 0) [outer=(1,4)]

# Left join case. Simplify IS True.
norm expect=SimplifyJoinFilters
SELECT * FROM xy LEFT JOIN booleans ON (a AND x > 0) IS True
----
left-join (cross)
├── columns: x:1!null y:2 a:4 b:5 c:6 d:7 e:8
├── fd: (1)-->(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── select
│ ├── columns: a:4!null b:5 c:6 d:7 e:8
│ ├── fd: ()-->(4)
│ ├── scan booleans
│ │ └── columns: a:4 b:5 c:6 d:7 e:8
│ └── filters
│ └── a:4 [outer=(4), constraints=(/4: [/true - /true]; tight), fd=()-->(4)]
└── filters
└── x:1 > 0 [outer=(1), constraints=(/1: [/1 - ]; tight)]

# Full join case. Simplify IS True.
norm expect=SimplifyJoinFilters
SELECT * FROM xy FULL JOIN booleans ON (a AND x > 0) IS True
----
full-join (cross)
├── columns: x:1 y:2 a:4 b:5 c:6 d:7 e:8
├── fd: (1)-->(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── scan booleans
│ └── columns: a:4 b:5 c:6 d:7 e:8
└── filters
├── a:4 [outer=(4), constraints=(/4: [/true - /true]; tight), fd=()-->(4)]
└── x:1 > 0 [outer=(1), constraints=(/1: [/1 - ]; tight)]

# Do not simplify IS NOT (when inputs are nullable).
norm expect-not=SimplifyJoinFilters
SELECT * FROM xy INNER JOIN booleans ON (a AND x > 0) IS NOT True
----
inner-join (cross)
├── columns: x:1!null y:2 a:4 b:5 c:6 d:7 e:8
├── fd: (1)-->(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── scan booleans
│ └── columns: a:4 b:5 c:6 d:7 e:8
└── filters
└── (a:4 AND (x:1 > 0)) IS NOT true [outer=(1,4)]

# Do not simplify the IS because the right argument is Null (vs True or False).
norm expect-not=SimplifyJoinFilters
SELECT * FROM xy INNER JOIN booleans ON (a AND x > 0) IS Null
----
inner-join (cross)
├── columns: x:1!null y:2 a:4 b:5 c:6 d:7 e:8
├── fd: (1)-->(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── scan booleans
│ └── columns: a:4 b:5 c:6 d:7 e:8
└── filters
└── (a:4 AND (x:1 > 0)) IS NULL [outer=(1,4)]

# Regression test for #54717. SimplifyJoinFilters should not simplify
# contradictions. Doing so can split the filter expressions into two
# FiltersItems such that they are no longer considered a contradiction, and
# DetectJoinContradiction does not fire.
norm expect=DetectJoinContradiction expect-not=SimplifyJoinFilters
SELECT * FROM a LEFT JOIN b ON k<1 AND k>2
----
left-join (cross)
├── columns: k:1!null i:2 f:3!null s:4 j:5 x:7 y:8
Expand All @@ -98,12 +279,21 @@ left-join (cross)
│ └── fd: ()-->(7,8)
└── filters (true)

# Regression test for #54717. SimplifyJoinFilters should not simplify
# contradictions. Doing so can split the filter expressions into two
# FiltersItems such that they are no longer considered a contradiction, and
# DetectJoinContradiction does not fire.
norm expect=DetectJoinContradiction expect-not=SimplifyJoinFilters
SELECT * FROM a LEFT JOIN b ON k<1 AND k>2
# --------------------------------------------------
# DetectJoinContradiction
# --------------------------------------------------

norm expect=DetectJoinContradiction
SELECT * FROM a INNER JOIN b ON (k<1 AND k>2) OR (k<4 AND k>5)
----
values
├── columns: k:1!null i:2!null f:3!null s:4!null j:5!null x:7!null y:8!null
├── cardinality: [0 - 0]
├── key: ()
└── fd: ()-->(1-5,7,8)

norm expect=DetectJoinContradiction
SELECT * FROM a LEFT JOIN b ON (k<1 AND k>2) OR (k<4 AND k>5)
----
left-join (cross)
├── columns: k:1!null i:2 f:3!null s:4 j:5 x:7 y:8
Expand Down Expand Up @@ -2596,7 +2786,7 @@ project
# --------------------------------------------------
# SimplifyJoinNotNullEquality
# --------------------------------------------------
norm expect=SimplifyJoinNotNullEquality
norm expect=SimplifyJoinNotNullEquality disable=SimplifyJoinFilters
SELECT * FROM a INNER JOIN b ON (a.k=b.x) IS True
----
inner-join (hash)
Expand All @@ -2615,7 +2805,7 @@ inner-join (hash)
└── filters
└── k:1 = x:7 [outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)]

norm expect=SimplifyJoinNotNullEquality
norm expect=SimplifyJoinNotNullEquality disable=SimplifyJoinFilters
SELECT * FROM a INNER JOIN b ON (a.k=b.x) IS False
----
inner-join (cross)
Expand Down Expand Up @@ -2744,7 +2934,7 @@ inner-join (hash)

# Don't trigger rule when one of the variables is nullable.
norm expect-not=SimplifyJoinNotNullEquality
SELECT * FROM a INNER JOIN b ON (a.k=b.y) IS True AND (a.i=b.x) IS False
SELECT * FROM a INNER JOIN b ON (a.k=b.y) IS Null AND (a.i=b.x) IS NOT False
----
inner-join (cross)
├── columns: k:1!null i:2 f:3!null s:4 j:5 x:7!null y:8
Expand All @@ -2759,8 +2949,8 @@ inner-join (cross)
│ ├── key: (7)
│ └── fd: (7)-->(8)
└── filters
├── (k:1 = y:8) IS true [outer=(1,8)]
└── (i:2 = x:7) IS false [outer=(2,7)]
├── (k:1 = y:8) IS NULL [outer=(1,8)]
└── (i:2 = x:7) IS NOT false [outer=(2,7)]

# --------------------------------------------------
# ExtractJoinEqualities
Expand Down
Loading

0 comments on commit 1faee2d

Please sign in to comment.