Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
85594: opt: don't convert union to distinct+union-all with empty key r=DrewKimball a=DrewKimball

Previously, the `ConvertUnionToDistinctUnionAll` normalization rule could
convert a `Union` into a `UnionAll` with a `DistinctOn` grouping on an empty
key. This violated an assumption that any given tuple of key column values
functionally determines exactly the same values on all other columns on both
sides of the input when one of the inputs was null-extended. An empty key is
only correct if all rows are the same, which may not be the case after
null-extension.

This commit fixes this problem by only matching if the key is non-empty.
The empty-key case can only occur in extremely rare cases where the
optimizer can prove that the table has only one row, so I don't expect
to see any regressions due to this change.

Fixes cockroachdb#85502

Release note (bug fix): Fixed a bug introduced in 21.2 that could cause
union queries to return incorrect results in rare cases.

85638: ccl: unskip zoneconfig_privilege_restore test r=adityamaru a=RichardJCai

Release note: None

85642: roachtest: skip on AWS when using multiple stores r=lidorcarmel a=lidorcarmel

In roachtests we cannot set the number of stores when running on AWS.
This patch skips a few (recently added) roachtests that fail when
trying to do that.

Tested by running these roachtests with --cloud=aws and --cloud=gce.

Release note: None

Co-authored-by: DrewKimball <[email protected]>
Co-authored-by: richardjcai <[email protected]>
Co-authored-by: Lidor Carmel <[email protected]>
  • Loading branch information
4 people committed Aug 5, 2022
4 parents c651d33 + 05b87c3 + 385083a + 80d7c9a commit d687b14
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 6 deletions.
1 change: 0 additions & 1 deletion pkg/ccl/backupccl/restore_old_versions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,6 @@ ORDER BY object_type, object_name`, [][]string{
})

t.Run("zoneconfig_privilege_restore", func(t *testing.T) {
skip.WithIssue(t, 84359)
dirs, err := ioutil.ReadDir(privilegeDirs)
require.NoError(t, err)
for _, dir := range dirs {
Expand Down
8 changes: 7 additions & 1 deletion pkg/cmd/roachtest/tests/kv.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,10 +291,16 @@ func registerKV(r registry.Registry) {
if opts.encryption {
encryption = registry.EncryptionAlwaysEnabled
}
cSpec := r.MakeClusterSpec(opts.nodes+1, spec.CPU(opts.cpus), spec.SSD(opts.ssds), spec.RAID0(opts.raid0))
var skip string
if opts.ssds != 0 && cSpec.Cloud != spec.GCE {
skip = fmt.Sprintf("multi-store tests are not supported on cloud %s", cSpec.Cloud)
}
r.Add(registry.TestSpec{
Skip: skip,
Name: strings.Join(nameParts, "/"),
Owner: owner,
Cluster: r.MakeClusterSpec(opts.nodes+1, spec.CPU(opts.cpus), spec.SSD(opts.ssds), spec.RAID0(opts.raid0)),
Cluster: cSpec,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runKV(ctx, t, c, opts)
},
Expand Down
8 changes: 7 additions & 1 deletion pkg/cmd/roachtest/tests/rebalance_load.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,17 @@ func registerRebalanceLoad(r registry.Registry) {
},
},
)
cSpec := r.MakeClusterSpec(7, spec.SSD(2)) // the last node is just used to generate load
var skip string
if cSpec.Cloud != spec.GCE {
skip = fmt.Sprintf("multi-store tests are not supported on cloud %s", cSpec.Cloud)
}
r.Add(
registry.TestSpec{
Skip: skip,
Name: `rebalance/by-load/replicas/ssds=2`,
Owner: registry.OwnerKV,
Cluster: r.MakeClusterSpec(7, spec.SSD(2)), // the last node is just used to generate load
Cluster: cSpec,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
if c.IsLocal() {
t.Fatal("cannot run multi-store in local mode")
Expand Down
22 changes: 19 additions & 3 deletions pkg/sql/opt/norm/rules/set.opt
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,11 @@
(IntersectAll $left $right $colMap)

# ConvertUnionToDistinctUnionAll replaces a Union with a DistinctOn on top of a
# UnionAll when the left and right inputs satisfy the following conditions:
# UnionAll. This is a valid transformation when we can obtain a key over the
# output of the UnionAll that not only functionally determines all columns from
# both inputs, but functionally determines the *same* values from both inputs.
# ConvertUnionToDistinctUnionAll can match when the left and right inputs satisfy
# the following conditions:
#
# 1) All columns from both inputs originate from the same base table. This is
# necessary because it is safe to de-duplicate over a subset of columns
Expand All @@ -116,7 +120,8 @@
# 3) Each pair of columns whose rows are unioned together occupy the same
# ordinal positions in the original base table. This ensures that the
# output (and inputs) of the UnionAll only contains tuples that existed in
# the base table (though it may contain duplicates).
# the base table (though it may contain duplicates, and with the exception
# of null-extension - see condition #5).
#
# 4) The output columns of each of the inputs form a strict key over the base
# table. It is not sufficient to use keys directly from the input
Expand All @@ -125,7 +130,18 @@
# distinguish rows resulting from the union. Ex: union together the same
# (single) row, for which the empty set is a key.
#
# 5) Finally, the key columns must form a strict subset of the union columns.
# 5) There must be at least one key column, since in the empty-key case
# null-extension by outer joins can violate the requirement that a given
# tuple of values on the key columns implies the same values on all other
# columns over both sides. (e.g. for an empty key, the key values would
# always be an empty tuple, while the remaining columns could have
# different values). Null-extension is allowed when the key is non-empty
# because when all columns have the same (NULL) value, grouping on any
# subset of them results in one (all-NULL) row. This condition only applies
# in the rare case when a table can be statically proven to contain only
# one row (see #85502).
#
# 6) Finally, the key columns must form a strict subset of the union columns.
# This is not strictly necessary for correctness, but the transformation
# does not gain anything if the number of columns to de-duplicate on does
# not decrease.
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/opt/norm/set_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ func (c *CustomFuncs) CanConvertUnionToDistinctUnionAll(
return opt.ColSet{}, false
}
keyCols = leftFDs.ReduceCols(leftColSet)
if keyCols.Empty() {
// The key columns set must not be empty
return opt.ColSet{}, false
}
if keyCols.Equals(leftColSet) {
// The key columns must form a strict subset of the union columns, or the
// transformation is not worth it.
Expand Down
49 changes: 49 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/set
Original file line number Diff line number Diff line change
Expand Up @@ -1025,3 +1025,52 @@ union
│ └── fd: (6)-->(7)
└── filters
└── b:7 > 10 [outer=(7), constraints=(/7: [/11 - ]; tight)]

# Regression test for #85502 - don't fire when the candidate key is empty.
exec-ddl
CREATE TABLE t1_85502 (c0 BOOL AS (1 IS NULL) STORED, CONSTRAINT "primary" PRIMARY KEY(c0));
----

exec-ddl
CREATE TABLE t2_85502 (c0 INT);
----

norm expect-not=ConvertUnionToDistinctUnionAll
SELECT t1_85502.c0 FROM t2_85502
FULL OUTER JOIN t1_85502 ON false WHERE false IN (t1_85502.c0 IS NULL)
UNION SELECT t1_85502.c0 FROM t2_85502
FULL OUTER JOIN t1_85502 ON false WHERE NOT false IN (t1_85502.c0 IS NULL);
----
union
├── columns: c0:15
├── left columns: t1_85502.c0:5
├── right columns: t1_85502.c0:12
├── cardinality: [0 - 3]
├── key: (15)
├── scan t1_85502
│ ├── columns: t1_85502.c0:5!null
│ ├── computed column expressions
│ │ └── t1_85502.c0:5
│ │ └── false
│ ├── cardinality: [0 - 1]
│ ├── key: ()
│ └── fd: ()-->(5)
└── select
├── columns: t1_85502.c0:12
├── fd: ()-->(12)
├── full-join (cross)
│ ├── columns: t1_85502.c0:12
│ ├── multiplicity: left-rows(exactly-one), right-rows(one-or-more)
│ ├── scan t2_85502
│ ├── scan t1_85502
│ │ ├── columns: t1_85502.c0:12!null
│ │ ├── computed column expressions
│ │ │ └── t1_85502.c0:12
│ │ │ └── false
│ │ ├── cardinality: [0 - 1]
│ │ ├── key: ()
│ │ └── fd: ()-->(12)
│ └── filters
│ └── false [constraints=(contradiction; tight)]
└── filters
└── t1_85502.c0:12 IS NULL [outer=(12), constraints=(/12: [/NULL - /NULL]; tight), fd=()-->(12)]

0 comments on commit d687b14

Please sign in to comment.