From 1bceb6c0ae67b7f18c59725304bc86ad99a1ab6b Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Thu, 8 Aug 2019 21:34:23 +0200 Subject: [PATCH] opt: fix panic due to incorrect type of ArrayFlatten This commit fixes a panic caused by incorrect typing of an ArrayFlatten expression. If the input to an ArrayFlatten expression is sorted, there may be more than one output column (although the columns used for sorting are hidden). If one of these hidden columns is chosen to infer the type of the expression, the type could be incorrect. This commit fixes the problem so that only the requested column is chosen for type inference. Fixes #38867 Release note (bug fix): Fixed a panic due to incorrect type inference of some ARRAY(...) expressions. --- .../testdata/logic_test/subquery_correlated | 20 +++ pkg/sql/opt/colset.go | 15 ++- pkg/sql/opt/exec/factory.go | 3 +- pkg/sql/opt/memo/typing.go | 4 +- pkg/sql/opt/norm/custom_funcs.go | 13 +- pkg/sql/opt/norm/decorrelate.go | 16 ++- pkg/sql/opt/norm/rules/scalar.opt | 6 +- pkg/sql/opt/norm/testdata/rules/scalar | 124 ++++++++++++++++++ pkg/sql/opt/ops/scalar.opt | 2 +- 9 files changed, 181 insertions(+), 22 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/subquery_correlated b/pkg/sql/logictest/testdata/logic_test/subquery_correlated index 6e799c7ca825..5a30c2c86e0b 100644 --- a/pkg/sql/logictest/testdata/logic_test/subquery_correlated +++ b/pkg/sql/logictest/testdata/logic_test/subquery_correlated @@ -1150,3 +1150,23 @@ ORDER BY statement ok DROP TABLE stuff; DROP TABLE users; + +# Regression test for #38867. +query T +SELECT ( + SELECT + ARRAY ( + SELECT c.relname + FROM pg_inherits AS i JOIN pg_class AS c ON c.oid = i.inhparent + WHERE i.inhrelid = rel.oid + ORDER BY inhseqno + ) +) +FROM pg_class AS rel +LIMIT 5; +---- +{} +{} +{} +{} +{} diff --git a/pkg/sql/opt/colset.go b/pkg/sql/opt/colset.go index f4c023177a90..60e1cf3ec5a6 100644 --- a/pkg/sql/opt/colset.go +++ b/pkg/sql/opt/colset.go @@ -10,7 +10,10 @@ package opt -import "github.com/cockroachdb/cockroach/pkg/util" +import ( + "github.com/cockroachdb/cockroach/pkg/util" + "github.com/cockroachdb/errors" +) // ColSet efficiently stores an unordered set of column ids. type ColSet struct { @@ -85,3 +88,13 @@ func (s ColSet) SubsetOf(rhs ColSet) bool { return s.set.SubsetOf(rhs.set) } // numbers are shown as ranges. For example, for the set {1, 2, 3 5, 6, 10}, // the output is "(1-3,5,6,10)". func (s ColSet) String() string { return s.set.String() } + +// SingleColumn returns the single column in s. Panics if s does not contain +// exactly one column. +func (s ColSet) SingleColumn() ColumnID { + if s.Len() != 1 { + panic(errors.AssertionFailedf("expected a single column but found %d columns", s.Len())) + } + col, _ := s.Next(0) + return col +} diff --git a/pkg/sql/opt/exec/factory.go b/pkg/sql/opt/exec/factory.go index 456860ef8c6d..413f4e6e591b 100644 --- a/pkg/sql/opt/exec/factory.go +++ b/pkg/sql/opt/exec/factory.go @@ -104,7 +104,8 @@ type Factory interface { // // memo, rightProps, and right are the memo, required physical properties, and // RelExpr of the right side of the join that will be repeatedly modified, - // re-planned and executed for every row from the left side. + // re-planned and executed for every row from the left side. The rightProps + // always includes a presentation. // // fakeRight is a pre-planned node that is the right side of the join with // all outer columns replaced by NULL. The physical properties of this node diff --git a/pkg/sql/opt/memo/typing.go b/pkg/sql/opt/memo/typing.go index 99603531fa27..abd875a9d0bc 100644 --- a/pkg/sql/opt/memo/typing.go +++ b/pkg/sql/opt/memo/typing.go @@ -249,7 +249,7 @@ func typeCollate(e opt.ScalarExpr) *types.T { // typeArrayFlatten returns the type of the subquery as an array. func typeArrayFlatten(e opt.ScalarExpr) *types.T { input := e.Child(0).(RelExpr) - colID, _ := input.Relational().OutputCols.Next(0) + colID := e.(*ArrayFlattenExpr).RequestedCol return types.MakeArray(input.Memo().Metadata().ColumnMeta(colID).Type) } @@ -354,7 +354,7 @@ func typeCast(e opt.ScalarExpr) *types.T { // its first (and only) column. func typeSubquery(e opt.ScalarExpr) *types.T { input := e.Child(0).(RelExpr) - colID, _ := input.Relational().OutputCols.Next(0) + colID := input.Relational().OutputCols.SingleColumn() return input.Memo().Metadata().ColumnMeta(colID).Type } diff --git a/pkg/sql/opt/norm/custom_funcs.go b/pkg/sql/opt/norm/custom_funcs.go index e95f915b568a..5fe8945ecebc 100644 --- a/pkg/sql/opt/norm/custom_funcs.go +++ b/pkg/sql/opt/norm/custom_funcs.go @@ -156,10 +156,9 @@ func (c *CustomFuncs) CanConstructBinary(op opt.Operator, left, right opt.Scalar return memo.BinaryOverloadExists(op, left.DataType(), right.DataType()) } -// ArrayType returns the type of the first output column wrapped +// ArrayType returns the type of the given column wrapped // in an array. -func (c *CustomFuncs) ArrayType(in memo.RelExpr) *types.T { - inCol, _ := c.OutputCols(in).Next(0) +func (c *CustomFuncs) ArrayType(inCol opt.ColumnID) *types.T { inTyp := c.mem.Metadata().ColumnMeta(inCol).Type return types.MakeArray(inTyp) } @@ -1626,10 +1625,10 @@ func (c *CustomFuncs) SubqueryOrdering(sub *memo.SubqueryPrivate) physical.Order return oc } -// FirstCol returns the first column in the input expression. -func (c *CustomFuncs) FirstCol(in memo.RelExpr) opt.ColumnID { - inCol, _ := c.OutputCols(in).Next(0) - return inCol +// SubqueryRequestedCol returns the requested column from a SubqueryPrivate. +// This function should only be used with ArrayFlatten expressions. +func (c *CustomFuncs) SubqueryRequestedCol(sub *memo.SubqueryPrivate) opt.ColumnID { + return sub.RequestedCol } // MakeArrayAggCol returns a ColPrivate with the given type and an "array_agg" label. diff --git a/pkg/sql/opt/norm/decorrelate.go b/pkg/sql/opt/norm/decorrelate.go index afa63f5c0498..e0450bf4dd9b 100644 --- a/pkg/sql/opt/norm/decorrelate.go +++ b/pkg/sql/opt/norm/decorrelate.go @@ -697,11 +697,7 @@ func (c *CustomFuncs) ConstructNoColsRow() memo.RelExpr { // referenceSingleColumn returns a Variable operator that refers to the one and // only column that is projected by the input expression. func (c *CustomFuncs) referenceSingleColumn(in memo.RelExpr) opt.ScalarExpr { - cols := in.Relational().OutputCols - if cols.Len() != 1 { - panic(errors.AssertionFailedf("expression does not have exactly one column")) - } - colID, _ := cols.Next(0) + colID := in.Relational().OutputCols.SingleColumn() return c.f.ConstructVariable(colID) } @@ -801,8 +797,14 @@ func (r *subqueryHoister) hoistAll(scalar opt.ScalarExpr) opt.ScalarExpr { } // Replace the Subquery operator with a Variable operator referring to - // the first (and only) column in the hoisted query. - colID, _ := subqueryProps.OutputCols.Next(0) + // the output column of the hoisted query. + var colID opt.ColumnID + switch t := scalar.(type) { + case *memo.ArrayFlattenExpr: + colID = t.RequestedCol + default: + colID = subqueryProps.OutputCols.SingleColumn() + } return r.f.ConstructVariable(colID) } diff --git a/pkg/sql/opt/norm/rules/scalar.opt b/pkg/sql/opt/norm/rules/scalar.opt index aeaae4d060e7..aa7fdd5412e7 100644 --- a/pkg/sql/opt/norm/rules/scalar.opt +++ b/pkg/sql/opt/norm/rules/scalar.opt @@ -288,8 +288,8 @@ $input [ (AggregationsItem - (ArrayAgg (Variable (FirstCol $input))) - (MakeArrayAggCol (ArrayType $input)) + (ArrayAgg (Variable $requestedCol:(SubqueryRequestedCol $subquery))) + (MakeArrayAggCol (ArrayType $requestedCol)) ) ] (MakeOrderedGrouping @@ -299,6 +299,6 @@ ) (MakeUnorderedSubquery) ) - (Array [] (ArrayType $input)) + (Array [] (ArrayType $requestedCol)) ] ) diff --git a/pkg/sql/opt/norm/testdata/rules/scalar b/pkg/sql/opt/norm/testdata/rules/scalar index 09ca66098ec6..204875aab47c 100644 --- a/pkg/sql/opt/norm/testdata/rules/scalar +++ b/pkg/sql/opt/norm/testdata/rules/scalar @@ -1287,3 +1287,127 @@ project └── scan a ├── columns: k:7(int!null) └── key: (7) + +exec-ddl +CREATE TABLE pg_class ( + oid OID NULL, + relname NAME NOT NULL, + relnamespace OID NULL, + reltype OID NULL, + reloftype OID NULL, + relowner OID NULL, + relam OID NULL, + relfilenode OID NULL, + reltablespace OID NULL, + relpages INT4 NULL, + reltuples FLOAT4 NULL, + relallvisible INT4 NULL, + reltoastrelid OID NULL, + relhasindex BOOL NULL, + relisshared BOOL NULL, + relpersistence CHAR NULL, + relistemp BOOL NULL, + relkind CHAR NULL, + relnatts INT2 NULL, + relchecks INT2 NULL, + relhasoids BOOL NULL, + relhaspkey BOOL NULL, + relhasrules BOOL NULL, + relhastriggers BOOL NULL, + relhassubclass BOOL NULL, + relfrozenxid INT8 NULL, + relacl STRING[] NULL, + reloptions STRING[] NULL +) +---- + +exec-ddl +CREATE TABLE pg_inherits ( + inhrelid OID NULL, + inhparent OID NULL, + inhseqno INT4 NULL +) +---- + +# Regression test for #38867. +norm expect=NormalizeArrayFlattenToAgg +SELECT ( + SELECT + ARRAY ( + SELECT c.relname + FROM pg_inherits AS i JOIN pg_class AS c ON c.oid = i.inhparent + WHERE i.inhrelid = rel.oid + ORDER BY inhseqno + ) +) +FROM pg_class AS rel +---- +project + ├── columns: array:66(name[]) + ├── inner-join-apply + │ ├── columns: rel.oid:1(oid) array_agg:63(name[]) array:64(name[]) + │ ├── scan rel + │ │ └── columns: rel.oid:1(oid) + │ ├── inner-join-apply + │ │ ├── columns: array_agg:63(name[]) array:64(name[]) + │ │ ├── outer: (1) + │ │ ├── cardinality: [1 - 1] + │ │ ├── key: () + │ │ ├── fd: ()-->(63,64) + │ │ ├── project + │ │ │ ├── columns: array_agg:63(name[]) + │ │ │ ├── outer: (1) + │ │ │ ├── cardinality: [1 - 1] + │ │ │ ├── key: () + │ │ │ ├── fd: ()-->(63) + │ │ │ ├── group-by + │ │ │ │ ├── columns: inhrelid:30(oid) array_agg:65(name[]) + │ │ │ │ ├── internal-ordering: +32 opt(30) + │ │ │ │ ├── outer: (1) + │ │ │ │ ├── cardinality: [1 - 1] + │ │ │ │ ├── key: () + │ │ │ │ ├── fd: ()-->(30,65) + │ │ │ │ ├── sort + │ │ │ │ │ ├── columns: inhrelid:30(oid) inhparent:31(oid) inhseqno:32(int4) c.oid:34(oid) c.relname:35(name) + │ │ │ │ │ ├── outer: (1) + │ │ │ │ │ ├── cardinality: [1 - ] + │ │ │ │ │ ├── fd: (31)==(34), (34)==(31) + │ │ │ │ │ ├── ordering: +32 opt(30) [actual: +32] + │ │ │ │ │ └── left-join (hash) + │ │ │ │ │ ├── columns: inhrelid:30(oid) inhparent:31(oid) inhseqno:32(int4) c.oid:34(oid) c.relname:35(name) + │ │ │ │ │ ├── outer: (1) + │ │ │ │ │ ├── cardinality: [1 - ] + │ │ │ │ │ ├── fd: (31)==(34), (34)==(31) + │ │ │ │ │ ├── values + │ │ │ │ │ │ ├── cardinality: [1 - 1] + │ │ │ │ │ │ ├── key: () + │ │ │ │ │ │ └── tuple [type=tuple] + │ │ │ │ │ ├── inner-join (hash) + │ │ │ │ │ │ ├── columns: inhrelid:30(oid) inhparent:31(oid!null) inhseqno:32(int4) c.oid:34(oid!null) c.relname:35(name!null) + │ │ │ │ │ │ ├── fd: (31)==(34), (34)==(31) + │ │ │ │ │ │ ├── scan i + │ │ │ │ │ │ │ └── columns: inhrelid:30(oid) inhparent:31(oid) inhseqno:32(int4) + │ │ │ │ │ │ ├── scan c + │ │ │ │ │ │ │ └── columns: c.oid:34(oid) c.relname:35(name!null) + │ │ │ │ │ │ └── filters + │ │ │ │ │ │ └── c.oid = inhparent [type=bool, outer=(31,34), constraints=(/31: (/NULL - ]; /34: (/NULL - ]), fd=(31)==(34), (34)==(31)] + │ │ │ │ │ └── filters + │ │ │ │ │ └── inhrelid = rel.oid [type=bool, outer=(1,30), constraints=(/1: (/NULL - ]; /30: (/NULL - ]), fd=(1)==(30), (30)==(1)] + │ │ │ │ └── aggregations + │ │ │ │ ├── array-agg [type=name[], outer=(35)] + │ │ │ │ │ └── variable: c.relname [type=name] + │ │ │ │ └── any-not-null-agg [type=oid, outer=(30)] + │ │ │ │ └── variable: inhrelid [type=oid] + │ │ │ └── projections + │ │ │ └── CASE WHEN inhrelid IS NOT NULL THEN array_agg END [type=name[], outer=(30,65)] + │ │ ├── values + │ │ │ ├── columns: array:64(name[]) + │ │ │ ├── outer: (63) + │ │ │ ├── cardinality: [1 - 1] + │ │ │ ├── key: () + │ │ │ ├── fd: ()-->(64) + │ │ │ └── (COALESCE(array_agg, ARRAY[]),) [type=tuple{name[]}] + │ │ └── filters (true) + │ └── filters (true) + └── projections + └── variable: array [type=name[], outer=(64)] diff --git a/pkg/sql/opt/ops/scalar.opt b/pkg/sql/opt/ops/scalar.opt index 4aa0f89febaa..472d7eab2202 100644 --- a/pkg/sql/opt/ops/scalar.opt +++ b/pkg/sql/opt/ops/scalar.opt @@ -42,7 +42,7 @@ define SubqueryPrivate { # RequestedCol is set if there could possibly be other columns in the input # (say, if there was an ordering that must be respected) besides the one that - # will eventually be output. + # will eventually be output. It is only used for ArrayFlatten expressions. RequestedCol ColumnID # Cmp is only used for AnyOp.