optbuilder: handle unnest returning a tuple

Currently, the return types of SRFs that return multiple columns are represented as tuples with labels. The tuple labels are used to decide whether or not to create a single output column for the SRF, or multiple. The `unnest` function can return a single column if it has a single argument, and the type of that column can be a tuple with labels. This could cause the old logic to mistakenly create multiple output columns for `unnest`, which could lead to panics down the line and incorrect behavior otherwise. This commit adds a special case for `unnest` in the `optbuilder` to only expand tuple return types if there is more than one argument (implying more than one output column). Other SRFs do not have the same problem because they either always return the same number of columns, cannot return tuples, or both. Fixes #58438 Release note (bug fix): Fixed a bug existing since release 20.1 that could cause a panic in rare cases when the unnest function was used with a tuple return type.
cockroachdb · Jul 26, 2022 · 9c57af8 · 9c57af8
1 parent 22d36e2
commit 9c57af8
Show file tree

Hide file tree

Showing 3 changed files with 74 additions and 6 deletions.
diff --git a/pkg/sql/logictest/testdata/logic_test/srfs b/pkg/sql/logictest/testdata/logic_test/srfs
@@ -1248,3 +1248,25 @@ SELECT * FROM unnest(NULL, NULL)
 
 query error pq: column reference "unnest" is ambiguous
 SELECT unnest FROM unnest(array[1,2], array[3,4,5])
+
+# Regression test for #58438 - handle the case when unnest outputs a tuple with
+# labels. The unnest should not panic.
+statement ok
+CREATE TABLE t58438(a INT, b INT);
+
+statement ok
+INSERT INTO t58438 VALUES (1, 2), (3, 4), (5, 6);
+
+query T rowsort
+SELECT unnest(ARRAY[t58438.*]) FROM t58438;
+----
+(1,2)
+(3,4)
+(5,6)
+
+query II rowsort
+SELECT (x).* FROM (SELECT unnest(ARRAY[t58438.*]) FROM t58438) v(x);
+----
+1  2
+3  4
+5  6
diff --git a/pkg/sql/opt/optbuilder/testdata/srfs b/pkg/sql/opt/optbuilder/testdata/srfs
@@ -965,3 +965,38 @@ with &1 (data)
       │         └── filters (true)
       └── aggregations
            └── count-rows [as=count_rows:8]
+
+# Regression test for #58438 - handle the case when unnest outputs a tuple with
+# labels. The unnest should not panic.
+exec-ddl
+CREATE TABLE t58438(a INT, b INT);
+----
+
+build
+SELECT unnest(ARRAY[t58438.*]) FROM t58438;
+----
+project
+ ├── columns: unnest:6
+ └── project-set
+      ├── columns: a:1 b:2 rowid:3!null crdb_internal_mvcc_timestamp:4 tableoid:5 unnest:6
+      ├── scan t58438
+      │    └── columns: a:1 b:2 rowid:3!null crdb_internal_mvcc_timestamp:4 tableoid:5
+      └── zip
+           └── unnest(ARRAY[((a:1, b:2) AS a, b)])
+
+build
+SELECT (x).* FROM (SELECT unnest(ARRAY[t58438.*]) FROM t58438) v(x);
+----
+project
+ ├── columns: a:7 b:8
+ ├── project
+ │    ├── columns: unnest:6
+ │    └── project-set
+ │         ├── columns: t58438.a:1 t58438.b:2 rowid:3!null crdb_internal_mvcc_timestamp:4 tableoid:5 unnest:6
+ │         ├── scan t58438
+ │         │    └── columns: t58438.a:1 t58438.b:2 rowid:3!null crdb_internal_mvcc_timestamp:4 tableoid:5
+ │         └── zip
+ │              └── unnest(ARRAY[((t58438.a:1, t58438.b:2) AS a, b)])
+ └── projections
+      ├── (unnest:6).a [as=a:7]
+      └── (unnest:6).b [as=b:8]
diff --git a/pkg/sql/opt/optbuilder/util.go b/pkg/sql/opt/optbuilder/util.go
@@ -239,18 +239,29 @@ func (b *Builder) projectColumn(dst *scopeColumn, src *scopeColumn) {
 	dst.id = src.id
 }
 
-// shouldCreateDefaultColumn decides if we need to create a default
-// column and default label for a function expression.
-// Returns true if the function's return type is not an empty tuple and
-// doesn't declare any tuple labels.
+// shouldCreateDefaultColumn decides if we need to create a default column and
+// default label for a function expression. Returns true if the function's
+// return type is not an empty tuple and doesn't declare any tuple labels.
 func (b *Builder) shouldCreateDefaultColumn(texpr tree.TypedExpr) bool {
 	if texpr.ResolvedType() == types.EmptyTuple {
 		// This is only to support crdb_internal.unary_table().
 		return false
 	}
 
-	// We need to create a default column with a default name when
-	// the function return type doesn't declare any return labels.
+	if funcExpr, ok := texpr.(*tree.FuncExpr); ok {
+		if funcExpr.Func.FunctionReference.(*tree.FunctionDefinition).Name == "unnest" {
+			// Special case for unnest functions: we should create a default column in
+			// the case when there is one input argument, since this implies there
+			// will be one output column. This is necessary because the type of the
+			// single column output by unnest in this case may be a tuple with labels,
+			// which breaks the assumption made below.
+			return len(funcExpr.Exprs) == 1
+		}
+	}
+
+	// We need to create a default column with a default name when the function
+	// return type doesn't declare any return labels. This logic assumes that any
+	// SRF that has a labeled tuple as a return type returns multiple columns.
 	return len(texpr.ResolvedType().TupleLabels()) == 0
 }