opt: top-level relational expression cannot have outer columns #58438

cockroach-teamcity · 2021-01-05T00:08:12Z

This issue was autofiled by Sentry. It represents a crash or reported error on a live cluster with telemetry enabled.

Sentry link: https://sentry.io/organizations/cockroach-labs/issues/2131023460/?referrer=webhooks_plugin

Panic message:

optimizer.go:234: top-level relational expression cannot have outer columns: (7)
--
*errutil.leafError: top-level relational expression cannot have outer columns: (7) (1)
optimizer.go:234: *withstack.withStack (top exception)
*assert.withAssertionFailure
(check the extra data payloads)

Stacktrace (expand for inline code snippets):

cockroach/pkg/sql/opt/xform/optimizer.go

Lines 233 to 235 in 92d9495

    
           if !root.Relational().OuterCols.Empty() { 
        
           	return nil, errors.AssertionFailedf( 
        
           		"top-level relational expression cannot have outer columns: %s",

in pkg/sql/opt/xform.(*Optimizer).Optimize

cockroach/pkg/sql/plan_opt.go

Lines 515 to 517 in 92d9495

    
           if _, isCanned := opc.p.stmt.AST.(*tree.CannedOptPlan); !isCanned { 
        
           	if _, err := opc.optimizer.Optimize(); err != nil { 
        
           		return nil, err

in pkg/sql.(*optPlanningCtx).buildExecMemo

cockroach/pkg/sql/plan_opt.go

Lines 194 to 196 in 92d9495

    
           execMemo, err := opc.buildExecMemo(ctx) 
        
           if err != nil {

in pkg/sql.(*planner).makeOptimizerPlan

cockroach/pkg/sql/conn_executor_exec.go

Lines 900 to 902 in 92d9495

    
           if err := planner.makeOptimizerPlan(ctx); err != nil { 
        
           	log.VEventf(ctx, 1, "optimizer plan failed: %v", err)

in pkg/sql.(*connExecutor).makeExecPlan

cockroach/pkg/sql/conn_executor_exec.go

Lines 779 to 781 in 92d9495

    
           // between here and there needs to happen even if there's an error. 
        
           err := ex.makeExecPlan(ctx, planner) 
        
           // We'll be closing the plan manually below after execution; this

in pkg/sql.(*connExecutor).dispatchToExecutionEngine

cockroach/pkg/sql/conn_executor_exec.go

Lines 638 to 640 in 92d9495

    
           p.autoCommit = os.ImplicitTxn.Get() && !ex.server.cfg.TestingKnobs.DisableAutoCommit 
        
           if err := ex.dispatchToExecutionEngine(ctx, p, res); err != nil { 
        
           	return nil, nil, err

in pkg/sql.(*connExecutor).execStmtInOpenState

cockroach/pkg/sql/conn_executor_exec.go

Lines 113 to 115 in 92d9495

    
           } else { 
        
           	ev, payload, err = ex.execStmtInOpenState(ctx, stmt, res, pinfo) 
        
           }

in pkg/sql.(*connExecutor).execStmt

cockroach/pkg/sql/conn_executor.go

Lines 1464 to 1466 in 92d9495

    
           stmtCtx := withStatement(ctx, ex.curStmt) 
        
           ev, payload, err = ex.execStmt(stmtCtx, curStmt, stmtRes, nil /* pinfo */) 
        
           return err

in pkg/sql.(*connExecutor).execCmd.func1

cockroach/pkg/sql/conn_executor.go

Lines 1466 to 1468 in 92d9495

    
           	return err 
        
           }() 
        
           // Note: we write to ex.statsCollector.phaseTimes, instead of ex.phaseTimes,

in pkg/sql.(*connExecutor).execCmd

cockroach/pkg/sql/conn_executor.go

Lines 1390 to 1392 in 92d9495

    
           var err error 
        
           if err = ex.execCmd(ex.Ctx()); err != nil { 
        
           	if errors.IsAny(err, io.EOF, errDrainingComplete) {

in pkg/sql.(*connExecutor).run

cockroach/pkg/sql/conn_executor.go

Lines 507 to 509 in 92d9495

    
           	}() 
        
           	return h.ex.run(ctx, s.pool, reserved, cancel) 
        
           }

in pkg/sql.(*Server).ServeConn

cockroach/pkg/sql/pgwire/conn.go

Lines 625 to 627 in 92d9495

    
           	reservedOwned = false // We're about to pass ownership away. 
        
           	retErr = sqlServer.ServeConn(ctx, connHandler, reserved, cancelConn) 
        
           }()

in pkg/sql/pgwire.(*conn).processCommandsAsync.func1
/usr/local/go/src/runtime/asm_amd64.s#L1356-L1358 in runtime.goexit

pkg/sql/opt/xform/optimizer.go in pkg/sql/opt/xform.(*Optimizer).Optimize at line 234
pkg/sql/plan_opt.go in pkg/sql.(*optPlanningCtx).buildExecMemo at line 516
pkg/sql/plan_opt.go in pkg/sql.(*planner).makeOptimizerPlan at line 195
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).makeExecPlan at line 901
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).dispatchToExecutionEngine at line 780
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).execStmtInOpenState at line 639
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).execStmt at line 114
pkg/sql/conn_executor.go in pkg/sql.(*connExecutor).execCmd.func1 at line 1465
pkg/sql/conn_executor.go in pkg/sql.(*connExecutor).execCmd at line 1467
pkg/sql/conn_executor.go in pkg/sql.(*connExecutor).run at line 1391
pkg/sql/conn_executor.go in pkg/sql.(*Server).ServeConn at line 508
pkg/sql/pgwire/conn.go in pkg/sql/pgwire.(*conn).processCommandsAsync.func1 at line 626
/usr/local/go/src/runtime/asm_amd64.s in runtime.goexit at line 1357

Tag	Value
Cockroach Release	`v20.2.2`
Cockroach SHA:	`92d9495`
Platform	darwin amd64
Distribution	CCL
Environment	v20.2.2
Command	demo
Go Version	``
# of CPUs
# of Goroutines

Jira issue: CRDB-3393

The text was updated successfully, but these errors were encountered:

jordanlewis · 2021-01-05T00:09:12Z

This was me! It's easy to reproduce:

[email protected]:61494/movr> create table t(a int, b int);
CREATE TABLE

Time: 3ms total (execution 3ms / network 0ms)

[email protected]:61494/movr> select unnest(array[t.*]) from t;
*
* ERROR: [n1,client=127.0.0.1:61502,hostssl,user=root] Queued as error fec60f1843eb4d35860add13e533d8e2
*
ERROR: internal error: top-level relational expression cannot have outer columns: (7)
SQLSTATE: XX000
DETAIL: stack trace:
/go/src/github.com/cockroachdb/cockroach/pkg/sql/opt/xform/optimizer.go:234: Optimize()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/plan_opt.go:516: buildExecMemo()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/plan_opt.go:195: makeOptimizerPlan()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor_exec.go:901: makeExecPlan()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor_exec.go:780: dispatchToExecutionEngine()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor_exec.go:639: execStmtInOpenState()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor_exec.go:114: execStmt()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor.go:1465: func1()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor.go:1467: execCmd()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor.go:1391: run()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor.go:508: ServeConn()
/go/src/github.com/cockroachdb/cockroach/pkg/sql/pgwire/conn.go:626: func1()
runtime/asm_amd64.s:1357: goexit()

HINT: You have encountered an unexpected error.

Please check the public issue tracker to check whether this problem is
already tracked. If you cannot find it there, please report the error
with details by creating a new issue.

If you would rather not post publicly, please contact us directly
using the support form.

We appreciate your feedback.

mgartner · 2021-01-12T21:09:04Z

I originally thought that this was a bug in ConvertZipArraysToValues but I think optbuilder is incorrectly building the project-set expression:

build
SELECT unnest(ARRAY[t.*]) FROM t
----
project
 ├── columns: unnest:7
 ├── project-set
 │    ├── columns: t.a:1 t.b:2 rowid:3!null crdb_internal_mvcc_timestamp:4 a:5 b:6
 │    ├── scan t
 │    │    └── columns: t.a:1 t.b:2 rowid:3!null crdb_internal_mvcc_timestamp:4
 │    └── zip
 │         └── unnest(ARRAY[((t.a:1, t.b:2) AS a, b)])
 └── projections
      └── ((a:5, b:6) AS a, b) [as=unnest:7]

The project-set shouldn't have a:5 and b:6 as columns, rather 1 column that is produced by the zip .

cucaroach · 2021-05-11T02:13:13Z

Using generate_subscripts as a foil (ie a srf where this syntax seems to work) I tracked this down to project.constructProject, scalar is non-nil so we call b.factory.ConstructProjectionsItem(scalar, id) and that's where things go sideways. If I force it down the passthrough path it seems to work. Working backwards from there I tracked things to srfs.finishBuildGeneratorFunction where we find that the return type of unnest is a tuple(a,b). I guess what I don't get is whether ARRAY[t.*] should be seen as generated 1 column rows or if it should be interpreted as unnest(ARRAY[t.*] as x(a,b)), ie as generating tuples. Postgresql seems to support both. Although the 'as x(a,b)' form only works in from clauses its maybe implied when in the select clause?

Postgresql does this for this query:

unnest_bug=# explain  select unnest(array[t.*]) from t;
                         QUERY PLAN
------------------------------------------------------------
 ProjectSet  (cost=0.00..60.85 rows=2260 width=32)
   ->  Seq Scan on t  (cost=0.00..32.60 rows=2260 width=32)
(2 rows)

So the unnest and array cancel each other out it seems. So I think the project-set columns are correct I just think we're lacking some machinery to link them up to what comes out of the unnest. I'm not familiar enough with the code to know where we are going astray but definitely seems like srf.finishBuildGeneratorFunction isn't tying things together appropriately for non-single column inputs to unnest.

@mgartner @rytaft did I get anything right here?

cucaroach · 2021-05-11T13:11:55Z

The following diff fixes the problem and makes our behavior match postgres but I'm not sure about it:

diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go
index 9c6bb38cce..dbd04f087b 100644
--- a/pkg/sql/sem/builtins/generator_builtins.go
+++ b/pkg/sql/sem/builtins/generator_builtins.go
@@ -206,7 +206,7 @@ var generators = map[string]builtinDefinition{
                                if len(args) == 0 || args[0].ResolvedType().Family() == types.UnknownFamily {
                                        return tree.UnknownReturnType
                                }
-                               return args[0].ResolvedType().ArrayContents()
+                               return args[0].ResolvedType()
                        },
                        makeArrayGenerator,
                        "Returns the input array as a set of rows",

Actually I'm sure its wrong because it breaks a ton of srfs logictests.

rytaft · 2021-05-11T15:35:09Z

I'm coming back to the original idea that the problem is with ConvertZipArraysToValues. If I disable that rule, there is no issue with the resulting plan:

opt format=show-all disable=ConvertZipArraysToValues
select unnest(array[t.*]) from t
----
project
 ├── columns: unnest:7(tuple{int AS a, int AS b})
 ├── immutable
 ├── stats: [rows=10000]
 ├── cost: 1364.53
 ├── prune: (7)
 ├── project-set
 │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) a:5(int) b:6(int)
 │    ├── immutable
 │    ├── stats: [rows=10000]
 │    ├── cost: 1164.52
 │    ├── scan t.public.t
 │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int)
 │    │    ├── stats: [rows=1000]
 │    │    ├── cost: 1064.51
 │    │    └── prune: (1,2)
 │    └── zip
 │         └── function: unnest [type=tuple{int AS a, int AS b}, outer=(1,2), immutable]
 │              └── array: [type=tuple{int AS a, int AS b}[]]
 │                   └── tuple [type=tuple{int AS a, int AS b}]
 │                        ├── variable: t.public.t.a:1 [type=int]
 │                        └── variable: t.public.t.b:2 [type=int]
 └── projections
      └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(5,6)]
           ├── variable: a:5 [type=int]
           └── variable: b:6 [type=int]

If I comment out the panic that causes the error and run optsteps (without disabling ConvertZipArraysToValues), I get this output:

optsteps format=show-all
select unnest(array[t.*]) from t
----
================================================================================
Initial expression
  Cost: 1384.73
================================================================================
  project
   ├── columns: unnest:7(tuple{int AS a, int AS b})
   ├── immutable
   ├── stats: [rows=10000]
   ├── cost: 1384.73
   ├── prune: (7)
   ├── project-set
   │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal) a:5(int) b:6(int)
   │    ├── immutable
   │    ├── stats: [rows=10000]
   │    ├── cost: 1184.72
   │    ├── fd: (3)-->(1,2,4)
   │    ├── prune: (3,4)
   │    ├── scan t.public.t
   │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
   │    │    ├── stats: [rows=1000]
   │    │    ├── cost: 1084.71
   │    │    ├── key: (3)
   │    │    ├── fd: (3)-->(1,2,4)
   │    │    └── prune: (1-4)
   │    └── zip
   │         └── function: unnest [type=tuple{int AS a, int AS b}, outer=(1,2), immutable]
   │              └── array: [type=tuple{int AS a, int AS b}[]]
   │                   └── tuple [type=tuple{int AS a, int AS b}]
   │                        ├── variable: t.public.t.a:1 [type=int]
   │                        └── variable: t.public.t.b:2 [type=int]
   └── projections
        └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(5,6)]
             ├── variable: a:5 [type=int]
             └── variable: b:6 [type=int]
================================================================================
ConvertZipArraysToValues
  Cost: 1127.27
================================================================================
   project
    ├── columns: unnest:7(tuple{int AS a, int AS b})
  - ├── immutable
  - ├── stats: [rows=10000]
  - ├── cost: 1384.73
  + ├── outer: (6)
  + ├── stats: [rows=1000]
  + ├── cost: 1127.2675
    ├── prune: (7)
  - ├── project-set
  - │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal) a:5(int) b:6(int)
  - │    ├── immutable
  - │    ├── stats: [rows=10000]
  - │    ├── cost: 1184.72
  - │    ├── fd: (3)-->(1,2,4)
  - │    ├── prune: (3,4)
  + ├── inner-join-apply
  + │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal) a:5(int)
  + │    ├── stats: [rows=1000]
  + │    ├── cost: 1107.2575
  + │    ├── key: (3)
  + │    ├── fd: (3)-->(1,2,4,5)
    │    ├── scan t.public.t
    │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
    │    │    ├── stats: [rows=1000]
    │    │    ├── cost: 1084.71
    │    │    ├── key: (3)
    │    │    ├── fd: (3)-->(1,2,4)
    │    │    └── prune: (1-4)
  - │    └── zip
  - │         └── function: unnest [type=tuple{int AS a, int AS b}, outer=(1,2), immutable]
  - │              └── array: [type=tuple{int AS a, int AS b}[]]
  - │                   └── tuple [type=tuple{int AS a, int AS b}]
  - │                        ├── variable: t.public.t.a:1 [type=int]
  - │                        └── variable: t.public.t.b:2 [type=int]
  + │    ├── values
  + │    │    ├── columns: a:5(int)
  + │    │    ├── outer: (1,2)
  + │    │    ├── cardinality: [1 - 1]
  + │    │    ├── stats: [rows=1]
  + │    │    ├── cost: 0.02
  + │    │    ├── key: ()
  + │    │    ├── fd: ()-->(5)
  + │    │    ├── prune: (5)
  + │    │    └── tuple [type=tuple{tuple{int AS a, int AS b}}]
  + │    │         └── tuple [type=tuple{int AS a, int AS b}]
  + │    │              ├── variable: t.public.t.a:1 [type=int]
  + │    │              └── variable: t.public.t.b:2 [type=int]
  + │    └── filters (true)
    └── projections
         └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(5,6)]
              ├── variable: a:5 [type=int]
              └── variable: b:6 [type=int]
================================================================================
ProjectInnerJoinValues
  Cost: 1134.74
================================================================================
   project
    ├── columns: unnest:7(tuple{int AS a, int AS b})
    ├── outer: (6)
    ├── stats: [rows=1000]
  - ├── cost: 1127.2675
  + ├── cost: 1134.74
    ├── prune: (7)
  - ├── inner-join-apply
  + ├── select
    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal) a:5(int)
    │    ├── stats: [rows=1000]
  - │    ├── cost: 1107.2575
  + │    ├── cost: 1114.73
    │    ├── key: (3)
  - │    ├── fd: (3)-->(1,2,4,5)
  - │    ├── scan t.public.t
  - │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
  - │    │    ├── stats: [rows=1000]
  - │    │    ├── cost: 1084.71
  + │    ├── fd: (3)-->(1,2,4), (1,2)-->(5)
  + │    ├── project
  + │    │    ├── columns: a:5(int) t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
  + │    │    ├── stats: [rows=1000, distinct(3)=1000, null(3)=0]
  + │    │    ├── cost: 1104.72
    │    │    ├── key: (3)
  - │    │    ├── fd: (3)-->(1,2,4)
  - │    │    └── prune: (1-4)
  - │    ├── values
  - │    │    ├── columns: a:5(int)
  - │    │    ├── outer: (1,2)
  - │    │    ├── cardinality: [1 - 1]
  - │    │    ├── stats: [rows=1]
  - │    │    ├── cost: 0.02
  - │    │    ├── key: ()
  - │    │    ├── fd: ()-->(5)
  - │    │    ├── prune: (5)
  - │    │    └── tuple [type=tuple{tuple{int AS a, int AS b}}]
  - │    │         └── tuple [type=tuple{int AS a, int AS b}]
  + │    │    ├── fd: (3)-->(1,2,4), (1,2)-->(5)
  + │    │    ├── prune: (1-5)
  + │    │    ├── interesting orderings: (+3)
  + │    │    ├── scan t.public.t
  + │    │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
  + │    │    │    ├── stats: [rows=1000, distinct(3)=1000, null(3)=0]
  + │    │    │    ├── cost: 1084.71
  + │    │    │    ├── key: (3)
  + │    │    │    ├── fd: (3)-->(1,2,4)
  + │    │    │    ├── prune: (1-4)
  + │    │    │    └── interesting orderings: (+3)
  + │    │    └── projections
  + │    │         └── tuple [as=a:5, type=tuple{int AS a, int AS b}, outer=(1,2)]
    │    │              ├── variable: t.public.t.a:1 [type=int]
    │    │              └── variable: t.public.t.b:2 [type=int]
    │    └── filters (true)
    └── projections
         └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(5,6)]
              ├── variable: a:5 [type=int]
              └── variable: b:6 [type=int]
================================================================================
EliminateSelect
  Cost: 1124.73
================================================================================
   project
    ├── columns: unnest:7(tuple{int AS a, int AS b})
    ├── outer: (6)
    ├── stats: [rows=1000]
  - ├── cost: 1134.74
  + ├── cost: 1124.73
    ├── prune: (7)
  - ├── select
  - │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal) a:5(int)
  + ├── project
  + │    ├── columns: a:5(int) t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
    │    ├── stats: [rows=1000]
  - │    ├── cost: 1114.73
  + │    ├── cost: 1104.72
    │    ├── key: (3)
    │    ├── fd: (3)-->(1,2,4), (1,2)-->(5)
  - │    ├── project
  - │    │    ├── columns: a:5(int) t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
  - │    │    ├── stats: [rows=1000, distinct(3)=1000, null(3)=0]
  - │    │    ├── cost: 1104.72
  + │    ├── prune: (1-5)
  + │    ├── scan t.public.t
  + │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
  + │    │    ├── stats: [rows=1000]
  + │    │    ├── cost: 1084.71
    │    │    ├── key: (3)
  - │    │    ├── fd: (3)-->(1,2,4), (1,2)-->(5)
  - │    │    ├── prune: (1-5)
  - │    │    ├── interesting orderings: (+3)
  - │    │    ├── scan t.public.t
  - │    │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
  - │    │    │    ├── stats: [rows=1000, distinct(3)=1000, null(3)=0]
  - │    │    │    ├── cost: 1084.71
  - │    │    │    ├── key: (3)
  - │    │    │    ├── fd: (3)-->(1,2,4)
  - │    │    │    ├── prune: (1-4)
  - │    │    │    └── interesting orderings: (+3)
  - │    │    └── projections
  - │    │         └── tuple [as=a:5, type=tuple{int AS a, int AS b}, outer=(1,2)]
  - │    │              ├── variable: t.public.t.a:1 [type=int]
  - │    │              └── variable: t.public.t.b:2 [type=int]
  - │    └── filters (true)
  + │    │    ├── fd: (3)-->(1,2,4)
  + │    │    └── prune: (1-4)
  + │    └── projections
  + │         └── tuple [as=a:5, type=tuple{int AS a, int AS b}, outer=(1,2)]
  + │              ├── variable: t.public.t.a:1 [type=int]
  + │              └── variable: t.public.t.b:2 [type=int]
    └── projections
         └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(5,6)]
              ├── variable: a:5 [type=int]
              └── variable: b:6 [type=int]
================================================================================
PruneProjectCols
  Cost: 1124.73
================================================================================
   project
    ├── columns: unnest:7(tuple{int AS a, int AS b})
    ├── outer: (6)
    ├── stats: [rows=1000]
    ├── cost: 1124.73
    ├── prune: (7)
    ├── project
  - │    ├── columns: a:5(int) t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
  + │    ├── columns: a:5(int)
    │    ├── stats: [rows=1000]
    │    ├── cost: 1104.72
  - │    ├── key: (3)
  - │    ├── fd: (3)-->(1,2,4), (1,2)-->(5)
  - │    ├── prune: (1-5)
  + │    ├── prune: (5)
    │    ├── scan t.public.t
    │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
    │    │    ├── stats: [rows=1000]
    │    │    ├── cost: 1084.71
    │    │    ├── key: (3)
    │    │    ├── fd: (3)-->(1,2,4)
    │    │    └── prune: (1-4)
    │    └── projections
    │         └── tuple [as=a:5, type=tuple{int AS a, int AS b}, outer=(1,2)]
    │              ├── variable: t.public.t.a:1 [type=int]
    │              └── variable: t.public.t.b:2 [type=int]
    └── projections
         └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(5,6)]
              ├── variable: a:5 [type=int]
              └── variable: b:6 [type=int]
================================================================================
PruneScanCols
  Cost: 1104.53
================================================================================
   project
    ├── columns: unnest:7(tuple{int AS a, int AS b})
    ├── outer: (6)
    ├── stats: [rows=1000]
  - ├── cost: 1124.73
  + ├── cost: 1104.53
    ├── prune: (7)
    ├── project
    │    ├── columns: a:5(int)
    │    ├── stats: [rows=1000]
  - │    ├── cost: 1104.72
  + │    ├── cost: 1084.52
    │    ├── prune: (5)
    │    ├── scan t.public.t
  - │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int) t.public.t.rowid:3(int!null) t.public.t.crdb_internal_mvcc_timestamp:4(decimal)
  + │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int)
    │    │    ├── stats: [rows=1000]
  - │    │    ├── cost: 1084.71
  - │    │    ├── key: (3)
  - │    │    ├── fd: (3)-->(1,2,4)
  - │    │    └── prune: (1-4)
  + │    │    ├── cost: 1064.51
  + │    │    └── prune: (1,2)
    │    └── projections
    │         └── tuple [as=a:5, type=tuple{int AS a, int AS b}, outer=(1,2)]
    │              ├── variable: t.public.t.a:1 [type=int]
    │              └── variable: t.public.t.b:2 [type=int]
    └── projections
         └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(5,6)]
              ├── variable: a:5 [type=int]
              └── variable: b:6 [type=int]
================================================================================
InlineProjectInProject
  Cost: 1084.52
================================================================================
   project
    ├── columns: unnest:7(tuple{int AS a, int AS b})
    ├── outer: (6)
    ├── stats: [rows=1000]
  - ├── cost: 1104.53
  + ├── cost: 1084.52
    ├── prune: (7)
  - ├── project
  - │    ├── columns: a:5(int)
  + ├── scan t.public.t
  + │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int)
    │    ├── stats: [rows=1000]
  - │    ├── cost: 1084.52
  - │    ├── prune: (5)
  - │    ├── scan t.public.t
  - │    │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int)
  - │    │    ├── stats: [rows=1000]
  - │    │    ├── cost: 1064.51
  - │    │    └── prune: (1,2)
  - │    └── projections
  - │         └── tuple [as=a:5, type=tuple{int AS a, int AS b}, outer=(1,2)]
  - │              ├── variable: t.public.t.a:1 [type=int]
  - │              └── variable: t.public.t.b:2 [type=int]
  + │    ├── cost: 1064.51
  + │    └── prune: (1,2)
    └── projections
  -      └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(5,6)]
  -           ├── variable: a:5 [type=int]
  +      └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(1,2,6)]
  +           ├── tuple [type=tuple{int AS a, int AS b}]
  +           │    ├── variable: t.public.t.a:1 [type=int]
  +           │    └── variable: t.public.t.b:2 [type=int]
              └── variable: b:6 [type=int]
--------------------------------------------------------------------------------
GenerateIndexScans (no changes)
--------------------------------------------------------------------------------
================================================================================
Final best expression
  Cost: 1084.52
================================================================================
  project
   ├── columns: unnest:7(tuple{int AS a, int AS b})
   ├── outer: (6)
   ├── stats: [rows=1000]
   ├── cost: 1084.52
   ├── prune: (7)
   ├── scan t.public.t
   │    ├── columns: t.public.t.a:1(int) t.public.t.b:2(int)
   │    ├── stats: [rows=1000]
   │    ├── cost: 1064.51
   │    └── prune: (1,2)
   └── projections
        └── tuple [as=unnest:7, type=tuple{int AS a, int AS b}, outer=(1,2,6)]
             ├── tuple [type=tuple{int AS a, int AS b}]
             │    ├── variable: t.public.t.a:1 [type=int]
             │    └── variable: t.public.t.b:2 [type=int]
             └── variable: b:6 [type=int]

Notice that ConvertZipArraysToValues creates a Values with only one column: + │ │ ├── columns: a:5(int). It should probably also include column b.

DrewKimball · 2021-05-11T19:23:58Z

I'm the one who wrote that rule, so I feel a bit of responsibility for helping out. Removing the rule entirely and then executing this query:

statement ok
CREATE TABLE t (a INT PRIMARY KEY, b INT, c INT)

statement ok
INSERT INTO t VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1)

query T
SELECT unnest(ARRAY[t.*]) FROM t
----

Gives this error: pq: internal error: invalid datum type given: tuple{int AS a, int AS b, int AS c}, expected int
Doesn't necessarily mean there's nothing wrong with ConvertZipArraysToValues, but there's definitely something fishy going on here.

DrewKimball · 2021-05-11T19:27:04Z

Hm, it seems like the optbuilder code thinks the array elements are integers, when they are really tuples.

rytaft · 2021-05-11T19:27:36Z

Good find, @DrewKimball -- so maybe @mgartner's original thought that the ProjectSet needs to be returning a tuple is the right one.

DrewKimball · 2021-05-11T19:36:12Z

I think this goes all the way down to the parser, fun.

DrewKimball · 2021-05-11T19:42:23Z

Or maybe not

cucaroach · 2021-05-11T19:43:32Z

Just adding what @rytaft and I talked about on slack, ConstructValuesFromZip is hard coded to assume 1 col in the zip So one fix is to disable the rule like so:

diff --git a/pkg/sql/opt/norm/project_set_funcs.go b/pkg/sql/opt/norm/project_set_funcs.go
index 4dec3e7770..dbf2ce7f9f 100644
--- a/pkg/sql/opt/norm/project_set_funcs.go
+++ b/pkg/sql/opt/norm/project_set_funcs.go
@@ -46,6 +46,10 @@ func (c *CustomFuncs) CanConstructValuesFromZips(zip memo.ZipExpr) bool {
                        // Unnest has more than one argument.
                        return false
                }
+               if len(zip[i].Cols) != 1 {
+                       // Multi column not supported
+                       return false
+               }
                if !c.IsStaticArray(fn.Args[0]) && !c.WrapsJSONArray(fn.Args[0]) {
                        // Argument is not an ArrayExpr or ConstExpr wrapping a DArray or DJSON.
                        return false

I still need to digest Drew's comments...

DrewKimball · 2021-05-11T19:47:48Z

I think the unnest and json functions always return one row, but checking that might be a good idea anyway

DrewKimball · 2021-05-11T19:47:57Z

*Column, not row

DrewKimball · 2021-05-11T20:25:06Z

Actually, scratch what I said earlier. The optbuilder is getting the correct element type, I forgot to remove an old query from my test :/

DrewKimball · 2021-05-11T22:51:14Z

@cucaroach you picked a pretty thorny 'quick win'...
If you look at the AllColumnsSelector case at line 900 of scope.go, you'll see that the tuple that gets created has labels for the columns. Look at line 1038 of the same file and finishBuildGeneratorFunction in srfs.go - the tuple only gets a single output column when there are no tuple labels; otherwise, the tuple columns are expanded out into multiple columns. Since unnest returns only a single column, this messes things up.

I'm not really sure how you'd go about fixing this.

cucaroach · 2021-05-11T23:03:28Z

No kidding! I think the confusion may be that unnest has different semantics when it appears in the select clause vs the from clause. These examples from the postgres docs are helpful:

select unnest(ARRAY[['foo','bar'],['baz','quux']]) →

 foo
 bar
 baz
 quux

Vs

select * from unnest(ARRAY[1,2], ARRAY['foo','bar','baz']) as x(a,b) →

 a |  b
---+-----
 1 | foo
 2 | bar
   | baz

I'm thinking we need to treat these two cases differently? Actually I take that back ... I think unnest always returns a scalar but when in the from clause, the args to unnest materialize as columns, more examples:

unnest_bug=# select * from unnest(ARRAY[1,2], ARRAY['foo','bar','baz']);
 unnest | unnest
--------+--------
      1 | foo
      2 | bar
        | baz
(3 rows)

unnest_bug=# select * from unnest(ARRAY[1,2], ARRAY['foo','bar','baz']) as x(a,b);
 a |  b
---+-----
 1 | foo
 2 | bar
   | baz
(3 rows)

unnest_bug=# select * from unnest(ARRAY[1,2], ARRAY['foo','bar','baz'], ARRAY[3,4,6]);
 unnest | unnest | unnest
--------+--------+--------
      1 | foo    |      3
      2 | bar    |      4
        | baz    |      6
(3 rows)

unnest_bug=# select * from unnest(ARRAY[1,2], ARRAY['foo','bar','baz'], ARRAY[3,4,6]) as x(a,b);
 a |  b  | unnest
---+-----+--------
 1 | foo |      3
 2 | bar |      4
   | baz |      6
(3 rows)

DrewKimball · 2021-05-11T23:31:10Z

I think the difference there is because there are two arrays in the unnest vs an array with arrays inside it.
As an aside, cockroach gets a different output for queries like that first one:

{foo,bar}
{baz,quux}

I wonder if this is intentional or not?

DrewKimball · 2021-05-11T23:31:59Z

Here's an example with the nested arrays in the from clause:

postgres=# select * from unnest(ARRAY[['boop', 'bop'], ['foo','bar']]) as x(a);
  a
------
 boop
 bop
 foo
 bar
(4 rows)

cucaroach · 2021-05-12T11:31:05Z

Apparently postgres will fully unpack nested arrays but not tuples.

unnest_bug=# select unnest(ARRAY[['foo','bar'],['baz','quux']]);
 unnest
--------
 foo
 bar
 baz
 quux
(4 rows)

unnest_bug=# SELECT unnest(ARRAY[(1,2),(3,4)]);
 unnest
--------
 (1,2)
 (3,4)
(2 rows)

unnest_bug=# SELECT unnest(ARRAY[[1,2],[3,4]]);
 unnest
--------
      1
      2
      3
      4
(4 rows)

cucaroach · 2021-05-12T12:16:17Z

I have to end this lovely diversion for now but I pushed what I think are good but incomplete fixes to unnest-panic-bug branch. I think a good next step would be to audit the relevant tests in srfs to see which do and don't agree with postgres. Feel free to take this bug and run with it @DrewKimball! Otherwise I'll return to it on Friday.

DrewKimball · 2022-07-25T20:21:10Z

I think the bug that lies at the root of all this is that we represent a function that returns multiple columns the same as a function that returns a single tuple column with the same internal types; e.g. the return type for a multiple-column function is a tuple with labels. This causes the optbuilder to infer multiple columns returned by the unnest function that should only return one tuple column:

cockroach/pkg/sql/opt/optbuilder/srfs.go

Lines 152 to 157 in 9e44f56

    
           // Multi-column return type. Use the tuple labels in the SRF's return type 
        
           // as column aliases. 
        
           typ := f.ResolvedType() 
        
           for i := range typ.TupleContents() { 
        
           	b.synthesizeColumn(outScope, scopeColName(tree.Name(typ.TupleLabels()[i])), typ.TupleContents()[i], nil, fn) 
        
           }

I don't think the current way we handle types is powerful enough to describe multiple columns (hence why we use tuples - we really want composite types instead). We might have to solve that properly for UDFs anyway, but in the meantime I think the best way to solve it might be to change the signature of tree.ReturnTyper to return a boolean indicating whether the type is describing multiple columns:

type ReturnTyper func(args []TypedExpr) *types.T
=>
type ReturnTyper func(args []TypedExpr) (outTyp *types.T, isComposite bool)

…85329 84975: storage: add `MVCCRangeKeyStack` for range keys r=nicktrav,jbowens a=erikgrinaker **storage: add `MVCCRangeKeyStack` for range keys** This patch adds `MVCCRangeKeyStack` and `MVCCRangeKeyVersion`, a new range key representation that will be returned by `SimpleMVCCIterator`. It is more compact, for efficiency, and comes with a set of convenience methods to simplify common range key processing. Resolves #83895. Release note: None **storage: return `MVCCRangeKeyStack` from `SimpleMVCCIterator`** This patch changes `SimpleMVCCIterator.RangeKeys()` to return `MVCCRangeKeyStack` instead of `[]MVCCRangeKeyValue`. Callers have not been migrated to properly make use of this -- instead, they call `AsRangeKeyValues()` and construct and use the old data structure. The MVCC range tombstones tech note is also updated to reflect this. Release note: None **storage: migrate MVCC code to `MVCCRangeKeyStack`** Release note: None ***: migrate higher-level code to `MVCCRangeKeyStack`** Release note: None **kvserver/gc: partially migrate to `MVCCRangeKeyStack`** Some parts require invasive changes to MVCC stats helpers. These will shortly be consolidated with other MVCC stats logic elsewhere, so the existing logic is retained for now by using `AsRangeKeyValues()`. Release note: None **storage: remove `FirstRangeKeyAbove()` and `HasRangeKeyBetween()`** Release note: None 85017: Revert "sql: Add database ID to sampled query log" r=THardy98 a=THardy98 Reverts: #84195 This reverts commit 307817e. Removes the DatabaseID field from the `SampledQuery` telemetry log due to the potential of indefinite blocking in the case of a lease acquisition failure. Protobuf field not reserved as no official build was released with these changes yet. Release note (sql change): Removes the DatabaseID field from the `SampledQuery` telemetry log due to the potential of indefinite blocking in the case of a lease acquisition failure. 85024: cloud/gcp: add custom retryer for gcs storage, retry on stream INTERNAL_ERROR r=rhu713 a=rhu713 Currently, errors like `stream error: stream ID <x>; INTERNAL_ERROR; received from peer` are not being retried. Create a custom retryer to retry these errors as suggested by: googleapis/google-cloud-go#3735 googleapis/google-cloud-go#784 Fixes: #85217, #85216, #85204, #84162 Release note: None 85069: optbuilder: handle unnest returning a tuple r=DrewKimball a=DrewKimball Currently, the return types of SRFs that return multiple columns are represented as tuples with labels. The tuple labels are used to decide whether or not to create a single output column for the SRF, or multiple. The `unnest` function can return a single column if it has a single argument, and the type of that column can be a tuple with labels. This could cause the old logic to mistakenly create multiple output columns for `unnest`, which could lead to panics down the line and incorrect behavior otherwise. This commit adds a special case for `unnest` in the `optbuilder` to only expand tuple return types if there is more than one argument (implying more than one output column). Other SRFs do not have the same problem because they either always return the same number of columns, cannot return tuples, or both. Fixes #58438 Release note (bug fix): Fixed a bug existing since release 20.1 that could cause a panic in rare cases when the unnest function was used with a tuple return type. 85100: opt: perf improvements for large queries r=DrewKimball a=DrewKimball **opt: add bench test for slow queries** This commit adds two slow-planning queries pulled from #64793 to be used in benchmarking the optimizer. In addition, the `ReorderJoinsLimit` has been set to the default 8 for benchmarking tests. **opt: add struct for tracking column equivalence sets** Previously, the `JoinOrderBuilder` would construct a `FuncDepSet` from scratch on each call to `addJoins` in order to eliminate redundant join filters. This led to unnecessary large allocations because `addJoins` is called an exponential number of times in query size. This commit adds a struct `EquivSet` that efficiently stores equivalence relations as `ColSets` in a slice. Rather than being constructed on each call to `addJoins`, a `Reset` method is called that maintains slice memory. In the future, `EquivSet` can be used to handle equivalencies within `FuncDepSet` structs as well. This well avoid a significant number of allocations in cases with many equivalent columns, as outlined in #83963. **opt: avoid usage of FastIntMap in optimizer hot paths** Previously, `computeHashJoinCost` would use a `FastIntMap` to represent join equality filters to pass to `computeFiltersCost`. In addition, `GenerateMergeJoins` used a `FastIntMap` to look up columns among its join equality columns. This lead to unnecessary allocations since column IDs are often large enough to exceed the small field of `FastIntMap`. This commit modifies `computeFiltersCost` to take an anonymous function that is used to decide whether to skip an equality condition, removing the need for a mapping between columns. This commit also refactors `GenerateMergeJoins` to simply perform a linear scan of its equality columns; this avoids the allocation issue, and should be fast in practice because the number of equalities will not generally be large. Release note: None 85146: [backupccl] Use Expr for backup's Detached and Revision History options r=benbardin a=benbardin This will allow us to set them to null, which will be helpful for ALTER commands. Release note: None 85234: dev: add rewritable paths for norm tests r=mgartner a=mgartner Tests in `pkg/sql/opt/norm` are similar to tests in `pkg/sql/opt/xform` and `pkg/sql/opt/memo` in that they rely on fixtures in `pkg/sql/opt/testutils/opttester/testfixtures`. This commit adds these fixtures as rewritable paths for norm tests so that `./dev test pkg/sql/opt/xform --rewrite` does not fail with errors like: open pkg/sql/opt/testutils/opttester/testfixtures/tpcc_schema: operation not permitted Release note: None 85325: sql: fix explain gist output to show number of scan span constraints r=cucaroach a=cucaroach If there were span constraints we would always print 1, need to actually append them to get the count right. Fixes: #85324 Release note: None 85327: sql: fix udf logic test r=chengxiong-ruan a=chengxiong-ruan Fixes: #85303 Release note: None 85329: colexec: fix recent concat fix r=yuzefovich a=yuzefovich The recent fix of the Concat operator in the vectorized engine doesn't handle the array concatenation correctly and this is now fixed. Fixes: #85295. Release note: None Co-authored-by: Erik Grinaker <[email protected]> Co-authored-by: Thomas Hardy <[email protected]> Co-authored-by: Rui Hu <[email protected]> Co-authored-by: DrewKimball <[email protected]> Co-authored-by: Andrew Kimball <[email protected]> Co-authored-by: Ben Bardin <[email protected]> Co-authored-by: Marcus Gartner <[email protected]> Co-authored-by: Tommy Reilly <[email protected]> Co-authored-by: Chengxiong Ruan <[email protected]> Co-authored-by: Yahor Yuzefovich <[email protected]>

Currently, the return types of SRFs that return multiple columns are represented as tuples with labels. The tuple labels are used to decide whether or not to create a single output column for the SRF, or multiple. The `unnest` function can return a single column if it has a single argument, and the type of that column can be a tuple with labels. This could cause the old logic to mistakenly create multiple output columns for `unnest`, which could lead to panics down the line and incorrect behavior otherwise. This commit adds a special case for `unnest` in the `optbuilder` to only expand tuple return types if there is more than one argument (implying more than one output column). Other SRFs do not have the same problem because they either always return the same number of columns, cannot return tuples, or both. Fixes #58438 Release note (bug fix): Fixed a bug existing since release 20.1 that could cause a panic in rare cases when the unnest function was used with a tuple return type.

cockroach-teamcity added C-bug Code not up to spec/doc, specs & docs deemed correct. Solution expected to change code/behavior. O-sentry Originated from an in-the-wild panic report. labels Jan 5, 2021

jordanlewis assigned RaduBerinde Jan 5, 2021

mgartner assigned mgartner and unassigned RaduBerinde Jan 12, 2021

rytaft added the E-quick-win Likely to be a quick win for someone experienced. label May 10, 2021

jlinder added the T-sql-queries SQL Queries Team label Jun 16, 2021

rytaft removed the E-quick-win Likely to be a quick win for someone experienced. label Jul 1, 2021

rytaft unassigned mgartner Jul 1, 2021

DrewKimball mentioned this issue Jul 26, 2022

optbuilder: handle unnest returning a tuple #85069

Merged

craig bot closed this as completed in 008d5ab Jul 29, 2022

blathers-crl bot mentioned this issue Jul 29, 2022

release-21.2: optbuilder: handle unnest returning a tuple #85347

Merged

blathers-crl bot mentioned this issue Jul 29, 2022

release-22.1: optbuilder: handle unnest returning a tuple #85349

Merged

mgartner added this to SQL Queries Jul 24, 2023

mgartner moved this to Done in SQL Queries Jul 24, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

opt: top-level relational expression cannot have outer columns #58438

opt: top-level relational expression cannot have outer columns #58438

cockroach-teamcity commented Jan 5, 2021 •

edited by cockroach-jira-scripts

Loading

jordanlewis commented Jan 5, 2021

mgartner commented Jan 12, 2021 •

edited

Loading

cucaroach commented May 11, 2021 •

edited

Loading

cucaroach commented May 11, 2021 •

edited

Loading

rytaft commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

rytaft commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

cucaroach commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

cucaroach commented May 11, 2021 •

edited

Loading

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

cucaroach commented May 12, 2021

cucaroach commented May 12, 2021

DrewKimball commented Jul 25, 2022

opt: top-level relational expression cannot have outer columns #58438

opt: top-level relational expression cannot have outer columns #58438

Comments

cockroach-teamcity commented Jan 5, 2021 • edited by cockroach-jira-scripts Loading

jordanlewis commented Jan 5, 2021

mgartner commented Jan 12, 2021 • edited Loading

cucaroach commented May 11, 2021 • edited Loading

cucaroach commented May 11, 2021 • edited Loading

rytaft commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

rytaft commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

cucaroach commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

cucaroach commented May 11, 2021 • edited Loading

DrewKimball commented May 11, 2021

DrewKimball commented May 11, 2021

cucaroach commented May 12, 2021

cucaroach commented May 12, 2021

DrewKimball commented Jul 25, 2022

cockroach-teamcity commented Jan 5, 2021 •

edited by cockroach-jira-scripts

Loading

mgartner commented Jan 12, 2021 •

edited

Loading

cucaroach commented May 11, 2021 •

edited

Loading

cucaroach commented May 11, 2021 •

edited

Loading

cucaroach commented May 11, 2021 •

edited

Loading