Merge cockroachdb#121109

121109: sql: make tail-call optimization work with nested routines r=DrewKimball a=DrewKimball #### sql: defer tail-call identification until execbuilding This commit changes the way routine tail-calls are handled. Before, only PL/pgSQL sub-routines were considered as tail-calls, and this was determined by a `TailCall` property that was set during optbuilding. This approach was fragile, did not work for explicit tail-calls, and did not work well with nested routine calls in general. Now, tail-calls are determined after optimization, during execbuilding. This will allow explicit (user-specified) tail calls to be optimized. It also prevents inlining rules from causing correctness bugs, since the old `TailCall` property only applied to the original calling routine. See `ExtractTailCalls` for further details. The next commit will add additional testing. Informs cockroachdb#120916 Release note: None #### sql: add tests for tail-call property This commit adds tests for the `ExtractTailCalls` function from the previous commit, and adds a `tail-call` field to `UDFCall` expressions that are in tail-call position in another routine. It also adds a regression test for cockroachdb#120916. Informs cockroachdb#120916 Release note: None #### sql: check exception handler before applying TCO This commit finishes the tail-call optimization fix begun by the previous commits, by preventing TCO when it would lose the reference to the calling routine's exception handler. PL/pgSQL sub-routines always maintain a reference to their parent's exception handler, so this isn't a problem for them. However, explicit (user-specified) nested routines do not track the calling routine's exception handler. There is no release note because this bug hasn't appeared in any release. Fixes cockroachdb#120916 Release note: None Co-authored-by: Drew Kimball <[email protected]>
jasminejsun · Mar 28, 2024 · 00d91d5 · 00d91d5
2 parents f8dc5a0 + 332d347
commit 00d91d5
Show file tree

Hide file tree

Showing 33 changed files with 1,592 additions and 32 deletions.
diff --git a/pkg/ccl/logictestccl/testdata/logic_test/explain_call_plpgsql b/pkg/ccl/logictestccl/testdata/logic_test/explain_call_plpgsql
@@ -85,6 +85,7 @@ call
                 ├── fd: ()-->(5)
                 └── tuple [type=tuple{void}]
                      └── udf: _stmt_raise_1 [type=void]
+                          ├── tail-call
                           ├── args
                           │    └── variable: x:1 [type=int]
                           ├── params: x:2(int)

diff --git a/pkg/ccl/logictestccl/testdata/logic_test/nested_routines b/pkg/ccl/logictestccl/testdata/logic_test/nested_routines
@@ -0,0 +1,55 @@
+# LogicTest: !local-mixed-23.1 !local-mixed-23.2
+
+# Regression test for #120916 - the nested routine is not in tail-call position,
+# and so cannot be a target for TCO.
+statement ok
+CREATE FUNCTION f_nested(x INT) RETURNS INT AS $$
+  BEGIN
+    x := x * 2;
+    RETURN x;
+  END
+$$ LANGUAGE PLpgSQL;
+
+statement ok
+CREATE FUNCTION f() RETURNS RECORD AS $$
+  DECLARE
+    a INT := -2;
+  BEGIN
+    a := f_nested(a);
+    RAISE NOTICE 'here';
+    RETURN (a, -a);
+  END
+$$ LANGUAGE PLpgSQL;
+
+query II
+SELECT * FROM f() AS g(x INT, y INT);
+----
+-4  4
+
+# Case with an exception handler on the parent routine. This prevents TCO,
+# since executing the child routine in the parent's context would lose track
+# of the exception handler.
+statement ok
+DROP FUNCTION f;
+DROP FUNCTION f_nested;
+
+statement ok
+CREATE FUNCTION f_nested() RETURNS INT AS $$
+  BEGIN
+    RETURN 1//0;
+  END
+$$ LANGUAGE PLpgSQL;
+
+statement ok
+CREATE FUNCTION f() RETURNS INT AS $$
+  BEGIN
+    RETURN f_nested();
+  EXCEPTION WHEN division_by_zero THEN
+    RETURN -1;
+  END
+$$ LANGUAGE PLpgSQL;
+
+query I
+SELECT f();
+----
+-1
diff --git a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go
diff --git a/pkg/ccl/logictestccl/tests/fakedist-disk/BUILD.bazel b/pkg/ccl/logictestccl/tests/fakedist-disk/BUILD.bazel
@@ -12,7 +12,7 @@ go_test(
         "//build/toolchains:is_heavy": {"test.Pool": "heavy"},
         "//conditions:default": {"test.Pool": "large"},
     }),
-    shard_count = 26,
+    shard_count = 27,
     tags = [
         "ccl_test",
         "cpu:2",

diff --git a/pkg/ccl/logictestccl/tests/fakedist-disk/generated_test.go b/pkg/ccl/logictestccl/tests/fakedist-disk/generated_test.go
diff --git a/pkg/ccl/logictestccl/tests/fakedist-vec-off/BUILD.bazel b/pkg/ccl/logictestccl/tests/fakedist-vec-off/BUILD.bazel
@@ -12,7 +12,7 @@ go_test(
         "//build/toolchains:is_heavy": {"test.Pool": "heavy"},
         "//conditions:default": {"test.Pool": "large"},
     }),
-    shard_count = 26,
+    shard_count = 27,
     tags = [
         "ccl_test",
         "cpu:2",

diff --git a/pkg/ccl/logictestccl/tests/fakedist-vec-off/generated_test.go b/pkg/ccl/logictestccl/tests/fakedist-vec-off/generated_test.go
diff --git a/pkg/ccl/logictestccl/tests/fakedist/BUILD.bazel b/pkg/ccl/logictestccl/tests/fakedist/BUILD.bazel
@@ -12,7 +12,7 @@ go_test(
         "//build/toolchains:is_heavy": {"test.Pool": "heavy"},
         "//conditions:default": {"test.Pool": "large"},
     }),
-    shard_count = 27,
+    shard_count = 28,
     tags = [
         "ccl_test",
         "cpu:2",

diff --git a/pkg/ccl/logictestccl/tests/fakedist/generated_test.go b/pkg/ccl/logictestccl/tests/fakedist/generated_test.go
diff --git a/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/BUILD.bazel b/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/BUILD.bazel
@@ -9,7 +9,7 @@ go_test(
         "//pkg/ccl/logictestccl:testdata",  # keep
     ],
     exec_properties = {"test.Pool": "large"},
-    shard_count = 26,
+    shard_count = 27,
     tags = [
         "ccl_test",
         "cpu:1",

diff --git a/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/generated_test.go b/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/generated_test.go
diff --git a/pkg/ccl/logictestccl/tests/local-read-committed/BUILD.bazel b/pkg/ccl/logictestccl/tests/local-read-committed/BUILD.bazel
@@ -10,7 +10,7 @@ go_test(
         "//pkg/sql/opt/exec/execbuilder:testdata",  # keep
     ],
     exec_properties = {"test.Pool": "large"},
-    shard_count = 33,
+    shard_count = 34,
     tags = [
         "ccl_test",
         "cpu:1",

diff --git a/pkg/ccl/logictestccl/tests/local-read-committed/generated_test.go b/pkg/ccl/logictestccl/tests/local-read-committed/generated_test.go
diff --git a/pkg/ccl/logictestccl/tests/local-vec-off/BUILD.bazel b/pkg/ccl/logictestccl/tests/local-vec-off/BUILD.bazel
@@ -9,7 +9,7 @@ go_test(
         "//pkg/ccl/logictestccl:testdata",  # keep
     ],
     exec_properties = {"test.Pool": "large"},
-    shard_count = 26,
+    shard_count = 27,
     tags = [
         "ccl_test",
         "cpu:1",

diff --git a/pkg/ccl/logictestccl/tests/local-vec-off/generated_test.go b/pkg/ccl/logictestccl/tests/local-vec-off/generated_test.go
diff --git a/pkg/ccl/logictestccl/tests/local/BUILD.bazel b/pkg/ccl/logictestccl/tests/local/BUILD.bazel
@@ -9,7 +9,7 @@ go_test(
         "//pkg/ccl/logictestccl:testdata",  # keep
     ],
     exec_properties = {"test.Pool": "large"},
-    shard_count = 42,
+    shard_count = 43,
     tags = [
         "ccl_test",
         "cpu:1",

diff --git a/pkg/ccl/logictestccl/tests/local/generated_test.go b/pkg/ccl/logictestccl/tests/local/generated_test.go
diff --git a/pkg/sql/opt/exec/execbuilder/builder.go b/pkg/sql/opt/exec/execbuilder/builder.go
@@ -111,6 +111,11 @@ type Builder struct {
 	// subqueries for statements inside a UDF.
 	planLazySubqueries bool
 
+	// tailCalls is used when building the last body statement of a routine. It
+	// identifies nested routines that are in tail-call position. This information
+	// is used to determine whether tail-call optimization is applicable.
+	tailCalls map[*memo.UDFCallExpr]struct{}
+
 	// -- output --
 
 	// flags tracks various properties of the plan accumulated while building.

diff --git a/pkg/sql/opt/exec/execbuilder/relational.go b/pkg/sql/opt/exec/execbuilder/relational.go
@@ -3390,10 +3390,10 @@ func (b *Builder) buildCall(c *memo.CallExpr) (_ execPlan, outputCols colOrdMap,
 		udf.Def.CalledOnNullInput,
 		udf.Def.MultiColDataSource,
 		udf.Def.SetReturning,
-		udf.TailCall,
-		true, /* procedure */
-		nil,  /* blockState */
-		nil,  /* cursorDeclaration */
+		false, /* tailCall */
+		true,  /* procedure */
+		nil,   /* blockState */
+		nil,   /* cursorDeclaration */
 	)
 
 	var ep execPlan

diff --git a/pkg/sql/opt/exec/execbuilder/scalar.go b/pkg/sql/opt/exec/execbuilder/scalar.go
@@ -953,6 +953,14 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
 		b.initRoutineExceptionHandler(blockState, udf.Def.ExceptionBlock)
 	}
 
+	// Execution expects there to be more than one body statement if a cursor is
+	// opened.
+	if udf.Def.CursorDeclaration != nil && len(udf.Def.Body) <= 1 {
+		panic(errors.AssertionFailedf(
+			"expected more than one body statement for a routine that opens a cursor",
+		))
+	}
+
 	// Create a tree.RoutinePlanFn that can plan the statements in the UDF body.
 	// TODO(mgartner): Add support for WITH expressions inside UDF bodies.
 	planGen := b.buildRoutinePlanGenerator(
@@ -969,6 +977,10 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
 	// statements.
 	enableStepping := udf.Def.Volatility == volatility.Volatile
 
+	// The calling routine, if any, will have already determined whether this
+	// routine is in tail-call position.
+	_, tailCall := b.tailCalls[udf]
+
 	return tree.NewTypedRoutineExpr(
 		udf.Def.Name,
 		args,
@@ -978,7 +990,7 @@ func (b *Builder) buildUDF(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree.Typ
 		udf.Def.CalledOnNullInput,
 		udf.Def.MultiColDataSource,
 		udf.Def.SetReturning,
-		udf.TailCall,
+		tailCall,
 		false, /* procedure */
 		blockState,
 		udf.Def.CursorDeclaration,
@@ -1173,12 +1185,23 @@ func (b *Builder) buildRoutinePlanGenerator(
 				return err
 			}
 
+			// Identify nested routines that are in tail-call position, and cache them
+			// in the Builder. When a nested routine is evaluated, this information
+			// may be used to enable tail-call optimization.
+			isFinalPlan := i == len(stmts)-1
+			var tailCalls map[*memo.UDFCallExpr]struct{}
+			if isFinalPlan {
+				tailCalls = make(map[*memo.UDFCallExpr]struct{})
+				memo.ExtractTailCalls(optimizedExpr, tailCalls)
+			}
+
 			// Build the memo into a plan.
 			ef := ref.(exec.Factory)
 			eb := New(ctx, ef, &o, f.Memo(), b.catalog, optimizedExpr, b.semaCtx, b.evalCtx, false /* allowAutoCommit */, b.IsANSIDML)
 			eb.withExprs = withExprs
 			eb.disableTelemetry = true
 			eb.planLazySubqueries = true
+			eb.tailCalls = tailCalls
 			plan, err := eb.Build()
 			if err != nil {
 				if errors.IsAssertionFailure(err) {
@@ -1194,7 +1217,6 @@ func (b *Builder) buildRoutinePlanGenerator(
 			if len(eb.subqueries) > 0 {
 				return expectedLazyRoutineError("subquery")
 			}
-			isFinalPlan := i == len(stmts)-1
 			var stmtForDistSQLDiagram string
 			if i < len(stmtStr) {
 				stmtForDistSQLDiagram = stmtStr[i]

diff --git a/pkg/sql/opt/memo/expr.go b/pkg/sql/opt/memo/expr.go
@@ -717,6 +717,9 @@ type UDFDefinition struct {
 	// builtin function.
 	RoutineType tree.RoutineType
 
+	// RoutineLang indicates the language of the routine (SQL or PL/pgSQL).
+	RoutineLang tree.RoutineLanguage
+
 	// Params is the list of columns representing parameters of the function. The
 	// i-th column in the list corresponds to the i-th parameter of the function.
 	// During execution of the UDF, these columns are replaced with the arguments

diff --git a/pkg/sql/opt/memo/expr_format.go b/pkg/sql/opt/memo/expr_format.go
@@ -171,6 +171,10 @@ type ExprFmtCtx struct {
 	// seenUDFs is used to ensure that formatting of recursive UDFs does not
 	// infinitely recurse.
 	seenUDFs map[*UDFDefinition]struct{}
+
+	// tailCalls allows for quick lookup of all the routines in tail-call position
+	// when the last body statement of a routine is formatted.
+	tailCalls map[*UDFCallExpr]struct{}
 }
 
 // makeExprFmtCtxForString creates an expression formatting context from a new
@@ -957,13 +961,18 @@ func (f *ExprFmtCtx) formatScalarWithLabel(
 			}
 			n := tp.Child("body")
 			for i := range def.Body {
+				stmtNode := n
 				if i == 0 && def.CursorDeclaration != nil {
 					// The first statement is opening a cursor.
-					cur := n.Child("open-cursor")
-					f.formatExpr(def.Body[i], cur)
-					continue
+					stmtNode = n.Child("open-cursor")
+				}
+				prevTailCalls := f.tailCalls
+				if i == len(def.Body)-1 {
+					f.tailCalls = make(map[*UDFCallExpr]struct{})
+					ExtractTailCalls(def.Body[i], f.tailCalls)
 				}
-				f.formatExpr(def.Body[i], n)
+				f.formatExpr(def.Body[i], stmtNode)
+				f.tailCalls = prevTailCalls
 			}
 			delete(f.seenUDFs, def)
 		} else {
@@ -998,6 +1007,10 @@ func (f *ExprFmtCtx) formatScalarWithLabel(
 		if !udf.Def.CalledOnNullInput {
 			tp.Child("strict")
 		}
+		if _, tailCall := f.tailCalls[udf]; tailCall {
+			// This routine is in tail-call position in the parent routine.
+			tp.Child("tail-call")
+		}
 		formatRoutineArgs(udf.Args, tp)
 		formatUDFDefinition(udf.Def, tp)
 	}