From c004da8f499bb32d6e7fa1671c8dfc2b1cac0985 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Thu, 6 Oct 2022 22:15:31 +0000 Subject: [PATCH] opt: do not plan insert fast path if expression contains UDF In general, it is not safe to perform the INSERT fast path if the VALUES clause invokes a UDF. The INSERT fast path always performs the FK checks before the INSERT. A UDF might mutate other tables (although we don't currently support this), so the FK checks must be performed after the INSERT is complete. Release note: None --- pkg/sql/opt/exec/execbuilder/mutation.go | 6 ++- pkg/sql/opt/exec/execbuilder/testdata/insert | 51 ++++++++++++++++++++ pkg/sql/opt/memo/expr_format.go | 3 ++ pkg/sql/opt/memo/logical_props_builder.go | 1 + pkg/sql/opt/memo/testdata/logprops/udf | 8 +-- pkg/sql/opt/props/logical.go | 6 ++- 6 files changed, 67 insertions(+), 8 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/mutation.go b/pkg/sql/opt/exec/execbuilder/mutation.go index 22bf209fdcfc..430a12477e47 100644 --- a/pkg/sql/opt/exec/execbuilder/mutation.go +++ b/pkg/sql/opt/exec/execbuilder/mutation.go @@ -146,8 +146,10 @@ func (b *Builder) tryBuildFastPathInsert(ins *memo.InsertExpr) (_ execPlan, ok b // that we send, not a number of rows. We use this as a guideline only, // and there is no guarantee that we won't produce a bigger batch.) values, ok := ins.Input.(*memo.ValuesExpr) - // TODO(mgartner): Prevent fast path if there is a UDF invocation. - if !ok || values.ChildCount() > mutations.MaxBatchSize(false /* forceProductionMaxBatchSize */) || values.Relational().HasSubquery { + if !ok || + values.ChildCount() > mutations.MaxBatchSize(false /* forceProductionMaxBatchSize */) || + values.Relational().HasSubquery || + values.Relational().HasUDF { return execPlan{}, false, nil } diff --git a/pkg/sql/opt/exec/execbuilder/testdata/insert b/pkg/sql/opt/exec/execbuilder/testdata/insert index 84479081b9e0..eeaf2df4ed50 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/insert +++ b/pkg/sql/opt/exec/execbuilder/testdata/insert @@ -534,6 +534,57 @@ vectorized: true statement ok ROLLBACK +# ------------------------------------------------------------------------------ +# Insert fast path. +# ------------------------------------------------------------------------------ + +# Do not plan insert fast path when VALUES clause has a subquery. +# statement ok +query T +EXPLAIN INSERT INTO kv VALUES (1, (SELECT v FROM kv)) +---- +distribution: local +vectorized: true +· +• root +│ +├── • insert +│ │ into: kv(k, v) +│ │ auto commit +│ │ +│ └── • values +│ size: 2 columns, 1 row +│ +└── • subquery + │ id: @S1 + │ original sql: (SELECT v FROM kv) + │ exec mode: one row + │ + └── • max1row + │ estimated row count: 1 + │ + └── • scan + missing stats + table: kv@kv_pkey + spans: FULL SCAN + +statement ok +CREATE FUNCTION foo() RETURNS VARCHAR LANGUAGE SQL AS 'SELECT v FROM kv' + +# Do not plan insert fast path when VALUES clause invokes a UDF. +query T +EXPLAIN INSERT INTO kv VALUES (1, foo()) +---- +distribution: local +vectorized: true +· +• insert +│ into: kv(k, v) +│ auto commit +│ +└── • values + size: 2 columns, 1 row + # Regression test for #35564: make sure we use the Insert's input required # ordering for the internal projection. diff --git a/pkg/sql/opt/memo/expr_format.go b/pkg/sql/opt/memo/expr_format.go index d3378093c892..8680cdc1998b 100644 --- a/pkg/sql/opt/memo/expr_format.go +++ b/pkg/sql/opt/memo/expr_format.go @@ -1110,6 +1110,9 @@ func (f *ExprFmtCtx) scalarPropsStrings(scalar opt.ScalarExpr) []string { } else if scalarProps.HasSubquery { emitProp("subquery") } + if scalarProps.HasUDF { + emitProp("udf") + } } if !f.HasFlags(ExprFmtHideConstraints) { diff --git a/pkg/sql/opt/memo/logical_props_builder.go b/pkg/sql/opt/memo/logical_props_builder.go index 5dc07d24b5ab..2130f862f084 100644 --- a/pkg/sql/opt/memo/logical_props_builder.go +++ b/pkg/sql/opt/memo/logical_props_builder.go @@ -1656,6 +1656,7 @@ func BuildSharedProps(e opt.Expr, shared *props.Shared, evalCtx *eval.Context) { shared.VolatilitySet.Add(volatility) case *UDFExpr: + shared.HasUDF = true shared.VolatilitySet.Add(t.Volatility) default: diff --git a/pkg/sql/opt/memo/testdata/logprops/udf b/pkg/sql/opt/memo/testdata/logprops/udf index 8651b2dd3912..bf99cb49d924 100644 --- a/pkg/sql/opt/memo/testdata/logprops/udf +++ b/pkg/sql/opt/memo/testdata/logprops/udf @@ -35,7 +35,7 @@ project │ ├── prune: (1-4) │ └── interesting orderings: (+1) └── projections - └── plus [as="?column?":6, type=int, outer=(1), volatile] + └── plus [as="?column?":6, type=int, outer=(1), volatile, udf] ├── variable: a:1 [type=int] └── udf: fn_volatile [type=int] └── body @@ -80,7 +80,7 @@ project │ ├── prune: (1-4) │ └── interesting orderings: (+1) └── filters - └── eq [type=bool, outer=(2), immutable, constraints=(/2: (/NULL - ]), fd=()-->(2)] + └── eq [type=bool, outer=(2), immutable, udf, constraints=(/2: (/NULL - ]), fd=()-->(2)] ├── variable: b:2 [type=int] └── udf: fn_immutable [type=int] └── body @@ -125,7 +125,7 @@ project │ ├── prune: (1-4) │ └── interesting orderings: (+1) └── filters - └── eq [type=bool, outer=(2), stable, constraints=(/2: (/NULL - ]), fd=()-->(2)] + └── eq [type=bool, outer=(2), stable, udf, constraints=(/2: (/NULL - ]), fd=()-->(2)] ├── variable: b:2 [type=int] └── plus [type=int] ├── udf: fn_immutable [type=int] @@ -188,7 +188,7 @@ project │ ├── prune: (1-4) │ └── interesting orderings: (+1) └── filters - └── eq [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)] + └── eq [type=bool, outer=(2), udf, constraints=(/2: (/NULL - ]), fd=()-->(2)] ├── variable: b:2 [type=int] └── udf: fn_leakproof [type=int] └── body diff --git a/pkg/sql/opt/props/logical.go b/pkg/sql/opt/props/logical.go index 1469cfbcaf41..9fcc6906c4c9 100644 --- a/pkg/sql/opt/props/logical.go +++ b/pkg/sql/opt/props/logical.go @@ -52,10 +52,12 @@ type Shared struct { // HasSubquery is true if the subtree rooted at this node contains a subquery. // The subquery can be a Subquery, Exists, Any, or ArrayFlatten expression. - // Subqueries are the only place where a relational node can be nested within a - // scalar expression. HasSubquery bool + // HasUDF is true if the subtree rooted at this node contains a UDF + // invocation. + HasUDF bool + // HasCorrelatedSubquery is true if the scalar expression tree contains a // subquery having one or more outer columns. The subquery can be a Subquery, // Exists, or Any operator. These operators usually need to be hoisted out of