From 09011ffe982e75b54461a7d8d37d001d5a11b2ea Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Fri, 13 Mar 2020 18:27:22 -0700 Subject: [PATCH] sql: rename vectorize `experimental_on` to `on` Release justification: bug fixes and low-risk updates to new functionality. This commit renames `experimental_on` option of `vectorize` variable to `on` since we're now confident in the correctness. This commit also changes the behavior of `EXPLAIN (VEC)` slightly - previously, we were setting `vectorize` to (what was) `experimental_on` and then running `SupportsVectorized` check. Now we will return an error if `vectorize` is set to `off` and in other cases we will run the check with the current `vectorize` mode. This is done so that `EXPLAIN (VEC)` better reflects reality. Release note (sql change): `experimental_on` option for `vectorize` session variable has been renamed to `on`. The only things that will not run with `auto` but will run with `on` are unordered distinct and two window functions (`percent_rank` and `cume_dist`), otherwise, the two options are identical. --- pkg/cmd/roachtest/tpchvec.go | 15 +++++++++- pkg/sql/colexec/execplan.go | 6 ++-- pkg/sql/colexec/window_functions_test.go | 2 +- pkg/sql/exec_util.go | 8 +++--- pkg/sql/explain_vec.go | 14 ++++++---- pkg/sql/logictest/logic.go | 8 +++--- pkg/sql/logictest/testdata/logic_test/set | 2 +- .../testdata/logic_test/vectorize_local | 2 +- pkg/sql/rowexec/processors_test.go | 2 +- pkg/sql/sessiondata/session_data.go | 22 +++++++-------- pkg/sql/trace_test.go | 2 +- pkg/sql/vars.go | 2 +- pkg/workload/querybench/query_bench.go | 28 +++++++++++++------ 13 files changed, 70 insertions(+), 43 deletions(-) diff --git a/pkg/cmd/roachtest/tpchvec.go b/pkg/cmd/roachtest/tpchvec.go index f9c169c01189..92171e1bae2e 100644 --- a/pkg/cmd/roachtest/tpchvec.go +++ b/pkg/cmd/roachtest/tpchvec.go @@ -49,6 +49,11 @@ func registerTPCHVec(r *testRegistry) { 19: "can cause OOM", } + vectorizeOptionByVersionPrefix := map[string]string{ + "v19.2": "experimental_on", + "v20.1": "on", + } + runTPCHVec := func(ctx context.Context, t *test, c *cluster) { TPCHTables := []string{ "nation", "region", "part", "supplier", @@ -507,7 +512,15 @@ RESTORE tpch.* FROM 'gs://cockroach-fixtures/workload/tpch/scalefactor=1/backup' } vectorizeSetting := "off" if vectorize { - vectorizeSetting = "experimental_on" + for versionPrefix, vectorizeOption := range vectorizeOptionByVersionPrefix { + if strings.HasPrefix(version, versionPrefix) { + vectorizeSetting = vectorizeOption + break + } + } + if vectorizeSetting == "off" { + t.Fatal("unexpectedly didn't find the corresponding vectorize option for ON case") + } } cmd := fmt.Sprintf("./workload run tpch --concurrency=1 --db=tpch "+ "--max-ops=%d --queries=%d --vectorize=%s {pgurl:1-%d}", diff --git a/pkg/sql/colexec/execplan.go b/pkg/sql/colexec/execplan.go index e22d9640c8e5..1fa87b49f74a 100644 --- a/pkg/sql/colexec/execplan.go +++ b/pkg/sql/colexec/execplan.go @@ -194,7 +194,7 @@ func isSupported( mode sessiondata.VectorizeExecMode, spec *execinfrapb.ProcessorSpec, ) (bool, error) { core := spec.Core - isFullVectorization := mode == sessiondata.VectorizeExperimentalOn || + isFullVectorization := mode == sessiondata.VectorizeOn || mode == sessiondata.VectorizeExperimentalAlways switch { @@ -238,7 +238,7 @@ func isSupported( } if !isFullVectorization { if len(core.Distinct.OrderedColumns) < len(core.Distinct.DistinctColumns) { - return false, errors.Newf("unordered distinct can only run in 'experimental_on' vectorize mode") + return false, errors.Newf("unordered distinct can only run in vectorize 'on' mode") } } return true, nil @@ -286,7 +286,7 @@ func isSupported( if !isFullVectorization { switch *wf.Func.WindowFunc { case execinfrapb.WindowerSpec_PERCENT_RANK, execinfrapb.WindowerSpec_CUME_DIST: - return false, errors.Newf("window function %s can only run in 'experimental_on' vectorize mode", wf.String()) + return false, errors.Newf("window function %s can only run in vectorize 'on' mode", wf.String()) } } } diff --git a/pkg/sql/colexec/window_functions_test.go b/pkg/sql/colexec/window_functions_test.go index a1217604da41..abe605508848 100644 --- a/pkg/sql/colexec/window_functions_test.go +++ b/pkg/sql/colexec/window_functions_test.go @@ -41,7 +41,7 @@ func TestWindowFunctions(t *testing.T) { st := cluster.MakeTestingClusterSettings() evalCtx := tree.MakeTestingEvalContext(st) defer evalCtx.Stop(ctx) - evalCtx.SessionData.VectorizeMode = sessiondata.VectorizeExperimentalOn + evalCtx.SessionData.VectorizeMode = sessiondata.VectorizeOn flowCtx := &execinfra.FlowCtx{ EvalCtx: &evalCtx, Cfg: &execinfra.ServerConfig{ diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 111815cfa577..2584ef0ed8e4 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -193,10 +193,10 @@ var VectorizeClusterMode = settings.RegisterEnumSetting( "default vectorize mode", "auto", map[int64]string{ - int64(sessiondata.VectorizeOff): "off", - int64(sessiondata.Vectorize192Auto): "192auto", - int64(sessiondata.VectorizeAuto): "auto", - int64(sessiondata.VectorizeExperimentalOn): "experimental_on", + int64(sessiondata.VectorizeOff): "off", + int64(sessiondata.Vectorize192Auto): "192auto", + int64(sessiondata.VectorizeAuto): "auto", + int64(sessiondata.VectorizeOn): "on", }, ) diff --git a/pkg/sql/explain_vec.go b/pkg/sql/explain_vec.go index 6a9614fcd464..c9c6420ccfef 100644 --- a/pkg/sql/explain_vec.go +++ b/pkg/sql/explain_vec.go @@ -75,11 +75,15 @@ func (n *explainVecNode) startExec(params runParams) error { flowCtx := makeFlowCtx(planCtx, plan, params) flowCtx.Cfg.ClusterID = &distSQLPlanner.rpcCtx.ClusterID - // Temporarily set vectorize to on so that we can get the whole plan back even - // if we wouldn't support it due to lack of streaming. - origMode := flowCtx.EvalCtx.SessionData.VectorizeMode - flowCtx.EvalCtx.SessionData.VectorizeMode = sessiondata.VectorizeExperimentalOn - defer func() { flowCtx.EvalCtx.SessionData.VectorizeMode = origMode }() + // We want to get the vectorized plan which would be executed with the + // current 'vectorize' option. If 'vectorize' is set to 'off', then the + // vectorized engine is disabled, and we will return an error in such case. + // With all other options, we don't change the setting to the + // most-inclusive option as we used to because the plan can be different + // based on 'vectorize' setting. + if flowCtx.EvalCtx.SessionData.VectorizeMode == sessiondata.VectorizeOff { + return errors.New("vectorize is set to 'off'") + } sortedFlows := make([]flowWithNode, 0, len(flows)) for nodeID, flow := range flows { diff --git a/pkg/sql/logictest/logic.go b/pkg/sql/logictest/logic.go index 806ae730af58..fa1e268855ac 100644 --- a/pkg/sql/logictest/logic.go +++ b/pkg/sql/logictest/logic.go @@ -438,7 +438,7 @@ var logicTestConfigs = []testClusterConfig{ name: "local-vec", numNodes: 1, overrideAutoStats: "false", - overrideVectorize: "experimental_on", + overrideVectorize: "on", }, { name: "fakedist", @@ -470,7 +470,7 @@ var logicTestConfigs = []testClusterConfig{ useFakeSpanResolver: true, overrideDistSQLMode: "on", overrideAutoStats: "false", - overrideVectorize: "experimental_on", + overrideVectorize: "on", }, { name: "fakedist-vec-disk", @@ -478,7 +478,7 @@ var logicTestConfigs = []testClusterConfig{ useFakeSpanResolver: true, overrideDistSQLMode: "on", overrideAutoStats: "false", - overrideVectorize: "experimental_on", + overrideVectorize: "on", sqlExecUseDisk: true, skipShort: true, }, @@ -516,7 +516,7 @@ var logicTestConfigs = []testClusterConfig{ name: "5node-dist-vec", numNodes: 5, overrideDistSQLMode: "on", - overrideVectorize: "experimental_on", + overrideVectorize: "on", overrideAutoStats: "false", }, { diff --git a/pkg/sql/logictest/testdata/logic_test/set b/pkg/sql/logictest/testdata/logic_test/set index 6011863ad875..db9d38e5ae36 100644 --- a/pkg/sql/logictest/testdata/logic_test/set +++ b/pkg/sql/logictest/testdata/logic_test/set @@ -172,7 +172,7 @@ statement ok SET vectorize = auto statement ok -SET vectorize = experimental_on +SET vectorize = on statement ok SET vectorize = experimental_always diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize_local b/pkg/sql/logictest/testdata/logic_test/vectorize_local index c678862017aa..0c1eb6821da7 100644 --- a/pkg/sql/logictest/testdata/logic_test/vectorize_local +++ b/pkg/sql/logictest/testdata/logic_test/vectorize_local @@ -30,7 +30,7 @@ INSERT INTO d VALUES (1, 1), (1, 2) # Test that vectorized stats are collected correctly. statement ok -SET vectorize = experimental_on +SET vectorize = on statement ok SET distsql = on diff --git a/pkg/sql/rowexec/processors_test.go b/pkg/sql/rowexec/processors_test.go index c56af9cea858..65ae36b4458d 100644 --- a/pkg/sql/rowexec/processors_test.go +++ b/pkg/sql/rowexec/processors_test.go @@ -619,7 +619,7 @@ func TestDrainingProcessorSwallowsUncertaintyError(t *testing.T) { t.Fatal(err) } if vectorize { - if _, err := conn.Exec("set vectorize='experimental_on'"); err != nil { + if _, err := conn.Exec("set vectorize='on'"); err != nil { t.Fatal(err) } } diff --git a/pkg/sql/sessiondata/session_data.go b/pkg/sql/sessiondata/session_data.go index d8f41e058ffa..9e7ac8ea78d9 100644 --- a/pkg/sql/sessiondata/session_data.go +++ b/pkg/sql/sessiondata/session_data.go @@ -265,14 +265,14 @@ const ( // VectorizeOff means that columnar execution is disabled. VectorizeOff VectorizeExecMode = iota // Vectorize192Auto means that that any supported queries that use only - // streaming operators (i.e. those that do not require any buffering) will be - // run using the columnar execution. - // TODO(asubiotto): This was the auto setting for 19.2 and is kept around as - // an escape hatch. Remove in 20.2. + // streaming operators (i.e. those that do not require any buffering) will + // be run using the columnar execution. + // TODO(asubiotto): This was the auto setting for 19.2 and is kept around + // as an escape hatch. Remove in 20.2. Vectorize192Auto - // VectorizeExperimentalOn means that any supported queries will be run using - // the columnar execution on. - VectorizeExperimentalOn + // VectorizeOn means that any supported queries will be run using the + // columnar execution. + VectorizeOn // VectorizeExperimentalAlways means that we attempt to vectorize all // queries; unsupported queries will fail. Mostly used for testing. VectorizeExperimentalAlways @@ -291,8 +291,8 @@ func (m VectorizeExecMode) String() string { return "192auto" case VectorizeAuto: return "auto" - case VectorizeExperimentalOn: - return "experimental_on" + case VectorizeOn: + return "on" case VectorizeExperimentalAlways: return "experimental_always" default: @@ -311,8 +311,8 @@ func VectorizeExecModeFromString(val string) (VectorizeExecMode, bool) { m = Vectorize192Auto case "AUTO": m = VectorizeAuto - case "EXPERIMENTAL_ON": - m = VectorizeExperimentalOn + case "ON": + m = VectorizeOn case "EXPERIMENTAL_ALWAYS": m = VectorizeExperimentalAlways default: diff --git a/pkg/sql/trace_test.go b/pkg/sql/trace_test.go index 8623b236f966..297f05740beb 100644 --- a/pkg/sql/trace_test.go +++ b/pkg/sql/trace_test.go @@ -222,7 +222,7 @@ func TestTrace(t *testing.T) { if _, err := sqlDB.Exec("SET distsql = off"); err != nil { t.Fatal(err) } - if _, err := sqlDB.Exec("SET vectorize = experimental_on"); err != nil { + if _, err := sqlDB.Exec("SET vectorize = on"); err != nil { t.Fatal(err) } if _, err := sqlDB.Exec("SET tracing = on; SELECT * FROM test.foo; SET tracing = off"); err != nil { diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index a1dd7f0b8271..37cba16cac09 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -392,7 +392,7 @@ var varGen = map[string]sessionVar{ mode, ok := sessiondata.VectorizeExecModeFromString(s) if !ok { return newVarValueError(`vectorize`, s, - "off", "auto", "experimental_on", "experimental_always") + "off", "auto", "on", "experimental_always") } m.SetVectorize(mode) return nil diff --git a/pkg/workload/querybench/query_bench.go b/pkg/workload/querybench/query_bench.go index ea28745993ef..e4b5d3ac5266 100644 --- a/pkg/workload/querybench/query_bench.go +++ b/pkg/workload/querybench/query_bench.go @@ -64,15 +64,20 @@ var queryBenchMeta = workload.Meta{ }, } -// vectorizeSetting19_1Translation is a mapping from the 19.2+ vectorize session +// vectorizeSetting19_1Translation is a mapping from the 20.1+ vectorize session // variable value to the 19.1 syntax. var vectorizeSetting19_1Translation = map[string]string{ - "experimental_on": "on", "experimental_always": "always", // Translate auto as on, this was not an option in 19.1. "auto": "on", } +// vectorizeSetting19_2Translation is a mapping from the 20.1+ vectorize session +// variable value to the 19.2 syntax. +var vectorizeSetting19_2Translation = map[string]string{ + "on": "experimental_on", +} + // Meta implements the Generator interface. func (*queryBench) Meta() workload.Meta { return queryBenchMeta } @@ -125,14 +130,19 @@ func (g *queryBench) Ops(urls []string, reg *histogram.Registry) (workload.Query if g.vectorize != "" { _, err := db.Exec("SET vectorize=" + g.vectorize) if err != nil && strings.Contains(err.Error(), "unrecognized configuration") { - if _, ok := vectorizeSetting19_1Translation[g.vectorize]; !ok { - // Unrecognized setting value. - return workload.QueryLoad{}, err + if _, ok := vectorizeSetting19_2Translation[g.vectorize]; ok { + // Fall back to using the pre-20.1 vectorize options. + _, err = db.Exec("SET vectorize=" + vectorizeSetting19_2Translation[g.vectorize]) + } else { + if _, ok := vectorizeSetting19_1Translation[g.vectorize]; !ok { + // Unrecognized setting value. + return workload.QueryLoad{}, err + } + // Fall back to using the pre-19.2 syntax. + // TODO(asubiotto): Remove this once we stop running this test + // against 19.1. + _, err = db.Exec("SET experimental_vectorize=" + vectorizeSetting19_1Translation[g.vectorize]) } - // Fall back to using the pre-19.2 syntax. - // TODO(asubiotto): Remove this once we stop running this test against - // 19.1. - _, err = db.Exec("SET experimental_vectorize=" + vectorizeSetting19_1Translation[g.vectorize]) } if err != nil { return workload.QueryLoad{}, err