Skip to content

Commit

Permalink
Merge #46080
Browse files Browse the repository at this point in the history
46080: sql: make some tweaks to using the vectorized engine r=yuzefovich a=yuzefovich

**colexec: enable wrapping of unordered distinct in vectorized flow**

Release justification: bug fixes and low-risk updates to new
functionality.

This commit enhances the `isSupported` check performed during vectorized
execution planning so that operators that cannot run in `auto` mode
(unordered distinct, `percent_rank`, and `cume_dist`) are reported as
"unsupported", which enables wrapping their processor cores into the
vectorized flow. Previously, we would refuse to vectorize the whole flow,
which was a mistake.

Release note: None

**sql: rename vectorize `experimental_on` to `on`**

Release justification: bug fixes and low-risk updates to new
functionality.

This commit renames the `experimental_on` option of the `vectorize`
variable to `on` since we're now confident in its correctness. This commit
also changes the behavior of `EXPLAIN (VEC)` slightly: previously, we were
setting `vectorize` to (what was) `experimental_on` and then running the
`SupportsVectorized` check. Now we return an error if `vectorize` is set
to `off`, and in all other cases we run the check with the current
`vectorize` mode. This is done so that `EXPLAIN (VEC)` better reflects
reality.

Release note (sql change): The `experimental_on` option of the `vectorize`
session variable has been renamed to `on`. The only operations that will
not run with `auto` but will run with `on` are unordered distinct and
two window functions (`percent_rank` and `cume_dist`); otherwise, the
two options are identical.

Co-authored-by: Yahor Yuzefovich <[email protected]>
  • Loading branch information
craig[bot] and yuzefovich committed Mar 17, 2020
2 parents c4426dd + c8cc60b commit 733563a
Show file tree
Hide file tree
Showing 25 changed files with 124 additions and 72 deletions.
14 changes: 13 additions & 1 deletion pkg/cmd/roachtest/tpchvec.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ func registerTPCHVec(r *testRegistry) {
9: "can cause OOM",
19: "can cause OOM",
}
vectorizeOptionByVersionPrefix := map[string]string{
"v19.2": "experimental_on",
"v20.1": "on",
}

TPCHTables := []string{
"nation", "region", "part", "supplier",
Expand Down Expand Up @@ -554,7 +558,15 @@ RESTORE tpch.* FROM 'gs://cockroach-fixtures/workload/tpch/scalefactor=1/backup'
}
vectorizeSetting := "off"
if vectorize {
vectorizeSetting = "experimental_on"
for versionPrefix, vectorizeOption := range vectorizeOptionByVersionPrefix {
if strings.HasPrefix(version, versionPrefix) {
vectorizeSetting = vectorizeOption
break
}
}
if vectorizeSetting == "off" {
t.Fatal("unexpectedly didn't find the corresponding vectorize option for ON case")
}
}
cmd := fmt.Sprintf("./workload run tpch --concurrency=1 --db=tpch "+
"--max-ops=%d --queries=%d --vectorize=%s {pgurl:1-%d}",
Expand Down
32 changes: 26 additions & 6 deletions pkg/sql/colexec/execplan.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
Expand Down Expand Up @@ -139,10 +140,14 @@ type NewColOperatorResult struct {
MetadataSources []execinfrapb.MetadataSource
IsStreaming bool
// CanRunInAutoMode returns whether the result can be run in auto mode if
// IsStreaming is false. This applies to operators that can spill to disk, but
// also operators such as the hash aggregator that buffer, but not
// proportionally to the input size (in the hash aggregator's case, it is the
// number of distinct groups).
// IsStreaming is false. This applies to operators that can spill to disk,
// but also operators such as the hash aggregator that buffer, but not
// proportionally to the input size (in the hash aggregator's case, it is
// the number of distinct groups).
// NOTE: if you set this value to 'false' for some operator, make sure to
// make the corresponding adjustment to 'isSupported' check so that we can
// plan wrapped processor core in the vectorized flow rather than rejecting
// the vectorization entirely in 'auto' mode.
CanRunInAutoMode bool
BufferingOpMemMonitors []*mon.BytesMonitor
BufferingOpMemAccounts []*mon.BoundAccount
Expand Down Expand Up @@ -185,8 +190,12 @@ const noFilterIdx = -1
// isSupported checks whether we have a columnar operator equivalent to a
// processor described by spec. Note that it doesn't perform any other checks
// (like validity of the number of inputs).
func isSupported(spec *execinfrapb.ProcessorSpec) (bool, error) {
func isSupported(
mode sessiondata.VectorizeExecMode, spec *execinfrapb.ProcessorSpec,
) (bool, error) {
core := spec.Core
isFullVectorization := mode == sessiondata.VectorizeOn ||
mode == sessiondata.VectorizeExperimentalAlways

switch {
case core.Noop != nil:
Expand Down Expand Up @@ -227,6 +236,11 @@ func isSupported(spec *execinfrapb.ProcessorSpec) (bool, error) {
if core.Distinct.ErrorOnDup != "" {
return false, errors.Newf("distinct with error on duplicates not supported")
}
if !isFullVectorization {
if len(core.Distinct.OrderedColumns) < len(core.Distinct.DistinctColumns) {
return false, errors.Newf("unordered distinct can only run in vectorize 'on' mode")
}
}
return true, nil

case core.Ordinality != nil:
Expand Down Expand Up @@ -269,6 +283,12 @@ func isSupported(spec *execinfrapb.ProcessorSpec) (bool, error) {
if _, supported := SupportedWindowFns[*wf.Func.WindowFunc]; !supported {
return false, errors.Newf("window function %s is not supported", wf.String())
}
if !isFullVectorization {
switch *wf.Func.WindowFunc {
case execinfrapb.WindowerSpec_PERCENT_RANK, execinfrapb.WindowerSpec_CUME_DIST:
return false, errors.Newf("window function %s can only run in vectorize 'on' mode", wf.String())
}
}
}
return true, nil

Expand Down Expand Up @@ -519,7 +539,7 @@ func NewColOperator(
// before any specs are planned. Used if there is a need to backtrack.
resultPreSpecPlanningStateShallowCopy := result

supported, err := isSupported(spec)
supported, err := isSupported(flowCtx.EvalCtx.SessionData.VectorizeMode, spec)
if !supported {
// We refuse to wrap LocalPlanNode processor (which is a DistSQL wrapper
// around a planNode) because it creates complications, and a flow with
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/colexec/window_functions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/testutils/colcontainerutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand All @@ -45,6 +46,7 @@ func TestWindowFunctions(t *testing.T) {
st := cluster.MakeTestingClusterSettings()
evalCtx := tree.MakeTestingEvalContext(st)
defer evalCtx.Stop(ctx)
evalCtx.SessionData.VectorizeMode = sessiondata.VectorizeOn
flowCtx := &execinfra.FlowCtx{
EvalCtx: &evalCtx,
Cfg: &execinfra.ServerConfig{
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/distsql/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ func (ds *ServerImpl) setupFlow(
BytesEncodeFormat: be,
ExtraFloatDigits: int(req.EvalContext.ExtraFloatDigits),
},
VectorizeMode: sessiondata.VectorizeExecMode(req.EvalContext.Vectorize),
}
ie := &lazyInternalExecutor{
newInternalExecutor: func() sqlutil.InternalExecutor {
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,10 @@ var VectorizeClusterMode = settings.RegisterEnumSetting(
"default vectorize mode",
"auto",
map[int64]string{
int64(sessiondata.VectorizeOff): "off",
int64(sessiondata.Vectorize192Auto): "192auto",
int64(sessiondata.VectorizeAuto): "auto",
int64(sessiondata.VectorizeExperimentalOn): "experimental_on",
int64(sessiondata.VectorizeOff): "off",
int64(sessiondata.Vectorize192Auto): "192auto",
int64(sessiondata.VectorizeAuto): "auto",
int64(sessiondata.VectorizeOn): "on",
},
)

Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/explain_plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ func populateExplain(
}
_, err := colflow.SupportsVectorized(params.ctx, flowCtx, flow.Processors, fuseOpt, nil /* output */)
isVec = isVec && (err == nil)
if !isVec {
break
}
}
}
}
Expand Down
14 changes: 9 additions & 5 deletions pkg/sql/explain_vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,15 @@ func (n *explainVecNode) startExec(params runParams) error {
flowCtx := makeFlowCtx(planCtx, plan, params)
flowCtx.Cfg.ClusterID = &distSQLPlanner.rpcCtx.ClusterID

// Temporarily set vectorize to on so that we can get the whole plan back even
// if we wouldn't support it due to lack of streaming.
origMode := flowCtx.EvalCtx.SessionData.VectorizeMode
flowCtx.EvalCtx.SessionData.VectorizeMode = sessiondata.VectorizeExperimentalOn
defer func() { flowCtx.EvalCtx.SessionData.VectorizeMode = origMode }()
// We want to get the vectorized plan which would be executed with the
// current 'vectorize' option. If 'vectorize' is set to 'off', then the
// vectorized engine is disabled, and we will return an error in such case.
// With all other options, we don't change the setting to the
// most-inclusive option as we used to because the plan can be different
// based on 'vectorize' setting.
if flowCtx.EvalCtx.SessionData.VectorizeMode == sessiondata.VectorizeOff {
return errors.New("vectorize is set to 'off'")
}

sortedFlows := make([]flowWithNode, 0, len(flows))
for nodeID, flow := range flows {
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/logictest/logic.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ var logicTestConfigs = []testClusterConfig{
name: "local-vec",
numNodes: 1,
overrideAutoStats: "false",
overrideVectorize: "experimental_on",
overrideVectorize: "on",
},
{
name: "fakedist",
Expand Down Expand Up @@ -478,15 +478,15 @@ var logicTestConfigs = []testClusterConfig{
useFakeSpanResolver: true,
overrideDistSQLMode: "on",
overrideAutoStats: "false",
overrideVectorize: "experimental_on",
overrideVectorize: "on",
},
{
name: "fakedist-vec-disk",
numNodes: 3,
useFakeSpanResolver: true,
overrideDistSQLMode: "on",
overrideAutoStats: "false",
overrideVectorize: "experimental_on",
overrideVectorize: "on",
sqlExecUseDisk: true,
skipShort: true,
},
Expand Down Expand Up @@ -524,7 +524,7 @@ var logicTestConfigs = []testClusterConfig{
name: "5node-dist-vec",
numNodes: 5,
overrideDistSQLMode: "on",
overrideVectorize: "experimental_on",
overrideVectorize: "on",
overrideAutoStats: "false",
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ https://cockroachdb.github.io/distsqlplan/decode.html#eJzsmW1P40YQx9_3U6z2Fegc7F
query T
SELECT url FROM [EXPLAIN ANALYZE (DISTSQL) SELECT DISTINCT(kw.w) FROM kv JOIN kw ON kv.k = kw.w ORDER BY kw.w]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzkWV9v4rgXff99Cus-tfqFSZw_FCKN1HbKapntwiz0YWdHPKTEWyIgYW1Tpqr63VcJdGlIaztNJkaat8kfx-den3PPnPII7J8F-DDuXfc-3aA1XaBfRsPf0bfen1-uL_oDdDG4uP76Vw-dXPXHN-M_rk_R7tX0uj_4dINO5psPm9Ptsvk9-jzsD9B8g4YDNL__MEcfUfocDUdXvRG6_JpdTcCAOAnJIFgSBv43wGCADQY4YIALBngwMWBFkylhLKHpK4_Zgn74HXzLgCherXl6e2LANKEE_EfgEV8Q8OEmuF2QEQlCQk0LDAgJD6JFts2KRsuAPpzP78GA8SqImY9aZrrxcM19dJ7CoMmGIUqC0EfpJePBYoF4tCQ-shgYcPvAyfMLdhddwuTJgGTNd4D2OG4f0CxgszyCcwyTp0n22TsCPn4y3ldY-43CNvvCTPyyNLvp0uw3S9t_Zx0nNCSUhLkvTdKVslde6c-vAZt9TqKYUBMfHPyC_M1PzvHpRxrdzbJ_5RqTPkYH3cnuFVqUrT98dXuz8C7jCSUhYlFIfJS9AwYsg-9oSZYJfUBrRtJGW-i36HL3JIzYfHffKvR_31unDG2uIsajeMpN7BUOTM6JAl68xfsWNLfCsYvA25YYvFcHeK9MX8cJ5YSaNj4E9v8KyErRoJ2Di9WnB5aPRdNumc6xDMYSpZ0pDMZcaZoHI254MOJKg9FSH4yW4mBMP_eaIFLiVxiKEsrsh2JbPFcsJfWKcOUnoq1OZVtBpU7LdI9FpSVK6yioNFeaZpXaDavU_klUKqHMXqVnzarUUaeyo6BSt2V6x6LSEqV1FVSaK02zSp2GVepUUqmrrlJXf8iQ0Gav1I5YqW79IcNVZ7SrIFavdSxSLVGYJ5dqy8QoiEOEUcJnhB6LaN2GRev-JNYqIc9esN1mrVWSqkeErZKYkcNzf_XLVnrYJLwjW_KwZE2n5AtNptk228thti4LBCFhfPvU3l704-dHjAfZt3eVJ2tOdrUXS33lVCBjpvr-nbr3f5YoIzH_7zTKIMK1t6Q6pLYaJNwcJNvS0CVcgsr2D6CyeP9O3ftX7giuvSXVIbXVINVKZQlvLA1dsg8hWS8hOTlE1uFiR7jYzSvzcLErXOyJd_aEi-0836wfIMF2s26mcJJiRLX7W8n9tbiZBJION5PwRoebnTXrZpUR1e5vJffX4mYSSDrcTMIbHW7WEdpCV-wp3TJu9qIcue03HH1U_idyfGlIAql-AymNQIdfyLijJf40nH-qQ9ISiZrORKURaIlAR5iBsDgEYUkKwqViUCnjKGQk_cYhhqTFOMSQtCQPGabaraQ0d7QYRyG26zcOMSQtxiGGpOePaRJMtVtJae5oMY5Cms8bx5nEOAoJqjbjKASh9xhHvcYvhqTFOMSQ9BiHBJOicTSJqX4rKSIoZPP3GEe9fBZD0mIcYkh6jEOCSdE4msRUv5UUf3UppPr87xdYbBx2IUS92zgmT__7NwAA__89IuzF
https://cockroachdb.github.io/distsqlplan/decode.html#eJzUmF9vo0YUxd_7KUb3KVFxYAZwHKSVkt24qrepvbXz0O3KD8RMY2QM7swQJ4ry3SvsXXmBeC4EyyJv4c-PmXM5597gZ5D_ReDBpH_T_3RLUhGR38ajP8m3_t9fbq4GQ3I1vLr5-k-fnFwPJreTv25Oyfdbs-PB8NMtOVmsz9anW2zxQD6PBkOyWJPRkCwezhbkA8muk9H4uj8mH79ujqZgQJwEfOgvuQTvG1AwgIEBNhjggAEuTA1YiWTGpUxEdsvzBhgEj-BZBoTxKlXZ6akBs0Rw8J5BhSri4MGtfxfxMfcDLkwLDAi48sNos8xKhEtfPF0uHsCAycqPpUc6ZrbwKFUeucy2ceer2ZxLkqRqlZ3Mzql0FRVOSR7xmQofQvXkEevMylaSyo8iosIl94glYfpiwBbZCvixwbsnMvflPL-1SwrTl-nmGfccPPpivE1xd4_i9U6xSX_WzFqjme3VvHtOGici4IIHuSdNMxK75ZXC_e7L-eckjLkwacEqEf9XnVzS0w8ivJ9v_jpUxfgjn6UqTOJ9VdtVxK7jgutQqjCeKZO6pTIfesMGiGQtieB-8J3L28CApf9IlnyZiCeSSp7dZZE_wo97pToNXr6uGMx6WzHcNxbDPUQx3DrvfZIIxYXJaFHorwdXum-_3dx-afVuRfH-bLKOabe-Q9fQfF6hQ-c0t7VD0yN3aNr6Do24YNehu680pbd21GNJzXdoVt3wrELI7Y7ptD7kNTT3KoQ8p7mtIWdHDjlrfcgRF-xCfv7uQ25XN7xdIeROx3RbH_Iami8qhDynua0ht48ccrv1IUdcsAt5792H3KlueKdCyN1O6yNeQ7GLR7xjUuLHAaEkUXMuWh9258hhd1ofdsQPu7BfvPuwI78ljLlcJbHkRRO8-mQre_M8uOdbJ8kkFTP-RSSzzTLbw9GG23wZBVyq7VW2PRjE20vZBqvDvSYwbbQ07TahmaWnaY2asXpwrwlMGy1dqFlNulCzEs2KtPUzbedgqwjbWtjRvy1HC7v6lV0tzJie7jYJlx5GwqWHsXAhNBIuRDUSrvMm4dLDSLj0MBYuhEbChahGwtXTuvRCb9KLJuGijeYHQiMeR2jM5BiOuBxTjs2QZkOk2RRpOEYazpFmg4TqJwlFRgltNEtoaZjUsruexuyup1G7Izhmd0Q5ZvfSIK1ldz2N2V1Po3ZHcMzuiHLM7qVpmrf7OWL30mCpZffSYKlldz2N2V1Po3ZHcMzuiHLM7qWhWsvuehqzu55G7Y7gmN0R5dhnQmmq5v_hpnq7s9Js0dp9-vLL_wEAAP__Fks7Cw==

# This query verifies stats collection for WITH ORDINALITY and the hashJoiner.
query T
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/set
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ statement ok
SET vectorize = auto

statement ok
SET vectorize = experimental_on
SET vectorize = on

statement ok
SET vectorize = experimental_always
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/vectorize_local
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ INSERT INTO d VALUES (1, 1), (1, 2)

# Test that vectorized stats are collected correctly.
statement ok
SET vectorize = experimental_on
SET vectorize = on

statement ok
SET distsql = on
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ query TTTTT
EXPLAIN (TYPES) SELECT 1 a FROM kv GROUP BY v, w::DECIMAL HAVING w::DECIMAL > 1;
----
· distributed false · ·
· vectorized false · ·
· vectorized true · ·
render · · (a int) ·
│ render 0 (1)[int] · ·
└── distinct · · (column5 decimal, v int) ·
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/dist_union
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ query TTT
EXPLAIN SELECT v FROM uniontest UNION SELECT k FROM uniontest
----
· distributed true
· vectorized false
· vectorized true
union · ·
├── scan · ·
│ table uniontest@primary
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/distinct
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ query TTT
EXPLAIN SELECT DISTINCT y AS w FROM xyz ORDER BY y
----
· distributed false
· vectorized false
· vectorized true
sort · ·
│ order +w
└── distinct · ·
Expand Down
Loading

0 comments on commit 733563a

Please sign in to comment.