Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: make some tweaks to using the vectorized engine #46080

Merged
merged 2 commits into from
Mar 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion pkg/cmd/roachtest/tpchvec.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ func registerTPCHVec(r *testRegistry) {
9: "can cause OOM",
19: "can cause OOM",
}
vectorizeOptionByVersionPrefix := map[string]string{
"v19.2": "experimental_on",
"v20.1": "on",
}

TPCHTables := []string{
"nation", "region", "part", "supplier",
Expand Down Expand Up @@ -554,7 +558,15 @@ RESTORE tpch.* FROM 'gs://cockroach-fixtures/workload/tpch/scalefactor=1/backup'
}
vectorizeSetting := "off"
if vectorize {
vectorizeSetting = "experimental_on"
for versionPrefix, vectorizeOption := range vectorizeOptionByVersionPrefix {
if strings.HasPrefix(version, versionPrefix) {
vectorizeSetting = vectorizeOption
break
}
}
if vectorizeSetting == "off" {
t.Fatal("unexpectedly didn't find the corresponding vectorize option for ON case")
}
}
cmd := fmt.Sprintf("./workload run tpch --concurrency=1 --db=tpch "+
"--max-ops=%d --queries=%d --vectorize=%s {pgurl:1-%d}",
Expand Down
32 changes: 26 additions & 6 deletions pkg/sql/colexec/execplan.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
Expand Down Expand Up @@ -139,10 +140,14 @@ type NewColOperatorResult struct {
MetadataSources []execinfrapb.MetadataSource
IsStreaming bool
// CanRunInAutoMode returns whether the result can be run in auto mode if
// IsStreaming is false. This applies to operators that can spill to disk, but
// also operators such as the hash aggregator that buffer, but not
// proportionally to the input size (in the hash aggregator's case, it is the
// number of distinct groups).
// IsStreaming is false. This applies to operators that can spill to disk,
// but also operators such as the hash aggregator that buffer, but not
// proportionally to the input size (in the hash aggregator's case, it is
// the number of distinct groups).
// NOTE: if you set this value to 'false' for some operator, make sure to
// make the corresponding adjustment to the 'isSupported' check so that we
// can plan a wrapped processor core in the vectorized flow rather than
// rejecting vectorization entirely in 'auto' mode.
CanRunInAutoMode bool
BufferingOpMemMonitors []*mon.BytesMonitor
BufferingOpMemAccounts []*mon.BoundAccount
Expand Down Expand Up @@ -185,8 +190,12 @@ const noFilterIdx = -1
// isSupported checks whether we have a columnar operator equivalent to a
// processor described by spec. Note that it doesn't perform any other checks
// (like validity of the number of inputs).
func isSupported(spec *execinfrapb.ProcessorSpec) (bool, error) {
func isSupported(
mode sessiondata.VectorizeExecMode, spec *execinfrapb.ProcessorSpec,
) (bool, error) {
core := spec.Core
isFullVectorization := mode == sessiondata.VectorizeOn ||
mode == sessiondata.VectorizeExperimentalAlways

switch {
case core.Noop != nil:
Expand Down Expand Up @@ -227,6 +236,11 @@ func isSupported(spec *execinfrapb.ProcessorSpec) (bool, error) {
if core.Distinct.ErrorOnDup != "" {
return false, errors.Newf("distinct with error on duplicates not supported")
}
if !isFullVectorization {
if len(core.Distinct.OrderedColumns) < len(core.Distinct.DistinctColumns) {
return false, errors.Newf("unordered distinct can only run in vectorize 'on' mode")
}
}
return true, nil

case core.Ordinality != nil:
Expand Down Expand Up @@ -269,6 +283,12 @@ func isSupported(spec *execinfrapb.ProcessorSpec) (bool, error) {
if _, supported := SupportedWindowFns[*wf.Func.WindowFunc]; !supported {
return false, errors.Newf("window function %s is not supported", wf.String())
}
if !isFullVectorization {
switch *wf.Func.WindowFunc {
case execinfrapb.WindowerSpec_PERCENT_RANK, execinfrapb.WindowerSpec_CUME_DIST:
return false, errors.Newf("window function %s can only run in vectorize 'on' mode", wf.String())
}
}
}
return true, nil

Expand Down Expand Up @@ -519,7 +539,7 @@ func NewColOperator(
// before any specs are planned. Used if there is a need to backtrack.
resultPreSpecPlanningStateShallowCopy := result

supported, err := isSupported(spec)
supported, err := isSupported(flowCtx.EvalCtx.SessionData.VectorizeMode, spec)
if !supported {
// We refuse to wrap LocalPlanNode processor (which is a DistSQL wrapper
// around a planNode) because it creates complications, and a flow with
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/colexec/window_functions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/testutils/colcontainerutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand All @@ -45,6 +46,7 @@ func TestWindowFunctions(t *testing.T) {
st := cluster.MakeTestingClusterSettings()
evalCtx := tree.MakeTestingEvalContext(st)
defer evalCtx.Stop(ctx)
evalCtx.SessionData.VectorizeMode = sessiondata.VectorizeOn
flowCtx := &execinfra.FlowCtx{
EvalCtx: &evalCtx,
Cfg: &execinfra.ServerConfig{
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/distsql/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ func (ds *ServerImpl) setupFlow(
BytesEncodeFormat: be,
ExtraFloatDigits: int(req.EvalContext.ExtraFloatDigits),
},
VectorizeMode: sessiondata.VectorizeExecMode(req.EvalContext.Vectorize),
}
ie := &lazyInternalExecutor{
newInternalExecutor: func() sqlutil.InternalExecutor {
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,10 @@ var VectorizeClusterMode = settings.RegisterEnumSetting(
"default vectorize mode",
"auto",
map[int64]string{
int64(sessiondata.VectorizeOff): "off",
int64(sessiondata.Vectorize192Auto): "192auto",
int64(sessiondata.VectorizeAuto): "auto",
int64(sessiondata.VectorizeExperimentalOn): "experimental_on",
int64(sessiondata.VectorizeOff): "off",
int64(sessiondata.Vectorize192Auto): "192auto",
int64(sessiondata.VectorizeAuto): "auto",
int64(sessiondata.VectorizeOn): "on",
},
)

Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/explain_plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ func populateExplain(
}
_, err := colflow.SupportsVectorized(params.ctx, flowCtx, flow.Processors, fuseOpt, nil /* output */)
isVec = isVec && (err == nil)
if !isVec {
break
}
}
}
}
Expand Down
14 changes: 9 additions & 5 deletions pkg/sql/explain_vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,15 @@ func (n *explainVecNode) startExec(params runParams) error {
flowCtx := makeFlowCtx(planCtx, plan, params)
flowCtx.Cfg.ClusterID = &distSQLPlanner.rpcCtx.ClusterID

// Temporarily set vectorize to on so that we can get the whole plan back even
// if we wouldn't support it due to lack of streaming.
origMode := flowCtx.EvalCtx.SessionData.VectorizeMode
flowCtx.EvalCtx.SessionData.VectorizeMode = sessiondata.VectorizeExperimentalOn
defer func() { flowCtx.EvalCtx.SessionData.VectorizeMode = origMode }()
// We want to get the vectorized plan that would be executed with the
// current 'vectorize' option. If 'vectorize' is set to 'off', then the
// vectorized engine is disabled, and we return an error in that case.
// With all other options, we no longer override the setting with the
// most-inclusive option (as we used to), because the plan can differ
// depending on the 'vectorize' setting.
if flowCtx.EvalCtx.SessionData.VectorizeMode == sessiondata.VectorizeOff {
return errors.New("vectorize is set to 'off'")
}

sortedFlows := make([]flowWithNode, 0, len(flows))
for nodeID, flow := range flows {
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/logictest/logic.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ var logicTestConfigs = []testClusterConfig{
name: "local-vec",
numNodes: 1,
overrideAutoStats: "false",
overrideVectorize: "experimental_on",
overrideVectorize: "on",
},
{
name: "fakedist",
Expand Down Expand Up @@ -478,15 +478,15 @@ var logicTestConfigs = []testClusterConfig{
useFakeSpanResolver: true,
overrideDistSQLMode: "on",
overrideAutoStats: "false",
overrideVectorize: "experimental_on",
overrideVectorize: "on",
},
{
name: "fakedist-vec-disk",
numNodes: 3,
useFakeSpanResolver: true,
overrideDistSQLMode: "on",
overrideAutoStats: "false",
overrideVectorize: "experimental_on",
overrideVectorize: "on",
sqlExecUseDisk: true,
skipShort: true,
},
Expand Down Expand Up @@ -524,7 +524,7 @@ var logicTestConfigs = []testClusterConfig{
name: "5node-dist-vec",
numNodes: 5,
overrideDistSQLMode: "on",
overrideVectorize: "experimental_on",
overrideVectorize: "on",
overrideAutoStats: "false",
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ https://cockroachdb.github.io/distsqlplan/decode.html#eJzsmW1P40YQx9_3U6z2Fegc7F
query T
SELECT url FROM [EXPLAIN ANALYZE (DISTSQL) SELECT DISTINCT(kw.w) FROM kv JOIN kw ON kv.k = kw.w ORDER BY kw.w]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzkWV9v4rgXff99Cus-tfqFSZw_FCKN1HbKapntwiz0YWdHPKTEWyIgYW1Tpqr63VcJdGlIaztNJkaat8kfx-den3PPnPII7J8F-DDuXfc-3aA1XaBfRsPf0bfen1-uL_oDdDG4uP76Vw-dXPXHN-M_rk_R7tX0uj_4dINO5psPm9Ptsvk9-jzsD9B8g4YDNL__MEcfUfocDUdXvRG6_JpdTcCAOAnJIFgSBv43wGCADQY4YIALBngwMWBFkylhLKHpK4_Zgn74HXzLgCherXl6e2LANKEE_EfgEV8Q8OEmuF2QEQlCQk0LDAgJD6JFts2KRsuAPpzP78GA8SqImY9aZrrxcM19dJ7CoMmGIUqC0EfpJePBYoF4tCQ-shgYcPvAyfMLdhddwuTJgGTNd4D2OG4f0CxgszyCcwyTp0n22TsCPn4y3ldY-43CNvvCTPyyNLvp0uw3S9t_Zx0nNCSUhLkvTdKVslde6c-vAZt9TqKYUBMfHPyC_M1PzvHpRxrdzbJ_5RqTPkYH3cnuFVqUrT98dXuz8C7jCSUhYlFIfJS9AwYsg-9oSZYJfUBrRtJGW-i36HL3JIzYfHffKvR_31unDG2uIsajeMpN7BUOTM6JAl68xfsWNLfCsYvA25YYvFcHeK9MX8cJ5YSaNj4E9v8KyErRoJ2Di9WnB5aPRdNumc6xDMYSpZ0pDMZcaZoHI254MOJKg9FSH4yW4mBMP_eaIFLiVxiKEsrsh2JbPFcsJfWKcOUnoq1OZVtBpU7LdI9FpSVK6yioNFeaZpXaDavU_klUKqHMXqVnzarUUaeyo6BSt2V6x6LSEqV1FVSaK02zSp2GVepUUqmrrlJXf8iQ0Gav1I5YqW79IcNVZ7SrIFavdSxSLVGYJ5dqy8QoiEOEUcJnhB6LaN2GRev-JNYqIc9esN1mrVWSqkeErZKYkcNzf_XLVnrYJLwjW_KwZE2n5AtNptk228thti4LBCFhfPvU3l704-dHjAfZt3eVJ2tOdrUXS33lVCBjpvr-nbr3f5YoIzH_7zTKIMK1t6Q6pLYaJNwcJNvS0CVcgsr2D6CyeP9O3ftX7giuvSXVIbXVINVKZQlvLA1dsg8hWS8hOTlE1uFiR7jYzSvzcLErXOyJd_aEi-0836wfIMF2s26mcJJiRLX7W8n9tbiZBJION5PwRoebnTXrZpUR1e5vJffX4mYSSDrcTMIbHW7WEdpCV-wp3TJu9qIcue03HH1U_idyfGlIAql-AymNQIdfyLijJf40nH-qQ9ISiZrORKURaIlAR5iBsDgEYUkKwqViUCnjKGQk_cYhhqTFOMSQtCQPGabaraQ0d7QYRyG26zcOMSQtxiGGpOePaRJMtVtJae5oMY5Cms8bx5nEOAoJqjbjKASh9xhHvcYvhqTFOMSQ9BiHBJOicTSJqX4rKSIoZPP3GEe9fBZD0mIcYkh6jEOCSdE4msRUv5UUf3UppPr87xdYbBx2IUS92zgmT__7NwAA__89IuzF
https://cockroachdb.github.io/distsqlplan/decode.html#eJzUmF9vo0YUxd_7KUb3KVFxYAZwHKSVkt24qrepvbXz0O3KD8RMY2QM7swQJ4ry3SvsXXmBeC4EyyJv4c-PmXM5597gZ5D_ReDBpH_T_3RLUhGR38ajP8m3_t9fbq4GQ3I1vLr5-k-fnFwPJreTv25Oyfdbs-PB8NMtOVmsz9anW2zxQD6PBkOyWJPRkCwezhbkA8muk9H4uj8mH79ujqZgQJwEfOgvuQTvG1AwgIEBNhjggAEuTA1YiWTGpUxEdsvzBhgEj-BZBoTxKlXZ6akBs0Rw8J5BhSri4MGtfxfxMfcDLkwLDAi48sNos8xKhEtfPF0uHsCAycqPpUc6ZrbwKFUeucy2ceer2ZxLkqRqlZ3Mzql0FRVOSR7xmQofQvXkEevMylaSyo8iosIl94glYfpiwBbZCvixwbsnMvflPL-1SwrTl-nmGfccPPpivE1xd4_i9U6xSX_WzFqjme3VvHtOGici4IIHuSdNMxK75ZXC_e7L-eckjLkwacEqEf9XnVzS0w8ivJ9v_jpUxfgjn6UqTOJ9VdtVxK7jgutQqjCeKZO6pTIfesMGiGQtieB-8J3L28CApf9IlnyZiCeSSp7dZZE_wo97pToNXr6uGMx6WzHcNxbDPUQx3DrvfZIIxYXJaFHorwdXum-_3dx-afVuRfH-bLKOabe-Q9fQfF6hQ-c0t7VD0yN3aNr6Do24YNehu680pbd21GNJzXdoVt3wrELI7Y7ptD7kNTT3KoQ8p7mtIWdHDjlrfcgRF-xCfv7uQ25XN7xdIeROx3RbH_Iami8qhDynua0ht48ccrv1IUdcsAt5792H3KlueKdCyN1O6yNeQ7GLR7xjUuLHAaEkUXMuWh9258hhd1ofdsQPu7BfvPuwI78ljLlcJbHkRRO8-mQre_M8uOdbJ8kkFTP-RSSzzTLbw9GG23wZBVyq7VW2PRjE20vZBqvDvSYwbbQ07TahmaWnaY2asXpwrwlMGy1dqFlNulCzEs2KtPUzbedgqwjbWtjRvy1HC7v6lV0tzJie7jYJlx5GwqWHsXAhNBIuRDUSrvMm4dLDSLj0MBYuhEbChahGwtXTuvRCb9KLJuGijeYHQiMeR2jM5BiOuBxTjs2QZkOk2RRpOEYazpFmg4TqJwlFRgltNEtoaZjUsruexuyup1G7Izhmd0Q5ZvfSIK1ldz2N2V1Po3ZHcMzuiHLM7qVpmrf7OWL30mCpZffSYKlldz2N2V1Po3ZHcMzuiHLM7qWhWsvuehqzu55G7Y7gmN0R5dhnQmmq5v_hpnq7s9Js0dp9-vLL_wEAAP__Fks7Cw==

# This query verifies stats collection for WITH ORDINALITY and the hashJoiner.
query T
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/set
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ statement ok
SET vectorize = auto

statement ok
SET vectorize = experimental_on
SET vectorize = on

statement ok
SET vectorize = experimental_always
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/vectorize_local
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ INSERT INTO d VALUES (1, 1), (1, 2)

# Test that vectorized stats are collected correctly.
statement ok
SET vectorize = experimental_on
SET vectorize = on

statement ok
SET distsql = on
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ query TTTTT
EXPLAIN (TYPES) SELECT 1 a FROM kv GROUP BY v, w::DECIMAL HAVING w::DECIMAL > 1;
----
· distributed false · ·
· vectorized false · ·
· vectorized true · ·
render · · (a int) ·
│ render 0 (1)[int] · ·
└── distinct · · (column5 decimal, v int) ·
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/dist_union
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ query TTT
EXPLAIN SELECT v FROM uniontest UNION SELECT k FROM uniontest
----
· distributed true
· vectorized false
· vectorized true
union · ·
├── scan · ·
│ table uniontest@primary
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/distinct
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ query TTT
EXPLAIN SELECT DISTINCT y AS w FROM xyz ORDER BY y
----
· distributed false
· vectorized false
· vectorized true
sort · ·
│ order +w
└── distinct · ·
Expand Down
Loading