From 75a4fae5a47a388c7cb59f8d74d21b9e257e3feb Mon Sep 17 00:00:00 2001
From: Rohan Yadav
Date: Thu, 11 Jul 2019 11:12:45 -0400
Subject: [PATCH] exec: Add support for vectorized engine to use builtin functions.

Added support for the vectorized engine to use all builtin functions
that are supported by distsql. The mechanism for doing this is
relatively inefficient, however -- it converts each row in a batch back
into a datum row and uses the expression evaluator on that row to
compute the builtin function. The goal is to extend this work in the
future with faster (vectorized) implementations of specific builtin
functions, and to switch to those instead of the default builtin
operator added here for improved performance.

This includes refactoring the materializer's batch-to-row conversion
code into a separate, reusable function.

Release note: None
---
 pkg/sql/distsqlrun/column_exec_setup.go      |  13 ++
 pkg/sql/distsqlrun/materializer.go           |  45 +----
 pkg/sql/exec/builtin_funcs.go                | 173 ++++++++++++++++++
 pkg/sql/exec/builtin_funcs_test.go           | 101 ++++++++++
 pkg/sql/exec/vec_elem_to_datum.go            |  65 +++++++
 .../logictest/testdata/logic_test/vectorize  |  30 +++
 6 files changed, 384 insertions(+), 43 deletions(-)
 create mode 100644 pkg/sql/exec/builtin_funcs.go
 create mode 100644 pkg/sql/exec/builtin_funcs_test.go
 create mode 100644 pkg/sql/exec/vec_elem_to_datum.go

diff --git a/pkg/sql/distsqlrun/column_exec_setup.go b/pkg/sql/distsqlrun/column_exec_setup.go
index cb2898d8147a..99eac2ad234f 100644
--- a/pkg/sql/distsqlrun/column_exec_setup.go
+++ b/pkg/sql/distsqlrun/column_exec_setup.go
@@ -614,6 +614,8 @@ func planProjectionOperators(
 		return planProjectionExpr(ctx, t.Operator, t.TypedLeft(), t.TypedRight(), columnTypes, input)
 	case *tree.BinaryExpr:
 		return planProjectionExpr(ctx, t.Operator, t.TypedLeft(), t.TypedRight(), columnTypes, input)
+	case *tree.FuncExpr:
+		return planBuiltinFunctionExpr(ctx, t, columnTypes, input)
 	case tree.Datum:
 		datumType := t.ResolvedType()
 		ct := columnTypes
@@ -637,6 +639,17 @@ func planProjectionOperators(
 	}
 }
 
+func planBuiltinFunctionExpr(
+	ctx *tree.EvalContext, f *tree.FuncExpr, columnTypes []semtypes.T, input exec.Operator,
+) (exec.Operator, int, []semtypes.T, error) {
+	funcOutputType := f.ResolvedType()
+	ct := columnTypes
+	resultIdx := len(ct)
+	ct = append(ct, *funcOutputType)
+	op, err := exec.NewBuiltinFunctionOperator(ctx, ct, input, f, resultIdx)
+	return op, resultIdx, ct, err
+}
+
 func planProjectionExpr(
 	ctx *tree.EvalContext,
 	binOp tree.Operator,
diff --git a/pkg/sql/distsqlrun/materializer.go b/pkg/sql/distsqlrun/materializer.go
index fe2dd72b5d1d..b6471708997f 100644
--- a/pkg/sql/distsqlrun/materializer.go
+++ b/pkg/sql/distsqlrun/materializer.go
@@ -12,8 +12,6 @@ package distsqlrun
 
 import (
 	"context"
-	"fmt"
-	"unsafe"
 
 	"github.com/cockroachdb/cockroach/pkg/sql/distsqlpb"
 	"github.com/cockroachdb/cockroach/pkg/sql/exec"
@@ -21,8 +19,6 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
 	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
-	"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
-	"github.com/lib/pq/oid"
 )
 
 // materializer converts an exec.Operator input into a RowSource.
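To make the fallback described in the commit message concrete, here is a minimal, self-contained sketch of the per-row evaluation strategy. It uses plain Go with toy types; the names batch, evalRowWise, and evalFn are illustrative only and are not part of this patch -- the real logic is defaultBuiltinOperator.Next in pkg/sql/exec/builtin_funcs.go below.

package main

import "fmt"

// batch is a toy stand-in for a columnar batch: one slice per column.
type batch [][]interface{}

// evalRowWise models the fallback path: for every row, gather the column
// values into a row slice, evaluate a row-oriented function on it, and
// collect the results as a new output column.
func evalRowWise(b batch, evalFn func(row []interface{}) interface{}) []interface{} {
	n := len(b[0])
	out := make([]interface{}, n)
	row := make([]interface{}, len(b))
	for i := 0; i < n; i++ {
		for j := range b {
			row[j] = b[j][i] // column-major -> row-major conversion, once per row
		}
		out[i] = evalFn(row) // row-at-a-time evaluation, the expensive part
	}
	return out
}

func main() {
	b := batch{{"Hello", "There"}}
	// Stand-in for a builtin such as substring(@1, 1, 2).
	substr := func(row []interface{}) interface{} { return row[0].(string)[:2] }
	fmt.Println(evalRowWise(b, substr)) // [He Th]
}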
@@ -140,46 +136,9 @@ func (m *materializer) Next() (sqlbase.EncDatumRow, *distsqlpb.ProducerMetadata)
 					continue
 				}
 			}
-			ct := typs[outIdx]
-			switch ct.Family() {
-			case types.BoolFamily:
-				if col.Bool()[rowIdx] {
-					m.row[outIdx].Datum = tree.DBoolTrue
-				} else {
-					m.row[outIdx].Datum = tree.DBoolFalse
-				}
-			case types.IntFamily:
-				switch ct.Width() {
-				case 8:
-					m.row[outIdx].Datum = m.da.NewDInt(tree.DInt(col.Int8()[rowIdx]))
-				case 16:
-					m.row[outIdx].Datum = m.da.NewDInt(tree.DInt(col.Int16()[rowIdx]))
-				case 32:
-					m.row[outIdx].Datum = m.da.NewDInt(tree.DInt(col.Int32()[rowIdx]))
-				default:
-					m.row[outIdx].Datum = m.da.NewDInt(tree.DInt(col.Int64()[rowIdx]))
-				}
-			case types.FloatFamily:
-				m.row[outIdx].Datum = m.da.NewDFloat(tree.DFloat(col.Float64()[rowIdx]))
-			case types.DecimalFamily:
-				m.row[outIdx].Datum = m.da.NewDDecimal(tree.DDecimal{Decimal: col.Decimal()[rowIdx]})
-			case types.DateFamily:
-				m.row[outIdx].Datum = tree.NewDDate(pgdate.MakeCompatibleDateFromDisk(col.Int64()[rowIdx]))
-			case types.StringFamily:
-				b := col.Bytes()[rowIdx]
-				if ct.Oid() == oid.T_name {
-					m.row[outIdx].Datum = m.da.NewDString(tree.DString(*(*string)(unsafe.Pointer(&b))))
-				} else {
-					m.row[outIdx].Datum = m.da.NewDName(tree.DString(*(*string)(unsafe.Pointer(&b))))
-				}
-			case types.BytesFamily:
-				m.row[outIdx].Datum = m.da.NewDBytes(tree.DBytes(col.Bytes()[rowIdx]))
-			case types.OidFamily:
-				m.row[outIdx].Datum = m.da.NewDOid(tree.MakeDOid(tree.DInt(col.Int64()[rowIdx])))
-			default:
-				panic(fmt.Sprintf("Unsupported column type %s", ct.String()))
-			}
+
+			m.row[outIdx].Datum = exec.PhysicalTypeColElemToDatum(col, rowIdx, m.da, typs[outIdx])
 		}
 		return m.ProcessRowHelper(m.row), nil
 	}
diff --git a/pkg/sql/exec/builtin_funcs.go b/pkg/sql/exec/builtin_funcs.go
new file mode 100644
index 000000000000..083fcd53bd2b
--- /dev/null
+++ b/pkg/sql/exec/builtin_funcs.go
@@ -0,0 +1,173 @@
+// Copyright 2019 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
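+
+// builtin_funcs.go adds a fallback that lets the vectorized engine evaluate
+// arbitrary builtin functions: each batch is converted back into datum rows,
+// the row-oriented expression evaluator computes the function, and the
+// results are written into a newly appended output column. This is
+// deliberately generic rather than fast; specialized vectorized operators
+// for individual builtins can replace it later.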
+ +package exec + +import ( + "context" + "fmt" + + "github.com/cockroachdb/apd" + "github.com/cockroachdb/cockroach/pkg/sql/exec/coldata" + "github.com/cockroachdb/cockroach/pkg/sql/exec/types" + "github.com/cockroachdb/cockroach/pkg/sql/exec/types/conv" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" + semtypes "github.com/cockroachdb/cockroach/pkg/sql/types" +) + +type indexResolver struct { + row tree.Datums + colTypes []semtypes.T +} + +var _ tree.IndexedVarContainer = &indexResolver{} + +func (i *indexResolver) IndexedVarEval(idx int, ctx *tree.EvalContext) (tree.Datum, error) { + return i.row[idx].Eval(ctx) +} + +func (i *indexResolver) IndexedVarResolvedType(idx int) *semtypes.T { + return &i.colTypes[idx] +} + +func (i *indexResolver) IndexedVarNodeFormatter(idx int) tree.NodeFormatter { + n := tree.Name(fmt.Sprintf("$%d", idx)) + return &n +} + +type defaultBuiltinOperator struct { + input Operator + + evalCtx *tree.EvalContext + funcExpr *tree.FuncExpr + outputIdx int + colTypes []semtypes.T + + da sqlbase.DatumAlloc + row tree.Datums + + resolved *indexResolver +} + +func (d *defaultBuiltinOperator) Init() { + d.input.Init() +} + +func (d *defaultBuiltinOperator) Next(ctx context.Context) coldata.Batch { + batch := d.input.Next(ctx) + n := batch.Length() + + if n == 0 { + return batch + } + + outputType := d.funcExpr.ResolvedType() + outputPhysType := conv.FromColumnType(outputType) + + if d.outputIdx == batch.Width() { + batch.AppendCol(outputPhysType) + } + + converter := conv.GetDatumToPhysicalFn(outputType) + + writer := func(batch coldata.Batch, datum tree.Datum, i int) { + if datum == tree.DNull { + batch.ColVec(d.outputIdx).Nulls().SetNull(uint16(i)) + return + } + converted, _ := converter(datum) + switch outputPhysType { + case types.Bool: + batch.ColVec(d.outputIdx).Bool()[i] = converted.(bool) + case types.Bytes: + batch.ColVec(d.outputIdx).Bytes()[i] = converted.([]byte) + case types.Int8: + batch.ColVec(d.outputIdx).Int8()[i] = converted.(int8) + case types.Int16: + batch.ColVec(d.outputIdx).Int16()[i] = converted.(int16) + case types.Int32: + batch.ColVec(d.outputIdx).Int32()[i] = converted.(int32) + case types.Int64: + batch.ColVec(d.outputIdx).Int64()[i] = converted.(int64) + case types.Float32: + batch.ColVec(d.outputIdx).Float32()[i] = converted.(float32) + case types.Float64: + batch.ColVec(d.outputIdx).Float64()[i] = converted.(float64) + case types.Decimal: + batch.ColVec(d.outputIdx).Decimal()[i] = converted.(apd.Decimal) + default: + panic(fmt.Sprintf("unhandled type %s", outputPhysType)) + } + } + + if sel := batch.Selection(); sel != nil { + sel = sel[:n] + for _, i := range sel { + for j := 0; j < batch.Width(); j++ { + col := batch.ColVec(j) + if col.MaybeHasNulls() && col.Nulls().NullAt(i) { + d.row[j] = tree.DNull + } else { + d.row[j] = PhysicalTypeColElemToDatum(col, i, d.da, d.colTypes[j]) + } + } + + d.resolved.row = d.row + d.evalCtx.PushIVarContainer(d.resolved) + // Because the typechecker has passed, we won't get an error here. 
+			res, _ := d.funcExpr.Eval(d.evalCtx)
+			d.evalCtx.PopIVarContainer()
+			writer(batch, res, int(i))
+		}
+	} else {
+		for i := uint16(0); i < n; i++ {
+			for j := 0; j < batch.Width(); j++ {
+				col := batch.ColVec(j)
+				if col.MaybeHasNulls() && col.Nulls().NullAt(i) {
+					d.row[j] = tree.DNull
+				} else {
+					d.row[j] = PhysicalTypeColElemToDatum(col, i, d.da, d.colTypes[j])
+				}
+			}
+
+			d.evalCtx.PushIVarContainer(&indexResolver{row: d.row, colTypes: d.colTypes})
+			// Because the typechecker has passed, we won't get an error here.
+			res, _ := d.funcExpr.Eval(d.evalCtx)
+			d.evalCtx.PopIVarContainer()
+			writer(batch, res, int(i))
+		}
+	}
+	return batch
+}
+
+// NewBuiltinFunctionOperator returns an operator that applies builtin functions.
+func NewBuiltinFunctionOperator(
+	tctx *tree.EvalContext,
+	columnTypes []semtypes.T,
+	input Operator,
+	funcExpr *tree.FuncExpr,
+	outputIdx int,
+) (Operator, error) {
+
+	// For now, return the default builtin operator. Future work can specialize
+	// out the operators to efficient implementations of specific builtins.
+	op := &defaultBuiltinOperator{
+		input:     input,
+		evalCtx:   tctx,
+		funcExpr:  funcExpr,
+		outputIdx: outputIdx,
+		colTypes:  columnTypes,
+		row:       make(tree.Datums, len(columnTypes)),
+		resolved:  &indexResolver{colTypes: columnTypes},
+	}
+
+	return op, nil
+}
diff --git a/pkg/sql/exec/builtin_funcs_test.go b/pkg/sql/exec/builtin_funcs_test.go
new file mode 100644
index 000000000000..f75ac6ad2ada
--- /dev/null
+++ b/pkg/sql/exec/builtin_funcs_test.go
@@ -0,0 +1,101 @@
+// Copyright 2019 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package exec
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
+	"github.com/cockroachdb/cockroach/pkg/sql/parser"
+	"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
+	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
+	"github.com/cockroachdb/cockroach/pkg/sql/types"
+)
+
+// Typing context for the typechecker.
+type mockTypeContext struct {
+	ty []types.T
+}
+
+func (p *mockTypeContext) IndexedVarEval(idx int, ctx *tree.EvalContext) (tree.Datum, error) {
+	return tree.DNull.Eval(ctx)
+}
+
+func (p *mockTypeContext) IndexedVarResolvedType(idx int) *types.T {
+	return &p.ty[idx]
+}
+
+func (p *mockTypeContext) IndexedVarNodeFormatter(idx int) tree.NodeFormatter {
+	n := tree.Name(fmt.Sprintf("$%d", idx))
+	return &n
+}
+
+func TestBasicBuiltinFunctions(t *testing.T) {
+	// Trick to get the init() for the builtins package to run.
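+	// Referencing any exported symbol from that package (here AllBuiltinNames)
+	// forces its init() to run, which registers the builtin definitions that
+	// tree.TypeCheck needs to resolve substring, abs, and length below.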
+ _ = builtins.AllBuiltinNames + + testCases := []struct { + desc string + expr string + inputTuples tuples + inputTypes []types.T + outputTypes []types.T + outputTuples tuples + }{ + { + desc: "Substring test", + expr: "substring(@1, 1, 2)", + inputTuples: tuples{{"Hello"}, {"There"}}, + inputTypes: []types.T{*types.String}, + outputTuples: tuples{{"He"}, {"Th"}}, + outputTypes: []types.T{*types.String, *types.String}, + }, + { + desc: "Absolute value test", + expr: "abs(@1)", + inputTuples: tuples{{1}, {-1}}, + inputTypes: []types.T{*types.Int}, + outputTuples: tuples{{1}, {1}}, + outputTypes: []types.T{*types.Int, *types.Int}, + }, + { + desc: "String length test", + expr: "length(@1)", + inputTuples: tuples{{"Hello"}, {"The"}}, + inputTypes: []types.T{*types.String}, + outputTuples: tuples{{5}, {3}}, + outputTypes: []types.T{*types.String, *types.Int}, + }, + } + + tctx := tree.NewTestingEvalContext(cluster.MakeTestingClusterSettings()) + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + runTests(t, []tuples{tc.inputTuples}, tc.outputTuples, orderedVerifier, []int{1}, + func(input []Operator) (Operator, error) { + expr, err := parser.ParseExpr(tc.expr) + if err != nil { + t.Fatal(err) + } + + p := &mockTypeContext{ty: tc.inputTypes} + typedExpr, err := tree.TypeCheck(expr, &tree.SemaContext{IVarContainer: p}, types.Any) + if err != nil { + t.Fatal(err) + } + + return NewBuiltinFunctionOperator(tctx, tc.outputTypes, input[0], typedExpr.(*tree.FuncExpr), 1) + }) + }) + } +} diff --git a/pkg/sql/exec/vec_elem_to_datum.go b/pkg/sql/exec/vec_elem_to_datum.go new file mode 100644 index 000000000000..3b661edefad0 --- /dev/null +++ b/pkg/sql/exec/vec_elem_to_datum.go @@ -0,0 +1,65 @@ +// Copyright 2019 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package exec + +import ( + "fmt" + "unsafe" + + "github.com/cockroachdb/cockroach/pkg/sql/exec/coldata" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" + semtypes "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate" + "github.com/lib/pq/oid" +) + +// PhysicalTypeColElemToDatum converts an element in a colvec to a datum of semtype ct. 
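+// The element at rowIdx is assumed to be non-NULL; callers (the materializer
+// and the default builtin operator) consult the column's null bitmap first and
+// substitute tree.DNull themselves.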
+func PhysicalTypeColElemToDatum( + col coldata.Vec, rowIdx uint16, da sqlbase.DatumAlloc, ct semtypes.T, +) tree.Datum { + switch ct.Family() { + case semtypes.BoolFamily: + if col.Bool()[rowIdx] { + return tree.DBoolTrue + } + return tree.DBoolFalse + case semtypes.IntFamily: + switch ct.Width() { + case 8: + return da.NewDInt(tree.DInt(col.Int8()[rowIdx])) + case 16: + return da.NewDInt(tree.DInt(col.Int16()[rowIdx])) + case 32: + return da.NewDInt(tree.DInt(col.Int32()[rowIdx])) + default: + return da.NewDInt(tree.DInt(col.Int64()[rowIdx])) + } + case semtypes.FloatFamily: + return da.NewDFloat(tree.DFloat(col.Float64()[rowIdx])) + case semtypes.DecimalFamily: + return da.NewDDecimal(tree.DDecimal{Decimal: col.Decimal()[rowIdx]}) + case semtypes.DateFamily: + return tree.NewDDate(pgdate.MakeCompatibleDateFromDisk(col.Int64()[rowIdx])) + case semtypes.StringFamily: + b := col.Bytes()[rowIdx] + if ct.Oid() == oid.T_name { + return da.NewDName(tree.DString(*(*string)(unsafe.Pointer(&b)))) + } + return da.NewDString(tree.DString(*(*string)(unsafe.Pointer(&b)))) + case semtypes.BytesFamily: + return da.NewDBytes(tree.DBytes(col.Bytes()[rowIdx])) + case semtypes.OidFamily: + return da.NewDOid(tree.MakeDOid(tree.DInt(col.Int64()[rowIdx]))) + default: + panic(fmt.Sprintf("Unsupported column type %s", ct.String())) + } +} diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize b/pkg/sql/logictest/testdata/logic_test/vectorize index bb54213c10c1..a2104b85f080 100644 --- a/pkg/sql/logictest/testdata/logic_test/vectorize +++ b/pkg/sql/logictest/testdata/logic_test/vectorize @@ -422,3 +422,33 @@ INSERT INTO t38626 VALUES (1, 'hello') statement ok EXPERIMENTAL SCRUB TABLE t38626 + +# Testing some builtin functions. +statement ok +CREATE TABLE builtin_test (x STRING, y INT) + +statement ok +INSERT INTO builtin_test VALUES ('Hello', 3), ('There', 2) + +query T +SELECT substring(x, 1, y) FROM builtin_test +---- +Hel +Th + +query I +SELECT abs(y) FROM builtin_test +---- +3 +2 + +statement ok +CREATE TABLE extract_test (x DATE) + +statement ok +INSERT INTO extract_test VALUES ('2017-01-01') + +query I +SELECT EXTRACT(YEAR FROM x) FROM extract_test +---- +2017
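As a rough illustration of the follow-up direction mentioned in the commit message -- replacing the generic fallback with specialized vectorized implementations of individual builtins -- here is a minimal, self-contained sketch of a column-at-a-time abs kernel. It is plain Go rather than the exec.Operator interface, and the absInt64 name and the flat []int64 column representation are assumptions made for this example, not part of the patch.

package main

import "fmt"

// absInt64 operates directly on the packed column without materializing
// datums, which is what lets a specialized vectorized builtin avoid the
// per-row conversion cost paid by the default operator above.
func absInt64(col []int64, out []int64) {
	for i, v := range col {
		if v < 0 {
			v = -v
		}
		out[i] = v
	}
}

func main() {
	col := []int64{1, -1, -7}
	out := make([]int64, len(col))
	absInt64(col, out)
	fmt.Println(out) // [1 1 7]
}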